claude-print/tests/transcript.rs
jedarden 7176ef2939 Add bf-5nr validation notes: claude-print-ci WorkflowTemplate YAML is valid
YAML parses cleanly and kubectl dry-run returns no errors.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-10 02:11:37 -04:00

436 lines
15 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use claude_print::transcript::{parse_transcript, read_transcript, AggregatedUsage};
use std::io::Write;
use std::path::Path;
use tempfile::TempDir;
/// Creates (or truncates) the file at `path` and writes every entry of
/// `lines` to it, each terminated by a single `\n`.
///
/// Test helper: panics if the file cannot be created or a write fails.
fn write_jsonl(path: &Path, lines: &[String]) {
    let mut out = std::fs::File::create(path).unwrap();
    lines
        .iter()
        .for_each(|entry| writeln!(out, "{entry}").unwrap());
}
fn assistant_event(
id: &str,
text: &str,
in_tok: u64,
out_tok: u64,
cache_create: u64,
cache_read: u64,
) -> String {
serde_json::json!({
"type": "assistant",
"message": {
"id": id,
"content": [{"type": "text", "text": text}],
"usage": {
"input_tokens": in_tok,
"output_tokens": out_tok,
"cache_creation_input_tokens": cache_create,
"cache_read_input_tokens": cache_read
}
}
})
.to_string()
}
fn assistant_event_no_id(usage_in: u64, usage_out: u64, text: &str) -> String {
serde_json::json!({
"type": "assistant",
"message": {
"content": [{"type": "text", "text": text}],
"usage": {
"input_tokens": usage_in,
"output_tokens": usage_out,
"cache_creation_input_tokens": 0,
"cache_read_input_tokens": 0
}
}
})
.to_string()
}
/// Serializes a `result` transcript event carrying `session_id` and the
/// `is_error` flag to a compact JSON string.
fn result_event(session_id: &str, is_error: bool) -> String {
    let event = serde_json::json!({
        "type": "result",
        "session_id": session_id,
        "is_error": is_error
    });
    event.to_string()
}
// ── Single turn, single text block ───────────────────────────────────────────
/// One assistant event with one text block must parse to that text, a single
/// turn, and the event's own input/output token counts.
#[test]
fn test_single_turn_single_text_block() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");
    let events = [assistant_event("msg-1", "hello world", 10, 5, 0, 0)];
    write_jsonl(&transcript, &events);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.text, "hello world");
    assert_eq!(parsed.num_turns, 1);
    assert_eq!(parsed.usage.input_tokens, 10);
    assert_eq!(parsed.usage.output_tokens, 5);
}
// ── Multi-block content: text + tool_use + thinking + text → text concatenated
/// A message mixing `text`, `tool_use` and `thinking` blocks must yield only
/// its `text` blocks, concatenated in order, and count as one turn.
#[test]
fn test_multi_block_content() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    let blocks = serde_json::json!([
        {"type": "text", "text": "first "},
        {"type": "tool_use", "name": "bash", "id": "toolu_1", "input": {}},
        {"type": "thinking", "thinking": "reasoning here"},
        {"type": "text", "text": "second"}
    ]);
    let usage = serde_json::json!({
        "input_tokens": 20,
        "output_tokens": 10,
        "cache_creation_input_tokens": 0,
        "cache_read_input_tokens": 0
    });
    let line = serde_json::json!({
        "type": "assistant",
        "message": {"id": "msg-2", "content": blocks, "usage": usage}
    })
    .to_string();
    write_jsonl(&transcript, &[line]);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.text, "first second");
    assert_eq!(parsed.num_turns, 1);
}
// ── Multi-turn: 3 unique usage keys → 3 turns, last turn's text returned ─────
/// Three events with distinct message ids must count as three turns, return
/// the text of the last one, and sum the token usage across all of them.
#[test]
fn test_multi_turn_unique_keys() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    // (message id, text, input tokens, output tokens) per turn.
    let turns = [
        ("msg-a", "turn one", 10, 5),
        ("msg-b", "turn two", 20, 8),
        ("msg-c", "turn three", 30, 12),
    ];
    let events: Vec<String> = turns
        .iter()
        .map(|&(id, text, input, output)| assistant_event(id, text, input, output, 0, 0))
        .collect();
    write_jsonl(&transcript, &events);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.num_turns, 3);
    assert_eq!(parsed.text, "turn three");
    assert_eq!(parsed.usage.input_tokens, 60);
    assert_eq!(parsed.usage.output_tokens, 25);
}
// ── Streaming dedup: 5 consecutive events with identical usage → 1 turn ──────
/// Five events sharing one message id and identical usage must collapse into
/// a single turn: text is concatenated, usage is counted once.
#[test]
fn test_streaming_dedup_five_chunks() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    let mut events = Vec::with_capacity(5);
    for i in 0..5 {
        let text = format!("chunk{i}");
        events.push(assistant_event("msg-stream", &text, 10, 5, 0, 0));
    }
    write_jsonl(&transcript, &events);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.num_turns, 1, "5 chunks of same message.id = 1 turn");
    assert_eq!(parsed.text, "chunk0chunk1chunk2chunk3chunk4");
    assert_eq!(parsed.usage.input_tokens, 10);
}
// ── Token aggregation: 45 unique turns → correct sum ─────────────────────────
/// Forty-five events with distinct message ids must be counted as 45 turns,
/// with every usage counter equal to 45 times its per-event value.
#[test]
fn test_token_aggregation_45_turns() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    let mut events = Vec::with_capacity(45);
    for i in 0..45 {
        let id = format!("msg-{i}");
        events.push(assistant_event(&id, "x", 100, 50, 10, 20));
    }
    write_jsonl(&transcript, &events);

    let parsed = parse_transcript(&transcript).unwrap();
    let usage = &parsed.usage;

    assert_eq!(parsed.num_turns, 45);
    assert_eq!(usage.input_tokens, 45 * 100);
    assert_eq!(usage.output_tokens, 45 * 50);
    assert_eq!(usage.cache_creation_input_tokens, 45 * 10);
    assert_eq!(usage.cache_read_input_tokens, 45 * 20);
}
// ── Missing cache_creation_input_tokens → defaults to 0 ──────────────────────
/// A `usage` object that omits the cache counters must still parse, with
/// `cache_creation_input_tokens` reported as 0.
#[test]
fn test_missing_cache_creation_tokens() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    // Only the two mandatory-looking counters are present.
    let usage = serde_json::json!({"input_tokens": 5, "output_tokens": 3});
    let line = serde_json::json!({
        "type": "assistant",
        "message": {
            "id": "msg-3",
            "content": [{"type": "text", "text": "hello"}],
            "usage": usage
        }
    })
    .to_string();
    write_jsonl(&transcript, &[line]);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.text, "hello");
    assert_eq!(parsed.usage.cache_creation_input_tokens, 0);
}
// ── input_tokens: null → treated as 0 ────────────────────────────────────────
/// Usage counters that are JSON `null` must be read as 0, while counters that
/// are present keep their value.
#[test]
fn test_null_input_tokens() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    let usage = serde_json::json!({
        "input_tokens": null,
        "output_tokens": 7,
        "cache_creation_input_tokens": null,
        "cache_read_input_tokens": null
    });
    let line = serde_json::json!({
        "type": "assistant",
        "message": {
            "id": "msg-4",
            "content": [{"type": "text", "text": "text"}],
            "usage": usage
        }
    })
    .to_string();
    write_jsonl(&transcript, &[line]);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.usage.input_tokens, 0);
    assert_eq!(parsed.usage.output_tokens, 7);
    assert_eq!(parsed.usage.cache_creation_input_tokens, 0);
}
// ── Unknown event type → silently skipped ────────────────────────────────────
/// An event whose `type` is not recognised must not affect the result: the
/// following assistant event is still parsed and counted as the only turn.
#[test]
fn test_unknown_event_type_skipped() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    let unknown = String::from(r#"{"type":"new-future-event","data":{"foo":42}}"#);
    let known = assistant_event("msg-5", "real text", 10, 5, 0, 0);
    write_jsonl(&transcript, &[unknown, known]);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.text, "real text");
    assert_eq!(parsed.num_turns, 1);
}
// ── Unknown content block type → skipped, text blocks still extracted ─────────
/// A content block of an unhandled type (`image` here) must be skipped while
/// the `text` block in the same message is still extracted.
#[test]
fn test_unknown_content_block_skipped() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    let blocks = serde_json::json!([
        {"type": "image", "source": {"type": "base64", "data": "abc"}},
        {"type": "text", "text": "still here"}
    ]);
    let usage = serde_json::json!({
        "input_tokens": 5,
        "output_tokens": 3,
        "cache_creation_input_tokens": 0,
        "cache_read_input_tokens": 0
    });
    let line = serde_json::json!({
        "type": "assistant",
        "message": {"id": "msg-6", "content": blocks, "usage": usage}
    })
    .to_string();
    write_jsonl(&transcript, &[line]);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.text, "still here");
}
// ── Unknown usage fields → silently ignored ───────────────────────────────────
/// Extra, unrecognised keys inside `usage` (scalar or nested) must not break
/// parsing; the known counters keep their values.
#[test]
fn test_unknown_usage_fields_ignored() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    let usage = serde_json::json!({
        "input_tokens": 8,
        "output_tokens": 4,
        "cache_creation_input_tokens": 0,
        "cache_read_input_tokens": 0,
        "future_token_field": 999,
        "nested_future": {"a": 1}
    });
    let line = serde_json::json!({
        "type": "assistant",
        "message": {
            "id": "msg-7",
            "content": [{"type": "text", "text": "ok"}],
            "usage": usage
        }
    })
    .to_string();
    write_jsonl(&transcript, &[line]);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.usage.input_tokens, 8);
    assert_eq!(parsed.usage.output_tokens, 4);
}
// ── Malformed JSONL line → skipped, subsequent lines parsed ──────────────────
/// A truncated (invalid JSON) line must be skipped; the valid event on the
/// next line is still parsed and is the only turn counted.
#[test]
fn test_malformed_line_skipped() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    // Cut off mid-object, so this line is not valid JSON.
    let truncated = String::from(
        r#"{"type":"assistant","message":{"id":"msg-bad","content":[{"type":"text""#,
    );
    let intact = assistant_event("msg-8", "recovered", 5, 3, 0, 0);
    write_jsonl(&transcript, &[truncated, intact]);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.text, "recovered");
    assert_eq!(parsed.num_turns, 1);
}
// ── Empty file → empty text, zero token counts, no panic ─────────────────────
/// A zero-byte transcript must parse successfully to empty text, zero turns
/// and default (all-zero) usage.
#[test]
fn test_empty_file() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");
    // Create the file with no content at all.
    std::fs::write(&transcript, b"").unwrap();

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.text, "");
    assert_eq!(parsed.num_turns, 0);
    assert_eq!(parsed.usage, AggregatedUsage::default());
}
// ── Usage-fingerprint fallback dedup (no message.id) ─────────────────────────
/// Three events without a `message.id` but with identical usage must be
/// treated as one turn, with their text concatenated.
#[test]
fn test_fingerprint_dedup_no_id() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    // Same usage on every chunk, no id to key on.
    let mut events = Vec::with_capacity(3);
    for i in 0..3 {
        events.push(assistant_event_no_id(10, 5, &format!("p{i}")));
    }
    write_jsonl(&transcript, &events);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.num_turns, 1);
    assert_eq!(parsed.text, "p0p1p2");
}
// ── Result event: session_id and is_error extracted ──────────────────────────
/// A trailing `result` event must populate `session_id` and `is_error` on the
/// parsed transcript.
#[test]
fn test_result_event_fields() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("t.jsonl");

    let events = [
        assistant_event("msg-9", "response", 5, 3, 0, 0),
        result_event("session-xyz", false),
    ];
    write_jsonl(&transcript, &events);

    let parsed = parse_transcript(&transcript).unwrap();

    assert_eq!(parsed.session_id.as_deref(), Some("session-xyz"));
    assert!(!parsed.is_error);
}
// ── read_transcript: fallback to last_assistant_message ──────────────────────
/// When the transcript contains no assistant text, `read_transcript` must
/// return the caller-supplied fallback text and set `used_fallback`.
#[test]
fn test_fallback_to_last_assistant_message() {
    let dir = TempDir::new().unwrap();
    // Use a file that exists but holds only a result event (no assistant
    // text). A missing file is deliberately avoided: per the original note,
    // the retries would have to time out first, which would slow the test.
    let path = dir.path().join("empty.jsonl");
    write_jsonl(&path, &[result_event("s1", false)]);

    let r = read_transcript(&path, Some("fallback text")).unwrap();

    assert_eq!(r.text, "fallback text");
    assert!(r.used_fallback);
}
// ── read_transcript: error when both empty ────────────────────────────────────
/// With no assistant text in the transcript and no fallback supplied,
/// `read_transcript` must return an error.
#[test]
fn test_both_empty_returns_error() {
    let tmp = TempDir::new().unwrap();
    let transcript = tmp.path().join("empty.jsonl");
    // Only a result event: the file exists but carries no assistant text.
    let events = [result_event("s2", false)];
    write_jsonl(&transcript, &events);

    let outcome = read_transcript(&transcript, None);

    assert!(outcome.is_err());
}
// ── test_streaming_dedup_40_retries: race + dedup combined ───────────────────
// Simulates Stop-before-JSONL-flush (AS-6 / MOCK_DELAY_JSONL=100):
// JSONL is written after 100ms; retry loop (40×50ms = 2s budget) catches it.
// Also verifies that streaming chunks (same message.id) are correctly deduped.
/// Race + dedup: the transcript does not exist when `read_transcript` is
/// called; a background thread writes it after a delay taken from
/// `MOCK_DELAY_JSONL` (milliseconds, default 100).
///
/// The file holds five streaming chunks sharing one `message.id`, so the
/// result must be one turn with concatenated text and usage counted once.
#[test]
fn test_streaming_dedup_40_retries() {
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("race.jsonl");
    let writer_path = path.clone();

    let writer = std::thread::spawn(move || {
        let delay_ms: u64 = std::env::var("MOCK_DELAY_JSONL")
            .ok()
            .and_then(|v| v.parse().ok())
            .unwrap_or(100);
        std::thread::sleep(std::time::Duration::from_millis(delay_ms));

        // 5 streaming chunks of the same turn (same message.id), built with
        // the shared helper and written in a single call, as before.
        let content: String = (0..5)
            .map(|i| assistant_event("msg-race-1", &format!("chunk{i}"), 10, 5, 0, 0) + "\n")
            .collect();
        std::fs::write(&writer_path, content).unwrap();
    });

    // Hold the result and join the writer before unwrapping: a panic inside
    // the writer is then reported as such, and the thread has always finished
    // before `dir` goes out of scope.
    let result = read_transcript(&path, None);
    writer
        .join()
        .expect("writer thread panicked while creating the transcript");

    let r = result.unwrap();
    assert_eq!(
        r.num_turns, 1,
        "5 streaming chunks of same message.id = 1 turn"
    );
    assert_eq!(r.text, "chunk0chunk1chunk2chunk3chunk4");
    assert_eq!(r.usage.input_tokens, 10);
}
// ── test_transcript_race: MOCK_DELAY_JSONL=100 ───────────────────────────────
// Direct test for the race window mitigation (AS-6).
/// Race window: the transcript is created by a background thread 100 ms after
/// `read_transcript` has been called, and must still be read successfully.
#[test]
fn test_transcript_race() {
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("race2.jsonl");
    let writer_path = path.clone();

    let writer = std::thread::spawn(move || {
        std::thread::sleep(std::time::Duration::from_millis(100));
        let line = assistant_event("msg-race-2", "race result", 10, 5, 0, 0);
        std::fs::write(&writer_path, format!("{line}\n")).unwrap();
    });

    // Hold the result and join the writer before unwrapping: a panic inside
    // the writer is then reported as such, and the thread has always finished
    // before `dir` goes out of scope.
    let result = read_transcript(&path, None);
    writer
        .join()
        .expect("writer thread panicked while creating the transcript");

    let r = result.unwrap();
    assert_eq!(r.text, "race result");
    assert_eq!(r.num_turns, 1);
}