1pub mod trace;
11
12use std::path::{Path, PathBuf};
13use std::sync::atomic::{AtomicU32, Ordering};
14
15use base64::Engine as _;
16use zeph_llm::provider::{Message, MessagePart, Role, ToolDefinition};
17
18use crate::redact::scrub_content;
19
20pub use zeph_config::DumpFormat;
21
/// Writes debug artifacts (LLM requests, responses, tool output, compaction
/// diagnostics, ...) as numbered files inside one session directory.
pub struct DebugDumper {
    /// Session directory that every dump file is written into.
    dir: PathBuf,
    /// Counter that supplies the zero-padded numeric prefix of dump file names.
    counter: AtomicU32,
    /// Selected dump format; with `DumpFormat::Trace` the dump methods of this
    /// type write no files.
    format: DumpFormat,
}
27
/// Borrowed description of one LLM request, consumed by
/// [`DebugDumper::dump_request`].
pub struct RequestDebugDump<'a> {
    /// Model name used as a fallback when `provider_request` has no `"model"` key.
    pub model_name: &'a str,
    /// Conversation messages: serialized directly in the JSON format, and used
    /// to rebuild `system`/`messages` in the raw format when the provider
    /// payload carries neither key.
    pub messages: &'a [Message],
    /// Tool definitions used as a fallback when `provider_request` has no
    /// `"tools"` key.
    pub tools: &'a [ToolDefinition],
    /// Provider-specific request payload; queried for `model`, `max_tokens` /
    /// `max_completion_tokens`, `tools`, `temperature` and `cache_control`.
    pub provider_request: serde_json::Value,
}
34
35impl DebugDumper {
36 pub fn new(base_dir: &Path, format: DumpFormat) -> std::io::Result<Self> {
42 let ts = std::time::SystemTime::now()
43 .duration_since(std::time::UNIX_EPOCH)
44 .map_or(0, |d| d.as_secs());
45 let dir = base_dir.join(ts.to_string());
46 std::fs::create_dir_all(&dir)?;
47 tracing::info!(path = %dir.display(), format = ?format, "debug dump directory created");
48 Ok(Self {
49 dir,
50 counter: AtomicU32::new(0),
51 format,
52 })
53 }
54
55 #[must_use]
57 pub fn dir(&self) -> &Path {
58 &self.dir
59 }
60
61 fn next_id(&self) -> u32 {
62 self.counter.fetch_add(1, Ordering::Relaxed)
63 }
64
65 fn write(&self, filename: &str, content: &[u8]) {
66 let path = self.dir.join(filename);
67 if let Err(e) = std::fs::write(&path, content) {
68 tracing::warn!(path = %path.display(), error = %e, "debug dump write failed");
69 }
70 }
71
72 pub fn dump_request(&self, request: &RequestDebugDump<'_>) -> u32 {
77 let id = self.next_id();
78 if self.format == DumpFormat::Trace {
80 return id;
81 }
82 let json = match self.format {
83 DumpFormat::Json => json_dump(request),
84 DumpFormat::Raw => raw_dump(request),
85 DumpFormat::Trace => unreachable!("handled above"),
86 };
87 self.write(&format!("{id:04}-request.json"), json.as_bytes());
88 id
89 }
90
91 pub fn dump_response(&self, id: u32, response: &str) {
94 if self.format == DumpFormat::Trace {
95 return;
96 }
97 self.write(&format!("{id:04}-response.txt"), response.as_bytes());
98 }
99
100 pub fn dump_tool_output(&self, tool_name: &str, output: &str) {
103 if self.format == DumpFormat::Trace {
104 return;
105 }
106 let id = self.next_id();
107 let safe_name = sanitize_dump_name(tool_name);
108 self.write(&format!("{id:04}-tool-{safe_name}.txt"), output.as_bytes());
109 }
110
111 pub(crate) fn dump_pruning_scores(
114 &self,
115 scores: &[crate::agent::compaction_strategy::BlockScore],
116 ) {
117 if self.format == DumpFormat::Trace {
118 return;
119 }
120 let id = self.next_id();
121 let payload: Vec<serde_json::Value> = scores
122 .iter()
123 .map(|s| {
124 serde_json::json!({
125 "msg_index": s.msg_index,
126 "relevance": s.relevance,
127 "redundancy": s.redundancy,
128 "mig": s.mig,
129 })
130 })
131 .collect();
132 match serde_json::to_string_pretty(&serde_json::json!({ "scores": payload })) {
133 Ok(json) => self.write(&format!("{id:04}-pruning-scores.json"), json.as_bytes()),
134 Err(e) => tracing::warn!("dump_pruning_scores: serialize failed: {e}"),
135 }
136 }
137
138 pub(crate) fn dump_anchored_summary(
143 &self,
144 summary: &zeph_memory::AnchoredSummary,
145 fallback: bool,
146 token_counter: &zeph_memory::TokenCounter,
147 ) {
148 if self.format == DumpFormat::Trace {
149 return;
150 }
151 let id = self.next_id();
152 let section_completeness = serde_json::json!({
153 "session_intent": !summary.session_intent.trim().is_empty(),
154 "files_modified": !summary.files_modified.is_empty(),
155 "decisions_made": !summary.decisions_made.is_empty(),
156 "open_questions": !summary.open_questions.is_empty(),
157 "next_steps": !summary.next_steps.is_empty(),
158 });
159 let total_items = summary.files_modified.len()
160 + summary.decisions_made.len()
161 + summary.open_questions.len()
162 + summary.next_steps.len();
163 let markdown = summary.to_markdown();
164 let token_estimate = token_counter.count_tokens(&markdown);
165 let payload = serde_json::json!({
166 "summary": summary,
167 "section_completeness": section_completeness,
168 "total_items": total_items,
169 "token_estimate": token_estimate,
170 "fallback": fallback,
171 });
172 match serde_json::to_string_pretty(&payload) {
173 Ok(json) => self.write(&format!("{id:04}-anchored-summary.json"), json.as_bytes()),
174 Err(e) => tracing::warn!("dump_anchored_summary: serialize failed: {e}"),
175 }
176 }
177
178 pub(crate) fn dump_compaction_probe(&self, result: &zeph_memory::CompactionProbeResult) {
181 if self.format == DumpFormat::Trace {
182 return;
183 }
184 let id = self.next_id();
185 let questions: Vec<serde_json::Value> = result
186 .questions
187 .iter()
188 .zip(
189 result
190 .answers
191 .iter()
192 .chain(std::iter::repeat(&String::new())),
193 )
194 .zip(
195 result
196 .per_question_scores
197 .iter()
198 .chain(std::iter::repeat(&0.0_f32)),
199 )
200 .map(|((q, a), &s)| {
201 serde_json::json!({
202 "question": scrub_content(&q.question),
203 "expected": scrub_content(&q.expected_answer),
204 "actual": scrub_content(a),
205 "score": s,
206 "category": format!("{:?}", q.category),
207 })
208 })
209 .collect();
210 let category_scores: Vec<serde_json::Value> = result
211 .category_scores
212 .iter()
213 .map(|cs| {
214 serde_json::json!({
215 "category": format!("{:?}", cs.category),
216 "score": cs.score,
217 "probes_run": cs.probes_run,
218 })
219 })
220 .collect();
221 let payload = serde_json::json!({
222 "score": result.score,
223 "category_scores": category_scores,
224 "threshold": result.threshold,
225 "hard_fail_threshold": result.hard_fail_threshold,
226 "verdict": format!("{:?}", result.verdict),
227 "model": result.model,
228 "duration_ms": result.duration_ms,
229 "questions": questions,
230 });
231 match serde_json::to_string_pretty(&payload) {
232 Ok(json) => {
233 self.write(&format!("{id:04}-compaction-probe.json"), json.as_bytes());
234 }
235 Err(e) => tracing::warn!("dump_compaction_probe: serialize failed: {e}"),
236 }
237 }
238
239 pub fn dump_focus_knowledge(&self, knowledge: &str) {
242 if self.format == DumpFormat::Trace {
243 return;
244 }
245 let id = self.next_id();
246 self.write(
247 &format!("{id:04}-focus-knowledge.txt"),
248 knowledge.as_bytes(),
249 );
250 }
251
252 pub(crate) fn dump_sidequest_eviction(
255 &self,
256 cursors: &[crate::agent::sidequest::ToolOutputCursor],
257 evicted_indices: &[usize],
258 freed_tokens: usize,
259 ) {
260 if self.format == DumpFormat::Trace {
261 return;
262 }
263 let id = self.next_id();
264 let cursor_info: Vec<serde_json::Value> = cursors
265 .iter()
266 .enumerate()
267 .map(|(i, c)| {
268 serde_json::json!({
269 "cursor_id": i,
270 "msg_index": c.msg_index,
271 "part_index": c.part_index,
272 "tool_name": c.tool_name,
273 "token_count": c.token_count,
274 "evicted": evicted_indices.contains(&i),
275 })
276 })
277 .collect();
278 let payload = serde_json::json!({
279 "cursors": cursor_info,
280 "evicted_indices": evicted_indices,
281 "freed_tokens": freed_tokens,
282 });
283 match serde_json::to_string_pretty(&payload) {
284 Ok(json) => self.write(&format!("{id:04}-sidequest-eviction.json"), json.as_bytes()),
285 Err(e) => tracing::warn!("dump_sidequest_eviction: serialize failed: {e}"),
286 }
287 }
288
289 pub(crate) fn dump_subgoal_registry(
294 &self,
295 registry: &crate::agent::compaction_strategy::SubgoalRegistry,
296 ) {
297 if self.format == DumpFormat::Trace {
298 return;
299 }
300 let id = self.next_id();
301 let mut output = String::from("=== Subgoal Registry ===\n");
302 if registry.subgoals.is_empty() {
303 output.push_str("(no subgoals tracked yet)\n");
304 } else {
305 for sg in ®istry.subgoals {
306 let state_str = match sg.state {
307 crate::agent::compaction_strategy::SubgoalState::Active => "Active ",
308 crate::agent::compaction_strategy::SubgoalState::Completed => "Completed",
309 };
310 let _ = std::fmt::write(
311 &mut output,
312 format_args!(
313 "[{}] {state_str}: \"{}\" (msgs {}-{})\n",
314 sg.id.0, sg.description, sg.start_msg_index, sg.end_msg_index,
315 ),
316 );
317 }
318 }
319 self.write(&format!("{id:04}-subgoal-registry.txt"), output.as_bytes());
320 }
321
322 pub fn dump_tool_error(&self, tool_name: &str, error: &zeph_tools::ToolError) {
325 if self.format == DumpFormat::Trace {
326 return;
327 }
328 let id = self.next_id();
329 let safe_name = sanitize_dump_name(tool_name);
330 let payload = serde_json::json!({
331 "tool": tool_name,
332 "error": error.to_string(),
333 "kind": error.kind().to_string(),
334 });
335 match serde_json::to_string_pretty(&payload) {
336 Ok(json) => {
337 self.write(
338 &format!("{id:04}-tool-error-{safe_name}.json"),
339 json.as_bytes(),
340 );
341 }
342 Err(e) => {
343 tracing::warn!("dump_tool_error: failed to serialize error payload: {e}");
344 }
345 }
346 }
347}
348
349fn json_dump(request: &RequestDebugDump<'_>) -> String {
350 let payload = serde_json::json!({
351 "model": extract_model(&request.provider_request, request.model_name),
352 "max_tokens": extract_max_tokens(&request.provider_request),
353 "messages": serde_json::to_value(request.messages)
354 .unwrap_or(serde_json::Value::Array(vec![])),
355 "tools": extract_tools(&request.provider_request, request.tools),
356 "temperature": request
357 .provider_request
358 .get("temperature")
359 .cloned()
360 .unwrap_or(serde_json::Value::Null),
361 "cache_control": request
362 .provider_request
363 .get("cache_control")
364 .cloned()
365 .unwrap_or(serde_json::Value::Null),
366 });
367 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
368}
369
370fn raw_dump(request: &RequestDebugDump<'_>) -> String {
371 let mut payload = if request.provider_request.is_object() {
372 request.provider_request.clone()
373 } else {
374 serde_json::json!({})
375 };
376 let generic = messages_to_api_value(request.messages);
377 if let Some(obj) = payload.as_object_mut() {
378 obj.entry("model")
379 .or_insert_with(|| extract_model(&request.provider_request, request.model_name));
380 obj.entry("max_tokens")
381 .or_insert_with(|| extract_max_tokens(&request.provider_request));
382 obj.entry("tools")
383 .or_insert_with(|| extract_tools(&request.provider_request, request.tools));
384 obj.entry("temperature").or_insert_with(|| {
385 request
386 .provider_request
387 .get("temperature")
388 .cloned()
389 .unwrap_or(serde_json::Value::Null)
390 });
391 obj.entry("cache_control").or_insert_with(|| {
392 request
393 .provider_request
394 .get("cache_control")
395 .cloned()
396 .unwrap_or(serde_json::Value::Null)
397 });
398 if !obj.contains_key("messages")
399 && !obj.contains_key("system")
400 && let Some(generic_obj) = generic.as_object()
401 {
402 for (key, value) in generic_obj {
403 obj.insert(key.clone(), value.clone());
404 }
405 }
406 }
407 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
408}
409
410fn extract_model(payload: &serde_json::Value, fallback: &str) -> serde_json::Value {
411 payload
412 .get("model")
413 .cloned()
414 .unwrap_or_else(|| serde_json::json!(fallback))
415}
416
417fn extract_max_tokens(payload: &serde_json::Value) -> serde_json::Value {
418 payload
419 .get("max_tokens")
420 .cloned()
421 .or_else(|| payload.get("max_completion_tokens").cloned())
422 .unwrap_or(serde_json::Value::Null)
423}
424
425fn extract_tools(payload: &serde_json::Value, fallback: &[ToolDefinition]) -> serde_json::Value {
426 payload.get("tools").cloned().unwrap_or_else(|| {
427 serde_json::to_value(fallback).unwrap_or(serde_json::Value::Array(vec![]))
428 })
429}
430
/// Makes `name` safe to embed in a dump file name.
///
/// Alphanumeric characters (per `char::is_alphanumeric`) and `-` are kept;
/// every other character, including path separators and dots, becomes `_`.
fn sanitize_dump_name(name: &str) -> String {
    let mut sanitized = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = ch == '-' || ch.is_alphanumeric();
        sanitized.push(if keep { ch } else { '_' });
    }
    sanitized
}
442
443fn messages_to_api_value(messages: &[Message]) -> serde_json::Value {
447 let system: String = messages
448 .iter()
449 .filter(|m| m.metadata.agent_visible && m.role == Role::System)
450 .map(zeph_llm::provider::Message::to_llm_content)
451 .collect::<Vec<_>>()
452 .join("\n\n");
453
454 let chat: Vec<serde_json::Value> = messages
455 .iter()
456 .filter(|m| m.metadata.agent_visible && m.role != Role::System)
457 .filter_map(|m| {
458 let role = match m.role {
459 Role::User => "user",
460 Role::Assistant => "assistant",
461 Role::System => return None,
462 };
463 let is_assistant = m.role == Role::Assistant;
464 let has_structured = m.parts.iter().any(|p| {
465 matches!(
466 p,
467 MessagePart::ToolUse { .. }
468 | MessagePart::ToolResult { .. }
469 | MessagePart::Image(_)
470 | MessagePart::ThinkingBlock { .. }
471 | MessagePart::RedactedThinkingBlock { .. }
472 )
473 });
474 let content: serde_json::Value = if !has_structured || m.parts.is_empty() {
475 let text = m.to_llm_content();
476 if text.trim().is_empty() {
477 return None;
478 }
479 serde_json::json!(text)
480 } else {
481 let blocks: Vec<serde_json::Value> = m
482 .parts
483 .iter()
484 .filter_map(|p| part_to_block(p, is_assistant))
485 .collect();
486 if blocks.is_empty() {
487 return None;
488 }
489 serde_json::Value::Array(blocks)
490 };
491 Some(serde_json::json!({ "role": role, "content": content }))
492 })
493 .collect();
494
495 serde_json::json!({ "system": system, "messages": chat })
496}
497
498fn part_to_block(part: &MessagePart, is_assistant: bool) -> Option<serde_json::Value> {
499 match part {
500 MessagePart::Text { text }
501 | MessagePart::Recall { text }
502 | MessagePart::CodeContext { text }
503 | MessagePart::Summary { text }
504 | MessagePart::CrossSession { text } => {
505 if text.trim().is_empty() {
506 None
507 } else {
508 Some(serde_json::json!({ "type": "text", "text": text }))
509 }
510 }
511 MessagePart::ToolOutput {
512 tool_name,
513 body,
514 compacted_at,
515 } => {
516 let text = if compacted_at.is_some() {
517 if body.is_empty() {
518 format!("[tool output: {tool_name}] (pruned)")
519 } else {
520 format!("[tool output: {tool_name}] {body}")
521 }
522 } else {
523 format!("[tool output: {tool_name}]\n{body}")
524 };
525 Some(serde_json::json!({ "type": "text", "text": text }))
526 }
527 MessagePart::ToolUse { id, name, input } if is_assistant => {
528 Some(serde_json::json!({ "type": "tool_use", "id": id, "name": name, "input": input }))
529 }
530 MessagePart::ToolUse { name, input, .. } => Some(
531 serde_json::json!({ "type": "text", "text": format!("[tool_use: {name}] {input}") }),
532 ),
533 MessagePart::ToolResult {
534 tool_use_id,
535 content,
536 is_error,
537 } if !is_assistant => Some(
538 serde_json::json!({ "type": "tool_result", "tool_use_id": tool_use_id, "content": content, "is_error": is_error }),
539 ),
540 MessagePart::ToolResult { content, .. } => {
541 if content.trim().is_empty() {
542 None
543 } else {
544 Some(serde_json::json!({ "type": "text", "text": content }))
545 }
546 }
547 MessagePart::ThinkingBlock {
548 thinking,
549 signature,
550 } if is_assistant => Some(
551 serde_json::json!({ "type": "thinking", "thinking": thinking, "signature": signature }),
552 ),
553 MessagePart::RedactedThinkingBlock { data } if is_assistant => {
554 Some(serde_json::json!({ "type": "redacted_thinking", "data": data }))
555 }
556 MessagePart::ThinkingBlock { .. }
557 | MessagePart::RedactedThinkingBlock { .. }
558 | MessagePart::Compaction { .. }
559 if !is_assistant =>
560 {
561 None
562 }
563 MessagePart::ThinkingBlock { .. } | MessagePart::RedactedThinkingBlock { .. } => None,
564 MessagePart::Compaction { summary } => {
565 Some(serde_json::json!({ "type": "compaction", "summary": summary }))
566 }
567 MessagePart::Image(img) => Some(serde_json::json!({
568 "type": "image",
569 "source": {
570 "type": "base64",
571 "media_type": img.mime_type,
572 "data": base64::engine::general_purpose::STANDARD.encode(&img.data),
573 },
574 })),
575 }
576}
577
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Every supported format name parses to its matching variant.
    #[test]
    fn dump_format_from_str_valid() {
        let cases = [
            ("json", DumpFormat::Json),
            ("raw", DumpFormat::Raw),
            ("trace", DumpFormat::Trace),
        ];
        for (text, expected) in cases {
            assert_eq!(text.parse::<DumpFormat>().unwrap(), expected);
        }
    }

    /// An unsupported format name is rejected with a descriptive message.
    #[test]
    fn dump_format_from_str_invalid_returns_error() {
        let err = "binary".parse::<DumpFormat>().unwrap_err();
        let mentions_unknown = err.contains("unknown dump format");
        assert!(
            mentions_unknown,
            "error must mention unknown dump format: {err}"
        );
    }

    /// A minimal conversation: one system prompt followed by one user turn.
    fn sample_messages() -> Vec<Message> {
        let system = Message::from_legacy(Role::System, "system prompt");
        let user = Message::from_legacy(Role::User, "hello");
        vec![system, user]
    }

    /// A single `read_file` tool definition with a one-property schema.
    fn sample_tools() -> Vec<ToolDefinition> {
        let schema = serde_json::json!({
            "type": "object",
            "properties": { "path": { "type": "string" } },
        });
        let read_file = ToolDefinition {
            name: "read_file".into(),
            description: "Read a file".into(),
            parameters: schema,
        };
        vec![read_file]
    }

    /// Parses `0000-request.json` from the first entry found under `dir`.
    fn read_request_dump(dir: &Path) -> serde_json::Value {
        let first_entry = std::fs::read_dir(dir).unwrap().next().unwrap().unwrap();
        let request_file = first_entry.path().join("0000-request.json");
        let raw = std::fs::read_to_string(request_file).unwrap();
        serde_json::from_str(&raw).unwrap()
    }

    /// Dumps the sample conversation with the given settings into a fresh
    /// temporary directory and returns the parsed request file.
    fn dump_sample_request(
        format: DumpFormat,
        model_name: &str,
        provider_request: serde_json::Value,
    ) -> serde_json::Value {
        let root = tempdir().unwrap();
        let dumper = DebugDumper::new(root.path(), format).unwrap();
        let messages = sample_messages();
        let tools = sample_tools();

        dumper.dump_request(&RequestDebugDump {
            model_name,
            messages: &messages,
            tools: &tools,
            provider_request,
        });

        read_request_dump(root.path())
    }

    #[test]
    fn json_dump_request_includes_request_metadata() {
        let payload = dump_sample_request(
            DumpFormat::Json,
            "claude-sonnet-test",
            serde_json::json!({
                "model": "claude-sonnet-test",
                "max_tokens": 4096,
                "tools": [{ "name": "read_file" }],
                "temperature": 0.7,
                "cache_control": { "type": "ephemeral" }
            }),
        );

        assert_eq!(payload["model"], "claude-sonnet-test");
        assert_eq!(payload["max_tokens"], 4096);
        assert_eq!(payload["tools"][0]["name"], "read_file");
        assert_eq!(payload["temperature"], 0.7);
        assert_eq!(payload["cache_control"]["type"], "ephemeral");
        assert_eq!(payload["messages"][1]["content"], "hello");
    }

    #[test]
    fn raw_dump_request_includes_request_metadata() {
        let payload = dump_sample_request(
            DumpFormat::Raw,
            "gpt-5-mini",
            serde_json::json!({
                "model": "gpt-5-mini",
                "max_completion_tokens": 2048,
                "messages": [{ "role": "user", "content": "hello" }],
                "tools": [{ "type": "function", "function": { "name": "read_file" } }],
                "temperature": 0.3,
                "cache_control": null
            }),
        );

        assert_eq!(payload["model"], "gpt-5-mini");
        assert_eq!(payload["max_tokens"], 2048);
        assert_eq!(payload["tools"][0]["function"]["name"], "read_file");
        assert_eq!(payload["temperature"], 0.3);
        assert_eq!(payload["messages"][0]["content"], "hello");
    }
}