1pub mod trace;
11
12use std::path::{Path, PathBuf};
13use std::sync::Arc;
14use std::sync::atomic::{AtomicU32, Ordering};
15
16use base64::Engine as _;
17use zeph_llm::provider::{Message, MessagePart, Role, ToolDefinition};
18
19use crate::redact::scrub_content;
20
21pub use zeph_config::DumpFormat;
22
23#[derive(Clone)]
25pub struct DebugDumper {
26 dir: PathBuf,
27 counter: Arc<AtomicU32>,
28 format: DumpFormat,
29}
30
31pub struct RequestDebugDump<'a> {
32 pub model_name: &'a str,
33 pub messages: &'a [Message],
34 pub tools: &'a [ToolDefinition],
35 pub provider_request: serde_json::Value,
36 pub memcot_state: Option<&'a str>,
41}
42
43impl DebugDumper {
44 pub fn new(base_dir: &Path, format: DumpFormat) -> std::io::Result<Self> {
50 let ts = std::time::SystemTime::now()
51 .duration_since(std::time::UNIX_EPOCH)
52 .map_or(0, |d| d.as_secs());
53 let dir = base_dir.join(ts.to_string());
54 std::fs::create_dir_all(&dir)?;
55 tracing::info!(path = %dir.display(), format = ?format, "debug dump directory created");
56 Ok(Self {
57 dir,
58 counter: Arc::new(AtomicU32::new(0)),
59 format,
60 })
61 }
62
63 #[must_use]
65 pub fn dir(&self) -> &Path {
66 &self.dir
67 }
68
69 #[must_use]
74 pub fn is_trace_format(&self) -> bool {
75 self.format == DumpFormat::Trace
76 }
77
78 fn next_id(&self) -> u32 {
79 self.counter.fetch_add(1, Ordering::Relaxed)
80 }
81
82 fn write(&self, filename: &str, content: &[u8]) {
83 let path = self.dir.join(filename);
84 if let Err(e) = zeph_common::fs_secure::write_private(&path, content) {
85 tracing::warn!(path = %path.display(), error = %e, "debug dump write failed");
86 }
87 }
88
89 #[must_use]
94 pub fn dump_request(&self, request: &RequestDebugDump<'_>) -> u32 {
95 let id = self.next_id();
96 if self.format == DumpFormat::Trace {
98 return id;
99 }
100 let json = match self.format {
101 DumpFormat::Json => json_dump(request),
102 DumpFormat::Raw => raw_dump(request),
103 DumpFormat::Trace => unreachable!("handled above"),
104 };
105 self.write(&format!("{id:04}-request.json"), json.as_bytes());
106 id
107 }
108
109 pub fn dump_response(&self, id: u32, response: &str) {
112 if self.format == DumpFormat::Trace {
113 return;
114 }
115 self.write(&format!("{id:04}-response.txt"), response.as_bytes());
116 }
117
118 pub fn dump_tool_output(&self, tool_name: &str, output: &str) {
121 if self.format == DumpFormat::Trace {
122 return;
123 }
124 let id = self.next_id();
125 let safe_name = sanitize_dump_name(tool_name);
126 self.write(&format!("{id:04}-tool-{safe_name}.txt"), output.as_bytes());
127 }
128
129 #[cfg(test)]
132 pub(crate) fn dump_pruning_scores(&self, scores: &[zeph_agent_context::BlockScore]) {
133 if self.format == DumpFormat::Trace {
134 return;
135 }
136 let id = self.next_id();
137 let payload: Vec<serde_json::Value> = scores
138 .iter()
139 .map(|s| {
140 serde_json::json!({
141 "msg_index": s.msg_index,
142 "relevance": s.relevance,
143 "redundancy": s.redundancy,
144 "mig": s.mig,
145 })
146 })
147 .collect();
148 match serde_json::to_string_pretty(&serde_json::json!({ "scores": payload })) {
149 Ok(json) => self.write(&format!("{id:04}-pruning-scores.json"), json.as_bytes()),
150 Err(e) => tracing::warn!("dump_pruning_scores: serialize failed: {e}"),
151 }
152 }
153
154 pub(crate) fn dump_anchored_summary(
159 &self,
160 summary: &zeph_memory::AnchoredSummary,
161 fallback: bool,
162 token_counter: &zeph_memory::TokenCounter,
163 ) {
164 if self.format == DumpFormat::Trace {
165 return;
166 }
167 let id = self.next_id();
168 let section_completeness = serde_json::json!({
169 "session_intent": !summary.session_intent.trim().is_empty(),
170 "files_modified": !summary.files_modified.is_empty(),
171 "decisions_made": !summary.decisions_made.is_empty(),
172 "open_questions": !summary.open_questions.is_empty(),
173 "next_steps": !summary.next_steps.is_empty(),
174 });
175 let total_items = summary.files_modified.len()
176 + summary.decisions_made.len()
177 + summary.open_questions.len()
178 + summary.next_steps.len();
179 let markdown = summary.to_markdown();
180 let token_estimate = token_counter.count_tokens(&markdown);
181 let payload = serde_json::json!({
182 "summary": summary,
183 "section_completeness": section_completeness,
184 "total_items": total_items,
185 "token_estimate": token_estimate,
186 "fallback": fallback,
187 });
188 match serde_json::to_string_pretty(&payload) {
189 Ok(json) => self.write(&format!("{id:04}-anchored-summary.json"), json.as_bytes()),
190 Err(e) => tracing::warn!("dump_anchored_summary: serialize failed: {e}"),
191 }
192 }
193
194 pub(crate) fn dump_compaction_probe(&self, result: &zeph_memory::CompactionProbeResult) {
197 if self.format == DumpFormat::Trace {
198 return;
199 }
200 let id = self.next_id();
201 let questions: Vec<serde_json::Value> = result
202 .questions
203 .iter()
204 .zip(
205 result
206 .answers
207 .iter()
208 .chain(std::iter::repeat(&String::new())),
209 )
210 .zip(
211 result
212 .per_question_scores
213 .iter()
214 .chain(std::iter::repeat(&0.0_f32)),
215 )
216 .map(|((q, a), &s)| {
217 serde_json::json!({
218 "question": scrub_content(&q.question),
219 "expected": scrub_content(&q.expected_answer),
220 "actual": scrub_content(a),
221 "score": s,
222 "category": format!("{:?}", q.category),
223 })
224 })
225 .collect();
226 let category_scores: Vec<serde_json::Value> = result
227 .category_scores
228 .iter()
229 .map(|cs| {
230 serde_json::json!({
231 "category": format!("{:?}", cs.category),
232 "score": cs.score,
233 "probes_run": cs.probes_run,
234 })
235 })
236 .collect();
237 let payload = serde_json::json!({
238 "score": result.score,
239 "category_scores": category_scores,
240 "threshold": result.threshold,
241 "hard_fail_threshold": result.hard_fail_threshold,
242 "verdict": format!("{:?}", result.verdict),
243 "model": result.model,
244 "duration_ms": result.duration_ms,
245 "questions": questions,
246 });
247 match serde_json::to_string_pretty(&payload) {
248 Ok(json) => {
249 self.write(&format!("{id:04}-compaction-probe.json"), json.as_bytes());
250 }
251 Err(e) => tracing::warn!("dump_compaction_probe: serialize failed: {e}"),
252 }
253 }
254
255 pub fn dump_focus_knowledge(&self, knowledge: &str) {
258 if self.format == DumpFormat::Trace {
259 return;
260 }
261 let id = self.next_id();
262 self.write(
263 &format!("{id:04}-focus-knowledge.txt"),
264 knowledge.as_bytes(),
265 );
266 }
267
268 pub(crate) fn dump_sidequest_eviction(
271 &self,
272 cursors: &[crate::agent::sidequest::ToolOutputCursor],
273 evicted_indices: &[usize],
274 freed_tokens: usize,
275 ) {
276 if self.format == DumpFormat::Trace {
277 return;
278 }
279 let id = self.next_id();
280 let cursor_info: Vec<serde_json::Value> = cursors
281 .iter()
282 .enumerate()
283 .map(|(i, c)| {
284 serde_json::json!({
285 "cursor_id": i,
286 "msg_index": c.msg_index,
287 "part_index": c.part_index,
288 "tool_name": c.tool_name,
289 "token_count": c.token_count,
290 "evicted": evicted_indices.contains(&i),
291 })
292 })
293 .collect();
294 let payload = serde_json::json!({
295 "cursors": cursor_info,
296 "evicted_indices": evicted_indices,
297 "freed_tokens": freed_tokens,
298 });
299 match serde_json::to_string_pretty(&payload) {
300 Ok(json) => self.write(&format!("{id:04}-sidequest-eviction.json"), json.as_bytes()),
301 Err(e) => tracing::warn!("dump_sidequest_eviction: serialize failed: {e}"),
302 }
303 }
304
305 #[cfg(test)]
310 pub(crate) fn dump_subgoal_registry(&self, registry: &zeph_agent_context::SubgoalRegistry) {
311 if self.format == DumpFormat::Trace {
312 return;
313 }
314 let id = self.next_id();
315 let mut output = String::from("=== Subgoal Registry ===\n");
316 if registry.subgoals.is_empty() {
317 output.push_str("(no subgoals tracked yet)\n");
318 } else {
319 for sg in ®istry.subgoals {
320 let state_str = match sg.state {
321 zeph_agent_context::SubgoalState::Active => "Active ",
322 zeph_agent_context::SubgoalState::Completed => "Completed",
323 };
324 let _ = std::fmt::write(
325 &mut output,
326 format_args!(
327 "[{}] {state_str}: \"{}\" (msgs {}-{})\n",
328 sg.id.0, sg.description, sg.start_msg_index, sg.end_msg_index,
329 ),
330 );
331 }
332 }
333 self.write(&format!("{id:04}-subgoal-registry.txt"), output.as_bytes());
334 }
335
336 pub fn dump_tool_error(&self, tool_name: &str, error: &zeph_tools::ToolError) {
339 if self.format == DumpFormat::Trace {
340 return;
341 }
342 let id = self.next_id();
343 let safe_name = sanitize_dump_name(tool_name);
344 let payload = serde_json::json!({
345 "tool": tool_name,
346 "error": error.to_string(),
347 "kind": error.kind().to_string(),
348 });
349 match serde_json::to_string_pretty(&payload) {
350 Ok(json) => {
351 self.write(
352 &format!("{id:04}-tool-error-{safe_name}.json"),
353 json.as_bytes(),
354 );
355 }
356 Err(e) => {
357 tracing::warn!("dump_tool_error: failed to serialize error payload: {e}");
358 }
359 }
360 }
361}
362
363fn json_dump(request: &RequestDebugDump<'_>) -> String {
364 let payload = serde_json::json!({
365 "model": extract_model(&request.provider_request, request.model_name),
366 "max_tokens": extract_max_tokens(&request.provider_request),
367 "messages": serde_json::to_value(request.messages)
368 .unwrap_or(serde_json::Value::Array(vec![])),
369 "tools": extract_tools(&request.provider_request, request.tools),
370 "temperature": request
371 .provider_request
372 .get("temperature")
373 .cloned()
374 .unwrap_or(serde_json::Value::Null),
375 "cache_control": request
376 .provider_request
377 .get("cache_control")
378 .cloned()
379 .unwrap_or(serde_json::Value::Null),
380 "memcot_state": request.memcot_state,
381 });
382 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
383}
384
385fn raw_dump(request: &RequestDebugDump<'_>) -> String {
386 let mut payload = if request.provider_request.is_object() {
387 request.provider_request.clone()
388 } else {
389 serde_json::json!({})
390 };
391 if let Some(obj) = payload.as_object_mut() {
392 obj.entry("model")
393 .or_insert_with(|| extract_model(&request.provider_request, request.model_name));
394 obj.entry("max_tokens")
395 .or_insert_with(|| extract_max_tokens(&request.provider_request));
396 obj.entry("tools")
397 .or_insert_with(|| extract_tools(&request.provider_request, request.tools));
398 obj.entry("temperature").or_insert_with(|| {
399 request
400 .provider_request
401 .get("temperature")
402 .cloned()
403 .unwrap_or(serde_json::Value::Null)
404 });
405 obj.entry("cache_control").or_insert_with(|| {
406 request
407 .provider_request
408 .get("cache_control")
409 .cloned()
410 .unwrap_or(serde_json::Value::Null)
411 });
412 obj.insert(
413 "memcot_state".to_owned(),
414 match request.memcot_state {
415 Some(s) => serde_json::Value::String(s.to_owned()),
416 None => serde_json::Value::Null,
417 },
418 );
419 if !obj.contains_key("messages") && !obj.contains_key("system") {
420 let generic = messages_to_api_value(request.messages);
421 if let Some(generic_obj) = generic.as_object() {
422 for (key, value) in generic_obj {
423 obj.insert(key.clone(), value.clone());
424 }
425 }
426 }
427 }
428 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
429}
430
431fn extract_model(payload: &serde_json::Value, fallback: &str) -> serde_json::Value {
432 payload
433 .get("model")
434 .cloned()
435 .unwrap_or_else(|| serde_json::json!(fallback))
436}
437
438fn extract_max_tokens(payload: &serde_json::Value) -> serde_json::Value {
439 payload
440 .get("max_tokens")
441 .cloned()
442 .or_else(|| payload.get("max_completion_tokens").cloned())
443 .unwrap_or(serde_json::Value::Null)
444}
445
446fn extract_tools(payload: &serde_json::Value, fallback: &[ToolDefinition]) -> serde_json::Value {
447 payload.get("tools").cloned().unwrap_or_else(|| {
448 serde_json::to_value(fallback).unwrap_or(serde_json::Value::Array(vec![]))
449 })
450}
451
/// Makes `name` safe to embed in a dump file name by replacing every
/// character that is neither alphanumeric (Unicode-aware) nor `-` with `_`.
fn sanitize_dump_name(name: &str) -> String {
    name.replace(|c: char| !c.is_alphanumeric() && c != '-', "_")
}
463
464fn messages_to_api_value(messages: &[Message]) -> serde_json::Value {
468 let system: String = messages
469 .iter()
470 .filter(|m| m.metadata.visibility.is_agent_visible() && m.role == Role::System)
471 .map(zeph_llm::provider::Message::to_llm_content)
472 .collect::<Vec<_>>()
473 .join("\n\n");
474
475 let chat: Vec<serde_json::Value> = messages
476 .iter()
477 .filter(|m| m.metadata.visibility.is_agent_visible() && m.role != Role::System)
478 .filter_map(|m| {
479 let role = match m.role {
480 Role::User => "user",
481 Role::Assistant => "assistant",
482 Role::System => return None,
483 };
484 let is_assistant = m.role == Role::Assistant;
485 let has_structured = m.parts.iter().any(|p| {
486 matches!(
487 p,
488 MessagePart::ToolUse { .. }
489 | MessagePart::ToolResult { .. }
490 | MessagePart::Image(_)
491 | MessagePart::ThinkingBlock { .. }
492 | MessagePart::RedactedThinkingBlock { .. }
493 )
494 });
495 let content: serde_json::Value = if !has_structured || m.parts.is_empty() {
496 let text = m.to_llm_content();
497 if text.trim().is_empty() {
498 return None;
499 }
500 serde_json::json!(text)
501 } else {
502 let blocks: Vec<serde_json::Value> = m
503 .parts
504 .iter()
505 .filter_map(|p| part_to_block(p, is_assistant))
506 .collect();
507 if blocks.is_empty() {
508 return None;
509 }
510 serde_json::Value::Array(blocks)
511 };
512 Some(serde_json::json!({ "role": role, "content": content }))
513 })
514 .collect();
515
516 serde_json::json!({ "system": system, "messages": chat })
517}
518
519fn part_to_block(part: &MessagePart, is_assistant: bool) -> Option<serde_json::Value> {
520 match part {
521 MessagePart::Text { text }
522 | MessagePart::Recall { text }
523 | MessagePart::CodeContext { text }
524 | MessagePart::Summary { text }
525 | MessagePart::CrossSession { text } => {
526 if text.trim().is_empty() {
527 None
528 } else {
529 Some(serde_json::json!({ "type": "text", "text": text }))
530 }
531 }
532 MessagePart::ToolOutput {
533 tool_name,
534 body,
535 compacted_at,
536 } => {
537 let text = if compacted_at.is_some() {
538 if body.is_empty() {
539 format!("[tool output: {tool_name}] (pruned)")
540 } else {
541 format!("[tool output: {tool_name}] {body}")
542 }
543 } else {
544 format!("[tool output: {tool_name}]\n{body}")
545 };
546 Some(serde_json::json!({ "type": "text", "text": text }))
547 }
548 MessagePart::ToolUse { id, name, input } if is_assistant => {
549 Some(serde_json::json!({ "type": "tool_use", "id": id, "name": name, "input": input }))
550 }
551 MessagePart::ToolUse { name, input, .. } => Some(
552 serde_json::json!({ "type": "text", "text": format!("[tool_use: {name}] {input}") }),
553 ),
554 MessagePart::ToolResult {
555 tool_use_id,
556 content,
557 is_error,
558 } if !is_assistant => Some(
559 serde_json::json!({ "type": "tool_result", "tool_use_id": tool_use_id, "content": content, "is_error": is_error }),
560 ),
561 MessagePart::ToolResult { content, .. } => {
562 if content.trim().is_empty() {
563 None
564 } else {
565 Some(serde_json::json!({ "type": "text", "text": content }))
566 }
567 }
568 MessagePart::ThinkingBlock {
569 thinking,
570 signature,
571 } if is_assistant => Some(
572 serde_json::json!({ "type": "thinking", "thinking": thinking, "signature": signature }),
573 ),
574 MessagePart::RedactedThinkingBlock { data } if is_assistant => {
575 Some(serde_json::json!({ "type": "redacted_thinking", "data": data }))
576 }
577 MessagePart::ThinkingBlock { .. }
578 | MessagePart::RedactedThinkingBlock { .. }
579 | MessagePart::Compaction { .. }
580 if !is_assistant =>
581 {
582 None
583 }
584 MessagePart::ThinkingBlock { .. } | MessagePart::RedactedThinkingBlock { .. } => None,
585 MessagePart::Compaction { summary } => {
586 Some(serde_json::json!({ "type": "compaction", "summary": summary }))
587 }
588 MessagePart::Image(img) => Some(serde_json::json!({
589 "type": "image",
590 "source": {
591 "type": "base64",
592 "media_type": img.mime_type,
593 "data": base64::engine::general_purpose::STANDARD.encode(&img.data),
594 },
595 })),
596 }
597}
598
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// One system and one user message.
    fn sample_messages() -> Vec<Message> {
        vec![
            Message::from_legacy(Role::System, "system prompt"),
            Message::from_legacy(Role::User, "hello"),
        ]
    }

    /// A single `read_file` tool definition.
    fn sample_tools() -> Vec<ToolDefinition> {
        let parameters = serde_json::json!({
            "type": "object",
            "properties": { "path": { "type": "string" } },
        });
        vec![ToolDefinition {
            name: "read_file".into(),
            description: "Read a file".into(),
            parameters,
            output_schema: None,
        }]
    }

    /// Parses `0000-request.json` from the first session directory under `dir`.
    fn read_request_dump(dir: &Path) -> serde_json::Value {
        let session_dir = std::fs::read_dir(dir)
            .unwrap()
            .next()
            .unwrap()
            .unwrap()
            .path();
        let raw = std::fs::read_to_string(session_dir.join("0000-request.json")).unwrap();
        serde_json::from_str(&raw).unwrap()
    }

    /// Dumps one request built from the sample messages/tools into a fresh
    /// temporary directory and returns the parsed request file.
    fn dump_and_read(
        format: DumpFormat,
        model_name: &str,
        provider_request: serde_json::Value,
        memcot_state: Option<&str>,
    ) -> serde_json::Value {
        let root = tempdir().unwrap();
        let dumper = DebugDumper::new(root.path(), format).unwrap();
        let messages = sample_messages();
        let tools = sample_tools();

        let _ = dumper.dump_request(&RequestDebugDump {
            model_name,
            messages: &messages,
            tools: &tools,
            provider_request,
            memcot_state,
        });

        read_request_dump(root.path())
    }

    #[test]
    fn dump_format_from_str_valid() {
        assert_eq!("json".parse::<DumpFormat>().unwrap(), DumpFormat::Json);
        assert_eq!("raw".parse::<DumpFormat>().unwrap(), DumpFormat::Raw);
        assert_eq!("trace".parse::<DumpFormat>().unwrap(), DumpFormat::Trace);
    }

    #[test]
    fn dump_format_from_str_invalid_returns_error() {
        let err = "binary".parse::<DumpFormat>().unwrap_err();
        assert!(
            err.contains("unknown dump format"),
            "error must mention unknown dump format: {err}"
        );
    }

    #[test]
    fn json_dump_request_includes_request_metadata() {
        let payload = dump_and_read(
            DumpFormat::Json,
            "claude-sonnet-test",
            serde_json::json!({
                "model": "claude-sonnet-test",
                "max_tokens": 4096,
                "tools": [{ "name": "read_file" }],
                "temperature": 0.7,
                "cache_control": { "type": "ephemeral" }
            }),
            None,
        );

        assert_eq!(payload["model"], "claude-sonnet-test");
        assert_eq!(payload["max_tokens"], 4096);
        assert_eq!(payload["tools"][0]["name"], "read_file");
        assert_eq!(payload["temperature"], 0.7);
        assert_eq!(payload["cache_control"]["type"], "ephemeral");
        assert_eq!(payload["messages"][1]["content"], "hello");
    }

    #[test]
    fn raw_dump_request_includes_request_metadata() {
        let payload = dump_and_read(
            DumpFormat::Raw,
            "gpt-5-mini",
            serde_json::json!({
                "model": "gpt-5-mini",
                "max_completion_tokens": 2048,
                "messages": [{ "role": "user", "content": "hello" }],
                "tools": [{ "type": "function", "function": { "name": "read_file" } }],
                "temperature": 0.3,
                "cache_control": null
            }),
            None,
        );

        assert_eq!(payload["model"], "gpt-5-mini");
        assert_eq!(payload["max_tokens"], 2048);
        assert_eq!(payload["tools"][0]["function"]["name"], "read_file");
        assert_eq!(payload["temperature"], 0.3);
        assert_eq!(payload["messages"][0]["content"], "hello");
    }

    #[test]
    fn memcot_state_written_to_dump_when_present() {
        for fmt in [DumpFormat::Json, DumpFormat::Raw] {
            let payload = dump_and_read(
                fmt,
                "test-model",
                serde_json::json!({ "model": "test-model", "max_tokens": 1024 }),
                Some("Rust uses LLVM; user is refactoring the parser"),
            );

            assert_eq!(
                payload["memcot_state"], "Rust uses LLVM; user is refactoring the parser",
                "memcot_state must appear in {fmt:?} dump"
            );
        }
    }

    #[test]
    fn memcot_state_null_when_absent() {
        let payload = dump_and_read(
            DumpFormat::Json,
            "test-model",
            serde_json::json!({ "model": "test-model", "max_tokens": 1024 }),
            None,
        );

        assert!(
            payload["memcot_state"].is_null(),
            "memcot_state must be null when None"
        );
    }
}
755}