1pub mod trace;
11
12use std::path::{Path, PathBuf};
13use std::sync::Arc;
14use std::sync::atomic::{AtomicU32, Ordering};
15
16use base64::Engine as _;
17use zeph_llm::provider::{Message, MessagePart, Role, ToolDefinition};
18
19use crate::redact::scrub_content;
20
21pub use zeph_config::DumpFormat;
22
/// Writes numbered debug artifacts (requests, responses, tool output, …) into a
/// per-session directory.
///
/// Clones share the same ID counter through the `Arc`, so sequence numbers are
/// drawn from one stream across all clones of a dumper.
#[derive(Clone)]
pub struct DebugDumper {
    /// Session directory that every dump file is written into.
    dir: PathBuf,
    /// Counter that supplies the numeric prefix of dump file names; shared between clones.
    counter: Arc<AtomicU32>,
    /// Selected dump format; with `DumpFormat::Trace` the methods of this type write no files.
    format: DumpFormat,
}
30
/// Borrowed description of one LLM request, as passed to `DebugDumper::dump_request`.
pub struct RequestDebugDump<'a> {
    /// Model name, used as a fallback when `provider_request` has no `"model"` key.
    pub model_name: &'a str,
    /// Conversation messages of the request.
    pub messages: &'a [Message],
    /// Tool definitions, used as a fallback when `provider_request` has no `"tools"` key.
    pub tools: &'a [ToolDefinition],
    /// JSON payload for the provider; `model`, `max_tokens` (or `max_completion_tokens`),
    /// `tools`, `temperature` and `cache_control` are read from it when present.
    pub provider_request: serde_json::Value,
}
37
38impl DebugDumper {
39 pub fn new(base_dir: &Path, format: DumpFormat) -> std::io::Result<Self> {
45 let ts = std::time::SystemTime::now()
46 .duration_since(std::time::UNIX_EPOCH)
47 .map_or(0, |d| d.as_secs());
48 let dir = base_dir.join(ts.to_string());
49 std::fs::create_dir_all(&dir)?;
50 tracing::info!(path = %dir.display(), format = ?format, "debug dump directory created");
51 Ok(Self {
52 dir,
53 counter: Arc::new(AtomicU32::new(0)),
54 format,
55 })
56 }
57
58 #[must_use]
60 pub fn dir(&self) -> &Path {
61 &self.dir
62 }
63
64 #[must_use]
69 pub fn is_trace_format(&self) -> bool {
70 self.format == DumpFormat::Trace
71 }
72
73 fn next_id(&self) -> u32 {
74 self.counter.fetch_add(1, Ordering::Relaxed)
75 }
76
77 fn write(&self, filename: &str, content: &[u8]) {
78 let path = self.dir.join(filename);
79 if let Err(e) = std::fs::write(&path, content) {
80 tracing::warn!(path = %path.display(), error = %e, "debug dump write failed");
81 }
82 }
83
84 #[must_use]
89 pub fn dump_request(&self, request: &RequestDebugDump<'_>) -> u32 {
90 let id = self.next_id();
91 if self.format == DumpFormat::Trace {
93 return id;
94 }
95 let json = match self.format {
96 DumpFormat::Json => json_dump(request),
97 DumpFormat::Raw => raw_dump(request),
98 DumpFormat::Trace => unreachable!("handled above"),
99 };
100 self.write(&format!("{id:04}-request.json"), json.as_bytes());
101 id
102 }
103
104 pub fn dump_response(&self, id: u32, response: &str) {
107 if self.format == DumpFormat::Trace {
108 return;
109 }
110 self.write(&format!("{id:04}-response.txt"), response.as_bytes());
111 }
112
113 pub fn dump_tool_output(&self, tool_name: &str, output: &str) {
116 if self.format == DumpFormat::Trace {
117 return;
118 }
119 let id = self.next_id();
120 let safe_name = sanitize_dump_name(tool_name);
121 self.write(&format!("{id:04}-tool-{safe_name}.txt"), output.as_bytes());
122 }
123
124 pub(crate) fn dump_pruning_scores(
127 &self,
128 scores: &[crate::agent::compaction_strategy::BlockScore],
129 ) {
130 if self.format == DumpFormat::Trace {
131 return;
132 }
133 let id = self.next_id();
134 let payload: Vec<serde_json::Value> = scores
135 .iter()
136 .map(|s| {
137 serde_json::json!({
138 "msg_index": s.msg_index,
139 "relevance": s.relevance,
140 "redundancy": s.redundancy,
141 "mig": s.mig,
142 })
143 })
144 .collect();
145 match serde_json::to_string_pretty(&serde_json::json!({ "scores": payload })) {
146 Ok(json) => self.write(&format!("{id:04}-pruning-scores.json"), json.as_bytes()),
147 Err(e) => tracing::warn!("dump_pruning_scores: serialize failed: {e}"),
148 }
149 }
150
151 pub(crate) fn dump_anchored_summary(
156 &self,
157 summary: &zeph_memory::AnchoredSummary,
158 fallback: bool,
159 token_counter: &zeph_memory::TokenCounter,
160 ) {
161 if self.format == DumpFormat::Trace {
162 return;
163 }
164 let id = self.next_id();
165 let section_completeness = serde_json::json!({
166 "session_intent": !summary.session_intent.trim().is_empty(),
167 "files_modified": !summary.files_modified.is_empty(),
168 "decisions_made": !summary.decisions_made.is_empty(),
169 "open_questions": !summary.open_questions.is_empty(),
170 "next_steps": !summary.next_steps.is_empty(),
171 });
172 let total_items = summary.files_modified.len()
173 + summary.decisions_made.len()
174 + summary.open_questions.len()
175 + summary.next_steps.len();
176 let markdown = summary.to_markdown();
177 let token_estimate = token_counter.count_tokens(&markdown);
178 let payload = serde_json::json!({
179 "summary": summary,
180 "section_completeness": section_completeness,
181 "total_items": total_items,
182 "token_estimate": token_estimate,
183 "fallback": fallback,
184 });
185 match serde_json::to_string_pretty(&payload) {
186 Ok(json) => self.write(&format!("{id:04}-anchored-summary.json"), json.as_bytes()),
187 Err(e) => tracing::warn!("dump_anchored_summary: serialize failed: {e}"),
188 }
189 }
190
191 pub(crate) fn dump_compaction_probe(&self, result: &zeph_memory::CompactionProbeResult) {
194 if self.format == DumpFormat::Trace {
195 return;
196 }
197 let id = self.next_id();
198 let questions: Vec<serde_json::Value> = result
199 .questions
200 .iter()
201 .zip(
202 result
203 .answers
204 .iter()
205 .chain(std::iter::repeat(&String::new())),
206 )
207 .zip(
208 result
209 .per_question_scores
210 .iter()
211 .chain(std::iter::repeat(&0.0_f32)),
212 )
213 .map(|((q, a), &s)| {
214 serde_json::json!({
215 "question": scrub_content(&q.question),
216 "expected": scrub_content(&q.expected_answer),
217 "actual": scrub_content(a),
218 "score": s,
219 "category": format!("{:?}", q.category),
220 })
221 })
222 .collect();
223 let category_scores: Vec<serde_json::Value> = result
224 .category_scores
225 .iter()
226 .map(|cs| {
227 serde_json::json!({
228 "category": format!("{:?}", cs.category),
229 "score": cs.score,
230 "probes_run": cs.probes_run,
231 })
232 })
233 .collect();
234 let payload = serde_json::json!({
235 "score": result.score,
236 "category_scores": category_scores,
237 "threshold": result.threshold,
238 "hard_fail_threshold": result.hard_fail_threshold,
239 "verdict": format!("{:?}", result.verdict),
240 "model": result.model,
241 "duration_ms": result.duration_ms,
242 "questions": questions,
243 });
244 match serde_json::to_string_pretty(&payload) {
245 Ok(json) => {
246 self.write(&format!("{id:04}-compaction-probe.json"), json.as_bytes());
247 }
248 Err(e) => tracing::warn!("dump_compaction_probe: serialize failed: {e}"),
249 }
250 }
251
252 pub fn dump_focus_knowledge(&self, knowledge: &str) {
255 if self.format == DumpFormat::Trace {
256 return;
257 }
258 let id = self.next_id();
259 self.write(
260 &format!("{id:04}-focus-knowledge.txt"),
261 knowledge.as_bytes(),
262 );
263 }
264
265 pub(crate) fn dump_sidequest_eviction(
268 &self,
269 cursors: &[crate::agent::sidequest::ToolOutputCursor],
270 evicted_indices: &[usize],
271 freed_tokens: usize,
272 ) {
273 if self.format == DumpFormat::Trace {
274 return;
275 }
276 let id = self.next_id();
277 let cursor_info: Vec<serde_json::Value> = cursors
278 .iter()
279 .enumerate()
280 .map(|(i, c)| {
281 serde_json::json!({
282 "cursor_id": i,
283 "msg_index": c.msg_index,
284 "part_index": c.part_index,
285 "tool_name": c.tool_name,
286 "token_count": c.token_count,
287 "evicted": evicted_indices.contains(&i),
288 })
289 })
290 .collect();
291 let payload = serde_json::json!({
292 "cursors": cursor_info,
293 "evicted_indices": evicted_indices,
294 "freed_tokens": freed_tokens,
295 });
296 match serde_json::to_string_pretty(&payload) {
297 Ok(json) => self.write(&format!("{id:04}-sidequest-eviction.json"), json.as_bytes()),
298 Err(e) => tracing::warn!("dump_sidequest_eviction: serialize failed: {e}"),
299 }
300 }
301
302 pub(crate) fn dump_subgoal_registry(
307 &self,
308 registry: &crate::agent::compaction_strategy::SubgoalRegistry,
309 ) {
310 if self.format == DumpFormat::Trace {
311 return;
312 }
313 let id = self.next_id();
314 let mut output = String::from("=== Subgoal Registry ===\n");
315 if registry.subgoals.is_empty() {
316 output.push_str("(no subgoals tracked yet)\n");
317 } else {
318 for sg in ®istry.subgoals {
319 let state_str = match sg.state {
320 crate::agent::compaction_strategy::SubgoalState::Active => "Active ",
321 crate::agent::compaction_strategy::SubgoalState::Completed => "Completed",
322 };
323 let _ = std::fmt::write(
324 &mut output,
325 format_args!(
326 "[{}] {state_str}: \"{}\" (msgs {}-{})\n",
327 sg.id.0, sg.description, sg.start_msg_index, sg.end_msg_index,
328 ),
329 );
330 }
331 }
332 self.write(&format!("{id:04}-subgoal-registry.txt"), output.as_bytes());
333 }
334
335 pub fn dump_tool_error(&self, tool_name: &str, error: &zeph_tools::ToolError) {
338 if self.format == DumpFormat::Trace {
339 return;
340 }
341 let id = self.next_id();
342 let safe_name = sanitize_dump_name(tool_name);
343 let payload = serde_json::json!({
344 "tool": tool_name,
345 "error": error.to_string(),
346 "kind": error.kind().to_string(),
347 });
348 match serde_json::to_string_pretty(&payload) {
349 Ok(json) => {
350 self.write(
351 &format!("{id:04}-tool-error-{safe_name}.json"),
352 json.as_bytes(),
353 );
354 }
355 Err(e) => {
356 tracing::warn!("dump_tool_error: failed to serialize error payload: {e}");
357 }
358 }
359 }
360}
361
362fn json_dump(request: &RequestDebugDump<'_>) -> String {
363 let payload = serde_json::json!({
364 "model": extract_model(&request.provider_request, request.model_name),
365 "max_tokens": extract_max_tokens(&request.provider_request),
366 "messages": serde_json::to_value(request.messages)
367 .unwrap_or(serde_json::Value::Array(vec![])),
368 "tools": extract_tools(&request.provider_request, request.tools),
369 "temperature": request
370 .provider_request
371 .get("temperature")
372 .cloned()
373 .unwrap_or(serde_json::Value::Null),
374 "cache_control": request
375 .provider_request
376 .get("cache_control")
377 .cloned()
378 .unwrap_or(serde_json::Value::Null),
379 });
380 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
381}
382
383fn raw_dump(request: &RequestDebugDump<'_>) -> String {
384 let mut payload = if request.provider_request.is_object() {
385 request.provider_request.clone()
386 } else {
387 serde_json::json!({})
388 };
389 if let Some(obj) = payload.as_object_mut() {
390 obj.entry("model")
391 .or_insert_with(|| extract_model(&request.provider_request, request.model_name));
392 obj.entry("max_tokens")
393 .or_insert_with(|| extract_max_tokens(&request.provider_request));
394 obj.entry("tools")
395 .or_insert_with(|| extract_tools(&request.provider_request, request.tools));
396 obj.entry("temperature").or_insert_with(|| {
397 request
398 .provider_request
399 .get("temperature")
400 .cloned()
401 .unwrap_or(serde_json::Value::Null)
402 });
403 obj.entry("cache_control").or_insert_with(|| {
404 request
405 .provider_request
406 .get("cache_control")
407 .cloned()
408 .unwrap_or(serde_json::Value::Null)
409 });
410 if !obj.contains_key("messages") && !obj.contains_key("system") {
411 let generic = messages_to_api_value(request.messages);
412 if let Some(generic_obj) = generic.as_object() {
413 for (key, value) in generic_obj {
414 obj.insert(key.clone(), value.clone());
415 }
416 }
417 }
418 }
419 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
420}
421
422fn extract_model(payload: &serde_json::Value, fallback: &str) -> serde_json::Value {
423 payload
424 .get("model")
425 .cloned()
426 .unwrap_or_else(|| serde_json::json!(fallback))
427}
428
429fn extract_max_tokens(payload: &serde_json::Value) -> serde_json::Value {
430 payload
431 .get("max_tokens")
432 .cloned()
433 .or_else(|| payload.get("max_completion_tokens").cloned())
434 .unwrap_or(serde_json::Value::Null)
435}
436
437fn extract_tools(payload: &serde_json::Value, fallback: &[ToolDefinition]) -> serde_json::Value {
438 payload.get("tools").cloned().unwrap_or_else(|| {
439 serde_json::to_value(fallback).unwrap_or(serde_json::Value::Array(vec![]))
440 })
441}
442
/// Makes `name` safe to embed in a dump file name.
///
/// Alphanumeric characters (as defined by `char::is_alphanumeric`) and `-` are
/// kept; every other character is replaced by `_`. The character count is
/// preserved.
fn sanitize_dump_name(name: &str) -> String {
    let mut safe = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = ch == '-' || ch.is_alphanumeric();
        safe.push(if keep { ch } else { '_' });
    }
    safe
}
454
455fn messages_to_api_value(messages: &[Message]) -> serde_json::Value {
459 let system: String = messages
460 .iter()
461 .filter(|m| m.metadata.visibility.is_agent_visible() && m.role == Role::System)
462 .map(zeph_llm::provider::Message::to_llm_content)
463 .collect::<Vec<_>>()
464 .join("\n\n");
465
466 let chat: Vec<serde_json::Value> = messages
467 .iter()
468 .filter(|m| m.metadata.visibility.is_agent_visible() && m.role != Role::System)
469 .filter_map(|m| {
470 let role = match m.role {
471 Role::User => "user",
472 Role::Assistant => "assistant",
473 Role::System => return None,
474 };
475 let is_assistant = m.role == Role::Assistant;
476 let has_structured = m.parts.iter().any(|p| {
477 matches!(
478 p,
479 MessagePart::ToolUse { .. }
480 | MessagePart::ToolResult { .. }
481 | MessagePart::Image(_)
482 | MessagePart::ThinkingBlock { .. }
483 | MessagePart::RedactedThinkingBlock { .. }
484 )
485 });
486 let content: serde_json::Value = if !has_structured || m.parts.is_empty() {
487 let text = m.to_llm_content();
488 if text.trim().is_empty() {
489 return None;
490 }
491 serde_json::json!(text)
492 } else {
493 let blocks: Vec<serde_json::Value> = m
494 .parts
495 .iter()
496 .filter_map(|p| part_to_block(p, is_assistant))
497 .collect();
498 if blocks.is_empty() {
499 return None;
500 }
501 serde_json::Value::Array(blocks)
502 };
503 Some(serde_json::json!({ "role": role, "content": content }))
504 })
505 .collect();
506
507 serde_json::json!({ "system": system, "messages": chat })
508}
509
510fn part_to_block(part: &MessagePart, is_assistant: bool) -> Option<serde_json::Value> {
511 match part {
512 MessagePart::Text { text }
513 | MessagePart::Recall { text }
514 | MessagePart::CodeContext { text }
515 | MessagePart::Summary { text }
516 | MessagePart::CrossSession { text } => {
517 if text.trim().is_empty() {
518 None
519 } else {
520 Some(serde_json::json!({ "type": "text", "text": text }))
521 }
522 }
523 MessagePart::ToolOutput {
524 tool_name,
525 body,
526 compacted_at,
527 } => {
528 let text = if compacted_at.is_some() {
529 if body.is_empty() {
530 format!("[tool output: {tool_name}] (pruned)")
531 } else {
532 format!("[tool output: {tool_name}] {body}")
533 }
534 } else {
535 format!("[tool output: {tool_name}]\n{body}")
536 };
537 Some(serde_json::json!({ "type": "text", "text": text }))
538 }
539 MessagePart::ToolUse { id, name, input } if is_assistant => {
540 Some(serde_json::json!({ "type": "tool_use", "id": id, "name": name, "input": input }))
541 }
542 MessagePart::ToolUse { name, input, .. } => Some(
543 serde_json::json!({ "type": "text", "text": format!("[tool_use: {name}] {input}") }),
544 ),
545 MessagePart::ToolResult {
546 tool_use_id,
547 content,
548 is_error,
549 } if !is_assistant => Some(
550 serde_json::json!({ "type": "tool_result", "tool_use_id": tool_use_id, "content": content, "is_error": is_error }),
551 ),
552 MessagePart::ToolResult { content, .. } => {
553 if content.trim().is_empty() {
554 None
555 } else {
556 Some(serde_json::json!({ "type": "text", "text": content }))
557 }
558 }
559 MessagePart::ThinkingBlock {
560 thinking,
561 signature,
562 } if is_assistant => Some(
563 serde_json::json!({ "type": "thinking", "thinking": thinking, "signature": signature }),
564 ),
565 MessagePart::RedactedThinkingBlock { data } if is_assistant => {
566 Some(serde_json::json!({ "type": "redacted_thinking", "data": data }))
567 }
568 MessagePart::ThinkingBlock { .. }
569 | MessagePart::RedactedThinkingBlock { .. }
570 | MessagePart::Compaction { .. }
571 if !is_assistant =>
572 {
573 None
574 }
575 MessagePart::ThinkingBlock { .. } | MessagePart::RedactedThinkingBlock { .. } => None,
576 MessagePart::Compaction { summary } => {
577 Some(serde_json::json!({ "type": "compaction", "summary": summary }))
578 }
579 MessagePart::Image(img) => Some(serde_json::json!({
580 "type": "image",
581 "source": {
582 "type": "base64",
583 "media_type": img.mime_type,
584 "data": base64::engine::general_purpose::STANDARD.encode(&img.data),
585 },
586 })),
587 }
588}
589
590#[cfg(test)]
591mod tests {
592 use super::*;
593 use tempfile::tempdir;
594
595 #[test]
596 fn dump_format_from_str_valid() {
597 assert_eq!("json".parse::<DumpFormat>().unwrap(), DumpFormat::Json);
598 assert_eq!("raw".parse::<DumpFormat>().unwrap(), DumpFormat::Raw);
599 assert_eq!("trace".parse::<DumpFormat>().unwrap(), DumpFormat::Trace);
600 }
601
602 #[test]
603 fn dump_format_from_str_invalid_returns_error() {
604 let err = "binary".parse::<DumpFormat>().unwrap_err();
605 assert!(
606 err.contains("unknown dump format"),
607 "error must mention unknown dump format: {err}"
608 );
609 }
610
611 fn sample_messages() -> Vec<Message> {
612 vec![
613 Message::from_legacy(Role::System, "system prompt"),
614 Message::from_legacy(Role::User, "hello"),
615 ]
616 }
617
618 fn sample_tools() -> Vec<ToolDefinition> {
619 vec![ToolDefinition {
620 name: "read_file".into(),
621 description: "Read a file".into(),
622 parameters: serde_json::json!({
623 "type": "object",
624 "properties": { "path": { "type": "string" } },
625 }),
626 }]
627 }
628
629 fn read_request_dump(dir: &Path) -> serde_json::Value {
630 let session = std::fs::read_dir(dir)
631 .unwrap()
632 .next()
633 .unwrap()
634 .unwrap()
635 .path();
636 serde_json::from_str(&std::fs::read_to_string(session.join("0000-request.json")).unwrap())
637 .unwrap()
638 }
639
640 #[test]
641 fn json_dump_request_includes_request_metadata() {
642 let dir = tempdir().unwrap();
643 let dumper = DebugDumper::new(dir.path(), DumpFormat::Json).unwrap();
644 let messages = sample_messages();
645 let tools = sample_tools();
646
647 let _ = dumper.dump_request(&RequestDebugDump {
648 model_name: "claude-sonnet-test",
649 messages: &messages,
650 tools: &tools,
651 provider_request: serde_json::json!({
652 "model": "claude-sonnet-test",
653 "max_tokens": 4096,
654 "tools": [{ "name": "read_file" }],
655 "temperature": 0.7,
656 "cache_control": { "type": "ephemeral" }
657 }),
658 });
659
660 let payload = read_request_dump(dir.path());
661 assert_eq!(payload["model"], "claude-sonnet-test");
662 assert_eq!(payload["max_tokens"], 4096);
663 assert_eq!(payload["tools"][0]["name"], "read_file");
664 assert_eq!(payload["temperature"], 0.7);
665 assert_eq!(payload["cache_control"]["type"], "ephemeral");
666 assert_eq!(payload["messages"][1]["content"], "hello");
667 }
668
669 #[test]
670 fn raw_dump_request_includes_request_metadata() {
671 let dir = tempdir().unwrap();
672 let dumper = DebugDumper::new(dir.path(), DumpFormat::Raw).unwrap();
673 let messages = sample_messages();
674 let tools = sample_tools();
675
676 let _ = dumper.dump_request(&RequestDebugDump {
677 model_name: "gpt-5-mini",
678 messages: &messages,
679 tools: &tools,
680 provider_request: serde_json::json!({
681 "model": "gpt-5-mini",
682 "max_completion_tokens": 2048,
683 "messages": [{ "role": "user", "content": "hello" }],
684 "tools": [{ "type": "function", "function": { "name": "read_file" } }],
685 "temperature": 0.3,
686 "cache_control": null
687 }),
688 });
689
690 let payload = read_request_dump(dir.path());
691 assert_eq!(payload["model"], "gpt-5-mini");
692 assert_eq!(payload["max_tokens"], 2048);
693 assert_eq!(payload["tools"][0]["function"]["name"], "read_file");
694 assert_eq!(payload["temperature"], 0.3);
695 assert_eq!(payload["messages"][0]["content"], "hello");
696 }
697}