1pub mod trace;
11
12use std::path::{Path, PathBuf};
13use std::sync::Arc;
14use std::sync::atomic::{AtomicU32, Ordering};
15
16use base64::Engine as _;
17use zeph_llm::provider::{Message, MessagePart, Role, ToolDefinition};
18
19use crate::redact::scrub_content;
20
21pub use zeph_config::DumpFormat;
22
/// Writes debug artifacts (requests, responses, tool output, ...) as numbered
/// files inside one dump directory.
///
/// Clones share the same ID counter through the `Arc`, so file numbers stay
/// unique across all clones of one dumper.
#[derive(Clone)]
pub struct DebugDumper {
    /// Directory every dump file is written into.
    dir: PathBuf,
    /// Source of the numeric prefix used in dump file names.
    counter: Arc<AtomicU32>,
    /// Selected dump format; the file-writing methods of this type do nothing
    /// when it is `DumpFormat::Trace`.
    format: DumpFormat,
}
30
/// Borrowed description of a single LLM request handed to
/// [`DebugDumper::dump_request`].
pub struct RequestDebugDump<'a> {
    /// Model name used when `provider_request` carries no `"model"` key.
    pub model_name: &'a str,
    /// Conversation messages belonging to the request.
    pub messages: &'a [Message],
    /// Tool definitions used when `provider_request` carries no `"tools"` key.
    pub tools: &'a [ToolDefinition],
    /// Provider-specific request payload; keys found here take precedence
    /// over the generic fields above when the dump is built.
    pub provider_request: serde_json::Value,
}
37
38impl DebugDumper {
39 pub fn new(base_dir: &Path, format: DumpFormat) -> std::io::Result<Self> {
45 let ts = std::time::SystemTime::now()
46 .duration_since(std::time::UNIX_EPOCH)
47 .map_or(0, |d| d.as_secs());
48 let dir = base_dir.join(ts.to_string());
49 std::fs::create_dir_all(&dir)?;
50 tracing::info!(path = %dir.display(), format = ?format, "debug dump directory created");
51 Ok(Self {
52 dir,
53 counter: Arc::new(AtomicU32::new(0)),
54 format,
55 })
56 }
57
58 #[must_use]
60 pub fn dir(&self) -> &Path {
61 &self.dir
62 }
63
64 #[must_use]
69 pub fn is_trace_format(&self) -> bool {
70 self.format == DumpFormat::Trace
71 }
72
73 fn next_id(&self) -> u32 {
74 self.counter.fetch_add(1, Ordering::Relaxed)
75 }
76
77 fn write(&self, filename: &str, content: &[u8]) {
78 let path = self.dir.join(filename);
79 if let Err(e) = zeph_common::fs_secure::write_private(&path, content) {
80 tracing::warn!(path = %path.display(), error = %e, "debug dump write failed");
81 }
82 }
83
84 #[must_use]
89 pub fn dump_request(&self, request: &RequestDebugDump<'_>) -> u32 {
90 let id = self.next_id();
91 if self.format == DumpFormat::Trace {
93 return id;
94 }
95 let json = match self.format {
96 DumpFormat::Json => json_dump(request),
97 DumpFormat::Raw => raw_dump(request),
98 DumpFormat::Trace => unreachable!("handled above"),
99 };
100 self.write(&format!("{id:04}-request.json"), json.as_bytes());
101 id
102 }
103
104 pub fn dump_response(&self, id: u32, response: &str) {
107 if self.format == DumpFormat::Trace {
108 return;
109 }
110 self.write(&format!("{id:04}-response.txt"), response.as_bytes());
111 }
112
113 pub fn dump_tool_output(&self, tool_name: &str, output: &str) {
116 if self.format == DumpFormat::Trace {
117 return;
118 }
119 let id = self.next_id();
120 let safe_name = sanitize_dump_name(tool_name);
121 self.write(&format!("{id:04}-tool-{safe_name}.txt"), output.as_bytes());
122 }
123
124 #[cfg(test)]
127 pub(crate) fn dump_pruning_scores(&self, scores: &[zeph_agent_context::BlockScore]) {
128 if self.format == DumpFormat::Trace {
129 return;
130 }
131 let id = self.next_id();
132 let payload: Vec<serde_json::Value> = scores
133 .iter()
134 .map(|s| {
135 serde_json::json!({
136 "msg_index": s.msg_index,
137 "relevance": s.relevance,
138 "redundancy": s.redundancy,
139 "mig": s.mig,
140 })
141 })
142 .collect();
143 match serde_json::to_string_pretty(&serde_json::json!({ "scores": payload })) {
144 Ok(json) => self.write(&format!("{id:04}-pruning-scores.json"), json.as_bytes()),
145 Err(e) => tracing::warn!("dump_pruning_scores: serialize failed: {e}"),
146 }
147 }
148
149 pub(crate) fn dump_anchored_summary(
154 &self,
155 summary: &zeph_memory::AnchoredSummary,
156 fallback: bool,
157 token_counter: &zeph_memory::TokenCounter,
158 ) {
159 if self.format == DumpFormat::Trace {
160 return;
161 }
162 let id = self.next_id();
163 let section_completeness = serde_json::json!({
164 "session_intent": !summary.session_intent.trim().is_empty(),
165 "files_modified": !summary.files_modified.is_empty(),
166 "decisions_made": !summary.decisions_made.is_empty(),
167 "open_questions": !summary.open_questions.is_empty(),
168 "next_steps": !summary.next_steps.is_empty(),
169 });
170 let total_items = summary.files_modified.len()
171 + summary.decisions_made.len()
172 + summary.open_questions.len()
173 + summary.next_steps.len();
174 let markdown = summary.to_markdown();
175 let token_estimate = token_counter.count_tokens(&markdown);
176 let payload = serde_json::json!({
177 "summary": summary,
178 "section_completeness": section_completeness,
179 "total_items": total_items,
180 "token_estimate": token_estimate,
181 "fallback": fallback,
182 });
183 match serde_json::to_string_pretty(&payload) {
184 Ok(json) => self.write(&format!("{id:04}-anchored-summary.json"), json.as_bytes()),
185 Err(e) => tracing::warn!("dump_anchored_summary: serialize failed: {e}"),
186 }
187 }
188
189 pub(crate) fn dump_compaction_probe(&self, result: &zeph_memory::CompactionProbeResult) {
192 if self.format == DumpFormat::Trace {
193 return;
194 }
195 let id = self.next_id();
196 let questions: Vec<serde_json::Value> = result
197 .questions
198 .iter()
199 .zip(
200 result
201 .answers
202 .iter()
203 .chain(std::iter::repeat(&String::new())),
204 )
205 .zip(
206 result
207 .per_question_scores
208 .iter()
209 .chain(std::iter::repeat(&0.0_f32)),
210 )
211 .map(|((q, a), &s)| {
212 serde_json::json!({
213 "question": scrub_content(&q.question),
214 "expected": scrub_content(&q.expected_answer),
215 "actual": scrub_content(a),
216 "score": s,
217 "category": format!("{:?}", q.category),
218 })
219 })
220 .collect();
221 let category_scores: Vec<serde_json::Value> = result
222 .category_scores
223 .iter()
224 .map(|cs| {
225 serde_json::json!({
226 "category": format!("{:?}", cs.category),
227 "score": cs.score,
228 "probes_run": cs.probes_run,
229 })
230 })
231 .collect();
232 let payload = serde_json::json!({
233 "score": result.score,
234 "category_scores": category_scores,
235 "threshold": result.threshold,
236 "hard_fail_threshold": result.hard_fail_threshold,
237 "verdict": format!("{:?}", result.verdict),
238 "model": result.model,
239 "duration_ms": result.duration_ms,
240 "questions": questions,
241 });
242 match serde_json::to_string_pretty(&payload) {
243 Ok(json) => {
244 self.write(&format!("{id:04}-compaction-probe.json"), json.as_bytes());
245 }
246 Err(e) => tracing::warn!("dump_compaction_probe: serialize failed: {e}"),
247 }
248 }
249
250 pub fn dump_focus_knowledge(&self, knowledge: &str) {
253 if self.format == DumpFormat::Trace {
254 return;
255 }
256 let id = self.next_id();
257 self.write(
258 &format!("{id:04}-focus-knowledge.txt"),
259 knowledge.as_bytes(),
260 );
261 }
262
263 pub(crate) fn dump_sidequest_eviction(
266 &self,
267 cursors: &[crate::agent::sidequest::ToolOutputCursor],
268 evicted_indices: &[usize],
269 freed_tokens: usize,
270 ) {
271 if self.format == DumpFormat::Trace {
272 return;
273 }
274 let id = self.next_id();
275 let cursor_info: Vec<serde_json::Value> = cursors
276 .iter()
277 .enumerate()
278 .map(|(i, c)| {
279 serde_json::json!({
280 "cursor_id": i,
281 "msg_index": c.msg_index,
282 "part_index": c.part_index,
283 "tool_name": c.tool_name,
284 "token_count": c.token_count,
285 "evicted": evicted_indices.contains(&i),
286 })
287 })
288 .collect();
289 let payload = serde_json::json!({
290 "cursors": cursor_info,
291 "evicted_indices": evicted_indices,
292 "freed_tokens": freed_tokens,
293 });
294 match serde_json::to_string_pretty(&payload) {
295 Ok(json) => self.write(&format!("{id:04}-sidequest-eviction.json"), json.as_bytes()),
296 Err(e) => tracing::warn!("dump_sidequest_eviction: serialize failed: {e}"),
297 }
298 }
299
300 #[cfg(test)]
305 pub(crate) fn dump_subgoal_registry(&self, registry: &zeph_agent_context::SubgoalRegistry) {
306 if self.format == DumpFormat::Trace {
307 return;
308 }
309 let id = self.next_id();
310 let mut output = String::from("=== Subgoal Registry ===\n");
311 if registry.subgoals.is_empty() {
312 output.push_str("(no subgoals tracked yet)\n");
313 } else {
314 for sg in ®istry.subgoals {
315 let state_str = match sg.state {
316 zeph_agent_context::SubgoalState::Active => "Active ",
317 zeph_agent_context::SubgoalState::Completed => "Completed",
318 };
319 let _ = std::fmt::write(
320 &mut output,
321 format_args!(
322 "[{}] {state_str}: \"{}\" (msgs {}-{})\n",
323 sg.id.0, sg.description, sg.start_msg_index, sg.end_msg_index,
324 ),
325 );
326 }
327 }
328 self.write(&format!("{id:04}-subgoal-registry.txt"), output.as_bytes());
329 }
330
331 pub fn dump_tool_error(&self, tool_name: &str, error: &zeph_tools::ToolError) {
334 if self.format == DumpFormat::Trace {
335 return;
336 }
337 let id = self.next_id();
338 let safe_name = sanitize_dump_name(tool_name);
339 let payload = serde_json::json!({
340 "tool": tool_name,
341 "error": error.to_string(),
342 "kind": error.kind().to_string(),
343 });
344 match serde_json::to_string_pretty(&payload) {
345 Ok(json) => {
346 self.write(
347 &format!("{id:04}-tool-error-{safe_name}.json"),
348 json.as_bytes(),
349 );
350 }
351 Err(e) => {
352 tracing::warn!("dump_tool_error: failed to serialize error payload: {e}");
353 }
354 }
355 }
356}
357
358fn json_dump(request: &RequestDebugDump<'_>) -> String {
359 let payload = serde_json::json!({
360 "model": extract_model(&request.provider_request, request.model_name),
361 "max_tokens": extract_max_tokens(&request.provider_request),
362 "messages": serde_json::to_value(request.messages)
363 .unwrap_or(serde_json::Value::Array(vec![])),
364 "tools": extract_tools(&request.provider_request, request.tools),
365 "temperature": request
366 .provider_request
367 .get("temperature")
368 .cloned()
369 .unwrap_or(serde_json::Value::Null),
370 "cache_control": request
371 .provider_request
372 .get("cache_control")
373 .cloned()
374 .unwrap_or(serde_json::Value::Null),
375 });
376 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
377}
378
379fn raw_dump(request: &RequestDebugDump<'_>) -> String {
380 let mut payload = if request.provider_request.is_object() {
381 request.provider_request.clone()
382 } else {
383 serde_json::json!({})
384 };
385 if let Some(obj) = payload.as_object_mut() {
386 obj.entry("model")
387 .or_insert_with(|| extract_model(&request.provider_request, request.model_name));
388 obj.entry("max_tokens")
389 .or_insert_with(|| extract_max_tokens(&request.provider_request));
390 obj.entry("tools")
391 .or_insert_with(|| extract_tools(&request.provider_request, request.tools));
392 obj.entry("temperature").or_insert_with(|| {
393 request
394 .provider_request
395 .get("temperature")
396 .cloned()
397 .unwrap_or(serde_json::Value::Null)
398 });
399 obj.entry("cache_control").or_insert_with(|| {
400 request
401 .provider_request
402 .get("cache_control")
403 .cloned()
404 .unwrap_or(serde_json::Value::Null)
405 });
406 if !obj.contains_key("messages") && !obj.contains_key("system") {
407 let generic = messages_to_api_value(request.messages);
408 if let Some(generic_obj) = generic.as_object() {
409 for (key, value) in generic_obj {
410 obj.insert(key.clone(), value.clone());
411 }
412 }
413 }
414 }
415 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
416}
417
418fn extract_model(payload: &serde_json::Value, fallback: &str) -> serde_json::Value {
419 payload
420 .get("model")
421 .cloned()
422 .unwrap_or_else(|| serde_json::json!(fallback))
423}
424
425fn extract_max_tokens(payload: &serde_json::Value) -> serde_json::Value {
426 payload
427 .get("max_tokens")
428 .cloned()
429 .or_else(|| payload.get("max_completion_tokens").cloned())
430 .unwrap_or(serde_json::Value::Null)
431}
432
433fn extract_tools(payload: &serde_json::Value, fallback: &[ToolDefinition]) -> serde_json::Value {
434 payload.get("tools").cloned().unwrap_or_else(|| {
435 serde_json::to_value(fallback).unwrap_or(serde_json::Value::Array(vec![]))
436 })
437}
438
/// Makes `name` safe to embed in a dump file name.
///
/// Alphanumeric characters (as defined by `char::is_alphanumeric`) and `-`
/// are kept; every other character, including path separators and dots, is
/// replaced by `_`. The number of characters is unchanged.
fn sanitize_dump_name(name: &str) -> String {
    let mut sanitized = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = ch == '-' || ch.is_alphanumeric();
        sanitized.push(if keep { ch } else { '_' });
    }
    sanitized
}
450
451fn messages_to_api_value(messages: &[Message]) -> serde_json::Value {
455 let system: String = messages
456 .iter()
457 .filter(|m| m.metadata.visibility.is_agent_visible() && m.role == Role::System)
458 .map(zeph_llm::provider::Message::to_llm_content)
459 .collect::<Vec<_>>()
460 .join("\n\n");
461
462 let chat: Vec<serde_json::Value> = messages
463 .iter()
464 .filter(|m| m.metadata.visibility.is_agent_visible() && m.role != Role::System)
465 .filter_map(|m| {
466 let role = match m.role {
467 Role::User => "user",
468 Role::Assistant => "assistant",
469 Role::System => return None,
470 };
471 let is_assistant = m.role == Role::Assistant;
472 let has_structured = m.parts.iter().any(|p| {
473 matches!(
474 p,
475 MessagePart::ToolUse { .. }
476 | MessagePart::ToolResult { .. }
477 | MessagePart::Image(_)
478 | MessagePart::ThinkingBlock { .. }
479 | MessagePart::RedactedThinkingBlock { .. }
480 )
481 });
482 let content: serde_json::Value = if !has_structured || m.parts.is_empty() {
483 let text = m.to_llm_content();
484 if text.trim().is_empty() {
485 return None;
486 }
487 serde_json::json!(text)
488 } else {
489 let blocks: Vec<serde_json::Value> = m
490 .parts
491 .iter()
492 .filter_map(|p| part_to_block(p, is_assistant))
493 .collect();
494 if blocks.is_empty() {
495 return None;
496 }
497 serde_json::Value::Array(blocks)
498 };
499 Some(serde_json::json!({ "role": role, "content": content }))
500 })
501 .collect();
502
503 serde_json::json!({ "system": system, "messages": chat })
504}
505
506fn part_to_block(part: &MessagePart, is_assistant: bool) -> Option<serde_json::Value> {
507 match part {
508 MessagePart::Text { text }
509 | MessagePart::Recall { text }
510 | MessagePart::CodeContext { text }
511 | MessagePart::Summary { text }
512 | MessagePart::CrossSession { text } => {
513 if text.trim().is_empty() {
514 None
515 } else {
516 Some(serde_json::json!({ "type": "text", "text": text }))
517 }
518 }
519 MessagePart::ToolOutput {
520 tool_name,
521 body,
522 compacted_at,
523 } => {
524 let text = if compacted_at.is_some() {
525 if body.is_empty() {
526 format!("[tool output: {tool_name}] (pruned)")
527 } else {
528 format!("[tool output: {tool_name}] {body}")
529 }
530 } else {
531 format!("[tool output: {tool_name}]\n{body}")
532 };
533 Some(serde_json::json!({ "type": "text", "text": text }))
534 }
535 MessagePart::ToolUse { id, name, input } if is_assistant => {
536 Some(serde_json::json!({ "type": "tool_use", "id": id, "name": name, "input": input }))
537 }
538 MessagePart::ToolUse { name, input, .. } => Some(
539 serde_json::json!({ "type": "text", "text": format!("[tool_use: {name}] {input}") }),
540 ),
541 MessagePart::ToolResult {
542 tool_use_id,
543 content,
544 is_error,
545 } if !is_assistant => Some(
546 serde_json::json!({ "type": "tool_result", "tool_use_id": tool_use_id, "content": content, "is_error": is_error }),
547 ),
548 MessagePart::ToolResult { content, .. } => {
549 if content.trim().is_empty() {
550 None
551 } else {
552 Some(serde_json::json!({ "type": "text", "text": content }))
553 }
554 }
555 MessagePart::ThinkingBlock {
556 thinking,
557 signature,
558 } if is_assistant => Some(
559 serde_json::json!({ "type": "thinking", "thinking": thinking, "signature": signature }),
560 ),
561 MessagePart::RedactedThinkingBlock { data } if is_assistant => {
562 Some(serde_json::json!({ "type": "redacted_thinking", "data": data }))
563 }
564 MessagePart::ThinkingBlock { .. }
565 | MessagePart::RedactedThinkingBlock { .. }
566 | MessagePart::Compaction { .. }
567 if !is_assistant =>
568 {
569 None
570 }
571 MessagePart::ThinkingBlock { .. } | MessagePart::RedactedThinkingBlock { .. } => None,
572 MessagePart::Compaction { summary } => {
573 Some(serde_json::json!({ "type": "compaction", "summary": summary }))
574 }
575 MessagePart::Image(img) => Some(serde_json::json!({
576 "type": "image",
577 "source": {
578 "type": "base64",
579 "media_type": img.mime_type,
580 "data": base64::engine::general_purpose::STANDARD.encode(&img.data),
581 },
582 })),
583 }
584}
585
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Every supported format name parses to its matching variant.
    #[test]
    fn dump_format_from_str_valid() {
        let cases = [
            ("json", DumpFormat::Json),
            ("raw", DumpFormat::Raw),
            ("trace", DumpFormat::Trace),
        ];
        for (text, expected) in cases {
            assert_eq!(text.parse::<DumpFormat>().unwrap(), expected);
        }
    }

    /// An unsupported format name is rejected with a descriptive error.
    #[test]
    fn dump_format_from_str_invalid_returns_error() {
        let parsed = "binary".parse::<DumpFormat>();
        let err = parsed.unwrap_err();
        assert!(
            err.contains("unknown dump format"),
            "error must mention unknown dump format: {err}"
        );
    }

    /// A minimal conversation: one system prompt followed by one user turn.
    fn sample_messages() -> Vec<Message> {
        let system = Message::from_legacy(Role::System, "system prompt");
        let user = Message::from_legacy(Role::User, "hello");
        vec![system, user]
    }

    /// A single `read_file` tool definition with one string parameter.
    fn sample_tools() -> Vec<ToolDefinition> {
        let parameters = serde_json::json!({
            "type": "object",
            "properties": { "path": { "type": "string" } },
        });
        vec![ToolDefinition {
            name: "read_file".into(),
            description: "Read a file".into(),
            parameters,
            output_schema: None,
        }]
    }

    /// Loads `0000-request.json` from the first session directory under `dir`.
    fn read_request_dump(dir: &Path) -> serde_json::Value {
        let mut entries = std::fs::read_dir(dir).unwrap();
        let session = entries.next().unwrap().unwrap().path();
        let raw = std::fs::read_to_string(session.join("0000-request.json")).unwrap();
        serde_json::from_str(&raw).unwrap()
    }

    /// JSON format keeps provider metadata and serializes the generic messages.
    #[test]
    fn json_dump_request_includes_request_metadata() {
        let dir = tempdir().unwrap();
        let dumper = DebugDumper::new(dir.path(), DumpFormat::Json).unwrap();
        let messages = sample_messages();
        let tools = sample_tools();
        let provider_request = serde_json::json!({
            "model": "claude-sonnet-test",
            "max_tokens": 4096,
            "tools": [{ "name": "read_file" }],
            "temperature": 0.7,
            "cache_control": { "type": "ephemeral" }
        });

        let _ = dumper.dump_request(&RequestDebugDump {
            model_name: "claude-sonnet-test",
            messages: &messages,
            tools: &tools,
            provider_request,
        });

        let payload = read_request_dump(dir.path());
        assert_eq!(payload["model"], "claude-sonnet-test");
        assert_eq!(payload["max_tokens"], 4096);
        assert_eq!(payload["tools"][0]["name"], "read_file");
        assert_eq!(payload["temperature"], 0.7);
        assert_eq!(payload["cache_control"]["type"], "ephemeral");
        assert_eq!(payload["messages"][1]["content"], "hello");
    }

    /// Raw format keeps the provider payload and back-fills `max_tokens`
    /// from `max_completion_tokens`.
    #[test]
    fn raw_dump_request_includes_request_metadata() {
        let dir = tempdir().unwrap();
        let dumper = DebugDumper::new(dir.path(), DumpFormat::Raw).unwrap();
        let messages = sample_messages();
        let tools = sample_tools();
        let provider_request = serde_json::json!({
            "model": "gpt-5-mini",
            "max_completion_tokens": 2048,
            "messages": [{ "role": "user", "content": "hello" }],
            "tools": [{ "type": "function", "function": { "name": "read_file" } }],
            "temperature": 0.3,
            "cache_control": null
        });

        let _ = dumper.dump_request(&RequestDebugDump {
            model_name: "gpt-5-mini",
            messages: &messages,
            tools: &tools,
            provider_request,
        });

        let payload = read_request_dump(dir.path());
        assert_eq!(payload["model"], "gpt-5-mini");
        assert_eq!(payload["max_tokens"], 2048);
        assert_eq!(payload["tools"][0]["function"]["name"], "read_file");
        assert_eq!(payload["temperature"], 0.3);
        assert_eq!(payload["messages"][0]["content"], "hello");
    }
}