1pub mod trace;
11
12use std::path::{Path, PathBuf};
13use std::sync::atomic::{AtomicU32, Ordering};
14
15use base64::Engine as _;
16use zeph_llm::provider::{Message, MessagePart, Role, ToolDefinition};
17
18use crate::redact::scrub_content;
19
20pub use zeph_config::DumpFormat;
21
/// Writes numbered debug dump files into a single session directory.
///
/// Each dump file name starts with a zero-padded sequence number taken from `counter`.
/// The dump methods on this type write nothing when `format` is [`DumpFormat::Trace`].
pub struct DebugDumper {
    /// Directory that every dump file is written into.
    dir: PathBuf,
    /// Source of the sequence numbers that prefix dump file names.
    counter: AtomicU32,
    /// Selected dump format; controls whether and how request dumps are rendered.
    format: DumpFormat,
}
27
/// Borrowed description of one LLM request, bundled for [`DebugDumper::dump_request`].
pub struct RequestDebugDump<'a> {
    /// Model name used when `provider_request` has no `"model"` entry.
    pub model_name: &'a str,
    /// Conversation messages that make up the request.
    pub messages: &'a [Message],
    /// Tool definitions used when `provider_request` has no `"tools"` entry.
    pub tools: &'a [ToolDefinition],
    /// JSON request payload; entries found here take precedence over the fields above.
    pub provider_request: serde_json::Value,
}
34
35impl DebugDumper {
36 pub fn new(base_dir: &Path, format: DumpFormat) -> std::io::Result<Self> {
42 let ts = std::time::SystemTime::now()
43 .duration_since(std::time::UNIX_EPOCH)
44 .map_or(0, |d| d.as_secs());
45 let dir = base_dir.join(ts.to_string());
46 std::fs::create_dir_all(&dir)?;
47 tracing::info!(path = %dir.display(), format = ?format, "debug dump directory created");
48 Ok(Self {
49 dir,
50 counter: AtomicU32::new(0),
51 format,
52 })
53 }
54
55 #[must_use]
57 pub fn dir(&self) -> &Path {
58 &self.dir
59 }
60
61 fn next_id(&self) -> u32 {
62 self.counter.fetch_add(1, Ordering::Relaxed)
63 }
64
65 fn write(&self, filename: &str, content: &[u8]) {
66 let path = self.dir.join(filename);
67 if let Err(e) = std::fs::write(&path, content) {
68 tracing::warn!(path = %path.display(), error = %e, "debug dump write failed");
69 }
70 }
71
72 pub fn dump_request(&self, request: &RequestDebugDump<'_>) -> u32 {
77 let id = self.next_id();
78 if self.format == DumpFormat::Trace {
80 return id;
81 }
82 let json = match self.format {
83 DumpFormat::Json => json_dump(request),
84 DumpFormat::Raw => raw_dump(request),
85 DumpFormat::Trace => unreachable!("handled above"),
86 };
87 self.write(&format!("{id:04}-request.json"), json.as_bytes());
88 id
89 }
90
91 pub fn dump_response(&self, id: u32, response: &str) {
94 if self.format == DumpFormat::Trace {
95 return;
96 }
97 self.write(&format!("{id:04}-response.txt"), response.as_bytes());
98 }
99
100 pub fn dump_tool_output(&self, tool_name: &str, output: &str) {
103 if self.format == DumpFormat::Trace {
104 return;
105 }
106 let id = self.next_id();
107 let safe_name = sanitize_dump_name(tool_name);
108 self.write(&format!("{id:04}-tool-{safe_name}.txt"), output.as_bytes());
109 }
110
111 #[cfg(feature = "context-compression")]
114 pub(crate) fn dump_pruning_scores(
115 &self,
116 scores: &[crate::agent::compaction_strategy::BlockScore],
117 ) {
118 if self.format == DumpFormat::Trace {
119 return;
120 }
121 let id = self.next_id();
122 let payload: Vec<serde_json::Value> = scores
123 .iter()
124 .map(|s| {
125 serde_json::json!({
126 "msg_index": s.msg_index,
127 "relevance": s.relevance,
128 "redundancy": s.redundancy,
129 "mig": s.mig,
130 })
131 })
132 .collect();
133 match serde_json::to_string_pretty(&serde_json::json!({ "scores": payload })) {
134 Ok(json) => self.write(&format!("{id:04}-pruning-scores.json"), json.as_bytes()),
135 Err(e) => tracing::warn!("dump_pruning_scores: serialize failed: {e}"),
136 }
137 }
138
139 pub(crate) fn dump_anchored_summary(
144 &self,
145 summary: &zeph_memory::AnchoredSummary,
146 fallback: bool,
147 token_counter: &zeph_memory::TokenCounter,
148 ) {
149 if self.format == DumpFormat::Trace {
150 return;
151 }
152 let id = self.next_id();
153 let section_completeness = serde_json::json!({
154 "session_intent": !summary.session_intent.trim().is_empty(),
155 "files_modified": !summary.files_modified.is_empty(),
156 "decisions_made": !summary.decisions_made.is_empty(),
157 "open_questions": !summary.open_questions.is_empty(),
158 "next_steps": !summary.next_steps.is_empty(),
159 });
160 let total_items = summary.files_modified.len()
161 + summary.decisions_made.len()
162 + summary.open_questions.len()
163 + summary.next_steps.len();
164 let markdown = summary.to_markdown();
165 let token_estimate = token_counter.count_tokens(&markdown);
166 let payload = serde_json::json!({
167 "summary": summary,
168 "section_completeness": section_completeness,
169 "total_items": total_items,
170 "token_estimate": token_estimate,
171 "fallback": fallback,
172 });
173 match serde_json::to_string_pretty(&payload) {
174 Ok(json) => self.write(&format!("{id:04}-anchored-summary.json"), json.as_bytes()),
175 Err(e) => tracing::warn!("dump_anchored_summary: serialize failed: {e}"),
176 }
177 }
178
179 pub(crate) fn dump_compaction_probe(&self, result: &zeph_memory::CompactionProbeResult) {
182 if self.format == DumpFormat::Trace {
183 return;
184 }
185 let id = self.next_id();
186 let questions: Vec<serde_json::Value> = result
187 .questions
188 .iter()
189 .zip(
190 result
191 .answers
192 .iter()
193 .chain(std::iter::repeat(&String::new())),
194 )
195 .zip(
196 result
197 .per_question_scores
198 .iter()
199 .chain(std::iter::repeat(&0.0_f32)),
200 )
201 .map(|((q, a), &s)| {
202 serde_json::json!({
203 "question": scrub_content(&q.question),
204 "expected": scrub_content(&q.expected_answer),
205 "actual": scrub_content(a),
206 "score": s,
207 "category": format!("{:?}", q.category),
208 })
209 })
210 .collect();
211 let category_scores: Vec<serde_json::Value> = result
212 .category_scores
213 .iter()
214 .map(|cs| {
215 serde_json::json!({
216 "category": format!("{:?}", cs.category),
217 "score": cs.score,
218 "probes_run": cs.probes_run,
219 })
220 })
221 .collect();
222 let payload = serde_json::json!({
223 "score": result.score,
224 "category_scores": category_scores,
225 "threshold": result.threshold,
226 "hard_fail_threshold": result.hard_fail_threshold,
227 "verdict": format!("{:?}", result.verdict),
228 "model": result.model,
229 "duration_ms": result.duration_ms,
230 "questions": questions,
231 });
232 match serde_json::to_string_pretty(&payload) {
233 Ok(json) => {
234 self.write(&format!("{id:04}-compaction-probe.json"), json.as_bytes());
235 }
236 Err(e) => tracing::warn!("dump_compaction_probe: serialize failed: {e}"),
237 }
238 }
239
240 pub fn dump_focus_knowledge(&self, knowledge: &str) {
243 if self.format == DumpFormat::Trace {
244 return;
245 }
246 let id = self.next_id();
247 self.write(
248 &format!("{id:04}-focus-knowledge.txt"),
249 knowledge.as_bytes(),
250 );
251 }
252
253 #[cfg(feature = "context-compression")]
256 pub(crate) fn dump_sidequest_eviction(
257 &self,
258 cursors: &[crate::agent::sidequest::ToolOutputCursor],
259 evicted_indices: &[usize],
260 freed_tokens: usize,
261 ) {
262 if self.format == DumpFormat::Trace {
263 return;
264 }
265 let id = self.next_id();
266 let cursor_info: Vec<serde_json::Value> = cursors
267 .iter()
268 .enumerate()
269 .map(|(i, c)| {
270 serde_json::json!({
271 "cursor_id": i,
272 "msg_index": c.msg_index,
273 "part_index": c.part_index,
274 "tool_name": c.tool_name,
275 "token_count": c.token_count,
276 "evicted": evicted_indices.contains(&i),
277 })
278 })
279 .collect();
280 let payload = serde_json::json!({
281 "cursors": cursor_info,
282 "evicted_indices": evicted_indices,
283 "freed_tokens": freed_tokens,
284 });
285 match serde_json::to_string_pretty(&payload) {
286 Ok(json) => self.write(&format!("{id:04}-sidequest-eviction.json"), json.as_bytes()),
287 Err(e) => tracing::warn!("dump_sidequest_eviction: serialize failed: {e}"),
288 }
289 }
290
291 #[cfg(feature = "context-compression")]
296 pub(crate) fn dump_subgoal_registry(
297 &self,
298 registry: &crate::agent::compaction_strategy::SubgoalRegistry,
299 ) {
300 if self.format == DumpFormat::Trace {
301 return;
302 }
303 let id = self.next_id();
304 let mut output = String::from("=== Subgoal Registry ===\n");
305 if registry.subgoals.is_empty() {
306 output.push_str("(no subgoals tracked yet)\n");
307 } else {
308 for sg in ®istry.subgoals {
309 let state_str = match sg.state {
310 crate::agent::compaction_strategy::SubgoalState::Active => "Active ",
311 crate::agent::compaction_strategy::SubgoalState::Completed => "Completed",
312 };
313 let _ = std::fmt::write(
314 &mut output,
315 format_args!(
316 "[{}] {state_str}: \"{}\" (msgs {}-{})\n",
317 sg.id.0, sg.description, sg.start_msg_index, sg.end_msg_index,
318 ),
319 );
320 }
321 }
322 self.write(&format!("{id:04}-subgoal-registry.txt"), output.as_bytes());
323 }
324
325 pub fn dump_tool_error(&self, tool_name: &str, error: &zeph_tools::ToolError) {
328 if self.format == DumpFormat::Trace {
329 return;
330 }
331 let id = self.next_id();
332 let safe_name = sanitize_dump_name(tool_name);
333 let payload = serde_json::json!({
334 "tool": tool_name,
335 "error": error.to_string(),
336 "kind": error.kind().to_string(),
337 });
338 match serde_json::to_string_pretty(&payload) {
339 Ok(json) => {
340 self.write(
341 &format!("{id:04}-tool-error-{safe_name}.json"),
342 json.as_bytes(),
343 );
344 }
345 Err(e) => {
346 tracing::warn!("dump_tool_error: failed to serialize error payload: {e}");
347 }
348 }
349 }
350}
351
352fn json_dump(request: &RequestDebugDump<'_>) -> String {
353 let payload = serde_json::json!({
354 "model": extract_model(&request.provider_request, request.model_name),
355 "max_tokens": extract_max_tokens(&request.provider_request),
356 "messages": serde_json::to_value(request.messages)
357 .unwrap_or(serde_json::Value::Array(vec![])),
358 "tools": extract_tools(&request.provider_request, request.tools),
359 "temperature": request
360 .provider_request
361 .get("temperature")
362 .cloned()
363 .unwrap_or(serde_json::Value::Null),
364 "cache_control": request
365 .provider_request
366 .get("cache_control")
367 .cloned()
368 .unwrap_or(serde_json::Value::Null),
369 });
370 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
371}
372
373fn raw_dump(request: &RequestDebugDump<'_>) -> String {
374 let mut payload = if request.provider_request.is_object() {
375 request.provider_request.clone()
376 } else {
377 serde_json::json!({})
378 };
379 let generic = messages_to_api_value(request.messages);
380 if let Some(obj) = payload.as_object_mut() {
381 obj.entry("model")
382 .or_insert_with(|| extract_model(&request.provider_request, request.model_name));
383 obj.entry("max_tokens")
384 .or_insert_with(|| extract_max_tokens(&request.provider_request));
385 obj.entry("tools")
386 .or_insert_with(|| extract_tools(&request.provider_request, request.tools));
387 obj.entry("temperature").or_insert_with(|| {
388 request
389 .provider_request
390 .get("temperature")
391 .cloned()
392 .unwrap_or(serde_json::Value::Null)
393 });
394 obj.entry("cache_control").or_insert_with(|| {
395 request
396 .provider_request
397 .get("cache_control")
398 .cloned()
399 .unwrap_or(serde_json::Value::Null)
400 });
401 if !obj.contains_key("messages")
402 && !obj.contains_key("system")
403 && let Some(generic_obj) = generic.as_object()
404 {
405 for (key, value) in generic_obj {
406 obj.insert(key.clone(), value.clone());
407 }
408 }
409 }
410 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
411}
412
413fn extract_model(payload: &serde_json::Value, fallback: &str) -> serde_json::Value {
414 payload
415 .get("model")
416 .cloned()
417 .unwrap_or_else(|| serde_json::json!(fallback))
418}
419
420fn extract_max_tokens(payload: &serde_json::Value) -> serde_json::Value {
421 payload
422 .get("max_tokens")
423 .cloned()
424 .or_else(|| payload.get("max_completion_tokens").cloned())
425 .unwrap_or(serde_json::Value::Null)
426}
427
428fn extract_tools(payload: &serde_json::Value, fallback: &[ToolDefinition]) -> serde_json::Value {
429 payload.get("tools").cloned().unwrap_or_else(|| {
430 serde_json::to_value(fallback).unwrap_or(serde_json::Value::Array(vec![]))
431 })
432}
433
/// Makes `name` safe to embed in a dump file name.
///
/// Alphanumeric characters (Unicode-aware) and `-` are kept; every other character,
/// including path separators and dots, is replaced by `_`.
fn sanitize_dump_name(name: &str) -> String {
    let mut safe = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = ch == '-' || ch.is_alphanumeric();
        safe.push(if keep { ch } else { '_' });
    }
    safe
}
445
446fn messages_to_api_value(messages: &[Message]) -> serde_json::Value {
450 let system: String = messages
451 .iter()
452 .filter(|m| m.metadata.agent_visible && m.role == Role::System)
453 .map(zeph_llm::provider::Message::to_llm_content)
454 .collect::<Vec<_>>()
455 .join("\n\n");
456
457 let chat: Vec<serde_json::Value> = messages
458 .iter()
459 .filter(|m| m.metadata.agent_visible && m.role != Role::System)
460 .filter_map(|m| {
461 let role = match m.role {
462 Role::User => "user",
463 Role::Assistant => "assistant",
464 Role::System => return None,
465 };
466 let is_assistant = m.role == Role::Assistant;
467 let has_structured = m.parts.iter().any(|p| {
468 matches!(
469 p,
470 MessagePart::ToolUse { .. }
471 | MessagePart::ToolResult { .. }
472 | MessagePart::Image(_)
473 | MessagePart::ThinkingBlock { .. }
474 | MessagePart::RedactedThinkingBlock { .. }
475 )
476 });
477 let content: serde_json::Value = if !has_structured || m.parts.is_empty() {
478 let text = m.to_llm_content();
479 if text.trim().is_empty() {
480 return None;
481 }
482 serde_json::json!(text)
483 } else {
484 let blocks: Vec<serde_json::Value> = m
485 .parts
486 .iter()
487 .filter_map(|p| part_to_block(p, is_assistant))
488 .collect();
489 if blocks.is_empty() {
490 return None;
491 }
492 serde_json::Value::Array(blocks)
493 };
494 Some(serde_json::json!({ "role": role, "content": content }))
495 })
496 .collect();
497
498 serde_json::json!({ "system": system, "messages": chat })
499}
500
501fn part_to_block(part: &MessagePart, is_assistant: bool) -> Option<serde_json::Value> {
502 match part {
503 MessagePart::Text { text }
504 | MessagePart::Recall { text }
505 | MessagePart::CodeContext { text }
506 | MessagePart::Summary { text }
507 | MessagePart::CrossSession { text } => {
508 if text.trim().is_empty() {
509 None
510 } else {
511 Some(serde_json::json!({ "type": "text", "text": text }))
512 }
513 }
514 MessagePart::ToolOutput {
515 tool_name,
516 body,
517 compacted_at,
518 } => {
519 let text = if compacted_at.is_some() {
520 if body.is_empty() {
521 format!("[tool output: {tool_name}] (pruned)")
522 } else {
523 format!("[tool output: {tool_name}] {body}")
524 }
525 } else {
526 format!("[tool output: {tool_name}]\n{body}")
527 };
528 Some(serde_json::json!({ "type": "text", "text": text }))
529 }
530 MessagePart::ToolUse { id, name, input } if is_assistant => {
531 Some(serde_json::json!({ "type": "tool_use", "id": id, "name": name, "input": input }))
532 }
533 MessagePart::ToolUse { name, input, .. } => Some(
534 serde_json::json!({ "type": "text", "text": format!("[tool_use: {name}] {input}") }),
535 ),
536 MessagePart::ToolResult {
537 tool_use_id,
538 content,
539 is_error,
540 } if !is_assistant => Some(
541 serde_json::json!({ "type": "tool_result", "tool_use_id": tool_use_id, "content": content, "is_error": is_error }),
542 ),
543 MessagePart::ToolResult { content, .. } => {
544 if content.trim().is_empty() {
545 None
546 } else {
547 Some(serde_json::json!({ "type": "text", "text": content }))
548 }
549 }
550 MessagePart::ThinkingBlock {
551 thinking,
552 signature,
553 } if is_assistant => Some(
554 serde_json::json!({ "type": "thinking", "thinking": thinking, "signature": signature }),
555 ),
556 MessagePart::RedactedThinkingBlock { data } if is_assistant => {
557 Some(serde_json::json!({ "type": "redacted_thinking", "data": data }))
558 }
559 MessagePart::ThinkingBlock { .. }
560 | MessagePart::RedactedThinkingBlock { .. }
561 | MessagePart::Compaction { .. }
562 if !is_assistant =>
563 {
564 None
565 }
566 MessagePart::ThinkingBlock { .. } | MessagePart::RedactedThinkingBlock { .. } => None,
567 MessagePart::Compaction { summary } => {
568 Some(serde_json::json!({ "type": "compaction", "summary": summary }))
569 }
570 MessagePart::Image(img) => Some(serde_json::json!({
571 "type": "image",
572 "source": {
573 "type": "base64",
574 "media_type": img.mime_type,
575 "data": base64::engine::general_purpose::STANDARD.encode(&img.data),
576 },
577 })),
578 }
579}
580
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Every supported format name parses to its matching variant.
    #[test]
    fn dump_format_from_str_valid() {
        let cases = [
            ("json", DumpFormat::Json),
            ("raw", DumpFormat::Raw),
            ("trace", DumpFormat::Trace),
        ];
        for (text, expected) in cases {
            assert_eq!(text.parse::<DumpFormat>().unwrap(), expected);
        }
    }

    /// An unsupported format name is rejected with a descriptive error.
    #[test]
    fn dump_format_from_str_invalid_returns_error() {
        let err = "binary".parse::<DumpFormat>().unwrap_err();
        assert!(
            err.contains("unknown dump format"),
            "error must mention unknown dump format: {err}"
        );
    }

    /// One system prompt followed by one user message.
    fn sample_messages() -> Vec<Message> {
        let system = Message::from_legacy(Role::System, "system prompt");
        let user = Message::from_legacy(Role::User, "hello");
        vec![system, user]
    }

    /// A single `read_file` tool definition.
    fn sample_tools() -> Vec<ToolDefinition> {
        let read_file = ToolDefinition {
            name: "read_file".into(),
            description: "Read a file".into(),
            parameters: serde_json::json!({
                "type": "object",
                "properties": { "path": { "type": "string" } },
            }),
        };
        vec![read_file]
    }

    /// Parses the first request dump found in the first session directory under `dir`.
    fn read_request_dump(dir: &Path) -> serde_json::Value {
        let first_entry = std::fs::read_dir(dir).unwrap().next().unwrap().unwrap();
        let request_path = first_entry.path().join("0000-request.json");
        let raw = std::fs::read_to_string(request_path).unwrap();
        serde_json::from_str(&raw).unwrap()
    }

    /// The JSON format carries the provider metadata and the serialized messages.
    #[test]
    fn json_dump_request_includes_request_metadata() {
        let root = tempdir().unwrap();
        let dumper = DebugDumper::new(root.path(), DumpFormat::Json).unwrap();
        let messages = sample_messages();
        let tools = sample_tools();
        let provider_request = serde_json::json!({
            "model": "claude-sonnet-test",
            "max_tokens": 4096,
            "tools": [{ "name": "read_file" }],
            "temperature": 0.7,
            "cache_control": { "type": "ephemeral" }
        });

        dumper.dump_request(&RequestDebugDump {
            model_name: "claude-sonnet-test",
            messages: &messages,
            tools: &tools,
            provider_request,
        });

        let dumped = read_request_dump(root.path());
        assert_eq!(dumped["model"], "claude-sonnet-test");
        assert_eq!(dumped["max_tokens"], 4096);
        assert_eq!(dumped["tools"][0]["name"], "read_file");
        assert_eq!(dumped["temperature"], 0.7);
        assert_eq!(dumped["cache_control"]["type"], "ephemeral");
        assert_eq!(dumped["messages"][1]["content"], "hello");
    }

    /// The raw format keeps the provider payload and backfills `max_tokens`.
    #[test]
    fn raw_dump_request_includes_request_metadata() {
        let root = tempdir().unwrap();
        let dumper = DebugDumper::new(root.path(), DumpFormat::Raw).unwrap();
        let messages = sample_messages();
        let tools = sample_tools();
        let provider_request = serde_json::json!({
            "model": "gpt-5-mini",
            "max_completion_tokens": 2048,
            "messages": [{ "role": "user", "content": "hello" }],
            "tools": [{ "type": "function", "function": { "name": "read_file" } }],
            "temperature": 0.3,
            "cache_control": null
        });

        dumper.dump_request(&RequestDebugDump {
            model_name: "gpt-5-mini",
            messages: &messages,
            tools: &tools,
            provider_request,
        });

        let dumped = read_request_dump(root.path());
        assert_eq!(dumped["model"], "gpt-5-mini");
        assert_eq!(dumped["max_tokens"], 2048);
        assert_eq!(dumped["tools"][0]["function"]["name"], "read_file");
        assert_eq!(dumped["temperature"], 0.3);
        assert_eq!(dumped["messages"][0]["content"], "hello");
    }
}