// zeph_core/debug_dump.rs
// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! Debug dump writer for a single agent session.
//!
//! When active, every LLM request/response pair and raw tool output is written to
//! numbered files in a timestamped subdirectory of the configured output directory.
//! Intended for context debugging only — do not use in production.

use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU32, Ordering};

use base64::Engine as _;
use serde::{Deserialize, Serialize};
use zeph_llm::provider::{Message, MessagePart, Role, ToolDefinition};
/// Output format for debug dump files.
///
/// Round-trips through config as a lowercase string (`"json"` / `"raw"`)
/// via the `serde(rename_all = "lowercase")` attribute.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DumpFormat {
    /// Write LLM requests as pretty-printed internal zeph-llm JSON (`{id}-request.json`).
    #[default]
    Json,
    /// Write LLM requests as the actual API payload sent to the provider (`{id}-request.json`):
    /// system extracted, `agent_invisible` messages filtered, parts rendered as content blocks.
    Raw,
}

/// Writes numbered debug dump files for one agent session.
pub struct DebugDumper {
    // Session dump directory: a timestamped subdirectory created under the
    // configured base directory by `new`.
    dir: PathBuf,
    // Monotonic per-session counter used to number dump files (0000, 0001, ...).
    counter: AtomicU32,
    // Request serialization format; see [`DumpFormat`].
    format: DumpFormat,
}

/// Snapshot of one LLM request handed to [`DebugDumper::dump_request`].
pub struct RequestDebugDump<'a> {
    /// Model identifier; used as fallback when `provider_request` has no `model` key.
    pub model_name: &'a str,
    /// Internal zeph-llm message history about to be sent.
    pub messages: &'a [Message],
    /// Tool definitions; used as fallback when `provider_request` has no `tools` key.
    pub tools: &'a [ToolDefinition],
    /// Provider-specific JSON payload actually sent over the wire.
    pub provider_request: serde_json::Value,
}

42impl DebugDumper {
43    /// Create a new dumper, creating a timestamped subdirectory under `base_dir`.
44    ///
45    /// # Errors
46    ///
47    /// Returns an error if the directory cannot be created.
48    pub fn new(base_dir: &Path, format: DumpFormat) -> std::io::Result<Self> {
49        let ts = std::time::SystemTime::now()
50            .duration_since(std::time::UNIX_EPOCH)
51            .map_or(0, |d| d.as_secs());
52        let dir = base_dir.join(ts.to_string());
53        std::fs::create_dir_all(&dir)?;
54        tracing::info!(path = %dir.display(), format = ?format, "debug dump directory created");
55        Ok(Self {
56            dir,
57            counter: AtomicU32::new(0),
58            format,
59        })
60    }
61
62    /// Return the session dump directory.
63    #[must_use]
64    pub fn dir(&self) -> &Path {
65        &self.dir
66    }
67
68    fn next_id(&self) -> u32 {
69        self.counter.fetch_add(1, Ordering::Relaxed)
70    }
71
72    fn write(&self, filename: &str, content: &[u8]) {
73        let path = self.dir.join(filename);
74        if let Err(e) = std::fs::write(&path, content) {
75            tracing::warn!(path = %path.display(), error = %e, "debug dump write failed");
76        }
77    }
78
79    /// Dump the messages about to be sent to the LLM.
80    ///
81    /// Returns an ID that must be passed to [`dump_response`] to correlate request and response.
82    pub fn dump_request(&self, request: &RequestDebugDump<'_>) -> u32 {
83        let id = self.next_id();
84        let json = match self.format {
85            DumpFormat::Json => json_dump(request),
86            DumpFormat::Raw => raw_dump(request),
87        };
88        self.write(&format!("{id:04}-request.json"), json.as_bytes());
89        id
90    }
91
92    /// Dump the LLM response corresponding to a prior [`dump_request`] call.
93    pub fn dump_response(&self, id: u32, response: &str) {
94        self.write(&format!("{id:04}-response.txt"), response.as_bytes());
95    }
96
97    /// Dump raw tool output before any truncation or summarization.
98    pub fn dump_tool_output(&self, tool_name: &str, output: &str) {
99        let id = self.next_id();
100        let safe_name = sanitize_dump_name(tool_name);
101        self.write(&format!("{id:04}-tool-{safe_name}.txt"), output.as_bytes());
102    }
103
104    /// Dump a tool error with error classification for debugging transient/permanent failures.
105    pub fn dump_tool_error(&self, tool_name: &str, error: &zeph_tools::ToolError) {
106        let id = self.next_id();
107        let safe_name = sanitize_dump_name(tool_name);
108        let payload = serde_json::json!({
109            "tool": tool_name,
110            "error": error.to_string(),
111            "kind": error.kind().to_string(),
112        });
113        match serde_json::to_string_pretty(&payload) {
114            Ok(json) => {
115                self.write(
116                    &format!("{id:04}-tool-error-{safe_name}.json"),
117                    json.as_bytes(),
118                );
119            }
120            Err(e) => {
121                tracing::warn!("dump_tool_error: failed to serialize error payload: {e}");
122            }
123        }
124    }
125}
126
127fn json_dump(request: &RequestDebugDump<'_>) -> String {
128    let payload = serde_json::json!({
129        "model": extract_model(&request.provider_request, request.model_name),
130        "max_tokens": extract_max_tokens(&request.provider_request),
131        "messages": serde_json::to_value(request.messages)
132            .unwrap_or(serde_json::Value::Array(vec![])),
133        "tools": extract_tools(&request.provider_request, request.tools),
134        "temperature": request
135            .provider_request
136            .get("temperature")
137            .cloned()
138            .unwrap_or(serde_json::Value::Null),
139        "cache_control": request
140            .provider_request
141            .get("cache_control")
142            .cloned()
143            .unwrap_or(serde_json::Value::Null),
144    });
145    serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
146}
147
148fn raw_dump(request: &RequestDebugDump<'_>) -> String {
149    let mut payload = if request.provider_request.is_object() {
150        request.provider_request.clone()
151    } else {
152        serde_json::json!({})
153    };
154    let generic = messages_to_api_value(request.messages);
155    if let Some(obj) = payload.as_object_mut() {
156        obj.entry("model")
157            .or_insert_with(|| extract_model(&request.provider_request, request.model_name));
158        obj.entry("max_tokens")
159            .or_insert_with(|| extract_max_tokens(&request.provider_request));
160        obj.entry("tools")
161            .or_insert_with(|| extract_tools(&request.provider_request, request.tools));
162        obj.entry("temperature").or_insert_with(|| {
163            request
164                .provider_request
165                .get("temperature")
166                .cloned()
167                .unwrap_or(serde_json::Value::Null)
168        });
169        obj.entry("cache_control").or_insert_with(|| {
170            request
171                .provider_request
172                .get("cache_control")
173                .cloned()
174                .unwrap_or(serde_json::Value::Null)
175        });
176        if !obj.contains_key("messages")
177            && !obj.contains_key("system")
178            && let Some(generic_obj) = generic.as_object()
179        {
180            for (key, value) in generic_obj {
181                obj.insert(key.clone(), value.clone());
182            }
183        }
184    }
185    serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
186}
187
188fn extract_model(payload: &serde_json::Value, fallback: &str) -> serde_json::Value {
189    payload
190        .get("model")
191        .cloned()
192        .unwrap_or_else(|| serde_json::json!(fallback))
193}
194
195fn extract_max_tokens(payload: &serde_json::Value) -> serde_json::Value {
196    payload
197        .get("max_tokens")
198        .cloned()
199        .or_else(|| payload.get("max_completion_tokens").cloned())
200        .unwrap_or(serde_json::Value::Null)
201}
202
203fn extract_tools(payload: &serde_json::Value, fallback: &[ToolDefinition]) -> serde_json::Value {
204    payload.get("tools").cloned().unwrap_or_else(|| {
205        serde_json::to_value(fallback).unwrap_or(serde_json::Value::Array(vec![]))
206    })
207}
208
/// Replace every character that is not alphanumeric or `-` with `_` so the
/// tool name is safe to embed in a dump filename.
fn sanitize_dump_name(name: &str) -> String {
    name.chars()
        .map(|c| if c.is_alphanumeric() || c == '-' { c } else { '_' })
        .collect()
}

/// Render messages as the API payload format (mirrors `split_messages_structured` in the
/// Claude provider): system extracted, `agent_visible = false` messages filtered out,
/// parts converted to typed content blocks (`text`, `tool_use`, `tool_result`, etc.).
///
/// Returns `{ "system": <string>, "messages": [<role/content objects>] }`.
fn messages_to_api_value(messages: &[Message]) -> serde_json::Value {
    // All agent-visible system messages, concatenated with blank lines between them.
    let system: String = messages
        .iter()
        .filter(|m| m.metadata.agent_visible && m.role == Role::System)
        .map(zeph_llm::provider::Message::to_llm_content)
        .collect::<Vec<_>>()
        .join("\n\n");

    // Non-system, agent-visible conversation turns. Messages that render to
    // nothing (whitespace-only text, or all parts filtered out) are skipped
    // entirely rather than emitted as empty entries.
    let chat: Vec<serde_json::Value> = messages
        .iter()
        .filter(|m| m.metadata.agent_visible && m.role != Role::System)
        .filter_map(|m| {
            let role = match m.role {
                Role::User => "user",
                Role::Assistant => "assistant",
                // Unreachable given the filter above, but keeps the match exhaustive.
                Role::System => return None,
            };
            let is_assistant = m.role == Role::Assistant;
            // A message with any structured part is rendered as an array of
            // typed content blocks; otherwise as a plain string.
            let has_structured = m.parts.iter().any(|p| {
                matches!(
                    p,
                    MessagePart::ToolUse { .. }
                        | MessagePart::ToolResult { .. }
                        | MessagePart::Image(_)
                        | MessagePart::ThinkingBlock { .. }
                        | MessagePart::RedactedThinkingBlock { .. }
                )
            });
            let content: serde_json::Value = if !has_structured || m.parts.is_empty() {
                // Plain-text path: collapse the whole message to one string.
                let text = m.to_llm_content();
                if text.trim().is_empty() {
                    return None;
                }
                serde_json::json!(text)
            } else {
                // Structured path: one content block per part; parts invalid for
                // this role are dropped or downgraded by `part_to_block`.
                let blocks: Vec<serde_json::Value> = m
                    .parts
                    .iter()
                    .filter_map(|p| part_to_block(p, is_assistant))
                    .collect();
                if blocks.is_empty() {
                    return None;
                }
                serde_json::Value::Array(blocks)
            };
            Some(serde_json::json!({ "role": role, "content": content }))
        })
        .collect();

    serde_json::json!({ "system": system, "messages": chat })
}

/// Convert one [`MessagePart`] into a provider-style content block, or `None`
/// when the part should be omitted from the dump.
///
/// `is_assistant` gates role-specific block types: `tool_use` and thinking
/// blocks only appear on assistant messages and `tool_result` only on user
/// messages; mismatched parts are downgraded to plain text or dropped.
/// Arm order matters — guarded arms must precede their unguarded fallbacks.
fn part_to_block(part: &MessagePart, is_assistant: bool) -> Option<serde_json::Value> {
    match part {
        // All plain-text-ish parts become `text` blocks; whitespace-only text is dropped.
        MessagePart::Text { text }
        | MessagePart::Recall { text }
        | MessagePart::CodeContext { text }
        | MessagePart::Summary { text }
        | MessagePart::CrossSession { text } => {
            if text.trim().is_empty() {
                None
            } else {
                Some(serde_json::json!({ "type": "text", "text": text }))
            }
        }
        // Tool output renders as labelled text; a pruned (compacted) body is
        // replaced with a placeholder instead of the original content.
        MessagePart::ToolOutput {
            tool_name,
            body,
            compacted_at,
        } => {
            let text = if compacted_at.is_some() {
                format!("[tool output: {tool_name}] (pruned)")
            } else {
                format!("[tool output: {tool_name}]\n{body}")
            };
            Some(serde_json::json!({ "type": "text", "text": text }))
        }
        // `tool_use` is only a valid block on assistant messages...
        MessagePart::ToolUse { id, name, input } if is_assistant => {
            Some(serde_json::json!({ "type": "tool_use", "id": id, "name": name, "input": input }))
        }
        // ...elsewhere it is downgraded to a plain-text description.
        MessagePart::ToolUse { name, input, .. } => Some(
            serde_json::json!({ "type": "text", "text": format!("[tool_use: {name}] {input}") }),
        ),
        // `tool_result` is only a valid block on user messages...
        MessagePart::ToolResult {
            tool_use_id,
            content,
            is_error,
        } if !is_assistant => Some(
            serde_json::json!({ "type": "tool_result", "tool_use_id": tool_use_id, "content": content, "is_error": is_error }),
        ),
        // ...on assistant messages it is downgraded to text (dropped if empty).
        MessagePart::ToolResult { content, .. } => {
            if content.trim().is_empty() {
                None
            } else {
                Some(serde_json::json!({ "type": "text", "text": content }))
            }
        }
        // Thinking blocks are assistant-only; preserved verbatim there.
        MessagePart::ThinkingBlock {
            thinking,
            signature,
        } if is_assistant => Some(
            serde_json::json!({ "type": "thinking", "thinking": thinking, "signature": signature }),
        ),
        MessagePart::RedactedThinkingBlock { data } if is_assistant => {
            Some(serde_json::json!({ "type": "redacted_thinking", "data": data }))
        }
        // Thinking and compaction parts on non-assistant messages are dropped.
        MessagePart::ThinkingBlock { .. }
        | MessagePart::RedactedThinkingBlock { .. }
        | MessagePart::Compaction { .. }
            if !is_assistant =>
        {
            None
        }
        // Unreachable in practice (assistant thinking matched above), kept for exhaustiveness.
        MessagePart::ThinkingBlock { .. } | MessagePart::RedactedThinkingBlock { .. } => None,
        // Assistant-side compaction summary (non-assistant filtered out above).
        MessagePart::Compaction { summary } => {
            Some(serde_json::json!({ "type": "compaction", "summary": summary }))
        }
        // Images are embedded as base64 content blocks.
        MessagePart::Image(img) => Some(serde_json::json!({
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": img.mime_type,
                "data": base64::engine::general_purpose::STANDARD.encode(&img.data),
            },
        })),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// History fixture: one system prompt plus one user turn.
    fn sample_messages() -> Vec<Message> {
        vec![
            Message::from_legacy(Role::System, "system prompt"),
            Message::from_legacy(Role::User, "hello"),
        ]
    }

    /// Tool fixture: a single `read_file` definition with a minimal schema.
    fn sample_tools() -> Vec<ToolDefinition> {
        let parameters = serde_json::json!({
            "type": "object",
            "properties": { "path": { "type": "string" } },
        });
        vec![ToolDefinition {
            name: "read_file".into(),
            description: "Read a file".into(),
            parameters,
        }]
    }

    /// Parse `0000-request.json` from the single session directory under `dir`.
    fn read_request_dump(dir: &Path) -> serde_json::Value {
        let mut entries = std::fs::read_dir(dir).unwrap();
        let session = entries.next().unwrap().unwrap().path();
        let raw = std::fs::read_to_string(session.join("0000-request.json")).unwrap();
        serde_json::from_str(&raw).unwrap()
    }

    #[test]
    fn json_dump_request_includes_request_metadata() {
        let dir = tempdir().unwrap();
        let dumper = DebugDumper::new(dir.path(), DumpFormat::Json).unwrap();
        let messages = sample_messages();
        let tools = sample_tools();
        let provider_request = serde_json::json!({
            "model": "claude-sonnet-test",
            "max_tokens": 4096,
            "tools": [{ "name": "read_file" }],
            "temperature": 0.7,
            "cache_control": { "type": "ephemeral" }
        });

        dumper.dump_request(&RequestDebugDump {
            model_name: "claude-sonnet-test",
            messages: &messages,
            tools: &tools,
            provider_request,
        });

        let payload = read_request_dump(dir.path());
        assert_eq!(payload["model"], "claude-sonnet-test");
        assert_eq!(payload["max_tokens"], 4096);
        assert_eq!(payload["tools"][0]["name"], "read_file");
        assert_eq!(payload["temperature"], 0.7);
        assert_eq!(payload["cache_control"]["type"], "ephemeral");
        assert_eq!(payload["messages"][1]["content"], "hello");
    }

    #[test]
    fn raw_dump_request_includes_request_metadata() {
        let dir = tempdir().unwrap();
        let dumper = DebugDumper::new(dir.path(), DumpFormat::Raw).unwrap();
        let messages = sample_messages();
        let tools = sample_tools();
        let provider_request = serde_json::json!({
            "model": "gpt-5-mini",
            "max_completion_tokens": 2048,
            "messages": [{ "role": "user", "content": "hello" }],
            "tools": [{ "type": "function", "function": { "name": "read_file" } }],
            "temperature": 0.3,
            "cache_control": null
        });

        dumper.dump_request(&RequestDebugDump {
            model_name: "gpt-5-mini",
            messages: &messages,
            tools: &tools,
            provider_request,
        });

        let payload = read_request_dump(dir.path());
        assert_eq!(payload["model"], "gpt-5-mini");
        assert_eq!(payload["max_tokens"], 2048);
        assert_eq!(payload["tools"][0]["function"]["name"], "read_file");
        assert_eq!(payload["temperature"], 0.3);
        assert_eq!(payload["messages"][0]["content"], "hello");
    }
}
443}