1pub mod trace;
11
12use std::path::{Path, PathBuf};
13use std::sync::atomic::{AtomicU32, Ordering};
14
15use base64::Engine as _;
16use serde::{Deserialize, Serialize};
17use zeph_llm::provider::{Message, MessagePart, Role, ToolDefinition};
18
/// Selects what [`DebugDumper`] writes for each request.
///
/// Serialized and deserialized with lowercase variant names (`"json"`, `"raw"`, `"trace"`);
/// the same spellings are accepted by the `FromStr` implementation.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DumpFormat {
    /// Normalized request summary (model, max_tokens, messages, tools, temperature,
    /// cache_control). This is the default.
    #[default]
    Json,
    /// The provider request payload, with any missing summary fields filled in.
    Raw,
    /// No per-request files are written by [`DebugDumper`]; request ids are still allocated.
    // NOTE(review): presumably the output for this mode is produced by the `trace` module — confirm.
    Trace,
}
33
34impl std::str::FromStr for DumpFormat {
35 type Err = String;
36
37 fn from_str(s: &str) -> Result<Self, Self::Err> {
38 match s {
39 "json" => Ok(Self::Json),
40 "raw" => Ok(Self::Raw),
41 "trace" => Ok(Self::Trace),
42 other => Err(format!(
43 "unknown dump format `{other}`, expected json|raw|trace"
44 )),
45 }
46 }
47}
48
/// Writes numbered debug artifacts (requests, responses, tool output, tool errors) into a
/// per-session directory.
pub struct DebugDumper {
    /// Directory that every dump file is written into.
    dir: PathBuf,
    /// Source of the numeric ids used as file-name prefixes; starts at 0.
    counter: AtomicU32,
    /// Output format chosen when the dumper was created.
    format: DumpFormat,
}
54
/// Borrowed view of one LLM request, handed to [`DebugDumper::dump_request`].
pub struct RequestDebugDump<'a> {
    /// Model name used when `provider_request` carries no `"model"` field.
    pub model_name: &'a str,
    /// Conversation messages for the request.
    pub messages: &'a [Message],
    /// Tool definitions used when `provider_request` carries no `"tools"` field.
    pub tools: &'a [ToolDefinition],
    /// Provider request payload as JSON; its fields take precedence over the ones above.
    pub provider_request: serde_json::Value,
}
61
62impl DebugDumper {
63 pub fn new(base_dir: &Path, format: DumpFormat) -> std::io::Result<Self> {
69 let ts = std::time::SystemTime::now()
70 .duration_since(std::time::UNIX_EPOCH)
71 .map_or(0, |d| d.as_secs());
72 let dir = base_dir.join(ts.to_string());
73 std::fs::create_dir_all(&dir)?;
74 tracing::info!(path = %dir.display(), format = ?format, "debug dump directory created");
75 Ok(Self {
76 dir,
77 counter: AtomicU32::new(0),
78 format,
79 })
80 }
81
82 #[must_use]
84 pub fn dir(&self) -> &Path {
85 &self.dir
86 }
87
88 fn next_id(&self) -> u32 {
89 self.counter.fetch_add(1, Ordering::Relaxed)
90 }
91
92 fn write(&self, filename: &str, content: &[u8]) {
93 let path = self.dir.join(filename);
94 if let Err(e) = std::fs::write(&path, content) {
95 tracing::warn!(path = %path.display(), error = %e, "debug dump write failed");
96 }
97 }
98
99 pub fn dump_request(&self, request: &RequestDebugDump<'_>) -> u32 {
104 let id = self.next_id();
105 if self.format == DumpFormat::Trace {
107 return id;
108 }
109 let json = match self.format {
110 DumpFormat::Json => json_dump(request),
111 DumpFormat::Raw => raw_dump(request),
112 DumpFormat::Trace => unreachable!("handled above"),
113 };
114 self.write(&format!("{id:04}-request.json"), json.as_bytes());
115 id
116 }
117
118 pub fn dump_response(&self, id: u32, response: &str) {
121 if self.format == DumpFormat::Trace {
122 return;
123 }
124 self.write(&format!("{id:04}-response.txt"), response.as_bytes());
125 }
126
127 pub fn dump_tool_output(&self, tool_name: &str, output: &str) {
130 if self.format == DumpFormat::Trace {
131 return;
132 }
133 let id = self.next_id();
134 let safe_name = sanitize_dump_name(tool_name);
135 self.write(&format!("{id:04}-tool-{safe_name}.txt"), output.as_bytes());
136 }
137
138 pub fn dump_tool_error(&self, tool_name: &str, error: &zeph_tools::ToolError) {
141 if self.format == DumpFormat::Trace {
142 return;
143 }
144 let id = self.next_id();
145 let safe_name = sanitize_dump_name(tool_name);
146 let payload = serde_json::json!({
147 "tool": tool_name,
148 "error": error.to_string(),
149 "kind": error.kind().to_string(),
150 });
151 match serde_json::to_string_pretty(&payload) {
152 Ok(json) => {
153 self.write(
154 &format!("{id:04}-tool-error-{safe_name}.json"),
155 json.as_bytes(),
156 );
157 }
158 Err(e) => {
159 tracing::warn!("dump_tool_error: failed to serialize error payload: {e}");
160 }
161 }
162 }
163}
164
165fn json_dump(request: &RequestDebugDump<'_>) -> String {
166 let payload = serde_json::json!({
167 "model": extract_model(&request.provider_request, request.model_name),
168 "max_tokens": extract_max_tokens(&request.provider_request),
169 "messages": serde_json::to_value(request.messages)
170 .unwrap_or(serde_json::Value::Array(vec![])),
171 "tools": extract_tools(&request.provider_request, request.tools),
172 "temperature": request
173 .provider_request
174 .get("temperature")
175 .cloned()
176 .unwrap_or(serde_json::Value::Null),
177 "cache_control": request
178 .provider_request
179 .get("cache_control")
180 .cloned()
181 .unwrap_or(serde_json::Value::Null),
182 });
183 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
184}
185
186fn raw_dump(request: &RequestDebugDump<'_>) -> String {
187 let mut payload = if request.provider_request.is_object() {
188 request.provider_request.clone()
189 } else {
190 serde_json::json!({})
191 };
192 let generic = messages_to_api_value(request.messages);
193 if let Some(obj) = payload.as_object_mut() {
194 obj.entry("model")
195 .or_insert_with(|| extract_model(&request.provider_request, request.model_name));
196 obj.entry("max_tokens")
197 .or_insert_with(|| extract_max_tokens(&request.provider_request));
198 obj.entry("tools")
199 .or_insert_with(|| extract_tools(&request.provider_request, request.tools));
200 obj.entry("temperature").or_insert_with(|| {
201 request
202 .provider_request
203 .get("temperature")
204 .cloned()
205 .unwrap_or(serde_json::Value::Null)
206 });
207 obj.entry("cache_control").or_insert_with(|| {
208 request
209 .provider_request
210 .get("cache_control")
211 .cloned()
212 .unwrap_or(serde_json::Value::Null)
213 });
214 if !obj.contains_key("messages")
215 && !obj.contains_key("system")
216 && let Some(generic_obj) = generic.as_object()
217 {
218 for (key, value) in generic_obj {
219 obj.insert(key.clone(), value.clone());
220 }
221 }
222 }
223 serde_json::to_string_pretty(&payload).unwrap_or_else(|e| format!("serialization error: {e}"))
224}
225
226fn extract_model(payload: &serde_json::Value, fallback: &str) -> serde_json::Value {
227 payload
228 .get("model")
229 .cloned()
230 .unwrap_or_else(|| serde_json::json!(fallback))
231}
232
233fn extract_max_tokens(payload: &serde_json::Value) -> serde_json::Value {
234 payload
235 .get("max_tokens")
236 .cloned()
237 .or_else(|| payload.get("max_completion_tokens").cloned())
238 .unwrap_or(serde_json::Value::Null)
239}
240
241fn extract_tools(payload: &serde_json::Value, fallback: &[ToolDefinition]) -> serde_json::Value {
242 payload.get("tools").cloned().unwrap_or_else(|| {
243 serde_json::to_value(fallback).unwrap_or(serde_json::Value::Array(vec![]))
244 })
245}
246
/// Makes `name` safe to embed in a dump file name.
///
/// Alphanumeric characters (as defined by `char::is_alphanumeric`) and `-` are kept; every
/// other character, including `_`, `.` and path separators, becomes `_`.
fn sanitize_dump_name(name: &str) -> String {
    let mut cleaned = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = ch == '-' || ch.is_alphanumeric();
        cleaned.push(if keep { ch } else { '_' });
    }
    cleaned
}
258
259fn messages_to_api_value(messages: &[Message]) -> serde_json::Value {
263 let system: String = messages
264 .iter()
265 .filter(|m| m.metadata.agent_visible && m.role == Role::System)
266 .map(zeph_llm::provider::Message::to_llm_content)
267 .collect::<Vec<_>>()
268 .join("\n\n");
269
270 let chat: Vec<serde_json::Value> = messages
271 .iter()
272 .filter(|m| m.metadata.agent_visible && m.role != Role::System)
273 .filter_map(|m| {
274 let role = match m.role {
275 Role::User => "user",
276 Role::Assistant => "assistant",
277 Role::System => return None,
278 };
279 let is_assistant = m.role == Role::Assistant;
280 let has_structured = m.parts.iter().any(|p| {
281 matches!(
282 p,
283 MessagePart::ToolUse { .. }
284 | MessagePart::ToolResult { .. }
285 | MessagePart::Image(_)
286 | MessagePart::ThinkingBlock { .. }
287 | MessagePart::RedactedThinkingBlock { .. }
288 )
289 });
290 let content: serde_json::Value = if !has_structured || m.parts.is_empty() {
291 let text = m.to_llm_content();
292 if text.trim().is_empty() {
293 return None;
294 }
295 serde_json::json!(text)
296 } else {
297 let blocks: Vec<serde_json::Value> = m
298 .parts
299 .iter()
300 .filter_map(|p| part_to_block(p, is_assistant))
301 .collect();
302 if blocks.is_empty() {
303 return None;
304 }
305 serde_json::Value::Array(blocks)
306 };
307 Some(serde_json::json!({ "role": role, "content": content }))
308 })
309 .collect();
310
311 serde_json::json!({ "system": system, "messages": chat })
312}
313
314fn part_to_block(part: &MessagePart, is_assistant: bool) -> Option<serde_json::Value> {
315 match part {
316 MessagePart::Text { text }
317 | MessagePart::Recall { text }
318 | MessagePart::CodeContext { text }
319 | MessagePart::Summary { text }
320 | MessagePart::CrossSession { text } => {
321 if text.trim().is_empty() {
322 None
323 } else {
324 Some(serde_json::json!({ "type": "text", "text": text }))
325 }
326 }
327 MessagePart::ToolOutput {
328 tool_name,
329 body,
330 compacted_at,
331 } => {
332 let text = if compacted_at.is_some() {
333 if body.is_empty() {
334 format!("[tool output: {tool_name}] (pruned)")
335 } else {
336 format!("[tool output: {tool_name}] {body}")
337 }
338 } else {
339 format!("[tool output: {tool_name}]\n{body}")
340 };
341 Some(serde_json::json!({ "type": "text", "text": text }))
342 }
343 MessagePart::ToolUse { id, name, input } if is_assistant => {
344 Some(serde_json::json!({ "type": "tool_use", "id": id, "name": name, "input": input }))
345 }
346 MessagePart::ToolUse { name, input, .. } => Some(
347 serde_json::json!({ "type": "text", "text": format!("[tool_use: {name}] {input}") }),
348 ),
349 MessagePart::ToolResult {
350 tool_use_id,
351 content,
352 is_error,
353 } if !is_assistant => Some(
354 serde_json::json!({ "type": "tool_result", "tool_use_id": tool_use_id, "content": content, "is_error": is_error }),
355 ),
356 MessagePart::ToolResult { content, .. } => {
357 if content.trim().is_empty() {
358 None
359 } else {
360 Some(serde_json::json!({ "type": "text", "text": content }))
361 }
362 }
363 MessagePart::ThinkingBlock {
364 thinking,
365 signature,
366 } if is_assistant => Some(
367 serde_json::json!({ "type": "thinking", "thinking": thinking, "signature": signature }),
368 ),
369 MessagePart::RedactedThinkingBlock { data } if is_assistant => {
370 Some(serde_json::json!({ "type": "redacted_thinking", "data": data }))
371 }
372 MessagePart::ThinkingBlock { .. }
373 | MessagePart::RedactedThinkingBlock { .. }
374 | MessagePart::Compaction { .. }
375 if !is_assistant =>
376 {
377 None
378 }
379 MessagePart::ThinkingBlock { .. } | MessagePart::RedactedThinkingBlock { .. } => None,
380 MessagePart::Compaction { summary } => {
381 Some(serde_json::json!({ "type": "compaction", "summary": summary }))
382 }
383 MessagePart::Image(img) => Some(serde_json::json!({
384 "type": "image",
385 "source": {
386 "type": "base64",
387 "media_type": img.mime_type,
388 "data": base64::engine::general_purpose::STANDARD.encode(&img.data),
389 },
390 })),
391 }
392}
393
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Two-message conversation: a system prompt followed by a single user turn.
    fn sample_messages() -> Vec<Message> {
        let system = Message::from_legacy(Role::System, "system prompt");
        let user = Message::from_legacy(Role::User, "hello");
        vec![system, user]
    }

    /// A single `read_file` tool definition with a one-property object schema.
    fn sample_tools() -> Vec<ToolDefinition> {
        let parameters = serde_json::json!({
            "type": "object",
            "properties": { "path": { "type": "string" } },
        });
        vec![ToolDefinition {
            name: "read_file".into(),
            description: "Read a file".into(),
            parameters,
        }]
    }

    /// Parses `0000-request.json` from the first session directory found under `dir`.
    fn read_request_dump(dir: &Path) -> serde_json::Value {
        let mut entries = std::fs::read_dir(dir).unwrap();
        let session = entries.next().unwrap().unwrap().path();
        let raw = std::fs::read_to_string(session.join("0000-request.json")).unwrap();
        serde_json::from_str(&raw).unwrap()
    }

    /// Dumps one request built from the sample messages and tools, then reads it back.
    fn dump_and_read(
        format: DumpFormat,
        model_name: &str,
        provider_request: serde_json::Value,
    ) -> serde_json::Value {
        let root = tempdir().unwrap();
        let dumper = DebugDumper::new(root.path(), format).unwrap();
        let messages = sample_messages();
        let tools = sample_tools();

        dumper.dump_request(&RequestDebugDump {
            model_name,
            messages: &messages,
            tools: &tools,
            provider_request,
        });

        read_request_dump(root.path())
    }

    #[test]
    fn dump_format_from_str_valid() {
        let cases = [
            ("json", DumpFormat::Json),
            ("raw", DumpFormat::Raw),
            ("trace", DumpFormat::Trace),
        ];
        for (name, expected) in cases {
            assert_eq!(name.parse::<DumpFormat>().unwrap(), expected);
        }
    }

    #[test]
    fn dump_format_from_str_invalid_returns_error() {
        let err = <DumpFormat as std::str::FromStr>::from_str("binary").unwrap_err();
        assert!(
            err.contains("unknown dump format"),
            "error must mention unknown dump format: {err}"
        );
    }

    #[test]
    fn json_dump_request_includes_request_metadata() {
        let payload = dump_and_read(
            DumpFormat::Json,
            "claude-sonnet-test",
            serde_json::json!({
                "model": "claude-sonnet-test",
                "max_tokens": 4096,
                "tools": [{ "name": "read_file" }],
                "temperature": 0.7,
                "cache_control": { "type": "ephemeral" }
            }),
        );

        assert_eq!(payload["model"], "claude-sonnet-test");
        assert_eq!(payload["max_tokens"], 4096);
        assert_eq!(payload["tools"][0]["name"], "read_file");
        assert_eq!(payload["temperature"], 0.7);
        assert_eq!(payload["cache_control"]["type"], "ephemeral");
        assert_eq!(payload["messages"][1]["content"], "hello");
    }

    #[test]
    fn raw_dump_request_includes_request_metadata() {
        let payload = dump_and_read(
            DumpFormat::Raw,
            "gpt-5-mini",
            serde_json::json!({
                "model": "gpt-5-mini",
                "max_completion_tokens": 2048,
                "messages": [{ "role": "user", "content": "hello" }],
                "tools": [{ "type": "function", "function": { "name": "read_file" } }],
                "temperature": 0.3,
                "cache_control": null
            }),
        );

        assert_eq!(payload["model"], "gpt-5-mini");
        assert_eq!(payload["max_tokens"], 2048);
        assert_eq!(payload["tools"][0]["function"]["name"], "read_file");
        assert_eq!(payload["temperature"], 0.3);
        assert_eq!(payload["messages"][0]["content"], "hello");
    }
}
501}