Skip to main content

atomcode_core/turn/
log.rs

1use crate::conversation::message::Message;
2use crate::tool::{ToolCall, ToolDef};
3use std::path::{Path, PathBuf};
4
5/// Per-round LLM log files live under `<datalog_dir>/llm/` where
6/// `<datalog_dir>` is the per-project directory resolved by
7/// `DatalogWriter::resolve_log_dir` (typically
8/// `~/.atomcode/datalog/<project-slug>/`). The caller is responsible for
9/// passing in the resolved dir — see `runner.rs`. This keeps the JSONL dump
10/// in lockstep with the markdown writer's `[datalog].dir` config; the prior
11/// hard-coded `<working_dir>/datalog/llm/` would silently ignore the user's
12/// `dir` override.
13///
14/// One file per LLM round-trip, containing both `request` and `response`
15/// sections. Filename = timestamp. calls.log is a one-line-per-round index.
16///
17/// Split-file layout (prior design) produced two JSONs per round plus a CSV
18/// entry per half — hard to read and review. One-file-per-round is both
19/// AI-friendly (single JSON to grep/diff/feed back) and human-friendly.
20///
21/// Request / response pairing: `log_llm_request` returns the path of the
22/// JSON file it wrote; the caller holds that `PathBuf` locally and passes
23/// it to `log_llm_response` so the response section merges into the SAME
24/// file. No process-wide shared state — safe for concurrent `chat_stream`
25/// handlers in the daemon (prior design used a `static Mutex<Option<PathBuf>>`
26/// that bled across sessions).
27
28/// Log the LLM request. Writes a JSON file containing the `request` section
29/// under `<datalog_dir>/llm/<timestamp>.json`.
30///
31/// Returns the path so the caller can pass it to `log_llm_response` for
32/// in-place merge. Returns `None` if `enabled` is false or the write failed.
33pub fn log_llm_request(
34    datalog_dir: &Path,
35    messages: &[Message],
36    tool_defs: &[ToolDef],
37    model: &str,
38    context_window: usize,
39    step: usize,
40    enabled: bool,
41) -> Option<PathBuf> {
42    if !enabled {
43        return None;
44    }
45    use std::io::Write;
46
47    let log_dir = datalog_dir.join("llm");
48    let _ = std::fs::create_dir_all(&log_dir);
49
50    let ts = timestamp();
51    let path = log_dir.join(format!("{}.json", ts));
52
53    let msgs_json = serde_json::to_value(messages).unwrap_or(serde_json::json!([]));
54    let tools_json: Vec<serde_json::Value> = tool_defs
55        .iter()
56        .map(|td| {
57            serde_json::json!({
58                "name": td.name,
59                "description": td.description,
60                "parameters": td.parameters,
61            })
62        })
63        .collect();
64    let total_tokens: usize = messages.iter().map(|m| m.estimate_tokens()).sum();
65
66    let log = serde_json::json!({
67        "timestamp": ts,
68        "model": model,
69        "context_window": context_window,
70        "step": step,
71        "request": {
72            "message_count": messages.len(),
73            "estimated_tokens": total_tokens,
74            "tool_count": tool_defs.len(),
75            "messages": msgs_json,
76            "tools": tools_json,
77        },
78        // `response` key is inserted by log_llm_response.
79    });
80
81    let tmp = path.with_extension("json.tmp");
82    match std::fs::File::create(&tmp) {
83        Ok(mut f) => {
84            if f.write_all(
85                serde_json::to_string_pretty(&log)
86                    .unwrap_or_default()
87                    .as_bytes(),
88            )
89            .is_err()
90            {
91                return None;
92            }
93            if std::fs::rename(&tmp, &path).is_err() {
94                return None;
95            }
96            Some(path)
97        }
98        Err(_) => None,
99    }
100}
101
102/// Log the LLM response by merging it into the request file identified by
103/// `pending_request`. If `pending_request` is `None` (no prior request, or
104/// request write failed), writes a standalone `*_orphan_response.json`
105/// marked with a warning. Also appends a one-line summary to `calls.log`.
106///
107/// If `enabled` is false, this function is a no-op.
108pub fn log_llm_response(
109    datalog_dir: &Path,
110    pending_request: Option<PathBuf>,
111    text: &str,
112    tool_calls: &[ToolCall],
113    reasoning: &str,
114    model: &str,
115    step: usize,
116    duration_ms: u64,
117    enabled: bool,
118) {
119    if !enabled {
120        return;
121    }
122    use std::io::Write;
123
124    let log_dir = datalog_dir.join("llm");
125    let _ = std::fs::create_dir_all(&log_dir);
126
127    let path = pending_request;
128
129    let tools_json: Vec<serde_json::Value> = tool_calls
130        .iter()
131        .map(|tc| {
132            serde_json::json!({
133                "id": tc.id,
134                "name": tc.name,
135                "arguments": tc.arguments,
136            })
137        })
138        .collect();
139    // Record reasoning_content emitted by thinking models (Moonshot Kimi,
140    // DeepSeek-R1, MiniMax-M2, etc.). Without this, bugs like issue #165
141    // ("reasoning_content missing in assistant tool call message") are
142    // impossible to diagnose from datalog alone.
143    let response_value = serde_json::json!({
144        "duration_ms": duration_ms,
145        "text": text,
146        "reasoning_content": reasoning,
147        "tool_calls": tools_json,
148    });
149
150    // Determine target path: prefer the stashed pending request so we
151    // merge into the same file. Fallback: standalone orphan file, marked
152    // so the reader knows the pairing was lost (shouldn't happen in normal
153    // operation but we don't want to drop data on the floor).
154    let (target_path, merged) = match path.as_ref().and_then(|p| std::fs::read_to_string(p).ok()) {
155        Some(existing) => {
156            let mut val: serde_json::Value =
157                serde_json::from_str(&existing).unwrap_or_else(|_| serde_json::json!({}));
158            if let Some(obj) = val.as_object_mut() {
159                obj.insert("response".into(), response_value);
160            }
161            (path.unwrap(), val)
162        }
163        None => {
164            let ts = timestamp();
165            let orphan = log_dir.join(format!("{}_orphan_response.json", ts));
166            let val = serde_json::json!({
167                "timestamp": ts,
168                "model": model,
169                "step": step,
170                "warning": "no matching request found for this response",
171                "response": response_value,
172            });
173            (orphan, val)
174        }
175    };
176
177    let tmp = target_path.with_extension("json.tmp");
178    if let Ok(mut f) = std::fs::File::create(&tmp) {
179        let _ = f.write_all(
180            serde_json::to_string_pretty(&merged)
181                .unwrap_or_default()
182                .as_bytes(),
183        );
184        let _ = std::fs::rename(&tmp, &target_path);
185    }
186
187    // One-line summary to calls.log. Example:
188    //   2026-04-14_12-50-54_123  glm-5  step=3  msgs=20/15000tok  →  4200ms  tools=2 [read_file, grep]
189    let ts_for_log = merged
190        .get("timestamp")
191        .and_then(|v| v.as_str())
192        .unwrap_or("?")
193        .to_string();
194    let msg_count = merged
195        .pointer("/request/message_count")
196        .and_then(|v| v.as_u64())
197        .unwrap_or(0);
198    let est_tokens = merged
199        .pointer("/request/estimated_tokens")
200        .and_then(|v| v.as_u64())
201        .unwrap_or(0);
202    let tool_names: Vec<&str> = tool_calls.iter().map(|tc| tc.name.as_str()).collect();
203    let tools_str = if tool_names.is_empty() {
204        "text_only".to_string()
205    } else {
206        format!("[{}]", tool_names.join(", "))
207    };
208    let calls_path = log_dir.join("calls.log");
209    if let Ok(mut f) = std::fs::OpenOptions::new()
210        .create(true)
211        .append(true)
212        .open(&calls_path)
213    {
214        let _ = writeln!(
215            f,
216            "{}  {}  step={}  msgs={}/{}tok  →  {}ms  tools={} {}",
217            ts_for_log,
218            model,
219            step,
220            msg_count,
221            est_tokens,
222            duration_ms,
223            tool_calls.len(),
224            tools_str,
225        );
226    }
227}
228
229/// Filename timestamp: `YYYY-MM-DD_HH-MM-SS_sss` (UTC).
230/// Format preserved bit-for-bit from the prior hand-rolled implementation.
231fn timestamp() -> String {
232    chrono::Utc::now()
233        .format("%Y-%m-%d_%H-%M-%S_%3f")
234        .to_string()
235}
236
237#[cfg(test)]
238mod tests {
239    use super::*;
240    use crate::conversation::message::{Message, Role};
241    use crate::tool::{ToolCall, ToolDef};
242
243    #[test]
244    fn test_request_response_merged_into_single_file() {
245        let tmp = tempfile::TempDir::new().unwrap();
246        let messages = vec![
247            Message::new(Role::System, "You are helpful."),
248            Message::new(Role::User, "Hello"),
249        ];
250        let tools = vec![ToolDef {
251            name: "bash",
252            description: "Run a command".to_string(),
253            parameters: serde_json::json!({"type": "object"}),
254        }];
255
256        let pending = log_llm_request(tmp.path(), &messages, &tools, "test-model", 16000, 3, true);
257        assert!(pending.is_some(), "request log should return its path");
258        log_llm_response(
259            tmp.path(),
260            pending,
261            "hi back",
262            &[ToolCall {
263                id: "c1".into(),
264                name: "bash".into(),
265                arguments: "{}".into(),
266            }],
267            "",
268            "test-model",
269            3,
270            123,
271            true,
272        );
273
274        let log_dir = tmp.path().join("llm");
275        let json_files: Vec<_> = std::fs::read_dir(&log_dir)
276            .unwrap()
277            .filter_map(|e| e.ok().map(|e| e.path()))
278            .filter(|p| p.extension().map_or(false, |ext| ext == "json"))
279            .collect();
280        assert_eq!(
281            json_files.len(),
282            1,
283            "expected one merged file, got {}",
284            json_files.len()
285        );
286
287        let content = std::fs::read_to_string(&json_files[0]).unwrap();
288        let v: serde_json::Value = serde_json::from_str(&content).unwrap();
289        assert_eq!(v["model"], "test-model");
290        assert_eq!(v["request"]["message_count"], 2);
291        assert_eq!(v["request"]["tool_count"], 1);
292        assert_eq!(v["response"]["duration_ms"], 123);
293        assert_eq!(v["response"]["text"], "hi back");
294        assert_eq!(v["response"]["tool_calls"][0]["name"], "bash");
295
296        // calls.log should have exactly one line for this round.
297        let calls = std::fs::read_to_string(log_dir.join("calls.log")).unwrap();
298        assert_eq!(calls.lines().count(), 1);
299        assert!(calls.contains("test-model"));
300        assert!(calls.contains("step=3"));
301    }
302
303    #[test]
304    fn test_orphan_response_when_no_matching_request() {
305        let tmp = tempfile::TempDir::new().unwrap();
306
307        log_llm_response(
308            tmp.path(),
309            None,
310            "bare text",
311            &[],
312            "",
313            "solo-model",
314            7,
315            50,
316            true,
317        );
318
319        let log_dir = tmp.path().join("llm");
320        let orphans: Vec<_> = std::fs::read_dir(&log_dir)
321            .unwrap()
322            .filter_map(|e| e.ok().map(|e| e.path()))
323            .filter(|p| {
324                p.file_name()
325                    .map_or(false, |n| n.to_string_lossy().contains("orphan"))
326            })
327            .collect();
328        assert_eq!(orphans.len(), 1);
329        let v: serde_json::Value =
330            serde_json::from_str(&std::fs::read_to_string(&orphans[0]).unwrap()).unwrap();
331        assert!(v["warning"]
332            .as_str()
333            .unwrap()
334            .contains("no matching request"));
335    }
336
337    /// Regression: prior design used a process-wide `static Mutex<Option<PathBuf>>`
338    /// to pair request/response. Two concurrent sessions would clobber each
339    /// other's pending path — one session's response would merge into the
340    /// OTHER session's request file. This test interleaves two sessions
341    /// and asserts each response lands in its own request file.
342    #[test]
343    fn test_concurrent_sessions_do_not_mix_request_response() {
344        let tmp_a = tempfile::TempDir::new().unwrap();
345        let tmp_b = tempfile::TempDir::new().unwrap();
346        let msgs_a = vec![Message::new(Role::User, "alpha")];
347        let msgs_b = vec![Message::new(Role::User, "beta")];
348
349        // Interleaved: A-req, B-req, A-resp, B-resp — the exact pattern
350        // that would corrupt results under the old static-Mutex design.
351        let pending_a = log_llm_request(tmp_a.path(), &msgs_a, &[], "model-a", 16000, 0, true);
352        let pending_b = log_llm_request(tmp_b.path(), &msgs_b, &[], "model-b", 16000, 0, true);
353        log_llm_response(
354            tmp_a.path(),
355            pending_a,
356            "reply-A",
357            &[],
358            "",
359            "model-a",
360            0,
361            10,
362            true,
363        );
364        log_llm_response(
365            tmp_b.path(),
366            pending_b,
367            "reply-B",
368            &[],
369            "",
370            "model-b",
371            0,
372            20,
373            true,
374        );
375
376        let read_merged = |dir: &Path| -> serde_json::Value {
377            let log_dir = dir.join("llm");
378            let files: Vec<_> = std::fs::read_dir(&log_dir)
379                .unwrap()
380                .filter_map(|e| e.ok().map(|e| e.path()))
381                .filter(|p| {
382                    p.extension().map_or(false, |ext| ext == "json")
383                        && !p
384                            .file_name()
385                            .map_or(false, |n| n.to_string_lossy().contains("orphan"))
386                })
387                .collect();
388            assert_eq!(files.len(), 1, "each session gets its own merged file");
389            serde_json::from_str(&std::fs::read_to_string(&files[0]).unwrap()).unwrap()
390        };
391
392        let a = read_merged(tmp_a.path());
393        let b = read_merged(tmp_b.path());
394        assert_eq!(a["model"], "model-a");
395        assert_eq!(a["response"]["text"], "reply-A");
396        assert_eq!(b["model"], "model-b");
397        assert_eq!(b["response"]["text"], "reply-B");
398    }
399
400    #[test]
401    fn timestamp_format_is_stable() {
402        // Guard: filename pattern YYYY-MM-DD_HH-MM-SS_sss. Callers of
403        // log_llm_request rely on this shape (daemon / tests glob `*.json`
404        // and orphan_response detection splits on this layout).
405        let ts = timestamp();
406        assert_eq!(ts.len(), 23, "expected 23-char timestamp, got {:?}", ts);
407        assert_eq!(&ts[4..5], "-");
408        assert_eq!(&ts[7..8], "-");
409        assert_eq!(&ts[10..11], "_");
410        assert_eq!(&ts[13..14], "-");
411        assert_eq!(&ts[16..17], "-");
412        assert_eq!(&ts[19..20], "_");
413        assert!(ts[..4].chars().all(|c| c.is_ascii_digit()));
414        assert!(ts[20..].chars().all(|c| c.is_ascii_digit()));
415    }
416}