Skip to main content

open_loops/sessions/
claude_code.rs

//! Adapter for Claude Code sessions (`~/.claude/projects/<path-encoded>/*.jsonl`).
//! WARNING: this is Claude Code's internal on-disk format, not a public API — it may change.
//! Parsing is therefore tolerant: a bad line is skipped and never aborts the read (spec risk 1).
4use super::{SessionExcerpt, SessionSource};
5use crate::index::Index;
6use anyhow::Result;
7use chrono::{DateTime, Duration, Utc};
8use std::path::{Path, PathBuf};
9
/// Session source backed by Claude Code's on-disk project directories.
///
/// Each repository gets one sub-directory of `projects_dir`, named after the
/// encoded repository path, holding one `*.jsonl` file per session.
#[derive(Debug, Clone)]
pub struct ClaudeCode {
    /// Root directory that contains one sub-directory per encoded project path.
    pub projects_dir: PathBuf,
}
13
/// Builds the directory name Claude Code uses for a project path.
///
/// Every `/`, `\`, `.` and `:` becomes `-`; all other characters are kept.
/// e.g. /home/g/repo/x -> -home-g-repo-x, C:\Users\g\app -> C--Users-g-app
pub fn encode_project_path(p: &Path) -> String {
    let lossy = p.to_string_lossy();
    // Windows canonicalize() may prepend the verbatim marker \\?\ — drop it,
    // because Claude Code encodes the plain path.
    let normal: &str = match lossy.strip_prefix(r"\\?\") {
        Some(rest) => rest,
        None => &*lossy,
    };
    normal
        .chars()
        .map(|ch| match ch {
            '/' | '\\' | '.' | ':' => '-',
            other => other,
        })
        .collect()
}
22
23/// Extracts text from a session jsonl line. None for non-message,
24/// corrupted, or empty lines (tolerant parsing).
25pub fn extract_text(line: &str) -> Option<String> {
26    let v: serde_json::Value = serde_json::from_str(line).ok()?;
27    let role = v.get("type")?.as_str()?;
28    if role != "user" && role != "assistant" {
29        return None;
30    }
31    let content = v.get("message")?.get("content")?;
32    let text = match content {
33        serde_json::Value::String(s) => s.clone(),
34        serde_json::Value::Array(parts) => parts
35            .iter()
36            .filter_map(|p| p.get("text").and_then(|t| t.as_str()))
37            .collect::<Vec<_>>()
38            .join("\n"),
39        _ => return None,
40    };
41    let text = text.trim();
42    if text.is_empty() {
43        None
44    } else {
45        Some(format!("[{role}] {text}"))
46    }
47}
48
49/// Reads the last `max_bytes` of the file and extracts message text.
50/// The end of the conversation concentrates the "where I left off" signal (spec decision).
51fn read_tail_text(path: &Path, max_bytes: u64) -> Result<String> {
52    let raw = std::fs::read(path)?;
53    let start = raw.len().saturating_sub(max_bytes as usize);
54    let tail = String::from_utf8_lossy(&raw[start..]);
55    let mut lines = tail.lines();
56    if start > 0 {
57        lines.next(); // first line may be cut mid-way
58    }
59    Ok(lines
60        .filter_map(extract_text)
61        .collect::<Vec<_>>()
62        .join("\n"))
63}
64
65impl ClaudeCode {
66    /// Core excerpt logic, optionally accelerated by an FTS index.
67    ///
68    /// `None` index = in-memory path (reads bounded tail for every candidate).
69    /// `Some(index)` = FTS path (uses `session_mentions` for the mention probe;
70    /// still reads the bounded tail for the final selected sessions).
71    ///
72    /// # Errors
73    ///
74    /// Returns an error if the project directory cannot be read.
75    pub fn excerpts_indexed(
76        &self,
77        repo_path: &Path,
78        branch: &str,
79        window: (DateTime<Utc>, DateTime<Utc>),
80        max_sessions: usize,
81        max_kb: u64,
82        index: Option<&Index>,
83    ) -> Result<Vec<SessionExcerpt>> {
84        let dir = self.projects_dir.join(encode_project_path(repo_path));
85        if !dir.is_dir() {
86            return Ok(vec![]);
87        }
88        let pad = Duration::days(7);
89        let (start, end) = (window.0 - pad, window.1 + pad);
90
91        // When an index is available, get FTS-based mention set up-front.
92        // On index error, fall back to None (in-memory mention probe below).
93        let fts_mentions: Option<std::collections::HashSet<PathBuf>> = index.map(|idx| {
94            // Upsert every candidate file's bounded tail into the index.
95            // This is safe to call here: upsert skips unchanged (path,mtime) rows.
96            if let Ok(entries) = std::fs::read_dir(&dir) {
97                for entry in entries.flatten() {
98                    let path = entry.path();
99                    if path.extension().is_none_or(|e| e != "jsonl") {
100                        continue;
101                    }
102                    let Ok(meta) = entry.metadata() else { continue };
103                    let Ok(modified) = meta.modified() else {
104                        continue;
105                    };
106                    let mtime = modified
107                        .duration_since(std::time::UNIX_EPOCH)
108                        .unwrap_or_default()
109                        .as_secs() as i64;
110                    let size = meta.len() as i64;
111                    // Read bounded tail for indexing (skip on read error).
112                    if let Ok(tail) = read_tail_text(&path, max_kb * 1024) {
113                        idx.upsert_session(&path, repo_path, mtime, size, &tail);
114                    }
115                }
116            }
117            idx.session_mentions(repo_path, branch)
118        });
119
120        // Collect candidates: (modified, path, in_window, mentions_branch).
121        // For in-memory path: read the bounded tail ONCE for both mention probe
122        // AND excerpt text (fixes #14 — no unbounded whole-file read).
123        struct Candidate {
124            modified: DateTime<Utc>,
125            path: PathBuf,
126            in_window: bool,
127            mentions_branch: bool,
128            /// Tail text read during candidate collection (in-memory path only).
129            tail: Option<String>,
130        }
131
132        let mut candidates: Vec<Candidate> = Vec::new();
133        for entry in std::fs::read_dir(&dir)?.flatten() {
134            let path = entry.path();
135            if path.extension().is_none_or(|e| e != "jsonl") {
136                continue;
137            }
138            let Ok(meta) = entry.metadata() else { continue };
139            let Ok(modified) = meta.modified() else {
140                continue;
141            };
142            let modified: DateTime<Utc> = modified.into();
143            let in_window = modified >= start && modified <= end;
144
145            if let Some(ref fts_set) = fts_mentions {
146                // FTS path: mention signal comes from the index, no file read here.
147                let mentions_branch = fts_set.contains(&path);
148                if in_window || mentions_branch {
149                    candidates.push(Candidate {
150                        modified,
151                        path,
152                        in_window,
153                        mentions_branch,
154                        tail: None,
155                    });
156                }
157            } else {
158                // In-memory path: read bounded tail ONCE for mention probe (#14 fix).
159                let tail = read_tail_text(&path, max_kb * 1024).unwrap_or_default();
160                let mentions_branch = tail.contains(branch);
161                if in_window || mentions_branch {
162                    candidates.push(Candidate {
163                        modified,
164                        path,
165                        in_window,
166                        mentions_branch,
167                        tail: Some(tail),
168                    });
169                }
170            }
171        }
172
173        // Stable total-order sort: mentions_branch DESC, in_window DESC,
174        // modified DESC, path ASC (#15 fix — deterministic tie-break).
175        candidates.sort_by(|a, b| {
176            b.mentions_branch
177                .cmp(&a.mentions_branch)
178                .then(b.in_window.cmp(&a.in_window))
179                .then(b.modified.cmp(&a.modified))
180                .then(a.path.cmp(&b.path))
181        });
182
183        // Build output: filter empty-text sessions BEFORE truncate (#15 fix).
184        let mut out = Vec::new();
185        for cand in candidates {
186            if out.len() >= max_sessions {
187                break;
188            }
189            // Get the text: already read (in-memory path) or read now (FTS path).
190            let text = if let Some(t) = cand.tail {
191                t
192            } else {
193                read_tail_text(&cand.path, max_kb * 1024).unwrap_or_default()
194            };
195            if text.is_empty() {
196                // Skip empty sessions — do NOT count them toward the max_sessions
197                // limit (this is the #15 regression fix).
198                continue;
199            }
200            let source = cand
201                .path
202                .file_name()
203                .map(|n| n.to_string_lossy().into_owned())
204                .unwrap_or_default();
205            out.push(SessionExcerpt {
206                source,
207                modified: cand.modified,
208                text,
209                in_window: cand.in_window,
210                mentions_branch: cand.mentions_branch,
211            });
212        }
213        Ok(out)
214    }
215}
216
217impl SessionSource for ClaudeCode {
218    /// Excerpts of the sessions most relevant to the branch.
219    ///
220    /// Delegates to [`ClaudeCode::excerpts_indexed`] with no index (in-memory path).
221    ///
222    /// # Errors
223    ///
224    /// Returns an error if the project directory cannot be read.
225    fn excerpts(
226        &self,
227        repo_path: &Path,
228        branch: &str,
229        window: (DateTime<Utc>, DateTime<Utc>),
230        max_sessions: usize,
231        max_kb: u64,
232    ) -> Result<Vec<SessionExcerpt>> {
233        self.excerpts_indexed(repo_path, branch, window, max_sessions, max_kb, None)
234    }
235}
236
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::Index;
    use crate::sessions::SessionSource;
    use chrono::{Duration, Utc};
    use std::path::Path;

    /// Repository path used by every filesystem-backed test.
    const REPO: &str = "/home/g/app";

    /// Temporary projects root whose session directory for [`REPO`] already exists.
    struct Fixture {
        /// Held only so the temporary directory outlives the test body.
        _tmp: tempfile::TempDir,
        src: ClaudeCode,
        dir: PathBuf,
    }

    impl Fixture {
        /// Creates the temp root and the encoded project directory inside it.
        fn new() -> Self {
            let tmp = tempfile::tempdir().unwrap();
            let projects = tmp.path().to_path_buf();
            let dir = projects.join(encode_project_path(Path::new(REPO)));
            std::fs::create_dir_all(&dir).unwrap();
            Fixture {
                _tmp: tmp,
                src: ClaudeCode {
                    projects_dir: projects,
                },
                dir,
            }
        }

        /// Writes `content` to `name` inside the session directory and returns its path.
        fn write(&self, name: &str, content: &str) -> PathBuf {
            let target = self.dir.join(name);
            std::fs::write(&target, content).unwrap();
            target
        }
    }

    /// Window of one day on each side of the current instant.
    fn around_now() -> (DateTime<Utc>, DateTime<Utc>) {
        let now = Utc::now();
        (now - Duration::days(1), now + Duration::days(1))
    }

    /// One-day window ending two years ago, so a freshly written file is outside it.
    fn two_years_back() -> (DateTime<Utc>, DateTime<Utc>) {
        let passado = Utc::now() - Duration::days(730);
        (passado - Duration::days(1), passado)
    }

    #[test]
    fn encode_project_path_matches_claude_code_format() {
        let cases = [
            ("/home/g/repo/me/open-loops", "-home-g-repo-me-open-loops"),
            ("/home/g/my.app", "-home-g-my-app"),
        ];
        for (input, expected) in cases {
            assert_eq!(encode_project_path(Path::new(input)), expected);
        }
    }

    #[test]
    #[cfg(windows)]
    fn encode_project_path_handles_windows_separators() {
        let encoded = encode_project_path(Path::new(r"C:\Users\g\app"));
        assert_eq!(encoded, "C--Users-g-app");
    }

    #[test]
    fn extract_text_captures_user_assistant_and_ignores_rest() {
        let user = r#"{"type":"user","message":{"content":"quero implementar login"}}"#;
        let asst = r#"{"type":"assistant","message":{"content":[{"type":"text","text":"vou criar feat/login"}]}}"#;
        let meta = r#"{"type":"summary","summary":"x"}"#;

        assert_eq!(
            extract_text(user).as_deref(),
            Some("[user] quero implementar login")
        );
        assert_eq!(
            extract_text(asst).as_deref(),
            Some("[assistant] vou criar feat/login")
        );
        assert!(extract_text(meta).is_none());
        assert!(extract_text("corrupted non-json line").is_none());
    }

    #[test]
    fn excerpts_selects_by_window_tolerates_garbage_and_limits_count() {
        let fx = Fixture::new();
        fx.write(
            "sessao1.jsonl",
            concat!(
                r#"{"type":"user","message":{"content":"quero implementar login"}}"#,
                "\n",
                "lixo nao-json\n",
                r#"{"type":"assistant","message":{"content":[{"type":"text","text":"proximo passo: validar token"}]}}"#,
                "\n",
            ),
        );
        // Files of other formats are ignored.
        fx.write("nota.txt", "nada");

        let found = fx
            .src
            .excerpts(Path::new(REPO), "feat/login", around_now(), 3, 50)
            .unwrap();
        assert_eq!(found.len(), 1);
        let only = &found[0];
        assert!(only.text.contains("[user] quero implementar login"));
        assert!(only.text.contains("proximo passo: validar token"));
        assert_eq!(only.source, "sessao1.jsonl");
    }

    #[test]
    fn excerpts_empty_when_project_dir_does_not_exist() {
        let tmp = tempfile::tempdir().unwrap();
        let src = ClaudeCode {
            projects_dir: tmp.path().to_path_buf(),
        };
        let now = Utc::now();
        let found = src
            .excerpts(Path::new("/nao/existe"), "b", (now, now), 3, 50)
            .unwrap();
        assert!(found.is_empty());
    }

    #[test]
    fn excerpts_includes_session_outside_window_if_it_mentions_branch() {
        let fx = Fixture::new();
        fx.write(
            "antiga.jsonl",
            concat!(
                r#"{"type":"user","message":{"content":"implementando feat/login agora"}}"#,
                "\n",
            ),
        );

        // The window lies two years back while the file's mtime is "now",
        // so only the mention heuristic can select the session.
        let found = fx
            .src
            .excerpts(Path::new(REPO), "feat/login", two_years_back(), 3, 50)
            .unwrap();
        assert_eq!(found.len(), 1, "mention heuristic must include the session");
        assert!(found[0].text.contains("feat/login"));
    }

    #[test]
    fn excerpts_truncates_large_file_and_skips_cut_line() {
        let fx = Fixture::new();

        // Summary lines are never extracted; they only push the file past 1 KB.
        let pad_line = format!("{{\"type\":\"summary\",\"x\":\"{}\"}}\n", "A".repeat(80));
        let mut content = pad_line.repeat(15); // ~1500 bytes
        content.push_str(r#"{"type":"user","message":{"content":"contexto final"}}"#);
        content.push('\n');
        assert!(content.len() > 1024);
        fx.write("grande.jsonl", &content);

        // max_kb=1 forces truncation: the tail starts inside the file, so its
        // first line is skipped.
        let found = fx
            .src
            .excerpts(Path::new(REPO), "feat/x", around_now(), 3, 1)
            .unwrap();
        assert_eq!(found.len(), 1);
        assert!(found[0].text.contains("contexto final"));
    }

    #[test]
    fn excerpts_skips_session_with_only_messages_without_text() {
        let fx = Fixture::new();
        // Only summary and tool_result lines — extract_text returns None for all of them.
        fx.write(
            "vazia.jsonl",
            concat!(
                r#"{"type":"summary","summary":"nada util"}"#,
                "\n",
                r#"{"type":"tool_result","content":[]}"#,
                "\n",
            ),
        );

        let found = fx
            .src
            .excerpts(Path::new(REPO), "feat/x", around_now(), 3, 50)
            .unwrap();
        assert!(
            found.is_empty(),
            "session with no extractable text must be skipped"
        );
    }

    // (a) #15 — same mtime: stable tie-break by path ASC.
    #[test]
    fn excerpts_same_mtime_deterministic_order_by_path() {
        let fx = Fixture::new();

        // Two sessions with identical content.
        let line = concat!(r#"{"type":"user","message":{"content":"trabalho"}}"#, "\n");
        for name in ["zzz.jsonl", "aaa.jsonl"] {
            fx.write(name, line);
        }

        // Filesystem timestamps may differ between the two writes, so force
        // the very same mtime on both files.
        let stamp = filetime::FileTime::from_system_time(std::time::SystemTime::now());
        for name in ["aaa.jsonl", "zzz.jsonl"] {
            filetime::set_file_mtime(fx.dir.join(name), stamp).unwrap();
        }

        // max_sessions=1: the tie-break must pick "aaa.jsonl" every time.
        let found = fx
            .src
            .excerpts(Path::new(REPO), "feat/x", around_now(), 1, 50)
            .unwrap();
        assert_eq!(found.len(), 1, "must return exactly 1 session");
        assert_eq!(found[0].source, "aaa.jsonl", "tie-break must pick path ASC");
    }

    // (b) #15 — empty session excluded BEFORE truncate, real session survives.
    #[test]
    fn excerpts_empty_session_excluded_before_max_sessions_truncate() {
        let fx = Fixture::new();

        // Session with no extractable text (only a summary line).
        fx.write(
            "vazia.jsonl",
            concat!(r#"{"type":"summary","summary":"nada"}"#, "\n"),
        );
        // A real session with extractable text.
        fx.write(
            "real.jsonl",
            concat!(
                r#"{"type":"user","message":{"content":"trabalho real"}}"#,
                "\n"
            ),
        );

        // max_sessions=1: if the empty session took the slot, the real one
        // would be dropped — empties must be excluded BEFORE truncating.
        let found = fx
            .src
            .excerpts(Path::new(REPO), "feat/x", around_now(), 1, 50)
            .unwrap();
        assert_eq!(
            found.len(),
            1,
            "real session must survive despite empty peer"
        );
        assert!(
            found[0].text.contains("trabalho real"),
            "must be the real session"
        );
    }

    // (c) FTS mention probe via in-memory index.
    #[test]
    fn excerpts_indexed_uses_fts_for_mention_probe() {
        let fx = Fixture::new();
        let repo = Path::new(REPO);
        let branch = "feat/fts-probe";

        let mut content =
            format!(r#"{{"type":"user","message":{{"content":"working on {branch}"}}}}"#);
        content.push('\n');
        let session_path = fx.write("fts.jsonl", &content);

        // Index the session's bounded tail.
        let index = Index::open_in_memory();
        let tail = read_tail_text(&session_path, 50 * 1024).unwrap();
        let meta = std::fs::metadata(&session_path).unwrap();
        let mtime = meta
            .modified()
            .unwrap()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs() as i64;
        let size = meta.len() as i64;
        index.upsert_session(&session_path, repo, mtime, size, &tail);

        // The window does NOT cover the file's mtime, so the session can only
        // be found through the FTS mention probe.
        let found = fx
            .src
            .excerpts_indexed(repo, branch, two_years_back(), 3, 50, Some(&index))
            .unwrap();
        assert_eq!(
            found.len(),
            1,
            "FTS probe must find the branch-mentioning session"
        );
        assert!(found[0].text.contains(branch));
    }

    // (d) upsert_session is idempotent — unchanged (path,mtime) is not re-indexed.
    #[test]
    fn upsert_session_skips_reindex_when_path_mtime_unchanged() {
        let index = Index::open_in_memory();
        let session = Path::new("/fake/session.jsonl");
        let repo = Path::new("/home/g/app");
        let mtime: i64 = 1_700_000_000;
        let size: i64 = 42;

        // The first upsert stores its text; the second reuses the SAME
        // (path, mtime) and therefore must NOT overwrite the FTS row.
        index.upsert_session(session, repo, mtime, size, "[user] first index");
        index.upsert_session(
            session,
            repo,
            mtime,
            size,
            "[user] second index should not overwrite",
        );

        // The index must still answer for the first text but not for the second.
        let first_hits = index.session_mentions(repo, "first");
        assert!(
            first_hits.contains(&session.to_path_buf()),
            "first index text must be retrievable"
        );

        let second_hits = index.session_mentions(repo, "second");
        assert!(
            !second_hits.contains(&session.to_path_buf()),
            "second upsert must have been skipped (same mtime)"
        );
    }
}