hindsight/storage/discovery.rs
1//! Session file discovery
2//!
3//! Scans Claude Code project directories for JSONL transcript files.
4
5use crate::error::{HindsightError, Result};
6use std::fs;
7use std::path::{Path, PathBuf};
8
/// Metadata for one session transcript found on disk.
#[derive(Clone, Debug)]
pub struct SessionFile {
    /// Location of the session's JSONL file.
    pub path: PathBuf,

    /// Identifier of the session, taken from the file name without its extension.
    pub session_id: String,

    /// Name of the owning project, decoded from the project directory name.
    pub project_name: String,

    /// Size of the JSONL file, in bytes.
    pub file_size: u64,

    /// When the session was created, in seconds since the Unix epoch
    /// (taken from the timestamp of the first node).
    pub created_at: i64,

    /// When the file was last modified, in seconds since the Unix epoch.
    pub modified_at: i64,

    /// `true` when a `<session_id>/subagents/` directory sits next to the file.
    pub has_subagents: bool,

    /// Short name of the model used, e.g. "sonnet-4-5".
    pub model: Option<String>,

    /// Count of errors in the session (tool result errors plus error nodes).
    pub error_count: usize,

    /// Preview of the first user message (at most 80 characters).
    pub first_message: Option<String>,

    /// Source directory exactly as written in the config, e.g. "~/.claude/projects".
    pub source_dir: String,

    /// Unique models used by subagents, comma-separated (e.g. "claude-haiku-4-5").
    pub subagent_models: Option<String>,
}
48
49/// Discover all Claude Code sessions in configured directories
50///
51/// Scans `~/.claude/projects/` for session JSONL files.
52///
53/// # Returns
54///
55/// Returns a vector of `SessionFile` structs representing all discovered sessions.
56///
57/// # Errors
58///
59/// Returns `HindsightError::NoSessionsFound` if no sessions are discovered.
60pub fn discover_sessions() -> Result<Vec<SessionFile>> {
61 let home = dirs::home_dir()
62 .ok_or_else(|| HindsightError::Config("Could not determine home directory".to_string()))?;
63
64 let config = match crate::config::Config::load() {
65 Ok(c) => c,
66 Err(e) => {
67 eprintln!("Warning: failed to load config, using defaults: {}", e);
68 Default::default()
69 }
70 };
71
72 // Resolve configured directories: expand ~ and filter to those that exist.
73 // Each entry carries (expanded_path, raw_config_path) for source_dir tracking.
74 let claude_dirs: Vec<(PathBuf, String)> = config
75 .paths
76 .claude_dirs
77 .iter()
78 .map(|d| {
79 let expanded = if let Some(stripped) = d.path.strip_prefix("~/") {
80 home.join(stripped)
81 } else {
82 PathBuf::from(&d.path)
83 };
84 (expanded, d.path.clone())
85 })
86 .filter(|(p, _)| p.exists())
87 .collect();
88
89 let mut sessions = Vec::new();
90
91 for (claude_dir, source_dir) in &claude_dirs {
92 if !claude_dir.exists() {
93 continue;
94 }
95
96 // Scan each project directory
97 for project_entry in fs::read_dir(claude_dir)? {
98 let project_entry = project_entry?;
99 let project_path = project_entry.path();
100
101 if !project_path.is_dir() {
102 continue;
103 }
104
105 // Extract project name from directory name
106 let project_name = decode_project_name(&project_path);
107
108 // Find all .jsonl files in this project
109 for file_entry in fs::read_dir(&project_path)? {
110 let file_entry = file_entry?;
111 let file_path = file_entry.path();
112
113 // Check if it's a .jsonl file (not a directory)
114 if file_path.is_file()
115 && file_path.extension().and_then(|s| s.to_str()) == Some("jsonl")
116 {
117 let metadata = fs::metadata(&file_path)?;
118 let session_id = file_path
119 .file_stem()
120 .and_then(|s| s.to_str())
121 .unwrap_or("unknown")
122 .to_string();
123
124 let modified_at = metadata
125 .modified()?
126 .duration_since(std::time::UNIX_EPOCH)
127 .map(|d| d.as_secs() as i64)
128 .unwrap_or(0);
129
130 // Check if there's a matching directory with subagents
131 let subagents_dir = project_path.join(&session_id).join("subagents");
132 let has_subagents = subagents_dir.exists() && subagents_dir.is_dir();
133
134 sessions.push(SessionFile {
135 path: file_path,
136 session_id,
137 project_name: project_name.clone(),
138 file_size: metadata.len(),
139 created_at: modified_at, // refined during indexing from first node timestamp
140 modified_at,
141 has_subagents,
142 model: None,
143 error_count: 0,
144 first_message: None,
145 source_dir: source_dir.clone(),
146 subagent_models: None,
147 });
148 }
149 }
150 }
151 }
152
153 if sessions.is_empty() {
154 return Err(HindsightError::NoSessionsFound);
155 }
156
157 // Sort by modification time (newest first)
158 sessions.sort_by(|a, b| b.modified_at.cmp(&a.modified_at));
159
160 Ok(sessions)
161}
162
/// Decode a project name from Claude Code's encoded directory name.
///
/// Claude Code encodes project paths by replacing `/` with `-`:
/// `-Users-codestz-Documents-PersonalProjects-claude-hindsight` → `claude-hindsight`
/// `-Users-codestz-Documents-Projects-dev-container-poc` → `dev-container-poc`
/// `-Users-codestz-my-app` → `my-app`
/// `-Users-codestz` → `codestz`
/// `-` → (unnamed, root scope)
///
/// Because project names may themselves contain hyphens, the original separators
/// cannot be recovered exactly. The decoding is a best-effort heuristic:
///
/// 1. An empty name or `-` yields an empty string; a name that does not start with
///    `-` is returned unchanged.
/// 2. If a well-known parent directory (`Projects`, `repos`, `src`, ...) appears as a
///    whole path segment, everything after its last occurrence is the project name.
///    Markers are tried in priority order.
/// 3. Otherwise leading path components are stripped (`Users`, `home`, `Documents`,
///    ..., plus the single username segment directly after `Users`/`home`) and the
///    remaining segments are rejoined with `-`. If nothing remains, the last segment
///    is used.
fn decode_project_name(path: &Path) -> String {
    // Parent directory markers, anchored on both sides so that only whole path
    // segments match (`-code-` must not fire inside `barcode-scanner`).
    const KNOWN_PARENTS: [&str; 13] = [
        "-PersonalProjects-",
        "-Projects-",
        "-workspace-",
        "-Workspace-",
        "-repos-",
        "-Repos-",
        "-src-",
        "-dev-",
        "-code-",
        "-Code-",
        "-github-",
        "-GitHub-",
        "-git-",
    ];
    // Common leading path components that are never part of the project name.
    const SKIP_WORDS: [&str; 9] = [
        "Users",
        "home",
        "var",
        "tmp",
        "opt",
        "Documents",
        "Desktop",
        "Downloads",
        "Library",
    ];
    // Components that are directly followed by a username segment.
    const HOME_ROOTS: [&str; 2] = ["Users", "home"];

    let dir_name = path.file_name().and_then(|s| s.to_str()).unwrap_or("");

    // Edge case: just "-" or empty → unnamed project
    if dir_name.is_empty() || dir_name == "-" {
        return String::new();
    }

    // If it doesn't start with '-', it's already a plain name
    if !dir_name.starts_with('-') {
        return dir_name.to_string();
    }

    // Take everything after the last occurrence of the first marker that matches.
    for marker in KNOWN_PARENTS.iter() {
        if let Some(pos) = dir_name.rfind(*marker) {
            let after = &dir_name[pos + marker.len()..];
            if !after.is_empty() {
                return after.to_string();
            }
        }
    }

    // Fallback: split into segments and strip the leading path components.
    let segments: Vec<&str> = dir_name.split('-').filter(|s| !s.is_empty()).collect();

    let mut project_start = 0;
    // True only for the one segment directly following `Users`/`home`, so exactly one
    // username segment is skipped rather than every remaining segment.
    let mut expect_username = false;
    for (i, seg) in segments.iter().enumerate() {
        if SKIP_WORDS.contains(seg) {
            project_start = i + 1;
            expect_username = HOME_ROOTS.contains(seg);
        } else if expect_username {
            project_start = i + 1;
            expect_username = false;
        } else {
            break;
        }
    }

    if project_start < segments.len() {
        segments[project_start..].join("-")
    } else {
        // Every segment was a path prefix — use the last one (e.g., the username).
        // `segments` is empty only for names made solely of dashes.
        segments.last().map(|s| s.to_string()).unwrap_or_default()
    }
}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `decode_project_name` against a table of (encoded directory, expected name) pairs.
    #[test]
    fn test_decode_project_name() {
        let cases = [
            // Standard project paths
            (
                "-Users-codestz-Documents-PersonalProjects-claude-hindsight",
                "claude-hindsight",
            ),
            (
                "-Users-codestz-Documents-PersonalProjects-prompt-evaluator",
                "prompt-evaluator",
            ),
            ("-Users-codestz-Documents-PersonalProjects-mcpx", "mcpx"),
            (
                "-Users-codestz-Documents-Projects-dev-container-poc",
                "dev-container-poc",
            ),
            // Home directory sessions
            ("-Users-codestz", "codestz"),
            // Root/empty
            ("-", ""),
            // Plain names (non-encoded)
            ("my-project", "my-project"),
        ];

        for &(encoded, expected) in cases.iter() {
            assert_eq!(
                decode_project_name(Path::new(encoded)),
                expected,
                "input: {}",
                encoded
            );
        }
    }
}
293}