1use anyhow::{Context, Result};
2use chrono::{DateTime, Utc};
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{BufRead, BufReader};
6use std::path::Path;
7
8use super::models::{DataQuality, GlobalDataQuality, SessionData};
9use super::parser::parse_session_file;
10use super::scanner::{resolve_agent_parents, scan_claude_home, scan_projects_dir};
11
12fn extract_version(path: &Path) -> Option<String> {
16 let file = File::open(path).ok()?;
17 let reader = BufReader::new(file);
18 let first_line = reader.lines().next()?.ok()?;
19 let val: serde_json::Value = serde_json::from_str(&first_line).ok()?;
20 val.get("version")
21 .and_then(|v| v.as_str())
22 .map(|s| s.to_string())
23}
24
25fn time_range<'a, I>(timestamps: I) -> (Option<DateTime<Utc>>, Option<DateTime<Utc>>)
27where
28 I: Iterator<Item = &'a DateTime<Utc>>,
29{
30 let mut min: Option<DateTime<Utc>> = None;
31 let mut max: Option<DateTime<Utc>> = None;
32 for ts in timestamps {
33 min = Some(min.map_or(*ts, |m: DateTime<Utc>| m.min(*ts)));
34 max = Some(max.map_or(*ts, |m: DateTime<Utc>| m.max(*ts)));
35 }
36 (min, max)
37}
38
39pub fn load_all(claude_home: &Path) -> Result<(Vec<SessionData>, GlobalDataQuality)> {
46 let mut files = scan_claude_home(claude_home)
48 .context("failed to scan claude home for session files")?;
49
50 resolve_agent_parents(&mut files)
52 .context("failed to resolve agent parent sessions")?;
53
54 let (main_files, agent_files): (Vec<_>, Vec<_>) =
56 files.into_iter().partition(|f| !f.is_agent);
57
58 let mut global_quality = GlobalDataQuality {
59 total_session_files: main_files.len(),
60 total_agent_files: agent_files.len(),
61 ..Default::default()
62 };
63
64 let mut sessions: HashMap<String, SessionData> = HashMap::new();
66
67 for sf in &main_files {
68 let (turns, quality) = parse_session_file(&sf.file_path, false)
69 .with_context(|| format!("failed to parse session: {}", sf.file_path.display()))?;
70
71 let version = extract_version(&sf.file_path);
72
73 let (first_ts, last_ts) = time_range(turns.iter().map(|t| &t.timestamp));
74
75 global_quality.total_valid_turns += quality.valid_turns;
76 global_quality.total_skipped +=
77 quality.skipped_synthetic + quality.skipped_sidechain + quality.skipped_invalid + quality.skipped_parse_error;
78
79 let session = SessionData {
80 session_id: sf.session_id.clone(),
81 project: sf.project.clone(),
82 turns,
83 agent_turns: Vec::new(),
84 first_timestamp: first_ts,
85 last_timestamp: last_ts,
86 version,
87 quality,
88 };
89
90 sessions.insert(sf.session_id.clone(), session);
91 }
92
93 for sf in &agent_files {
95 let (agent_turns, quality) = parse_session_file(&sf.file_path, true)
96 .with_context(|| format!("failed to parse agent file: {}", sf.file_path.display()))?;
97
98 global_quality.total_valid_turns += quality.valid_turns;
99 global_quality.total_skipped +=
100 quality.skipped_synthetic + quality.skipped_sidechain + quality.skipped_invalid + quality.skipped_parse_error;
101
102 match &sf.parent_session_id {
103 Some(parent_id) => {
104 if !sessions.contains_key(parent_id) {
106 let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
107 sessions.insert(parent_id.clone(), SessionData {
108 session_id: parent_id.clone(),
109 project,
110 turns: Vec::new(),
111 agent_turns: Vec::new(),
112 first_timestamp: None,
113 last_timestamp: None,
114 version: None,
115 quality: DataQuality::default(),
116 });
117 global_quality.orphan_agents += 1;
118 }
119
120 let parent = sessions.get_mut(parent_id).unwrap();
121 parent.agent_turns.extend(agent_turns);
122
123 parent.quality.total_lines += quality.total_lines;
125 parent.quality.valid_turns += quality.valid_turns;
126 parent.quality.skipped_synthetic += quality.skipped_synthetic;
127 parent.quality.skipped_sidechain += quality.skipped_sidechain;
128 parent.quality.skipped_invalid += quality.skipped_invalid;
129 parent.quality.skipped_parse_error += quality.skipped_parse_error;
130 parent.quality.duplicate_turns += quality.duplicate_turns;
131 }
132 None => {
133 let virtual_id = sf.session_id.clone();
135 let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
136 if !sessions.contains_key(&virtual_id) {
137 sessions.insert(virtual_id.clone(), SessionData {
138 session_id: virtual_id.clone(),
139 project,
140 turns: Vec::new(),
141 agent_turns: Vec::new(),
142 first_timestamp: None,
143 last_timestamp: None,
144 version: None,
145 quality: DataQuality::default(),
146 });
147 }
148 let parent = sessions.get_mut(&virtual_id).unwrap();
149 parent.agent_turns.extend(agent_turns);
150
151 parent.quality.total_lines += quality.total_lines;
152 parent.quality.valid_turns += quality.valid_turns;
153 parent.quality.skipped_synthetic += quality.skipped_synthetic;
154 parent.quality.skipped_sidechain += quality.skipped_sidechain;
155 parent.quality.skipped_invalid += quality.skipped_invalid;
156 parent.quality.skipped_parse_error += quality.skipped_parse_error;
157 parent.quality.duplicate_turns += quality.duplicate_turns;
158
159 global_quality.orphan_agents += 1;
160 }
161 }
162 }
163
164 let mut result: Vec<SessionData> = sessions.into_values().collect();
166
167 let mut global_min: Option<DateTime<Utc>> = None;
169 let mut global_max: Option<DateTime<Utc>> = None;
170
171 for session in &mut result {
172 let all_timestamps = session
174 .turns
175 .iter()
176 .chain(session.agent_turns.iter())
177 .map(|t| &t.timestamp);
178 let (first_ts, last_ts) = time_range(all_timestamps);
179 session.first_timestamp = first_ts;
180 session.last_timestamp = last_ts;
181
182 if let Some(ts) = first_ts {
184 global_min = Some(global_min.map_or(ts, |m: DateTime<Utc>| m.min(ts)));
185 }
186 if let Some(ts) = last_ts {
187 global_max = Some(global_max.map_or(ts, |m: DateTime<Utc>| m.max(ts)));
188 }
189 }
190
191 global_quality.time_range = match (global_min, global_max) {
192 (Some(min), Some(max)) => Some((min, max)),
193 _ => None,
194 };
195
196 Ok((result, global_quality))
197}
198
199pub fn load_from_projects_dir(projects_dir: &Path) -> Result<(Vec<SessionData>, GlobalDataQuality)> {
205 let mut files = scan_projects_dir(projects_dir)
207 .context("failed to scan projects dir for session files")?;
208
209 resolve_agent_parents(&mut files)
211 .context("failed to resolve agent parent sessions")?;
212
213 let (main_files, agent_files): (Vec<_>, Vec<_>) =
215 files.into_iter().partition(|f| !f.is_agent);
216
217 let mut global_quality = GlobalDataQuality {
218 total_session_files: main_files.len(),
219 total_agent_files: agent_files.len(),
220 ..Default::default()
221 };
222
223 let mut sessions: HashMap<String, SessionData> = HashMap::new();
225
226 for sf in &main_files {
227 let (turns, quality) = parse_session_file(&sf.file_path, false)
228 .with_context(|| format!("failed to parse session: {}", sf.file_path.display()))?;
229
230 let version = extract_version(&sf.file_path);
231 let (first_ts, last_ts) = time_range(turns.iter().map(|t| &t.timestamp));
232
233 global_quality.total_valid_turns += quality.valid_turns;
234 global_quality.total_skipped +=
235 quality.skipped_synthetic + quality.skipped_sidechain + quality.skipped_invalid + quality.skipped_parse_error;
236
237 let session = SessionData {
238 session_id: sf.session_id.clone(),
239 project: sf.project.clone(),
240 turns,
241 agent_turns: Vec::new(),
242 first_timestamp: first_ts,
243 last_timestamp: last_ts,
244 version,
245 quality,
246 };
247
248 sessions.insert(sf.session_id.clone(), session);
249 }
250
251 for sf in &agent_files {
253 let (agent_turns, quality) = parse_session_file(&sf.file_path, true)
254 .with_context(|| format!("failed to parse agent file: {}", sf.file_path.display()))?;
255
256 global_quality.total_valid_turns += quality.valid_turns;
257 global_quality.total_skipped +=
258 quality.skipped_synthetic + quality.skipped_sidechain + quality.skipped_invalid + quality.skipped_parse_error;
259
260 match &sf.parent_session_id {
261 Some(parent_id) => {
262 if !sessions.contains_key(parent_id) {
263 let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
264 sessions.insert(parent_id.clone(), SessionData {
265 session_id: parent_id.clone(),
266 project,
267 turns: Vec::new(),
268 agent_turns: Vec::new(),
269 first_timestamp: None,
270 last_timestamp: None,
271 version: None,
272 quality: DataQuality::default(),
273 });
274 global_quality.orphan_agents += 1;
275 }
276
277 let parent = sessions.get_mut(parent_id).unwrap();
278 parent.agent_turns.extend(agent_turns);
279
280 parent.quality.total_lines += quality.total_lines;
281 parent.quality.valid_turns += quality.valid_turns;
282 parent.quality.skipped_synthetic += quality.skipped_synthetic;
283 parent.quality.skipped_sidechain += quality.skipped_sidechain;
284 parent.quality.skipped_invalid += quality.skipped_invalid;
285 parent.quality.skipped_parse_error += quality.skipped_parse_error;
286 parent.quality.duplicate_turns += quality.duplicate_turns;
287 }
288 None => {
289 let virtual_id = sf.session_id.clone();
290 let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
291 if !sessions.contains_key(&virtual_id) {
292 sessions.insert(virtual_id.clone(), SessionData {
293 session_id: virtual_id.clone(),
294 project,
295 turns: Vec::new(),
296 agent_turns: Vec::new(),
297 first_timestamp: None,
298 last_timestamp: None,
299 version: None,
300 quality: DataQuality::default(),
301 });
302 }
303 let parent = sessions.get_mut(&virtual_id).unwrap();
304 parent.agent_turns.extend(agent_turns);
305
306 parent.quality.total_lines += quality.total_lines;
307 parent.quality.valid_turns += quality.valid_turns;
308 parent.quality.skipped_synthetic += quality.skipped_synthetic;
309 parent.quality.skipped_sidechain += quality.skipped_sidechain;
310 parent.quality.skipped_invalid += quality.skipped_invalid;
311 parent.quality.skipped_parse_error += quality.skipped_parse_error;
312 parent.quality.duplicate_turns += quality.duplicate_turns;
313
314 global_quality.orphan_agents += 1;
315 }
316 }
317 }
318
319 let mut result: Vec<SessionData> = sessions.into_values().collect();
321
322 let mut global_min: Option<DateTime<Utc>> = None;
323 let mut global_max: Option<DateTime<Utc>> = None;
324
325 for session in &mut result {
326 let all_timestamps = session
327 .turns
328 .iter()
329 .chain(session.agent_turns.iter())
330 .map(|t| &t.timestamp);
331 let (first_ts, last_ts) = time_range(all_timestamps);
332 session.first_timestamp = first_ts;
333 session.last_timestamp = last_ts;
334
335 if let Some(ts) = first_ts {
336 global_min = Some(global_min.map_or(ts, |m: DateTime<Utc>| m.min(ts)));
337 }
338 if let Some(ts) = last_ts {
339 global_max = Some(global_max.map_or(ts, |m: DateTime<Utc>| m.max(ts)));
340 }
341 }
342
343 global_quality.time_range = match (global_min, global_max) {
344 (Some(min), Some(max)) => Some((min, max)),
345 _ => None,
346 };
347
348 Ok((result, global_quality))
349}