1use anyhow::{Context, Result};
2use chrono::{DateTime, Utc};
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{BufRead, BufReader};
6use std::path::Path;
7
8use super::models::{DataQuality, GlobalDataQuality, SessionData};
9use super::parser::parse_session_file;
10use super::scanner::{resolve_agent_parents, scan_claude_home, scan_projects_dir};
11
12fn extract_version(path: &Path) -> Option<String> {
16 let file = File::open(path).ok()?;
17 let reader = BufReader::new(file);
18 let first_line = reader.lines().next()?.ok()?;
19 let val: serde_json::Value = serde_json::from_str(&first_line).ok()?;
20 val.get("version")
21 .and_then(|v| v.as_str())
22 .map(|s| s.to_string())
23}
24
25fn time_range<'a, I>(timestamps: I) -> (Option<DateTime<Utc>>, Option<DateTime<Utc>>)
27where
28 I: Iterator<Item = &'a DateTime<Utc>>,
29{
30 let mut min: Option<DateTime<Utc>> = None;
31 let mut max: Option<DateTime<Utc>> = None;
32 for ts in timestamps {
33 min = Some(min.map_or(*ts, |m: DateTime<Utc>| m.min(*ts)));
34 max = Some(max.map_or(*ts, |m: DateTime<Utc>| m.max(*ts)));
35 }
36 (min, max)
37}
38
39pub fn load_all(claude_home: &Path) -> Result<(Vec<SessionData>, GlobalDataQuality)> {
46 let mut files = scan_claude_home(claude_home)
48 .context("failed to scan claude home for session files")?;
49
50 resolve_agent_parents(&mut files)
52 .context("failed to resolve agent parent sessions")?;
53
54 let (main_files, agent_files): (Vec<_>, Vec<_>) =
56 files.into_iter().partition(|f| !f.is_agent);
57
58 let mut global_quality = GlobalDataQuality {
59 total_session_files: main_files.len(),
60 total_agent_files: agent_files.len(),
61 ..Default::default()
62 };
63
64 let mut sessions: HashMap<String, SessionData> = HashMap::new();
66
67 for sf in &main_files {
68 let (turns, quality) = parse_session_file(&sf.file_path, false)
69 .with_context(|| format!("failed to parse session: {}", sf.file_path.display()))?;
70
71 let version = extract_version(&sf.file_path);
72
73 let (first_ts, last_ts) = time_range(turns.iter().map(|t| &t.timestamp));
74
75 global_quality.total_valid_turns += quality.valid_turns;
76 global_quality.total_skipped +=
77 quality.skipped_synthetic + quality.skipped_invalid + quality.skipped_parse_error;
78
79 let session = SessionData {
80 session_id: sf.session_id.clone(),
81 project: sf.project.clone(),
82 turns,
83 agent_turns: Vec::new(),
84 first_timestamp: first_ts,
85 last_timestamp: last_ts,
86 version,
87 quality,
88 };
89
90 sessions.insert(sf.session_id.clone(), session);
91 }
92
93 for sf in &agent_files {
95 let (agent_turns, quality) = parse_session_file(&sf.file_path, true)
96 .with_context(|| format!("failed to parse agent file: {}", sf.file_path.display()))?;
97
98 global_quality.total_valid_turns += quality.valid_turns;
99 global_quality.total_skipped +=
100 quality.skipped_synthetic + quality.skipped_invalid + quality.skipped_parse_error;
101
102 match &sf.parent_session_id {
103 Some(parent_id) => {
104 if !sessions.contains_key(parent_id) {
106 let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
107 sessions.insert(parent_id.clone(), SessionData {
108 session_id: parent_id.clone(),
109 project,
110 turns: Vec::new(),
111 agent_turns: Vec::new(),
112 first_timestamp: None,
113 last_timestamp: None,
114 version: None,
115 quality: DataQuality::default(),
116 });
117 global_quality.orphan_agents += 1;
118 }
119
120 let parent = sessions.get_mut(parent_id).unwrap();
121 parent.agent_turns.extend(agent_turns);
122
123 parent.quality.total_lines += quality.total_lines;
125 parent.quality.valid_turns += quality.valid_turns;
126 parent.quality.skipped_synthetic += quality.skipped_synthetic;
127 parent.quality.skipped_invalid += quality.skipped_invalid;
128 parent.quality.skipped_parse_error += quality.skipped_parse_error;
129 parent.quality.duplicate_turns += quality.duplicate_turns;
130 }
131 None => {
132 let virtual_id = sf.session_id.clone();
134 let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
135 if !sessions.contains_key(&virtual_id) {
136 sessions.insert(virtual_id.clone(), SessionData {
137 session_id: virtual_id.clone(),
138 project,
139 turns: Vec::new(),
140 agent_turns: Vec::new(),
141 first_timestamp: None,
142 last_timestamp: None,
143 version: None,
144 quality: DataQuality::default(),
145 });
146 }
147 let parent = sessions.get_mut(&virtual_id).unwrap();
148 parent.agent_turns.extend(agent_turns);
149
150 parent.quality.total_lines += quality.total_lines;
151 parent.quality.valid_turns += quality.valid_turns;
152 parent.quality.skipped_synthetic += quality.skipped_synthetic;
153 parent.quality.skipped_invalid += quality.skipped_invalid;
154 parent.quality.skipped_parse_error += quality.skipped_parse_error;
155 parent.quality.duplicate_turns += quality.duplicate_turns;
156
157 global_quality.orphan_agents += 1;
158 }
159 }
160 }
161
162 let mut result: Vec<SessionData> = sessions.into_values().collect();
164
165 let mut global_min: Option<DateTime<Utc>> = None;
167 let mut global_max: Option<DateTime<Utc>> = None;
168
169 for session in &mut result {
170 let all_timestamps = session
172 .turns
173 .iter()
174 .chain(session.agent_turns.iter())
175 .map(|t| &t.timestamp);
176 let (first_ts, last_ts) = time_range(all_timestamps);
177 session.first_timestamp = first_ts;
178 session.last_timestamp = last_ts;
179
180 if let Some(ts) = first_ts {
182 global_min = Some(global_min.map_or(ts, |m: DateTime<Utc>| m.min(ts)));
183 }
184 if let Some(ts) = last_ts {
185 global_max = Some(global_max.map_or(ts, |m: DateTime<Utc>| m.max(ts)));
186 }
187 }
188
189 global_quality.time_range = match (global_min, global_max) {
190 (Some(min), Some(max)) => Some((min, max)),
191 _ => None,
192 };
193
194 Ok((result, global_quality))
195}
196
197pub fn load_from_projects_dir(projects_dir: &Path) -> Result<(Vec<SessionData>, GlobalDataQuality)> {
203 let mut files = scan_projects_dir(projects_dir)
205 .context("failed to scan projects dir for session files")?;
206
207 resolve_agent_parents(&mut files)
209 .context("failed to resolve agent parent sessions")?;
210
211 let (main_files, agent_files): (Vec<_>, Vec<_>) =
213 files.into_iter().partition(|f| !f.is_agent);
214
215 let mut global_quality = GlobalDataQuality {
216 total_session_files: main_files.len(),
217 total_agent_files: agent_files.len(),
218 ..Default::default()
219 };
220
221 let mut sessions: HashMap<String, SessionData> = HashMap::new();
223
224 for sf in &main_files {
225 let (turns, quality) = parse_session_file(&sf.file_path, false)
226 .with_context(|| format!("failed to parse session: {}", sf.file_path.display()))?;
227
228 let version = extract_version(&sf.file_path);
229 let (first_ts, last_ts) = time_range(turns.iter().map(|t| &t.timestamp));
230
231 global_quality.total_valid_turns += quality.valid_turns;
232 global_quality.total_skipped +=
233 quality.skipped_synthetic + quality.skipped_invalid + quality.skipped_parse_error;
234
235 let session = SessionData {
236 session_id: sf.session_id.clone(),
237 project: sf.project.clone(),
238 turns,
239 agent_turns: Vec::new(),
240 first_timestamp: first_ts,
241 last_timestamp: last_ts,
242 version,
243 quality,
244 };
245
246 sessions.insert(sf.session_id.clone(), session);
247 }
248
249 for sf in &agent_files {
251 let (agent_turns, quality) = parse_session_file(&sf.file_path, true)
252 .with_context(|| format!("failed to parse agent file: {}", sf.file_path.display()))?;
253
254 global_quality.total_valid_turns += quality.valid_turns;
255 global_quality.total_skipped +=
256 quality.skipped_synthetic + quality.skipped_invalid + quality.skipped_parse_error;
257
258 match &sf.parent_session_id {
259 Some(parent_id) => {
260 if !sessions.contains_key(parent_id) {
261 let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
262 sessions.insert(parent_id.clone(), SessionData {
263 session_id: parent_id.clone(),
264 project,
265 turns: Vec::new(),
266 agent_turns: Vec::new(),
267 first_timestamp: None,
268 last_timestamp: None,
269 version: None,
270 quality: DataQuality::default(),
271 });
272 global_quality.orphan_agents += 1;
273 }
274
275 let parent = sessions.get_mut(parent_id).unwrap();
276 parent.agent_turns.extend(agent_turns);
277
278 parent.quality.total_lines += quality.total_lines;
279 parent.quality.valid_turns += quality.valid_turns;
280 parent.quality.skipped_synthetic += quality.skipped_synthetic;
281 parent.quality.skipped_invalid += quality.skipped_invalid;
282 parent.quality.skipped_parse_error += quality.skipped_parse_error;
283 parent.quality.duplicate_turns += quality.duplicate_turns;
284 }
285 None => {
286 let virtual_id = sf.session_id.clone();
287 let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
288 if !sessions.contains_key(&virtual_id) {
289 sessions.insert(virtual_id.clone(), SessionData {
290 session_id: virtual_id.clone(),
291 project,
292 turns: Vec::new(),
293 agent_turns: Vec::new(),
294 first_timestamp: None,
295 last_timestamp: None,
296 version: None,
297 quality: DataQuality::default(),
298 });
299 }
300 let parent = sessions.get_mut(&virtual_id).unwrap();
301 parent.agent_turns.extend(agent_turns);
302
303 parent.quality.total_lines += quality.total_lines;
304 parent.quality.valid_turns += quality.valid_turns;
305 parent.quality.skipped_synthetic += quality.skipped_synthetic;
306 parent.quality.skipped_invalid += quality.skipped_invalid;
307 parent.quality.skipped_parse_error += quality.skipped_parse_error;
308 parent.quality.duplicate_turns += quality.duplicate_turns;
309
310 global_quality.orphan_agents += 1;
311 }
312 }
313 }
314
315 let mut result: Vec<SessionData> = sessions.into_values().collect();
317
318 let mut global_min: Option<DateTime<Utc>> = None;
319 let mut global_max: Option<DateTime<Utc>> = None;
320
321 for session in &mut result {
322 let all_timestamps = session
323 .turns
324 .iter()
325 .chain(session.agent_turns.iter())
326 .map(|t| &t.timestamp);
327 let (first_ts, last_ts) = time_range(all_timestamps);
328 session.first_timestamp = first_ts;
329 session.last_timestamp = last_ts;
330
331 if let Some(ts) = first_ts {
332 global_min = Some(global_min.map_or(ts, |m: DateTime<Utc>| m.min(ts)));
333 }
334 if let Some(ts) = last_ts {
335 global_max = Some(global_max.map_or(ts, |m: DateTime<Utc>| m.max(ts)));
336 }
337 }
338
339 global_quality.time_range = match (global_min, global_max) {
340 (Some(min), Some(max)) => Some((min, max)),
341 _ => None,
342 };
343
344 Ok((result, global_quality))
345}