Skip to main content

cc_token_usage/data/
loader.rs

1use anyhow::{Context, Result};
2use chrono::{DateTime, Utc};
3use std::collections::HashMap;
4use std::fs::File;
5use std::io::{BufRead, BufReader};
6use std::path::Path;
7
8use super::models::{DataQuality, GlobalDataQuality, SessionData};
9use super::parser::parse_session_file;
10use super::scanner::{resolve_agent_parents, scan_claude_home, scan_projects_dir};
11
12/// Extract the Claude Code version string from the first line of a JSONL file.
13///
14/// Both `user` and `assistant` entries carry a `version` field at the top level.
15fn extract_version(path: &Path) -> Option<String> {
16    let file = File::open(path).ok()?;
17    let reader = BufReader::new(file);
18    let first_line = reader.lines().next()?.ok()?;
19    let val: serde_json::Value = serde_json::from_str(&first_line).ok()?;
20    val.get("version")
21        .and_then(|v| v.as_str())
22        .map(|s| s.to_string())
23}
24
25/// Compute the min and max timestamps from a slice of turns that have timestamps.
26fn time_range<'a, I>(timestamps: I) -> (Option<DateTime<Utc>>, Option<DateTime<Utc>>)
27where
28    I: Iterator<Item = &'a DateTime<Utc>>,
29{
30    let mut min: Option<DateTime<Utc>> = None;
31    let mut max: Option<DateTime<Utc>> = None;
32    for ts in timestamps {
33        min = Some(min.map_or(*ts, |m: DateTime<Utc>| m.min(*ts)));
34        max = Some(max.map_or(*ts, |m: DateTime<Utc>| m.max(*ts)));
35    }
36    (min, max)
37}
38
39/// Load all session data from a Claude home directory.
40///
41/// 1. Scans for JSONL files (main sessions + agents)
42/// 2. Resolves legacy agent parent relationships
43/// 3. Parses main sessions first, then merges agent turns into their parents
44/// 4. Computes global time range and quality metrics
45pub fn load_all(claude_home: &Path) -> Result<(Vec<SessionData>, GlobalDataQuality)> {
46    // Step 1: Scan for all session files
47    let mut files = scan_claude_home(claude_home)
48        .context("failed to scan claude home for session files")?;
49
50    // Step 2: Resolve legacy agent parent relationships
51    resolve_agent_parents(&mut files)
52        .context("failed to resolve agent parent sessions")?;
53
54    // Partition into main sessions and agent files
55    let (main_files, agent_files): (Vec<_>, Vec<_>) =
56        files.into_iter().partition(|f| !f.is_agent);
57
58    let mut global_quality = GlobalDataQuality {
59        total_session_files: main_files.len(),
60        total_agent_files: agent_files.len(),
61        ..Default::default()
62    };
63
64    // Step 3a: Process all main sessions
65    let mut sessions: HashMap<String, SessionData> = HashMap::new();
66
67    for sf in &main_files {
68        let (turns, quality) = parse_session_file(&sf.file_path, false)
69            .with_context(|| format!("failed to parse session: {}", sf.file_path.display()))?;
70
71        let version = extract_version(&sf.file_path);
72
73        let (first_ts, last_ts) = time_range(turns.iter().map(|t| &t.timestamp));
74
75        global_quality.total_valid_turns += quality.valid_turns;
76        global_quality.total_skipped +=
77            quality.skipped_synthetic + quality.skipped_sidechain + quality.skipped_invalid + quality.skipped_parse_error;
78
79        let session = SessionData {
80            session_id: sf.session_id.clone(),
81            project: sf.project.clone(),
82            turns,
83            agent_turns: Vec::new(),
84            first_timestamp: first_ts,
85            last_timestamp: last_ts,
86            version,
87            quality,
88        };
89
90        sessions.insert(sf.session_id.clone(), session);
91    }
92
93    // Step 3b: Process agent files and merge into parent sessions
94    for sf in &agent_files {
95        let (agent_turns, quality) = parse_session_file(&sf.file_path, true)
96            .with_context(|| format!("failed to parse agent file: {}", sf.file_path.display()))?;
97
98        global_quality.total_valid_turns += quality.valid_turns;
99        global_quality.total_skipped +=
100            quality.skipped_synthetic + quality.skipped_sidechain + quality.skipped_invalid + quality.skipped_parse_error;
101
102        match &sf.parent_session_id {
103            Some(parent_id) => {
104                // If parent session doesn't exist, create a virtual one
105                if !sessions.contains_key(parent_id) {
106                    let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
107                    sessions.insert(parent_id.clone(), SessionData {
108                        session_id: parent_id.clone(),
109                        project,
110                        turns: Vec::new(),
111                        agent_turns: Vec::new(),
112                        first_timestamp: None,
113                        last_timestamp: None,
114                        version: None,
115                        quality: DataQuality::default(),
116                    });
117                    global_quality.orphan_agents += 1;
118                }
119
120                let parent = sessions.get_mut(parent_id).unwrap();
121                parent.agent_turns.extend(agent_turns);
122
123                // Accumulate agent quality into parent's quality
124                parent.quality.total_lines += quality.total_lines;
125                parent.quality.valid_turns += quality.valid_turns;
126                parent.quality.skipped_synthetic += quality.skipped_synthetic;
127                parent.quality.skipped_sidechain += quality.skipped_sidechain;
128                parent.quality.skipped_invalid += quality.skipped_invalid;
129                parent.quality.skipped_parse_error += quality.skipped_parse_error;
130                parent.quality.duplicate_turns += quality.duplicate_turns;
131            }
132            None => {
133                // Agent has no parent_session_id at all — create virtual session using agent's own session_id
134                let virtual_id = sf.session_id.clone();
135                let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
136                if !sessions.contains_key(&virtual_id) {
137                    sessions.insert(virtual_id.clone(), SessionData {
138                        session_id: virtual_id.clone(),
139                        project,
140                        turns: Vec::new(),
141                        agent_turns: Vec::new(),
142                        first_timestamp: None,
143                        last_timestamp: None,
144                        version: None,
145                        quality: DataQuality::default(),
146                    });
147                }
148                let parent = sessions.get_mut(&virtual_id).unwrap();
149                parent.agent_turns.extend(agent_turns);
150
151                parent.quality.total_lines += quality.total_lines;
152                parent.quality.valid_turns += quality.valid_turns;
153                parent.quality.skipped_synthetic += quality.skipped_synthetic;
154                parent.quality.skipped_sidechain += quality.skipped_sidechain;
155                parent.quality.skipped_invalid += quality.skipped_invalid;
156                parent.quality.skipped_parse_error += quality.skipped_parse_error;
157                parent.quality.duplicate_turns += quality.duplicate_turns;
158
159                global_quality.orphan_agents += 1;
160            }
161        }
162    }
163
164    // Step 4: Recompute time ranges to include agent turns, and collect results
165    let mut result: Vec<SessionData> = sessions.into_values().collect();
166
167    // Compute global time range
168    let mut global_min: Option<DateTime<Utc>> = None;
169    let mut global_max: Option<DateTime<Utc>> = None;
170
171    for session in &mut result {
172        // Recompute session time range including agent turns
173        let all_timestamps = session
174            .turns
175            .iter()
176            .chain(session.agent_turns.iter())
177            .map(|t| &t.timestamp);
178        let (first_ts, last_ts) = time_range(all_timestamps);
179        session.first_timestamp = first_ts;
180        session.last_timestamp = last_ts;
181
182        // Update global range
183        if let Some(ts) = first_ts {
184            global_min = Some(global_min.map_or(ts, |m: DateTime<Utc>| m.min(ts)));
185        }
186        if let Some(ts) = last_ts {
187            global_max = Some(global_max.map_or(ts, |m: DateTime<Utc>| m.max(ts)));
188        }
189    }
190
191    global_quality.time_range = match (global_min, global_max) {
192        (Some(min), Some(max)) => Some((min, max)),
193        _ => None,
194    };
195
196    Ok((result, global_quality))
197}
198
199/// Load all session data from a projects directory directly.
200///
201/// Unlike `load_all` which expects a Claude home directory (and appends `projects/`),
202/// this function takes the projects directory itself. Useful for loading data from
203/// archive directories like `~/.config/superpowers/conversation-archive/`.
204pub fn load_from_projects_dir(projects_dir: &Path) -> Result<(Vec<SessionData>, GlobalDataQuality)> {
205    // Step 1: Scan for all session files directly from projects_dir
206    let mut files = scan_projects_dir(projects_dir)
207        .context("failed to scan projects dir for session files")?;
208
209    // Step 2: Resolve legacy agent parent relationships
210    resolve_agent_parents(&mut files)
211        .context("failed to resolve agent parent sessions")?;
212
213    // Partition into main sessions and agent files
214    let (main_files, agent_files): (Vec<_>, Vec<_>) =
215        files.into_iter().partition(|f| !f.is_agent);
216
217    let mut global_quality = GlobalDataQuality {
218        total_session_files: main_files.len(),
219        total_agent_files: agent_files.len(),
220        ..Default::default()
221    };
222
223    // Step 3a: Process all main sessions
224    let mut sessions: HashMap<String, SessionData> = HashMap::new();
225
226    for sf in &main_files {
227        let (turns, quality) = parse_session_file(&sf.file_path, false)
228            .with_context(|| format!("failed to parse session: {}", sf.file_path.display()))?;
229
230        let version = extract_version(&sf.file_path);
231        let (first_ts, last_ts) = time_range(turns.iter().map(|t| &t.timestamp));
232
233        global_quality.total_valid_turns += quality.valid_turns;
234        global_quality.total_skipped +=
235            quality.skipped_synthetic + quality.skipped_sidechain + quality.skipped_invalid + quality.skipped_parse_error;
236
237        let session = SessionData {
238            session_id: sf.session_id.clone(),
239            project: sf.project.clone(),
240            turns,
241            agent_turns: Vec::new(),
242            first_timestamp: first_ts,
243            last_timestamp: last_ts,
244            version,
245            quality,
246        };
247
248        sessions.insert(sf.session_id.clone(), session);
249    }
250
251    // Step 3b: Process agent files and merge into parent sessions
252    for sf in &agent_files {
253        let (agent_turns, quality) = parse_session_file(&sf.file_path, true)
254            .with_context(|| format!("failed to parse agent file: {}", sf.file_path.display()))?;
255
256        global_quality.total_valid_turns += quality.valid_turns;
257        global_quality.total_skipped +=
258            quality.skipped_synthetic + quality.skipped_sidechain + quality.skipped_invalid + quality.skipped_parse_error;
259
260        match &sf.parent_session_id {
261            Some(parent_id) => {
262                if !sessions.contains_key(parent_id) {
263                    let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
264                    sessions.insert(parent_id.clone(), SessionData {
265                        session_id: parent_id.clone(),
266                        project,
267                        turns: Vec::new(),
268                        agent_turns: Vec::new(),
269                        first_timestamp: None,
270                        last_timestamp: None,
271                        version: None,
272                        quality: DataQuality::default(),
273                    });
274                    global_quality.orphan_agents += 1;
275                }
276
277                let parent = sessions.get_mut(parent_id).unwrap();
278                parent.agent_turns.extend(agent_turns);
279
280                parent.quality.total_lines += quality.total_lines;
281                parent.quality.valid_turns += quality.valid_turns;
282                parent.quality.skipped_synthetic += quality.skipped_synthetic;
283                parent.quality.skipped_sidechain += quality.skipped_sidechain;
284                parent.quality.skipped_invalid += quality.skipped_invalid;
285                parent.quality.skipped_parse_error += quality.skipped_parse_error;
286                parent.quality.duplicate_turns += quality.duplicate_turns;
287            }
288            None => {
289                let virtual_id = sf.session_id.clone();
290                let project = sf.project.clone().or_else(|| Some("(orphan)".to_string()));
291                if !sessions.contains_key(&virtual_id) {
292                    sessions.insert(virtual_id.clone(), SessionData {
293                        session_id: virtual_id.clone(),
294                        project,
295                        turns: Vec::new(),
296                        agent_turns: Vec::new(),
297                        first_timestamp: None,
298                        last_timestamp: None,
299                        version: None,
300                        quality: DataQuality::default(),
301                    });
302                }
303                let parent = sessions.get_mut(&virtual_id).unwrap();
304                parent.agent_turns.extend(agent_turns);
305
306                parent.quality.total_lines += quality.total_lines;
307                parent.quality.valid_turns += quality.valid_turns;
308                parent.quality.skipped_synthetic += quality.skipped_synthetic;
309                parent.quality.skipped_sidechain += quality.skipped_sidechain;
310                parent.quality.skipped_invalid += quality.skipped_invalid;
311                parent.quality.skipped_parse_error += quality.skipped_parse_error;
312                parent.quality.duplicate_turns += quality.duplicate_turns;
313
314                global_quality.orphan_agents += 1;
315            }
316        }
317    }
318
319    // Step 4: Recompute time ranges to include agent turns, and collect results
320    let mut result: Vec<SessionData> = sessions.into_values().collect();
321
322    let mut global_min: Option<DateTime<Utc>> = None;
323    let mut global_max: Option<DateTime<Utc>> = None;
324
325    for session in &mut result {
326        let all_timestamps = session
327            .turns
328            .iter()
329            .chain(session.agent_turns.iter())
330            .map(|t| &t.timestamp);
331        let (first_ts, last_ts) = time_range(all_timestamps);
332        session.first_timestamp = first_ts;
333        session.last_timestamp = last_ts;
334
335        if let Some(ts) = first_ts {
336            global_min = Some(global_min.map_or(ts, |m: DateTime<Utc>| m.min(ts)));
337        }
338        if let Some(ts) = last_ts {
339            global_max = Some(global_max.map_or(ts, |m: DateTime<Utc>| m.max(ts)));
340        }
341    }
342
343    global_quality.time_range = match (global_min, global_max) {
344        (Some(min), Some(max)) => Some((min, max)),
345        _ => None,
346    };
347
348    Ok((result, global_quality))
349}