terraphim_session_analyzer/
parser.rs

1use crate::models::{
2    extract_file_path, parse_timestamp, AgentInvocation, ContentBlock, FileOpType, FileOperation,
3    Message, SessionEntry, ToolCategory, ToolInvocation,
4};
5use crate::patterns::PatternMatcher;
6use crate::tool_analyzer;
7use anyhow::{Context, Result};
8use rayon::prelude::*;
9use std::fs::File;
10use std::io::{BufRead, BufReader};
11use std::path::Path;
12use tracing::{debug, info, warn};
13
/// Parsed contents of one session log together with the metadata captured while reading it.
pub struct SessionParser {
    /// Successfully deserialized entries, in the order they were read.
    entries: Vec<SessionEntry>,
    /// Session identifier; `from_file` copies it from the first entry with a non-empty
    /// `session_id`, and it stays empty if no entry provides one.
    session_id: String,
    /// Project directory; `from_file` copies it from the first entry carrying a non-empty
    /// `cwd`, and it stays empty if no entry provides one.
    project_path: String,
}
19
20impl SessionParser {
21    /// Parse a single JSONL session file
22    /// Parse a single JSONL session file
23    ///
24    /// # Errors
25    ///
26    /// Returns an error if the file cannot be read or contains malformed JSON
27    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
28        let path = path.as_ref();
29        info!("Parsing session file: {}", path.display());
30
31        let file = File::open(path)
32            .with_context(|| format!("Failed to open session file: {}", path.display()))?;
33        let reader = BufReader::new(file);
34
35        let mut entries = Vec::new();
36        let mut session_id = String::new();
37        let mut project_path = String::new();
38
39        for (line_num, line) in reader.lines().enumerate() {
40            match line {
41                Ok(line) if !line.trim().is_empty() => {
42                    match serde_json::from_str::<SessionEntry>(&line) {
43                        Ok(entry) => {
44                            // Extract session metadata from first entry
45                            if session_id.is_empty() {
46                                session_id.clone_from(&entry.session_id);
47                            }
48                            if project_path.is_empty() {
49                                if let Some(cwd) = &entry.cwd {
50                                    project_path.clone_from(cwd);
51                                }
52                            }
53                            entries.push(entry);
54                        }
55                        Err(e) => {
56                            warn!(
57                                "Failed to parse line {}: {} - Error: {}",
58                                line_num + 1,
59                                line,
60                                e
61                            );
62                        }
63                    }
64                }
65                Ok(_) => {
66                    // Skip empty lines
67                }
68                Err(e) => {
69                    warn!("Failed to read line {}: {}", line_num + 1, e);
70                }
71            }
72        }
73
74        info!(
75            "Parsed {} entries from session {}",
76            entries.len(),
77            session_id
78        );
79
80        Ok(Self {
81            entries,
82            session_id,
83            project_path,
84        })
85    }
86
87    /// Find all session files in the default Claude directory
88    ///
89    /// # Errors
90    ///
91    /// Returns an error if the Claude directory doesn't exist or cannot be read
92    pub fn from_default_location() -> Result<Vec<Self>> {
93        let home = home::home_dir().context("Could not find home directory")?;
94        let claude_dir = home.join(".claude").join("projects");
95
96        if !claude_dir.exists() {
97            return Err(anyhow::anyhow!(
98                "Claude projects directory not found at: {}",
99                claude_dir.display()
100            ));
101        }
102
103        Self::from_directory(claude_dir)
104    }
105
106    /// Parse all session files in a directory
107    ///
108    /// # Errors
109    ///
110    /// Returns an error if the directory cannot be read or contains invalid session files
111    pub fn from_directory<P: AsRef<Path>>(dir: P) -> Result<Vec<Self>> {
112        let dir = dir.as_ref();
113        info!("Scanning for session files in: {}", dir.display());
114
115        let mut parsers = Vec::new();
116
117        // Walk through all project directories
118        for entry in walkdir::WalkDir::new(dir)
119            .max_depth(2)
120            .into_iter()
121            .filter_map(std::result::Result::ok)
122        {
123            let path = entry.path();
124            if path.extension() == Some("jsonl".as_ref()) {
125                match Self::from_file(path) {
126                    Ok(parser) => {
127                        debug!("Successfully parsed session: {}", parser.session_id);
128                        parsers.push(parser);
129                    }
130                    Err(e) => {
131                        warn!("Failed to parse session file {}: {}", path.display(), e);
132                    }
133                }
134            }
135        }
136
137        info!("Found {} valid session files", parsers.len());
138        Ok(parsers)
139    }
140
141    /// Extract agent invocations from Task tool uses
142    #[must_use]
143    pub fn extract_agent_invocations(&self) -> Vec<AgentInvocation> {
144        self.entries
145            .par_iter()
146            .filter_map(|entry| {
147                if let Message::Assistant { content, .. } = &entry.message {
148                    for block in content {
149                        if let ContentBlock::ToolUse { name, input, id } = block {
150                            if name == "Task" {
151                                return self.parse_task_invocation(entry, input, id);
152                            }
153                        }
154                    }
155                }
156                None
157            })
158            .collect()
159    }
160
161    /// Parse a Task tool invocation into an `AgentInvocation`
162    fn parse_task_invocation(
163        &self,
164        entry: &SessionEntry,
165        input: &serde_json::Value,
166        _tool_id: &str,
167    ) -> Option<AgentInvocation> {
168        let agent_type = input
169            .get("subagent_type")
170            .and_then(|v| v.as_str())?
171            .to_string();
172
173        let task_description = input
174            .get("description")
175            .and_then(|v| v.as_str())
176            .unwrap_or("")
177            .to_string();
178
179        let prompt = input
180            .get("prompt")
181            .and_then(|v| v.as_str())
182            .unwrap_or("")
183            .to_string();
184
185        let timestamp = match parse_timestamp(&entry.timestamp) {
186            Ok(ts) => ts,
187            Err(e) => {
188                warn!("Failed to parse timestamp '{}': {}", entry.timestamp, e);
189                return None;
190            }
191        };
192
193        Some(AgentInvocation {
194            timestamp,
195            agent_type,
196            task_description,
197            prompt,
198            files_modified: Vec::new(), // Will be populated later
199            tools_used: Vec::new(),     // Will be populated later
200            duration_ms: None,          // Will be calculated later
201            parent_message_id: entry.uuid.clone(),
202            session_id: self.session_id.clone(),
203        })
204    }
205
206    /// Extract file operations from tool uses
207    #[must_use]
208    pub fn extract_file_operations(&self) -> Vec<FileOperation> {
209        self.entries
210            .par_iter()
211            .filter_map(|entry| {
212                if let Message::Assistant { content, .. } = &entry.message {
213                    for block in content {
214                        if let ContentBlock::ToolUse { name, input, .. } = block {
215                            if let Ok(op_type) = name.parse::<FileOpType>() {
216                                if let Some(file_path) = extract_file_path(input) {
217                                    let timestamp = match parse_timestamp(&entry.timestamp) {
218                                        Ok(ts) => ts,
219                                        Err(e) => {
220                                            warn!(
221                                                "Failed to parse timestamp '{}': {}",
222                                                entry.timestamp, e
223                                            );
224                                            continue;
225                                        }
226                                    };
227
228                                    return Some(FileOperation {
229                                        timestamp,
230                                        operation: op_type,
231                                        file_path,
232                                        agent_context: None, // Will be set during analysis
233                                        session_id: self.session_id.clone(),
234                                        message_id: entry.uuid.clone(),
235                                    });
236                                }
237                            }
238                        }
239                    }
240                }
241                None
242            })
243            .collect()
244    }
245
246    /// Extract tool invocations from Bash commands
247    ///
248    /// # Arguments
249    /// * `matcher` - Pattern matcher for identifying tools in commands
250    ///
251    /// # Returns
252    /// A vector of `ToolInvocation` instances found in Bash tool uses
253    #[must_use]
254    #[allow(dead_code)] // Will be used in Phase 2
255    pub fn extract_tool_invocations(&self, matcher: &dyn PatternMatcher) -> Vec<ToolInvocation> {
256        self.entries
257            .par_iter()
258            .filter_map(|entry| {
259                if let Message::Assistant { content, .. } = &entry.message {
260                    extract_from_bash_command(entry, content, matcher, &self.session_id)
261                } else {
262                    None
263                }
264            })
265            .collect()
266    }
267
268    /// Find the active agent context for a given message
269    #[must_use]
270    pub fn find_active_agent(&self, message_id: &str) -> Option<String> {
271        // Look backwards from the given message to find the most recent Task invocation
272        let mut found_message = false;
273
274        for entry in self.entries.iter().rev() {
275            if entry.uuid == message_id {
276                found_message = true;
277                continue;
278            }
279
280            if !found_message {
281                continue;
282            }
283
284            // Look for Task tool invocations
285            if let Message::Assistant { content, .. } = &entry.message {
286                for block in content {
287                    if let ContentBlock::ToolUse { name, input, .. } = block {
288                        if name == "Task" {
289                            if let Some(agent_type) =
290                                input.get("subagent_type").and_then(|v| v.as_str())
291                            {
292                                return Some(agent_type.to_string());
293                            }
294                        }
295                    }
296                }
297            }
298        }
299
300        None
301    }
302
303    /// Get session metadata
304    #[must_use]
305    pub fn get_session_info(
306        &self,
307    ) -> (
308        String,
309        String,
310        Option<jiff::Timestamp>,
311        Option<jiff::Timestamp>,
312    ) {
313        let start_time = self.entries.first().and_then(|e| {
314            parse_timestamp(&e.timestamp)
315                .map_err(|err| {
316                    debug!("Could not parse start timestamp '{}': {}", e.timestamp, err);
317                    err
318                })
319                .ok()
320        });
321        let end_time = self.entries.last().and_then(|e| {
322            parse_timestamp(&e.timestamp)
323                .map_err(|err| {
324                    debug!("Could not parse end timestamp '{}': {}", e.timestamp, err);
325                    err
326                })
327                .ok()
328        });
329
330        (
331            self.session_id.clone(),
332            self.project_path.clone(),
333            start_time,
334            end_time,
335        )
336    }
337
338    /// Get entry count for statistics
339    /// Used in integration tests
340    #[allow(dead_code)]
341    #[must_use]
342    pub fn entry_count(&self) -> usize {
343        self.entries.len()
344    }
345
346    /// Get all entries
347    #[must_use]
348    pub fn entries(&self) -> &[SessionEntry] {
349        &self.entries
350    }
351
352    /// Find entries within a time window
353    /// Used in integration tests
354    #[allow(dead_code)]
355    #[must_use]
356    pub fn entries_in_window(
357        &self,
358        start: jiff::Timestamp,
359        end: jiff::Timestamp,
360    ) -> Vec<&SessionEntry> {
361        self.entries
362            .iter()
363            .filter(|entry| match parse_timestamp(&entry.timestamp) {
364                Ok(timestamp) => timestamp >= start && timestamp <= end,
365                Err(e) => {
366                    debug!(
367                        "Skipping entry with invalid timestamp '{}': {}",
368                        entry.timestamp, e
369                    );
370                    false
371                }
372            })
373            .collect()
374    }
375
376    /// Find all unique agent types used in this session
377    /// Used in integration tests
378    #[allow(dead_code)]
379    #[must_use]
380    pub fn get_agent_types(&self) -> Vec<String> {
381        let agents = self.extract_agent_invocations();
382        let mut agent_types: Vec<String> = agents
383            .into_iter()
384            .map(|a| a.agent_type)
385            .collect::<std::collections::HashSet<_>>()
386            .into_iter()
387            .collect();
388        agent_types.sort();
389        agent_types
390    }
391
392    /// Build a timeline of events for visualization
393    /// Used in integration tests
394    #[allow(dead_code)]
395    #[must_use]
396    pub fn build_timeline(&self) -> Vec<TimelineEvent> {
397        let mut events = Vec::new();
398
399        // Add agent invocations
400        for agent in self.extract_agent_invocations() {
401            events.push(TimelineEvent {
402                timestamp: agent.timestamp,
403                event_type: TimelineEventType::AgentInvocation,
404                description: format!("{}: {}", agent.agent_type, agent.task_description),
405                agent: Some(agent.agent_type),
406                file: None,
407            });
408        }
409
410        // Add file operations
411        for file_op in self.extract_file_operations() {
412            events.push(TimelineEvent {
413                timestamp: file_op.timestamp,
414                event_type: TimelineEventType::FileOperation,
415                description: format!("{:?}: {}", file_op.operation, file_op.file_path),
416                agent: file_op.agent_context,
417                file: Some(file_op.file_path),
418            });
419        }
420
421        // Sort by timestamp
422        events.sort_by(|a, b| a.timestamp.cmp(&b.timestamp));
423        events
424    }
425}
426
427/// Helper function to extract tool invocations from Bash command content
428#[allow(dead_code)] // Will be used in Phase 2
429fn extract_from_bash_command(
430    entry: &SessionEntry,
431    content: &[ContentBlock],
432    matcher: &dyn PatternMatcher,
433    session_id: &str,
434) -> Option<ToolInvocation> {
435    for block in content {
436        if let ContentBlock::ToolUse { name, input, .. } = block {
437            if name == "Bash" {
438                // Extract the command from the input
439                let command = input.get("command").and_then(|v| v.as_str())?;
440
441                // Find tool matches using the pattern matcher
442                let matches = matcher.find_matches(command);
443
444                if let Some(tool_match) = matches.first() {
445                    // Parse command context to extract arguments and flags
446                    if let Some((full_cmd, arguments, flags)) =
447                        tool_analyzer::parse_command_context(command, tool_match.start)
448                    {
449                        // Filter out shell built-ins
450                        if !tool_analyzer::is_actual_tool(&tool_match.tool_name) {
451                            continue;
452                        }
453
454                        let timestamp = match parse_timestamp(&entry.timestamp) {
455                            Ok(ts) => ts,
456                            Err(e) => {
457                                warn!("Failed to parse timestamp '{}': {}", entry.timestamp, e);
458                                continue;
459                            }
460                        };
461
462                        return Some(ToolInvocation {
463                            timestamp,
464                            tool_name: tool_match.tool_name.clone(),
465                            tool_category: ToolCategory::from_string(&tool_match.category),
466                            command_line: full_cmd,
467                            arguments,
468                            flags,
469                            exit_code: None,     // Exit code not available from logs
470                            agent_context: None, // Will be populated later
471                            session_id: session_id.to_string(),
472                            message_id: entry.uuid.clone(),
473                        });
474                    }
475                }
476            }
477        }
478    }
479
480    None
481}
482
/// One entry of the timeline produced by `SessionParser::build_timeline`.
///
/// Used in integration tests and public API
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct TimelineEvent {
    /// When the event happened; `build_timeline` sorts events by this value.
    pub timestamp: jiff::Timestamp,
    /// What kind of event this is.
    pub event_type: TimelineEventType,
    /// Human-readable summary; `build_timeline` formats it as `"<agent type>: <task>"`
    /// for agent invocations and `"<operation>: <path>"` for file operations.
    pub description: String,
    /// Agent associated with the event: the invoked agent type for agent invocations,
    /// or the file operation's `agent_context` (which may be `None`).
    pub agent: Option<String>,
    /// Path of the affected file for file operations; `None` for agent invocations.
    pub file: Option<String>,
}
493
/// Kind of event recorded in a [`TimelineEvent`].
///
/// Derives `PartialEq`/`Eq` so event kinds can be compared directly (for example with
/// `==` or `assert_eq!`) instead of only through pattern matching.
///
/// Used in integration tests and public API
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TimelineEventType {
    /// An agent was launched through a Task tool use.
    AgentInvocation,
    /// A file was touched through a file tool use.
    FileOperation,
    /// A user message. `SessionParser::build_timeline` in this file does not emit it.
    UserMessage,
}
502
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserialize one JSONL line into a `SessionEntry`, panicking on malformed input.
    fn entry_from(json_line: &str) -> SessionEntry {
        serde_json::from_str(json_line).unwrap()
    }

    /// Wrap a single entry in a parser carrying the fixture session metadata.
    fn parser_with(entry: SessionEntry) -> SessionParser {
        SessionParser {
            entries: vec![entry],
            session_id: "test-session".to_string(),
            project_path: "/home/alex/projects".to_string(),
        }
    }

    /// A plain user message deserializes and exposes its session id and uuid.
    #[test]
    fn test_parse_session_entry() {
        let entry = entry_from(
            r#"{"parentUuid":null,"isSidechain":false,"userType":"external","cwd":"/home/alex/projects/zestic-at/charm","sessionId":"b325985c-5c1c-48f1-97e2-e3185bb55886","version":"1.0.111","gitBranch":"","type":"user","message":{"role":"user","content":"test message"},"uuid":"ab88a3b0-544a-411a-a8a4-92b142e21472","timestamp":"2025-10-01T09:05:21.902Z"}"#,
        );

        assert_eq!(entry.session_id, "b325985c-5c1c-48f1-97e2-e3185bb55886");
        assert_eq!(entry.uuid, "ab88a3b0-544a-411a-a8a4-92b142e21472");
    }

    /// A Task tool use is reported as an agent invocation with its type and description.
    #[test]
    fn test_parse_task_invocation() {
        let parser = parser_with(entry_from(
            r#"{"parentUuid":"parent-uuid","isSidechain":false,"userType":"external","cwd":"/home/alex/projects","sessionId":"test-session","version":"1.0.111","gitBranch":"","message":{"role":"assistant","content":[{"type":"tool_use","id":"tool-id","name":"Task","input":{"subagent_type":"architect","description":"Design system architecture","prompt":"Please design the architecture"}}]},"requestId":"req-123","type":"assistant","uuid":"msg-uuid","timestamp":"2025-10-01T09:05:21.902Z"}"#,
        ));

        let agents = parser.extract_agent_invocations();
        assert_eq!(agents.len(), 1);
        assert_eq!(agents[0].agent_type, "architect");
        assert_eq!(agents[0].task_description, "Design system architecture");
    }

    /// A Write tool use is reported as a file operation on the given path.
    #[test]
    fn test_extract_file_operations() {
        let parser = parser_with(entry_from(
            r#"{"parentUuid":"parent-uuid","isSidechain":false,"userType":"external","cwd":"/home/alex/projects","sessionId":"test-session","version":"1.0.111","gitBranch":"","message":{"role":"assistant","content":[{"type":"tool_use","id":"tool-id","name":"Write","input":{"file_path":"/path/to/file.rs","content":"test content"}}]},"type":"assistant","uuid":"msg-uuid","timestamp":"2025-10-01T09:05:21.902Z"}"#,
        ));

        let file_ops = parser.extract_file_operations();
        assert_eq!(file_ops.len(), 1);
        assert_eq!(file_ops[0].file_path, "/path/to/file.rs");
        assert!(matches!(file_ops[0].operation, FileOpType::Write));
    }
}
551}