Skip to main content

normalize_chat_sessions/formats/
mod.rs

1//! Log format plugins.
2//!
3//! Each format implements the `LogFormat` trait for parsing session logs.
4//!
5//! # Extensibility
6//!
7//! Users can register custom formats via [`register()`]:
8//!
9//! ```ignore
//! use normalize_chat_sessions::{LogFormat, Session, SessionFile, register};
//! use std::path::{Path, PathBuf};
//!
//! struct MyAgentFormat;
//!
//! impl LogFormat for MyAgentFormat {
//!     fn name(&self) -> &'static str { "myagent" }
//!     fn sessions_dir(&self, project: Option<&Path>) -> PathBuf { /* ... */ }
//!     fn list_sessions(&self, project: Option<&Path>) -> Vec<SessionFile> { /* ... */ }
//!     fn detect(&self, path: &Path) -> f64 { /* ... */ }
//!     fn parse(&self, path: &Path) -> Result<Session, String> { /* ... */ }
//! }
21//! }
22//!
23//! // Register before first use
24//! register(&MyAgentFormat);
25//! ```
26
27#[cfg(feature = "format-claude")]
28mod claude_code;
29#[cfg(feature = "format-codex")]
30mod codex;
31#[cfg(feature = "format-gemini")]
32mod gemini_cli;
33#[cfg(feature = "format-normalize")]
34mod normalize_agent;
35
36#[cfg(feature = "format-claude")]
37pub use claude_code::ClaudeCodeFormat;
38#[cfg(feature = "format-codex")]
39pub use codex::CodexFormat;
40#[cfg(feature = "format-gemini")]
41pub use gemini_cli::GeminiCliFormat;
42#[cfg(feature = "format-normalize")]
43pub use normalize_agent::NormalizeAgentFormat;
44
45use crate::Session;
46use std::fs::File;
47use std::io::{BufRead, BufReader, Read};
48use std::path::{Path, PathBuf};
49use std::sync::{OnceLock, RwLock};
50
/// Global registry of log format plugins.
///
/// Entries are `'static` references, so lookups can copy a format out of the
/// list and return it after the lock guard has been dropped. Custom formats
/// are appended by [`register()`]; built-in formats are appended by
/// `init_builtin()`.
static FORMATS: RwLock<Vec<&'static dyn LogFormat>> = RwLock::new(Vec::new());
/// One-shot flag ensuring the built-in formats are pushed into `FORMATS` only once.
static INITIALIZED: OnceLock<()> = OnceLock::new();
54
55/// Register a custom log format plugin.
56///
57/// Call this before any parsing operations to add custom formats.
58/// Built-in formats are registered automatically on first use.
59pub fn register(format: &'static dyn LogFormat) {
60    FORMATS.write().unwrap().push(format);
61}
62
63/// Initialize built-in formats (called automatically on first use).
64fn init_builtin() {
65    INITIALIZED.get_or_init(|| {
66        let mut formats = FORMATS.write().unwrap();
67        #[cfg(feature = "format-claude")]
68        formats.push(&ClaudeCodeFormat);
69        #[cfg(feature = "format-codex")]
70        formats.push(&CodexFormat);
71        #[cfg(feature = "format-gemini")]
72        formats.push(&GeminiCliFormat);
73        #[cfg(feature = "format-normalize")]
74        formats.push(&NormalizeAgentFormat);
75    });
76}
77
/// Session file with metadata.
///
/// Produced by session listing helpers such as [`list_jsonl_sessions`].
#[derive(Debug, Clone)]
pub struct SessionFile {
    /// Location of the session log on disk.
    pub path: PathBuf,
    /// Last-modified time of the file.
    pub mtime: std::time::SystemTime,
}
83
/// Trait for session log format plugins.
///
/// Implementations are used as trait objects. The `Send + Sync` bound lets
/// them be stored in the global registry, which is a `static`.
pub trait LogFormat: Send + Sync {
    /// Format identifier (e.g., "claude", "codex", "gemini", "moss").
    ///
    /// Lookups by name compare against this value exactly.
    fn name(&self) -> &'static str;

    /// Get the sessions directory for this format.
    /// Does NOT check if the directory exists - that's handled by list_sessions.
    fn sessions_dir(&self, project: Option<&Path>) -> PathBuf;

    /// List all session files for this format.
    fn list_sessions(&self, project: Option<&Path>) -> Vec<SessionFile>;

    /// Check if this format can parse the given file.
    /// Returns a confidence score 0.0-1.0.
    ///
    /// Auto-detection ignores scores that are not greater than 0.0 and
    /// otherwise picks the highest one; on a tie the format registered
    /// first wins.
    fn detect(&self, path: &Path) -> f64;

    /// Parse the log file into a unified Session structure.
    ///
    /// Failures are reported as a `String` error.
    fn parse(&self, path: &Path) -> Result<Session, String>;
}
103
104/// Get a format by name from the global registry.
105pub fn get_format(name: &str) -> Option<&'static dyn LogFormat> {
106    init_builtin();
107    FORMATS
108        .read()
109        .unwrap()
110        .iter()
111        .find(|f| f.name() == name)
112        .copied()
113}
114
115/// Auto-detect format for a file using the global registry.
116pub fn detect_format(path: &Path) -> Option<&'static dyn LogFormat> {
117    init_builtin();
118    let formats = FORMATS.read().unwrap();
119    let mut best: Option<(&'static dyn LogFormat, f64)> = None;
120    for fmt in formats.iter() {
121        let score = fmt.detect(path);
122        if score > 0.0 && (best.is_none() || score > best.unwrap().1) {
123            best = Some((*fmt, score));
124        }
125    }
126    best.map(|(fmt, _)| fmt)
127}
128
129/// List all available format names from the global registry.
130pub fn list_formats() -> Vec<&'static str> {
131    init_builtin();
132    FORMATS.read().unwrap().iter().map(|f| f.name()).collect()
133}
134
135/// Default implementation: list .jsonl files in a directory.
136pub fn list_jsonl_sessions(dir: &Path) -> Vec<SessionFile> {
137    let mut sessions = Vec::new();
138    if let Ok(entries) = std::fs::read_dir(dir) {
139        for entry in entries.filter_map(|e| e.ok()) {
140            let path = entry.path();
141            if path.extension().and_then(|e| e.to_str()) == Some("jsonl") {
142                if let Ok(meta) = path.metadata() {
143                    if let Ok(mtime) = meta.modified() {
144                        sessions.push(SessionFile { path, mtime });
145                    }
146                }
147            }
148        }
149    }
150    sessions
151}
152
/// Registry of available log formats.
///
/// For most use cases, prefer the global registry via [`register()`],
/// [`get_format()`], [`detect_format()`], and [`list_formats()`].
///
/// Use `FormatRegistry` when you need an isolated registry (e.g., testing).
/// Unlike the global registry, it owns its formats and nothing registered
/// here is visible to the global lookup functions.
pub struct FormatRegistry {
    /// Owned formats, in registration order.
    formats: Vec<Box<dyn LogFormat>>,
}
162
163impl Default for FormatRegistry {
164    fn default() -> Self {
165        Self::new()
166    }
167}
168
169impl FormatRegistry {
170    /// Create a new registry with all built-in formats.
171    pub fn new() -> Self {
172        let mut formats: Vec<Box<dyn LogFormat>> = Vec::new();
173        #[cfg(feature = "format-claude")]
174        formats.push(Box::new(ClaudeCodeFormat));
175        #[cfg(feature = "format-codex")]
176        formats.push(Box::new(CodexFormat));
177        #[cfg(feature = "format-gemini")]
178        formats.push(Box::new(GeminiCliFormat));
179        #[cfg(feature = "format-normalize")]
180        formats.push(Box::new(NormalizeAgentFormat));
181        Self { formats }
182    }
183
184    /// Create an empty registry (no built-in formats).
185    pub fn empty() -> Self {
186        Self { formats: vec![] }
187    }
188
189    /// Register a custom format.
190    pub fn register(&mut self, format: Box<dyn LogFormat>) {
191        self.formats.push(format);
192    }
193
194    /// Detect the best format for a file.
195    pub fn detect(&self, path: &Path) -> Option<&dyn LogFormat> {
196        let mut best: Option<(&dyn LogFormat, f64)> = None;
197        for fmt in &self.formats {
198            let score = fmt.detect(path);
199            if score > 0.0 {
200                if best.is_none() || score > best.unwrap().1 {
201                    best = Some((fmt.as_ref(), score));
202                }
203            }
204        }
205        best.map(|(fmt, _)| fmt)
206    }
207
208    /// Get a format by name.
209    pub fn get(&self, name: &str) -> Option<&dyn LogFormat> {
210        self.formats
211            .iter()
212            .find(|f| f.name() == name)
213            .map(|f| f.as_ref())
214    }
215
216    /// List all available format names.
217    pub fn list(&self) -> Vec<&'static str> {
218        self.formats.iter().map(|f| f.name()).collect()
219    }
220}
221
222/// Parse a session log with auto-format detection.
223pub fn parse_session(path: &Path) -> Result<Session, String> {
224    let registry = FormatRegistry::new();
225    let format = registry
226        .detect(path)
227        .ok_or_else(|| format!("Unknown log format: {}", path.display()))?;
228    format.parse(path)
229}
230
231/// Parse a session log with explicit format.
232pub fn parse_session_with_format(path: &Path, format_name: &str) -> Result<Session, String> {
233    let registry = FormatRegistry::new();
234    let format = registry
235        .get(format_name)
236        .ok_or_else(|| format!("Unknown format: {}", format_name))?;
237    format.parse(path)
238}
239
/// Helper: read up to the first `n` lines of a file.
///
/// Returns an empty vector when the file cannot be opened. Only the first `n`
/// line slots are examined; any of those that fail to read (for example
/// because they are not valid UTF-8) are dropped, so fewer than `n` strings
/// may come back even for a longer file.
pub(crate) fn peek_lines(path: &Path, n: usize) -> Vec<String> {
    let reader = match File::open(path) {
        Ok(handle) => BufReader::new(handle),
        Err(_) => return Vec::new(),
    };
    let mut head = Vec::new();
    // `take(n)` comes before the error filter so a failing reader cannot loop forever.
    for line in reader.lines().take(n) {
        if let Ok(text) = line {
            head.push(text);
        }
    }
    head
}
251
/// Helper: read an entire file into a `String`.
///
/// # Errors
///
/// Returns the I/O error rendered as a string when the file cannot be opened
/// or read, including when its contents are not valid UTF-8.
pub(crate) fn read_file(path: &Path) -> Result<String, String> {
    let mut content = String::new();
    File::open(path)
        .and_then(|mut file| file.read_to_string(&mut content))
        .map_err(|e| e.to_string())?;
    Ok(content)
}