Skip to main content

normalize_chat_sessions/formats/
mod.rs

1//! Log format plugins.
2//!
3//! Each format implements the `LogFormat` trait for parsing session logs.
4//!
5//! # Extensibility
6//!
7//! Users can register custom formats via [`register()`]:
8//!
9//! ```ignore
10//! use normalize_chat_sessions::{LogFormat, SessionFile, register};
11//! use std::path::{Path, PathBuf};
12//!
13//! struct MyAgentFormat;
14//!
15//! impl LogFormat for MyAgentFormat {
16//!     fn name(&self) -> &'static str { "myagent" }
17//!     fn sessions_dir(&self, project: Option<&Path>) -> PathBuf { /* ... */ }
18//!     fn list_sessions(&self, project: Option<&Path>) -> Vec<SessionFile> { /* ... */ }
//!     fn detect(&self, path: &Path) -> f64 { /* ... */ }
//!     fn parse(&self, path: &Path) -> Result<Session, ParseError> { /* ... */ }
20//! }
21//!
22//! // Register before first use
23//! register(&MyAgentFormat);
24//! ```
25
26#[cfg(feature = "format-claude")]
27mod claude_code;
28#[cfg(feature = "format-codex")]
29mod codex;
30#[cfg(feature = "format-gemini")]
31mod gemini_cli;
32#[cfg(feature = "format-normalize")]
33mod normalize_agent;
34
35#[cfg(feature = "format-claude")]
36pub use claude_code::ClaudeCodeFormat;
37#[cfg(feature = "format-codex")]
38pub use codex::CodexFormat;
39#[cfg(feature = "format-gemini")]
40pub use gemini_cli::GeminiCliFormat;
41#[cfg(feature = "format-normalize")]
42pub use normalize_agent::NormalizeAgentFormat;
43
44use crate::Session;
45use std::fs::File;
46use std::io::{BufRead, BufReader, Read};
47use std::path::{Path, PathBuf};
48use std::sync::{OnceLock, RwLock};
49
/// Error type for session log parsing operations.
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum ParseError {
    /// I/O error reading a session log file.
    #[error("I/O error reading {path}: {source}")]
    Io {
        // File that was being opened or read when the error occurred.
        path: PathBuf,
        // Underlying I/O error; `#[source]` exposes it as the error's source.
        #[source]
        source: std::io::Error,
    },
    /// Structural parse error in a session log file.
    // `path` is the offending file, `message` the human-readable description.
    #[error("parse error in {path}: {message}")]
    Format { path: PathBuf, message: String },
    /// Other error (e.g. unknown format, registry failure).
    #[error("{0}")]
    Other(String),
}
67
/// Global registry of log format plugins.
///
/// Holds the built-in formats (pushed by `init_builtin`) and any custom formats
/// (pushed by [`register()`]) in insertion order.
static FORMATS: RwLock<Vec<&'static dyn LogFormat>> = RwLock::new(Vec::new());
/// One-shot guard: `init_builtin` uses it so the built-in formats are pushed only once.
static INITIALIZED: OnceLock<()> = OnceLock::new();
71
72/// Register a custom log format plugin.
73///
74/// Call this before any parsing operations to add custom formats.
75/// Built-in formats are registered automatically on first use.
76pub fn register(format: &'static dyn LogFormat) {
77    // normalize-syntax-allow: rust/unwrap-in-impl - mutex poison on a global registry is unrecoverable
78    FORMATS.write().unwrap().push(format);
79}
80
/// Initialize built-in formats (called automatically on first use).
///
/// Safe to call repeatedly: the `INITIALIZED` guard runs the registration closure
/// only once. Built-ins are appended after any formats that were already pushed
/// to `FORMATS` before the first call.
fn init_builtin() {
    INITIALIZED.get_or_init(|| {
        // normalize-syntax-allow: rust/unwrap-in-impl - mutex poison on a global registry is unrecoverable
        let mut formats = FORMATS.write().unwrap();
        // Each push below is compiled in only when its cargo feature is enabled.
        // The push order is the registry order, which decides detection ties.
        #[cfg(feature = "format-claude")]
        formats.push(&ClaudeCodeFormat);
        #[cfg(feature = "format-codex")]
        formats.push(&CodexFormat);
        #[cfg(feature = "format-gemini")]
        formats.push(&GeminiCliFormat);
        #[cfg(feature = "format-normalize")]
        formats.push(&NormalizeAgentFormat);
    });
}
96
/// Session file with metadata.
///
/// Produced by the listing helpers in this module and by
/// [`LogFormat::list_sessions`] / [`LogFormat::list_subagent_sessions`].
pub struct SessionFile {
    /// Location of the session log on disk.
    pub path: PathBuf,
    /// Modification time; the listing helpers in this module take it from the file's metadata.
    pub mtime: std::time::SystemTime,
    /// Parent session ID (set for subagent sessions).
    pub parent_id: Option<String>,
    /// Agent ID (set for subagent sessions, e.g. "agent-a5c5ccc9c2b61e757").
    pub agent_id: Option<String>,
    /// Subagent type from meta.json (e.g. "general-purpose", "Explore").
    pub subagent_type: Option<String>,
}
108
/// Trait for session log format plugins.
///
/// Implementors must be `Send + Sync` because they are stored in a shared,
/// lock-protected global registry.
pub trait LogFormat: Send + Sync {
    /// Format identifier (e.g., "claude", "codex", "gemini", "normalize").
    ///
    /// Lookups by name return the first registered format whose name matches.
    fn name(&self) -> &'static str;

    /// Get the sessions directory for this format.
    /// Does NOT check if the directory exists - that's handled by list_sessions.
    fn sessions_dir(&self, project: Option<&Path>) -> PathBuf;

    /// List all session files for this format.
    fn list_sessions(&self, project: Option<&Path>) -> Vec<SessionFile>;

    /// List subagent session files for this format.
    /// Default returns empty (only Claude Code supports subagents currently).
    fn list_subagent_sessions(&self, _project: Option<&Path>) -> Vec<SessionFile> {
        Vec::new()
    }

    /// Returns external directories (outside the project root) that belong to this format's
    /// session metadata for the given project. Used by `normalize sync` to copy metadata
    /// alongside the project.
    ///
    /// Default implementation delegates to `sessions_dir`. Override only if your format
    /// stores metadata in multiple locations.
    fn metadata_roots(&self, project: Option<&Path>) -> Vec<PathBuf> {
        vec![self.sessions_dir(project)]
    }

    /// Check if this format can parse the given file.
    /// Returns a confidence score 0.0-1.0.
    ///
    /// Detection picks the format with the highest score strictly above 0.0;
    /// on equal scores the format registered first wins.
    fn detect(&self, path: &Path) -> f64;

    /// Parse the log file into a unified Session structure.
    ///
    /// Returns a [`ParseError`] when the file cannot be read or parsed.
    fn parse(&self, path: &Path) -> Result<Session, ParseError>;
}
144
145/// Get a format by name from the global registry.
146pub fn get_format(name: &str) -> Option<&'static dyn LogFormat> {
147    init_builtin();
148    // normalize-syntax-allow: rust/unwrap-in-impl - mutex poison on a global registry is unrecoverable
149    FORMATS
150        .read()
151        .unwrap()
152        .iter()
153        .find(|f| f.name() == name)
154        .copied()
155}
156
157/// Auto-detect format for a file using the global registry.
158pub fn detect_format(path: &Path) -> Option<&'static dyn LogFormat> {
159    init_builtin();
160    // normalize-syntax-allow: rust/unwrap-in-impl - mutex poison on a global registry is unrecoverable
161    let formats = FORMATS.read().unwrap();
162    let mut best: Option<(&'static dyn LogFormat, f64)> = None;
163    for fmt in formats.iter() {
164        let score = fmt.detect(path);
165        if score > 0.0 && best.is_none_or(|(_, best_score)| score > best_score) {
166            best = Some((*fmt, score));
167        }
168    }
169    best.map(|(fmt, _)| fmt)
170}
171
172/// List all available format names from the global registry.
173pub fn list_formats() -> Vec<&'static str> {
174    init_builtin();
175    // normalize-syntax-allow: rust/unwrap-in-impl - mutex poison on a global registry is unrecoverable
176    FORMATS.read().unwrap().iter().map(|f| f.name()).collect()
177}
178
179/// Returns all external metadata directories for the given project across all known formats.
180///
181/// Only directories that actually exist on disk are returned. Used by `normalize sync` to
182/// discover what session metadata to copy alongside the project directory.
183pub fn project_metadata_roots(project: &Path) -> Vec<PathBuf> {
184    init_builtin();
185    // normalize-syntax-allow: rust/unwrap-in-impl - mutex poison on a global registry is unrecoverable
186    FORMATS
187        .read()
188        .unwrap()
189        .iter()
190        .flat_map(|f| f.metadata_roots(Some(project)))
191        .filter(|p| p.exists())
192        .collect()
193}
194
195/// Default implementation: list .jsonl files in a directory.
196pub fn list_jsonl_sessions(dir: &Path) -> Vec<SessionFile> {
197    let mut sessions = Vec::new();
198    if let Ok(entries) = std::fs::read_dir(dir) {
199        for entry in entries.filter_map(|e| e.ok()) {
200            let path = entry.path();
201            if path.extension().and_then(|e| e.to_str()) == Some("jsonl")
202                && let Ok(meta) = path.metadata()
203                && let Ok(mtime) = meta.modified()
204            {
205                sessions.push(SessionFile {
206                    path,
207                    mtime,
208                    parent_id: None,
209                    agent_id: None,
210                    subagent_type: Some("interactive".into()),
211                });
212            }
213        }
214    }
215    sessions
216}
217
218/// List subagent sessions from `<session-uuid>/subagents/` directories.
219///
220/// Walks each subdirectory of `dir` looking for a `subagents/` folder containing
221/// `agent-<id>.jsonl` files. Also reads the companion `.meta.json` for agent type.
222pub fn list_subagent_sessions(dir: &Path) -> Vec<SessionFile> {
223    let mut sessions = Vec::new();
224    let Ok(entries) = std::fs::read_dir(dir) else {
225        return sessions;
226    };
227    for entry in entries.filter_map(|e| e.ok()) {
228        let path = entry.path();
229        if !path.is_dir() {
230            continue;
231        }
232        let parent_id = path.file_name().and_then(|n| n.to_str()).map(String::from);
233        let subagents_dir = path.join("subagents");
234        if !subagents_dir.is_dir() {
235            continue;
236        }
237        let Ok(sub_entries) = std::fs::read_dir(&subagents_dir) else {
238            continue;
239        };
240        for sub_entry in sub_entries.filter_map(|e| e.ok()) {
241            let sub_path = sub_entry.path();
242            if sub_path.extension().and_then(|e| e.to_str()) != Some("jsonl") {
243                continue;
244            }
245            let stem = match sub_path.file_stem().and_then(|s| s.to_str()) {
246                Some(s) => s.to_string(),
247                None => continue,
248            };
249            if !stem.starts_with("agent-") {
250                continue;
251            }
252            let Ok(meta) = sub_path.metadata() else {
253                continue;
254            };
255            let Ok(mtime) = meta.modified() else {
256                continue;
257            };
258            // Read companion .meta.json for agent type, default to "subagent"
259            let meta_path = sub_path.with_extension("meta.json");
260            let subagent_type = Some(
261                std::fs::read_to_string(&meta_path)
262                    .ok()
263                    .and_then(|s| serde_json::from_str::<serde_json::Value>(&s).ok())
264                    .and_then(|v| {
265                        v.get("agentType")
266                            .and_then(|t| t.as_str())
267                            .map(String::from)
268                    })
269                    .unwrap_or_else(|| "subagent".into()),
270            );
271            sessions.push(SessionFile {
272                path: sub_path,
273                mtime,
274                parent_id: parent_id.clone(),
275                agent_id: Some(stem.clone()),
276                subagent_type,
277            });
278        }
279    }
280    sessions
281}
282
/// Registry of available log formats.
///
/// For most use cases, prefer the global registry via [`register()`],
/// [`get_format()`], [`detect_format()`], and [`list_formats()`].
///
/// Use `FormatRegistry` when you need an isolated registry (e.g., testing).
pub struct FormatRegistry {
    // Owned formats in registration order; earlier entries win name lookups and detection ties.
    formats: Vec<Box<dyn LogFormat>>,
}
292
293impl Default for FormatRegistry {
294    fn default() -> Self {
295        Self::new()
296    }
297}
298
299impl FormatRegistry {
300    /// Create a new registry with all built-in formats.
301    #[allow(clippy::vec_init_then_push)] // cfg-gated pushes can't use vec![]
302    pub fn new() -> Self {
303        let mut formats: Vec<Box<dyn LogFormat>> = Vec::new();
304        #[cfg(feature = "format-claude")]
305        formats.push(Box::new(ClaudeCodeFormat));
306        #[cfg(feature = "format-codex")]
307        formats.push(Box::new(CodexFormat));
308        #[cfg(feature = "format-gemini")]
309        formats.push(Box::new(GeminiCliFormat));
310        #[cfg(feature = "format-normalize")]
311        formats.push(Box::new(NormalizeAgentFormat));
312        Self { formats }
313    }
314
315    /// Create an empty registry (no built-in formats).
316    pub fn empty() -> Self {
317        Self { formats: vec![] }
318    }
319
320    /// Register a custom format.
321    pub fn register(&mut self, format: Box<dyn LogFormat>) {
322        self.formats.push(format);
323    }
324
325    /// Detect the best format for a file.
326    pub fn detect(&self, path: &Path) -> Option<&dyn LogFormat> {
327        let mut best: Option<(&dyn LogFormat, f64)> = None;
328        for fmt in &self.formats {
329            let score = fmt.detect(path);
330            if score > 0.0 && best.is_none_or(|(_, best_score)| score > best_score) {
331                best = Some((fmt.as_ref(), score));
332            }
333        }
334        best.map(|(fmt, _)| fmt)
335    }
336
337    /// Get a format by name.
338    pub fn get(&self, name: &str) -> Option<&dyn LogFormat> {
339        self.formats
340            .iter()
341            .find(|f| f.name() == name)
342            .map(|f| f.as_ref())
343    }
344
345    /// List all available format names.
346    pub fn list(&self) -> Vec<&'static str> {
347        self.formats.iter().map(|f| f.name()).collect()
348    }
349
350    /// List subagent sessions across all registered formats.
351    pub fn list_subagent_sessions(&self, project: Option<&Path>) -> Vec<SessionFile> {
352        let mut all = Vec::new();
353        for fmt in &self.formats {
354            all.extend(fmt.list_subagent_sessions(project));
355        }
356        all
357    }
358}
359
360/// Parse a session log with auto-format detection.
361pub fn parse_session(path: &Path) -> Result<Session, ParseError> {
362    let registry = FormatRegistry::new();
363    let format = registry
364        .detect(path)
365        .ok_or_else(|| ParseError::Other(format!("Unknown log format: {}", path.display())))?;
366    format.parse(path)
367}
368
369/// Parse a session log with explicit format.
370pub fn parse_session_with_format(path: &Path, format_name: &str) -> Result<Session, ParseError> {
371    let registry = FormatRegistry::new();
372    let format = registry
373        .get(format_name)
374        .ok_or_else(|| ParseError::Other(format!("Unknown format: {}", format_name)))?;
375    format.parse(path)
376}
377
/// Helper: return up to the first `n` lines of the file at `path`.
///
/// Yields an empty list when the file cannot be opened. Lines that fail to
/// read (e.g. invalid UTF-8) are dropped but still count towards `n`.
pub(crate) fn peek_lines(path: &Path, n: usize) -> Vec<String> {
    match File::open(path) {
        Err(_) => Vec::new(),
        Ok(handle) => BufReader::new(handle)
            .lines()
            .take(n)
            .flatten()
            .collect(),
    }
}
389
390/// Helper: read entire file as string.
391pub(crate) fn read_file(path: &Path) -> Result<String, ParseError> {
392    let mut file = File::open(path).map_err(|e| ParseError::Io {
393        path: path.to_path_buf(),
394        source: e,
395    })?;
396    let mut content = String::new();
397    file.read_to_string(&mut content)
398        .map_err(|e| ParseError::Io {
399            path: path.to_path_buf(),
400            source: e,
401        })?;
402    Ok(content)
403}