Skip to main content

lore_cli/capture/watchers/
gemini.rs

1//! Gemini CLI session parser.
2//!
3//! Parses session files from Google's Gemini CLI tool. Sessions are stored as
4//! single JSON files at `~/.gemini/tmp/<project-hash>/chats/session-*.json`.
5//!
6//! Each file contains a JSON object with:
7//! - `sessionId`: Unique session identifier
8//! - `projectHash`: Hash of the project directory
9//! - `startTime`: ISO 8601 timestamp
10//! - `lastUpdated`: ISO 8601 timestamp
11//! - `messages`: Array of message objects with id, timestamp, type, and content
12
13use anyhow::{Context, Result};
14use chrono::{DateTime, Utc};
15use serde::Deserialize;
16use std::fs;
17use std::path::{Path, PathBuf};
18use uuid::Uuid;
19
20use crate::storage::models::{Message, MessageContent, MessageRole, Session};
21
22use super::{Watcher, WatcherInfo};
23
/// Watcher for Gemini CLI sessions.
///
/// Discovers and parses JSON session files from the Gemini CLI tool.
/// Sessions are stored in `~/.gemini/tmp/<project-hash>/chats/session-*.json`.
///
/// This is a field-less unit struct: it carries no state of its own, and all
/// of its behavior is provided through its `Watcher` implementation.
pub struct GeminiWatcher;
29
30impl Watcher for GeminiWatcher {
31    fn info(&self) -> WatcherInfo {
32        WatcherInfo {
33            name: "gemini",
34            description: "Google Gemini CLI",
35            default_paths: vec![gemini_base_dir()],
36        }
37    }
38
39    fn is_available(&self) -> bool {
40        gemini_base_dir().exists()
41    }
42
43    fn find_sources(&self) -> Result<Vec<PathBuf>> {
44        find_gemini_session_files()
45    }
46
47    fn parse_source(&self, path: &Path) -> Result<Vec<(Session, Vec<Message>)>> {
48        let parsed = parse_gemini_session_file(path)?;
49        if parsed.messages.is_empty() {
50            return Ok(vec![]);
51        }
52        let (session, messages) = parsed.to_storage_models();
53        Ok(vec![(session, messages)])
54    }
55
56    fn watch_paths(&self) -> Vec<PathBuf> {
57        vec![gemini_base_dir()]
58    }
59}
60
61/// Returns the path to the Gemini base directory.
62///
63/// This is typically `~/.gemini/tmp/`.
64fn gemini_base_dir() -> PathBuf {
65    dirs::home_dir()
66        .unwrap_or_else(|| PathBuf::from("."))
67        .join(".gemini")
68        .join("tmp")
69}
70
/// Raw session structure from Gemini JSON files.
///
/// Mirrors the top-level JSON object of a session file. Keys are camelCase in
/// the file (`sessionId`, `projectHash`, ...) and are mapped onto the
/// snake_case fields below by `rename_all`. Only `sessionId` is mandatory;
/// every other key falls back to its default when absent.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct RawGeminiSession {
    /// Session identifier (`sessionId`); deserialization fails without it.
    session_id: String,
    /// Project hash (`projectHash`), if present.
    #[serde(default)]
    project_hash: Option<String>,
    /// Session start (`startTime`) as the raw string from the file; it is
    /// interpreted as RFC 3339 later, during parsing.
    #[serde(default)]
    start_time: Option<String>,
    /// Last update (`lastUpdated`) as the raw string from the file; it is
    /// interpreted as RFC 3339 later, during parsing.
    #[serde(default)]
    last_updated: Option<String>,
    /// Messages in file order; an absent key yields an empty list.
    #[serde(default)]
    messages: Vec<RawGeminiMessage>,
}
85
/// Raw message structure from Gemini JSON files.
///
/// Keys are camelCase in the file (e.g. `toolCalls`). Only `type` is
/// mandatory; every other key falls back to `None` when absent.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct RawGeminiMessage {
    /// Message identifier, if the file provides one.
    #[serde(default)]
    id: Option<String>,
    /// Message time as the raw string from the file; it is interpreted as
    /// RFC 3339 later, during parsing.
    #[serde(default)]
    timestamp: Option<String>,
    /// Value of the JSON `type` key (renamed because `type` is a Rust keyword).
    /// The parser in this file recognises `user`, `gemini` and `system`.
    #[serde(rename = "type")]
    msg_type: String,
    /// Message text; may be missing.
    #[serde(default)]
    content: Option<String>,
    // Optional fields we currently ignore but may use later
    /// Untyped `toolCalls` payload, deserialized but not read anywhere yet.
    #[serde(default)]
    #[allow(dead_code)]
    tool_calls: Option<serde_json::Value>,
    /// Untyped `thoughts` payload, deserialized but not read anywhere yet.
    #[serde(default)]
    #[allow(dead_code)]
    thoughts: Option<serde_json::Value>,
}
106
107/// Parses a Gemini JSON session file.
108///
109/// Reads the JSON file and extracts session metadata and messages.
110///
111/// # Errors
112///
113/// Returns an error if the file cannot be opened or parsed.
114pub fn parse_gemini_session_file(path: &Path) -> Result<ParsedGeminiSession> {
115    let content = fs::read_to_string(path).context("Failed to read Gemini session file")?;
116    let raw: RawGeminiSession =
117        serde_json::from_str(&content).context("Failed to parse Gemini session JSON")?;
118
119    let start_time = raw
120        .start_time
121        .as_ref()
122        .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
123        .map(|dt| dt.with_timezone(&Utc));
124
125    let last_updated = raw
126        .last_updated
127        .as_ref()
128        .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
129        .map(|dt| dt.with_timezone(&Utc));
130
131    let messages: Vec<ParsedGeminiMessage> = raw
132        .messages
133        .iter()
134        .filter_map(|m| {
135            let role = match m.msg_type.as_str() {
136                "user" => MessageRole::User,
137                "gemini" => MessageRole::Assistant,
138                "system" => MessageRole::System,
139                _ => return None,
140            };
141
142            let content = m.content.as_ref()?.clone();
143            if content.trim().is_empty() {
144                return None;
145            }
146
147            let timestamp = m
148                .timestamp
149                .as_ref()
150                .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
151                .map(|dt| dt.with_timezone(&Utc))
152                .or(start_time)
153                .unwrap_or_else(Utc::now);
154
155            let id = m.id.clone();
156
157            Some(ParsedGeminiMessage {
158                id,
159                timestamp,
160                role,
161                content,
162            })
163        })
164        .collect();
165
166    Ok(ParsedGeminiSession {
167        session_id: raw.session_id,
168        project_hash: raw.project_hash,
169        start_time,
170        last_updated,
171        messages,
172        source_path: path.to_string_lossy().to_string(),
173    })
174}
175
/// Intermediate representation of a parsed Gemini session.
///
/// Produced by `parse_gemini_session_file` and turned into storage models by
/// `to_storage_models`.
#[derive(Debug)]
pub struct ParsedGeminiSession {
    /// Session identifier exactly as it appears in the file.
    pub session_id: String,
    /// Project hash from the file, if present.
    pub project_hash: Option<String>,
    /// Session start in UTC; `None` when absent or not valid RFC 3339.
    pub start_time: Option<DateTime<Utc>>,
    /// Last update in UTC; `None` when absent or not valid RFC 3339.
    pub last_updated: Option<DateTime<Utc>>,
    /// Messages that survived filtering, in file order.
    pub messages: Vec<ParsedGeminiMessage>,
    /// Path of the file this session was read from (lossily converted to a string).
    pub source_path: String,
}
186
187impl ParsedGeminiSession {
188    /// Converts this parsed session to storage-ready models.
189    pub fn to_storage_models(&self) -> (Session, Vec<Message>) {
190        let session_uuid = Uuid::parse_str(&self.session_id).unwrap_or_else(|_| Uuid::new_v4());
191
192        let started_at = self
193            .start_time
194            .or_else(|| self.messages.first().map(|m| m.timestamp))
195            .unwrap_or_else(Utc::now);
196
197        let ended_at = self
198            .last_updated
199            .or_else(|| self.messages.last().map(|m| m.timestamp));
200
201        // Try to derive working directory from project hash in source path
202        let working_directory = self
203            .project_hash
204            .as_ref()
205            .map(|h| format!("<project:{h}>"))
206            .unwrap_or_else(|| ".".to_string());
207
208        let session = Session {
209            id: session_uuid,
210            tool: "gemini".to_string(),
211            tool_version: None,
212            started_at,
213            ended_at,
214            model: None,
215            working_directory,
216            git_branch: None,
217            source_path: Some(self.source_path.clone()),
218            message_count: self.messages.len() as i32,
219            machine_id: crate::storage::get_machine_id(),
220        };
221
222        let messages: Vec<Message> = self
223            .messages
224            .iter()
225            .enumerate()
226            .map(|(idx, m)| {
227                let id =
228                    m.id.as_ref()
229                        .and_then(|s| Uuid::parse_str(s).ok())
230                        .unwrap_or_else(Uuid::new_v4);
231
232                Message {
233                    id,
234                    session_id: session_uuid,
235                    parent_id: None,
236                    index: idx as i32,
237                    timestamp: m.timestamp,
238                    role: m.role.clone(),
239                    content: MessageContent::Text(m.content.clone()),
240                    model: None,
241                    git_branch: None,
242                    cwd: None,
243                }
244            })
245            .collect();
246
247        (session, messages)
248    }
249}
250
/// Intermediate representation of a parsed Gemini message.
#[derive(Debug)]
pub struct ParsedGeminiMessage {
    /// Message identifier from the file, if any (not necessarily a UUID).
    pub id: Option<String>,
    /// Message time in UTC.
    pub timestamp: DateTime<Utc>,
    /// Role the message's `type` was mapped to.
    pub role: MessageRole,
    /// Message text.
    pub content: String,
}
259
/// Extracts the session ID from a Gemini session filename.
///
/// Gemini creates files with the pattern `session-{timestamp}-{session_id}.json`.
/// Multiple files can share the same session ID but have different timestamps
/// (and message counts). This function extracts the session ID portion, i.e.
/// everything after the last hyphen of the name once the `session-` prefix
/// and `.json` suffix are removed.
///
/// Returns `None` if the filename does not match the expected pattern: a
/// missing prefix or suffix, no hyphen separating timestamp and ID, or an
/// empty ID (as in `session-123-.json`).
fn extract_session_id_from_filename(filename: &str) -> Option<&str> {
    // Pattern: session-{timestamp}-{session_id}.json
    // Example: session-1737651044-1b872dcc.json -> 1b872dcc
    let stem = filename.strip_suffix(".json")?.strip_prefix("session-")?;

    // Split on the LAST hyphen so that hyphens inside the timestamp part do
    // not end up in the ID.
    let (_timestamp, session_id) = stem.rsplit_once('-')?;

    // An empty ID is not an ID; reporting it would make unrelated files
    // collapse under the same (empty) key during deduplication.
    if session_id.is_empty() {
        None
    } else {
        Some(session_id)
    }
}
276
277/// Counts messages in a Gemini session file without fully parsing it.
278///
279/// This is a lightweight check used for deduplication. It reads the JSON
280/// and counts only the message array length.
281fn count_messages_in_file(path: &Path) -> usize {
282    // Try to parse just enough to count messages
283    let content = match fs::read_to_string(path) {
284        Ok(c) => c,
285        Err(_) => return 0,
286    };
287
288    // Use a minimal struct to just get the message count
289    #[derive(Deserialize)]
290    struct MinimalSession {
291        #[serde(default)]
292        messages: Vec<serde_json::Value>,
293    }
294
295    match serde_json::from_str::<MinimalSession>(&content) {
296        Ok(session) => session.messages.len(),
297        Err(_) => 0,
298    }
299}
300
301/// Discovers all Gemini session files, deduplicating by session ID.
302///
303/// Scans `~/.gemini/tmp/*/chats/` for `session-*.json` files.
304///
305/// Gemini creates multiple files with the same session ID but different
306/// timestamps as the session progresses (e.g., session-1737651044-1b872dcc.json,
307/// session-1737651054-1b872dcc.json). To avoid processing duplicate sessions
308/// with varying message counts, this function keeps only the file with the
309/// most messages for each unique session ID.
310pub fn find_gemini_session_files() -> Result<Vec<PathBuf>> {
311    let base_dir = gemini_base_dir();
312
313    if !base_dir.exists() {
314        return Ok(Vec::new());
315    }
316
317    // Collect all session files first
318    let mut all_files = Vec::new();
319
320    // Walk the directory tree: tmp/<project-hash>/chats/session-*.json
321    for project_entry in std::fs::read_dir(&base_dir)? {
322        let project_entry = project_entry?;
323        let project_path = project_entry.path();
324        if !project_path.is_dir() {
325            continue;
326        }
327
328        let chats_dir = project_path.join("chats");
329        if !chats_dir.exists() || !chats_dir.is_dir() {
330            continue;
331        }
332
333        for file_entry in std::fs::read_dir(&chats_dir)? {
334            let file_entry = file_entry?;
335            let file_path = file_entry.path();
336
337            if let Some(name) = file_path.file_name().and_then(|n| n.to_str()) {
338                if name.starts_with("session-") && name.ends_with(".json") {
339                    all_files.push(file_path);
340                }
341            }
342        }
343    }
344
345    // Deduplicate: group by session ID, keep the file with most messages
346    deduplicate_session_files(all_files)
347}
348
349/// Deduplicates session files by session ID, keeping the file with the most messages.
350///
351/// Groups files by their session ID (extracted from filename) and returns only
352/// the file with the highest message count for each group.
353fn deduplicate_session_files(files: Vec<PathBuf>) -> Result<Vec<PathBuf>> {
354    use std::collections::HashMap;
355
356    // Map: session_id -> (path, message_count)
357    let mut best_by_session: HashMap<String, (PathBuf, usize)> = HashMap::new();
358
359    for path in files {
360        let filename = match path.file_name().and_then(|n| n.to_str()) {
361            Some(name) => name,
362            None => continue,
363        };
364
365        let session_id = match extract_session_id_from_filename(filename) {
366            Some(id) => id.to_string(),
367            None => {
368                // If we cannot extract a session ID, treat the whole filename as unique
369                filename.to_string()
370            }
371        };
372
373        let message_count = count_messages_in_file(&path);
374
375        match best_by_session.get(&session_id) {
376            Some((_, existing_count)) if *existing_count >= message_count => {
377                // Current file has fewer or equal messages, skip it
378            }
379            _ => {
380                // This file has more messages (or is the first we've seen)
381                best_by_session.insert(session_id, (path, message_count));
382            }
383        }
384    }
385
386    // Extract just the paths
387    let deduplicated: Vec<PathBuf> = best_by_session
388        .into_values()
389        .map(|(path, _)| path)
390        .collect();
391
392    Ok(deduplicated)
393}
394
/// Unit tests for Gemini session parsing, filename handling and deduplication.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary JSON file with given content.
    ///
    /// The returned handle must be kept alive for as long as the path is used.
    fn create_temp_session_file(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::with_suffix(".json").expect("Failed to create temp file");
        file.write_all(content.as_bytes())
            .expect("Failed to write content");
        file.flush().expect("Failed to flush");
        file
    }

    /// Generate a simple Gemini session JSON.
    ///
    /// `messages_json` is spliced in verbatim as the value of `messages`, so
    /// it must itself be a JSON array. Start and last-updated times are fixed.
    fn make_session_json(session_id: &str, project_hash: &str, messages_json: &str) -> String {
        format!(
            r#"{{
                "sessionId": "{session_id}",
                "projectHash": "{project_hash}",
                "startTime": "2025-11-30T20:06:04.951Z",
                "lastUpdated": "2025-11-30T20:15:26.585Z",
                "messages": {messages_json}
            }}"#
        )
    }

    // Note: Common watcher trait tests (info, watch_paths, find_sources) are in
    // src/capture/watchers/test_common.rs to avoid duplication across all watchers.
    // Only tool-specific parsing tests remain here.

    /// A two-message session keeps its metadata, message order, roles and text.
    #[test]
    fn test_parse_simple_session() {
        let json = make_session_json(
            "ed60a4d9-1234-5678-abcd-ef0123456789",
            "cc89a35",
            r#"[
                {"id": "msg1", "timestamp": "2025-11-30T20:06:05.000Z", "type": "user", "content": "Hello"},
                {"id": "msg2", "timestamp": "2025-11-30T20:06:10.000Z", "type": "gemini", "content": "Hi there!"}
            ]"#,
        );

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.session_id, "ed60a4d9-1234-5678-abcd-ef0123456789");
        assert_eq!(parsed.project_hash, Some("cc89a35".to_string()));
        assert_eq!(parsed.messages.len(), 2);
        assert_eq!(parsed.messages[0].role, MessageRole::User);
        assert_eq!(parsed.messages[0].content, "Hello");
        assert_eq!(parsed.messages[1].role, MessageRole::Assistant);
        assert_eq!(parsed.messages[1].content, "Hi there!");
    }

    /// `type: "user"` maps to the user role.
    #[test]
    fn test_parse_user_message() {
        let json = make_session_json(
            "test-session",
            "hash123",
            r#"[{"type": "user", "content": "What is Rust?"}]"#,
        );

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 1);
        assert_eq!(parsed.messages[0].role, MessageRole::User);
        assert_eq!(parsed.messages[0].content, "What is Rust?");
    }

    /// `type: "gemini"` maps to the assistant role.
    #[test]
    fn test_parse_gemini_message_as_assistant() {
        let json = make_session_json(
            "test-session",
            "hash123",
            r#"[{"type": "gemini", "content": "Rust is a systems programming language."}]"#,
        );

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 1);
        assert_eq!(parsed.messages[0].role, MessageRole::Assistant);
    }

    /// `type: "system"` maps to the system role.
    #[test]
    fn test_parse_system_message() {
        let json = make_session_json(
            "test-session",
            "hash123",
            r#"[{"type": "system", "content": "You are a helpful assistant."}]"#,
        );

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 1);
        assert_eq!(parsed.messages[0].role, MessageRole::System);
    }

    /// Messages with an unrecognised `type` are dropped; the rest keep their order.
    #[test]
    fn test_unknown_message_type_skipped() {
        let json = make_session_json(
            "test-session",
            "hash123",
            r#"[
                {"type": "user", "content": "Hello"},
                {"type": "unknown", "content": "Should be skipped"},
                {"type": "gemini", "content": "Hi!"}
            ]"#,
        );

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 2);
        assert_eq!(parsed.messages[0].role, MessageRole::User);
        assert_eq!(parsed.messages[1].role, MessageRole::Assistant);
    }

    /// Empty and whitespace-only content is dropped.
    #[test]
    fn test_empty_content_skipped() {
        let json = make_session_json(
            "test-session",
            "hash123",
            r#"[
                {"type": "user", "content": "Hello"},
                {"type": "gemini", "content": ""},
                {"type": "gemini", "content": "   "},
                {"type": "user", "content": "Goodbye"}
            ]"#,
        );

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 2);
    }

    /// A message without a `content` key is dropped.
    #[test]
    fn test_null_content_skipped() {
        let json = make_session_json(
            "test-session",
            "hash123",
            r#"[
                {"type": "user", "content": "Hello"},
                {"type": "gemini"}
            ]"#,
        );

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.messages.len(), 1);
    }

    /// Storage conversion preserves valid UUIDs, the project hash, the message
    /// count, and assigns consecutive indices.
    #[test]
    fn test_to_storage_models() {
        let json = make_session_json(
            "ed60a4d9-1234-5678-abcd-ef0123456789",
            "cc89a35",
            r#"[
                {"id": "550e8400-e29b-41d4-a716-446655440001", "type": "user", "content": "Hello"},
                {"type": "gemini", "content": "Hi!"}
            ]"#,
        );

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");
        let (session, messages) = parsed.to_storage_models();

        assert_eq!(session.tool, "gemini");
        assert_eq!(
            session.id.to_string(),
            "ed60a4d9-1234-5678-abcd-ef0123456789"
        );
        assert!(session.working_directory.contains("cc89a35"));
        assert_eq!(session.message_count, 2);

        assert_eq!(messages.len(), 2);
        assert_eq!(
            messages[0].id.to_string(),
            "550e8400-e29b-41d4-a716-446655440001"
        );
        assert_eq!(messages[0].role, MessageRole::User);
        assert_eq!(messages[0].index, 0);
        assert_eq!(messages[1].role, MessageRole::Assistant);
        assert_eq!(messages[1].index, 1);
    }

    /// Session-level and message-level timestamps are parsed from the file.
    #[test]
    fn test_timestamps_parsed() {
        let json = make_session_json(
            "test-session",
            "hash123",
            r#"[{"type": "user", "content": "Hello", "timestamp": "2025-11-30T20:06:05.000Z"}]"#,
        );

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");

        assert!(parsed.start_time.is_some());
        assert!(parsed.last_updated.is_some());
        assert!(parsed.messages[0]
            .timestamp
            .to_rfc3339()
            .contains("2025-11-30"));
    }

    /// An empty `messages` array parses successfully into zero messages.
    #[test]
    fn test_empty_messages_array() {
        let json = make_session_json("test-session", "hash123", "[]");

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");

        assert!(parsed.messages.is_empty());
    }

    /// The watcher returns exactly one session for a file with messages.
    #[test]
    fn test_watcher_parse_source() {
        let watcher = GeminiWatcher;
        let json = make_session_json(
            "test-session",
            "hash123",
            r#"[{"type": "user", "content": "Hello"}]"#,
        );

        let file = create_temp_session_file(&json);
        let result = watcher
            .parse_source(file.path())
            .expect("Should parse successfully");

        assert_eq!(result.len(), 1);
        let (session, messages) = &result[0];
        assert_eq!(session.tool, "gemini");
        assert_eq!(messages.len(), 1);
    }

    /// The watcher returns no sessions at all for a file without messages.
    #[test]
    fn test_watcher_parse_source_empty_session() {
        let watcher = GeminiWatcher;
        let json = make_session_json("test-session", "hash123", "[]");

        let file = create_temp_session_file(&json);
        let result = watcher
            .parse_source(file.path())
            .expect("Should parse successfully");

        assert!(result.is_empty());
    }

    /// A session ID that is not a UUID still results in a usable session UUID.
    #[test]
    fn test_invalid_uuid_generates_new() {
        let json = make_session_json(
            "not-a-valid-uuid",
            "hash123",
            r#"[{"type": "user", "content": "Hello"}]"#,
        );

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");
        let (session, _) = parsed.to_storage_models();

        // Should still have a valid UUID (newly generated)
        assert!(!session.id.is_nil());
    }

    /// The `toolCalls` and `thoughts` keys do not prevent a message from parsing.
    #[test]
    fn test_messages_with_tool_calls_and_thoughts() {
        let json = make_session_json(
            "test-session",
            "hash123",
            r#"[
                {
                    "type": "user",
                    "content": "Run a command",
                    "toolCalls": [{"name": "bash", "args": {"cmd": "ls"}}]
                },
                {
                    "type": "gemini",
                    "content": "Here are the files",
                    "thoughts": ["Analyzing directory structure"]
                }
            ]"#,
        );

        let file = create_temp_session_file(&json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");

        // Should parse messages despite having extra fields
        assert_eq!(parsed.messages.len(), 2);
    }

    /// Only `sessionId` is required; all other top-level keys may be missing.
    #[test]
    fn test_minimal_session() {
        let json = r#"{"sessionId": "minimal", "messages": []}"#;

        let file = create_temp_session_file(json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");

        assert_eq!(parsed.session_id, "minimal");
        assert!(parsed.project_hash.is_none());
        assert!(parsed.messages.is_empty());
    }

    /// Without a project hash the working directory falls back to ".".
    #[test]
    fn test_session_with_no_project_hash() {
        let json = r#"{
            "sessionId": "test",
            "startTime": "2025-11-30T20:06:04.951Z",
            "messages": [{"type": "user", "content": "Hello"}]
        }"#;

        let file = create_temp_session_file(json);
        let parsed = parse_gemini_session_file(file.path()).expect("Failed to parse");
        let (session, _) = parsed.to_storage_models();

        // Working directory should default to "."
        assert_eq!(session.working_directory, ".");
    }

    /// The session ID is the part after the last hyphen; malformed names give `None`.
    #[test]
    fn test_extract_session_id_from_filename() {
        // Standard pattern: session-{timestamp}-{session_id}.json
        assert_eq!(
            extract_session_id_from_filename("session-1737651044-1b872dcc.json"),
            Some("1b872dcc")
        );
        assert_eq!(
            extract_session_id_from_filename("session-1737651054-1b872dcc.json"),
            Some("1b872dcc")
        );
        assert_eq!(
            extract_session_id_from_filename("session-1737651059-1b872dcc.json"),
            Some("1b872dcc")
        );

        // Different session IDs
        assert_eq!(
            extract_session_id_from_filename("session-1234567890-abcdef12.json"),
            Some("abcdef12")
        );

        // Edge cases: nothing after the prefix, no timestamp/ID separator,
        // wrong prefix, missing extension, empty input
        assert_eq!(extract_session_id_from_filename("session-.json"), None);
        assert_eq!(extract_session_id_from_filename("session-123.json"), None);
        assert_eq!(extract_session_id_from_filename("other-file.json"), None);
        assert_eq!(extract_session_id_from_filename("session-123-abc"), None);
        assert_eq!(extract_session_id_from_filename(""), None);
    }

    /// The counter reports the array length and 0 for unreadable files.
    #[test]
    fn test_count_messages_in_file() {
        // File with 2 messages
        let json = make_session_json(
            "test",
            "hash",
            r#"[{"type": "user", "content": "Hello"}, {"type": "gemini", "content": "Hi"}]"#,
        );
        let file = create_temp_session_file(&json);
        assert_eq!(count_messages_in_file(file.path()), 2);

        // File with 0 messages
        let json_empty = make_session_json("test", "hash", "[]");
        let file_empty = create_temp_session_file(&json_empty);
        assert_eq!(count_messages_in_file(file_empty.path()), 0);

        // Non-existent file returns 0
        assert_eq!(
            count_messages_in_file(Path::new("/nonexistent/file.json")),
            0
        );
    }

    /// Of several files sharing a session ID only the one with the most
    /// messages survives; files with other session IDs are untouched.
    #[test]
    fn test_deduplicate_session_files() {
        use tempfile::TempDir;

        let temp_dir = TempDir::new().expect("Failed to create temp dir");

        // Create files simulating Gemini's behavior: same session ID, different timestamps
        // File 1: session-1737651044-1b872dcc.json with 1 message
        let path1 = temp_dir.path().join("session-1737651044-1b872dcc.json");
        let json1 = make_session_json(
            "1b872dcc",
            "hash",
            r#"[{"type": "user", "content": "Hello"}]"#,
        );
        std::fs::write(&path1, &json1).expect("Failed to write file 1");

        // File 2: session-1737651054-1b872dcc.json with 2 messages (should be kept)
        let path2 = temp_dir.path().join("session-1737651054-1b872dcc.json");
        let json2 = make_session_json(
            "1b872dcc",
            "hash",
            r#"[{"type": "user", "content": "Hello"}, {"type": "gemini", "content": "Hi"}]"#,
        );
        std::fs::write(&path2, &json2).expect("Failed to write file 2");

        // File 3: session-1737651059-1b872dcc.json with 1 message
        let path3 = temp_dir.path().join("session-1737651059-1b872dcc.json");
        let json3 = make_session_json(
            "1b872dcc",
            "hash",
            r#"[{"type": "user", "content": "Goodbye"}]"#,
        );
        std::fs::write(&path3, &json3).expect("Failed to write file 3");

        // File 4: Different session ID, should be kept
        let path4 = temp_dir.path().join("session-1737651044-different.json");
        let json4 = make_session_json(
            "different",
            "hash",
            r#"[{"type": "user", "content": "Other session"}]"#,
        );
        std::fs::write(&path4, &json4).expect("Failed to write file 4");

        let files = vec![path1.clone(), path2.clone(), path3.clone(), path4.clone()];
        let result = deduplicate_session_files(files).expect("Should deduplicate");

        // Should have 2 files: one for 1b872dcc (the one with 2 messages) and one for different
        assert_eq!(result.len(), 2);

        // Verify path2 (with most messages for 1b872dcc) is in the result
        assert!(
            result.contains(&path2),
            "Should keep file with most messages"
        );
        assert!(result.contains(&path4), "Should keep different session");
        assert!(
            !result.contains(&path1),
            "Should not keep file with fewer messages"
        );
        assert!(
            !result.contains(&path3),
            "Should not keep file with fewer messages"
        );
    }

    /// Deduplicating an empty list yields an empty list.
    #[test]
    fn test_deduplicate_session_files_empty() {
        let result = deduplicate_session_files(vec![]).expect("Should handle empty");
        assert!(result.is_empty());
    }

    /// A single file is returned unchanged.
    #[test]
    fn test_deduplicate_session_files_single() {
        use tempfile::TempDir;

        let temp_dir = TempDir::new().expect("Failed to create temp dir");
        let path = temp_dir.path().join("session-1234-abc.json");
        let json = make_session_json("abc", "hash", r#"[{"type": "user", "content": "Hello"}]"#);
        std::fs::write(&path, &json).expect("Failed to write file");

        let result = deduplicate_session_files(vec![path.clone()]).expect("Should deduplicate");
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], path);
    }
}