Skip to main content

ai_memory/
mine.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! Retroactive conversation import from Claude, `ChatGPT`, and Slack exports.
5
6use anyhow::{Context, Result, bail};
7use std::fs;
8use std::path::Path;
9
10use crate::models::MAX_CONTENT_SIZE;
11
12// ---------------------------------------------------------------------------
13// Common types
14// ---------------------------------------------------------------------------
15
/// One conversation, normalised from any of the supported export formats.
#[derive(Debug, Clone)]
pub struct Conversation {
    /// Identifier from the export, or a synthesised fallback
    /// (`claude-N`, `chatgpt-N`, `slack-<channel>`) built by the parsers.
    pub id: String,
    /// Conversation title, when the export carries one.
    pub title: Option<String>,
    /// Messages kept by the parser; parsers drop messages with empty text.
    pub messages: Vec<Message>,
    /// Creation time as a string, when the parser could determine one.
    pub created_at: Option<String>,
}
23
/// A single message inside a [`Conversation`].
#[derive(Debug, Clone)]
pub struct Message {
    /// Speaker label: a chat role such as `user`/`assistant`, or the Slack
    /// `user`/`username` value; parsers fall back to `unknown`.
    pub role: String,
    /// Message text; parsers never emit a message with empty content.
    pub content: String,
    /// Time the message was sent, when the export provides one.
    pub timestamp: Option<String>,
}
30
/// Result of mining a single conversation.
///
/// Produced by `conversation_to_memory`. Derives `Clone` like the other data
/// types in this module so callers can keep a copy without rebuilding it.
#[derive(Debug, Clone)]
pub struct MinedMemory {
    /// Memory title: the conversation title or a truncated message excerpt.
    pub title: String,
    /// Transcript text, one `[role]: content` line per included message.
    pub content: String,
    /// Tag naming the export format the memory was mined from.
    pub source_format: String,
    /// Creation time copied from the source conversation, when known.
    pub created_at: Option<String>,
}
39
40// ---------------------------------------------------------------------------
41// Format detection
42// ---------------------------------------------------------------------------
43
/// Supported export formats.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the enum can be used
/// as a map/set key and in APIs that require full equality.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Format {
    /// Claude conversation export (JSON Lines).
    Claude,
    /// `ChatGPT` conversation export (single JSON array).
    ChatGpt,
    /// Slack workspace export (directory of per-channel JSON files).
    Slack,
}
50
51impl Format {
52    pub fn from_str(s: &str) -> Option<Self> {
53        match s.to_lowercase().as_str() {
54            "claude" => Some(Self::Claude),
55            "chatgpt" => Some(Self::ChatGpt),
56            "slack" => Some(Self::Slack),
57            _ => None,
58        }
59    }
60
61    pub fn source_tag(self) -> &'static str {
62        match self {
63            Self::Claude => "mine-claude",
64            Self::ChatGpt => "mine-chatgpt",
65            Self::Slack => "mine-slack",
66        }
67    }
68}
69
70// ---------------------------------------------------------------------------
71// Parse: Claude (JSONL)
72// ---------------------------------------------------------------------------
73
74/// Parse Claude's conversations.jsonl export.
75/// Each line is a JSON object representing one conversation.
76pub fn parse_claude(path: &Path) -> Result<Vec<Conversation>> {
77    let data = fs::read_to_string(path)
78        .with_context(|| format!("failed to read Claude export: {}", path.display()))?;
79
80    let mut conversations = Vec::new();
81
82    for (line_num, line) in data.lines().enumerate() {
83        let line = line.trim();
84        if line.is_empty() {
85            continue;
86        }
87        let val: serde_json::Value = serde_json::from_str(line)
88            .with_context(|| format!("invalid JSON on line {}", line_num + 1))?;
89
90        let conv = parse_claude_conversation(&val, line_num)?;
91        if let Some(c) = conv {
92            conversations.push(c);
93        }
94    }
95
96    Ok(conversations)
97}
98
99#[allow(clippy::unnecessary_wraps)]
100fn parse_claude_conversation(
101    val: &serde_json::Value,
102    line_num: usize,
103) -> Result<Option<Conversation>> {
104    let id = val["uuid"]
105        .as_str()
106        .unwrap_or(&format!("claude-{line_num}"))
107        .to_string();
108    let title = val["name"].as_str().map(std::string::ToString::to_string);
109    let created_at = val["created_at"]
110        .as_str()
111        .map(std::string::ToString::to_string);
112
113    let mut messages = Vec::new();
114
115    // Format 1: "chat_messages" array (Claude export format)
116    if let Some(msgs) = val["chat_messages"].as_array() {
117        for msg in msgs {
118            let role = msg["sender"]
119                .as_str()
120                .or_else(|| msg["role"].as_str())
121                .unwrap_or("unknown")
122                .to_string();
123            // Map "human" -> "user"
124            let role = match role.as_str() {
125                "human" => "user".to_string(),
126                other => other.to_string(),
127            };
128
129            let content = extract_text_content(&msg["text"])
130                .or_else(|| extract_text_content(&msg["content"]))
131                .unwrap_or_default();
132
133            if !content.is_empty() {
134                let timestamp = msg["created_at"]
135                    .as_str()
136                    .or_else(|| msg["timestamp"].as_str())
137                    .map(std::string::ToString::to_string);
138                messages.push(Message {
139                    role,
140                    content,
141                    timestamp,
142                });
143            }
144        }
145    }
146    // Format 2: "mapping" object (tree of message nodes)
147    else if let Some(mapping) = val["mapping"].as_object() {
148        let mut node_messages: Vec<(String, Message)> = Vec::new();
149        for (_node_id, node) in mapping {
150            if let Some(msg) = node["message"].as_object() {
151                let role = msg
152                    .get("role")
153                    .and_then(|r| r.as_str())
154                    .or_else(|| {
155                        msg.get("author")
156                            .and_then(|a| a.get("role"))
157                            .and_then(|r| r.as_str())
158                    })
159                    .unwrap_or("unknown");
160
161                if role == "system" {
162                    continue;
163                }
164
165                let content = extract_message_content(msg);
166                if !content.is_empty() {
167                    let ts = msg
168                        .get("create_time")
169                        .and_then(serde_json::Value::as_i64)
170                        .map(|t| {
171                            chrono::DateTime::from_timestamp(t, 0)
172                                .map(|dt| dt.to_rfc3339())
173                                .unwrap_or_default()
174                        });
175                    let sort_key = msg
176                        .get("create_time")
177                        .and_then(serde_json::Value::as_f64)
178                        .unwrap_or(0.0)
179                        .to_string();
180                    node_messages.push((
181                        sort_key,
182                        Message {
183                            role: role.to_string(),
184                            content,
185                            timestamp: ts,
186                        },
187                    ));
188                }
189            }
190        }
191        node_messages.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
192        messages = node_messages.into_iter().map(|(_, m)| m).collect();
193    }
194
195    if messages.is_empty() {
196        return Ok(None);
197    }
198
199    Ok(Some(Conversation {
200        id,
201        title,
202        messages,
203        created_at,
204    }))
205}
206
207// ---------------------------------------------------------------------------
208// Parse: ChatGPT (JSON)
209// ---------------------------------------------------------------------------
210
211/// Parse `ChatGPT`'s conversations.json export.
212pub fn parse_chatgpt(path: &Path) -> Result<Vec<Conversation>> {
213    let data = fs::read_to_string(path)
214        .with_context(|| format!("failed to read ChatGPT export: {}", path.display()))?;
215
216    let val: serde_json::Value =
217        serde_json::from_str(&data).context("invalid JSON in ChatGPT export")?;
218
219    let arr = val
220        .as_array()
221        .ok_or_else(|| anyhow::anyhow!("expected JSON array at top level"))?;
222
223    let mut conversations = Vec::new();
224
225    for (idx, conv_val) in arr.iter().enumerate() {
226        let id = conv_val["id"]
227            .as_str()
228            .unwrap_or(&format!("chatgpt-{idx}"))
229            .to_string();
230        let title = conv_val["title"]
231            .as_str()
232            .map(std::string::ToString::to_string);
233        let created_at = conv_val["create_time"]
234            .as_i64()
235            .and_then(|t| chrono::DateTime::from_timestamp(t, 0))
236            .map(|dt| dt.to_rfc3339());
237
238        let mut messages = Vec::new();
239
240        // ChatGPT uses a "mapping" tree of nodes
241        if let Some(mapping) = conv_val["mapping"].as_object() {
242            let mut node_msgs: Vec<(f64, Message)> = Vec::new();
243
244            for (_node_id, node) in mapping {
245                if let Some(msg) = node.get("message") {
246                    let role = msg["author"]["role"].as_str().unwrap_or("unknown");
247                    if role == "system" {
248                        continue;
249                    }
250
251                    let content =
252                        extract_message_content(msg.as_object().unwrap_or(&serde_json::Map::new()));
253                    if content.is_empty() {
254                        continue;
255                    }
256
257                    let ts = msg["create_time"].as_f64().unwrap_or(0.0);
258                    #[allow(clippy::cast_possible_truncation)]
259                    node_msgs.push((
260                        ts,
261                        Message {
262                            role: role.to_string(),
263                            content,
264                            timestamp: chrono::DateTime::from_timestamp(ts as i64, 0)
265                                .map(|dt| dt.to_rfc3339()),
266                        },
267                    ));
268                }
269            }
270
271            node_msgs.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
272            messages = node_msgs.into_iter().map(|(_, m)| m).collect();
273        }
274
275        if messages.is_empty() {
276            continue;
277        }
278
279        conversations.push(Conversation {
280            id,
281            title,
282            messages,
283            created_at,
284        });
285    }
286
287    Ok(conversations)
288}
289
290// ---------------------------------------------------------------------------
291// Parse: Slack (directory of JSON files)
292// ---------------------------------------------------------------------------
293
294/// Parse a Slack workspace export directory.
295/// Structure: channel_name/YYYY-MM-DD.json
296pub fn parse_slack(path: &Path) -> Result<Vec<Conversation>> {
297    if !path.is_dir() {
298        bail!("Slack export path must be a directory: {}", path.display());
299    }
300
301    let mut conversations = Vec::new();
302
303    // Each subdirectory is a channel
304    let mut entries: Vec<_> = fs::read_dir(path)
305        .with_context(|| format!("failed to read Slack export dir: {}", path.display()))?
306        .filter_map(std::result::Result::ok)
307        .collect();
308    entries.sort_by_key(std::fs::DirEntry::file_name);
309
310    for entry in entries {
311        let channel_path = entry.path();
312        if !channel_path.is_dir() {
313            continue;
314        }
315        let channel_name = entry.file_name().to_string_lossy().to_string();
316
317        // Collect all JSON files in the channel, sorted by date
318        let mut json_files: Vec<_> = fs::read_dir(&channel_path)?
319            .filter_map(std::result::Result::ok)
320            .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
321            .collect();
322        json_files.sort_by_key(std::fs::DirEntry::file_name);
323
324        let mut all_messages = Vec::new();
325
326        for file_entry in json_files {
327            let file_path = file_entry.path();
328            let data = fs::read_to_string(&file_path)?;
329            let msgs: serde_json::Value = serde_json::from_str(&data)
330                .with_context(|| format!("invalid JSON: {}", file_path.display()))?;
331
332            if let Some(arr) = msgs.as_array() {
333                for msg in arr {
334                    let user = msg["user"]
335                        .as_str()
336                        .or_else(|| msg["username"].as_str())
337                        .unwrap_or("unknown");
338                    let text = msg["text"].as_str().unwrap_or("").to_string();
339                    if text.is_empty() {
340                        continue;
341                    }
342
343                    #[allow(clippy::cast_possible_truncation)]
344                    let ts = msg["ts"]
345                        .as_str()
346                        .and_then(|s| s.parse::<f64>().ok())
347                        .and_then(|t| chrono::DateTime::from_timestamp(t as i64, 0))
348                        .map(|dt| dt.to_rfc3339());
349
350                    all_messages.push(Message {
351                        role: user.to_string(),
352                        content: text,
353                        timestamp: ts.clone(),
354                    });
355                }
356            }
357        }
358
359        if all_messages.is_empty() {
360            continue;
361        }
362
363        let created_at = all_messages.first().and_then(|m| m.timestamp.clone());
364
365        conversations.push(Conversation {
366            id: format!("slack-{channel_name}"),
367            title: Some(format!("#{channel_name}")),
368            messages: all_messages,
369            created_at,
370        });
371    }
372
373    Ok(conversations)
374}
375
376// ---------------------------------------------------------------------------
377// Content extraction
378// ---------------------------------------------------------------------------
379
380/// Extract text from a `serde_json::Value` that may be a string or array of parts.
381fn extract_text_content(val: &serde_json::Value) -> Option<String> {
382    if let Some(s) = val.as_str() {
383        return Some(s.to_string());
384    }
385    if let Some(arr) = val.as_array() {
386        let parts: Vec<String> = arr
387            .iter()
388            .filter_map(|p| {
389                if let Some(s) = p.as_str() {
390                    Some(s.to_string())
391                } else {
392                    p["text"].as_str().map(std::string::ToString::to_string)
393                }
394            })
395            .collect();
396        if !parts.is_empty() {
397            return Some(parts.join("\n"));
398        }
399    }
400    None
401}
402
403/// Extract message content from a message object (ChatGPT/Claude mapping format).
404fn extract_message_content(msg: &serde_json::Map<String, serde_json::Value>) -> String {
405    // Try content.parts array first (ChatGPT format)
406    if let Some(content) = msg.get("content") {
407        if let Some(parts) = content["parts"].as_array() {
408            let text: Vec<String> = parts
409                .iter()
410                .filter_map(|p| p.as_str().map(String::from))
411                .collect();
412            if !text.is_empty() {
413                return text.join("\n");
414            }
415        }
416        // Try content as string
417        if let Some(s) = content.as_str() {
418            return s.to_string();
419        }
420        // Try content.text
421        if let Some(s) = content["text"].as_str() {
422            return s.to_string();
423        }
424    }
425    // Try text field directly
426    if let Some(s) = msg.get("text").and_then(|v| v.as_str()) {
427        return s.to_string();
428    }
429    String::new()
430}
431
432// ---------------------------------------------------------------------------
433// Conversion to memories
434// ---------------------------------------------------------------------------
435
436/// Convert a parsed conversation into a `MinedMemory` ready for storage.
437pub fn conversation_to_memory(conv: &Conversation, format: Format) -> Option<MinedMemory> {
438    if conv.messages.is_empty() {
439        return None;
440    }
441
442    // Title: use conversation title or first user message
443    let title = conv.title.as_deref().filter(|t| !t.is_empty()).map_or_else(
444        || {
445            let first_user = conv
446                .messages
447                .iter()
448                .find(|m| m.role == "user" || m.role == "human")
449                .or(conv.messages.first());
450            match first_user {
451                Some(m) => truncate(&m.content, 100).to_string(),
452                None => format!("Conversation {}", &conv.id),
453            }
454        },
455        |t| truncate(t, 100).to_string(),
456    );
457
458    // Content: formatted message concatenation
459    let mut content = String::new();
460    for msg in &conv.messages {
461        let line = format!("[{}]: {}\n", msg.role, msg.content);
462        if content.len() + line.len() > MAX_CONTENT_SIZE {
463            break;
464        }
465        content.push_str(&line);
466    }
467
468    if content.is_empty() {
469        return None;
470    }
471
472    Some(MinedMemory {
473        title,
474        content,
475        source_format: format.source_tag().to_string(),
476        created_at: conv.created_at.clone(),
477    })
478}
479
/// Return the longest prefix of `s` that is at most `max_chars` **bytes** long
/// and ends on a UTF-8 character boundary.
///
/// Despite the parameter name, the limit is measured in bytes, so text with
/// multi-byte characters is cut to fewer than `max_chars` characters.
fn truncate(s: &str, max_chars: usize) -> &str {
    if s.len() <= max_chars {
        return s;
    }
    // Walk back from the limit to the nearest boundary; index 0 always is one.
    let cut = (0..=max_chars)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
490
491// ---------------------------------------------------------------------------
492// Tests
493// ---------------------------------------------------------------------------
494
// Unit tests for the parsers (happy paths), the memory converter, `truncate`,
// and `Format::from_str`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Write `content` to a fresh temporary file and return its handle;
    /// the file lives as long as the returned value.
    fn make_temp_file(content: &str) -> NamedTempFile {
        let mut f = NamedTempFile::new().unwrap();
        f.write_all(content.as_bytes()).unwrap();
        f
    }

    // Claude `chat_messages` layout: title is read and "human" becomes "user".
    #[test]
    fn test_parse_claude_jsonl() {
        let jsonl = r#"{"uuid":"conv1","name":"Test Chat","chat_messages":[{"sender":"human","text":"Hello"},{"sender":"assistant","text":"Hi there!"}]}"#;
        let f = make_temp_file(jsonl);
        let convs = parse_claude(f.path()).unwrap();
        assert_eq!(convs.len(), 1);
        assert_eq!(convs[0].title, Some("Test Chat".to_string()));
        assert_eq!(convs[0].messages.len(), 2);
        assert_eq!(convs[0].messages[0].role, "user");
        assert_eq!(convs[0].messages[0].content, "Hello");
    }

    // Blank lines around a record are ignored rather than treated as bad JSON.
    #[test]
    fn test_parse_claude_empty_lines() {
        let jsonl = "\n\n{\"uuid\":\"c1\",\"name\":\"X\",\"chat_messages\":[{\"sender\":\"human\",\"text\":\"hi\"}]}\n\n";
        let f = make_temp_file(jsonl);
        let convs = parse_claude(f.path()).unwrap();
        assert_eq!(convs.len(), 1);
    }

    // ChatGPT `mapping` layout: messages come out ordered by `create_time`.
    #[test]
    fn test_parse_chatgpt_json() {
        let json = r#"[{"id":"conv1","title":"GPT Chat","create_time":1700000000,"mapping":{"node1":{"message":{"author":{"role":"user"},"content":{"parts":["What is Rust?"]},"create_time":1700000001}},"node2":{"message":{"author":{"role":"assistant"},"content":{"parts":["Rust is a systems programming language."]},"create_time":1700000002}}}}]"#;
        let f = make_temp_file(json);
        let convs = parse_chatgpt(f.path()).unwrap();
        assert_eq!(convs.len(), 1);
        assert_eq!(convs[0].title, Some("GPT Chat".to_string()));
        assert_eq!(convs[0].messages.len(), 2);
        assert_eq!(convs[0].messages[0].content, "What is Rust?");
    }

    // Slack export: one channel directory becomes one conversation titled `#<channel>`.
    #[test]
    fn test_parse_slack_dir() {
        let dir = tempfile::tempdir().unwrap();
        let channel_dir = dir.path().join("general");
        fs::create_dir(&channel_dir).unwrap();
        let msg_json = r#"[{"user":"U123","text":"Hello team!","ts":"1700000000.000000"},{"user":"U456","text":"Hey!","ts":"1700000001.000000"}]"#;
        fs::write(channel_dir.join("2024-01-01.json"), msg_json).unwrap();

        let convs = parse_slack(dir.path()).unwrap();
        assert_eq!(convs.len(), 1);
        assert_eq!(convs[0].title, Some("#general".to_string()));
        assert_eq!(convs[0].messages.len(), 2);
    }

    // Converter: explicit title is kept and content uses `[role]: text` lines.
    #[test]
    fn test_conversation_to_memory() {
        let conv = Conversation {
            id: "test1".to_string(),
            title: Some("My Chat".to_string()),
            messages: vec![
                Message {
                    role: "user".to_string(),
                    content: "Hello".to_string(),
                    timestamp: None,
                },
                Message {
                    role: "assistant".to_string(),
                    content: "Hi!".to_string(),
                    timestamp: None,
                },
            ],
            created_at: None,
        };
        let mem = conversation_to_memory(&conv, Format::Claude).unwrap();
        assert_eq!(mem.title, "My Chat");
        assert!(mem.content.contains("[user]: Hello"));
        assert!(mem.content.contains("[assistant]: Hi!"));
        assert_eq!(mem.source_format, "mine-claude");
    }

    // Converter: without a title, the first user message supplies it.
    #[test]
    fn test_conversation_to_memory_no_title() {
        let conv = Conversation {
            id: "test2".to_string(),
            title: None,
            messages: vec![Message {
                role: "user".to_string(),
                content: "What is the weather?".to_string(),
                timestamp: None,
            }],
            created_at: None,
        };
        let mem = conversation_to_memory(&conv, Format::ChatGpt).unwrap();
        assert_eq!(mem.title, "What is the weather?");
    }

    // Converter: a conversation with no messages produces no memory.
    #[test]
    fn test_conversation_to_memory_empty() {
        let conv = Conversation {
            id: "test3".to_string(),
            title: None,
            messages: vec![],
            created_at: None,
        };
        assert!(conversation_to_memory(&conv, Format::Claude).is_none());
    }

    // `truncate`: short input is untouched; long ASCII input is cut at the limit.
    #[test]
    fn test_truncate() {
        assert_eq!(truncate("hello", 10), "hello");
        assert_eq!(truncate("hello world", 5), "hello");
    }

    // `Format::from_str` is case-insensitive and rejects unknown names.
    #[test]
    fn test_format_from_str() {
        assert_eq!(Format::from_str("claude"), Some(Format::Claude));
        assert_eq!(Format::from_str("ChatGPT"), Some(Format::ChatGpt));
        assert_eq!(Format::from_str("SLACK"), Some(Format::Slack));
        assert_eq!(Format::from_str("unknown"), None);
    }
}
619
#[test]
fn mine_handles_empty_namespace() {
    // NOTE(review): no namespace is involved here despite the test name; this
    // checks that a titled, single-message conversation converts to a memory
    // tagged with the Claude source tag.
    let conv = Conversation {
        id: "test-empty-ns".to_string(),
        title: Some("Empty Namespace Test".to_string()),
        messages: vec![Message {
            role: "user".to_string(),
            content: "Test message with substantial content for conversion".to_string(),
            timestamp: None,
        }],
        created_at: None,
    };
    let mem = conversation_to_memory(&conv, Format::Claude);
    assert!(mem.is_some());
    let m = mem.unwrap();
    assert_eq!(m.source_format, "mine-claude");
}
638
#[test]
fn mine_skips_archived_memories() {
    // NOTE(review): there is no "archived" state in this module; the test
    // checks that a titled conversation with no messages yields `None`.
    let conv = Conversation {
        id: "empty".to_string(),
        title: Some("Should Skip".to_string()),
        messages: vec![], // No messages, so nothing to convert
        created_at: None,
    };
    assert!(conversation_to_memory(&conv, Format::Claude).is_none());
}
650
#[test]
fn mine_with_zero_limit_returns_empty() {
    // NOTE(review): no limit parameter is exercised here; the test checks that
    // an untitled conversation with no messages yields `None`.
    let conv = Conversation {
        id: "zero-limit".to_string(),
        title: None,
        messages: vec![], // No messages
        created_at: None,
    };
    let mem = conversation_to_memory(&conv, Format::ChatGpt);
    assert!(mem.is_none());
}
663
664// ---------------------------------------------------------------------------
665// W12-D: parser branch coverage (Claude mapping format, ChatGPT edge cases,
666// Slack error paths, content extraction variants, converter limits).
667// ---------------------------------------------------------------------------
668#[cfg(test)]
669mod tests_w12d {
670    use super::*;
671    use std::fs;
672    use std::io::Write as _;
673    use tempfile::NamedTempFile;
674
675    fn temp_file(content: &str) -> NamedTempFile {
676        let mut f = NamedTempFile::new().unwrap();
677        f.write_all(content.as_bytes()).unwrap();
678        f
679    }
680
681    // ---- Format::source_tag — exercise all variants -----------------------
682    #[test]
683    fn source_tag_all_variants() {
684        assert_eq!(Format::Claude.source_tag(), "mine-claude");
685        assert_eq!(Format::ChatGpt.source_tag(), "mine-chatgpt");
686        assert_eq!(Format::Slack.source_tag(), "mine-slack");
687    }
688
689    // ---- parse_claude — error paths ---------------------------------------
690    #[test]
691    fn parse_claude_missing_file_errors() {
692        let p = std::path::Path::new("/nonexistent/path/to/claude_does_not_exist.jsonl");
693        let err = parse_claude(p).unwrap_err();
694        let msg = format!("{err:#}");
695        assert!(
696            msg.contains("failed to read Claude export"),
697            "expected read-failure context, got: {msg}"
698        );
699    }
700
701    #[test]
702    fn parse_claude_invalid_json_line_errors() {
703        // Second line is malformed; first line is fine.
704        let jsonl = "{\"uuid\":\"a\",\"chat_messages\":[{\"sender\":\"human\",\"text\":\"hi\"}]}\nNOT JSON\n";
705        let f = temp_file(jsonl);
706        let err = parse_claude(f.path()).unwrap_err();
707        let msg = format!("{err:#}");
708        assert!(
709            msg.contains("invalid JSON on line 2"),
710            "want line 2 hint, got: {msg}"
711        );
712    }
713
714    #[test]
715    fn parse_claude_skips_conversations_with_no_messages() {
716        // Conversation with empty chat_messages should be filtered (None branch).
717        let jsonl = r#"{"uuid":"empty","name":"Empty","chat_messages":[]}
718{"uuid":"good","name":"Good","chat_messages":[{"sender":"human","text":"hi"}]}"#;
719        let f = temp_file(jsonl);
720        let convs = parse_claude(f.path()).unwrap();
721        assert_eq!(convs.len(), 1, "empty conv should be skipped");
722        assert_eq!(convs[0].id, "good");
723    }
724
725    #[test]
726    fn parse_claude_skips_messages_without_content() {
727        // Messages with empty/missing text should be skipped, but conv kept if any survive.
728        let jsonl = r#"{"uuid":"c1","chat_messages":[{"sender":"human","text":""},{"sender":"assistant","text":"hello"}]}"#;
729        let f = temp_file(jsonl);
730        let convs = parse_claude(f.path()).unwrap();
731        assert_eq!(convs.len(), 1);
732        assert_eq!(convs[0].messages.len(), 1);
733        assert_eq!(convs[0].messages[0].role, "assistant");
734    }
735
736    #[test]
737    fn parse_claude_uses_role_fallback_and_timestamps() {
738        // Use `role` instead of `sender`; `content` instead of `text`; `timestamp` instead of `created_at`.
739        let jsonl = r#"{"uuid":"c1","chat_messages":[{"role":"assistant","content":"reply","timestamp":"2024-01-01T00:00:00Z"}]}"#;
740        let f = temp_file(jsonl);
741        let convs = parse_claude(f.path()).unwrap();
742        assert_eq!(convs.len(), 1);
743        assert_eq!(convs[0].messages[0].role, "assistant");
744        assert_eq!(convs[0].messages[0].content, "reply");
745        assert_eq!(
746            convs[0].messages[0].timestamp.as_deref(),
747            Some("2024-01-01T00:00:00Z")
748        );
749    }
750
751    // ---- parse_claude — mapping format (Format 2) -------------------------
752    #[test]
753    fn parse_claude_mapping_format() {
754        // No chat_messages — falls through to mapping branch.
755        let jsonl = r#"{"uuid":"map1","name":"Mapping Conv","mapping":{"n1":{"message":{"role":"user","content":{"parts":["first"]},"create_time":1700000001}},"n2":{"message":{"author":{"role":"assistant"},"content":{"parts":["second"]},"create_time":1700000002}},"n3":{"message":{"role":"system","content":{"parts":["ignored"]}}}}}"#;
756        let f = temp_file(jsonl);
757        let convs = parse_claude(f.path()).unwrap();
758        assert_eq!(convs.len(), 1);
759        let conv = &convs[0];
760        assert_eq!(conv.title.as_deref(), Some("Mapping Conv"));
761        // System message dropped; user+assistant retained, ordered by create_time.
762        assert_eq!(conv.messages.len(), 2);
763        assert_eq!(conv.messages[0].content, "first");
764        assert_eq!(conv.messages[1].content, "second");
765        // create_time -> RFC3339 timestamp present
766        assert!(conv.messages[0].timestamp.is_some());
767    }
768
769    #[test]
770    fn parse_claude_mapping_skips_empty_content_nodes() {
771        // Mapping with one node whose content is empty (no parts/text) should be dropped.
772        let jsonl = r#"{"uuid":"map2","mapping":{"n1":{"message":{"role":"user","content":{"parts":[]}}},"n2":{"message":{"role":"user","content":{"parts":["kept"]},"create_time":1700000005}}}}"#;
773        let f = temp_file(jsonl);
774        let convs = parse_claude(f.path()).unwrap();
775        assert_eq!(convs.len(), 1);
776        assert_eq!(convs[0].messages.len(), 1);
777        assert_eq!(convs[0].messages[0].content, "kept");
778    }
779
780    #[test]
781    fn parse_claude_mapping_uuid_fallback_and_no_messages() {
782        // No uuid -> fallback id; mapping with only system messages -> filtered as None.
783        let jsonl = r#"{"mapping":{"n1":{"message":{"role":"system","content":{"parts":["only system"]}}}}}"#;
784        let f = temp_file(jsonl);
785        let convs = parse_claude(f.path()).unwrap();
786        assert_eq!(convs.len(), 0, "system-only conversation is dropped");
787    }
788
789    // ---- parse_chatgpt — error & edge cases -------------------------------
790    #[test]
791    fn parse_chatgpt_missing_file_errors() {
792        let p = std::path::Path::new("/nonexistent/chatgpt.json");
793        let err = parse_chatgpt(p).unwrap_err();
794        assert!(format!("{err:#}").contains("failed to read ChatGPT export"));
795    }
796
797    #[test]
798    fn parse_chatgpt_invalid_json_errors() {
799        let f = temp_file("not really json");
800        let err = parse_chatgpt(f.path()).unwrap_err();
801        assert!(format!("{err:#}").contains("invalid JSON in ChatGPT export"));
802    }
803
804    #[test]
805    fn parse_chatgpt_top_level_object_errors() {
806        let f = temp_file(r#"{"not":"an array"}"#);
807        let err = parse_chatgpt(f.path()).unwrap_err();
808        assert!(format!("{err:#}").contains("expected JSON array"));
809    }
810
811    #[test]
812    fn parse_chatgpt_skips_system_and_empty_messages() {
813        // System role skipped; empty-content message skipped; final conv has 1 message.
814        let json = r#"[{"id":"c1","title":"T","create_time":1700000000,"mapping":{
815            "n1":{"message":{"author":{"role":"system"},"content":{"parts":["sys ignored"]},"create_time":1700000001}},
816            "n2":{"message":{"author":{"role":"user"},"content":{"parts":[]},"create_time":1700000002}},
817            "n3":{"message":{"author":{"role":"user"},"content":{"parts":["kept"]},"create_time":1700000003}}
818        }}]"#;
819        let f = temp_file(json);
820        let convs = parse_chatgpt(f.path()).unwrap();
821        assert_eq!(convs.len(), 1);
822        assert_eq!(convs[0].messages.len(), 1);
823        assert_eq!(convs[0].messages[0].content, "kept");
824        assert!(convs[0].messages[0].timestamp.is_some());
825    }
826
827    #[test]
828    fn parse_chatgpt_drops_conversations_with_no_messages() {
829        // Mapping containing only system messages -> conv filtered out.
830        let json = r#"[{"id":"only-sys","mapping":{
831            "n1":{"message":{"author":{"role":"system"},"content":{"parts":["x"]}}}
832        }}]"#;
833        let f = temp_file(json);
834        let convs = parse_chatgpt(f.path()).unwrap();
835        assert!(convs.is_empty());
836    }
837
838    #[test]
839    fn parse_chatgpt_id_fallback_when_missing() {
840        // Conv missing both id and mapping -> falls through with no messages -> dropped.
841        // But if there ARE messages, the fallback id chatgpt-N path is exercised.
842        let json = r#"[{"mapping":{"n1":{"message":{"author":{"role":"user"},"content":{"parts":["hello"]},"create_time":1700000010}}}}]"#;
843        let f = temp_file(json);
844        let convs = parse_chatgpt(f.path()).unwrap();
845        assert_eq!(convs.len(), 1);
846        assert_eq!(convs[0].id, "chatgpt-0");
847    }
848
849    #[test]
850    fn parse_chatgpt_empty_array() {
851        let f = temp_file("[]");
852        let convs = parse_chatgpt(f.path()).unwrap();
853        assert!(convs.is_empty());
854    }
855
856    // ---- parse_slack — error and edge cases -------------------------------
857    #[test]
858    fn parse_slack_path_must_be_directory() {
859        let f = temp_file("not a dir");
860        let err = parse_slack(f.path()).unwrap_err();
861        assert!(format!("{err:#}").contains("must be a directory"));
862    }
863
864    #[test]
865    fn parse_slack_skips_non_directory_entries_in_root() {
866        // A loose file at the export root should be skipped (only subdirs are channels).
867        let dir = tempfile::tempdir().unwrap();
868        fs::write(dir.path().join("README.txt"), "hello").unwrap();
869        let channel = dir.path().join("general");
870        fs::create_dir(&channel).unwrap();
871        fs::write(
872            channel.join("2024-01-01.json"),
873            r#"[{"user":"U1","text":"hi","ts":"1700000000.0"}]"#,
874        )
875        .unwrap();
876        let convs = parse_slack(dir.path()).unwrap();
877        assert_eq!(convs.len(), 1);
878    }
879
880    #[test]
881    fn parse_slack_skips_non_json_files_and_empty_text() {
882        let dir = tempfile::tempdir().unwrap();
883        let channel = dir.path().join("random");
884        fs::create_dir(&channel).unwrap();
885        // Non-JSON file (should be skipped via extension filter).
886        fs::write(channel.join("note.txt"), "ignored").unwrap();
887        // JSON with one valid + one empty-text message.
888        let json = r#"[{"user":"U1","text":"","ts":"1700000000.0"},{"username":"bot","text":"hello","ts":"1700000001.0"}]"#;
889        fs::write(channel.join("2024-01-02.json"), json).unwrap();
890        let convs = parse_slack(dir.path()).unwrap();
891        assert_eq!(convs.len(), 1);
892        assert_eq!(convs[0].messages.len(), 1);
893        // username fallback exercised
894        assert_eq!(convs[0].messages[0].role, "bot");
895    }
896
897    #[test]
898    fn parse_slack_invalid_json_in_channel_errors() {
899        let dir = tempfile::tempdir().unwrap();
900        let channel = dir.path().join("oops");
901        fs::create_dir(&channel).unwrap();
902        fs::write(channel.join("2024-01-01.json"), "not json").unwrap();
903        let err = parse_slack(dir.path()).unwrap_err();
904        assert!(format!("{err:#}").contains("invalid JSON"));
905    }
906
907    #[test]
908    fn parse_slack_drops_channels_with_no_messages() {
909        // Channel with only empty-text messages -> dropped from output.
910        let dir = tempfile::tempdir().unwrap();
911        let empty_chan = dir.path().join("silent");
912        fs::create_dir(&empty_chan).unwrap();
913        fs::write(
914            empty_chan.join("2024-01-01.json"),
915            r#"[{"user":"U1","text":"","ts":"1700000000.0"}]"#,
916        )
917        .unwrap();
918        let live_chan = dir.path().join("alive");
919        fs::create_dir(&live_chan).unwrap();
920        fs::write(
921            live_chan.join("2024-01-01.json"),
922            r#"[{"user":"U2","text":"hi","ts":"1700000001.0"}]"#,
923        )
924        .unwrap();
925        let convs = parse_slack(dir.path()).unwrap();
926        assert_eq!(convs.len(), 1);
927        assert_eq!(convs[0].id, "slack-alive");
928    }
929
930    #[test]
931    fn parse_slack_handles_missing_timestamp() {
932        // Message with no `ts` -> timestamp None branch.
933        let dir = tempfile::tempdir().unwrap();
934        let channel = dir.path().join("notime");
935        fs::create_dir(&channel).unwrap();
936        fs::write(
937            channel.join("2024-01-01.json"),
938            r#"[{"user":"U1","text":"hi"}]"#,
939        )
940        .unwrap();
941        let convs = parse_slack(dir.path()).unwrap();
942        assert_eq!(convs.len(), 1);
943        assert!(convs[0].messages[0].timestamp.is_none());
944    }
945
946    #[test]
947    fn parse_slack_skips_non_array_top_level() {
948        // JSON file that is an object (not an array) -> the `if let Some(arr)` branch is
949        // simply skipped; channel ends up with no messages and is dropped.
950        let dir = tempfile::tempdir().unwrap();
951        let channel = dir.path().join("weird");
952        fs::create_dir(&channel).unwrap();
953        fs::write(channel.join("2024-01-01.json"), r#"{"not":"an array"}"#).unwrap();
954        let convs = parse_slack(dir.path()).unwrap();
955        assert!(convs.is_empty());
956    }
957
958    // ---- extract_text_content — array branches ----------------------------
959    #[test]
960    fn extract_text_content_array_of_strings() {
961        let v = serde_json::json!(["one", "two"]);
962        assert_eq!(extract_text_content(&v).as_deref(), Some("one\ntwo"));
963    }
964
965    #[test]
966    fn extract_text_content_array_of_text_objects() {
967        // Claude tool-use / text-block format: array of {"type":"text","text":"..."} objects.
968        let v = serde_json::json!([
969            {"type":"text","text":"alpha"},
970            {"type":"text","text":"beta"}
971        ]);
972        assert_eq!(extract_text_content(&v).as_deref(), Some("alpha\nbeta"));
973    }
974
975    #[test]
976    fn extract_text_content_empty_and_non_text() {
977        // Empty array -> None
978        assert!(extract_text_content(&serde_json::json!([])).is_none());
979        // Array of objects with no "text" field -> None (parts vec ends up empty).
980        let v = serde_json::json!([{"type":"image","url":"x"}]);
981        assert!(extract_text_content(&v).is_none());
982        // Null -> None
983        assert!(extract_text_content(&serde_json::Value::Null).is_none());
984    }
985
986    // ---- extract_message_content — branch coverage ------------------------
987    #[test]
988    fn extract_message_content_string_form() {
989        // content is a plain string (not parts array, not object with text).
990        let mut m = serde_json::Map::new();
991        m.insert("content".into(), serde_json::json!("plain text"));
992        assert_eq!(extract_message_content(&m), "plain text");
993    }
994
995    #[test]
996    fn extract_message_content_text_field_under_content() {
997        // content is an object with a "text" field but no "parts".
998        let mut m = serde_json::Map::new();
999        m.insert("content".into(), serde_json::json!({"text":"nested-text"}));
1000        assert_eq!(extract_message_content(&m), "nested-text");
1001    }
1002
1003    #[test]
1004    fn extract_message_content_top_level_text_field() {
1005        // No content at all; falls through to top-level text.
1006        let mut m = serde_json::Map::new();
1007        m.insert("text".into(), serde_json::json!("top-text"));
1008        assert_eq!(extract_message_content(&m), "top-text");
1009    }
1010
1011    #[test]
1012    fn extract_message_content_returns_empty_when_unparseable() {
1013        // No useful fields.
1014        let m = serde_json::Map::new();
1015        assert!(extract_message_content(&m).is_empty());
1016    }
1017
1018    #[test]
1019    fn extract_message_content_parts_array_skips_non_strings() {
1020        // parts mixing strings and non-strings: only strings are joined.
1021        let mut m = serde_json::Map::new();
1022        m.insert(
1023            "content".into(),
1024            serde_json::json!({"parts":["good", {"img":1}, "also-good"]}),
1025        );
1026        assert_eq!(extract_message_content(&m), "good\nalso-good");
1027    }
1028
1029    // ---- conversation_to_memory — title & content branches ----------------
1030    #[test]
1031    fn conversation_to_memory_empty_title_falls_back_to_first_user() {
1032        // title is Some("") -> filter rejects empty -> first-user path.
1033        let conv = Conversation {
1034            id: "c".into(),
1035            title: Some(String::new()),
1036            messages: vec![
1037                Message {
1038                    role: "assistant".into(),
1039                    content: "hello back".into(),
1040                    timestamp: None,
1041                },
1042                Message {
1043                    role: "user".into(),
1044                    content: "hello".into(),
1045                    timestamp: None,
1046                },
1047            ],
1048            created_at: None,
1049        };
1050        let mem = conversation_to_memory(&conv, Format::Slack).unwrap();
1051        assert_eq!(mem.title, "hello");
1052        assert_eq!(mem.source_format, "mine-slack");
1053    }
1054
1055    #[test]
1056    fn conversation_to_memory_no_user_uses_first_message() {
1057        // No user/human role -> first message used.
1058        let conv = Conversation {
1059            id: "c".into(),
1060            title: None,
1061            messages: vec![
1062                Message {
1063                    role: "assistant".into(),
1064                    content: "only assistant".into(),
1065                    timestamp: None,
1066                },
1067                Message {
1068                    role: "tool".into(),
1069                    content: "tool-out".into(),
1070                    timestamp: None,
1071                },
1072            ],
1073            created_at: None,
1074        };
1075        let mem = conversation_to_memory(&conv, Format::ChatGpt).unwrap();
1076        assert_eq!(mem.title, "only assistant");
1077    }
1078
1079    #[test]
1080    fn conversation_to_memory_title_truncates_to_100_chars() {
1081        let long_title = "x".repeat(250);
1082        let conv = Conversation {
1083            id: "c".into(),
1084            title: Some(long_title),
1085            messages: vec![Message {
1086                role: "user".into(),
1087                content: "body".into(),
1088                timestamp: None,
1089            }],
1090            created_at: None,
1091        };
1092        let mem = conversation_to_memory(&conv, Format::Claude).unwrap();
1093        assert_eq!(mem.title.len(), 100);
1094    }
1095
1096    #[test]
1097    fn conversation_to_memory_first_user_content_truncates() {
1098        // No title, first user content very long -> truncated to 100 chars.
1099        let long_msg = "y".repeat(200);
1100        let conv = Conversation {
1101            id: "c".into(),
1102            title: None,
1103            messages: vec![Message {
1104                role: "user".into(),
1105                content: long_msg,
1106                timestamp: None,
1107            }],
1108            created_at: None,
1109        };
1110        let mem = conversation_to_memory(&conv, Format::Claude).unwrap();
1111        assert_eq!(mem.title.len(), 100);
1112    }
1113
1114    #[test]
1115    fn conversation_to_memory_stops_at_max_content_size() {
1116        // Build a single huge message exceeding MAX_CONTENT_SIZE so the loop
1117        // breaks before appending it. With the very first message rejected,
1118        // content stays empty and the function returns None.
1119        let big = "z".repeat(MAX_CONTENT_SIZE + 10);
1120        let conv = Conversation {
1121            id: "c".into(),
1122            title: Some("t".into()),
1123            messages: vec![Message {
1124                role: "user".into(),
1125                content: big,
1126                timestamp: None,
1127            }],
1128            created_at: None,
1129        };
1130        // First (and only) message exceeds the cap -> content empty -> None.
1131        assert!(conversation_to_memory(&conv, Format::Claude).is_none());
1132    }
1133
1134    #[test]
1135    fn conversation_to_memory_truncates_on_second_message() {
1136        // First small message accepted; second huge message rejected by size cap.
1137        let big = "z".repeat(MAX_CONTENT_SIZE);
1138        let conv = Conversation {
1139            id: "c".into(),
1140            title: Some("t".into()),
1141            messages: vec![
1142                Message {
1143                    role: "user".into(),
1144                    content: "small".into(),
1145                    timestamp: None,
1146                },
1147                Message {
1148                    role: "assistant".into(),
1149                    content: big,
1150                    timestamp: None,
1151                },
1152            ],
1153            created_at: None,
1154        };
1155        let mem = conversation_to_memory(&conv, Format::Claude).unwrap();
1156        assert!(mem.content.contains("small"));
1157        // The huge one was skipped due to size cap.
1158        assert!(!mem.content.contains(&"z".repeat(100)));
1159    }
1160
1161    // ---- truncate — char boundary loop ------------------------------------
1162    #[test]
1163    fn truncate_respects_char_boundary() {
1164        // "héllo" — multi-byte char at index 1; truncating at byte 2 must back off.
1165        let s = "héllo";
1166        // Byte length of "h" + 2 bytes for é = 3. Asking for 2 bytes must back off to 1 ("h").
1167        let out = truncate(s, 2);
1168        assert_eq!(out, "h");
1169    }
1170
1171    #[test]
1172    fn truncate_at_exact_boundary_returns_unchanged() {
1173        let s = "abcdef";
1174        assert_eq!(truncate(s, 6), "abcdef");
1175    }
1176
1177    #[test]
1178    fn truncate_zero_max_returns_empty() {
1179        // max_chars = 0 -> while loop exits immediately, slice is "".
1180        let s = "héllo";
1181        assert_eq!(truncate(s, 0), "");
1182    }
1183}