1use anyhow::{Context, Result, bail};
7use std::fs;
8use std::path::Path;
9
10use crate::models::MAX_CONTENT_SIZE;
11
/// A single conversation recovered from a chat export.
#[derive(Debug, Clone)]
pub struct Conversation {
    /// Identifier taken from the export, or a synthesized fallback when the
    /// export does not supply one.
    pub id: String,
    /// Human-readable title, when the export provides one.
    pub title: Option<String>,
    /// Messages kept by the parser (messages without text are dropped).
    pub messages: Vec<Message>,
    /// Creation time as a string, when the export provides one.
    pub created_at: Option<String>,
}
23
/// One message inside a [`Conversation`].
#[derive(Debug, Clone)]
pub struct Message {
    /// Sender label as recorded by the parser (for example `"user"`,
    /// `"assistant"`, a Slack user id, or `"unknown"` when absent).
    pub role: String,
    /// Text of the message.
    pub content: String,
    /// When the message was sent, if the export recorded it.
    pub timestamp: Option<String>,
}
30
/// Flattened form of a conversation, as built by `conversation_to_memory`.
#[derive(Debug)]
pub struct MinedMemory {
    /// Conversation title, or the start of a message when no title exists.
    pub title: String,
    /// Transcript with one `[role]: content` line per included message.
    pub content: String,
    /// Tag naming the originating export format (see `Format::source_tag`).
    pub source_format: String,
    /// Copied from the source conversation's `created_at`.
    pub created_at: Option<String>,
}
39
/// Export formats this module can mine.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Format {
    Claude,
    ChatGpt,
    Slack,
}

impl Format {
    /// Resolves a format from its name, ignoring letter case.
    ///
    /// Returns `None` when the name is not one of `claude`, `chatgpt` or
    /// `slack`.
    pub fn from_str(s: &str) -> Option<Self> {
        const NAMES: [(&str, Format); 3] = [
            ("claude", Format::Claude),
            ("chatgpt", Format::ChatGpt),
            ("slack", Format::Slack),
        ];

        let wanted = s.to_lowercase();
        NAMES
            .iter()
            .find(|(name, _)| *name == wanted)
            .map(|&(_, format)| format)
    }

    /// Tag recorded as the source of memories mined from this format.
    pub fn source_tag(self) -> &'static str {
        // Kept as an exhaustive match so a new variant forces a decision here.
        match self {
            Format::Claude => "mine-claude",
            Format::ChatGpt => "mine-chatgpt",
            Format::Slack => "mine-slack",
        }
    }
}
69
70pub fn parse_claude(path: &Path) -> Result<Vec<Conversation>> {
77 let data = fs::read_to_string(path)
78 .with_context(|| format!("failed to read Claude export: {}", path.display()))?;
79
80 let mut conversations = Vec::new();
81
82 for (line_num, line) in data.lines().enumerate() {
83 let line = line.trim();
84 if line.is_empty() {
85 continue;
86 }
87 let val: serde_json::Value = serde_json::from_str(line)
88 .with_context(|| format!("invalid JSON on line {}", line_num + 1))?;
89
90 let conv = parse_claude_conversation(&val, line_num)?;
91 if let Some(c) = conv {
92 conversations.push(c);
93 }
94 }
95
96 Ok(conversations)
97}
98
99#[allow(clippy::unnecessary_wraps)]
100fn parse_claude_conversation(
101 val: &serde_json::Value,
102 line_num: usize,
103) -> Result<Option<Conversation>> {
104 let id = val["uuid"]
105 .as_str()
106 .unwrap_or(&format!("claude-{line_num}"))
107 .to_string();
108 let title = val["name"].as_str().map(std::string::ToString::to_string);
109 let created_at = val["created_at"]
110 .as_str()
111 .map(std::string::ToString::to_string);
112
113 let mut messages = Vec::new();
114
115 if let Some(msgs) = val["chat_messages"].as_array() {
117 for msg in msgs {
118 let role = msg["sender"]
119 .as_str()
120 .or_else(|| msg["role"].as_str())
121 .unwrap_or("unknown")
122 .to_string();
123 let role = match role.as_str() {
125 "human" => "user".to_string(),
126 other => other.to_string(),
127 };
128
129 let content = extract_text_content(&msg["text"])
130 .or_else(|| extract_text_content(&msg["content"]))
131 .unwrap_or_default();
132
133 if !content.is_empty() {
134 let timestamp = msg["created_at"]
135 .as_str()
136 .or_else(|| msg["timestamp"].as_str())
137 .map(std::string::ToString::to_string);
138 messages.push(Message {
139 role,
140 content,
141 timestamp,
142 });
143 }
144 }
145 }
146 else if let Some(mapping) = val["mapping"].as_object() {
148 let mut node_messages: Vec<(String, Message)> = Vec::new();
149 for (_node_id, node) in mapping {
150 if let Some(msg) = node["message"].as_object() {
151 let role = msg
152 .get("role")
153 .and_then(|r| r.as_str())
154 .or_else(|| {
155 msg.get("author")
156 .and_then(|a| a.get("role"))
157 .and_then(|r| r.as_str())
158 })
159 .unwrap_or("unknown");
160
161 if role == "system" {
162 continue;
163 }
164
165 let content = extract_message_content(msg);
166 if !content.is_empty() {
167 let ts = msg
168 .get("create_time")
169 .and_then(serde_json::Value::as_i64)
170 .map(|t| {
171 chrono::DateTime::from_timestamp(t, 0)
172 .map(|dt| dt.to_rfc3339())
173 .unwrap_or_default()
174 });
175 let sort_key = msg
176 .get("create_time")
177 .and_then(serde_json::Value::as_f64)
178 .unwrap_or(0.0)
179 .to_string();
180 node_messages.push((
181 sort_key,
182 Message {
183 role: role.to_string(),
184 content,
185 timestamp: ts,
186 },
187 ));
188 }
189 }
190 }
191 node_messages.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
192 messages = node_messages.into_iter().map(|(_, m)| m).collect();
193 }
194
195 if messages.is_empty() {
196 return Ok(None);
197 }
198
199 Ok(Some(Conversation {
200 id,
201 title,
202 messages,
203 created_at,
204 }))
205}
206
207pub fn parse_chatgpt(path: &Path) -> Result<Vec<Conversation>> {
213 let data = fs::read_to_string(path)
214 .with_context(|| format!("failed to read ChatGPT export: {}", path.display()))?;
215
216 let val: serde_json::Value =
217 serde_json::from_str(&data).context("invalid JSON in ChatGPT export")?;
218
219 let arr = val
220 .as_array()
221 .ok_or_else(|| anyhow::anyhow!("expected JSON array at top level"))?;
222
223 let mut conversations = Vec::new();
224
225 for (idx, conv_val) in arr.iter().enumerate() {
226 let id = conv_val["id"]
227 .as_str()
228 .unwrap_or(&format!("chatgpt-{idx}"))
229 .to_string();
230 let title = conv_val["title"]
231 .as_str()
232 .map(std::string::ToString::to_string);
233 let created_at = conv_val["create_time"]
234 .as_i64()
235 .and_then(|t| chrono::DateTime::from_timestamp(t, 0))
236 .map(|dt| dt.to_rfc3339());
237
238 let mut messages = Vec::new();
239
240 if let Some(mapping) = conv_val["mapping"].as_object() {
242 let mut node_msgs: Vec<(f64, Message)> = Vec::new();
243
244 for (_node_id, node) in mapping {
245 if let Some(msg) = node.get("message") {
246 let role = msg["author"]["role"].as_str().unwrap_or("unknown");
247 if role == "system" {
248 continue;
249 }
250
251 let content =
252 extract_message_content(msg.as_object().unwrap_or(&serde_json::Map::new()));
253 if content.is_empty() {
254 continue;
255 }
256
257 let ts = msg["create_time"].as_f64().unwrap_or(0.0);
258 #[allow(clippy::cast_possible_truncation)]
259 node_msgs.push((
260 ts,
261 Message {
262 role: role.to_string(),
263 content,
264 timestamp: chrono::DateTime::from_timestamp(ts as i64, 0)
265 .map(|dt| dt.to_rfc3339()),
266 },
267 ));
268 }
269 }
270
271 node_msgs.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
272 messages = node_msgs.into_iter().map(|(_, m)| m).collect();
273 }
274
275 if messages.is_empty() {
276 continue;
277 }
278
279 conversations.push(Conversation {
280 id,
281 title,
282 messages,
283 created_at,
284 });
285 }
286
287 Ok(conversations)
288}
289
290pub fn parse_slack(path: &Path) -> Result<Vec<Conversation>> {
297 if !path.is_dir() {
298 bail!("Slack export path must be a directory: {}", path.display());
299 }
300
301 let mut conversations = Vec::new();
302
303 let mut entries: Vec<_> = fs::read_dir(path)
305 .with_context(|| format!("failed to read Slack export dir: {}", path.display()))?
306 .filter_map(std::result::Result::ok)
307 .collect();
308 entries.sort_by_key(std::fs::DirEntry::file_name);
309
310 for entry in entries {
311 let channel_path = entry.path();
312 if !channel_path.is_dir() {
313 continue;
314 }
315 let channel_name = entry.file_name().to_string_lossy().to_string();
316
317 let mut json_files: Vec<_> = fs::read_dir(&channel_path)?
319 .filter_map(std::result::Result::ok)
320 .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
321 .collect();
322 json_files.sort_by_key(std::fs::DirEntry::file_name);
323
324 let mut all_messages = Vec::new();
325
326 for file_entry in json_files {
327 let file_path = file_entry.path();
328 let data = fs::read_to_string(&file_path)?;
329 let msgs: serde_json::Value = serde_json::from_str(&data)
330 .with_context(|| format!("invalid JSON: {}", file_path.display()))?;
331
332 if let Some(arr) = msgs.as_array() {
333 for msg in arr {
334 let user = msg["user"]
335 .as_str()
336 .or_else(|| msg["username"].as_str())
337 .unwrap_or("unknown");
338 let text = msg["text"].as_str().unwrap_or("").to_string();
339 if text.is_empty() {
340 continue;
341 }
342
343 #[allow(clippy::cast_possible_truncation)]
344 let ts = msg["ts"]
345 .as_str()
346 .and_then(|s| s.parse::<f64>().ok())
347 .and_then(|t| chrono::DateTime::from_timestamp(t as i64, 0))
348 .map(|dt| dt.to_rfc3339());
349
350 all_messages.push(Message {
351 role: user.to_string(),
352 content: text,
353 timestamp: ts.clone(),
354 });
355 }
356 }
357 }
358
359 if all_messages.is_empty() {
360 continue;
361 }
362
363 let created_at = all_messages.first().and_then(|m| m.timestamp.clone());
364
365 conversations.push(Conversation {
366 id: format!("slack-{channel_name}"),
367 title: Some(format!("#{channel_name}")),
368 messages: all_messages,
369 created_at,
370 });
371 }
372
373 Ok(conversations)
374}
375
376fn extract_text_content(val: &serde_json::Value) -> Option<String> {
382 if let Some(s) = val.as_str() {
383 return Some(s.to_string());
384 }
385 if let Some(arr) = val.as_array() {
386 let parts: Vec<String> = arr
387 .iter()
388 .filter_map(|p| {
389 if let Some(s) = p.as_str() {
390 Some(s.to_string())
391 } else {
392 p["text"].as_str().map(std::string::ToString::to_string)
393 }
394 })
395 .collect();
396 if !parts.is_empty() {
397 return Some(parts.join("\n"));
398 }
399 }
400 None
401}
402
403fn extract_message_content(msg: &serde_json::Map<String, serde_json::Value>) -> String {
405 if let Some(content) = msg.get("content") {
407 if let Some(parts) = content["parts"].as_array() {
408 let text: Vec<String> = parts
409 .iter()
410 .filter_map(|p| p.as_str().map(String::from))
411 .collect();
412 if !text.is_empty() {
413 return text.join("\n");
414 }
415 }
416 if let Some(s) = content.as_str() {
418 return s.to_string();
419 }
420 if let Some(s) = content["text"].as_str() {
422 return s.to_string();
423 }
424 }
425 if let Some(s) = msg.get("text").and_then(|v| v.as_str()) {
427 return s.to_string();
428 }
429 String::new()
430}
431
432pub fn conversation_to_memory(conv: &Conversation, format: Format) -> Option<MinedMemory> {
438 if conv.messages.is_empty() {
439 return None;
440 }
441
442 let title = conv.title.as_deref().filter(|t| !t.is_empty()).map_or_else(
444 || {
445 let first_user = conv
446 .messages
447 .iter()
448 .find(|m| m.role == "user" || m.role == "human")
449 .or(conv.messages.first());
450 match first_user {
451 Some(m) => truncate(&m.content, 100).to_string(),
452 None => format!("Conversation {}", &conv.id),
453 }
454 },
455 |t| truncate(t, 100).to_string(),
456 );
457
458 let mut content = String::new();
460 for msg in &conv.messages {
461 let line = format!("[{}]: {}\n", msg.role, msg.content);
462 if content.len() + line.len() > MAX_CONTENT_SIZE {
463 break;
464 }
465 content.push_str(&line);
466 }
467
468 if content.is_empty() {
469 return None;
470 }
471
472 Some(MinedMemory {
473 title,
474 content,
475 source_format: format.source_tag().to_string(),
476 created_at: conv.created_at.clone(),
477 })
478}
479
/// Returns the longest prefix of `s` that is at most `max_chars` **bytes**
/// long and ends on a UTF-8 character boundary.
///
/// Despite the parameter name, the budget is measured in bytes: a multi-byte
/// character straddling the limit is dropped entirely.
fn truncate(s: &str, max_chars: usize) -> &str {
    if max_chars >= s.len() {
        return s;
    }
    // Walk back from the limit to the nearest boundary; index 0 always is one.
    let cut = (0..=max_chars)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
490
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `content` into a fresh temporary file and returns its handle.
    fn make_temp_file(content: &str) -> NamedTempFile {
        let mut handle = NamedTempFile::new().unwrap();
        handle.write_all(content.as_bytes()).unwrap();
        handle
    }

    /// Builds a message without a timestamp.
    fn plain_message(role: &str, content: &str) -> Message {
        Message {
            role: role.to_string(),
            content: content.to_string(),
            timestamp: None,
        }
    }

    /// Builds a conversation without a creation time.
    fn conversation(id: &str, title: Option<&str>, messages: Vec<Message>) -> Conversation {
        Conversation {
            id: id.to_string(),
            title: title.map(str::to_string),
            messages,
            created_at: None,
        }
    }

    #[test]
    fn test_parse_claude_jsonl() {
        let export = make_temp_file(
            r#"{"uuid":"conv1","name":"Test Chat","chat_messages":[{"sender":"human","text":"Hello"},{"sender":"assistant","text":"Hi there!"}]}"#,
        );
        let parsed = parse_claude(export.path()).unwrap();
        assert_eq!(parsed.len(), 1);

        let conv = &parsed[0];
        assert_eq!(conv.title, Some("Test Chat".to_string()));
        assert_eq!(conv.messages.len(), 2);
        // The "human" sender is reported as "user".
        assert_eq!(conv.messages[0].role, "user");
        assert_eq!(conv.messages[0].content, "Hello");
    }

    #[test]
    fn test_parse_claude_empty_lines() {
        let export = make_temp_file(
            "\n\n{\"uuid\":\"c1\",\"name\":\"X\",\"chat_messages\":[{\"sender\":\"human\",\"text\":\"hi\"}]}\n\n",
        );
        let parsed = parse_claude(export.path()).unwrap();
        assert_eq!(parsed.len(), 1);
    }

    #[test]
    fn test_parse_chatgpt_json() {
        let export = make_temp_file(
            r#"[{"id":"conv1","title":"GPT Chat","create_time":1700000000,"mapping":{"node1":{"message":{"author":{"role":"user"},"content":{"parts":["What is Rust?"]},"create_time":1700000001}},"node2":{"message":{"author":{"role":"assistant"},"content":{"parts":["Rust is a systems programming language."]},"create_time":1700000002}}}}]"#,
        );
        let parsed = parse_chatgpt(export.path()).unwrap();
        assert_eq!(parsed.len(), 1);

        let conv = &parsed[0];
        assert_eq!(conv.title, Some("GPT Chat".to_string()));
        assert_eq!(conv.messages.len(), 2);
        assert_eq!(conv.messages[0].content, "What is Rust?");
    }

    #[test]
    fn test_parse_slack_dir() {
        let root = tempfile::tempdir().unwrap();
        let general = root.path().join("general");
        fs::create_dir(&general).unwrap();
        fs::write(
            general.join("2024-01-01.json"),
            r#"[{"user":"U123","text":"Hello team!","ts":"1700000000.000000"},{"user":"U456","text":"Hey!","ts":"1700000001.000000"}]"#,
        )
        .unwrap();

        let parsed = parse_slack(root.path()).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].title, Some("#general".to_string()));
        assert_eq!(parsed[0].messages.len(), 2);
    }

    #[test]
    fn test_conversation_to_memory() {
        let conv = conversation(
            "test1",
            Some("My Chat"),
            vec![
                plain_message("user", "Hello"),
                plain_message("assistant", "Hi!"),
            ],
        );
        let memory = conversation_to_memory(&conv, Format::Claude).unwrap();
        assert_eq!(memory.title, "My Chat");
        for expected in ["[user]: Hello", "[assistant]: Hi!"] {
            assert!(memory.content.contains(expected));
        }
        assert_eq!(memory.source_format, "mine-claude");
    }

    #[test]
    fn test_conversation_to_memory_no_title() {
        let conv = conversation(
            "test2",
            None,
            vec![plain_message("user", "What is the weather?")],
        );
        let memory = conversation_to_memory(&conv, Format::ChatGpt).unwrap();
        assert_eq!(memory.title, "What is the weather?");
    }

    #[test]
    fn test_conversation_to_memory_empty() {
        let conv = conversation("test3", None, vec![]);
        assert!(conversation_to_memory(&conv, Format::Claude).is_none());
    }

    #[test]
    fn test_truncate() {
        assert_eq!(truncate("hello", 10), "hello");
        assert_eq!(truncate("hello world", 5), "hello");
    }

    #[test]
    fn test_format_from_str() {
        let cases = [
            ("claude", Some(Format::Claude)),
            ("ChatGPT", Some(Format::ChatGpt)),
            ("SLACK", Some(Format::Slack)),
            ("unknown", None),
        ];
        for (name, expected) in cases {
            assert_eq!(Format::from_str(name), expected);
        }
    }
}
619
// NOTE(review): despite its name, this test involves no namespace; it checks
// that a titled single-message conversation converts and carries the Claude
// source tag.
#[test]
fn mine_handles_empty_namespace() {
    let only_message = Message {
        role: "user".to_string(),
        content: "Test message with substantial content for conversion".to_string(),
        timestamp: None,
    };
    let conv = Conversation {
        id: "test-empty-ns".to_string(),
        title: Some("Empty Namespace Test".to_string()),
        messages: vec![only_message],
        created_at: None,
    };

    let converted = conversation_to_memory(&conv, Format::Claude);
    assert!(converted.is_some());
    assert_eq!(converted.unwrap().source_format, "mine-claude");
}
638
// NOTE(review): despite its name, nothing here is "archived"; the test checks
// that a titled conversation with no messages produces no memory.
#[test]
fn mine_skips_archived_memories() {
    let without_messages = Conversation {
        id: "empty".to_string(),
        title: Some("Should Skip".to_string()),
        messages: Vec::new(),
        created_at: None,
    };
    let converted = conversation_to_memory(&without_messages, Format::Claude);
    assert!(converted.is_none());
}
650
// NOTE(review): despite its name, no limit is passed anywhere; the test checks
// that an untitled conversation with no messages produces no memory.
#[test]
fn mine_with_zero_limit_returns_empty() {
    let without_messages = Conversation {
        id: "zero-limit".to_string(),
        title: None,
        messages: Vec::new(),
        created_at: None,
    };
    assert!(conversation_to_memory(&without_messages, Format::ChatGpt).is_none());
}
663
/// Additional coverage for the parsers, the content-extraction helpers,
/// `conversation_to_memory` and `truncate`, focused on error paths and edge
/// cases.
#[cfg(test)]
mod tests_w12d {
    use super::*;
    use std::fs;
    use std::io::Write as _;
    use tempfile::NamedTempFile;

    /// Writes `content` into a fresh temporary file and returns its handle.
    fn temp_file(content: &str) -> NamedTempFile {
        let mut f = NamedTempFile::new().unwrap();
        f.write_all(content.as_bytes()).unwrap();
        f
    }

    // ---- Format ----

    /// Every variant maps to its own source tag.
    #[test]
    fn source_tag_all_variants() {
        assert_eq!(Format::Claude.source_tag(), "mine-claude");
        assert_eq!(Format::ChatGpt.source_tag(), "mine-chatgpt");
        assert_eq!(Format::Slack.source_tag(), "mine-slack");
    }

    // ---- parse_claude ----

    /// A missing file surfaces the read-failure context.
    #[test]
    fn parse_claude_missing_file_errors() {
        let p = std::path::Path::new("/nonexistent/path/to/claude_does_not_exist.jsonl");
        let err = parse_claude(p).unwrap_err();
        let msg = format!("{err:#}");
        assert!(
            msg.contains("failed to read Claude export"),
            "expected read-failure context, got: {msg}"
        );
    }

    /// A malformed line is reported with its 1-based line number.
    #[test]
    fn parse_claude_invalid_json_line_errors() {
        let jsonl = "{\"uuid\":\"a\",\"chat_messages\":[{\"sender\":\"human\",\"text\":\"hi\"}]}\nNOT JSON\n";
        let f = temp_file(jsonl);
        let err = parse_claude(f.path()).unwrap_err();
        let msg = format!("{err:#}");
        assert!(
            msg.contains("invalid JSON on line 2"),
            "want line 2 hint, got: {msg}"
        );
    }

    /// A conversation with an empty `chat_messages` array is dropped.
    #[test]
    fn parse_claude_skips_conversations_with_no_messages() {
        let jsonl = r#"{"uuid":"empty","name":"Empty","chat_messages":[]}
{"uuid":"good","name":"Good","chat_messages":[{"sender":"human","text":"hi"}]}"#;
        let f = temp_file(jsonl);
        let convs = parse_claude(f.path()).unwrap();
        assert_eq!(convs.len(), 1, "empty conv should be skipped");
        assert_eq!(convs[0].id, "good");
    }

    /// Messages whose text is empty are dropped; the rest are kept.
    #[test]
    fn parse_claude_skips_messages_without_content() {
        let jsonl = r#"{"uuid":"c1","chat_messages":[{"sender":"human","text":""},{"sender":"assistant","text":"hello"}]}"#;
        let f = temp_file(jsonl);
        let convs = parse_claude(f.path()).unwrap();
        assert_eq!(convs.len(), 1);
        assert_eq!(convs[0].messages.len(), 1);
        assert_eq!(convs[0].messages[0].role, "assistant");
    }

    /// `role`, `content` and `timestamp` are used when `sender`, `text` and
    /// `created_at` are absent.
    #[test]
    fn parse_claude_uses_role_fallback_and_timestamps() {
        let jsonl = r#"{"uuid":"c1","chat_messages":[{"role":"assistant","content":"reply","timestamp":"2024-01-01T00:00:00Z"}]}"#;
        let f = temp_file(jsonl);
        let convs = parse_claude(f.path()).unwrap();
        assert_eq!(convs.len(), 1);
        assert_eq!(convs[0].messages[0].role, "assistant");
        assert_eq!(convs[0].messages[0].content, "reply");
        assert_eq!(
            convs[0].messages[0].timestamp.as_deref(),
            Some("2024-01-01T00:00:00Z")
        );
    }

    /// The `mapping` layout is accepted: system nodes are skipped and the
    /// remaining messages come out ordered by `create_time`.
    #[test]
    fn parse_claude_mapping_format() {
        let jsonl = r#"{"uuid":"map1","name":"Mapping Conv","mapping":{"n1":{"message":{"role":"user","content":{"parts":["first"]},"create_time":1700000001}},"n2":{"message":{"author":{"role":"assistant"},"content":{"parts":["second"]},"create_time":1700000002}},"n3":{"message":{"role":"system","content":{"parts":["ignored"]}}}}}"#;
        let f = temp_file(jsonl);
        let convs = parse_claude(f.path()).unwrap();
        assert_eq!(convs.len(), 1);
        let conv = &convs[0];
        assert_eq!(conv.title.as_deref(), Some("Mapping Conv"));
        // The system node must not be counted.
        assert_eq!(conv.messages.len(), 2);
        assert_eq!(conv.messages[0].content, "first");
        assert_eq!(conv.messages[1].content, "second");
        assert!(conv.messages[0].timestamp.is_some());
    }

    /// Mapping nodes whose `parts` array is empty contribute no message.
    #[test]
    fn parse_claude_mapping_skips_empty_content_nodes() {
        let jsonl = r#"{"uuid":"map2","mapping":{"n1":{"message":{"role":"user","content":{"parts":[]}}},"n2":{"message":{"role":"user","content":{"parts":["kept"]},"create_time":1700000005}}}}"#;
        let f = temp_file(jsonl);
        let convs = parse_claude(f.path()).unwrap();
        assert_eq!(convs.len(), 1);
        assert_eq!(convs[0].messages.len(), 1);
        assert_eq!(convs[0].messages[0].content, "kept");
    }

    /// A record without `uuid` that contains only a system message yields no
    /// conversation.
    #[test]
    fn parse_claude_mapping_uuid_fallback_and_no_messages() {
        let jsonl = r#"{"mapping":{"n1":{"message":{"role":"system","content":{"parts":["only system"]}}}}}"#;
        let f = temp_file(jsonl);
        let convs = parse_claude(f.path()).unwrap();
        assert_eq!(convs.len(), 0, "system-only conversation is dropped");
    }

    // ---- parse_chatgpt ----

    /// A missing file surfaces the read-failure context.
    #[test]
    fn parse_chatgpt_missing_file_errors() {
        let p = std::path::Path::new("/nonexistent/chatgpt.json");
        let err = parse_chatgpt(p).unwrap_err();
        assert!(format!("{err:#}").contains("failed to read ChatGPT export"));
    }

    /// Non-JSON input is rejected with the invalid-JSON context.
    #[test]
    fn parse_chatgpt_invalid_json_errors() {
        let f = temp_file("not really json");
        let err = parse_chatgpt(f.path()).unwrap_err();
        assert!(format!("{err:#}").contains("invalid JSON in ChatGPT export"));
    }

    /// Valid JSON whose top-level value is not an array is rejected.
    #[test]
    fn parse_chatgpt_top_level_object_errors() {
        let f = temp_file(r#"{"not":"an array"}"#);
        let err = parse_chatgpt(f.path()).unwrap_err();
        assert!(format!("{err:#}").contains("expected JSON array"));
    }

    /// System messages and messages with empty `parts` are skipped.
    #[test]
    fn parse_chatgpt_skips_system_and_empty_messages() {
        let json = r#"[{"id":"c1","title":"T","create_time":1700000000,"mapping":{
 "n1":{"message":{"author":{"role":"system"},"content":{"parts":["sys ignored"]},"create_time":1700000001}},
 "n2":{"message":{"author":{"role":"user"},"content":{"parts":[]},"create_time":1700000002}},
 "n3":{"message":{"author":{"role":"user"},"content":{"parts":["kept"]},"create_time":1700000003}}
 }}]"#;
        let f = temp_file(json);
        let convs = parse_chatgpt(f.path()).unwrap();
        assert_eq!(convs.len(), 1);
        assert_eq!(convs[0].messages.len(), 1);
        assert_eq!(convs[0].messages[0].content, "kept");
        assert!(convs[0].messages[0].timestamp.is_some());
    }

    /// A conversation containing only a system message is dropped.
    #[test]
    fn parse_chatgpt_drops_conversations_with_no_messages() {
        let json = r#"[{"id":"only-sys","mapping":{
 "n1":{"message":{"author":{"role":"system"},"content":{"parts":["x"]}}}
 }}]"#;
        let f = temp_file(json);
        let convs = parse_chatgpt(f.path()).unwrap();
        assert!(convs.is_empty());
    }

    /// A conversation without `id` gets the index-based fallback id.
    #[test]
    fn parse_chatgpt_id_fallback_when_missing() {
        let json = r#"[{"mapping":{"n1":{"message":{"author":{"role":"user"},"content":{"parts":["hello"]},"create_time":1700000010}}}}]"#;
        let f = temp_file(json);
        let convs = parse_chatgpt(f.path()).unwrap();
        assert_eq!(convs.len(), 1);
        assert_eq!(convs[0].id, "chatgpt-0");
    }

    /// An empty top-level array parses to no conversations.
    #[test]
    fn parse_chatgpt_empty_array() {
        let f = temp_file("[]");
        let convs = parse_chatgpt(f.path()).unwrap();
        assert!(convs.is_empty());
    }

    // ---- parse_slack ----

    /// Passing a regular file instead of a directory is an error.
    #[test]
    fn parse_slack_path_must_be_directory() {
        let f = temp_file("not a dir");
        let err = parse_slack(f.path()).unwrap_err();
        assert!(format!("{err:#}").contains("must be a directory"));
    }

    /// Plain files sitting next to the channel directories are ignored.
    #[test]
    fn parse_slack_skips_non_directory_entries_in_root() {
        let dir = tempfile::tempdir().unwrap();
        // A stray file in the export root must not be treated as a channel.
        fs::write(dir.path().join("README.txt"), "hello").unwrap();
        let channel = dir.path().join("general");
        fs::create_dir(&channel).unwrap();
        fs::write(
            channel.join("2024-01-01.json"),
            r#"[{"user":"U1","text":"hi","ts":"1700000000.0"}]"#,
        )
        .unwrap();
        let convs = parse_slack(dir.path()).unwrap();
        assert_eq!(convs.len(), 1);
    }

    /// Non-`.json` files and messages with empty text are ignored; `username`
    /// is used when `user` is absent.
    #[test]
    fn parse_slack_skips_non_json_files_and_empty_text() {
        let dir = tempfile::tempdir().unwrap();
        let channel = dir.path().join("random");
        fs::create_dir(&channel).unwrap();
        fs::write(channel.join("note.txt"), "ignored").unwrap();
        let json = r#"[{"user":"U1","text":"","ts":"1700000000.0"},{"username":"bot","text":"hello","ts":"1700000001.0"}]"#;
        fs::write(channel.join("2024-01-02.json"), json).unwrap();
        let convs = parse_slack(dir.path()).unwrap();
        assert_eq!(convs.len(), 1);
        assert_eq!(convs[0].messages.len(), 1);
        assert_eq!(convs[0].messages[0].role, "bot");
    }

    /// A malformed `.json` file inside a channel fails the whole parse.
    #[test]
    fn parse_slack_invalid_json_in_channel_errors() {
        let dir = tempfile::tempdir().unwrap();
        let channel = dir.path().join("oops");
        fs::create_dir(&channel).unwrap();
        fs::write(channel.join("2024-01-01.json"), "not json").unwrap();
        let err = parse_slack(dir.path()).unwrap_err();
        assert!(format!("{err:#}").contains("invalid JSON"));
    }

    /// Channels whose messages are all empty produce no conversation.
    #[test]
    fn parse_slack_drops_channels_with_no_messages() {
        let dir = tempfile::tempdir().unwrap();
        // Channel whose only message has empty text.
        let empty_chan = dir.path().join("silent");
        fs::create_dir(&empty_chan).unwrap();
        fs::write(
            empty_chan.join("2024-01-01.json"),
            r#"[{"user":"U1","text":"","ts":"1700000000.0"}]"#,
        )
        .unwrap();
        let live_chan = dir.path().join("alive");
        fs::create_dir(&live_chan).unwrap();
        fs::write(
            live_chan.join("2024-01-01.json"),
            r#"[{"user":"U2","text":"hi","ts":"1700000001.0"}]"#,
        )
        .unwrap();
        let convs = parse_slack(dir.path()).unwrap();
        assert_eq!(convs.len(), 1);
        assert_eq!(convs[0].id, "slack-alive");
    }

    /// A message without `ts` is kept, with no timestamp.
    #[test]
    fn parse_slack_handles_missing_timestamp() {
        let dir = tempfile::tempdir().unwrap();
        let channel = dir.path().join("notime");
        fs::create_dir(&channel).unwrap();
        fs::write(
            channel.join("2024-01-01.json"),
            r#"[{"user":"U1","text":"hi"}]"#,
        )
        .unwrap();
        let convs = parse_slack(dir.path()).unwrap();
        assert_eq!(convs.len(), 1);
        assert!(convs[0].messages[0].timestamp.is_none());
    }

    /// A `.json` file whose top-level value is not an array is skipped
    /// without error.
    #[test]
    fn parse_slack_skips_non_array_top_level() {
        let dir = tempfile::tempdir().unwrap();
        let channel = dir.path().join("weird");
        fs::create_dir(&channel).unwrap();
        fs::write(channel.join("2024-01-01.json"), r#"{"not":"an array"}"#).unwrap();
        let convs = parse_slack(dir.path()).unwrap();
        assert!(convs.is_empty());
    }

    // ---- extract_text_content ----

    /// An array of strings is joined with newlines.
    #[test]
    fn extract_text_content_array_of_strings() {
        let v = serde_json::json!(["one", "two"]);
        assert_eq!(extract_text_content(&v).as_deref(), Some("one\ntwo"));
    }

    /// An array of objects contributes each object's `text` field.
    #[test]
    fn extract_text_content_array_of_text_objects() {
        let v = serde_json::json!([
            {"type":"text","text":"alpha"},
            {"type":"text","text":"beta"}
        ]);
        assert_eq!(extract_text_content(&v).as_deref(), Some("alpha\nbeta"));
    }

    /// Empty arrays, arrays without any text, and null all yield `None`.
    #[test]
    fn extract_text_content_empty_and_non_text() {
        assert!(extract_text_content(&serde_json::json!([])).is_none());
        // An object with no `text` field contributes nothing.
        let v = serde_json::json!([{"type":"image","url":"x"}]);
        assert!(extract_text_content(&v).is_none());
        assert!(extract_text_content(&serde_json::Value::Null).is_none());
    }

    // ---- extract_message_content ----

    /// `content` given directly as a string is returned unchanged.
    #[test]
    fn extract_message_content_string_form() {
        let mut m = serde_json::Map::new();
        m.insert("content".into(), serde_json::json!("plain text"));
        assert_eq!(extract_message_content(&m), "plain text");
    }

    /// `content.text` is used when `content` is an object without `parts`.
    #[test]
    fn extract_message_content_text_field_under_content() {
        let mut m = serde_json::Map::new();
        m.insert("content".into(), serde_json::json!({"text":"nested-text"}));
        assert_eq!(extract_message_content(&m), "nested-text");
    }

    /// The message's own `text` field is used when `content` is absent.
    #[test]
    fn extract_message_content_top_level_text_field() {
        let mut m = serde_json::Map::new();
        m.insert("text".into(), serde_json::json!("top-text"));
        assert_eq!(extract_message_content(&m), "top-text");
    }

    /// A message with none of the recognized fields yields an empty string.
    #[test]
    fn extract_message_content_returns_empty_when_unparseable() {
        let m = serde_json::Map::new();
        assert!(extract_message_content(&m).is_empty());
    }

    /// Non-string entries in `parts` are ignored when joining.
    #[test]
    fn extract_message_content_parts_array_skips_non_strings() {
        let mut m = serde_json::Map::new();
        m.insert(
            "content".into(),
            serde_json::json!({"parts":["good", {"img":1}, "also-good"]}),
        );
        assert_eq!(extract_message_content(&m), "good\nalso-good");
    }

    // ---- conversation_to_memory ----

    /// An empty title is treated like a missing one: the first user message
    /// supplies the title even when it is not the first message.
    #[test]
    fn conversation_to_memory_empty_title_falls_back_to_first_user() {
        let conv = Conversation {
            id: "c".into(),
            title: Some(String::new()),
            messages: vec![
                Message {
                    role: "assistant".into(),
                    content: "hello back".into(),
                    timestamp: None,
                },
                Message {
                    role: "user".into(),
                    content: "hello".into(),
                    timestamp: None,
                },
            ],
            created_at: None,
        };
        let mem = conversation_to_memory(&conv, Format::Slack).unwrap();
        assert_eq!(mem.title, "hello");
        assert_eq!(mem.source_format, "mine-slack");
    }

    /// With no user message at all, the first message supplies the title.
    #[test]
    fn conversation_to_memory_no_user_uses_first_message() {
        let conv = Conversation {
            id: "c".into(),
            title: None,
            messages: vec![
                Message {
                    role: "assistant".into(),
                    content: "only assistant".into(),
                    timestamp: None,
                },
                Message {
                    role: "tool".into(),
                    content: "tool-out".into(),
                    timestamp: None,
                },
            ],
            created_at: None,
        };
        let mem = conversation_to_memory(&conv, Format::ChatGpt).unwrap();
        assert_eq!(mem.title, "only assistant");
    }

    /// An over-long explicit title is cut to 100 bytes.
    #[test]
    fn conversation_to_memory_title_truncates_to_100_chars() {
        let long_title = "x".repeat(250);
        let conv = Conversation {
            id: "c".into(),
            title: Some(long_title),
            messages: vec![Message {
                role: "user".into(),
                content: "body".into(),
                timestamp: None,
            }],
            created_at: None,
        };
        let mem = conversation_to_memory(&conv, Format::Claude).unwrap();
        assert_eq!(mem.title.len(), 100);
    }

    /// A title derived from message content is cut to 100 bytes as well.
    #[test]
    fn conversation_to_memory_first_user_content_truncates() {
        let long_msg = "y".repeat(200);
        let conv = Conversation {
            id: "c".into(),
            title: None,
            messages: vec![Message {
                role: "user".into(),
                content: long_msg,
                timestamp: None,
            }],
            created_at: None,
        };
        let mem = conversation_to_memory(&conv, Format::Claude).unwrap();
        assert_eq!(mem.title.len(), 100);
    }

    /// When even the first message exceeds `MAX_CONTENT_SIZE`, the content
    /// stays empty and no memory is produced.
    #[test]
    fn conversation_to_memory_stops_at_max_content_size() {
        let big = "z".repeat(MAX_CONTENT_SIZE + 10);
        let conv = Conversation {
            id: "c".into(),
            title: Some("t".into()),
            messages: vec![Message {
                role: "user".into(),
                content: big,
                timestamp: None,
            }],
            created_at: None,
        };
        assert!(conversation_to_memory(&conv, Format::Claude).is_none());
    }

    /// A message that would overflow the size budget is left out, while the
    /// earlier messages are kept.
    #[test]
    fn conversation_to_memory_truncates_on_second_message() {
        let big = "z".repeat(MAX_CONTENT_SIZE);
        let conv = Conversation {
            id: "c".into(),
            title: Some("t".into()),
            messages: vec![
                Message {
                    role: "user".into(),
                    content: "small".into(),
                    timestamp: None,
                },
                Message {
                    role: "assistant".into(),
                    content: big,
                    timestamp: None,
                },
            ],
            created_at: None,
        };
        let mem = conversation_to_memory(&conv, Format::Claude).unwrap();
        assert!(mem.content.contains("small"));
        assert!(!mem.content.contains(&"z".repeat(100)));
    }

    // ---- truncate ----

    /// A limit that falls inside a multi-byte character backs off to the
    /// previous character boundary.
    #[test]
    fn truncate_respects_char_boundary() {
        let s = "héllo";
        // Byte 2 is in the middle of the two-byte "é".
        let out = truncate(s, 2);
        assert_eq!(out, "h");
    }

    /// A limit equal to the string length returns the string unchanged.
    #[test]
    fn truncate_at_exact_boundary_returns_unchanged() {
        let s = "abcdef";
        assert_eq!(truncate(s, 6), "abcdef");
    }

    /// A zero limit yields the empty string.
    #[test]
    fn truncate_zero_max_returns_empty() {
        let s = "héllo";
        assert_eq!(truncate(s, 0), "");
    }
}