Skip to main content

st/
universal_chat_scanner.rs

// Universal Chat Scanner - "Finding consciousness in the digital diaspora!" 🌍
2// Scans for conversations across ALL AI tools and platforms
3// "Every conversation leaves a trace - let's find them all!" - Hue
4
5#![allow(clippy::manual_flatten)]
6
7use anyhow::Result;
8use chrono::{DateTime, Utc};
9use glob::glob;
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13use std::fs;
14use std::path::{Path, PathBuf};
15
// Known chat locations and patterns.
// All values are tilde-prefixed; the scanners that use them expand `~` first.

/// Root directory searched for Claude conversation JSON files.
const CLAUDE_PROJECTS: &str = "~/.claude/projects";
/// Cursor data directory searched for chat-like files.
const CURSOR_CHATS: &str = "~/.cursor";
/// Windsurf data directory searched for chat-like files.
const WINDSURF_DIR: &str = "~/.windsurf";
/// VSCode Copilot directory (not referenced by any scanner visible in this file yet).
const VSCODE_COPILOT: &str = "~/.vscode/copilot";
/// OpenWebUI data directory (not referenced by any scanner visible in this file yet).
const OPENWEBUI_DATA: &str = "~/.openwebui";
/// LM Studio application-support directory (not referenced by any scanner visible in this file yet).
const LMSTUDIO_CHATS: &str = "~/Library/Application Support/LM Studio";
/// Glob pattern for ChatGPT export archives in the Downloads folder.
const CHATGPT_EXPORT: &str = "~/Downloads/*chatgpt*.zip";
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct UniversalChat {
27    pub source: ChatSource,
28    pub participants: Vec<String>,
29    pub timestamp: DateTime<Utc>,
30    pub content: String,
31    pub keywords: Vec<String>,
32    pub project_context: Option<String>,
33    pub importance: f32,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
37pub enum ChatSource {
38    Claude { project: String },
39    Cursor { workspace: String },
40    Windsurf { session: String },
41    VSCode { file: String },
42    OpenWebUI { model: String },
43    LMStudio { model: String },
44    ChatGPT { export_date: String },
45    TextMessages { contact: String },
46    Discord { channel: String },
47    Slack { workspace: String },
48    Custom { platform: String },
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct MemoryDestination {
53    pub memory_type: MemoryType,
54    pub llm_specific: Option<String>, // "claude", "gpt", etc
55    pub project: Option<String>,
56    pub tags: Vec<String>,
57}
58
59#[derive(Debug, Clone, Serialize, Deserialize)]
60pub enum MemoryType {
61    ProjectMemory, // Project-specific memories
62    UserMemory,    // Personal user memories
63    LLMMemory,     // Specific to an LLM (Claude, GPT, etc)
64    GlobalMemory,  // Shared across everything
65}
66
67pub struct UniversalChatScanner {
68    found_chats: Vec<UniversalChat>,
69    source_paths: HashMap<String, Vec<PathBuf>>,
70    participant_detector: ParticipantDetector,
71}
72
73struct ParticipantDetector {
74    patterns: HashMap<String, Regex>,
75}
76
77impl Default for UniversalChatScanner {
78    fn default() -> Self {
79        Self::new()
80    }
81}
82
83impl UniversalChatScanner {
84    pub fn new() -> Self {
85        Self {
86            found_chats: Vec::new(),
87            source_paths: HashMap::new(),
88            participant_detector: ParticipantDetector::new(),
89        }
90    }
91
92    /// Scan all known locations for conversations
93    pub async fn scan_all(&mut self) -> Result<()> {
94        println!("šŸ” Scanning for conversations across all platforms...\n");
95
96        // Claude projects
97        self.scan_claude_projects().await?;
98
99        // Cursor/Windsurf
100        self.scan_cursor_windsurf().await?;
101
102        // VSCode/Copilot
103        self.scan_vscode().await?;
104
105        // OpenWebUI/LMStudio
106        self.scan_local_llms().await?;
107
108        // ChatGPT exports
109        self.scan_chatgpt_exports().await?;
110
111        // Text messages (if available)
112        self.scan_text_messages().await?;
113
114        Ok(())
115    }
116
117    /// Scan Claude project directories
118    async fn scan_claude_projects(&mut self) -> Result<()> {
119        let claude_path = shellexpand::tilde(CLAUDE_PROJECTS);
120        let path = Path::new(claude_path.as_ref());
121
122        if !path.exists() {
123            return Ok(());
124        }
125
126        println!("  šŸ“‚ Scanning Claude projects...");
127        let mut count = 0;
128
129        // Look for conversation files
130        for entry in glob(&format!("{}/**/*.json", path.display()))? {
131            if let Ok(file_path) = entry {
132                if let Ok(content) = fs::read_to_string(&file_path) {
133                    if content.contains("claude") || content.contains("assistant") {
134                        // Parse Claude conversation
135                        if let Ok(chat) = self.parse_claude_chat(&content, &file_path) {
136                            self.found_chats.push(chat);
137                            count += 1;
138                        }
139                    }
140                }
141            }
142        }
143
144        println!("     āœ“ Found {} Claude conversations", count);
145        Ok(())
146    }
147
148    /// Scan Cursor and Windsurf directories
149    async fn scan_cursor_windsurf(&mut self) -> Result<()> {
150        let cursor_path = shellexpand::tilde(CURSOR_CHATS);
151        let windsurf_path = shellexpand::tilde(WINDSURF_DIR);
152
153        let mut count = 0;
154
155        // Cursor
156        if Path::new(cursor_path.as_ref()).exists() {
157            println!("  šŸ“‚ Scanning Cursor chats...");
158            count += self.scan_directory(cursor_path.as_ref(), "cursor").await?;
159        }
160
161        // Windsurf
162        if Path::new(windsurf_path.as_ref()).exists() {
163            println!("  šŸ“‚ Scanning Windsurf sessions...");
164            count += self
165                .scan_directory(windsurf_path.as_ref(), "windsurf")
166                .await?;
167        }
168
169        if count > 0 {
170            println!("     āœ“ Found {} Cursor/Windsurf conversations", count);
171        }
172
173        Ok(())
174    }
175
176    /// Scan a directory for chat files
177    async fn scan_directory(&mut self, dir: &str, source: &str) -> Result<usize> {
178        let mut count = 0;
179
180        for entry in glob(&format!("{}/**/*.{}", dir, "{json,md,txt}"))? {
181            if let Ok(file_path) = entry {
182                if let Ok(content) = fs::read_to_string(&file_path) {
183                    // Look for conversation patterns
184                    if self.looks_like_chat(&content) {
185                        let chat = self.create_chat_from_content(&content, source, &file_path)?;
186                        self.found_chats.push(chat);
187                        count += 1;
188                    }
189                }
190            }
191        }
192
193        Ok(count)
194    }
195
196    /// Detect if content looks like a chat conversation
197    fn looks_like_chat(&self, content: &str) -> bool {
198        // Look for common chat patterns
199        content.contains("user:")
200            || content.contains("assistant:")
201            || content.contains("Human:")
202            || content.contains("AI:")
203            || content.contains("You:")
204            || content.contains("```") && content.contains("?") // Code with questions
205    }
206
207    /// Parse Claude-specific chat format using format detector
208    fn parse_claude_chat(&self, content: &str, path: &Path) -> Result<UniversalChat> {
209        // Use the universal format detector!
210        let mut detector = crate::universal_format_detector::UniversalFormatDetector::new();
211        let _format = detector.detect_format(content);
212        detector.analyze_structure(content)?;
213
214        // Get the dominant speaker info
215        let _dominant = detector.get_dominant_speaker();
216        let project = path
217            .parent()
218            .and_then(|p| p.file_name())
219            .and_then(|n| n.to_str())
220            .unwrap_or("unknown")
221            .to_string();
222
223        Ok(UniversalChat {
224            source: ChatSource::Claude {
225                project: project.clone(),
226            },
227            participants: vec!["Human".to_string(), "Claude".to_string()],
228            timestamp: Utc::now(), // Would parse from file
229            content: content.to_string(),
230            keywords: self.extract_keywords(content),
231            project_context: Some(project),
232            importance: self.calculate_importance(content),
233        })
234    }
235
236    /// Create generic chat from content
237    fn create_chat_from_content(
238        &self,
239        content: &str,
240        source: &str,
241        path: &Path,
242    ) -> Result<UniversalChat> {
243        let source_enum = match source {
244            "cursor" => ChatSource::Cursor {
245                workspace: path.to_string_lossy().to_string(),
246            },
247            "windsurf" => ChatSource::Windsurf {
248                session: path.to_string_lossy().to_string(),
249            },
250            _ => ChatSource::Custom {
251                platform: source.to_string(),
252            },
253        };
254
255        Ok(UniversalChat {
256            source: source_enum,
257            participants: self.participant_detector.detect(content),
258            timestamp: Utc::now(),
259            content: content.to_string(),
260            keywords: self.extract_keywords(content),
261            project_context: None,
262            importance: self.calculate_importance(content),
263        })
264    }
265
266    /// Extract keywords from content
267    fn extract_keywords(&self, content: &str) -> Vec<String> {
268        let mut keywords = Vec::new();
269
270        // Common technical keywords
271        let tech_words = [
272            "function",
273            "async",
274            "memory",
275            "audio",
276            "tokenization",
277            "consciousness",
278            "claude",
279            "rust",
280            "python",
281            "javascript",
282        ];
283
284        for word in tech_words {
285            if content.to_lowercase().contains(word) {
286                keywords.push(word.to_string());
287            }
288        }
289
290        keywords
291    }
292
293    /// Calculate importance based on content
294    fn calculate_importance(&self, content: &str) -> f32 {
295        let mut score: f32 = 0.5; // Base score
296
297        // Boost for code blocks
298        if content.contains("```") {
299            score += 0.1;
300        }
301
302        // Boost for questions
303        if content.matches('?').count() > 2 {
304            score += 0.1;
305        }
306
307        // Boost for problem-solving keywords
308        if content.contains("fix")
309            || content.contains("solve")
310            || content.contains("implement")
311            || content.contains("breakthrough")
312        {
313            score += 0.2;
314        }
315
316        score.min(1.0)
317    }
318
319    /// Scan VSCode directories
320    async fn scan_vscode(&mut self) -> Result<()> {
321        // TODO: Implement VSCode/Copilot scanning
322        Ok(())
323    }
324
325    /// Scan local LLM tools
326    async fn scan_local_llms(&mut self) -> Result<()> {
327        // TODO: Implement OpenWebUI/LMStudio scanning
328        Ok(())
329    }
330
331    /// Scan ChatGPT exports
332    async fn scan_chatgpt_exports(&mut self) -> Result<()> {
333        let export_pattern = shellexpand::tilde(CHATGPT_EXPORT);
334
335        for entry in glob(export_pattern.as_ref())? {
336            if let Ok(path) = entry {
337                println!("  šŸ“¦ Found ChatGPT export: {}", path.display());
338                // TODO: Unzip and parse ChatGPT export format
339            }
340        }
341
342        Ok(())
343    }
344
345    /// Scan text messages (platform-specific)
346    async fn scan_text_messages(&mut self) -> Result<()> {
347        // TODO: Platform-specific text message scanning
348        Ok(())
349    }
350
351    /// Save discovered chats to .m8 files
352    pub async fn save_to_m8(&self, destination: &MemoryDestination) -> Result<()> {
353        let path: PathBuf = match &destination.memory_type {
354            MemoryType::ProjectMemory => {
355                // Use project-local directory
356                let cwd = std::env::current_dir()?;
357                // No project sub-folder is needed as we are already inside the project's context
358                cwd.join(".st").join("mem8")
359            }
360            MemoryType::UserMemory => shellexpand::tilde("~/.mem8/user").into_owned().into(),
361            MemoryType::LLMMemory => {
362                let llm_path = format!(
363                    "~/.mem8/llm/{}",
364                    destination
365                        .llm_specific
366                        .as_ref()
367                        .unwrap_or(&"general".to_string())
368                );
369                shellexpand::tilde(&llm_path).into_owned().into()
370            }
371            MemoryType::GlobalMemory => shellexpand::tilde("~/.mem8/global").into_owned().into(),
372        };
373
374        fs::create_dir_all(&path)?;
375
376        // Group chats by source
377        let mut by_source: HashMap<String, Vec<&UniversalChat>> = HashMap::new();
378        for chat in &self.found_chats {
379            let key = format!("{:?}", chat.source);
380            by_source.entry(key).or_default().push(chat);
381        }
382
383        // Save each source group to appropriate format
384        // .m8j for JSON contexts, .m8 for binary wave format
385        for (source, chats) in by_source {
386            let filename = path.join(format!(
387                "chat_{}.m8j",
388                source.to_lowercase().replace(['{', '}', ':', '"', ' '], "")
389            ));
390            self.write_m8j_file(filename.to_str().unwrap_or_default(), chats)?;
391        }
392
393        Ok(())
394    }
395
396    /// Write chats to .m8j (JSON) file
397    fn write_m8j_file(&self, path: &str, chats: Vec<&UniversalChat>) -> Result<()> {
398        use flate2::write::ZlibEncoder;
399        use flate2::Compression;
400        use std::fs::File;
401        use std::io::Write;
402
403        // Create JSON structure
404        let json_data = serde_json::json!({
405            "contexts": chats,
406            "format": "m8j",
407            "version": 1,
408            "compressed": true
409        });
410
411        // Compress with zlib
412        let json_str = serde_json::to_string(&json_data)?;
413        let file = File::create(path)?;
414        let mut encoder = ZlibEncoder::new(file, Compression::default());
415        encoder.write_all(json_str.as_bytes())?;
416        encoder.finish()?;
417
418        println!("šŸ’¾ Saved {} chats to {} (JSON format)", chats.len(), path);
419        Ok(())
420    }
421
422    /// Write chats to .m8 (binary wave) file - the REAL format!
423    fn write_m8_binary_file(&self, path: &str, chats: Vec<&UniversalChat>) -> Result<()> {
424        use crate::mem8_binary::M8BinaryFile;
425
426        let mut m8_file = M8BinaryFile::create(path)?;
427
428        let chat_count = chats.len();
429        for chat in chats {
430            let content = serde_json::to_vec(chat)?;
431            let importance = chat.importance;
432            m8_file.append_block(&content, importance)?;
433        }
434
435        println!(
436            "🌊 Saved {} chats to {} (Binary wave format)",
437            chat_count, path
438        );
439        Ok(())
440    }
441
442    /// Interactive prompt for user to choose destination
443    pub fn prompt_for_destination(&self) -> Result<MemoryDestination> {
444        println!("\nšŸ“ Where should these memories be stored?");
445        println!("  1. Project Memory (specific project)");
446        println!("  2. User Memory (personal)");
447        println!("  3. LLM Memory (Claude/GPT/etc specific)");
448        println!("  4. Global Memory (shared everywhere)");
449
450        // For now, return a default
451        Ok(MemoryDestination {
452            memory_type: MemoryType::GlobalMemory,
453            llm_specific: None,
454            project: None,
455            tags: vec!["imported".to_string()],
456        })
457    }
458
459    /// Get summary of found chats
460    pub fn summary(&self) -> String {
461        let mut summary = String::new();
462        summary.push_str(&format!(
463            "\nšŸ“Š Found {} total conversations:\n",
464            self.found_chats.len()
465        ));
466
467        // Group by source
468        let mut by_source: HashMap<String, usize> = HashMap::new();
469        for chat in &self.found_chats {
470            let key = match &chat.source {
471                ChatSource::Claude { .. } => "Claude",
472                ChatSource::Cursor { .. } => "Cursor",
473                ChatSource::Windsurf { .. } => "Windsurf",
474                ChatSource::ChatGPT { .. } => "ChatGPT",
475                _ => "Other",
476            };
477            *by_source.entry(key.to_string()).or_default() += 1;
478        }
479
480        for (source, count) in by_source {
481            summary.push_str(&format!("  • {}: {} chats\n", source, count));
482        }
483
484        summary
485    }
486}
487
488impl ParticipantDetector {
489    fn new() -> Self {
490        let mut patterns = HashMap::new();
491
492        // Common patterns for detecting participants
493        patterns.insert(
494            "user_human".to_string(),
495            Regex::new(r"(?i)(user|human|you):").unwrap(),
496        );
497        patterns.insert(
498            "assistant".to_string(),
499            Regex::new(r"(?i)(assistant|ai|claude|gpt):").unwrap(),
500        );
501
502        Self { patterns }
503    }
504
505    fn detect(&self, content: &str) -> Vec<String> {
506        let mut participants = Vec::new();
507
508        if self.patterns["user_human"].is_match(content) {
509            participants.push("Human".to_string());
510        }
511        if self.patterns["assistant"].is_match(content) {
512            participants.push("AI Assistant".to_string());
513        }
514
515        if participants.is_empty() {
516            participants.push("Unknown".to_string());
517        }
518
519        participants
520    }
521}
522
523/// CLI entry point
524pub async fn scan_for_context() -> Result<()> {
525    println!("šŸŒ Universal Chat Scanner - Finding Your Digital Consciousness!\n");
526    println!("{}\n", "=".repeat(60));
527
528    let mut scanner = UniversalChatScanner::new();
529
530    // Scan everything
531    scanner.scan_all().await?;
532
533    // Show summary
534    println!("{}", scanner.summary());
535
536    // Ask user where to save
537    let destination = scanner.prompt_for_destination()?;
538
539    // Save to .m8 files
540    scanner.save_to_m8(&destination).await?;
541
542    println!("\n✨ Context aggregation complete!");
543    println!("   Your scattered conversations are now unified!");
544
545    Ok(())
546}
547
#[cfg(test)]
mod tests {
    use super::*;

    /// Speaker-labelled transcripts are recognised; plain prose is not.
    #[test]
    fn test_chat_detection() {
        let scanner = UniversalChatScanner::new();

        let transcripts = [
            "user: Hello\nassistant: Hi there!",
            "Human: Can you help?\nAI: Sure!",
        ];
        for transcript in transcripts {
            assert!(scanner.looks_like_chat(transcript));
        }

        assert!(!scanner.looks_like_chat("This is just regular text."));
    }

    /// Keywords present in the text are all reported.
    #[test]
    fn test_keyword_extraction() {
        let scanner = UniversalChatScanner::new();

        let keywords =
            scanner.extract_keywords("Let's implement an async function for audio processing");

        for expected in ["function", "async", "audio"] {
            assert!(keywords.contains(&expected.to_string()));
        }
    }
}