Skip to main content

st/mcp/
context_absorber.rs

1// Context Absorber - Automatically absorbs project-related context from JSON files
2// "Like a knowledge sponge that never stops learning!" - Aye
3
4#![allow(clippy::manual_flatten)]
5
6use crate::feature_flags;
7use anyhow::Result;
8use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
9use serde::{Deserialize, Serialize};
10use serde_json::Value;
11use std::collections::HashSet;
12use std::fs;
13use std::path::{Path, PathBuf};
14use std::sync::mpsc::{channel, Receiver, Sender};
15use std::sync::{Arc, Mutex};
16use std::thread;
17use std::time::SystemTime;
18
/// User-editable settings that control which files the context absorber may
/// look at. Serialized to and from JSON via serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WatchPermissions {
    /// Directories to watch. `ContextAbsorber::new` tilde-expands each entry
    /// and keeps only those that exist.
    pub allowed_paths: Vec<String>,
    /// Path patterns for files that must not be absorbed; `absorb_file`
    /// rejects a matching path with a "Path is excluded" error.
    pub excluded_paths: Vec<String>,
    /// NOTE(review): not read anywhere in this file — confirm whether another
    /// module consumes it.
    pub auto_absorb: bool,
    /// When true, the absorption thread prints a line for every absorbed file.
    pub notify_on_absorption: bool,
    /// Upper bound on file size, in mebibytes; larger files are rejected by
    /// `absorb_file` with a "File too large" error.
    pub max_file_size_mb: u64,
}
27
28impl Default for WatchPermissions {
29    fn default() -> Self {
30        Self {
31            allowed_paths: vec![
32                "~/Documents/".to_string(),
33                "~/.config/".to_string(),
34                "~/Library/Application Support/".to_string(),
35                "~/.cursor/".to_string(), // Cursor AI logs
36                "~/.vscode/".to_string(), // VS Code extensions data
37                "~/Library/Application Support/Code/".to_string(), // VS Code on Mac
38                "~/.local/share/".to_string(), // Linux app data
39                "~/.cache/".to_string(),  // Cache dirs often have AI logs
40            ],
41            excluded_paths: vec![
42                "~/.ssh/".to_string(),
43                "~/.aws/".to_string(),
44                "~/.gnupg/".to_string(),
45                "**/node_modules/**".to_string(),
46                "**/.git/**".to_string(),
47            ],
48            auto_absorb: true,
49            notify_on_absorption: true,
50            max_file_size_mb: 10,
51        }
52    }
53}
54
/// One piece of project-related context extracted from a single file.
/// Serialized to and from JSON via serde (this is the element type of the
/// `.st/absorbed_context.m8` file written by `append_to_m8`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AbsorbedContext {
    /// Moment the file was absorbed (`SystemTime::now()` in `absorb_file`).
    pub timestamp: SystemTime,
    /// Path of the file the content was extracted from.
    pub origin: PathBuf,
    /// Project name the content was matched against.
    pub project_name: String,
    /// Label describing the source format, e.g. the result of
    /// `detect_content_type`, or "jsonl_stream", "markdown_document",
    /// "text_file".
    pub content_type: String,
    /// The extracted, project-relevant portion of the file as JSON.
    pub content: Value,
    /// Score produced by `calculate_relevance`; between 0.0 and 1.0.
    pub relevance_score: f64,
    /// Keywords produced by `extract_keywords` from `content`.
    pub keywords: Vec<String>,
}
65
/// Watches configured directories and absorbs files that reference a project.
pub struct ContextAbsorber {
    /// Name of the project whose mentions are being collected.
    project_name: String,
    /// Tilde-expanded, existing directories derived from the permissions.
    watch_paths: Vec<PathBuf>,
    /// Permissions loaded from (or created in) the user's config file.
    permissions: WatchPermissions,
    /// Contexts absorbed so far; shared with the absorption thread.
    absorbed_contexts: Arc<Mutex<Vec<AbsorbedContext>>>,
    /// Active file-system watcher; `None` until `start_watching` succeeds and
    /// again after `stop_watching`.
    watcher: Option<RecommendedWatcher>,
    /// Sending half used to queue events for the absorption thread. Replaced
    /// with a fresh channel by `start_absorption_thread`.
    sender: Sender<AbsorptionEvent>,
    /// Receiving half of the channel created in `new`.
    /// NOTE(review): never read within this file — confirm it is still needed.
    receiver: Receiver<AbsorptionEvent>,
    /// Cut-off for the initial scan: only files modified after this instant
    /// are examined. Reset to "now" when the scan finishes.
    last_absorption_time: Arc<Mutex<SystemTime>>,
}
76
/// Messages sent over the channel to the background absorption thread.
#[derive(Debug)]
enum AbsorptionEvent {
    /// The file at this path should be absorbed.
    FileChanged(PathBuf),
    /// The absorption thread should leave its receive loop.
    Stop,
}
82
83impl ContextAbsorber {
84    pub fn new(project_name: String) -> Result<Self> {
85        let (sender, receiver) = channel();
86
87        // Load or create permissions
88        let permissions = Self::load_permissions()?;
89
90        // Expand watch paths
91        let watch_paths = permissions
92            .allowed_paths
93            .iter()
94            .map(|p| shellexpand::tilde(p))
95            .map(|p| PathBuf::from(p.to_string()))
96            .filter(|p| p.exists())
97            .collect();
98
99        // Load last absorption time from M8 file if it exists
100        let last_time = Self::load_last_absorption_time(&project_name).unwrap_or(
101            SystemTime::now() - std::time::Duration::from_secs(604800), // Virgin M8? Go back 7 days!
102        );
103
104        Ok(Self {
105            project_name,
106            watch_paths,
107            permissions,
108            absorbed_contexts: Arc::new(Mutex::new(Vec::new())),
109            watcher: None,
110            sender,
111            receiver,
112            last_absorption_time: Arc::new(Mutex::new(last_time)),
113        })
114    }
115
116    fn load_last_absorption_time(_project_name: &str) -> Option<SystemTime> {
117        let cwd = std::env::current_dir().ok()?;
118        let m8_path = cwd.join(".st").join("absorbed_context.m8");
119
120        if m8_path.exists() {
121            // Get the modification time of the M8 file
122            fs::metadata(&m8_path)
123                .ok()
124                .and_then(|meta| meta.modified().ok())
125        } else {
126            None
127        }
128    }
129
130    fn load_permissions() -> Result<WatchPermissions> {
131        let perm_path = dirs::home_dir()
132            .unwrap_or_else(|| PathBuf::from("."))
133            .join(".mem8")
134            .join("watch_permissions.json");
135
136        if perm_path.exists() {
137            let content = fs::read_to_string(&perm_path)?;
138            Ok(serde_json::from_str(&content)?)
139        } else {
140            // Create default permissions
141            let permissions = WatchPermissions::default();
142            if let Some(parent) = perm_path.parent() {
143                fs::create_dir_all(parent)?;
144            }
145            fs::write(&perm_path, serde_json::to_string_pretty(&permissions)?)?;
146            Ok(permissions)
147        }
148    }
149
150    pub fn start_watching(&mut self) -> Result<()> {
151        // Check if file watching is enabled
152        let flags = feature_flags::features();
153        if !flags.enable_file_watching {
154            eprintln!("File watching is disabled by configuration");
155            return Ok(());
156        }
157
158        let sender = self.sender.clone();
159        let project_name = self.project_name.clone();
160
161        // Create watcher
162        let mut watcher = RecommendedWatcher::new(
163            move |res: Result<Event, notify::Error>| {
164                if let Ok(event) = res {
165                    match event.kind {
166                        EventKind::Create(_) => {
167                            // NEW FILE CREATED!
168                            for path in event.paths {
169                                let ext = path.extension().and_then(|s| s.to_str());
170                                // Watch JSON, JSONL, and Markdown files!
171                                if ext == Some("json")
172                                    || ext == Some("jsonl")
173                                    || ext == Some("md")
174                                    || ext == Some("markdown")
175                                {
176                                    println!("🆕 New file detected: {}", path.display());
177                                    // For new files, always check them
178                                    if let Ok(content) = fs::read_to_string(&path) {
179                                        // Use smart project detection instead of simple contains
180                                        if crate::mcp::smart_project_detector::contains_project_reference(&content, &project_name) {
181                                            println!("   📎 Contains project reference! Absorbing...");
182                                            let _ = sender.send(AbsorptionEvent::FileChanged(path));
183                                        } else {
184                                            println!("   ⏭️  No project references found");
185                                        }
186                                    }
187                                }
188                            }
189                        }
190                        EventKind::Modify(_) => {
191                            // EXISTING FILE MODIFIED
192                            for path in event.paths {
193                                let ext = path.extension().and_then(|s| s.to_str());
194                                if ext == Some("json")
195                                    || ext == Some("jsonl")
196                                    || ext == Some("md")
197                                    || ext == Some("markdown")
198                                {
199                                    // Check if file might contain project name using smart detection
200                                    if let Ok(content) = fs::read_to_string(&path) {
201                                        if crate::mcp::smart_project_detector::contains_project_reference(&content, &project_name) {
202                                            let _ = sender.send(AbsorptionEvent::FileChanged(path));
203                                        }
204                                    }
205                                }
206                            }
207                        }
208                        _ => {}
209                    }
210                }
211            },
212            Config::default(),
213        )?;
214
215        // Watch all configured paths
216        for path in &self.watch_paths {
217            if path.exists() {
218                watcher.watch(path, RecursiveMode::Recursive)?;
219                println!("👁️  Watching: {}", path.display());
220            }
221        }
222
223        self.watcher = Some(watcher);
224
225        // Start absorption thread
226        self.start_absorption_thread();
227
228        // Do initial scan of existing files (only those modified since last absorption)
229        self.initial_scan()?;
230
231        Ok(())
232    }
233
234    fn initial_scan(&self) -> Result<()> {
235        println!("🔍 Initial scan for files modified since last absorption...");
236
237        let last_time = *self.last_absorption_time.lock().unwrap();
238        let mut files_to_check = Vec::new();
239
240        // Scan watch paths for relevant files
241        for watch_path in &self.watch_paths {
242            if watch_path.is_dir() {
243                // Find JSON, JSONL, and MD files
244                let patterns = vec!["*.json", "*.jsonl", "*.md", "*.markdown"];
245                for pattern in patterns {
246                    let glob_pattern = format!("{}/{}", watch_path.display(), pattern);
247                    if let Ok(paths) = glob::glob(&glob_pattern) {
248                        for path_result in paths {
249                            if let Ok(path) = path_result {
250                                // Check modification time
251                                if let Ok(metadata) = fs::metadata(&path) {
252                                    if let Ok(modified) = metadata.modified() {
253                                        if modified > last_time {
254                                            files_to_check.push(path);
255                                        }
256                                    }
257                                }
258                            }
259                        }
260                    }
261
262                    // Also check subdirectories (one level deep for performance)
263                    let recursive_pattern = format!("{}/*/{}", watch_path.display(), pattern);
264                    if let Ok(paths) = glob::glob(&recursive_pattern) {
265                        for path_result in paths.take(100) {
266                            // Limit to avoid scanning too much
267                            if let Ok(path) = path_result {
268                                if let Ok(metadata) = fs::metadata(&path) {
269                                    if let Ok(modified) = metadata.modified() {
270                                        if modified > last_time {
271                                            files_to_check.push(path);
272                                        }
273                                    }
274                                }
275                            }
276                        }
277                    }
278                }
279            }
280        }
281
282        println!(
283            "📊 Found {} files modified since last absorption",
284            files_to_check.len()
285        );
286
287        // Process files that might contain project references
288        let mut absorbed_count = 0;
289        for path in files_to_check {
290            // Quick check if file might contain project reference
291            if let Ok(content) = fs::read_to_string(&path) {
292                if crate::mcp::smart_project_detector::contains_project_reference(
293                    &content,
294                    &self.project_name,
295                ) {
296                    println!("   📎 Absorbing: {}", path.display());
297                    let _ = self.sender.send(AbsorptionEvent::FileChanged(path));
298                    absorbed_count += 1;
299                }
300            }
301        }
302
303        println!(
304            "✅ Initial scan complete! Absorbed {} files",
305            absorbed_count
306        );
307
308        // Update last absorption time
309        *self.last_absorption_time.lock().unwrap() = SystemTime::now();
310
311        Ok(())
312    }
313
314    fn start_absorption_thread(&mut self) {
315        // Create a new channel for this thread
316        let (tx, rx) = channel();
317        self.sender = tx; // Update sender with new channel
318
319        let project_name = self.project_name.clone();
320        let contexts = self.absorbed_contexts.clone();
321        let permissions = self.permissions.clone();
322
323        thread::spawn(move || {
324            while let Ok(event) = rx.recv() {
325                match event {
326                    AbsorptionEvent::FileChanged(path) => {
327                        if let Ok(context) = Self::absorb_file(&path, &project_name, &permissions) {
328                            if permissions.notify_on_absorption {
329                                println!("🧽 Absorbed context from: {}", path.display());
330                                println!("   Relevance: {:.2}", context.relevance_score);
331                            }
332
333                            // Store context
334                            if let Ok(mut ctx_lock) = contexts.lock() {
335                                ctx_lock.push(context.clone());
336                            }
337
338                            // Write to M8 file
339                            let _ = Self::append_to_m8(&context);
340                        }
341                    }
342                    AbsorptionEvent::Stop => break,
343                }
344            }
345        });
346    }
347
348    fn absorb_file(
349        path: &Path,
350        project_name: &str,
351        permissions: &WatchPermissions,
352    ) -> Result<AbsorbedContext> {
353        // Check file size
354        let metadata = fs::metadata(path)?;
355        if metadata.len() > permissions.max_file_size_mb * 1024 * 1024 {
356            return Err(anyhow::anyhow!("File too large"));
357        }
358
359        // Check if path is excluded
360        for excluded in &permissions.excluded_paths {
361            if path
362                .to_string_lossy()
363                .contains(excluded.trim_start_matches('*'))
364            {
365                return Err(anyhow::anyhow!("Path is excluded"));
366            }
367        }
368
369        // Read file content
370        let content = fs::read_to_string(path)?;
371
372        // Determine file type and parse accordingly
373        let ext = path.extension().and_then(|s| s.to_str());
374        let (parsed_content, content_type) = match ext {
375            Some("json") => {
376                // Parse as JSON
377                let json: Value = serde_json::from_str(&content)?;
378                let relevant = Self::extract_relevant_content(&json, project_name);
379                (relevant, Self::detect_content_type(&json))
380            }
381            Some("jsonl") => {
382                // Parse as JSONL (JSON Lines - one JSON object per line)
383                let relevant = Self::extract_jsonl_content(&content, project_name)?;
384                (relevant, "jsonl_stream".to_string())
385            }
386            Some("md") | Some("markdown") => {
387                // Parse Markdown - extract relevant sections
388                let relevant = Self::extract_markdown_content(&content, project_name);
389                (relevant, "markdown_document".to_string())
390            }
391            _ => {
392                // Treat as plain text
393                let relevant = Self::extract_text_content(&content, project_name);
394                (relevant, "text_file".to_string())
395            }
396        };
397
398        let keywords = Self::extract_keywords(&parsed_content);
399        let relevance_score = Self::calculate_relevance(&parsed_content, project_name);
400
401        Ok(AbsorbedContext {
402            timestamp: SystemTime::now(),
403            origin: path.to_path_buf(),
404            project_name: project_name.to_string(),
405            content_type,
406            content: parsed_content,
407            relevance_score,
408            keywords,
409        })
410    }
411
412    fn extract_relevant_content(json: &Value, project_name: &str) -> Value {
413        let mut relevant = serde_json::json!({});
414
415        // Recursively find all mentions of project name
416        Self::find_mentions(json, project_name, &mut relevant);
417
418        relevant
419    }
420
421    fn find_mentions(json: &Value, needle: &str, result: &mut Value) {
422        match json {
423            Value::Object(map) => {
424                for (key, value) in map {
425                    if key.contains(needle) || value.to_string().contains(needle) {
426                        result[key] = value.clone();
427                    }
428                    Self::find_mentions(value, needle, result);
429                }
430            }
431            Value::Array(arr) => {
432                for item in arr {
433                    if item.to_string().contains(needle) {
434                        if let Value::Array(ref mut res_arr) = result["mentions"] {
435                            res_arr.push(item.clone());
436                        } else {
437                            result["mentions"] = serde_json::json!([item.clone()]);
438                        }
439                    }
440                }
441            }
442            _ => {}
443        }
444    }
445
446    fn extract_keywords(content: &Value) -> Vec<String> {
447        let mut keywords = HashSet::new();
448        let text = content.to_string();
449
450        // Simple keyword extraction (can be improved with NLP)
451        for word in text.split_whitespace() {
452            let clean = word.trim_matches(|c: char| !c.is_alphanumeric());
453            if clean.len() > 4 && !STOP_WORDS.contains(&clean.to_lowercase().as_str()) {
454                keywords.insert(clean.to_string());
455            }
456        }
457
458        keywords.into_iter().collect()
459    }
460
461    fn calculate_relevance(content: &Value, project_name: &str) -> f64 {
462        let text = content.to_string();
463        let mentions = text.matches(project_name).count();
464        let total_words = text.split_whitespace().count();
465
466        if total_words == 0 {
467            return 0.0;
468        }
469
470        // Simple relevance: mentions per 100 words, capped at 1.0
471        ((mentions as f64 / total_words as f64) * 100.0).min(1.0)
472    }
473
474    fn detect_content_type(json: &Value) -> String {
475        // Try to detect what kind of JSON this is - Including AI assistants!
476        if json.get("conversations").is_some() {
477            "claude_conversation".to_string()
478        } else if json.get("messages").is_some() && json.get("model").is_some() {
479            // Cursor AI chat format
480            "cursor_ai_chat".to_string()
481        } else if json.get("cells").is_some() && json.get("metadata").is_some() {
482            // Jupyter notebook with AI interactions
483            "jupyter_notebook".to_string()
484        } else if json.get("entries").is_some() || json.get("chats").is_some() {
485            // VS Code Copilot Chat or other VS Code AI extensions
486            "vscode_ai_chat".to_string()
487        } else if json.get("workspaceFolders").is_some() {
488            "vscode_workspace".to_string()
489        } else if json.get("dependencies").is_some() {
490            "package_json".to_string()
491        } else if json.get("config").is_some() {
492            "configuration".to_string()
493        } else if json.get("prompts").is_some() || json.get("completions").is_some() {
494            // GitHub Copilot suggestions log
495            "copilot_suggestions".to_string()
496        } else {
497            "generic_json".to_string()
498        }
499    }
500
501    fn extract_jsonl_content(content: &str, project_name: &str) -> Result<Value> {
502        let mut relevant_lines = Vec::new();
503
504        // Parse each line as JSON
505        for line in content.lines() {
506            if line.trim().is_empty() {
507                continue;
508            }
509
510            // Try to parse the line as JSON
511            if let Ok(json) = serde_json::from_str::<Value>(line) {
512                // Check if this line mentions the project
513                if json.to_string().contains(project_name) {
514                    relevant_lines.push(json);
515                }
516            }
517        }
518
519        Ok(serde_json::json!({
520            "jsonl_entries": relevant_lines,
521            "total_relevant": relevant_lines.len()
522        }))
523    }
524
525    fn extract_markdown_content(content: &str, project_name: &str) -> Value {
526        let mut sections = Vec::new();
527        let mut current_section = String::new();
528        let mut in_relevant_section = false;
529
530        for line in content.lines() {
531            // Check if line mentions project
532            if line.contains(project_name) {
533                in_relevant_section = true;
534            }
535
536            // Capture headers and relevant content
537            if line.starts_with('#') || in_relevant_section {
538                current_section.push_str(line);
539                current_section.push('\n');
540
541                // If we have a good chunk, save it
542                if current_section.len() > 500 {
543                    sections.push(current_section.clone());
544                    current_section.clear();
545                    in_relevant_section = false;
546                }
547            }
548        }
549
550        // Save any remaining content
551        if !current_section.is_empty() {
552            sections.push(current_section);
553        }
554
555        serde_json::json!({
556            "markdown_sections": sections,
557            "mentions_count": content.matches(project_name).count()
558        })
559    }
560
561    fn extract_text_content(content: &str, project_name: &str) -> Value {
562        // Extract lines that mention the project
563        let relevant_lines: Vec<String> = content
564            .lines()
565            .filter(|line| line.contains(project_name))
566            .map(|s| s.to_string())
567            .collect();
568
569        // Also get some context around mentions
570        let mut context_snippets = Vec::new();
571        let lines: Vec<&str> = content.lines().collect();
572
573        for (i, line) in lines.iter().enumerate() {
574            if line.contains(project_name) {
575                let start = i.saturating_sub(2);
576                let end = (i + 3).min(lines.len());
577                let snippet = lines[start..end].join("\n");
578                context_snippets.push(snippet);
579            }
580        }
581
582        serde_json::json!({
583            "relevant_lines": relevant_lines,
584            "context_snippets": context_snippets,
585            "total_mentions": content.matches(project_name).count()
586        })
587    }
588
589    fn append_to_m8(context: &AbsorbedContext) -> Result<()> {
590        let cwd = std::env::current_dir()?;
591        let st_dir = cwd.join(".st");
592        let m8_path = st_dir.join("absorbed_context.m8");
593
594        // Ensure directory exists
595        fs::create_dir_all(&st_dir)?;
596
597        // Append context to M8 file
598        let mut existing = if m8_path.exists() {
599            let content = fs::read_to_string(&m8_path)?;
600            if content.is_empty() {
601                Vec::new()
602            } else {
603                serde_json::from_str::<Vec<AbsorbedContext>>(&content).unwrap_or_default()
604            }
605        } else {
606            Vec::new()
607        };
608
609        existing.push(context.clone());
610
611        // Keep only last 100 contexts
612        if existing.len() > 100 {
613            let skip_count = existing.len().saturating_sub(100);
614            existing = existing.into_iter().skip(skip_count).collect();
615        }
616
617        fs::write(&m8_path, serde_json::to_string_pretty(&existing)?)?;
618
619        Ok(())
620    }
621
622    pub fn stop_watching(&mut self) {
623        let _ = self.sender.send(AbsorptionEvent::Stop);
624        self.watcher = None;
625    }
626
627    pub fn get_absorbed_contexts(&self) -> Vec<AbsorbedContext> {
628        self.absorbed_contexts
629            .lock()
630            .unwrap_or_else(|e| e.into_inner())
631            .clone()
632    }
633}
634
/// Common stop words that `extract_keywords` ignores. Candidates are
/// lower-cased before the lookup, so entries must be lower case. Entries of
/// four bytes or fewer are never consulted, because `extract_keywords`
/// discards such short words before checking this list.
const STOP_WORDS: &[&str] = &[
    "the", "and", "for", "with", "this", "that", "from", "into", "over", "under", "about",
    "through", "between", "after", "before", "during",
];
640
641// MCP tool integration
642pub async fn handle_context_absorber(params: Value) -> Result<Value> {
643    let action = params["action"].as_str().unwrap_or("status");
644    let project_name = params["project_name"].as_str().unwrap_or("smart-tree");
645
646    match action {
647        "start" => {
648            let mut absorber = ContextAbsorber::new(project_name.to_string())?;
649            absorber.start_watching()?;
650
651            Ok(serde_json::json!({
652                "status": "started",
653                "project": project_name,
654                "watching_paths": absorber.watch_paths,
655                "message": format!("🧽 Context absorber started for '{}'", project_name)
656            }))
657        }
658        "status" => {
659            let cwd = std::env::current_dir()?;
660            let m8_path = cwd.join(".st").join("absorbed_context.m8");
661
662            let count = if m8_path.exists() {
663                let content = fs::read_to_string(&m8_path)?;
664                serde_json::from_str::<Vec<AbsorbedContext>>(&content)
665                    .map(|v| v.len())
666                    .unwrap_or(0)
667            } else {
668                0
669            };
670
671            Ok(serde_json::json!({
672                "status": "ready",
673                "project": project_name,
674                "absorbed_contexts": count,
675                "m8_file": m8_path.to_string_lossy()
676            }))
677        }
678        "configure" => {
679            let perm_path = dirs::home_dir()
680                .unwrap_or_else(|| PathBuf::from("."))
681                .join(".mem8")
682                .join("watch_permissions.json");
683
684            Ok(serde_json::json!({
685                "permissions_file": perm_path.to_string_lossy(),
686                "current_permissions": WatchPermissions::default(),
687                "message": "Edit the permissions file to configure watching"
688            }))
689        }
690        _ => Err(anyhow::anyhow!("Unknown action: {}", action)),
691    }
692}