1#![allow(clippy::manual_flatten)]
5
6use crate::feature_flags;
7use anyhow::Result;
8use notify::{Config, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
9use serde::{Deserialize, Serialize};
10use serde_json::Value;
11use std::collections::HashSet;
12use std::fs;
13use std::path::{Path, PathBuf};
14use std::sync::mpsc::{channel, Receiver, Sender};
15use std::sync::{Arc, Mutex};
16use std::thread;
17use std::time::SystemTime;
18
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct WatchPermissions {
21 pub allowed_paths: Vec<String>,
22 pub excluded_paths: Vec<String>,
23 pub auto_absorb: bool,
24 pub notify_on_absorption: bool,
25 pub max_file_size_mb: u64,
26}
27
28impl Default for WatchPermissions {
29 fn default() -> Self {
30 Self {
31 allowed_paths: vec![
32 "~/Documents/".to_string(),
33 "~/.config/".to_string(),
34 "~/Library/Application Support/".to_string(),
35 "~/.cursor/".to_string(), "~/.vscode/".to_string(), "~/Library/Application Support/Code/".to_string(), "~/.local/share/".to_string(), "~/.cache/".to_string(), ],
41 excluded_paths: vec![
42 "~/.ssh/".to_string(),
43 "~/.aws/".to_string(),
44 "~/.gnupg/".to_string(),
45 "**/node_modules/**".to_string(),
46 "**/.git/**".to_string(),
47 ],
48 auto_absorb: true,
49 notify_on_absorption: true,
50 max_file_size_mb: 10,
51 }
52 }
53}
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct AbsorbedContext {
57 pub timestamp: SystemTime,
58 pub origin: PathBuf,
59 pub project_name: String,
60 pub content_type: String,
61 pub content: Value,
62 pub relevance_score: f64,
63 pub keywords: Vec<String>,
64}
65
66pub struct ContextAbsorber {
67 project_name: String,
68 watch_paths: Vec<PathBuf>,
69 permissions: WatchPermissions,
70 absorbed_contexts: Arc<Mutex<Vec<AbsorbedContext>>>,
71 watcher: Option<RecommendedWatcher>,
72 sender: Sender<AbsorptionEvent>,
73 receiver: Receiver<AbsorptionEvent>,
74 last_absorption_time: Arc<Mutex<SystemTime>>,
75}
76
/// Messages consumed by the absorption worker thread.
#[derive(Debug)]
enum AbsorptionEvent {
    /// The file at this path should be (re-)absorbed.
    FileChanged(PathBuf),
    /// The worker thread should leave its loop.
    Stop,
}
82
83impl ContextAbsorber {
84 pub fn new(project_name: String) -> Result<Self> {
85 let (sender, receiver) = channel();
86
87 let permissions = Self::load_permissions()?;
89
90 let watch_paths = permissions
92 .allowed_paths
93 .iter()
94 .map(|p| shellexpand::tilde(p))
95 .map(|p| PathBuf::from(p.to_string()))
96 .filter(|p| p.exists())
97 .collect();
98
99 let last_time = Self::load_last_absorption_time(&project_name).unwrap_or(
101 SystemTime::now() - std::time::Duration::from_secs(604800), );
103
104 Ok(Self {
105 project_name,
106 watch_paths,
107 permissions,
108 absorbed_contexts: Arc::new(Mutex::new(Vec::new())),
109 watcher: None,
110 sender,
111 receiver,
112 last_absorption_time: Arc::new(Mutex::new(last_time)),
113 })
114 }
115
/// Returns the modification time of `.st/absorbed_context.m8` in the current
/// working directory, or `None` when the working directory, the file or its
/// metadata is unavailable.
///
/// `_project_name` is currently unused.
fn load_last_absorption_time(_project_name: &str) -> Option<SystemTime> {
    let marker = std::env::current_dir()
        .ok()?
        .join(".st")
        .join("absorbed_context.m8");

    if !marker.exists() {
        return None;
    }

    let info = fs::metadata(&marker).ok()?;
    info.modified().ok()
}
129
130 fn load_permissions() -> Result<WatchPermissions> {
131 let perm_path = dirs::home_dir()
132 .unwrap_or_else(|| PathBuf::from("."))
133 .join(".mem8")
134 .join("watch_permissions.json");
135
136 if perm_path.exists() {
137 let content = fs::read_to_string(&perm_path)?;
138 Ok(serde_json::from_str(&content)?)
139 } else {
140 let permissions = WatchPermissions::default();
142 if let Some(parent) = perm_path.parent() {
143 fs::create_dir_all(parent)?;
144 }
145 fs::write(&perm_path, serde_json::to_string_pretty(&permissions)?)?;
146 Ok(permissions)
147 }
148 }
149
150 pub fn start_watching(&mut self) -> Result<()> {
151 let flags = feature_flags::features();
153 if !flags.enable_file_watching {
154 eprintln!("File watching is disabled by configuration");
155 return Ok(());
156 }
157
158 let sender = self.sender.clone();
159 let project_name = self.project_name.clone();
160
161 let mut watcher = RecommendedWatcher::new(
163 move |res: Result<Event, notify::Error>| {
164 if let Ok(event) = res {
165 match event.kind {
166 EventKind::Create(_) => {
167 for path in event.paths {
169 let ext = path.extension().and_then(|s| s.to_str());
170 if ext == Some("json")
172 || ext == Some("jsonl")
173 || ext == Some("md")
174 || ext == Some("markdown")
175 {
176 println!("🆕 New file detected: {}", path.display());
177 if let Ok(content) = fs::read_to_string(&path) {
179 if crate::mcp::smart_project_detector::contains_project_reference(&content, &project_name) {
181 println!(" 📎 Contains project reference! Absorbing...");
182 let _ = sender.send(AbsorptionEvent::FileChanged(path));
183 } else {
184 println!(" ⏭️ No project references found");
185 }
186 }
187 }
188 }
189 }
190 EventKind::Modify(_) => {
191 for path in event.paths {
193 let ext = path.extension().and_then(|s| s.to_str());
194 if ext == Some("json")
195 || ext == Some("jsonl")
196 || ext == Some("md")
197 || ext == Some("markdown")
198 {
199 if let Ok(content) = fs::read_to_string(&path) {
201 if crate::mcp::smart_project_detector::contains_project_reference(&content, &project_name) {
202 let _ = sender.send(AbsorptionEvent::FileChanged(path));
203 }
204 }
205 }
206 }
207 }
208 _ => {}
209 }
210 }
211 },
212 Config::default(),
213 )?;
214
215 for path in &self.watch_paths {
217 if path.exists() {
218 watcher.watch(path, RecursiveMode::Recursive)?;
219 println!("👁️ Watching: {}", path.display());
220 }
221 }
222
223 self.watcher = Some(watcher);
224
225 self.start_absorption_thread();
227
228 self.initial_scan()?;
230
231 Ok(())
232 }
233
234 fn initial_scan(&self) -> Result<()> {
235 println!("🔍 Initial scan for files modified since last absorption...");
236
237 let last_time = *self.last_absorption_time.lock().unwrap();
238 let mut files_to_check = Vec::new();
239
240 for watch_path in &self.watch_paths {
242 if watch_path.is_dir() {
243 let patterns = vec!["*.json", "*.jsonl", "*.md", "*.markdown"];
245 for pattern in patterns {
246 let glob_pattern = format!("{}/{}", watch_path.display(), pattern);
247 if let Ok(paths) = glob::glob(&glob_pattern) {
248 for path_result in paths {
249 if let Ok(path) = path_result {
250 if let Ok(metadata) = fs::metadata(&path) {
252 if let Ok(modified) = metadata.modified() {
253 if modified > last_time {
254 files_to_check.push(path);
255 }
256 }
257 }
258 }
259 }
260 }
261
262 let recursive_pattern = format!("{}/*/{}", watch_path.display(), pattern);
264 if let Ok(paths) = glob::glob(&recursive_pattern) {
265 for path_result in paths.take(100) {
266 if let Ok(path) = path_result {
268 if let Ok(metadata) = fs::metadata(&path) {
269 if let Ok(modified) = metadata.modified() {
270 if modified > last_time {
271 files_to_check.push(path);
272 }
273 }
274 }
275 }
276 }
277 }
278 }
279 }
280 }
281
282 println!(
283 "📊 Found {} files modified since last absorption",
284 files_to_check.len()
285 );
286
287 let mut absorbed_count = 0;
289 for path in files_to_check {
290 if let Ok(content) = fs::read_to_string(&path) {
292 if crate::mcp::smart_project_detector::contains_project_reference(
293 &content,
294 &self.project_name,
295 ) {
296 println!(" 📎 Absorbing: {}", path.display());
297 let _ = self.sender.send(AbsorptionEvent::FileChanged(path));
298 absorbed_count += 1;
299 }
300 }
301 }
302
303 println!(
304 "✅ Initial scan complete! Absorbed {} files",
305 absorbed_count
306 );
307
308 *self.last_absorption_time.lock().unwrap() = SystemTime::now();
310
311 Ok(())
312 }
313
314 fn start_absorption_thread(&mut self) {
315 let (tx, rx) = channel();
317 self.sender = tx; let project_name = self.project_name.clone();
320 let contexts = self.absorbed_contexts.clone();
321 let permissions = self.permissions.clone();
322
323 thread::spawn(move || {
324 while let Ok(event) = rx.recv() {
325 match event {
326 AbsorptionEvent::FileChanged(path) => {
327 if let Ok(context) = Self::absorb_file(&path, &project_name, &permissions) {
328 if permissions.notify_on_absorption {
329 println!("🧽 Absorbed context from: {}", path.display());
330 println!(" Relevance: {:.2}", context.relevance_score);
331 }
332
333 if let Ok(mut ctx_lock) = contexts.lock() {
335 ctx_lock.push(context.clone());
336 }
337
338 let _ = Self::append_to_m8(&context);
340 }
341 }
342 AbsorptionEvent::Stop => break,
343 }
344 }
345 });
346 }
347
348 fn absorb_file(
349 path: &Path,
350 project_name: &str,
351 permissions: &WatchPermissions,
352 ) -> Result<AbsorbedContext> {
353 let metadata = fs::metadata(path)?;
355 if metadata.len() > permissions.max_file_size_mb * 1024 * 1024 {
356 return Err(anyhow::anyhow!("File too large"));
357 }
358
359 for excluded in &permissions.excluded_paths {
361 if path
362 .to_string_lossy()
363 .contains(excluded.trim_start_matches('*'))
364 {
365 return Err(anyhow::anyhow!("Path is excluded"));
366 }
367 }
368
369 let content = fs::read_to_string(path)?;
371
372 let ext = path.extension().and_then(|s| s.to_str());
374 let (parsed_content, content_type) = match ext {
375 Some("json") => {
376 let json: Value = serde_json::from_str(&content)?;
378 let relevant = Self::extract_relevant_content(&json, project_name);
379 (relevant, Self::detect_content_type(&json))
380 }
381 Some("jsonl") => {
382 let relevant = Self::extract_jsonl_content(&content, project_name)?;
384 (relevant, "jsonl_stream".to_string())
385 }
386 Some("md") | Some("markdown") => {
387 let relevant = Self::extract_markdown_content(&content, project_name);
389 (relevant, "markdown_document".to_string())
390 }
391 _ => {
392 let relevant = Self::extract_text_content(&content, project_name);
394 (relevant, "text_file".to_string())
395 }
396 };
397
398 let keywords = Self::extract_keywords(&parsed_content);
399 let relevance_score = Self::calculate_relevance(&parsed_content, project_name);
400
401 Ok(AbsorbedContext {
402 timestamp: SystemTime::now(),
403 origin: path.to_path_buf(),
404 project_name: project_name.to_string(),
405 content_type,
406 content: parsed_content,
407 relevance_score,
408 keywords,
409 })
410 }
411
412 fn extract_relevant_content(json: &Value, project_name: &str) -> Value {
413 let mut relevant = serde_json::json!({});
414
415 Self::find_mentions(json, project_name, &mut relevant);
417
418 relevant
419 }
420
421 fn find_mentions(json: &Value, needle: &str, result: &mut Value) {
422 match json {
423 Value::Object(map) => {
424 for (key, value) in map {
425 if key.contains(needle) || value.to_string().contains(needle) {
426 result[key] = value.clone();
427 }
428 Self::find_mentions(value, needle, result);
429 }
430 }
431 Value::Array(arr) => {
432 for item in arr {
433 if item.to_string().contains(needle) {
434 if let Value::Array(ref mut res_arr) = result["mentions"] {
435 res_arr.push(item.clone());
436 } else {
437 result["mentions"] = serde_json::json!([item.clone()]);
438 }
439 }
440 }
441 }
442 _ => {}
443 }
444 }
445
446 fn extract_keywords(content: &Value) -> Vec<String> {
447 let mut keywords = HashSet::new();
448 let text = content.to_string();
449
450 for word in text.split_whitespace() {
452 let clean = word.trim_matches(|c: char| !c.is_alphanumeric());
453 if clean.len() > 4 && !STOP_WORDS.contains(&clean.to_lowercase().as_str()) {
454 keywords.insert(clean.to_string());
455 }
456 }
457
458 keywords.into_iter().collect()
459 }
460
461 fn calculate_relevance(content: &Value, project_name: &str) -> f64 {
462 let text = content.to_string();
463 let mentions = text.matches(project_name).count();
464 let total_words = text.split_whitespace().count();
465
466 if total_words == 0 {
467 return 0.0;
468 }
469
470 ((mentions as f64 / total_words as f64) * 100.0).min(1.0)
472 }
473
474 fn detect_content_type(json: &Value) -> String {
475 if json.get("conversations").is_some() {
477 "claude_conversation".to_string()
478 } else if json.get("messages").is_some() && json.get("model").is_some() {
479 "cursor_ai_chat".to_string()
481 } else if json.get("cells").is_some() && json.get("metadata").is_some() {
482 "jupyter_notebook".to_string()
484 } else if json.get("entries").is_some() || json.get("chats").is_some() {
485 "vscode_ai_chat".to_string()
487 } else if json.get("workspaceFolders").is_some() {
488 "vscode_workspace".to_string()
489 } else if json.get("dependencies").is_some() {
490 "package_json".to_string()
491 } else if json.get("config").is_some() {
492 "configuration".to_string()
493 } else if json.get("prompts").is_some() || json.get("completions").is_some() {
494 "copilot_suggestions".to_string()
496 } else {
497 "generic_json".to_string()
498 }
499 }
500
501 fn extract_jsonl_content(content: &str, project_name: &str) -> Result<Value> {
502 let mut relevant_lines = Vec::new();
503
504 for line in content.lines() {
506 if line.trim().is_empty() {
507 continue;
508 }
509
510 if let Ok(json) = serde_json::from_str::<Value>(line) {
512 if json.to_string().contains(project_name) {
514 relevant_lines.push(json);
515 }
516 }
517 }
518
519 Ok(serde_json::json!({
520 "jsonl_entries": relevant_lines,
521 "total_relevant": relevant_lines.len()
522 }))
523 }
524
525 fn extract_markdown_content(content: &str, project_name: &str) -> Value {
526 let mut sections = Vec::new();
527 let mut current_section = String::new();
528 let mut in_relevant_section = false;
529
530 for line in content.lines() {
531 if line.contains(project_name) {
533 in_relevant_section = true;
534 }
535
536 if line.starts_with('#') || in_relevant_section {
538 current_section.push_str(line);
539 current_section.push('\n');
540
541 if current_section.len() > 500 {
543 sections.push(current_section.clone());
544 current_section.clear();
545 in_relevant_section = false;
546 }
547 }
548 }
549
550 if !current_section.is_empty() {
552 sections.push(current_section);
553 }
554
555 serde_json::json!({
556 "markdown_sections": sections,
557 "mentions_count": content.matches(project_name).count()
558 })
559 }
560
561 fn extract_text_content(content: &str, project_name: &str) -> Value {
562 let relevant_lines: Vec<String> = content
564 .lines()
565 .filter(|line| line.contains(project_name))
566 .map(|s| s.to_string())
567 .collect();
568
569 let mut context_snippets = Vec::new();
571 let lines: Vec<&str> = content.lines().collect();
572
573 for (i, line) in lines.iter().enumerate() {
574 if line.contains(project_name) {
575 let start = i.saturating_sub(2);
576 let end = (i + 3).min(lines.len());
577 let snippet = lines[start..end].join("\n");
578 context_snippets.push(snippet);
579 }
580 }
581
582 serde_json::json!({
583 "relevant_lines": relevant_lines,
584 "context_snippets": context_snippets,
585 "total_mentions": content.matches(project_name).count()
586 })
587 }
588
589 fn append_to_m8(context: &AbsorbedContext) -> Result<()> {
590 let cwd = std::env::current_dir()?;
591 let st_dir = cwd.join(".st");
592 let m8_path = st_dir.join("absorbed_context.m8");
593
594 fs::create_dir_all(&st_dir)?;
596
597 let mut existing = if m8_path.exists() {
599 let content = fs::read_to_string(&m8_path)?;
600 if content.is_empty() {
601 Vec::new()
602 } else {
603 serde_json::from_str::<Vec<AbsorbedContext>>(&content).unwrap_or_default()
604 }
605 } else {
606 Vec::new()
607 };
608
609 existing.push(context.clone());
610
611 if existing.len() > 100 {
613 let skip_count = existing.len().saturating_sub(100);
614 existing = existing.into_iter().skip(skip_count).collect();
615 }
616
617 fs::write(&m8_path, serde_json::to_string_pretty(&existing)?)?;
618
619 Ok(())
620 }
621
622 pub fn stop_watching(&mut self) {
623 let _ = self.sender.send(AbsorptionEvent::Stop);
624 self.watcher = None;
625 }
626
627 pub fn get_absorbed_contexts(&self) -> Vec<AbsorbedContext> {
628 self.absorbed_contexts
629 .lock()
630 .unwrap_or_else(|e| e.into_inner())
631 .clone()
632 }
633}
634
/// Lowercase words that are never reported as keywords.
const STOP_WORDS: &[&str] = &[
    "the",
    "and",
    "for",
    "with",
    "this",
    "that",
    "from",
    "into",
    "over",
    "under",
    "about",
    "through",
    "between",
    "after",
    "before",
    "during",
];
640
641pub async fn handle_context_absorber(params: Value) -> Result<Value> {
643 let action = params["action"].as_str().unwrap_or("status");
644 let project_name = params["project_name"].as_str().unwrap_or("smart-tree");
645
646 match action {
647 "start" => {
648 let mut absorber = ContextAbsorber::new(project_name.to_string())?;
649 absorber.start_watching()?;
650
651 Ok(serde_json::json!({
652 "status": "started",
653 "project": project_name,
654 "watching_paths": absorber.watch_paths,
655 "message": format!("🧽 Context absorber started for '{}'", project_name)
656 }))
657 }
658 "status" => {
659 let cwd = std::env::current_dir()?;
660 let m8_path = cwd.join(".st").join("absorbed_context.m8");
661
662 let count = if m8_path.exists() {
663 let content = fs::read_to_string(&m8_path)?;
664 serde_json::from_str::<Vec<AbsorbedContext>>(&content)
665 .map(|v| v.len())
666 .unwrap_or(0)
667 } else {
668 0
669 };
670
671 Ok(serde_json::json!({
672 "status": "ready",
673 "project": project_name,
674 "absorbed_contexts": count,
675 "m8_file": m8_path.to_string_lossy()
676 }))
677 }
678 "configure" => {
679 let perm_path = dirs::home_dir()
680 .unwrap_or_else(|| PathBuf::from("."))
681 .join(".mem8")
682 .join("watch_permissions.json");
683
684 Ok(serde_json::json!({
685 "permissions_file": perm_path.to_string_lossy(),
686 "current_permissions": WatchPermissions::default(),
687 "message": "Edit the permissions file to configure watching"
688 }))
689 }
690 _ => Err(anyhow::anyhow!("Unknown action: {}", action)),
691 }
692}