Skip to main content

st/context_gatherer/
mod.rs

1//! Context Gathering System for Smart Tree
2//!
3//! This module searches across AI tool directories (~/.claude, ~/.windsurf, ~/.cursor, etc.)
4//! to gather project-related context and convert it into M8 format for processing.
5
6pub mod collab_session;
7pub mod cross_session;
8pub mod partnership;
9pub mod temporal;
10
11use anyhow::{Context, Result};
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14use std::fs;
15use std::path::{Path, PathBuf};
16use walkdir::WalkDir;
17// TODO: Use proper M8 wave format when available
18// use crate::mem8::wave::{MemoryWave, WaveGrid, FrequencyBand, SensorType};
19// use crate::mem8::format::M8Writer;
20
/// AI tool directories to search for context.
///
/// Each entry is a path relative to the user's home directory: `gather_all`
/// joins it onto the home directory before scanning. This list is also the
/// default value of `GatherConfig::search_dirs`.
pub const AI_TOOL_DIRS: &[&str] = &[
    ".claude",
    ".windsurf",
    ".cursor",
    ".continue",
    ".github/copilot",
    ".vscode",
    ".idea",
    ".zed",
];
32
/// File extensions (without the leading dot) that contain context information.
///
/// This list is the default value of `GatherConfig::extensions`. During a
/// scan, files whose extension is not in the configured list are skipped, and
/// the comparison is an exact (case-sensitive) string match.
pub const CONTEXT_EXTENSIONS: &[&str] = &[
    "json", "jsonl", "xml", "yaml", "yml", "toml", "md", "txt", "log", "conf", "config", "env",
    "settings",
];
38
/// Context gathering configuration.
///
/// `Default` yields a configuration that searches every entry of
/// `AI_TOOL_DIRS`, accepts every extension in `CONTEXT_EXTENSIONS`, caps files
/// at 10 MiB, recurses into subdirectories and enables privacy mode.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GatherConfig {
    /// Directories to search (relative to home)
    pub search_dirs: Vec<String>,
    /// Additional custom directories; these are scanned as given, without
    /// being joined onto the home directory
    pub custom_dirs: Vec<PathBuf>,
    /// File extensions to include (no leading dot, matched exactly)
    pub extensions: Vec<String>,
    /// Project identifiers to look for; a context whose content contains any
    /// of these strings is treated as relevant
    pub project_identifiers: Vec<String>,
    /// Maximum file size to process (in bytes); larger files are skipped
    pub max_file_size: usize,
    /// Enable recursive search in subdirectories (walk depth 5 instead of 1)
    pub recursive: bool,
    /// Privacy mode - redact sensitive information (values of
    /// sensitive-looking keys in parsed JSON content)
    pub privacy_mode: bool,
}
57
58impl Default for GatherConfig {
59    fn default() -> Self {
60        Self {
61            search_dirs: AI_TOOL_DIRS.iter().map(|s| s.to_string()).collect(),
62            custom_dirs: vec![],
63            extensions: CONTEXT_EXTENSIONS.iter().map(|s| s.to_string()).collect(),
64            project_identifiers: vec![],
65            max_file_size: 10 * 1024 * 1024, // 10MB
66            recursive: true,
67            privacy_mode: true,
68        }
69    }
70}
71
/// Represents gathered context from various sources.
///
/// One value is produced per successfully processed file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GatheredContext {
    /// Path of the file this context was read from
    pub source_path: PathBuf,
    /// Name of the tool directory the file was found under (for custom
    /// directories: the directory's file name, or `"custom"`)
    pub ai_tool: String,
    /// Category detected for the content
    pub content_type: ContextType,
    /// The parsed or raw file content
    pub content: ContextContent,
    /// Path-derived metadata; the gatherer fills in `parent_dir`, `modified`
    /// and `size` when they are available
    pub metadata: HashMap<String, String>,
    /// Relevance score; starts at 0.0 and is assigned during relevance scoring
    pub relevance_score: f32,
    /// Timestamp associated with this context; used for recency scoring
    pub timestamp: chrono::DateTime<chrono::Utc>,
}
83
/// Category assigned to a gathered context.
///
/// The category determines the base relevance score during scoring. Within
/// this module only `ChatHistory`, `ProjectSettings`, `Documentation`,
/// `Configuration`, `SearchHistory` and `WorkspaceState` are produced by the
/// file parsers.
// NOTE(review): the remaining variants are presumably produced by other
// modules or reserved for future use — confirm.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum ContextType {
    ChatHistory,
    ProjectSettings,
    CodeSnippets,
    Documentation,
    Configuration,
    SearchHistory,
    Bookmarks,
    CustomPrompts,
    ModelPreferences,
    WorkspaceState,
}
97
/// Payload of a gathered context.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ContextContent {
    /// Raw file text (Markdown and every extension without a dedicated parser)
    Text(String),
    /// Parsed JSON; also used for JSONL (as an array of lines) and for YAML
    /// converted to JSON
    Json(serde_json::Value),
    /// Raw XML text, stored unparsed
    Xml(String),
    /// Raw bytes; never considered relevant by the relevance check
    Binary(Vec<u8>),
}
105
/// Main context gatherer.
///
/// Owns the configuration, the project path used for relevance checks, the
/// contexts collected so far, and the collaborative-session and cross-session
/// helpers that are fed from those contexts.
pub struct ContextGatherer {
    /// Search configuration
    config: GatherConfig,
    /// Project whose name and path are searched for in candidate content
    project_path: PathBuf,
    /// Contexts accepted as relevant; sorted by score after `gather_all`
    gathered_contexts: Vec<GatheredContext>,
    /// Receives every relevant context as it is found
    session_tracker: collab_session::CollaborativeSessionTracker,
    /// Analyzes the gathered contexts for cross-domain patterns and insights
    cross_session_bridge: cross_session::CrossSessionBridge,
}
114
115impl ContextGatherer {
116    pub fn new(project_path: PathBuf, config: GatherConfig) -> Self {
117        Self {
118            config,
119            project_path,
120            gathered_contexts: Vec::new(),
121            session_tracker: collab_session::CollaborativeSessionTracker::new(),
122            cross_session_bridge: cross_session::CrossSessionBridge::new(),
123        }
124    }
125
126    /// Gather context from all configured sources
127    pub fn gather_all(&mut self) -> Result<()> {
128        let home_dir = dirs::home_dir().context("Failed to get home directory")?;
129
130        // Clone to avoid borrow issues
131        let search_dirs = self.config.search_dirs.clone();
132        let custom_dirs = self.config.custom_dirs.clone();
133
134        // Search AI tool directories
135        for dir_name in search_dirs {
136            let search_path = home_dir.join(&dir_name);
137            if search_path.exists() {
138                println!("🔍 Scanning {}", search_path.display());
139                self.scan_directory(&search_path, &dir_name)?;
140            }
141        }
142
143        // Search custom directories
144        for custom_dir in custom_dirs {
145            if custom_dir.exists() {
146                println!("🔍 Scanning custom: {}", custom_dir.display());
147                let tool_name = custom_dir
148                    .file_name()
149                    .and_then(|n| n.to_str())
150                    .unwrap_or("custom");
151                self.scan_directory(&custom_dir, tool_name)?;
152            }
153        }
154
155        // Post-process and score relevance
156        self.score_relevance();
157
158        // Analyze for cross-domain patterns
159        let patterns = self
160            .cross_session_bridge
161            .analyze_for_patterns(&self.gathered_contexts);
162        if !patterns.is_empty() {
163            println!("🔗 Found {} cross-domain patterns", patterns.len());
164        }
165
166        // Generate insights if we have enough data
167        let insights = self.cross_session_bridge.generate_insights(0.3);
168        if !insights.is_empty() {
169            println!("💡 Generated {} cross-session insights", insights.len());
170        }
171
172        Ok(())
173    }
174
175    /// Scan a directory for context files
176    fn scan_directory(&mut self, path: &Path, ai_tool: &str) -> Result<()> {
177        let walker = if self.config.recursive {
178            WalkDir::new(path).max_depth(5)
179        } else {
180            WalkDir::new(path).max_depth(1)
181        };
182
183        for entry in walker.into_iter().filter_map(|e| e.ok()) {
184            let path = entry.path();
185
186            // Skip if not a file
187            if !path.is_file() {
188                continue;
189            }
190
191            // Check file extension
192            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
193                if !self.config.extensions.contains(&ext.to_string()) {
194                    continue;
195                }
196
197                // Check file size
198                if let Ok(metadata) = fs::metadata(path) {
199                    if metadata.len() as usize > self.config.max_file_size {
200                        continue;
201                    }
202                }
203
204                // Process the file
205                if let Ok(context) = self.process_file(path, ai_tool) {
206                    if self.is_relevant(&context) {
207                        // Track collaborative sessions
208                        let _ = self.session_tracker.process_context(&context);
209                        self.gathered_contexts.push(context);
210                    }
211                }
212            }
213        }
214
215        Ok(())
216    }
217
218    /// Process a single file and extract context
219    fn process_file(&self, path: &Path, ai_tool: &str) -> Result<GatheredContext> {
220        let content = fs::read_to_string(path).context("Failed to read file")?;
221
222        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
223
224        let (content_type, content) = match ext {
225            "json" => self.parse_json(&content, path)?,
226            "jsonl" => self.parse_jsonl(&content, path)?,
227            "xml" => self.parse_xml(&content)?,
228            "yaml" | "yml" => self.parse_yaml(&content)?,
229            "md" => (ContextType::Documentation, ContextContent::Text(content)),
230            _ => (ContextType::Configuration, ContextContent::Text(content)),
231        };
232
233        Ok(GatheredContext {
234            source_path: path.to_path_buf(),
235            ai_tool: ai_tool.to_string(),
236            content_type,
237            content,
238            metadata: self.extract_metadata(path),
239            relevance_score: 0.0, // Will be calculated later
240            timestamp: chrono::Utc::now(),
241        })
242    }
243
244    /// Parse JSON content and determine its type
245    fn parse_json(&self, content: &str, path: &Path) -> Result<(ContextType, ContextContent)> {
246        let json: serde_json::Value = serde_json::from_str(content)?;
247
248        // Detect content type based on structure and filename
249        let content_type = if path.to_str().unwrap_or("").contains("chat") {
250            ContextType::ChatHistory
251        } else if path.to_str().unwrap_or("").contains("settings") {
252            ContextType::ProjectSettings
253        } else if json.get("messages").is_some() {
254            ContextType::ChatHistory
255        } else if json.get("workspace").is_some() {
256            ContextType::WorkspaceState
257        } else {
258            ContextType::Configuration
259        };
260
261        // Apply privacy redaction if needed
262        let json = if self.config.privacy_mode {
263            self.redact_sensitive_json(json)
264        } else {
265            json
266        };
267
268        Ok((content_type, ContextContent::Json(json)))
269    }
270
271    /// Parse JSONL (JSON Lines) format
272    fn parse_jsonl(&self, content: &str, path: &Path) -> Result<(ContextType, ContextContent)> {
273        let mut lines = Vec::new();
274
275        for line in content.lines() {
276            if let Ok(json) = serde_json::from_str::<serde_json::Value>(line) {
277                lines.push(json);
278            }
279        }
280
281        let content_type = if path.to_str().unwrap_or("").contains("history") {
282            ContextType::SearchHistory
283        } else {
284            ContextType::ChatHistory
285        };
286
287        Ok((
288            content_type,
289            ContextContent::Json(serde_json::Value::Array(lines)),
290        ))
291    }
292
293    /// Parse XML content
294    fn parse_xml(&self, content: &str) -> Result<(ContextType, ContextContent)> {
295        // For now, store as text - could add proper XML parsing later
296        Ok((
297            ContextType::Configuration,
298            ContextContent::Xml(content.to_string()),
299        ))
300    }
301
302    /// Parse YAML content
303    fn parse_yaml(&self, content: &str) -> Result<(ContextType, ContextContent)> {
304        let yaml: serde_yaml::Value = serde_yaml::from_str(content)?;
305        let json = serde_json::to_value(yaml)?;
306        Ok((ContextType::Configuration, ContextContent::Json(json)))
307    }
308
309    /// Check if context is relevant to the current project
310    fn is_relevant(&self, context: &GatheredContext) -> bool {
311        let project_name = self
312            .project_path
313            .file_name()
314            .and_then(|n| n.to_str())
315            .unwrap_or("");
316
317        // Check content for project references
318        let content_str = match &context.content {
319            ContextContent::Text(t) => t.clone(),
320            ContextContent::Json(j) => j.to_string(),
321            ContextContent::Xml(x) => x.clone(),
322            ContextContent::Binary(_) => return false,
323        };
324
325        // Look for project name or identifiers
326        if content_str
327            .to_lowercase()
328            .contains(&project_name.to_lowercase())
329        {
330            return true;
331        }
332
333        for identifier in &self.config.project_identifiers {
334            if content_str.contains(identifier) {
335                return true;
336            }
337        }
338
339        // Check path references
340        let project_path_str = self.project_path.to_string_lossy();
341        if content_str.contains(project_path_str.as_ref()) {
342            return true;
343        }
344
345        false
346    }
347
348    /// Score relevance of gathered contexts
349    fn score_relevance(&mut self) {
350        let project_name = self
351            .project_path
352            .file_name()
353            .and_then(|n| n.to_str())
354            .unwrap_or("")
355            .to_lowercase();
356
357        for context in &mut self.gathered_contexts {
358            let mut score = 0.0;
359
360            // Score based on content type
361            score += match context.content_type {
362                ContextType::ChatHistory => 0.8,
363                ContextType::ProjectSettings => 0.9,
364                ContextType::CodeSnippets => 0.7,
365                ContextType::Documentation => 0.6,
366                ContextType::CustomPrompts => 0.8,
367                _ => 0.5,
368            };
369
370            // Score based on recency
371            let age_days = (chrono::Utc::now() - context.timestamp).num_days();
372            if age_days < 7 {
373                score += 0.3;
374            } else if age_days < 30 {
375                score += 0.2;
376            } else if age_days < 90 {
377                score += 0.1;
378            }
379
380            // Score based on project name mentions
381            let content_str = match &context.content {
382                ContextContent::Text(t) => t.clone(),
383                ContextContent::Json(j) => j.to_string(),
384                ContextContent::Xml(x) => x.clone(),
385                ContextContent::Binary(_) => String::new(),
386            };
387
388            let mentions = content_str.to_lowercase().matches(&project_name).count();
389            score += (mentions as f32 * 0.1).min(0.5);
390
391            context.relevance_score = score.min(1.0);
392        }
393
394        // Sort by relevance
395        self.gathered_contexts
396            .sort_by(|a, b| b.relevance_score.partial_cmp(&a.relevance_score).unwrap());
397    }
398
399    /// Extract metadata from file path
400    fn extract_metadata(&self, path: &Path) -> HashMap<String, String> {
401        let mut metadata = HashMap::new();
402
403        if let Some(parent) = path.parent() {
404            metadata.insert(
405                "parent_dir".to_string(),
406                parent.to_string_lossy().to_string(),
407            );
408        }
409
410        if let Ok(file_metadata) = fs::metadata(path) {
411            if let Ok(modified) = file_metadata.modified() {
412                metadata.insert(
413                    "modified".to_string(),
414                    chrono::DateTime::<chrono::Utc>::from(modified).to_rfc3339(),
415                );
416            }
417            metadata.insert("size".to_string(), file_metadata.len().to_string());
418        }
419
420        metadata
421    }
422
423    /// Redact sensitive information from JSON
424    #[allow(clippy::only_used_in_recursion)]
425    fn redact_sensitive_json(&self, mut json: serde_json::Value) -> serde_json::Value {
426        if let Some(obj) = json.as_object_mut() {
427            for (key, value) in obj.iter_mut() {
428                if key.contains("key")
429                    || key.contains("token")
430                    || key.contains("secret")
431                    || key.contains("password")
432                {
433                    *value = serde_json::Value::String("[REDACTED]".to_string());
434                } else if value.is_object() || value.is_array() {
435                    *value = self.redact_sensitive_json(value.clone());
436                }
437            }
438        } else if let Some(arr) = json.as_array_mut() {
439            for value in arr.iter_mut() {
440                *value = self.redact_sensitive_json(value.clone());
441            }
442        }
443
444        json
445    }
446
447    /// Convert gathered contexts to M8 format
448    pub fn to_m8(&self) -> Result<Vec<u8>> {
449        // For now, create a simple JSON representation
450        // TODO: Implement proper M8 wave-based format
451        let m8_data = serde_json::json!({
452            "version": "1.0",
453            "type": "context_gather",
454            "metadata": {
455                "project_path": self.project_path,
456                "total_contexts": self.gathered_contexts.len(),
457                "timestamp": chrono::Utc::now().to_rfc3339(),
458                "top_sources": self.get_top_sources(),
459            },
460            "contexts": self.gathered_contexts.iter().map(|c| {
461                serde_json::json!({
462                    "path": c.source_path.to_string_lossy(),
463                    "tool": c.ai_tool,
464                    "type": format!("{:?}", c.content_type),
465                    "score": c.relevance_score,
466                    "preview": match &c.content {
467                        ContextContent::Text(t) => t.chars().take(100).collect::<String>(),
468                        ContextContent::Json(j) => j.to_string().chars().take(100).collect::<String>(),
469                        ContextContent::Xml(x) => x.chars().take(100).collect::<String>(),
470                        ContextContent::Binary(b) => format!("[Binary: {} bytes]", b.len()),
471                    }
472                })
473            }).collect::<Vec<_>>()
474        });
475
476        // Compress with zlib for efficiency
477        use flate2::write::ZlibEncoder;
478        use flate2::Compression;
479        use std::io::Write;
480
481        let json_bytes = serde_json::to_vec(&m8_data)?;
482        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default());
483        encoder.write_all(&json_bytes)?;
484        encoder.finish().map_err(Into::into)
485    }
486
487    /// Get summary of top context sources
488    fn get_top_sources(&self) -> HashMap<String, usize> {
489        let mut sources = HashMap::new();
490
491        for context in &self.gathered_contexts {
492            *sources.entry(context.ai_tool.clone()).or_insert(0) += 1;
493        }
494
495        sources
496    }
497
498    /// Get gathered contexts
499    pub fn contexts(&self) -> &[GatheredContext] {
500        &self.gathered_contexts
501    }
502
503    /// Save gathered contexts to JSON file
504    pub fn save_json(&self, path: &Path) -> Result<()> {
505        let json = serde_json::to_string_pretty(&self.gathered_contexts)?;
506        fs::write(path, json)?;
507        Ok(())
508    }
509
510    /// Perform temporal analysis on gathered contexts
511    pub fn analyze_temporal(
512        &self,
513        resolution: temporal::TemporalResolution,
514    ) -> temporal::TemporalPatterns {
515        let analyzer =
516            temporal::TemporalContextAnalyzer::new(self.gathered_contexts.clone(), resolution);
517        analyzer.detect_patterns()
518    }
519
520    /// Get temporal wave grid representation
521    pub fn create_temporal_waves(
522        &self,
523        resolution: temporal::TemporalResolution,
524    ) -> temporal::TemporalWaveGrid {
525        let analyzer =
526            temporal::TemporalContextAnalyzer::new(self.gathered_contexts.clone(), resolution);
527        analyzer.create_temporal_waves()
528    }
529
530    /// Apply temporal decay to relevance scores
531    pub fn apply_temporal_decay(&mut self, half_life_days: f32) {
532        let mut analyzer = temporal::TemporalContextAnalyzer::new(
533            self.gathered_contexts.clone(),
534            temporal::TemporalResolution::Day,
535        );
536        analyzer.apply_temporal_decay(half_life_days);
537
538        // Update our contexts with decayed scores
539        self.gathered_contexts = analyzer.contexts;
540    }
541
542    /// Analyze AI-human partnership patterns
543    pub fn analyze_partnership(&self) -> partnership::PartnershipAnalysis {
544        let analyzer = partnership::PartnershipAnalyzer::new(self.gathered_contexts.clone());
545        analyzer.analyze_partnership()
546    }
547
548    /// Get collaborative session tracker
549    pub fn session_tracker(&self) -> &collab_session::CollaborativeSessionTracker {
550        &self.session_tracker
551    }
552
553    /// Get mutable collaborative session tracker
554    pub fn session_tracker_mut(&mut self) -> &mut collab_session::CollaborativeSessionTracker {
555        &mut self.session_tracker
556    }
557
558    /// Anchor an important collaborative memory
559    pub fn anchor_memory(
560        &mut self,
561        origin: collab_session::CollaborativeOrigin,
562        anchor_type: collab_session::AnchorType,
563        context: String,
564        keywords: Vec<String>,
565    ) -> Result<String> {
566        self.session_tracker
567            .anchor_memory(origin, anchor_type, context, keywords)
568    }
569
570    /// Find memories related to keywords
571    pub fn find_relevant_memories(&self, keywords: &[String]) -> Vec<String> {
572        self.session_tracker
573            .find_relevant_anchors(keywords)
574            .into_iter()
575            .map(|anchor| {
576                format!(
577                    "[{}] {}: {} (keywords: {})",
578                    anchor.timestamp.format("%Y-%m-%d"),
579                    match &anchor.anchor_type {
580                        collab_session::AnchorType::PatternInsight => "Pattern",
581                        collab_session::AnchorType::Solution => "Solution",
582                        collab_session::AnchorType::Breakthrough => "Breakthrough",
583                        collab_session::AnchorType::LearningMoment => "Learning",
584                        collab_session::AnchorType::SharedJoke => "Joke",
585                        collab_session::AnchorType::TechnicalPattern => "Tech Pattern",
586                        collab_session::AnchorType::ProcessImprovement => "Process",
587                    },
588                    anchor.context,
589                    anchor.keywords.join(", ")
590                )
591            })
592            .collect()
593    }
594
595    /// Get co-engagement heatmap
596    pub fn get_co_engagement_heatmap(&self) -> collab_session::CoEngagementHeatmap {
597        let sessions: Vec<_> = self
598            .session_tracker
599            .session_history
600            .iter()
601            .cloned()
602            .collect();
603        collab_session::CoEngagementHeatmap::from_sessions(&sessions)
604    }
605
606    /// Get cross-domain patterns
607    pub fn get_cross_domain_patterns(&self) -> Vec<&cross_session::CrossDomainPattern> {
608        self.cross_session_bridge.get_patterns()
609    }
610
611    /// Get relevant insights for current project
612    pub fn get_relevant_insights(
613        &self,
614        keywords: &[String],
615    ) -> Vec<cross_session::CrossSessionInsight> {
616        self.cross_session_bridge
617            .suggest_relevant_insights(&self.project_path, keywords)
618    }
619
620    /// Invite a persona for consultation
621    pub fn invite_persona(
622        &self,
623        context: &str,
624        duration: u32,
625    ) -> Option<cross_session::PersonaInvitation> {
626        self.cross_session_bridge.invite_persona(context, duration)
627    }
628}
629
630// TODO: Implement M8Writer extension when M8 format is available