terraphim_session_analyzer/patterns/
knowledge_graph.rs

1//! Pattern learning infrastructure for dynamically discovering new tool patterns
2//!
3//! This module implements a learning system that observes tool usage in Bash commands,
4//! identifies patterns, and promotes frequently-seen patterns to learned patterns.
5//!
6//! ## Architecture
7//!
8//! - `PatternLearner`: Main learning system with voting-based promotion
9//! - `CandidatePattern`: Tracks observations and category votes for unknown tools
10//! - `LearnedPattern`: Promoted patterns with confidence scores
11//!
12//! ## Example
13//!
14//! ```rust
15//! use terraphim_session_analyzer::patterns::knowledge_graph::{PatternLearner, LearnedPattern};
16//! use terraphim_session_analyzer::models::ToolCategory;
17//!
18//! # fn main() -> anyhow::Result<()> {
19//! let mut learner = PatternLearner::new();
20//!
21//! // Observe tool usage
22//! learner.observe(
23//!     "pytest".to_string(),
24//!     "pytest tests/".to_string(),
25//!     ToolCategory::Testing
26//! );
27//!
28//! // After multiple observations, promote to learned patterns
29//! let learned = learner.promote_candidates();
30//! # Ok(())
31//! # }
32//! ```
33
34use crate::models::ToolCategory;
35#[cfg(feature = "terraphim")]
36use crate::models::ToolChain;
37use anyhow::{Context, Result};
38use indexmap::IndexMap;
39use jiff::Timestamp;
40use serde::{Deserialize, Serialize};
41use std::collections::HashMap;
42use std::path::PathBuf;
43
44/// Learn new tool patterns from usage
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct PatternLearner {
47    /// Candidate patterns being tracked
48    candidate_patterns: IndexMap<String, CandidatePattern>,
49
50    /// Number of observations required before promoting a pattern
51    promotion_threshold: u32,
52}
53
54/// A candidate pattern being observed
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct CandidatePattern {
57    /// Name of the tool
58    pub tool_name: String,
59
60    /// Number of times this tool has been observed
61    pub observations: u32,
62
63    /// Commands where this tool appears (for context analysis)
64    pub contexts: Vec<String>,
65
66    /// Votes for which category this tool belongs to
67    pub category_votes: HashMap<String, u32>,
68
69    /// First time this tool was observed
70    pub first_seen: Timestamp,
71
72    /// Last time this tool was observed
73    pub last_seen: Timestamp,
74}
75
76/// A learned pattern that has been promoted
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct LearnedPattern {
79    /// Name of the tool
80    pub tool_name: String,
81
82    /// Determined category based on voting
83    pub category: ToolCategory,
84
85    /// Confidence score (0.0-1.0) based on observation consistency
86    pub confidence: f32,
87
88    /// Total number of observations
89    pub observations: u32,
90
91    /// When this pattern was learned (promoted)
92    pub learned_at: Timestamp,
93}
94
95impl Default for PatternLearner {
96    fn default() -> Self {
97        Self::new()
98    }
99}
100
101#[allow(dead_code)] // Will be used in Phase 3 Part 3
102impl PatternLearner {
103    /// Create a new pattern learner with default threshold (3 observations)
104    #[must_use]
105    pub fn new() -> Self {
106        Self {
107            candidate_patterns: IndexMap::new(),
108            promotion_threshold: 3,
109        }
110    }
111
112    /// Create a new pattern learner with custom promotion threshold
113    #[must_use]
114    pub fn with_threshold(threshold: u32) -> Self {
115        Self {
116            candidate_patterns: IndexMap::new(),
117            promotion_threshold: threshold,
118        }
119    }
120
121    /// Observe a potential new tool pattern
122    ///
123    /// This method records an observation of a tool being used in a specific context.
124    /// When a tool reaches the promotion threshold, it can be promoted to a learned pattern.
125    pub fn observe(&mut self, tool_name: String, command: String, category: ToolCategory) {
126        let category_str = category_to_string(&category);
127        let now = Timestamp::now();
128
129        self.candidate_patterns
130            .entry(tool_name.clone())
131            .and_modify(|candidate| {
132                candidate.observations += 1;
133                candidate.last_seen = now;
134
135                // Add context if not already present and within limit
136                if !candidate.contexts.contains(&command) && candidate.contexts.len() < 10 {
137                    candidate.contexts.push(command.clone());
138                }
139
140                // Vote on category
141                *candidate
142                    .category_votes
143                    .entry(category_str.clone())
144                    .or_insert(0) += 1;
145            })
146            .or_insert_with(|| CandidatePattern {
147                tool_name: tool_name.clone(),
148                observations: 1,
149                contexts: vec![command],
150                category_votes: {
151                    let mut votes = HashMap::new();
152                    votes.insert(category_str, 1);
153                    votes
154                },
155                first_seen: now,
156                last_seen: now,
157            });
158    }
159
160    /// Promote candidates that meet the observation threshold to learned patterns
161    ///
162    /// Returns a list of newly promoted patterns and removes them from candidates.
163    pub fn promote_candidates(&mut self) -> Vec<LearnedPattern> {
164        let mut promoted = Vec::new();
165        let now = Timestamp::now();
166
167        // Find candidates ready for promotion
168        let candidates_to_promote: Vec<String> = self
169            .candidate_patterns
170            .iter()
171            .filter(|(_, candidate)| candidate.observations >= self.promotion_threshold)
172            .map(|(name, _)| name.clone())
173            .collect();
174
175        // Promote each candidate
176        for tool_name in candidates_to_promote {
177            if let Some(candidate) = self.candidate_patterns.shift_remove(&tool_name) {
178                let category = determine_category(&candidate.category_votes, &candidate.contexts);
179                let confidence =
180                    calculate_confidence(&candidate.category_votes, candidate.observations);
181
182                promoted.push(LearnedPattern {
183                    tool_name: candidate.tool_name,
184                    category,
185                    confidence,
186                    observations: candidate.observations,
187                    learned_at: now,
188                });
189            }
190        }
191
192        promoted
193    }
194
195    /// Get the current count of candidate patterns
196    #[must_use]
197    pub fn candidate_count(&self) -> usize {
198        self.candidate_patterns.len()
199    }
200
201    /// Save learned patterns to cache directory
202    ///
203    /// # Errors
204    ///
205    /// Returns an error if the cache directory cannot be created or the file cannot be written
206    pub fn save_to_cache(&self, learned_patterns: &[LearnedPattern]) -> Result<()> {
207        let cache_path = get_cache_path()?;
208
209        // Create parent directory if it doesn't exist
210        if let Some(parent) = cache_path.parent() {
211            std::fs::create_dir_all(parent).with_context(|| {
212                format!("Failed to create cache directory: {}", parent.display())
213            })?;
214        }
215
216        // Serialize and write patterns
217        let json = serde_json::to_string_pretty(learned_patterns)
218            .context("Failed to serialize learned patterns")?;
219
220        std::fs::write(&cache_path, json).with_context(|| {
221            format!(
222                "Failed to write learned patterns to {}",
223                cache_path.display()
224            )
225        })?;
226
227        Ok(())
228    }
229
230    /// Load learned patterns from cache
231    ///
232    /// # Errors
233    ///
234    /// Returns an error if the cache file cannot be read or parsed
235    pub fn load_from_cache() -> Result<Vec<LearnedPattern>> {
236        let cache_path = get_cache_path()?;
237
238        if !cache_path.exists() {
239            return Ok(Vec::new());
240        }
241
242        let content = std::fs::read_to_string(&cache_path)
243            .with_context(|| format!("Failed to read cache file: {}", cache_path.display()))?;
244
245        let patterns: Vec<LearnedPattern> = serde_json::from_str(&content)
246            .context("Failed to parse learned patterns from cache")?;
247
248        Ok(patterns)
249    }
250
251    /// Get all current candidate patterns (for debugging/inspection)
252    #[must_use]
253    pub fn get_candidates(&self) -> Vec<&CandidatePattern> {
254        self.candidate_patterns.values().collect()
255    }
256}
257
258/// Determine the category based on voting results and context analysis
259#[allow(dead_code)] // Will be used in Phase 3 Part 3
260fn determine_category(category_votes: &HashMap<String, u32>, contexts: &[String]) -> ToolCategory {
261    // Find the category with the most votes
262    let winner = category_votes
263        .iter()
264        .max_by_key(|(_, count)| *count)
265        .map(|(category, _)| category.as_str());
266
267    if let Some(category_str) = winner {
268        string_to_category(category_str)
269    } else {
270        // Fallback: infer from contexts
271        infer_category_from_contexts(contexts)
272    }
273}
274
/// Score how consistent the category votes are.
///
/// The score is the largest single vote count divided by
/// `total_observations`, limited to `0.0..=1.0`. Zero observations yield
/// `0.0`, as does an empty vote table.
#[allow(dead_code)] // Used in tests
fn calculate_confidence(category_votes: &HashMap<String, u32>, total_observations: u32) -> f32 {
    match total_observations {
        // Nothing observed: avoid dividing by zero.
        0 => 0.0,
        total => {
            let top_votes = category_votes.values().copied().max().unwrap_or_default();

            #[allow(clippy::cast_precision_loss)]
            let share = (top_votes as f32) / (total as f32);

            share.clamp(0.0, 1.0)
        }
    }
}
292
293/// Infer category from tool name and command contexts using heuristics
294#[allow(dead_code)] // Will be used in Phase 3 Part 3
295pub fn infer_category_from_contexts(contexts: &[String]) -> ToolCategory {
296    // Analyze the contexts to find common patterns
297    let combined_context = contexts.join(" ").to_lowercase();
298
299    // Testing tools
300    if combined_context.contains("test")
301        || combined_context.contains("spec")
302        || combined_context.contains("jest")
303        || combined_context.contains("pytest")
304        || combined_context.contains("mocha")
305    {
306        return ToolCategory::Testing;
307    }
308
309    // Build tools
310    if combined_context.contains("build")
311        || combined_context.contains("webpack")
312        || combined_context.contains("vite")
313        || combined_context.contains("rollup")
314        || combined_context.contains("esbuild")
315    {
316        return ToolCategory::BuildTool;
317    }
318
319    // Linting
320    if combined_context.contains("lint")
321        || combined_context.contains("eslint")
322        || combined_context.contains("clippy")
323        || combined_context.contains("pylint")
324    {
325        return ToolCategory::Linting;
326    }
327
328    // Git operations
329    if combined_context.contains("git ")
330        || combined_context.contains("commit")
331        || combined_context.contains("push")
332        || combined_context.contains("pull")
333    {
334        return ToolCategory::Git;
335    }
336
337    // Package managers
338    if combined_context.contains("install")
339        || combined_context.contains("npm ")
340        || combined_context.contains("yarn ")
341        || combined_context.contains("pnpm ")
342        || combined_context.contains("cargo ")
343        || combined_context.contains("pip ")
344    {
345        return ToolCategory::PackageManager;
346    }
347
348    // Cloud deployment
349    if combined_context.contains("deploy")
350        || combined_context.contains("publish")
351        || combined_context.contains("wrangler")
352        || combined_context.contains("vercel")
353        || combined_context.contains("netlify")
354    {
355        return ToolCategory::CloudDeploy;
356    }
357
358    // Database
359    if combined_context.contains("database")
360        || combined_context.contains("migrate")
361        || combined_context.contains("psql")
362        || combined_context.contains("mysql")
363    {
364        return ToolCategory::Database;
365    }
366
367    // Default to Other
368    ToolCategory::Other("unknown".to_string())
369}
370
371/// Convert ToolCategory to string for storage
372#[allow(dead_code)] // Will be used in Phase 3 Part 3
373fn category_to_string(category: &ToolCategory) -> String {
374    match category {
375        ToolCategory::PackageManager => "PackageManager".to_string(),
376        ToolCategory::BuildTool => "BuildTool".to_string(),
377        ToolCategory::Testing => "Testing".to_string(),
378        ToolCategory::Linting => "Linting".to_string(),
379        ToolCategory::Git => "Git".to_string(),
380        ToolCategory::CloudDeploy => "CloudDeploy".to_string(),
381        ToolCategory::Database => "Database".to_string(),
382        ToolCategory::Other(s) => format!("Other({s})"),
383    }
384}
385
386/// Convert string back to ToolCategory
387#[allow(dead_code)] // Will be used in Phase 3 Part 3
388fn string_to_category(s: &str) -> ToolCategory {
389    match s {
390        "PackageManager" => ToolCategory::PackageManager,
391        "BuildTool" => ToolCategory::BuildTool,
392        "Testing" => ToolCategory::Testing,
393        "Linting" => ToolCategory::Linting,
394        "Git" => ToolCategory::Git,
395        "CloudDeploy" => ToolCategory::CloudDeploy,
396        "Database" => ToolCategory::Database,
397        s if s.starts_with("Other(") => {
398            let inner = s.trim_start_matches("Other(").trim_end_matches(')');
399            ToolCategory::Other(inner.to_string())
400        }
401        _ => ToolCategory::Other(s.to_string()),
402    }
403}
404
405/// Get the path to the learned patterns cache file
406///
407/// # Errors
408///
409/// Returns an error if the home directory cannot be determined
410#[allow(dead_code)] // Used in tests
411fn get_cache_path() -> Result<PathBuf> {
412    let home = home::home_dir().context("Could not find home directory")?;
413    Ok(home
414        .join(".config")
415        .join("claude-log-analyzer")
416        .join("learned_patterns.json"))
417}
418
419// ============================================================================
420// Tool Relationship Models (Feature-gated for Terraphim)
421// ============================================================================
422
/// A directed, typed link between two tools, describing how they interact in
/// workflows.
#[cfg(feature = "terraphim")]
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
#[allow(dead_code)] // Will be used in future terraphim integration
pub struct ToolRelationship {
    /// Tool the relationship starts from (the subject, e.g. the tool that depends).
    pub from_tool: String,

    /// Tool the relationship points to (the object, e.g. the tool depended upon).
    pub to_tool: String,

    /// Kind of relationship between the two tools.
    pub relationship_type: RelationType,

    /// Confidence score for this relationship (0.0-1.0).
    pub confidence: f32,
}
440
/// The kinds of relationship a [`ToolRelationship`] can express. In the
/// descriptions below, "Tool A" is `from_tool` and "Tool B" is `to_tool`.
#[cfg(feature = "terraphim")]
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
#[allow(dead_code)] // Will be used in future terraphim integration
pub enum RelationType {
    /// Tool A needs Tool B in order to function (e.g., wrangler depends on npm build).
    DependsOn,

    /// Tool A can be used instead of Tool B (e.g., bunx replaces npx).
    Replaces,

    /// Tool A and Tool B work well together (e.g., git works with npm).
    Complements,

    /// Tool A and Tool B conflict with each other.
    Conflicts,
}
458
#[cfg(feature = "terraphim")]
#[allow(dead_code)] // Methods will be used in future terraphim integration
impl ToolRelationship {
    /// Infer relationships from tool chain patterns
    ///
    /// Every pair of consecutive tools in the chain produces one `DependsOn`
    /// relationship in which the *later* tool is `from_tool` and the *earlier*
    /// tool is `to_tool` (the later tool depends on the earlier one).
    ///
    /// The confidence is `success_rate * min(frequency, 10) / 10`, raised by
    /// `0.2` for well-known dependency pairs, and always limited to
    /// `0.0..=1.0`. Chains with fewer than two tools yield no relationships.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use terraphim_session_analyzer::patterns::knowledge_graph::ToolRelationship;
    /// use terraphim_session_analyzer::models::ToolChain;
    ///
    /// let chain = ToolChain {
    ///     tools: vec!["npm".to_string(), "wrangler".to_string()],
    ///     frequency: 5,
    ///     average_time_between_ms: 1000,
    ///     typical_agent: Some("devops".to_string()),
    ///     success_rate: 0.95,
    /// };
    ///
    /// let relationships = ToolRelationship::infer_from_chain(&chain);
    /// // One relationship: from_tool = "wrangler", to_tool = "npm", DependsOn
    /// ```
    #[must_use]
    pub fn infer_from_chain(chain: &ToolChain) -> Vec<Self> {
        // Base confidence depends only on the chain, not on the pair, so it is
        // computed once. Higher frequency and success rate increase confidence.
        #[allow(clippy::cast_precision_loss)]
        let frequency_factor = (chain.frequency.min(10) as f32) / 10.0;
        let base_confidence = chain.success_rate * frequency_factor;

        chain
            .tools
            .windows(2)
            .map(|pair| {
                let (earlier, later) = (&pair[0], &pair[1]);

                // Common dependency patterns get higher confidence
                let confidence = if is_known_dependency(earlier, later) {
                    (base_confidence + 0.2).min(1.0)
                } else {
                    base_confidence
                };

                // Sequential use suggests the later tool depends on the earlier
                // one. Going through `new` keeps the confidence clamped.
                Self::new(
                    later.clone(),
                    earlier.clone(),
                    RelationType::DependsOn,
                    confidence,
                )
            })
            .collect()
    }

    /// Create a new tool relationship
    ///
    /// `confidence` is clamped to `0.0..=1.0`.
    #[must_use]
    pub fn new(
        from_tool: String,
        to_tool: String,
        relationship_type: RelationType,
        confidence: f32,
    ) -> Self {
        Self {
            from_tool,
            to_tool,
            relationship_type,
            confidence: confidence.clamp(0.0, 1.0),
        }
    }
}
534
535/// Check if a tool dependency is well-known
536#[cfg(feature = "terraphim")]
#[allow(dead_code)] // Used in inference and tests
fn is_known_dependency(dependency: &str, dependent: &str) -> bool {
    // `(dependency, dependent)` pairs that count as established dependencies.
    // Matching is exact and order-sensitive: ("npm", "wrangler") is known,
    // ("wrangler", "npm") is not.
    const KNOWN_PAIRS: [(&str, &str); 8] = [
        ("npm", "wrangler"),
        ("npm", "vercel"),
        ("npm", "netlify"),
        ("cargo", "clippy"),
        ("git", "npm"),
        ("git", "cargo"),
        ("npm", "npx"),
        ("yarn", "npx"),
    ];

    KNOWN_PAIRS
        .iter()
        .any(|&(required, requiring)| required == dependency && requiring == dependent)
}
552
/// A flat collection of [`ToolRelationship`]s that together form the tool
/// knowledge graph.
#[cfg(feature = "terraphim")]
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
#[allow(dead_code)] // Will be used in future terraphim integration
pub struct KnowledgeGraph {
    /// Every relationship currently stored in the graph.
    pub relationships: Vec<ToolRelationship>,
}
561
#[cfg(feature = "terraphim")]
#[allow(dead_code)] // Methods will be used in future terraphim integration
impl KnowledgeGraph {
    /// Create a new empty knowledge graph
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Build a knowledge graph from tool chains
    ///
    /// Runs three inference passes in order: `DependsOn` from consecutive
    /// tools in each chain, `Replaces` from known alternatives used at the
    /// same chain position, and `Complements` from frequent co-occurrence.
    /// The output is deterministic for a given input.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use terraphim_session_analyzer::patterns::knowledge_graph::KnowledgeGraph;
    /// use terraphim_session_analyzer::models::ToolChain;
    ///
    /// let chains = vec![
    ///     ToolChain {
    ///         tools: vec!["git".to_string(), "npm".to_string()],
    ///         frequency: 10,
    ///         average_time_between_ms: 500,
    ///         typical_agent: Some("developer".to_string()),
    ///         success_rate: 0.95,
    ///     },
    /// ];
    ///
    /// let graph = KnowledgeGraph::build_from_chains(&chains);
    /// ```
    #[must_use]
    pub fn build_from_chains(chains: &[ToolChain]) -> Self {
        let mut graph = Self::new();

        // Infer DependsOn relationships from sequential tool usage
        for chain in chains {
            for relationship in ToolRelationship::infer_from_chain(chain) {
                graph.add_relationship(relationship);
            }
        }

        // Infer Replaces relationships from alternative tool patterns
        graph.infer_replacement_relationships(chains);

        // Infer Complements relationships from co-occurrence
        graph.infer_complement_relationships(chains);

        graph
    }

    /// Add a relationship to the graph with deduplication
    ///
    /// If a relationship with the same `from_tool`, `to_tool` and type is
    /// already stored, its confidence becomes the average of the stored and
    /// the new confidence and nothing is appended. Otherwise the relationship
    /// is appended. Direction matters, and relationships of different types
    /// between the same tools are kept side by side.
    pub fn add_relationship(&mut self, new_rel: ToolRelationship) {
        // Check for existing relationship between same tools
        if let Some(existing) = self.relationships.iter_mut().find(|r| {
            r.from_tool == new_rel.from_tool
                && r.to_tool == new_rel.to_tool
                && r.relationship_type == new_rel.relationship_type
        }) {
            // Merge by averaging the two confidence values
            existing.confidence = (existing.confidence + new_rel.confidence) / 2.0;
        } else {
            self.relationships.push(new_rel);
        }
    }

    /// Infer replacement relationships from alternative tool usage patterns
    ///
    /// Tools that occupy the same position in different chains and are known
    /// alternatives get a `Replaces` relationship. The pair is always stored
    /// with the alphabetically smaller tool as `from_tool`, so the same pair
    /// found at several positions merges instead of producing mirrored
    /// duplicates.
    fn infer_replacement_relationships(&mut self, chains: &[ToolChain]) {
        // position -> tool -> summed chain frequency. Ordered maps make the
        // iteration order (and therefore the output) deterministic; u64 sums
        // cannot overflow from u32 frequencies in practice.
        let mut usage_by_position: std::collections::BTreeMap<
            usize,
            std::collections::BTreeMap<&str, u64>,
        > = std::collections::BTreeMap::new();

        for chain in chains {
            for (pos, tool) in chain.tools.iter().enumerate() {
                *usage_by_position
                    .entry(pos)
                    .or_default()
                    .entry(tool.as_str())
                    .or_insert(0) += u64::from(chain.frequency);
            }
        }

        // Find tools that appear in the same position (potential replacements)
        for tools_at_position in usage_by_position.values() {
            let tools: Vec<(&str, u64)> = tools_at_position
                .iter()
                .map(|(tool, freq)| (*tool, *freq))
                .collect();

            for (i, &(tool1, freq1)) in tools.iter().enumerate() {
                for &(tool2, freq2) in &tools[i + 1..] {
                    // Check if these are known alternatives
                    if !are_known_alternatives(tool1, tool2) {
                        continue;
                    }

                    // Balanced usage gives the highest confidence (max 0.4).
                    // With no recorded usage at all the ratio is undefined, so
                    // use 0.0 rather than letting 0/0 produce NaN.
                    let total = freq1 + freq2;
                    #[allow(clippy::cast_precision_loss)]
                    let confidence = if total == 0 {
                        0.0
                    } else {
                        ((freq1.min(freq2) as f32) / (total as f32)) * 0.8
                    };

                    self.add_relationship(ToolRelationship::new(
                        tool1.to_owned(),
                        tool2.to_owned(),
                        RelationType::Replaces,
                        confidence,
                    ));
                }
            }
        }
    }

    /// Infer complement relationships from co-occurrence patterns
    ///
    /// Counts, weighted by chain frequency, how often two tools appear in the
    /// same chain (in any positions) without already being linked by a
    /// `DependsOn` relationship. Pairs with a count of at least 3 become
    /// `Complements` relationships with confidence `min(count, 10) / 10 * 0.6`.
    fn infer_complement_relationships(&mut self, chains: &[ToolChain]) {
        // Count co-occurrences of tool pairs (not necessarily sequential).
        // Keys are stored in alphabetical order; the ordered map keeps the
        // order of the generated relationships deterministic.
        let mut cooccurrence: std::collections::BTreeMap<(&str, &str), u32> =
            std::collections::BTreeMap::new();

        for chain in chains {
            // For each pair of tools in the chain (not just sequential)
            for (i, tool1) in chain.tools.iter().enumerate() {
                for tool2 in &chain.tools[i + 1..] {
                    // Skip if they're already connected as dependencies
                    if self.has_relationship(tool1, tool2, &RelationType::DependsOn) {
                        continue;
                    }

                    let key = if tool1 < tool2 {
                        (tool1.as_str(), tool2.as_str())
                    } else {
                        (tool2.as_str(), tool1.as_str())
                    };

                    // Saturate instead of overflowing; only min(count, 10) is used.
                    let count = cooccurrence.entry(key).or_insert(0);
                    *count = count.saturating_add(chain.frequency);
                }
            }
        }

        // Convert frequent co-occurrences to Complements relationships
        for ((tool1, tool2), count) in cooccurrence {
            // Require at least 3 co-occurrences
            if count < 3 {
                continue;
            }

            #[allow(clippy::cast_precision_loss)]
            let confidence = ((count.min(10) as f32) / 10.0) * 0.6;

            self.add_relationship(ToolRelationship::new(
                tool1.to_owned(),
                tool2.to_owned(),
                RelationType::Complements,
                confidence,
            ));
        }
    }

    /// Check if a specific relationship exists
    ///
    /// The check ignores direction: a stored `from -> to` or `to -> from`
    /// relationship of type `rel_type` both count.
    fn has_relationship(&self, from: &str, to: &str, rel_type: &RelationType) -> bool {
        self.relationships.iter().any(|r| {
            ((r.from_tool == from && r.to_tool == to) || (r.from_tool == to && r.to_tool == from))
                && r.relationship_type == *rel_type
        })
    }

    /// Get all relationships for a specific tool
    ///
    /// Returns every relationship in which `tool_name` is either the source or
    /// the target, in storage order.
    #[must_use]
    pub fn get_relationships_for_tool(&self, tool_name: &str) -> Vec<&ToolRelationship> {
        self.relationships
            .iter()
            .filter(|r| r.from_tool == tool_name || r.to_tool == tool_name)
            .collect()
    }
}
741
742/// Check if two tools are known alternatives
743#[cfg(feature = "terraphim")]
#[allow(dead_code)] // Used in inference and tests
fn are_known_alternatives(tool1: &str, tool2: &str) -> bool {
    // One direction of each known pair; the symmetric case is handled below by
    // testing both argument orders.
    let listed = |first: &str, second: &str| {
        matches!(
            (first, second),
            ("npm", "yarn")
                | ("npm", "pnpm")
                | ("yarn", "pnpm")
                | ("npx", "bunx")
                | ("webpack", "vite")
                | ("webpack", "rollup")
                | ("jest", "vitest")
                | ("mocha", "jest")
                | ("eslint", "biome")
        )
    };

    listed(tool1, tool2) || listed(tool2, tool1)
}
762
763#[cfg(test)]
764mod tests {
765    use super::*;
766
767    #[test]
768    fn test_pattern_learner_new() {
769        let learner = PatternLearner::new();
770        assert_eq!(learner.promotion_threshold, 3);
771        assert_eq!(learner.candidate_count(), 0);
772    }
773
774    #[test]
775    fn test_pattern_learner_with_threshold() {
776        let learner = PatternLearner::with_threshold(5);
777        assert_eq!(learner.promotion_threshold, 5);
778    }
779
780    #[test]
781    fn test_observe_single_tool() {
782        let mut learner = PatternLearner::new();
783
784        learner.observe(
785            "pytest".to_string(),
786            "pytest tests/".to_string(),
787            ToolCategory::Testing,
788        );
789
790        assert_eq!(learner.candidate_count(), 1);
791
792        let candidates = learner.get_candidates();
793        assert_eq!(candidates.len(), 1);
794        assert_eq!(candidates[0].tool_name, "pytest");
795        assert_eq!(candidates[0].observations, 1);
796    }
797
798    #[test]
799    fn test_observe_multiple_times() {
800        let mut learner = PatternLearner::new();
801
802        for i in 0..5 {
803            learner.observe(
804                "pytest".to_string(),
805                format!("pytest tests/test_{i}.py"),
806                ToolCategory::Testing,
807            );
808        }
809
810        assert_eq!(learner.candidate_count(), 1);
811
812        let candidates = learner.get_candidates();
813        assert_eq!(candidates[0].observations, 5);
814        assert!(candidates[0].contexts.len() <= 10); // Respects limit
815    }
816
817    #[test]
818    fn test_promote_candidates_threshold_met() {
819        let mut learner = PatternLearner::new();
820
821        // Observe 3 times (meets default threshold)
822        for i in 0..3 {
823            learner.observe(
824                "pytest".to_string(),
825                format!("pytest tests/test_{i}.py"),
826                ToolCategory::Testing,
827            );
828        }
829
830        let promoted = learner.promote_candidates();
831
832        assert_eq!(promoted.len(), 1);
833        assert_eq!(promoted[0].tool_name, "pytest");
834        assert_eq!(promoted[0].observations, 3);
835        assert!(matches!(promoted[0].category, ToolCategory::Testing));
836        assert_eq!(learner.candidate_count(), 0); // Removed after promotion
837    }
838
839    #[test]
840    fn test_promote_candidates_threshold_not_met() {
841        let mut learner = PatternLearner::new();
842
843        // Observe only 2 times (below threshold)
844        for i in 0..2 {
845            learner.observe(
846                "pytest".to_string(),
847                format!("pytest tests/test_{i}.py"),
848                ToolCategory::Testing,
849            );
850        }
851
852        let promoted = learner.promote_candidates();
853
854        assert_eq!(promoted.len(), 0);
855        assert_eq!(learner.candidate_count(), 1); // Still a candidate
856    }
857
858    #[test]
859    fn test_category_voting() {
860        let mut learner = PatternLearner::new();
861
862        // Vote for Testing twice, BuildTool once
863        learner.observe(
864            "tool".to_string(),
865            "tool test".to_string(),
866            ToolCategory::Testing,
867        );
868        learner.observe(
869            "tool".to_string(),
870            "tool test2".to_string(),
871            ToolCategory::Testing,
872        );
873        learner.observe(
874            "tool".to_string(),
875            "tool build".to_string(),
876            ToolCategory::BuildTool,
877        );
878
879        let promoted = learner.promote_candidates();
880        assert_eq!(promoted.len(), 1);
881        // Should choose Testing (majority vote)
882        assert!(matches!(promoted[0].category, ToolCategory::Testing));
883    }
884
885    #[test]
886    fn test_confidence_calculation() {
887        let mut votes = HashMap::new();
888        votes.insert("Testing".to_string(), 3);
889        votes.insert("BuildTool".to_string(), 1);
890
891        let confidence = calculate_confidence(&votes, 4);
892        assert!((confidence - 0.75).abs() < 0.01); // 3/4 = 0.75
893    }
894
895    #[test]
896    fn test_infer_category_testing() {
897        let contexts = vec!["pytest tests/".to_string(), "pytest --verbose".to_string()];
898
899        let category = infer_category_from_contexts(&contexts);
900        assert!(matches!(category, ToolCategory::Testing));
901    }
902
903    #[test]
904    fn test_infer_category_build_tool() {
905        let contexts = vec!["webpack build".to_string(), "vite build".to_string()];
906
907        let category = infer_category_from_contexts(&contexts);
908        assert!(matches!(category, ToolCategory::BuildTool));
909    }
910
911    #[test]
912    fn test_infer_category_linting() {
913        let contexts = vec!["eslint src/".to_string(), "cargo clippy".to_string()];
914
915        let category = infer_category_from_contexts(&contexts);
916        assert!(matches!(category, ToolCategory::Linting));
917    }
918
919    #[test]
920    fn test_infer_category_git() {
921        let contexts = vec!["git commit".to_string(), "git push".to_string()];
922
923        let category = infer_category_from_contexts(&contexts);
924        assert!(matches!(category, ToolCategory::Git));
925    }
926
927    #[test]
928    fn test_infer_category_package_manager() {
929        let contexts = vec!["npm install".to_string(), "yarn add".to_string()];
930
931        let category = infer_category_from_contexts(&contexts);
932        assert!(matches!(category, ToolCategory::PackageManager));
933    }
934
935    #[test]
936    fn test_category_roundtrip() {
937        let categories = vec![
938            ToolCategory::PackageManager,
939            ToolCategory::BuildTool,
940            ToolCategory::Testing,
941            ToolCategory::Linting,
942            ToolCategory::Git,
943            ToolCategory::CloudDeploy,
944            ToolCategory::Database,
945            ToolCategory::Other("custom".to_string()),
946        ];
947
948        for category in categories {
949            let s = category_to_string(&category);
950            let parsed = string_to_category(&s);
951            assert_eq!(
952                std::mem::discriminant(&category),
953                std::mem::discriminant(&parsed)
954            );
955        }
956    }
957
958    #[test]
959    fn test_get_cache_path() {
960        let path = get_cache_path();
961        assert!(path.is_ok());
962
963        let path_buf = path.unwrap();
964        assert!(path_buf.to_string_lossy().contains(".config"));
965        assert!(path_buf.to_string_lossy().contains("claude-log-analyzer"));
966        assert!(path_buf.to_string_lossy().contains("learned_patterns.json"));
967    }
968
969    mod proptest_tests {
970        use super::*;
971        use proptest::prelude::*;
972
973        proptest! {
974            #[test]
975            fn test_observe_properties(
976                tool_name in "[a-z]{3,15}",
977                command in "[a-z ]{5,30}",
978                observation_count in 1u32..10
979            ) {
980                let mut learner = PatternLearner::new();
981
982                for _ in 0..observation_count {
983                    learner.observe(
984                        tool_name.clone(),
985                        command.clone(),
986                        ToolCategory::Testing
987                    );
988                }
989
990                // Property 1: Should always have exactly one candidate for one tool
991                prop_assert_eq!(learner.candidate_count(), 1);
992
993                // Property 2: Observation count should match
994                let candidates = learner.get_candidates();
995                prop_assert_eq!(candidates[0].observations, observation_count);
996
997                // Property 3: Tool name should be preserved
998                prop_assert_eq!(&candidates[0].tool_name, &tool_name);
999            }
1000
1001            #[test]
1002            fn test_promotion_threshold_properties(
1003                threshold in 1u32..20,
1004                observations in 1u32..20
1005            ) {
1006                let mut learner = PatternLearner::with_threshold(threshold);
1007
1008                for _ in 0..observations {
1009                    learner.observe(
1010                        "tool".to_string(),
1011                        "command".to_string(),
1012                        ToolCategory::Testing
1013                    );
1014                }
1015
1016                let promoted = learner.promote_candidates();
1017
1018                // Property: Promotion happens if and only if observations >= threshold
1019                if observations >= threshold {
1020                    prop_assert_eq!(promoted.len(), 1);
1021                    prop_assert_eq!(learner.candidate_count(), 0);
1022                } else {
1023                    prop_assert_eq!(promoted.len(), 0);
1024                    prop_assert_eq!(learner.candidate_count(), 1);
1025                }
1026            }
1027
1028            #[test]
1029            fn test_confidence_properties(
1030                winning_votes in 1u32..100,
1031                losing_votes in 0u32..100
1032            ) {
1033                let total = winning_votes + losing_votes;
1034                if total == 0 {
1035                    return Ok(());
1036                }
1037
1038                let mut votes = HashMap::new();
1039                votes.insert("Category1".to_string(), winning_votes);
1040                if losing_votes > 0 {
1041                    votes.insert("Category2".to_string(), losing_votes);
1042                }
1043
1044                let confidence = calculate_confidence(&votes, total);
1045
1046                // Property 1: Confidence should be in valid range
1047                prop_assert!((0.0..=1.0).contains(&confidence));
1048
1049                // Property 2: Confidence should match the max vote proportion
1050                #[allow(clippy::cast_precision_loss)]
1051                let max_votes = winning_votes.max(losing_votes);
1052                let expected = (max_votes as f32) / (total as f32);
1053                prop_assert!((confidence - expected).abs() < 0.01);
1054            }
1055        }
1056    }
1057
1058    // ============================================================================
1059    // Terraphim Feature Tests
1060    // ============================================================================
1061
1062    #[cfg(feature = "terraphim")]
1063    mod terraphim_tests {
1064        use super::*;
1065
1066        #[test]
1067        fn test_tool_relationship_new() {
1068            let rel = ToolRelationship::new(
1069                "npm".to_string(),
1070                "wrangler".to_string(),
1071                RelationType::DependsOn,
1072                0.8,
1073            );
1074
1075            assert_eq!(rel.from_tool, "npm");
1076            assert_eq!(rel.to_tool, "wrangler");
1077            assert_eq!(rel.relationship_type, RelationType::DependsOn);
1078            assert!((rel.confidence - 0.8).abs() < 0.01);
1079        }
1080
1081        #[test]
1082        fn test_tool_relationship_confidence_clamp() {
1083            // Test upper bound
1084            let rel = ToolRelationship::new(
1085                "npm".to_string(),
1086                "wrangler".to_string(),
1087                RelationType::DependsOn,
1088                1.5,
1089            );
1090            assert!((rel.confidence - 1.0).abs() < 0.01);
1091
1092            // Test lower bound
1093            let rel = ToolRelationship::new(
1094                "npm".to_string(),
1095                "wrangler".to_string(),
1096                RelationType::DependsOn,
1097                -0.5,
1098            );
1099            assert!((rel.confidence - 0.0).abs() < 0.01);
1100        }
1101
1102        #[test]
1103        fn test_infer_from_chain_sequential_tools() {
1104            let chain = ToolChain {
1105                tools: vec!["git".to_string(), "npm".to_string(), "wrangler".to_string()],
1106                frequency: 5,
1107                average_time_between_ms: 1000,
1108                typical_agent: Some("devops".to_string()),
1109                success_rate: 0.9,
1110            };
1111
1112            let relationships = ToolRelationship::infer_from_chain(&chain);
1113
1114            // Should create 2 relationships (git->npm, npm->wrangler)
1115            assert_eq!(relationships.len(), 2);
1116
1117            // All should be DependsOn type
1118            for rel in &relationships {
1119                assert_eq!(rel.relationship_type, RelationType::DependsOn);
1120                assert!(rel.confidence > 0.0);
1121                assert!(rel.confidence <= 1.0);
1122            }
1123        }
1124
1125        #[test]
1126        fn test_infer_from_chain_known_dependency() {
1127            let chain = ToolChain {
1128                tools: vec!["npm".to_string(), "wrangler".to_string()],
1129                frequency: 10,
1130                average_time_between_ms: 500,
1131                typical_agent: Some("devops".to_string()),
1132                success_rate: 1.0,
1133            };
1134
1135            let relationships = ToolRelationship::infer_from_chain(&chain);
1136
1137            assert_eq!(relationships.len(), 1);
1138            let rel = &relationships[0];
1139
1140            // Known dependency should have boosted confidence
1141            assert!(rel.confidence > 0.9);
1142        }
1143
1144        #[test]
1145        fn test_knowledge_graph_new() {
1146            let graph = KnowledgeGraph::new();
1147            assert_eq!(graph.relationships.len(), 0);
1148        }
1149
1150        #[test]
1151        fn test_knowledge_graph_add_relationship() {
1152            let mut graph = KnowledgeGraph::new();
1153
1154            let rel = ToolRelationship::new(
1155                "npm".to_string(),
1156                "wrangler".to_string(),
1157                RelationType::DependsOn,
1158                0.8,
1159            );
1160
1161            graph.add_relationship(rel);
1162            assert_eq!(graph.relationships.len(), 1);
1163        }
1164
1165        #[test]
1166        fn test_knowledge_graph_deduplication() {
1167            let mut graph = KnowledgeGraph::new();
1168
1169            // Add same relationship twice with different confidence
1170            let rel1 = ToolRelationship::new(
1171                "npm".to_string(),
1172                "wrangler".to_string(),
1173                RelationType::DependsOn,
1174                0.6,
1175            );
1176            let rel2 = ToolRelationship::new(
1177                "npm".to_string(),
1178                "wrangler".to_string(),
1179                RelationType::DependsOn,
1180                0.8,
1181            );
1182
1183            graph.add_relationship(rel1);
1184            graph.add_relationship(rel2);
1185
1186            // Should have only one relationship (deduplicated)
1187            assert_eq!(graph.relationships.len(), 1);
1188
1189            // Confidence should be averaged
1190            let rel = &graph.relationships[0];
1191            assert!((rel.confidence - 0.7).abs() < 0.01);
1192        }
1193
1194        #[test]
1195        fn test_knowledge_graph_build_from_chains() {
1196            let chains = vec![
1197                ToolChain {
1198                    tools: vec!["git".to_string(), "npm".to_string()],
1199                    frequency: 10,
1200                    average_time_between_ms: 500,
1201                    typical_agent: Some("developer".to_string()),
1202                    success_rate: 0.95,
1203                },
1204                ToolChain {
1205                    tools: vec!["npm".to_string(), "wrangler".to_string()],
1206                    frequency: 8,
1207                    average_time_between_ms: 1000,
1208                    typical_agent: Some("devops".to_string()),
1209                    success_rate: 0.9,
1210                },
1211            ];
1212
1213            let graph = KnowledgeGraph::build_from_chains(&chains);
1214
1215            // Should have DependsOn relationships from both chains
1216            assert!(!graph.relationships.is_empty());
1217
1218            // Check that DependsOn relationships exist
1219            let depends_on_count = graph
1220                .relationships
1221                .iter()
1222                .filter(|r| r.relationship_type == RelationType::DependsOn)
1223                .count();
1224            assert!(depends_on_count >= 2);
1225        }
1226
1227        #[test]
1228        fn test_knowledge_graph_replacement_relationships() {
1229            let chains = vec![
1230                ToolChain {
1231                    tools: vec!["npm".to_string(), "build".to_string()],
1232                    frequency: 5,
1233                    average_time_between_ms: 1000,
1234                    typical_agent: Some("developer".to_string()),
1235                    success_rate: 0.9,
1236                },
1237                ToolChain {
1238                    tools: vec!["yarn".to_string(), "build".to_string()],
1239                    frequency: 5,
1240                    average_time_between_ms: 1000,
1241                    typical_agent: Some("developer".to_string()),
1242                    success_rate: 0.9,
1243                },
1244            ];
1245
1246            let graph = KnowledgeGraph::build_from_chains(&chains);
1247
1248            // Should identify npm and yarn as alternatives (Replaces relationship)
1249            let replaces_count = graph
1250                .relationships
1251                .iter()
1252                .filter(|r| r.relationship_type == RelationType::Replaces)
1253                .count();
1254            assert!(replaces_count > 0);
1255        }
1256
1257        #[test]
1258        fn test_knowledge_graph_get_relationships_for_tool() {
1259            let mut graph = KnowledgeGraph::new();
1260
1261            graph.add_relationship(ToolRelationship::new(
1262                "npm".to_string(),
1263                "wrangler".to_string(),
1264                RelationType::DependsOn,
1265                0.8,
1266            ));
1267            graph.add_relationship(ToolRelationship::new(
1268                "git".to_string(),
1269                "npm".to_string(),
1270                RelationType::Complements,
1271                0.7,
1272            ));
1273
1274            let npm_rels = graph.get_relationships_for_tool("npm");
1275
1276            // npm should have 2 relationships
1277            assert_eq!(npm_rels.len(), 2);
1278        }
1279
1280        #[test]
1281        fn test_are_known_alternatives() {
1282            assert!(are_known_alternatives("npm", "yarn"));
1283            assert!(are_known_alternatives("yarn", "npm"));
1284            assert!(are_known_alternatives("npx", "bunx"));
1285            assert!(are_known_alternatives("webpack", "vite"));
1286            assert!(!are_known_alternatives("npm", "cargo"));
1287        }
1288
1289        #[test]
1290        fn test_is_known_dependency() {
1291            assert!(is_known_dependency("npm", "wrangler"));
1292            assert!(is_known_dependency("cargo", "clippy"));
1293            assert!(is_known_dependency("git", "npm"));
1294            assert!(!is_known_dependency("random", "tool"));
1295        }
1296    }
1297}
terraphim_session_analyzer/patterns/knowledge_graph.rs

terraphim_session_analyzer/patterns/
knowledge_graph.rs