Skip to main content

lean_ctx/core/
knowledge.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::path::PathBuf;
4
/// Fact budget passed to the memory lifecycle as `max_facts`; also the hard cap applied when
/// merging a legacy store. `remember` triggers the lifecycle once the store exceeds twice this.
const MAX_FACTS: usize = 200;
/// Maximum number of patterns kept in a store.
const MAX_PATTERNS: usize = 50;
/// Maximum number of consolidated history entries kept in a store.
const MAX_HISTORY: usize = 100;
/// Minimum confidence of an existing fact for a conflicting write to be rated `Medium`
/// rather than `Low`.
const CONTRADICTION_THRESHOLD: f32 = 0.5;
9
/// Knowledge store for a single project: facts, recurring patterns and consolidated history.
///
/// Serialized as JSON by `save` and read back by `load`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectKnowledge {
    /// Project root exactly as given to `new`.
    pub project_root: String,
    /// Hash derived from the project root; names the directory the store is saved under.
    pub project_hash: String,
    /// All facts, including archived ones (those with `valid_until` set).
    pub facts: Vec<KnowledgeFact>,
    /// Recurring patterns recorded via `add_pattern`.
    pub patterns: Vec<ProjectPattern>,
    /// Consolidated insights recorded via `consolidate`.
    pub history: Vec<ConsolidatedInsight>,
    /// Time of the last modification made through this type's mutating methods.
    pub updated_at: DateTime<Utc>,
}
19
/// A single remembered fact, identified by `category` + `key`, with validity and usage metadata.
///
/// Fields marked `#[serde(default)]` may be absent in serialized data and then take their
/// default value on load.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeFact {
    /// Grouping ("room") the fact belongs to.
    pub category: String,
    /// Identifier of the fact within its category.
    pub key: String,
    /// The remembered content.
    pub value: String,
    /// Session that created or most recently confirmed/updated the fact.
    pub source_session: String,
    /// Confidence score; presumably in 0.0..=1.0 (it is rendered as a percentage and clamped
    /// to that range when scoring) — not validated here.
    pub confidence: f32,
    /// When the fact was first stored.
    pub created_at: DateTime<Utc>,
    /// When the fact was last confirmed or overwritten.
    pub last_confirmed: DateTime<Utc>,
    /// Number of times the fact was returned by the `*_for_output` recall methods.
    #[serde(default)]
    pub retrieval_count: u32,
    /// Time of the most recent such retrieval, if any.
    #[serde(default)]
    pub last_retrieved: Option<DateTime<Utc>>,
    /// Start of the validity window; `None` means unbounded in the past.
    #[serde(default)]
    pub valid_from: Option<DateTime<Utc>>,
    /// End of the validity window; `None` means the fact is still current.
    #[serde(default)]
    pub valid_until: Option<DateTime<Utc>>,
    /// `"category/key"` of the archived fact this one replaced, if any.
    #[serde(default)]
    pub supersedes: Option<String>,
    /// How many times the same value has been stored for this fact.
    #[serde(default)]
    pub confirmation_count: u32,
}
42
/// Report produced when a new value conflicts with the current fact stored under the same
/// category and key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Contradiction {
    /// Key of the fact being contradicted.
    pub existing_key: String,
    /// Value stored before the conflicting write.
    pub existing_value: String,
    /// Value that conflicts with `existing_value`.
    pub new_value: String,
    /// Category of the fact being contradicted.
    pub category: String,
    /// How strongly established the existing fact was.
    pub severity: ContradictionSeverity,
    /// Human-readable description of how the conflict is handled.
    pub resolution: String,
}
52
/// Severity of a [`Contradiction`], derived from the confidence and confirmation count of the
/// fact being contradicted.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum ContradictionSeverity {
    /// Existing fact had confidence below `CONTRADICTION_THRESHOLD`.
    Low,
    /// Existing fact had confidence of at least `CONTRADICTION_THRESHOLD`.
    Medium,
    /// Existing fact had confidence >= 0.9 and at least two confirmations.
    High,
}
59
/// A recurring pattern observed in the project, identified by `pattern_type` + `description`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectPattern {
    /// Kind of pattern (free-form label).
    pub pattern_type: String,
    /// What the pattern is.
    pub description: String,
    /// Deduplicated examples illustrating the pattern.
    pub examples: Vec<String>,
    /// Session that first recorded the pattern.
    pub source_session: String,
    /// When the pattern was first recorded.
    pub created_at: DateTime<Utc>,
}
68
/// A summary distilled from one or more sessions, stored in the project's history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsolidatedInsight {
    /// Text of the insight.
    pub summary: String,
    /// Sessions the insight was derived from.
    pub from_sessions: Vec<String>,
    /// When the insight was recorded.
    pub timestamp: DateTime<Utc>,
}
75
76impl ProjectKnowledge {
77    pub fn run_memory_lifecycle(&mut self) -> crate::core::memory_lifecycle::LifecycleReport {
78        let cfg = crate::core::memory_lifecycle::LifecycleConfig {
79            max_facts: MAX_FACTS,
80            ..Default::default()
81        };
82        crate::core::memory_lifecycle::run_lifecycle(&mut self.facts, &cfg)
83    }
84
85    pub fn new(project_root: &str) -> Self {
86        Self {
87            project_root: project_root.to_string(),
88            project_hash: hash_project_root(project_root),
89            facts: Vec::new(),
90            patterns: Vec::new(),
91            history: Vec::new(),
92            updated_at: Utc::now(),
93        }
94    }
95
96    pub fn check_contradiction(
97        &self,
98        category: &str,
99        key: &str,
100        new_value: &str,
101    ) -> Option<Contradiction> {
102        let existing = self
103            .facts
104            .iter()
105            .find(|f| f.category == category && f.key == key && f.is_current())?;
106
107        if existing.value.to_lowercase() == new_value.to_lowercase() {
108            return None;
109        }
110
111        let similarity = string_similarity(&existing.value, new_value);
112        if similarity > 0.8 {
113            return None;
114        }
115
116        let severity = if existing.confidence >= 0.9 && existing.confirmation_count >= 2 {
117            ContradictionSeverity::High
118        } else if existing.confidence >= CONTRADICTION_THRESHOLD {
119            ContradictionSeverity::Medium
120        } else {
121            ContradictionSeverity::Low
122        };
123
124        let resolution = match severity {
125            ContradictionSeverity::High => format!(
126                "High-confidence fact [{category}/{key}] changed: '{}' -> '{new_value}' (was confirmed {}x). Previous value archived.",
127                existing.value, existing.confirmation_count
128            ),
129            ContradictionSeverity::Medium => format!(
130                "Fact [{category}/{key}] updated: '{}' -> '{new_value}'",
131                existing.value
132            ),
133            ContradictionSeverity::Low => format!(
134                "Low-confidence fact [{category}/{key}] replaced: '{}' -> '{new_value}'",
135                existing.value
136            ),
137        };
138
139        Some(Contradiction {
140            existing_key: key.to_string(),
141            existing_value: existing.value.clone(),
142            new_value: new_value.to_string(),
143            category: category.to_string(),
144            severity,
145            resolution,
146        })
147    }
148
149    pub fn remember(
150        &mut self,
151        category: &str,
152        key: &str,
153        value: &str,
154        session_id: &str,
155        confidence: f32,
156    ) -> Option<Contradiction> {
157        let contradiction = self.check_contradiction(category, key, value);
158
159        if let Some(existing) = self
160            .facts
161            .iter_mut()
162            .find(|f| f.category == category && f.key == key && f.is_current())
163        {
164            if existing.value == value {
165                existing.last_confirmed = Utc::now();
166                existing.source_session = session_id.to_string();
167                existing.confidence = f32::midpoint(existing.confidence, confidence);
168                existing.confirmation_count += 1;
169            } else if existing.confidence >= 0.9 && existing.confirmation_count >= 2 {
170                existing.valid_until = Some(Utc::now());
171                let superseded_id = format!("{}/{}", existing.category, existing.key);
172                let now = Utc::now();
173                self.facts.push(KnowledgeFact {
174                    category: category.to_string(),
175                    key: key.to_string(),
176                    value: value.to_string(),
177                    source_session: session_id.to_string(),
178                    confidence,
179                    created_at: now,
180                    last_confirmed: now,
181                    retrieval_count: 0,
182                    last_retrieved: None,
183                    valid_from: Some(now),
184                    valid_until: None,
185                    supersedes: Some(superseded_id),
186                    confirmation_count: 1,
187                });
188            } else {
189                existing.value = value.to_string();
190                existing.confidence = confidence;
191                existing.last_confirmed = Utc::now();
192                existing.source_session = session_id.to_string();
193                existing.valid_from = existing.valid_from.or(Some(existing.created_at));
194                existing.confirmation_count = 1;
195            }
196        } else {
197            let now = Utc::now();
198            self.facts.push(KnowledgeFact {
199                category: category.to_string(),
200                key: key.to_string(),
201                value: value.to_string(),
202                source_session: session_id.to_string(),
203                confidence,
204                created_at: now,
205                last_confirmed: now,
206                retrieval_count: 0,
207                last_retrieved: None,
208                valid_from: Some(now),
209                valid_until: None,
210                supersedes: None,
211                confirmation_count: 1,
212            });
213        }
214
215        // No hard-prune: archive-only lifecycle will compact if needed.
216        if self.facts.len() > MAX_FACTS * 2 {
217            let _ = self.run_memory_lifecycle();
218        }
219
220        self.updated_at = Utc::now();
221
222        let action = if contradiction.is_some() {
223            "contradict"
224        } else {
225            "remember"
226        };
227        crate::core::events::emit(crate::core::events::EventKind::KnowledgeUpdate {
228            category: category.to_string(),
229            key: key.to_string(),
230            action: action.to_string(),
231        });
232
233        contradiction
234    }
235
236    pub fn recall(&self, query: &str) -> Vec<&KnowledgeFact> {
237        let q = query.to_lowercase();
238        let terms: Vec<&str> = q.split_whitespace().collect();
239
240        let mut results: Vec<(&KnowledgeFact, f32)> = self
241            .facts
242            .iter()
243            .filter(|f| f.is_current())
244            .filter_map(|f| {
245                let searchable = format!(
246                    "{} {} {} {}",
247                    f.category.to_lowercase(),
248                    f.key.to_lowercase(),
249                    f.value.to_lowercase(),
250                    f.source_session
251                );
252                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
253                if match_count > 0 {
254                    let relevance = (match_count as f32 / terms.len() as f32) * f.confidence;
255                    Some((f, relevance))
256                } else {
257                    None
258                }
259            })
260            .collect();
261
262        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
263        results.into_iter().map(|(f, _)| f).collect()
264    }
265
266    pub fn recall_by_category(&self, category: &str) -> Vec<&KnowledgeFact> {
267        self.facts
268            .iter()
269            .filter(|f| f.category == category && f.is_current())
270            .collect()
271    }
272
273    pub fn recall_at_time(&self, query: &str, at: DateTime<Utc>) -> Vec<&KnowledgeFact> {
274        let q = query.to_lowercase();
275        let terms: Vec<&str> = q.split_whitespace().collect();
276
277        let mut results: Vec<(&KnowledgeFact, f32)> = self
278            .facts
279            .iter()
280            .filter(|f| f.was_valid_at(at))
281            .filter_map(|f| {
282                let searchable = format!(
283                    "{} {} {}",
284                    f.category.to_lowercase(),
285                    f.key.to_lowercase(),
286                    f.value.to_lowercase(),
287                );
288                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
289                if match_count > 0 {
290                    Some((f, match_count as f32 / terms.len() as f32))
291                } else {
292                    None
293                }
294            })
295            .collect();
296
297        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
298        results.into_iter().map(|(f, _)| f).collect()
299    }
300
301    pub fn timeline(&self, category: &str) -> Vec<&KnowledgeFact> {
302        let mut facts: Vec<&KnowledgeFact> = self
303            .facts
304            .iter()
305            .filter(|f| f.category == category)
306            .collect();
307        facts.sort_by_key(|x| x.created_at);
308        facts
309    }
310
311    pub fn list_rooms(&self) -> Vec<(String, usize)> {
312        let mut categories: std::collections::BTreeMap<String, usize> =
313            std::collections::BTreeMap::new();
314        for f in &self.facts {
315            if f.is_current() {
316                *categories.entry(f.category.clone()).or_insert(0) += 1;
317            }
318        }
319        categories.into_iter().collect()
320    }
321
322    pub fn add_pattern(
323        &mut self,
324        pattern_type: &str,
325        description: &str,
326        examples: Vec<String>,
327        session_id: &str,
328    ) {
329        if let Some(existing) = self
330            .patterns
331            .iter_mut()
332            .find(|p| p.pattern_type == pattern_type && p.description == description)
333        {
334            for ex in &examples {
335                if !existing.examples.contains(ex) {
336                    existing.examples.push(ex.clone());
337                }
338            }
339            return;
340        }
341
342        self.patterns.push(ProjectPattern {
343            pattern_type: pattern_type.to_string(),
344            description: description.to_string(),
345            examples,
346            source_session: session_id.to_string(),
347            created_at: Utc::now(),
348        });
349
350        if self.patterns.len() > MAX_PATTERNS {
351            self.patterns.truncate(MAX_PATTERNS);
352        }
353        self.updated_at = Utc::now();
354    }
355
356    pub fn consolidate(&mut self, summary: &str, session_ids: Vec<String>) {
357        self.history.push(ConsolidatedInsight {
358            summary: summary.to_string(),
359            from_sessions: session_ids,
360            timestamp: Utc::now(),
361        });
362
363        if self.history.len() > MAX_HISTORY {
364            self.history.drain(0..self.history.len() - MAX_HISTORY);
365        }
366        self.updated_at = Utc::now();
367    }
368
369    pub fn remove_fact(&mut self, category: &str, key: &str) -> bool {
370        let before = self.facts.len();
371        self.facts
372            .retain(|f| !(f.category == category && f.key == key));
373        let removed = self.facts.len() < before;
374        if removed {
375            self.updated_at = Utc::now();
376        }
377        removed
378    }
379
380    pub fn format_summary(&self) -> String {
381        let mut out = String::new();
382        let current_facts: Vec<&KnowledgeFact> =
383            self.facts.iter().filter(|f| f.is_current()).collect();
384
385        if !current_facts.is_empty() {
386            out.push_str("PROJECT KNOWLEDGE:\n");
387            let mut rooms: Vec<(String, usize)> = self.list_rooms();
388            rooms.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
389
390            let total_rooms = rooms.len();
391            rooms.truncate(crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT);
392
393            for (cat, _count) in rooms {
394                out.push_str(&format!("  [{cat}]\n"));
395
396                let mut facts_in_cat: Vec<&KnowledgeFact> = current_facts
397                    .iter()
398                    .copied()
399                    .filter(|f| f.category == cat)
400                    .collect();
401                facts_in_cat.sort_by(|a, b| sort_fact_for_output(a, b));
402
403                let total_in_cat = facts_in_cat.len();
404                facts_in_cat.truncate(crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT);
405
406                for f in facts_in_cat {
407                    let key = crate::core::sanitize::neutralize_metadata(&f.key);
408                    let val = crate::core::sanitize::neutralize_metadata(&f.value);
409                    out.push_str(&format!(
410                        "    {}: {} (confidence: {:.0}%)\n",
411                        key,
412                        val,
413                        f.confidence * 100.0
414                    ));
415                }
416                if total_in_cat > crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT {
417                    out.push_str(&format!(
418                        "    … +{} more\n",
419                        total_in_cat - crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT
420                    ));
421                }
422            }
423
424            if total_rooms > crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT {
425                out.push_str(&format!(
426                    "  … +{} more rooms\n",
427                    total_rooms - crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT
428                ));
429            }
430        }
431
432        if !self.patterns.is_empty() {
433            out.push_str("PROJECT PATTERNS:\n");
434            let mut patterns = self.patterns.clone();
435            patterns.sort_by(|a, b| {
436                b.created_at
437                    .cmp(&a.created_at)
438                    .then_with(|| a.pattern_type.cmp(&b.pattern_type))
439                    .then_with(|| a.description.cmp(&b.description))
440            });
441            let total = patterns.len();
442            patterns.truncate(crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT);
443            for p in &patterns {
444                let ty = crate::core::sanitize::neutralize_metadata(&p.pattern_type);
445                let desc = crate::core::sanitize::neutralize_metadata(&p.description);
446                out.push_str(&format!("  [{ty}] {desc}\n"));
447            }
448            if total > crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT {
449                out.push_str(&format!(
450                    "  … +{} more\n",
451                    total - crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT
452                ));
453            }
454        }
455
456        if out.is_empty() {
457            out
458        } else {
459            crate::core::sanitize::fence_content("project_knowledge", out.trim_end())
460        }
461    }
462
463    pub fn format_aaak(&self) -> String {
464        let current_facts: Vec<&KnowledgeFact> =
465            self.facts.iter().filter(|f| f.is_current()).collect();
466
467        if current_facts.is_empty() && self.patterns.is_empty() {
468            return String::new();
469        }
470
471        let mut out = String::new();
472
473        let mut rooms: Vec<(String, usize)> = self.list_rooms();
474        rooms.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
475        rooms.truncate(crate::core::budgets::KNOWLEDGE_AAAK_ROOMS_LIMIT);
476
477        for (cat, _count) in rooms {
478            let mut facts_in_cat: Vec<&KnowledgeFact> = current_facts
479                .iter()
480                .copied()
481                .filter(|f| f.category == cat)
482                .collect();
483            facts_in_cat.sort_by(|a, b| sort_fact_for_output(a, b));
484            facts_in_cat.truncate(crate::core::budgets::KNOWLEDGE_AAAK_FACTS_PER_ROOM_LIMIT);
485
486            let items: Vec<String> = facts_in_cat
487                .iter()
488                .map(|f| {
489                    let stars = confidence_stars(f.confidence);
490                    let key = crate::core::sanitize::neutralize_metadata(&f.key);
491                    let val = crate::core::sanitize::neutralize_metadata(&f.value);
492                    format!("{key}={val}{stars}")
493                })
494                .collect();
495            out.push_str(&format!(
496                "{}:{}\n",
497                crate::core::sanitize::neutralize_metadata(&cat.to_uppercase()),
498                items.join("|")
499            ));
500        }
501
502        if !self.patterns.is_empty() {
503            let mut patterns = self.patterns.clone();
504            patterns.sort_by(|a, b| {
505                b.created_at
506                    .cmp(&a.created_at)
507                    .then_with(|| a.pattern_type.cmp(&b.pattern_type))
508                    .then_with(|| a.description.cmp(&b.description))
509            });
510            patterns.truncate(crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT);
511            let pat_items: Vec<String> = patterns
512                .iter()
513                .map(|p| {
514                    let ty = crate::core::sanitize::neutralize_metadata(&p.pattern_type);
515                    let desc = crate::core::sanitize::neutralize_metadata(&p.description);
516                    format!("{ty}.{desc}")
517                })
518                .collect();
519            out.push_str(&format!("PAT:{}\n", pat_items.join("|")));
520        }
521
522        if out.is_empty() {
523            out
524        } else {
525            crate::core::sanitize::fence_content("project_memory_aaak", out.trim_end())
526        }
527    }
528
529    pub fn format_wakeup(&self) -> String {
530        let current_facts: Vec<&KnowledgeFact> = self
531            .facts
532            .iter()
533            .filter(|f| f.is_current() && f.confidence >= 0.7)
534            .collect();
535
536        if current_facts.is_empty() {
537            return String::new();
538        }
539
540        let mut top_facts: Vec<&KnowledgeFact> = current_facts;
541        top_facts.sort_by(|a, b| sort_fact_for_output(a, b));
542        top_facts.truncate(10);
543
544        let items: Vec<String> = top_facts
545            .iter()
546            .map(|f| {
547                let cat = crate::core::sanitize::neutralize_metadata(&f.category);
548                let key = crate::core::sanitize::neutralize_metadata(&f.key);
549                let val = crate::core::sanitize::neutralize_metadata(&f.value);
550                format!("{cat}/{key}={val}")
551            })
552            .collect();
553
554        crate::core::sanitize::fence_content(
555            "project_facts_wakeup",
556            &format!("FACTS:{}", items.join("|")),
557        )
558    }
559
560    pub fn save(&self) -> Result<(), String> {
561        let dir = knowledge_dir(&self.project_hash)?;
562        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
563
564        let path = dir.join("knowledge.json");
565        let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
566        std::fs::write(&path, json).map_err(|e| e.to_string())
567    }
568
569    pub fn load(project_root: &str) -> Option<Self> {
570        let hash = hash_project_root(project_root);
571        let dir = knowledge_dir(&hash).ok()?;
572        let path = dir.join("knowledge.json");
573
574        if let Ok(content) = std::fs::read_to_string(&path) {
575            if let Ok(k) = serde_json::from_str::<Self>(&content) {
576                return Some(k);
577            }
578        }
579
580        let old_hash = crate::core::project_hash::hash_path_only(project_root);
581        if old_hash != hash {
582            crate::core::project_hash::migrate_if_needed(&old_hash, &hash, project_root);
583            if let Ok(content) = std::fs::read_to_string(&path) {
584                if let Ok(mut k) = serde_json::from_str::<Self>(&content) {
585                    k.project_hash = hash;
586                    let _ = k.save();
587                    return Some(k);
588                }
589            }
590        }
591
592        None
593    }
594
595    pub fn load_or_create(project_root: &str) -> Self {
596        Self::load(project_root).unwrap_or_else(|| Self::new(project_root))
597    }
598
    /// Migrates legacy knowledge that was accidentally stored under an empty project_root ("")
    /// into the given `target_root`. Keeps a timestamped backup of the legacy file.
    ///
    /// Returns `Ok(true)` when a merge was performed and saved, `Ok(false)` when there was
    /// nothing to migrate (blank `target_root`, no legacy store, legacy store with a non-empty
    /// root, or an entirely empty legacy store).
    ///
    /// # Errors
    /// Returns the stringified error if saving the merged store, resolving the legacy
    /// directory, or renaming the legacy file fails.
    pub fn migrate_legacy_empty_root(target_root: &str) -> Result<bool, String> {
        if target_root.trim().is_empty() {
            return Ok(false);
        }

        let Some(legacy) = Self::load("") else {
            return Ok(false);
        };

        // Only a store that really recorded an empty root counts as legacy.
        if !legacy.project_root.trim().is_empty() {
            return Ok(false);
        }
        if legacy.facts.is_empty() && legacy.patterns.is_empty() && legacy.history.is_empty() {
            return Ok(false);
        }

        let mut target = Self::load_or_create(target_root);

        // Identity keys used to skip entries the target already contains.
        fn fact_key(f: &KnowledgeFact) -> String {
            format!(
                "{}|{}|{}|{}|{}",
                f.category, f.key, f.value, f.source_session, f.created_at
            )
        }
        fn pattern_key(p: &ProjectPattern) -> String {
            format!(
                "{}|{}|{}|{}",
                p.pattern_type, p.description, p.source_session, p.created_at
            )
        }
        fn history_key(h: &ConsolidatedInsight) -> String {
            format!(
                "{}|{}|{}",
                h.summary,
                h.from_sessions.join(","),
                h.timestamp
            )
        }

        // `HashSet::insert` returns true only for unseen keys, so each loop appends exactly
        // the legacy entries that are not already in the target.
        let mut seen_facts: std::collections::HashSet<String> =
            target.facts.iter().map(fact_key).collect();
        for f in legacy.facts {
            if seen_facts.insert(fact_key(&f)) {
                target.facts.push(f);
            }
        }

        let mut seen_patterns: std::collections::HashSet<String> =
            target.patterns.iter().map(pattern_key).collect();
        for p in legacy.patterns {
            if seen_patterns.insert(pattern_key(&p)) {
                target.patterns.push(p);
            }
        }

        let mut seen_history: std::collections::HashSet<String> =
            target.history.iter().map(history_key).collect();
        for h in legacy.history {
            if seen_history.insert(history_key(&h)) {
                target.history.push(h);
            }
        }

        // Enforce caps to avoid unbounded growth from migration.
        // Each collection is sorted newest first, so truncation drops the oldest entries
        // (for facts, ties on creation time are broken by higher confidence).
        target.facts.sort_by(|a, b| {
            b.created_at
                .cmp(&a.created_at)
                .then_with(|| b.confidence.total_cmp(&a.confidence))
        });
        if target.facts.len() > MAX_FACTS {
            target.facts.truncate(MAX_FACTS);
        }
        target
            .patterns
            .sort_by_key(|x| std::cmp::Reverse(x.created_at));
        if target.patterns.len() > MAX_PATTERNS {
            target.patterns.truncate(MAX_PATTERNS);
        }
        target
            .history
            .sort_by_key(|x| std::cmp::Reverse(x.timestamp));
        if target.history.len() > MAX_HISTORY {
            target.history.truncate(MAX_HISTORY);
        }

        target.updated_at = Utc::now();
        // Persist the merged store before touching the legacy file.
        target.save()?;

        // Move the legacy file aside under a timestamped name.
        // NOTE(review): the legacy directory is derived with `hash_path_only("")`, whereas
        // `Self::load("")` first looks under `hash_project_root("")` — confirm both resolve to
        // the directory that actually holds the legacy file.
        let legacy_hash = crate::core::project_hash::hash_path_only("");
        let legacy_dir = knowledge_dir(&legacy_hash)?;
        let legacy_path = legacy_dir.join("knowledge.json");
        if legacy_path.exists() {
            let ts = Utc::now().format("%Y%m%d-%H%M%S");
            let backup = legacy_dir.join(format!("knowledge.legacy-empty-root.{ts}.json"));
            std::fs::rename(&legacy_path, &backup).map_err(|e| e.to_string())?;
        }

        Ok(true)
    }
700
701    pub fn recall_for_output(&mut self, query: &str, limit: usize) -> (Vec<KnowledgeFact>, usize) {
702        let q = query.to_lowercase();
703        let terms: Vec<&str> = q.split_whitespace().filter(|t| !t.is_empty()).collect();
704        if terms.is_empty() {
705            return (Vec::new(), 0);
706        }
707
708        struct Scored {
709            idx: usize,
710            relevance: f32,
711        }
712
713        let mut scored: Vec<Scored> = self
714            .facts
715            .iter()
716            .enumerate()
717            .filter(|(_, f)| f.is_current())
718            .filter_map(|(idx, f)| {
719                let searchable = format!(
720                    "{} {} {} {}",
721                    f.category.to_lowercase(),
722                    f.key.to_lowercase(),
723                    f.value.to_lowercase(),
724                    f.source_session
725                );
726                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
727                if match_count > 0 {
728                    let relevance = (match_count as f32 / terms.len() as f32) * f.confidence;
729                    Some(Scored { idx, relevance })
730                } else {
731                    None
732                }
733            })
734            .collect();
735
736        scored.sort_by(|a, b| {
737            b.relevance
738                .partial_cmp(&a.relevance)
739                .unwrap_or(std::cmp::Ordering::Equal)
740                .then_with(|| sort_fact_for_output(&self.facts[a.idx], &self.facts[b.idx]))
741        });
742
743        let total = scored.len();
744        scored.truncate(limit);
745
746        let now = Utc::now();
747        let mut out: Vec<KnowledgeFact> = Vec::new();
748        for s in scored {
749            if let Some(f) = self.facts.get_mut(s.idx) {
750                f.retrieval_count = f.retrieval_count.saturating_add(1);
751                f.last_retrieved = Some(now);
752                out.push(f.clone());
753            }
754        }
755
756        (out, total)
757    }
758
759    pub fn recall_by_category_for_output(
760        &mut self,
761        category: &str,
762        limit: usize,
763    ) -> (Vec<KnowledgeFact>, usize) {
764        let mut idxs: Vec<usize> = self
765            .facts
766            .iter()
767            .enumerate()
768            .filter(|(_, f)| f.is_current() && f.category == category)
769            .map(|(i, _)| i)
770            .collect();
771
772        idxs.sort_by(|a, b| sort_fact_for_output(&self.facts[*a], &self.facts[*b]));
773
774        let total = idxs.len();
775        idxs.truncate(limit);
776
777        let now = Utc::now();
778        let mut out = Vec::new();
779        for idx in idxs {
780            if let Some(f) = self.facts.get_mut(idx) {
781                f.retrieval_count = f.retrieval_count.saturating_add(1);
782                f.last_retrieved = Some(now);
783                out.push(f.clone());
784            }
785        }
786
787        (out, total)
788    }
789}
790
791impl KnowledgeFact {
792    pub fn is_current(&self) -> bool {
793        self.valid_until.is_none()
794    }
795
796    pub fn was_valid_at(&self, at: DateTime<Utc>) -> bool {
797        let after_start = self.valid_from.is_none_or(|from| at >= from);
798        let before_end = self.valid_until.is_none_or(|until| at <= until);
799        after_start && before_end
800    }
801}
802
/// Maps a confidence value to a one-to-five star rating string.
///
/// Thresholds are inclusive lower bounds: 0.95, 0.85, 0.7 and 0.5; anything below 0.5
/// (or not comparable, such as NaN) gets a single star.
fn confidence_stars(confidence: f32) -> &'static str {
    // Ordered from the highest tier down; the first floor that is met wins.
    const TIERS: [(f32, &str); 4] = [
        (0.95, "★★★★★"),
        (0.85, "★★★★"),
        (0.7, "★★★"),
        (0.5, "★★"),
    ];

    TIERS
        .iter()
        .find(|(floor, _)| confidence >= *floor)
        .map_or("★", |(_, stars)| *stars)
}
816
/// Case-insensitive Jaccard similarity of the whitespace-separated word sets of `a` and `b`.
///
/// Returns a value in `0.0..=1.0`; two inputs without any words are considered identical
/// (`1.0`).
fn string_similarity(a: &str, b: &str) -> f32 {
    use std::collections::HashSet;

    let left_text = a.to_lowercase();
    let right_text = b.to_lowercase();
    let left: HashSet<&str> = left_text.split_whitespace().collect();
    let right: HashSet<&str> = right_text.split_whitespace().collect();

    let shared = left.iter().filter(|word| right.contains(*word)).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    let combined = left.len() + right.len() - shared;

    if combined == 0 {
        // Neither side contains a word.
        return 1.0;
    }
    shared as f32 / combined as f32
}
836
837fn knowledge_dir(project_hash: &str) -> Result<PathBuf, String> {
838    Ok(crate::core::data_dir::lean_ctx_data_dir()?
839        .join("knowledge")
840        .join(project_hash))
841}
842
843fn sort_fact_for_output(a: &KnowledgeFact, b: &KnowledgeFact) -> std::cmp::Ordering {
844    salience_score(b)
845        .cmp(&salience_score(a))
846        .then_with(|| {
847            b.confidence
848                .partial_cmp(&a.confidence)
849                .unwrap_or(std::cmp::Ordering::Equal)
850        })
851        .then_with(|| b.confirmation_count.cmp(&a.confirmation_count))
852        .then_with(|| b.retrieval_count.cmp(&a.retrieval_count))
853        .then_with(|| b.last_retrieved.cmp(&a.last_retrieved))
854        .then_with(|| b.last_confirmed.cmp(&a.last_confirmed))
855        .then_with(|| a.category.cmp(&b.category))
856        .then_with(|| a.key.cmp(&b.key))
857        .then_with(|| a.value.cmp(&b.value))
858}
859
860fn salience_score(f: &KnowledgeFact) -> u32 {
861    let cat = f.category.to_lowercase();
862    let base: u32 = match cat.as_str() {
863        "decision" => 70,
864        "gotcha" => 75,
865        "architecture" | "arch" => 60,
866        "security" => 65,
867        "testing" | "tests" | "deployment" | "deploy" => 55,
868        "conventions" | "convention" => 45,
869        "finding" => 40,
870        _ => 30,
871    };
872
873    let confidence_bonus = (f.confidence.clamp(0.0, 1.0) * 30.0) as u32;
874    let confirmation_bonus = f.confirmation_count.min(15);
875    let retrieval_bonus = ((f.retrieval_count as f32).ln_1p() * 8.0).min(20.0) as u32;
876
877    let recency_bonus = f.last_retrieved.map_or(0u32, |t| {
878        let days = Utc::now().signed_duration_since(t).num_days();
879        if days <= 7 {
880            10u32
881        } else if days <= 30 {
882            5u32
883        } else {
884            0u32
885        }
886    });
887
888    base + confidence_bonus + confirmation_bonus + retrieval_bonus + recency_bonus
889}
890
891fn hash_project_root(root: &str) -> String {
892    crate::core::project_hash::hash_project_root(root)
893}
894
/// Unit tests for `ProjectKnowledge`: remembering and recalling facts,
/// contradiction checks, temporal validity and the output formatters.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn remember_and_recall() {
        let mut k = ProjectKnowledge::new("/tmp/test-project");
        k.remember("architecture", "auth", "JWT RS256", "session-1", 0.9);
        k.remember("api", "rate-limit", "100/min", "session-1", 0.8);

        let results = k.recall("auth");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].value, "JWT RS256");

        let results = k.recall("api rate");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].key, "rate-limit");
    }

    #[test]
    fn upsert_existing_fact() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.7);
        k.remember("arch", "db", "PostgreSQL 16 with pgvector", "s2", 0.95);

        let current: Vec<_> = k.facts.iter().filter(|f| f.is_current()).collect();
        assert_eq!(current.len(), 1);
        assert_eq!(current[0].value, "PostgreSQL 16 with pgvector");
    }

    #[test]
    fn contradiction_detection() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.95);
        k.facts[0].confirmation_count = 3;

        let contradiction = k.check_contradiction("arch", "db", "MySQL");
        assert!(contradiction.is_some());
        let c = contradiction.unwrap();
        assert_eq!(c.severity, ContradictionSeverity::High);
    }

    #[test]
    fn temporal_validity() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.95);
        k.facts[0].confirmation_count = 3;

        k.remember("arch", "db", "MySQL", "s2", 0.9);

        // Only the new value is current, but the old fact is still stored.
        let current: Vec<_> = k.facts.iter().filter(|f| f.is_current()).collect();
        assert_eq!(current.len(), 1);
        assert_eq!(current[0].value, "MySQL");

        let all_db: Vec<_> = k.facts.iter().filter(|f| f.key == "db").collect();
        assert_eq!(all_db.len(), 2);
    }

    #[test]
    fn confirmation_count() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.9);
        assert_eq!(k.facts[0].confirmation_count, 1);

        k.remember("arch", "db", "PostgreSQL", "s2", 0.9);
        assert_eq!(k.facts[0].confirmation_count, 2);
    }

    #[test]
    fn remove_fact() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.9);
        assert!(k.remove_fact("arch", "db"));
        assert!(k.facts.is_empty());
        assert!(!k.remove_fact("arch", "db"));
    }

    #[test]
    fn list_rooms() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("architecture", "auth", "JWT", "s1", 0.9);
        k.remember("architecture", "db", "PG", "s1", 0.9);
        k.remember("deploy", "host", "AWS", "s1", 0.8);

        let rooms = k.list_rooms();
        assert_eq!(rooms.len(), 2);
    }

    #[test]
    fn aaak_format() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("architecture", "auth", "JWT RS256", "s1", 0.95);
        k.remember("architecture", "db", "PostgreSQL", "s1", 0.7);

        let aaak = k.format_aaak();
        assert!(aaak.contains("ARCHITECTURE:"));
        assert!(aaak.contains("auth=JWT RS256"));
    }

    #[test]
    fn consolidate_history() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.consolidate(
            "Migrated from REST to GraphQL",
            vec!["s1".into(), "s2".into()],
        );
        assert_eq!(k.history.len(), 1);
        assert_eq!(k.history[0].from_sessions.len(), 2);
    }

    #[test]
    fn format_summary_output() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("architecture", "auth", "JWT RS256", "s1", 0.9);
        k.add_pattern(
            "naming",
            "snake_case for functions",
            vec!["get_user()".into()],
            "s1",
        );
        let summary = k.format_summary();
        assert!(summary.contains("PROJECT KNOWLEDGE:"));
        assert!(summary.contains("auth: JWT RS256"));
        assert!(summary.contains("PROJECT PATTERNS:"));
    }

    #[test]
    fn temporal_recall_at_time() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.95);
        k.facts[0].confirmation_count = 3;

        // Short pause so the snapshot time is strictly before the change.
        let before_change = Utc::now();
        std::thread::sleep(std::time::Duration::from_millis(10));

        k.remember("arch", "db", "MySQL", "s2", 0.9);

        let results = k.recall_at_time("db", before_change);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].value, "PostgreSQL");

        let results_now = k.recall_at_time("db", Utc::now());
        assert_eq!(results_now.len(), 1);
        assert_eq!(results_now[0].value, "MySQL");
    }

    #[test]
    fn timeline_shows_history() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.95);
        k.facts[0].confirmation_count = 3;
        k.remember("arch", "db", "MySQL", "s2", 0.9);

        let timeline = k.timeline("arch");
        assert_eq!(timeline.len(), 2);
        assert!(!timeline[0].is_current());
        assert!(timeline[1].is_current());
    }

    #[test]
    fn wakeup_format() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "auth", "JWT", "s1", 0.95);
        k.remember("arch", "db", "PG", "s1", 0.8);

        let wakeup = k.format_wakeup();
        assert!(wakeup.contains("FACTS:"));
        assert!(wakeup.contains("arch/auth=JWT"));
        assert!(wakeup.contains("arch/db=PG"));
    }

    #[test]
    fn salience_prioritizes_decisions_over_findings_at_similar_confidence() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("finding", "f1", "some thing", "s1", 0.9);
        k.remember("decision", "d1", "important", "s1", 0.85);

        let wakeup = k.format_wakeup();
        let items = wakeup
            .strip_prefix("FACTS:")
            .unwrap_or(&wakeup)
            .split('|')
            .collect::<Vec<_>>();
        assert!(
            items
                .first()
                .is_some_and(|s| s.contains("decision/d1=important")),
            "expected decision first in wakeup: {wakeup}"
        );
    }

    #[test]
    fn low_confidence_contradiction() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.4);

        let c = k.check_contradiction("arch", "db", "MySQL");
        assert!(c.is_some());
        assert_eq!(c.unwrap().severity, ContradictionSeverity::Low);
    }

    #[test]
    fn no_contradiction_for_same_value() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.95);

        let c = k.check_contradiction("arch", "db", "PostgreSQL");
        assert!(c.is_none());
    }

    #[test]
    fn no_contradiction_for_similar_values() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember(
            "arch",
            "db",
            "PostgreSQL 16 production database server",
            "s1",
            0.95,
        );

        let c = k.check_contradiction(
            "arch",
            "db",
            "PostgreSQL 16 production database server config",
        );
        assert!(c.is_none());
    }
}