Skip to main content

lean_ctx/core/
knowledge.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::path::PathBuf;
4
/// Fact budget: passed to the memory lifecycle as `max_facts`, and the size the fact
/// list is truncated to after a legacy migration. `remember` only triggers the
/// lifecycle once the list grows past twice this value.
const MAX_FACTS: usize = 200;
/// Maximum number of patterns kept in `ProjectKnowledge::patterns`.
const MAX_PATTERNS: usize = 50;
/// Maximum number of entries kept in `ProjectKnowledge::history`.
const MAX_HISTORY: usize = 100;
/// Confidence an existing fact needs for a conflicting update to be rated
/// `Medium` instead of `Low` severity.
const CONTRADICTION_THRESHOLD: f32 = 0.5;
9
/// Knowledge accumulated for a single project: facts, patterns and consolidated history.
///
/// Persisted as pretty-printed JSON (`knowledge.json`) by `save` and read back by `load`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectKnowledge {
    /// Project root exactly as passed to `new`.
    pub project_root: String,
    /// Hex hash of `project_root`; names the storage directory.
    pub project_hash: String,
    /// Every known fact, including archived ones (those with `valid_until` set).
    pub facts: Vec<KnowledgeFact>,
    /// Recorded project patterns.
    pub patterns: Vec<ProjectPattern>,
    /// Consolidated insights; `consolidate` appends at the end and evicts from the front.
    pub history: Vec<ConsolidatedInsight>,
    /// Timestamp of the most recent mutation made through this type's methods.
    pub updated_at: DateTime<Utc>,
}
19
/// A single remembered fact, identified by `category` + `key`, with a validity window.
///
/// Fields marked `#[serde(default)]` fall back to their default value when absent
/// from the serialized data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeFact {
    /// Grouping label (reported as a "room" by `list_rooms`).
    pub category: String,
    /// Identifier of the fact within its category.
    pub key: String,
    /// The fact's content.
    pub value: String,
    /// Session id that last wrote or confirmed this fact.
    pub source_session: String,
    /// Confidence score; presumably in `0.0..=1.0` (it is printed as a percentage and
    /// clamped to that range when scoring) — TODO confirm with callers.
    pub confidence: f32,
    /// When the fact was first recorded.
    pub created_at: DateTime<Utc>,
    /// When the fact was last written or re-confirmed.
    pub last_confirmed: DateTime<Utc>,
    /// Number of times the fact was returned by the `*_for_output` recall methods.
    #[serde(default)]
    pub retrieval_count: u32,
    /// Time of the most recent such retrieval, if any.
    #[serde(default)]
    pub last_retrieved: Option<DateTime<Utc>>,
    /// Start of the validity window; `None` is treated as "always valid so far".
    #[serde(default)]
    pub valid_from: Option<DateTime<Utc>>,
    /// End of the validity window; `None` means the fact is current.
    #[serde(default)]
    pub valid_until: Option<DateTime<Utc>>,
    /// `"category/key"` of the archived fact this one replaced, if any.
    #[serde(default)]
    pub supersedes: Option<String>,
    /// How many times the current value has been recorded.
    #[serde(default)]
    pub confirmation_count: u32,
}
42
/// Report produced when a new value conflicts with the current fact for the same
/// category and key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Contradiction {
    /// Key of the fact being contradicted.
    pub existing_key: String,
    /// Value stored before the update.
    pub existing_value: String,
    /// Value that conflicts with `existing_value`.
    pub new_value: String,
    /// Category of the fact being contradicted.
    pub category: String,
    /// How well-established the contradicted fact was.
    pub severity: ContradictionSeverity,
    /// Human-readable description of how the conflict is resolved.
    pub resolution: String,
}
52
53#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
54pub enum ContradictionSeverity {
55    Low,
56    Medium,
57    High,
58}
59
/// A recurring pattern observed in the project, with supporting examples.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProjectPattern {
    /// Kind of pattern; together with `description` it identifies the pattern.
    pub pattern_type: String,
    /// Free-text description of the pattern.
    pub description: String,
    /// Distinct examples collected for this pattern.
    pub examples: Vec<String>,
    /// Session id that first recorded the pattern.
    pub source_session: String,
    /// When the pattern was first recorded.
    pub created_at: DateTime<Utc>,
}
68
/// A summary distilled from one or more sessions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsolidatedInsight {
    /// The consolidated summary text.
    pub summary: String,
    /// Ids of the sessions the summary was derived from.
    pub from_sessions: Vec<String>,
    /// When the insight was recorded.
    pub timestamp: DateTime<Utc>,
}
75
76impl ProjectKnowledge {
77    pub fn run_memory_lifecycle(&mut self) -> crate::core::memory_lifecycle::LifecycleReport {
78        let cfg = crate::core::memory_lifecycle::LifecycleConfig {
79            max_facts: MAX_FACTS,
80            ..Default::default()
81        };
82        crate::core::memory_lifecycle::run_lifecycle(&mut self.facts, &cfg)
83    }
84
85    pub fn new(project_root: &str) -> Self {
86        Self {
87            project_root: project_root.to_string(),
88            project_hash: hash_project_root(project_root),
89            facts: Vec::new(),
90            patterns: Vec::new(),
91            history: Vec::new(),
92            updated_at: Utc::now(),
93        }
94    }
95
96    pub fn check_contradiction(
97        &self,
98        category: &str,
99        key: &str,
100        new_value: &str,
101    ) -> Option<Contradiction> {
102        let existing = self
103            .facts
104            .iter()
105            .find(|f| f.category == category && f.key == key && f.is_current())?;
106
107        if existing.value.to_lowercase() == new_value.to_lowercase() {
108            return None;
109        }
110
111        let similarity = string_similarity(&existing.value, new_value);
112        if similarity > 0.8 {
113            return None;
114        }
115
116        let severity = if existing.confidence >= 0.9 && existing.confirmation_count >= 2 {
117            ContradictionSeverity::High
118        } else if existing.confidence >= CONTRADICTION_THRESHOLD {
119            ContradictionSeverity::Medium
120        } else {
121            ContradictionSeverity::Low
122        };
123
124        let resolution = match severity {
125            ContradictionSeverity::High => format!(
126                "High-confidence fact [{category}/{key}] changed: '{}' -> '{new_value}' (was confirmed {}x). Previous value archived.",
127                existing.value, existing.confirmation_count
128            ),
129            ContradictionSeverity::Medium => format!(
130                "Fact [{category}/{key}] updated: '{}' -> '{new_value}'",
131                existing.value
132            ),
133            ContradictionSeverity::Low => format!(
134                "Low-confidence fact [{category}/{key}] replaced: '{}' -> '{new_value}'",
135                existing.value
136            ),
137        };
138
139        Some(Contradiction {
140            existing_key: key.to_string(),
141            existing_value: existing.value.clone(),
142            new_value: new_value.to_string(),
143            category: category.to_string(),
144            severity,
145            resolution,
146        })
147    }
148
149    pub fn remember(
150        &mut self,
151        category: &str,
152        key: &str,
153        value: &str,
154        session_id: &str,
155        confidence: f32,
156    ) -> Option<Contradiction> {
157        let contradiction = self.check_contradiction(category, key, value);
158
159        if let Some(existing) = self
160            .facts
161            .iter_mut()
162            .find(|f| f.category == category && f.key == key && f.is_current())
163        {
164            if existing.value != value {
165                if existing.confidence >= 0.9 && existing.confirmation_count >= 2 {
166                    existing.valid_until = Some(Utc::now());
167                    let superseded_id = format!("{}/{}", existing.category, existing.key);
168                    let now = Utc::now();
169                    self.facts.push(KnowledgeFact {
170                        category: category.to_string(),
171                        key: key.to_string(),
172                        value: value.to_string(),
173                        source_session: session_id.to_string(),
174                        confidence,
175                        created_at: now,
176                        last_confirmed: now,
177                        retrieval_count: 0,
178                        last_retrieved: None,
179                        valid_from: Some(now),
180                        valid_until: None,
181                        supersedes: Some(superseded_id),
182                        confirmation_count: 1,
183                    });
184                } else {
185                    existing.value = value.to_string();
186                    existing.confidence = confidence;
187                    existing.last_confirmed = Utc::now();
188                    existing.source_session = session_id.to_string();
189                    existing.valid_from = existing.valid_from.or(Some(existing.created_at));
190                    existing.confirmation_count = 1;
191                }
192            } else {
193                existing.last_confirmed = Utc::now();
194                existing.source_session = session_id.to_string();
195                existing.confidence = (existing.confidence + confidence) / 2.0;
196                existing.confirmation_count += 1;
197            }
198        } else {
199            let now = Utc::now();
200            self.facts.push(KnowledgeFact {
201                category: category.to_string(),
202                key: key.to_string(),
203                value: value.to_string(),
204                source_session: session_id.to_string(),
205                confidence,
206                created_at: now,
207                last_confirmed: now,
208                retrieval_count: 0,
209                last_retrieved: None,
210                valid_from: Some(now),
211                valid_until: None,
212                supersedes: None,
213                confirmation_count: 1,
214            });
215        }
216
217        // No hard-prune: archive-only lifecycle will compact if needed.
218        if self.facts.len() > MAX_FACTS * 2 {
219            let _ = self.run_memory_lifecycle();
220        }
221
222        self.updated_at = Utc::now();
223
224        let action = if contradiction.is_some() {
225            "contradict"
226        } else {
227            "remember"
228        };
229        crate::core::events::emit(crate::core::events::EventKind::KnowledgeUpdate {
230            category: category.to_string(),
231            key: key.to_string(),
232            action: action.to_string(),
233        });
234
235        contradiction
236    }
237
238    pub fn recall(&self, query: &str) -> Vec<&KnowledgeFact> {
239        let q = query.to_lowercase();
240        let terms: Vec<&str> = q.split_whitespace().collect();
241
242        let mut results: Vec<(&KnowledgeFact, f32)> = self
243            .facts
244            .iter()
245            .filter(|f| f.is_current())
246            .filter_map(|f| {
247                let searchable = format!(
248                    "{} {} {} {}",
249                    f.category.to_lowercase(),
250                    f.key.to_lowercase(),
251                    f.value.to_lowercase(),
252                    f.source_session
253                );
254                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
255                if match_count > 0 {
256                    let relevance = (match_count as f32 / terms.len() as f32) * f.confidence;
257                    Some((f, relevance))
258                } else {
259                    None
260                }
261            })
262            .collect();
263
264        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
265        results.into_iter().map(|(f, _)| f).collect()
266    }
267
268    pub fn recall_by_category(&self, category: &str) -> Vec<&KnowledgeFact> {
269        self.facts
270            .iter()
271            .filter(|f| f.category == category && f.is_current())
272            .collect()
273    }
274
275    pub fn recall_at_time(&self, query: &str, at: DateTime<Utc>) -> Vec<&KnowledgeFact> {
276        let q = query.to_lowercase();
277        let terms: Vec<&str> = q.split_whitespace().collect();
278
279        let mut results: Vec<(&KnowledgeFact, f32)> = self
280            .facts
281            .iter()
282            .filter(|f| f.was_valid_at(at))
283            .filter_map(|f| {
284                let searchable = format!(
285                    "{} {} {}",
286                    f.category.to_lowercase(),
287                    f.key.to_lowercase(),
288                    f.value.to_lowercase(),
289                );
290                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
291                if match_count > 0 {
292                    Some((f, match_count as f32 / terms.len() as f32))
293                } else {
294                    None
295                }
296            })
297            .collect();
298
299        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
300        results.into_iter().map(|(f, _)| f).collect()
301    }
302
303    pub fn timeline(&self, category: &str) -> Vec<&KnowledgeFact> {
304        let mut facts: Vec<&KnowledgeFact> = self
305            .facts
306            .iter()
307            .filter(|f| f.category == category)
308            .collect();
309        facts.sort_by_key(|x| x.created_at);
310        facts
311    }
312
313    pub fn list_rooms(&self) -> Vec<(String, usize)> {
314        let mut categories: std::collections::BTreeMap<String, usize> =
315            std::collections::BTreeMap::new();
316        for f in &self.facts {
317            if f.is_current() {
318                *categories.entry(f.category.clone()).or_insert(0) += 1;
319            }
320        }
321        categories.into_iter().collect()
322    }
323
324    pub fn add_pattern(
325        &mut self,
326        pattern_type: &str,
327        description: &str,
328        examples: Vec<String>,
329        session_id: &str,
330    ) {
331        if let Some(existing) = self
332            .patterns
333            .iter_mut()
334            .find(|p| p.pattern_type == pattern_type && p.description == description)
335        {
336            for ex in &examples {
337                if !existing.examples.contains(ex) {
338                    existing.examples.push(ex.clone());
339                }
340            }
341            return;
342        }
343
344        self.patterns.push(ProjectPattern {
345            pattern_type: pattern_type.to_string(),
346            description: description.to_string(),
347            examples,
348            source_session: session_id.to_string(),
349            created_at: Utc::now(),
350        });
351
352        if self.patterns.len() > MAX_PATTERNS {
353            self.patterns.truncate(MAX_PATTERNS);
354        }
355        self.updated_at = Utc::now();
356    }
357
358    pub fn consolidate(&mut self, summary: &str, session_ids: Vec<String>) {
359        self.history.push(ConsolidatedInsight {
360            summary: summary.to_string(),
361            from_sessions: session_ids,
362            timestamp: Utc::now(),
363        });
364
365        if self.history.len() > MAX_HISTORY {
366            self.history.drain(0..self.history.len() - MAX_HISTORY);
367        }
368        self.updated_at = Utc::now();
369    }
370
371    pub fn remove_fact(&mut self, category: &str, key: &str) -> bool {
372        let before = self.facts.len();
373        self.facts
374            .retain(|f| !(f.category == category && f.key == key));
375        let removed = self.facts.len() < before;
376        if removed {
377            self.updated_at = Utc::now();
378        }
379        removed
380    }
381
382    pub fn format_summary(&self) -> String {
383        let mut out = String::new();
384        let current_facts: Vec<&KnowledgeFact> =
385            self.facts.iter().filter(|f| f.is_current()).collect();
386
387        if !current_facts.is_empty() {
388            out.push_str("PROJECT KNOWLEDGE:\n");
389            let mut rooms: Vec<(String, usize)> = self.list_rooms();
390            rooms.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
391
392            let total_rooms = rooms.len();
393            rooms.truncate(crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT);
394
395            for (cat, _count) in rooms {
396                out.push_str(&format!("  [{cat}]\n"));
397
398                let mut facts_in_cat: Vec<&KnowledgeFact> = current_facts
399                    .iter()
400                    .copied()
401                    .filter(|f| f.category == cat)
402                    .collect();
403                facts_in_cat.sort_by(|a, b| sort_fact_for_output(a, b));
404
405                let total_in_cat = facts_in_cat.len();
406                facts_in_cat.truncate(crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT);
407
408                for f in facts_in_cat {
409                    let key = crate::core::sanitize::neutralize_metadata(&f.key);
410                    let val = crate::core::sanitize::neutralize_metadata(&f.value);
411                    out.push_str(&format!(
412                        "    {}: {} (confidence: {:.0}%)\n",
413                        key,
414                        val,
415                        f.confidence * 100.0
416                    ));
417                }
418                if total_in_cat > crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT {
419                    out.push_str(&format!(
420                        "    … +{} more\n",
421                        total_in_cat - crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT
422                    ));
423                }
424            }
425
426            if total_rooms > crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT {
427                out.push_str(&format!(
428                    "  … +{} more rooms\n",
429                    total_rooms - crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT
430                ));
431            }
432        }
433
434        if !self.patterns.is_empty() {
435            out.push_str("PROJECT PATTERNS:\n");
436            let mut patterns = self.patterns.clone();
437            patterns.sort_by(|a, b| {
438                b.created_at
439                    .cmp(&a.created_at)
440                    .then_with(|| a.pattern_type.cmp(&b.pattern_type))
441                    .then_with(|| a.description.cmp(&b.description))
442            });
443            let total = patterns.len();
444            patterns.truncate(crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT);
445            for p in &patterns {
446                let ty = crate::core::sanitize::neutralize_metadata(&p.pattern_type);
447                let desc = crate::core::sanitize::neutralize_metadata(&p.description);
448                out.push_str(&format!("  [{ty}] {desc}\n"));
449            }
450            if total > crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT {
451                out.push_str(&format!(
452                    "  … +{} more\n",
453                    total - crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT
454                ));
455            }
456        }
457
458        if out.is_empty() {
459            out
460        } else {
461            crate::core::sanitize::fence_content("project_knowledge", out.trim_end())
462        }
463    }
464
465    pub fn format_aaak(&self) -> String {
466        let current_facts: Vec<&KnowledgeFact> =
467            self.facts.iter().filter(|f| f.is_current()).collect();
468
469        if current_facts.is_empty() && self.patterns.is_empty() {
470            return String::new();
471        }
472
473        let mut out = String::new();
474
475        let mut rooms: Vec<(String, usize)> = self.list_rooms();
476        rooms.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
477        rooms.truncate(crate::core::budgets::KNOWLEDGE_AAAK_ROOMS_LIMIT);
478
479        for (cat, _count) in rooms {
480            let mut facts_in_cat: Vec<&KnowledgeFact> = current_facts
481                .iter()
482                .copied()
483                .filter(|f| f.category == cat)
484                .collect();
485            facts_in_cat.sort_by(|a, b| sort_fact_for_output(a, b));
486            facts_in_cat.truncate(crate::core::budgets::KNOWLEDGE_AAAK_FACTS_PER_ROOM_LIMIT);
487
488            let items: Vec<String> = facts_in_cat
489                .iter()
490                .map(|f| {
491                    let stars = confidence_stars(f.confidence);
492                    let key = crate::core::sanitize::neutralize_metadata(&f.key);
493                    let val = crate::core::sanitize::neutralize_metadata(&f.value);
494                    format!("{key}={val}{stars}")
495                })
496                .collect();
497            out.push_str(&format!(
498                "{}:{}\n",
499                crate::core::sanitize::neutralize_metadata(&cat.to_uppercase()),
500                items.join("|")
501            ));
502        }
503
504        if !self.patterns.is_empty() {
505            let mut patterns = self.patterns.clone();
506            patterns.sort_by(|a, b| {
507                b.created_at
508                    .cmp(&a.created_at)
509                    .then_with(|| a.pattern_type.cmp(&b.pattern_type))
510                    .then_with(|| a.description.cmp(&b.description))
511            });
512            patterns.truncate(crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT);
513            let pat_items: Vec<String> = patterns
514                .iter()
515                .map(|p| {
516                    let ty = crate::core::sanitize::neutralize_metadata(&p.pattern_type);
517                    let desc = crate::core::sanitize::neutralize_metadata(&p.description);
518                    format!("{ty}.{desc}")
519                })
520                .collect();
521            out.push_str(&format!("PAT:{}\n", pat_items.join("|")));
522        }
523
524        if out.is_empty() {
525            out
526        } else {
527            crate::core::sanitize::fence_content("project_memory_aaak", out.trim_end())
528        }
529    }
530
531    pub fn format_wakeup(&self) -> String {
532        let current_facts: Vec<&KnowledgeFact> = self
533            .facts
534            .iter()
535            .filter(|f| f.is_current() && f.confidence >= 0.7)
536            .collect();
537
538        if current_facts.is_empty() {
539            return String::new();
540        }
541
542        let mut top_facts: Vec<&KnowledgeFact> = current_facts;
543        top_facts.sort_by(|a, b| sort_fact_for_output(a, b));
544        top_facts.truncate(10);
545
546        let items: Vec<String> = top_facts
547            .iter()
548            .map(|f| {
549                let cat = crate::core::sanitize::neutralize_metadata(&f.category);
550                let key = crate::core::sanitize::neutralize_metadata(&f.key);
551                let val = crate::core::sanitize::neutralize_metadata(&f.value);
552                format!("{cat}/{key}={val}")
553            })
554            .collect();
555
556        crate::core::sanitize::fence_content(
557            "project_facts_wakeup",
558            &format!("FACTS:{}", items.join("|")),
559        )
560    }
561
562    pub fn save(&self) -> Result<(), String> {
563        let dir = knowledge_dir(&self.project_hash)?;
564        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
565
566        let path = dir.join("knowledge.json");
567        let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
568        std::fs::write(&path, json).map_err(|e| e.to_string())
569    }
570
571    pub fn load(project_root: &str) -> Option<Self> {
572        let hash = hash_project_root(project_root);
573        let dir = knowledge_dir(&hash).ok()?;
574        let path = dir.join("knowledge.json");
575
576        let content = std::fs::read_to_string(&path).ok()?;
577        serde_json::from_str(&content).ok()
578    }
579
580    pub fn load_or_create(project_root: &str) -> Self {
581        Self::load(project_root).unwrap_or_else(|| Self::new(project_root))
582    }
583
584    /// Migrates legacy knowledge that was accidentally stored under an empty project_root ("")
585    /// into the given `target_root`. Keeps a timestamped backup of the legacy file.
586    pub fn migrate_legacy_empty_root(target_root: &str) -> Result<bool, String> {
587        if target_root.trim().is_empty() {
588            return Ok(false);
589        }
590
591        let legacy = match Self::load("") {
592            Some(k) => k,
593            None => return Ok(false),
594        };
595
596        if !legacy.project_root.trim().is_empty() {
597            return Ok(false);
598        }
599        if legacy.facts.is_empty() && legacy.patterns.is_empty() && legacy.history.is_empty() {
600            return Ok(false);
601        }
602
603        let mut target = Self::load_or_create(target_root);
604
605        fn fact_key(f: &KnowledgeFact) -> String {
606            format!(
607                "{}|{}|{}|{}|{}",
608                f.category, f.key, f.value, f.source_session, f.created_at
609            )
610        }
611        fn pattern_key(p: &ProjectPattern) -> String {
612            format!(
613                "{}|{}|{}|{}",
614                p.pattern_type, p.description, p.source_session, p.created_at
615            )
616        }
617        fn history_key(h: &ConsolidatedInsight) -> String {
618            format!(
619                "{}|{}|{}",
620                h.summary,
621                h.from_sessions.join(","),
622                h.timestamp
623            )
624        }
625
626        let mut seen_facts: std::collections::HashSet<String> =
627            target.facts.iter().map(fact_key).collect();
628        for f in legacy.facts {
629            if seen_facts.insert(fact_key(&f)) {
630                target.facts.push(f);
631            }
632        }
633
634        let mut seen_patterns: std::collections::HashSet<String> =
635            target.patterns.iter().map(pattern_key).collect();
636        for p in legacy.patterns {
637            if seen_patterns.insert(pattern_key(&p)) {
638                target.patterns.push(p);
639            }
640        }
641
642        let mut seen_history: std::collections::HashSet<String> =
643            target.history.iter().map(history_key).collect();
644        for h in legacy.history {
645            if seen_history.insert(history_key(&h)) {
646                target.history.push(h);
647            }
648        }
649
650        // Enforce caps to avoid unbounded growth from migration.
651        target.facts.sort_by(|a, b| {
652            b.created_at
653                .cmp(&a.created_at)
654                .then_with(|| b.confidence.total_cmp(&a.confidence))
655        });
656        if target.facts.len() > MAX_FACTS {
657            target.facts.truncate(MAX_FACTS);
658        }
659        target
660            .patterns
661            .sort_by_key(|x| std::cmp::Reverse(x.created_at));
662        if target.patterns.len() > MAX_PATTERNS {
663            target.patterns.truncate(MAX_PATTERNS);
664        }
665        target
666            .history
667            .sort_by_key(|x| std::cmp::Reverse(x.timestamp));
668        if target.history.len() > MAX_HISTORY {
669            target.history.truncate(MAX_HISTORY);
670        }
671
672        target.updated_at = Utc::now();
673        target.save()?;
674
675        // Backup legacy file so we don't keep re-importing it.
676        let legacy_hash = hash_project_root("");
677        let legacy_dir = knowledge_dir(&legacy_hash)?;
678        let legacy_path = legacy_dir.join("knowledge.json");
679        if legacy_path.exists() {
680            let ts = Utc::now().format("%Y%m%d-%H%M%S");
681            let backup = legacy_dir.join(format!("knowledge.legacy-empty-root.{ts}.json"));
682            std::fs::rename(&legacy_path, &backup).map_err(|e| e.to_string())?;
683        }
684
685        Ok(true)
686    }
687
688    pub fn recall_for_output(&mut self, query: &str, limit: usize) -> (Vec<KnowledgeFact>, usize) {
689        let q = query.to_lowercase();
690        let terms: Vec<&str> = q.split_whitespace().filter(|t| !t.is_empty()).collect();
691        if terms.is_empty() {
692            return (Vec::new(), 0);
693        }
694
695        struct Scored {
696            idx: usize,
697            relevance: f32,
698        }
699
700        let mut scored: Vec<Scored> = self
701            .facts
702            .iter()
703            .enumerate()
704            .filter(|(_, f)| f.is_current())
705            .filter_map(|(idx, f)| {
706                let searchable = format!(
707                    "{} {} {} {}",
708                    f.category.to_lowercase(),
709                    f.key.to_lowercase(),
710                    f.value.to_lowercase(),
711                    f.source_session
712                );
713                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
714                if match_count > 0 {
715                    let relevance = (match_count as f32 / terms.len() as f32) * f.confidence;
716                    Some(Scored { idx, relevance })
717                } else {
718                    None
719                }
720            })
721            .collect();
722
723        scored.sort_by(|a, b| {
724            b.relevance
725                .partial_cmp(&a.relevance)
726                .unwrap_or(std::cmp::Ordering::Equal)
727                .then_with(|| sort_fact_for_output(&self.facts[a.idx], &self.facts[b.idx]))
728        });
729
730        let total = scored.len();
731        scored.truncate(limit);
732
733        let now = Utc::now();
734        let mut out: Vec<KnowledgeFact> = Vec::new();
735        for s in scored {
736            if let Some(f) = self.facts.get_mut(s.idx) {
737                f.retrieval_count = f.retrieval_count.saturating_add(1);
738                f.last_retrieved = Some(now);
739                out.push(f.clone());
740            }
741        }
742
743        (out, total)
744    }
745
746    pub fn recall_by_category_for_output(
747        &mut self,
748        category: &str,
749        limit: usize,
750    ) -> (Vec<KnowledgeFact>, usize) {
751        let mut idxs: Vec<usize> = self
752            .facts
753            .iter()
754            .enumerate()
755            .filter(|(_, f)| f.is_current() && f.category == category)
756            .map(|(i, _)| i)
757            .collect();
758
759        idxs.sort_by(|a, b| sort_fact_for_output(&self.facts[*a], &self.facts[*b]));
760
761        let total = idxs.len();
762        idxs.truncate(limit);
763
764        let now = Utc::now();
765        let mut out = Vec::new();
766        for idx in idxs {
767            if let Some(f) = self.facts.get_mut(idx) {
768                f.retrieval_count = f.retrieval_count.saturating_add(1);
769                f.last_retrieved = Some(now);
770                out.push(f.clone());
771            }
772        }
773
774        (out, total)
775    }
776}
777
778impl KnowledgeFact {
779    pub fn is_current(&self) -> bool {
780        self.valid_until.is_none()
781    }
782
783    pub fn was_valid_at(&self, at: DateTime<Utc>) -> bool {
784        let after_start = self.valid_from.is_none_or(|from| at >= from);
785        let before_end = self.valid_until.is_none_or(|until| at <= until);
786        after_start && before_end
787    }
788}
789
/// Maps a confidence score to a one-to-five star rating string.
///
/// Tiers: >= 0.95 five stars, >= 0.85 four, >= 0.7 three, >= 0.5 two; anything
/// lower (including NaN) gets one star.
fn confidence_stars(confidence: f32) -> &'static str {
    // Ordered from the highest floor down; the first floor reached wins.
    const TIERS: [(f32, &str); 4] = [
        (0.95, "★★★★★"),
        (0.85, "★★★★"),
        (0.7, "★★★"),
        (0.5, "★★"),
    ];

    TIERS
        .iter()
        .find(|(floor, _)| confidence >= *floor)
        .map_or("★", |(_, stars)| *stars)
}
803
/// Case-insensitive Jaccard similarity between the whitespace-separated word sets
/// of `a` and `b`.
///
/// Returns a value in `0.0..=1.0`; two inputs without any words are considered
/// identical (1.0).
fn string_similarity(a: &str, b: &str) -> f32 {
    use std::collections::HashSet;

    let (left, right) = (a.to_lowercase(), b.to_lowercase());
    let left_words: HashSet<&str> = left.split_whitespace().collect();
    let right_words: HashSet<&str> = right.split_whitespace().collect();

    let shared = left_words
        .iter()
        .filter(|word| right_words.contains(*word))
        .count();
    // |A ∪ B| = |A| + |B| − |A ∩ B|
    let combined = left_words.len() + right_words.len() - shared;

    match combined {
        // Only possible when neither side has any words.
        0 => 1.0,
        total => shared as f32 / total as f32,
    }
}
823
824fn knowledge_dir(project_hash: &str) -> Result<PathBuf, String> {
825    Ok(crate::core::data_dir::lean_ctx_data_dir()?
826        .join("knowledge")
827        .join(project_hash))
828}
829
830fn sort_fact_for_output(a: &KnowledgeFact, b: &KnowledgeFact) -> std::cmp::Ordering {
831    salience_score(b)
832        .cmp(&salience_score(a))
833        .then_with(|| {
834            b.confidence
835                .partial_cmp(&a.confidence)
836                .unwrap_or(std::cmp::Ordering::Equal)
837        })
838        .then_with(|| b.confirmation_count.cmp(&a.confirmation_count))
839        .then_with(|| b.retrieval_count.cmp(&a.retrieval_count))
840        .then_with(|| b.last_retrieved.cmp(&a.last_retrieved))
841        .then_with(|| b.last_confirmed.cmp(&a.last_confirmed))
842        .then_with(|| a.category.cmp(&b.category))
843        .then_with(|| a.key.cmp(&b.key))
844        .then_with(|| a.value.cmp(&b.value))
845}
846
847fn salience_score(f: &KnowledgeFact) -> u32 {
848    let cat = f.category.to_lowercase();
849    let base: u32 = match cat.as_str() {
850        "decision" => 70,
851        "gotcha" => 75,
852        "architecture" | "arch" => 60,
853        "security" => 65,
854        "testing" | "tests" => 55,
855        "deployment" | "deploy" => 55,
856        "conventions" | "convention" => 45,
857        "finding" => 40,
858        _ => 30,
859    };
860
861    let confidence_bonus = (f.confidence.clamp(0.0, 1.0) * 30.0) as u32;
862    let confirmation_bonus = f.confirmation_count.min(15);
863    let retrieval_bonus = ((f.retrieval_count as f32).ln_1p() * 8.0).min(20.0) as u32;
864
865    let recency_bonus = f
866        .last_retrieved
867        .map(|t| {
868            let days = Utc::now().signed_duration_since(t).num_days();
869            if days <= 7 {
870                10u32
871            } else if days <= 30 {
872                5u32
873            } else {
874                0u32
875            }
876        })
877        .unwrap_or(0u32);
878
879    base + confidence_bonus + confirmation_bonus + retrieval_bonus + recency_bonus
880}
881
/// Derives a 16-character, zero-padded, lowercase hex identifier from a
/// project root string.
///
/// The value is the 64-bit output of the standard library's `DefaultHasher`
/// fed with `root`.
///
/// NOTE(review): the std docs state that `DefaultHasher`'s algorithm is
/// unspecified and may change between Rust releases, so this identifier is
/// only stable for a given toolchain — confirm that is acceptable wherever
/// the hash is persisted.
fn hash_project_root(root: &str) -> String {
    use std::hash::{Hash, Hasher};

    let mut state = std::collections::hash_map::DefaultHasher::new();
    root.hash(&mut state);
    let digest: u64 = state.finish();
    format!("{digest:016x}")
}
890
// Unit tests for `ProjectKnowledge`. Every test builds a fresh in-memory
// instance via `ProjectKnowledge::new` and exercises it through its methods
// (and, in a few places, by poking `facts` directly).
#[cfg(test)]
mod tests {
    use super::*;

    /// Stored facts can be found again by query: a single-word query returns
    /// the matching fact, and a two-word query ("api rate") returns the fact
    /// stored under category `api` / key `rate-limit`.
    #[test]
    fn remember_and_recall() {
        let mut k = ProjectKnowledge::new("/tmp/test-project");
        k.remember("architecture", "auth", "JWT RS256", "session-1", 0.9);
        k.remember("api", "rate-limit", "100/min", "session-1", 0.8);

        let results = k.recall("auth");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].value, "JWT RS256");

        let results = k.recall("api rate");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].key, "rate-limit");
    }

    /// Remembering a new value under an existing category/key leaves exactly
    /// one current fact, carrying the newer value.
    #[test]
    fn upsert_existing_fact() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.7);
        k.remember("arch", "db", "PostgreSQL 16 with pgvector", "s2", 0.95);

        let current: Vec<_> = k.facts.iter().filter(|f| f.is_current()).collect();
        assert_eq!(current.len(), 1);
        assert_eq!(current[0].value, "PostgreSQL 16 with pgvector");
    }

    /// A conflicting value for a high-confidence (0.95) fact whose
    /// `confirmation_count` was forced to 3 is reported as a `High` severity
    /// contradiction.
    #[test]
    fn contradiction_detection() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.95);
        // Bump the confirmation count by hand instead of calling `remember` repeatedly.
        k.facts[0].confirmation_count = 3;

        let contradiction = k.check_contradiction("arch", "db", "MySQL");
        assert!(contradiction.is_some());
        let c = contradiction.unwrap();
        assert_eq!(c.severity, ContradictionSeverity::High);
    }

    /// Replacing a well-confirmed fact keeps the old entry in `facts` but
    /// leaves only the new value current: two `db` facts in total, one current.
    #[test]
    fn temporal_validity() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.95);
        k.facts[0].confirmation_count = 3;

        k.remember("arch", "db", "MySQL", "s2", 0.9);

        let current: Vec<_> = k.facts.iter().filter(|f| f.is_current()).collect();
        assert_eq!(current.len(), 1);
        assert_eq!(current[0].value, "MySQL");

        let all_db: Vec<_> = k.facts.iter().filter(|f| f.key == "db").collect();
        assert_eq!(all_db.len(), 2);
    }

    /// A new fact starts with `confirmation_count == 1`; remembering the
    /// identical value again raises it to 2.
    #[test]
    fn confirmation_count() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.9);
        assert_eq!(k.facts[0].confirmation_count, 1);

        k.remember("arch", "db", "PostgreSQL", "s2", 0.9);
        assert_eq!(k.facts[0].confirmation_count, 2);
    }

    /// `remove_fact` returns `true` and empties `facts` when the fact exists,
    /// and returns `false` when called again for the same category/key.
    #[test]
    fn remove_fact() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.9);
        assert!(k.remove_fact("arch", "db"));
        assert!(k.facts.is_empty());
        assert!(!k.remove_fact("arch", "db"));
    }

    /// Three facts spread over two categories yield two rooms.
    /// NOTE(review): presumably one room per distinct category — confirm
    /// against `list_rooms`.
    #[test]
    fn list_rooms() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("architecture", "auth", "JWT", "s1", 0.9);
        k.remember("architecture", "db", "PG", "s1", 0.9);
        k.remember("deploy", "host", "AWS", "s1", 0.8);

        let rooms = k.list_rooms();
        assert_eq!(rooms.len(), 2);
    }

    /// `format_aaak` output contains an upper-cased category header
    /// (`ARCHITECTURE:`) and a `key=value` entry.
    #[test]
    fn aaak_format() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("architecture", "auth", "JWT RS256", "s1", 0.95);
        k.remember("architecture", "db", "PostgreSQL", "s1", 0.7);

        let aaak = k.format_aaak();
        assert!(aaak.contains("ARCHITECTURE:"));
        assert!(aaak.contains("auth=JWT RS256"));
    }

    /// One `consolidate` call appends one history entry that records both
    /// source sessions.
    #[test]
    fn consolidate_history() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.consolidate(
            "Migrated from REST to GraphQL",
            vec!["s1".into(), "s2".into()],
        );
        assert_eq!(k.history.len(), 1);
        assert_eq!(k.history[0].from_sessions.len(), 2);
    }

    /// `format_summary` includes the knowledge header, a `key: value` line for
    /// the stored fact, and the patterns header once a pattern has been added.
    #[test]
    fn format_summary_output() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("architecture", "auth", "JWT RS256", "s1", 0.9);
        k.add_pattern(
            "naming",
            "snake_case for functions",
            vec!["get_user()".into()],
            "s1",
        );
        let summary = k.format_summary();
        assert!(summary.contains("PROJECT KNOWLEDGE:"));
        assert!(summary.contains("auth: JWT RS256"));
        assert!(summary.contains("PROJECT PATTERNS:"));
    }

    /// `recall_at_time` answers with the value that was valid at the given
    /// instant: the original value for a timestamp taken before the
    /// replacement, the new value for "now".
    #[test]
    fn temporal_recall_at_time() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.95);
        k.facts[0].confirmation_count = 3;

        let before_change = Utc::now();
        // Short pause so the replacement below happens measurably after `before_change`.
        std::thread::sleep(std::time::Duration::from_millis(10));

        k.remember("arch", "db", "MySQL", "s2", 0.9);

        let results = k.recall_at_time("db", before_change);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].value, "PostgreSQL");

        let results_now = k.recall_at_time("db", Utc::now());
        assert_eq!(results_now.len(), 1);
        assert_eq!(results_now[0].value, "MySQL");
    }

    /// `timeline` for a category returns both the replaced and the
    /// replacement fact, with the non-current entry first and the current
    /// entry second.
    #[test]
    fn timeline_shows_history() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.95);
        k.facts[0].confirmation_count = 3;
        k.remember("arch", "db", "MySQL", "s2", 0.9);

        let timeline = k.timeline("arch");
        assert_eq!(timeline.len(), 2);
        assert!(!timeline[0].is_current());
        assert!(timeline[1].is_current());
    }

    /// `format_wakeup` output contains the `FACTS:` marker and one
    /// `category/key=value` entry per stored fact.
    #[test]
    fn wakeup_format() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "auth", "JWT", "s1", 0.95);
        k.remember("arch", "db", "PG", "s1", 0.8);

        let wakeup = k.format_wakeup();
        assert!(wakeup.contains("FACTS:"));
        assert!(wakeup.contains("arch/auth=JWT"));
        assert!(wakeup.contains("arch/db=PG"));
    }

    /// A `decision` fact is listed before a `finding` fact in the wakeup
    /// output even though the finding has slightly higher confidence
    /// (0.9 vs 0.85).
    #[test]
    fn salience_prioritizes_decisions_over_findings_at_similar_confidence() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("finding", "f1", "some thing", "s1", 0.9);
        k.remember("decision", "d1", "important", "s1", 0.85);

        let wakeup = k.format_wakeup();
        // Drop the leading "FACTS:" marker (if present) and split the rest on '|'.
        let items = wakeup
            .strip_prefix("FACTS:")
            .unwrap_or(&wakeup)
            .split('|')
            .collect::<Vec<_>>();
        assert!(
            items
                .first()
                .is_some_and(|s| s.contains("decision/d1=important")),
            "expected decision first in wakeup: {wakeup}"
        );
    }

    /// A conflicting value for a low-confidence (0.4) fact is still reported,
    /// but only with `Low` severity.
    #[test]
    fn low_confidence_contradiction() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.4);

        let c = k.check_contradiction("arch", "db", "MySQL");
        assert!(c.is_some());
        assert_eq!(c.unwrap().severity, ContradictionSeverity::Low);
    }

    /// Checking the exact value that is already stored reports no contradiction.
    #[test]
    fn no_contradiction_for_same_value() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember("arch", "db", "PostgreSQL", "s1", 0.95);

        let c = k.check_contradiction("arch", "db", "PostgreSQL");
        assert!(c.is_none());
    }

    /// A value that differs from the stored one only by one extra trailing
    /// word reports no contradiction.
    /// NOTE(review): presumably because the word-overlap similarity stays
    /// above `CONTRADICTION_THRESHOLD` — confirm against `check_contradiction`.
    #[test]
    fn no_contradiction_for_similar_values() {
        let mut k = ProjectKnowledge::new("/tmp/test");
        k.remember(
            "arch",
            "db",
            "PostgreSQL 16 production database server",
            "s1",
            0.95,
        );

        let c = k.check_contradiction(
            "arch",
            "db",
            "PostgreSQL 16 production database server config",
        );
        assert!(c.is_none());
    }
}