Skip to main content

lean_ctx/core/
knowledge.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::path::PathBuf;
4
5use crate::core::memory_policy::MemoryPolicy;
6
/// Everything remembered about one project: individual facts, recurring patterns and
/// consolidated session history. The whole struct is (de)serialized with serde and is
/// what `save`/`load` write to and read from `knowledge.json`.
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct ProjectKnowledge {
    /// Project root exactly as passed to `ProjectKnowledge::new`.
9    pub project_root: String,
    /// Hash derived from the project root (`hash_project_root`); used as the name of the
    /// on-disk knowledge directory.
10    pub project_hash: String,
    /// All facts, including archived ones (those whose `valid_until` is set).
11    pub facts: Vec<KnowledgeFact>,
    /// Recurring patterns recorded via `add_pattern`.
12    pub patterns: Vec<ProjectPattern>,
    /// Consolidated insights appended by `consolidate`.
13    pub history: Vec<ConsolidatedInsight>,
    /// Time of the last mutation performed through this type's methods.
14    pub updated_at: DateTime<Utc>,
15}
16
/// A single remembered fact, addressed by `category` + `key`.
///
/// Fields marked `#[serde(default)]` fall back to their default value when absent from
/// the serialized input.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct KnowledgeFact {
    /// Grouping label (a "room" in `list_rooms`).
19    pub category: String,
    /// Identifier of the fact within its category.
20    pub key: String,
    /// The remembered content.
21    pub value: String,
    /// Id of the session that last wrote or confirmed this fact.
22    pub source_session: String,
    /// Confidence in the fact; `quality_score` clamps it to `0.0..=1.0` before use.
23    pub confidence: f32,
    /// When the fact was first stored.
24    pub created_at: DateTime<Utc>,
    /// When the fact was last confirmed or overwritten by `remember`.
25    pub last_confirmed: DateTime<Utc>,
    /// Number of times the fact was returned by the `*_for_output` recall methods.
26    #[serde(default)]
27    pub retrieval_count: u32,
    /// Time of the most recent `*_for_output` retrieval, if any.
28    #[serde(default)]
29    pub last_retrieved: Option<DateTime<Utc>>,
    /// Start of the validity window; `None` is treated as "always valid before `valid_until`".
30    #[serde(default)]
31    pub valid_from: Option<DateTime<Utc>>,
    /// End of the validity window; `None` means the fact is current (see `is_current`).
32    #[serde(default)]
33    pub valid_until: Option<DateTime<Utc>>,
    /// `"category/key"` of the archived fact this one replaced, if any.
34    #[serde(default)]
35    pub supersedes: Option<String>,
    /// How often the same value was re-remembered; reset to 1 when the value is replaced.
36    #[serde(default)]
37    pub confirmation_count: u32,
    /// Positive feedback counter; feeds into `quality_score`.
38    #[serde(default)]
39    pub feedback_up: u32,
    /// Negative feedback counter; feeds into `quality_score`.
40    #[serde(default)]
41    pub feedback_down: u32,
    /// NOTE(review): presumably the time of the latest feedback; it is not written anywhere
    /// in the visible part of this file — confirm against the feedback code path.
42    #[serde(default)]
43    pub last_feedback: Option<DateTime<Utc>>,
44}
45
/// Report produced by `ProjectKnowledge::check_contradiction` when a new value conflicts
/// with the current fact stored under the same category and key.
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct Contradiction {
    /// Key of the fact being contradicted.
48    pub existing_key: String,
    /// Value currently stored for that key.
49    pub existing_value: String,
    /// Value that was about to be stored.
50    pub new_value: String,
    /// Category of the fact being contradicted.
51    pub category: String,
    /// How strongly the existing fact was established.
52    pub severity: ContradictionSeverity,
    /// Human-readable description of how the conflict is handled.
53    pub resolution: String,
54}
55
/// Severity of a [`Contradiction`], derived from how well established the existing fact is.
56#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
57pub enum ContradictionSeverity {
    /// Existing fact's confidence is below the policy's contradiction threshold.
58    Low,
    /// Existing fact's confidence reaches the policy's contradiction threshold.
59    Medium,
    /// Existing fact has confidence >= 0.9 and at least two confirmations.
60    High,
61}
62
/// A recurring pattern observed in the project, identified by `pattern_type` + `description`.
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct ProjectPattern {
    /// Kind of pattern; part of the identity used by `add_pattern` to merge duplicates.
65    pub pattern_type: String,
    /// Free-text description; part of the identity used by `add_pattern`.
66    pub description: String,
    /// Example strings; `add_pattern` appends new ones without duplicating existing entries.
67    pub examples: Vec<String>,
    /// Id of the session that first recorded the pattern.
68    pub source_session: String,
    /// When the pattern was first recorded.
69    pub created_at: DateTime<Utc>,
70}
71
/// A summary distilled from one or more sessions; appended by `ProjectKnowledge::consolidate`.
72#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct ConsolidatedInsight {
    /// The consolidated text.
74    pub summary: String,
    /// Ids of the sessions the summary was derived from.
75    pub from_sessions: Vec<String>,
    /// When the insight was recorded.
76    pub timestamp: DateTime<Utc>,
77}
78
79impl ProjectKnowledge {
80    pub fn run_memory_lifecycle(
81        &mut self,
82        policy: &MemoryPolicy,
83    ) -> crate::core::memory_lifecycle::LifecycleReport {
84        let cfg = crate::core::memory_lifecycle::LifecycleConfig {
85            max_facts: policy.knowledge.max_facts,
86            decay_rate_per_day: policy.lifecycle.decay_rate,
87            low_confidence_threshold: policy.lifecycle.low_confidence_threshold,
88            stale_days: policy.lifecycle.stale_days,
89            consolidation_similarity: policy.lifecycle.similarity_threshold,
90        };
91        crate::core::memory_lifecycle::run_lifecycle(&mut self.facts, &cfg)
92    }
93
94    pub fn new(project_root: &str) -> Self {
95        Self {
96            project_root: project_root.to_string(),
97            project_hash: hash_project_root(project_root),
98            facts: Vec::new(),
99            patterns: Vec::new(),
100            history: Vec::new(),
101            updated_at: Utc::now(),
102        }
103    }
104
105    pub fn check_contradiction(
106        &self,
107        category: &str,
108        key: &str,
109        new_value: &str,
110        policy: &MemoryPolicy,
111    ) -> Option<Contradiction> {
112        let existing = self
113            .facts
114            .iter()
115            .find(|f| f.category == category && f.key == key && f.is_current())?;
116
117        if existing.value.to_lowercase() == new_value.to_lowercase() {
118            return None;
119        }
120
121        let similarity = string_similarity(&existing.value, new_value);
122        if similarity > 0.8 {
123            return None;
124        }
125
126        let severity = if existing.confidence >= 0.9 && existing.confirmation_count >= 2 {
127            ContradictionSeverity::High
128        } else if existing.confidence >= policy.knowledge.contradiction_threshold {
129            ContradictionSeverity::Medium
130        } else {
131            ContradictionSeverity::Low
132        };
133
134        let resolution = match severity {
135            ContradictionSeverity::High => format!(
136                "High-confidence fact [{category}/{key}] changed: '{}' -> '{new_value}' (was confirmed {}x). Previous value archived.",
137                existing.value, existing.confirmation_count
138            ),
139            ContradictionSeverity::Medium => format!(
140                "Fact [{category}/{key}] updated: '{}' -> '{new_value}'",
141                existing.value
142            ),
143            ContradictionSeverity::Low => format!(
144                "Low-confidence fact [{category}/{key}] replaced: '{}' -> '{new_value}'",
145                existing.value
146            ),
147        };
148
149        Some(Contradiction {
150            existing_key: key.to_string(),
151            existing_value: existing.value.clone(),
152            new_value: new_value.to_string(),
153            category: category.to_string(),
154            severity,
155            resolution,
156        })
157    }
158
159    pub fn remember(
160        &mut self,
161        category: &str,
162        key: &str,
163        value: &str,
164        session_id: &str,
165        confidence: f32,
166        policy: &MemoryPolicy,
167    ) -> Option<Contradiction> {
168        let contradiction = self.check_contradiction(category, key, value, policy);
169
170        if let Some(existing) = self
171            .facts
172            .iter_mut()
173            .find(|f| f.category == category && f.key == key && f.is_current())
174        {
175            if existing.value == value {
176                existing.last_confirmed = Utc::now();
177                existing.source_session = session_id.to_string();
178                existing.confidence = f32::midpoint(existing.confidence, confidence);
179                existing.confirmation_count += 1;
180            } else if existing.confidence >= 0.9 && existing.confirmation_count >= 2 {
181                existing.valid_until = Some(Utc::now());
182                let superseded_id = format!("{}/{}", existing.category, existing.key);
183                let now = Utc::now();
184                self.facts.push(KnowledgeFact {
185                    category: category.to_string(),
186                    key: key.to_string(),
187                    value: value.to_string(),
188                    source_session: session_id.to_string(),
189                    confidence,
190                    created_at: now,
191                    last_confirmed: now,
192                    retrieval_count: 0,
193                    last_retrieved: None,
194                    valid_from: Some(now),
195                    valid_until: None,
196                    supersedes: Some(superseded_id),
197                    confirmation_count: 1,
198                    feedback_up: 0,
199                    feedback_down: 0,
200                    last_feedback: None,
201                });
202            } else {
203                existing.value = value.to_string();
204                existing.confidence = confidence;
205                existing.last_confirmed = Utc::now();
206                existing.source_session = session_id.to_string();
207                existing.valid_from = existing.valid_from.or(Some(existing.created_at));
208                existing.confirmation_count = 1;
209            }
210        } else {
211            let now = Utc::now();
212            self.facts.push(KnowledgeFact {
213                category: category.to_string(),
214                key: key.to_string(),
215                value: value.to_string(),
216                source_session: session_id.to_string(),
217                confidence,
218                created_at: now,
219                last_confirmed: now,
220                retrieval_count: 0,
221                last_retrieved: None,
222                valid_from: Some(now),
223                valid_until: None,
224                supersedes: None,
225                confirmation_count: 1,
226                feedback_up: 0,
227                feedback_down: 0,
228                last_feedback: None,
229            });
230        }
231
232        // No hard-prune: archive-only lifecycle will compact if needed.
233        if self.facts.len() > policy.knowledge.max_facts.saturating_mul(2) {
234            let _ = self.run_memory_lifecycle(policy);
235        }
236
237        self.updated_at = Utc::now();
238
239        let action = if contradiction.is_some() {
240            "contradict"
241        } else {
242            "remember"
243        };
244        crate::core::events::emit(crate::core::events::EventKind::KnowledgeUpdate {
245            category: category.to_string(),
246            key: key.to_string(),
247            action: action.to_string(),
248        });
249
250        contradiction
251    }
252
253    pub fn recall(&self, query: &str) -> Vec<&KnowledgeFact> {
254        let q = query.to_lowercase();
255        let terms: Vec<&str> = q.split_whitespace().collect();
256
257        let mut results: Vec<(&KnowledgeFact, f32)> = self
258            .facts
259            .iter()
260            .filter(|f| f.is_current())
261            .filter_map(|f| {
262                let searchable = format!(
263                    "{} {} {} {}",
264                    f.category.to_lowercase(),
265                    f.key.to_lowercase(),
266                    f.value.to_lowercase(),
267                    f.source_session
268                );
269                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
270                if match_count > 0 {
271                    let relevance = (match_count as f32 / terms.len() as f32) * f.quality_score();
272                    Some((f, relevance))
273                } else {
274                    None
275                }
276            })
277            .collect();
278
279        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
280        results.into_iter().map(|(f, _)| f).collect()
281    }
282
283    pub fn recall_by_category(&self, category: &str) -> Vec<&KnowledgeFact> {
284        self.facts
285            .iter()
286            .filter(|f| f.category == category && f.is_current())
287            .collect()
288    }
289
290    pub fn recall_at_time(&self, query: &str, at: DateTime<Utc>) -> Vec<&KnowledgeFact> {
291        let q = query.to_lowercase();
292        let terms: Vec<&str> = q.split_whitespace().collect();
293
294        let mut results: Vec<(&KnowledgeFact, f32)> = self
295            .facts
296            .iter()
297            .filter(|f| f.was_valid_at(at))
298            .filter_map(|f| {
299                let searchable = format!(
300                    "{} {} {}",
301                    f.category.to_lowercase(),
302                    f.key.to_lowercase(),
303                    f.value.to_lowercase(),
304                );
305                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
306                if match_count > 0 {
307                    Some((f, match_count as f32 / terms.len() as f32))
308                } else {
309                    None
310                }
311            })
312            .collect();
313
314        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
315        results.into_iter().map(|(f, _)| f).collect()
316    }
317
318    pub fn timeline(&self, category: &str) -> Vec<&KnowledgeFact> {
319        let mut facts: Vec<&KnowledgeFact> = self
320            .facts
321            .iter()
322            .filter(|f| f.category == category)
323            .collect();
324        facts.sort_by_key(|x| x.created_at);
325        facts
326    }
327
328    pub fn list_rooms(&self) -> Vec<(String, usize)> {
329        let mut categories: std::collections::BTreeMap<String, usize> =
330            std::collections::BTreeMap::new();
331        for f in &self.facts {
332            if f.is_current() {
333                *categories.entry(f.category.clone()).or_insert(0) += 1;
334            }
335        }
336        categories.into_iter().collect()
337    }
338
339    pub fn add_pattern(
340        &mut self,
341        pattern_type: &str,
342        description: &str,
343        examples: Vec<String>,
344        session_id: &str,
345        policy: &MemoryPolicy,
346    ) {
347        if let Some(existing) = self
348            .patterns
349            .iter_mut()
350            .find(|p| p.pattern_type == pattern_type && p.description == description)
351        {
352            for ex in &examples {
353                if !existing.examples.contains(ex) {
354                    existing.examples.push(ex.clone());
355                }
356            }
357            return;
358        }
359
360        self.patterns.push(ProjectPattern {
361            pattern_type: pattern_type.to_string(),
362            description: description.to_string(),
363            examples,
364            source_session: session_id.to_string(),
365            created_at: Utc::now(),
366        });
367
368        if self.patterns.len() > policy.knowledge.max_patterns {
369            self.patterns.truncate(policy.knowledge.max_patterns);
370        }
371        self.updated_at = Utc::now();
372    }
373
374    pub fn consolidate(&mut self, summary: &str, session_ids: Vec<String>, policy: &MemoryPolicy) {
375        self.history.push(ConsolidatedInsight {
376            summary: summary.to_string(),
377            from_sessions: session_ids,
378            timestamp: Utc::now(),
379        });
380
381        if self.history.len() > policy.knowledge.max_history {
382            self.history
383                .drain(0..self.history.len() - policy.knowledge.max_history);
384        }
385        self.updated_at = Utc::now();
386    }
387
388    pub fn remove_fact(&mut self, category: &str, key: &str) -> bool {
389        let before = self.facts.len();
390        self.facts
391            .retain(|f| !(f.category == category && f.key == key));
392        let removed = self.facts.len() < before;
393        if removed {
394            self.updated_at = Utc::now();
395        }
396        removed
397    }
398
399    pub fn format_summary(&self) -> String {
400        let mut out = String::new();
401        let current_facts: Vec<&KnowledgeFact> =
402            self.facts.iter().filter(|f| f.is_current()).collect();
403
404        if !current_facts.is_empty() {
405            out.push_str("PROJECT KNOWLEDGE:\n");
406            let mut rooms: Vec<(String, usize)> = self.list_rooms();
407            rooms.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
408
409            let total_rooms = rooms.len();
410            rooms.truncate(crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT);
411
412            for (cat, _count) in rooms {
413                out.push_str(&format!("  [{cat}]\n"));
414
415                let mut facts_in_cat: Vec<&KnowledgeFact> = current_facts
416                    .iter()
417                    .copied()
418                    .filter(|f| f.category == cat)
419                    .collect();
420                facts_in_cat.sort_by(|a, b| sort_fact_for_output(a, b));
421
422                let total_in_cat = facts_in_cat.len();
423                facts_in_cat.truncate(crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT);
424
425                for f in facts_in_cat {
426                    let key = crate::core::sanitize::neutralize_metadata(&f.key);
427                    let val = crate::core::sanitize::neutralize_metadata(&f.value);
428                    out.push_str(&format!(
429                        "    {}: {} (confidence: {:.0}%)\n",
430                        key,
431                        val,
432                        f.confidence * 100.0
433                    ));
434                }
435                if total_in_cat > crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT {
436                    out.push_str(&format!(
437                        "    … +{} more\n",
438                        total_in_cat - crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT
439                    ));
440                }
441            }
442
443            if total_rooms > crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT {
444                out.push_str(&format!(
445                    "  … +{} more rooms\n",
446                    total_rooms - crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT
447                ));
448            }
449        }
450
451        if !self.patterns.is_empty() {
452            out.push_str("PROJECT PATTERNS:\n");
453            let mut patterns = self.patterns.clone();
454            patterns.sort_by(|a, b| {
455                b.created_at
456                    .cmp(&a.created_at)
457                    .then_with(|| a.pattern_type.cmp(&b.pattern_type))
458                    .then_with(|| a.description.cmp(&b.description))
459            });
460            let total = patterns.len();
461            patterns.truncate(crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT);
462            for p in &patterns {
463                let ty = crate::core::sanitize::neutralize_metadata(&p.pattern_type);
464                let desc = crate::core::sanitize::neutralize_metadata(&p.description);
465                out.push_str(&format!("  [{ty}] {desc}\n"));
466            }
467            if total > crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT {
468                out.push_str(&format!(
469                    "  … +{} more\n",
470                    total - crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT
471                ));
472            }
473        }
474
475        if out.is_empty() {
476            out
477        } else {
478            crate::core::sanitize::fence_content("project_knowledge", out.trim_end())
479        }
480    }
481
482    pub fn format_aaak(&self) -> String {
483        let current_facts: Vec<&KnowledgeFact> =
484            self.facts.iter().filter(|f| f.is_current()).collect();
485
486        if current_facts.is_empty() && self.patterns.is_empty() {
487            return String::new();
488        }
489
490        let mut out = String::new();
491
492        let mut rooms: Vec<(String, usize)> = self.list_rooms();
493        rooms.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
494        rooms.truncate(crate::core::budgets::KNOWLEDGE_AAAK_ROOMS_LIMIT);
495
496        for (cat, _count) in rooms {
497            let mut facts_in_cat: Vec<&KnowledgeFact> = current_facts
498                .iter()
499                .copied()
500                .filter(|f| f.category == cat)
501                .collect();
502            facts_in_cat.sort_by(|a, b| sort_fact_for_output(a, b));
503            facts_in_cat.truncate(crate::core::budgets::KNOWLEDGE_AAAK_FACTS_PER_ROOM_LIMIT);
504
505            let items: Vec<String> = facts_in_cat
506                .iter()
507                .map(|f| {
508                    let stars = confidence_stars(f.confidence);
509                    let key = crate::core::sanitize::neutralize_metadata(&f.key);
510                    let val = crate::core::sanitize::neutralize_metadata(&f.value);
511                    format!("{key}={val}{stars}")
512                })
513                .collect();
514            out.push_str(&format!(
515                "{}:{}\n",
516                crate::core::sanitize::neutralize_metadata(&cat.to_uppercase()),
517                items.join("|")
518            ));
519        }
520
521        if !self.patterns.is_empty() {
522            let mut patterns = self.patterns.clone();
523            patterns.sort_by(|a, b| {
524                b.created_at
525                    .cmp(&a.created_at)
526                    .then_with(|| a.pattern_type.cmp(&b.pattern_type))
527                    .then_with(|| a.description.cmp(&b.description))
528            });
529            patterns.truncate(crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT);
530            let pat_items: Vec<String> = patterns
531                .iter()
532                .map(|p| {
533                    let ty = crate::core::sanitize::neutralize_metadata(&p.pattern_type);
534                    let desc = crate::core::sanitize::neutralize_metadata(&p.description);
535                    format!("{ty}.{desc}")
536                })
537                .collect();
538            out.push_str(&format!("PAT:{}\n", pat_items.join("|")));
539        }
540
541        if out.is_empty() {
542            out
543        } else {
544            crate::core::sanitize::fence_content("project_memory_aaak", out.trim_end())
545        }
546    }
547
548    pub fn format_wakeup(&self) -> String {
549        let current_facts: Vec<&KnowledgeFact> = self
550            .facts
551            .iter()
552            .filter(|f| f.is_current() && f.confidence >= 0.7)
553            .collect();
554
555        if current_facts.is_empty() {
556            return String::new();
557        }
558
559        let mut top_facts: Vec<&KnowledgeFact> = current_facts;
560        top_facts.sort_by(|a, b| sort_fact_for_output(a, b));
561        top_facts.truncate(10);
562
563        let items: Vec<String> = top_facts
564            .iter()
565            .map(|f| {
566                let cat = crate::core::sanitize::neutralize_metadata(&f.category);
567                let key = crate::core::sanitize::neutralize_metadata(&f.key);
568                let val = crate::core::sanitize::neutralize_metadata(&f.value);
569                format!("{cat}/{key}={val}")
570            })
571            .collect();
572
573        crate::core::sanitize::fence_content(
574            "project_facts_wakeup",
575            &format!("FACTS:{}", items.join("|")),
576        )
577    }
578
579    pub fn save(&self) -> Result<(), String> {
580        let dir = knowledge_dir(&self.project_hash)?;
581        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
582
583        let path = dir.join("knowledge.json");
584        let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
585        std::fs::write(&path, json).map_err(|e| e.to_string())
586    }
587
588    pub fn load(project_root: &str) -> Option<Self> {
589        let hash = hash_project_root(project_root);
590        let dir = knowledge_dir(&hash).ok()?;
591        let path = dir.join("knowledge.json");
592
593        if let Ok(content) = std::fs::read_to_string(&path) {
594            if let Ok(k) = serde_json::from_str::<Self>(&content) {
595                return Some(k);
596            }
597        }
598
599        let old_hash = crate::core::project_hash::hash_path_only(project_root);
600        if old_hash != hash {
601            crate::core::project_hash::migrate_if_needed(&old_hash, &hash, project_root);
602            if let Ok(content) = std::fs::read_to_string(&path) {
603                if let Ok(mut k) = serde_json::from_str::<Self>(&content) {
604                    k.project_hash = hash;
605                    let _ = k.save();
606                    return Some(k);
607                }
608            }
609        }
610
611        None
612    }
613
614    pub fn load_or_create(project_root: &str) -> Self {
615        Self::load(project_root).unwrap_or_else(|| Self::new(project_root))
616    }
617
618    /// Migrates legacy knowledge that was accidentally stored under an empty project_root ("")
619    /// into the given `target_root`. Keeps a timestamped backup of the legacy file.
620    pub fn migrate_legacy_empty_root(
621        target_root: &str,
622        policy: &MemoryPolicy,
623    ) -> Result<bool, String> {
624        if target_root.trim().is_empty() {
625            return Ok(false);
626        }
627
628        let Some(legacy) = Self::load("") else {
629            return Ok(false);
630        };
631
632        if !legacy.project_root.trim().is_empty() {
633            return Ok(false);
634        }
635        if legacy.facts.is_empty() && legacy.patterns.is_empty() && legacy.history.is_empty() {
636            return Ok(false);
637        }
638
639        let mut target = Self::load_or_create(target_root);
640
641        fn fact_key(f: &KnowledgeFact) -> String {
642            format!(
643                "{}|{}|{}|{}|{}",
644                f.category, f.key, f.value, f.source_session, f.created_at
645            )
646        }
647        fn pattern_key(p: &ProjectPattern) -> String {
648            format!(
649                "{}|{}|{}|{}",
650                p.pattern_type, p.description, p.source_session, p.created_at
651            )
652        }
653        fn history_key(h: &ConsolidatedInsight) -> String {
654            format!(
655                "{}|{}|{}",
656                h.summary,
657                h.from_sessions.join(","),
658                h.timestamp
659            )
660        }
661
662        let mut seen_facts: std::collections::HashSet<String> =
663            target.facts.iter().map(fact_key).collect();
664        for f in legacy.facts {
665            if seen_facts.insert(fact_key(&f)) {
666                target.facts.push(f);
667            }
668        }
669
670        let mut seen_patterns: std::collections::HashSet<String> =
671            target.patterns.iter().map(pattern_key).collect();
672        for p in legacy.patterns {
673            if seen_patterns.insert(pattern_key(&p)) {
674                target.patterns.push(p);
675            }
676        }
677
678        let mut seen_history: std::collections::HashSet<String> =
679            target.history.iter().map(history_key).collect();
680        for h in legacy.history {
681            if seen_history.insert(history_key(&h)) {
682                target.history.push(h);
683            }
684        }
685
686        // Enforce caps to avoid unbounded growth from migration.
687        target.facts.sort_by(|a, b| {
688            b.created_at
689                .cmp(&a.created_at)
690                .then_with(|| b.confidence.total_cmp(&a.confidence))
691        });
692        if target.facts.len() > policy.knowledge.max_facts {
693            target.facts.truncate(policy.knowledge.max_facts);
694        }
695        target
696            .patterns
697            .sort_by_key(|x| std::cmp::Reverse(x.created_at));
698        if target.patterns.len() > policy.knowledge.max_patterns {
699            target.patterns.truncate(policy.knowledge.max_patterns);
700        }
701        target
702            .history
703            .sort_by_key(|x| std::cmp::Reverse(x.timestamp));
704        if target.history.len() > policy.knowledge.max_history {
705            target.history.truncate(policy.knowledge.max_history);
706        }
707
708        target.updated_at = Utc::now();
709        target.save()?;
710
711        let legacy_hash = crate::core::project_hash::hash_path_only("");
712        let legacy_dir = knowledge_dir(&legacy_hash)?;
713        let legacy_path = legacy_dir.join("knowledge.json");
714        if legacy_path.exists() {
715            let ts = Utc::now().format("%Y%m%d-%H%M%S");
716            let backup = legacy_dir.join(format!("knowledge.legacy-empty-root.{ts}.json"));
717            std::fs::rename(&legacy_path, &backup).map_err(|e| e.to_string())?;
718        }
719
720        Ok(true)
721    }
722
723    pub fn recall_for_output(&mut self, query: &str, limit: usize) -> (Vec<KnowledgeFact>, usize) {
724        let q = query.to_lowercase();
725        let terms: Vec<&str> = q.split_whitespace().filter(|t| !t.is_empty()).collect();
726        if terms.is_empty() {
727            return (Vec::new(), 0);
728        }
729
730        struct Scored {
731            idx: usize,
732            relevance: f32,
733        }
734
735        let mut scored: Vec<Scored> = self
736            .facts
737            .iter()
738            .enumerate()
739            .filter(|(_, f)| f.is_current())
740            .filter_map(|(idx, f)| {
741                let searchable = format!(
742                    "{} {} {} {}",
743                    f.category.to_lowercase(),
744                    f.key.to_lowercase(),
745                    f.value.to_lowercase(),
746                    f.source_session
747                );
748                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
749                if match_count > 0 {
750                    let relevance = (match_count as f32 / terms.len() as f32) * f.confidence;
751                    Some(Scored { idx, relevance })
752                } else {
753                    None
754                }
755            })
756            .collect();
757
758        scored.sort_by(|a, b| {
759            b.relevance
760                .partial_cmp(&a.relevance)
761                .unwrap_or(std::cmp::Ordering::Equal)
762                .then_with(|| sort_fact_for_output(&self.facts[a.idx], &self.facts[b.idx]))
763        });
764
765        let total = scored.len();
766        scored.truncate(limit);
767
768        let now = Utc::now();
769        let mut out: Vec<KnowledgeFact> = Vec::new();
770        for s in scored {
771            if let Some(f) = self.facts.get_mut(s.idx) {
772                f.retrieval_count = f.retrieval_count.saturating_add(1);
773                f.last_retrieved = Some(now);
774                out.push(f.clone());
775            }
776        }
777
778        (out, total)
779    }
780
781    pub fn recall_by_category_for_output(
782        &mut self,
783        category: &str,
784        limit: usize,
785    ) -> (Vec<KnowledgeFact>, usize) {
786        let mut idxs: Vec<usize> = self
787            .facts
788            .iter()
789            .enumerate()
790            .filter(|(_, f)| f.is_current() && f.category == category)
791            .map(|(i, _)| i)
792            .collect();
793
794        idxs.sort_by(|a, b| sort_fact_for_output(&self.facts[*a], &self.facts[*b]));
795
796        let total = idxs.len();
797        idxs.truncate(limit);
798
799        let now = Utc::now();
800        let mut out = Vec::new();
801        for idx in idxs {
802            if let Some(f) = self.facts.get_mut(idx) {
803                f.retrieval_count = f.retrieval_count.saturating_add(1);
804                f.last_retrieved = Some(now);
805                out.push(f.clone());
806            }
807        }
808
809        (out, total)
810    }
811}
812
813impl KnowledgeFact {
814    pub fn is_current(&self) -> bool {
815        self.valid_until.is_none()
816    }
817
818    pub fn quality_score(&self) -> f32 {
819        let confidence = self.confidence.clamp(0.0, 1.0);
820        let confirmations_norm = (self.confirmation_count.min(5) as f32) / 5.0;
821        let balance = self.feedback_up as i32 - self.feedback_down as i32;
822        let feedback_effect = (balance as f32 / 4.0).tanh() * 0.1;
823
824        // IMPORTANT: quality_score must be stable across repeated recall calls.
825        // Retrieval signals (retrieval_count/last_retrieved) are persisted, but should not change
826        // the displayed "quality" score, otherwise recall output becomes non-deterministic.
827        (0.8 * confidence + 0.2 * confirmations_norm + feedback_effect).clamp(0.0, 1.0)
828    }
829
830    pub fn was_valid_at(&self, at: DateTime<Utc>) -> bool {
831        let after_start = self.valid_from.is_none_or(|from| at >= from);
832        let before_end = self.valid_until.is_none_or(|until| at <= until);
833        after_start && before_end
834    }
835}
836
/// Maps a confidence value to a one-to-five star rating.
///
/// Thresholds: >= 0.95 five stars, >= 0.85 four, >= 0.7 three, >= 0.5 two; anything
/// else (including NaN) gets a single star.
fn confidence_stars(confidence: f32) -> &'static str {
    match confidence {
        c if c >= 0.95 => "★★★★★",
        c if c >= 0.85 => "★★★★",
        c if c >= 0.7 => "★★★",
        c if c >= 0.5 => "★★",
        _ => "★",
    }
}
850
/// Case-insensitive Jaccard similarity over whitespace-separated words.
///
/// Both inputs are lowercased and split on whitespace into word sets; the
/// result is `|A ∩ B| / |A ∪ B|`, in `[0.0, 1.0]`. Two inputs without any
/// words are considered identical and yield `1.0`.
fn string_similarity(a: &str, b: &str) -> f32 {
    let a_lower = a.to_lowercase();
    let b_lower = b.to_lowercase();
    let a_words: std::collections::HashSet<&str> = a_lower.split_whitespace().collect();
    let b_words: std::collections::HashSet<&str> = b_lower.split_whitespace().collect();

    if a_words.is_empty() && b_words.is_empty() {
        return 1.0;
    }

    let shared = a_words.intersection(&b_words).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|. At least one set is non-empty past the
    // guard above, so the divisor is always positive.
    let combined = a_words.len() + b_words.len() - shared;

    shared as f32 / combined as f32
}
870
871fn knowledge_dir(project_hash: &str) -> Result<PathBuf, String> {
872    Ok(crate::core::data_dir::lean_ctx_data_dir()?
873        .join("knowledge")
874        .join(project_hash))
875}
876
877fn sort_fact_for_output(a: &KnowledgeFact, b: &KnowledgeFact) -> std::cmp::Ordering {
878    salience_score(b)
879        .cmp(&salience_score(a))
880        .then_with(|| {
881            b.quality_score()
882                .partial_cmp(&a.quality_score())
883                .unwrap_or(std::cmp::Ordering::Equal)
884        })
885        .then_with(|| {
886            b.confidence
887                .partial_cmp(&a.confidence)
888                .unwrap_or(std::cmp::Ordering::Equal)
889        })
890        .then_with(|| b.confirmation_count.cmp(&a.confirmation_count))
891        .then_with(|| b.retrieval_count.cmp(&a.retrieval_count))
892        .then_with(|| b.last_retrieved.cmp(&a.last_retrieved))
893        .then_with(|| b.last_confirmed.cmp(&a.last_confirmed))
894        .then_with(|| a.category.cmp(&b.category))
895        .then_with(|| a.key.cmp(&b.key))
896        .then_with(|| a.value.cmp(&b.value))
897}
898
899fn salience_score(f: &KnowledgeFact) -> u32 {
900    let cat = f.category.to_lowercase();
901    let base: u32 = match cat.as_str() {
902        "decision" => 70,
903        "gotcha" => 75,
904        "architecture" | "arch" => 60,
905        "security" => 65,
906        "testing" | "tests" | "deployment" | "deploy" => 55,
907        "conventions" | "convention" => 45,
908        "finding" => 40,
909        _ => 30,
910    };
911
912    let quality_bonus = (f.quality_score() * 60.0) as u32;
913
914    let recency_bonus = f.last_retrieved.map_or(0u32, |t| {
915        let days = Utc::now().signed_duration_since(t).num_days();
916        if days <= 7 {
917            10u32
918        } else if days <= 30 {
919            5u32
920        } else {
921            0u32
922        }
923    });
924
925    base + quality_bonus + recency_bonus
926}
927
928fn hash_project_root(root: &str) -> String {
929    crate::core::project_hash::hash_project_root(root)
930}
931
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates an empty knowledge store for `root` plus the default memory policy.
    fn fixture(root: &str) -> (ProjectKnowledge, MemoryPolicy) {
        let policy = MemoryPolicy::default();
        (ProjectKnowledge::new(root), policy)
    }

    /// Collects references to every fact that is still current.
    fn current_facts(kb: &ProjectKnowledge) -> Vec<&KnowledgeFact> {
        kb.facts.iter().filter(|fact| fact.is_current()).collect()
    }

    // Stored facts can be found again by key and by a multi-word query.
    #[test]
    fn remember_and_recall() {
        let (mut kb, policy) = fixture("/tmp/test-project");
        kb.remember(
            "architecture",
            "auth",
            "JWT RS256",
            "session-1",
            0.9,
            &policy,
        );
        kb.remember("api", "rate-limit", "100/min", "session-1", 0.8, &policy);

        let by_key = kb.recall("auth");
        assert_eq!(by_key.len(), 1);
        assert_eq!(by_key[0].value, "JWT RS256");

        let by_words = kb.recall("api rate");
        assert_eq!(by_words.len(), 1);
        assert_eq!(by_words[0].key, "rate-limit");
    }

    // Remembering the same category/key again leaves a single current fact
    // holding the newer value.
    #[test]
    fn upsert_existing_fact() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.7, &policy);
        kb.remember(
            "arch",
            "db",
            "PostgreSQL 16 with pgvector",
            "s2",
            0.95,
            &policy,
        );

        let live = current_facts(&kb);
        assert_eq!(live.len(), 1);
        assert_eq!(live[0].value, "PostgreSQL 16 with pgvector");
    }

    // A confident, repeatedly confirmed fact contradicted by a different value
    // is reported with high severity.
    #[test]
    fn contradiction_detection() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.95, &policy);
        kb.facts[0].confirmation_count = 3;

        let found = kb.check_contradiction("arch", "db", "MySQL", &policy);
        assert!(found.is_some());
        let conflict = found.unwrap();
        assert_eq!(conflict.severity, ContradictionSeverity::High);
    }

    // Replacing a well-confirmed fact keeps the old entry around but marks
    // only the new one as current.
    #[test]
    fn temporal_validity() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.95, &policy);
        kb.facts[0].confirmation_count = 3;

        kb.remember("arch", "db", "MySQL", "s2", 0.9, &policy);

        let live = current_facts(&kb);
        assert_eq!(live.len(), 1);
        assert_eq!(live[0].value, "MySQL");

        let db_entries = kb.facts.iter().filter(|fact| fact.key == "db").count();
        assert_eq!(db_entries, 2);
    }

    // Re-remembering an identical value bumps the confirmation counter.
    #[test]
    fn confirmation_count() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.9, &policy);
        assert_eq!(kb.facts[0].confirmation_count, 1);

        kb.remember("arch", "db", "PostgreSQL", "s2", 0.9, &policy);
        assert_eq!(kb.facts[0].confirmation_count, 2);
    }

    // Removing a fact succeeds once and reports `false` when nothing is left.
    #[test]
    fn remove_fact() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.9, &policy);
        assert!(kb.remove_fact("arch", "db"));
        assert!(kb.facts.is_empty());
        assert!(!kb.remove_fact("arch", "db"));
    }

    // Facts in two distinct categories produce two rooms.
    #[test]
    fn list_rooms() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("architecture", "auth", "JWT", "s1", 0.9, &policy);
        kb.remember("architecture", "db", "PG", "s1", 0.9, &policy);
        kb.remember("deploy", "host", "AWS", "s1", 0.8, &policy);

        let room_list = kb.list_rooms();
        assert_eq!(room_list.len(), 2);
    }

    // The AAAK rendering contains the upper-cased category and a key=value pair.
    #[test]
    fn aaak_format() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("architecture", "auth", "JWT RS256", "s1", 0.95, &policy);
        kb.remember("architecture", "db", "PostgreSQL", "s1", 0.7, &policy);

        let rendered = kb.format_aaak();
        assert!(rendered.contains("ARCHITECTURE:"));
        assert!(rendered.contains("auth=JWT RS256"));
    }

    // Consolidating an insight records one history entry with both sessions.
    #[test]
    fn consolidate_history() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.consolidate(
            "Migrated from REST to GraphQL",
            vec!["s1".into(), "s2".into()],
            &policy,
        );
        assert_eq!(kb.history.len(), 1);
        assert_eq!(kb.history[0].from_sessions.len(), 2);
    }

    // The summary lists both the knowledge and the patterns sections.
    #[test]
    fn format_summary_output() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("architecture", "auth", "JWT RS256", "s1", 0.9, &policy);
        kb.add_pattern(
            "naming",
            "snake_case for functions",
            vec!["get_user()".into()],
            "s1",
            &policy,
        );
        let rendered = kb.format_summary();
        assert!(rendered.contains("PROJECT KNOWLEDGE:"));
        assert!(rendered.contains("auth: JWT RS256"));
        assert!(rendered.contains("PROJECT PATTERNS:"));
    }

    // Recalling at a timestamp taken before a replacement returns the old
    // value; recalling "now" returns the new one.
    #[test]
    fn temporal_recall_at_time() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.95, &policy);
        kb.facts[0].confirmation_count = 3;

        let snapshot_time = Utc::now();
        // Leave a gap so the replacement is stamped strictly after the snapshot.
        std::thread::sleep(std::time::Duration::from_millis(10));

        kb.remember("arch", "db", "MySQL", "s2", 0.9, &policy);

        let past = kb.recall_at_time("db", snapshot_time);
        assert_eq!(past.len(), 1);
        assert_eq!(past[0].value, "PostgreSQL");

        let present = kb.recall_at_time("db", Utc::now());
        assert_eq!(present.len(), 1);
        assert_eq!(present[0].value, "MySQL");
    }

    // The timeline for a category lists the superseded entry before the
    // current one.
    #[test]
    fn timeline_shows_history() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.95, &policy);
        kb.facts[0].confirmation_count = 3;
        kb.remember("arch", "db", "MySQL", "s2", 0.9, &policy);

        let entries = kb.timeline("arch");
        assert_eq!(entries.len(), 2);
        assert!(!entries[0].is_current());
        assert!(entries[1].is_current());
    }

    // The wakeup line carries a FACTS section with category/key=value items.
    #[test]
    fn wakeup_format() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("arch", "auth", "JWT", "s1", 0.95, &policy);
        kb.remember("arch", "db", "PG", "s1", 0.8, &policy);

        let rendered = kb.format_wakeup();
        assert!(rendered.contains("FACTS:"));
        assert!(rendered.contains("arch/auth=JWT"));
        assert!(rendered.contains("arch/db=PG"));
    }

    // A decision outranks a finding even when the finding has slightly higher
    // confidence.
    #[test]
    fn salience_prioritizes_decisions_over_findings_at_similar_confidence() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("finding", "f1", "some thing", "s1", 0.9, &policy);
        kb.remember("decision", "d1", "important", "s1", 0.85, &policy);

        let wakeup = kb.format_wakeup();
        // `split` always yields at least one piece, so `next()` is the first item.
        let leading_item = wakeup
            .strip_prefix("FACTS:")
            .unwrap_or(&wakeup)
            .split('|')
            .next();
        assert!(
            leading_item.is_some_and(|s| s.contains("decision/d1=important")),
            "expected decision first in wakeup: {wakeup}"
        );
    }

    // Contradicting a low-confidence fact is only a low-severity conflict.
    #[test]
    fn low_confidence_contradiction() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.4, &policy);

        let found = kb.check_contradiction("arch", "db", "MySQL", &policy);
        assert!(found.is_some());
        assert_eq!(found.unwrap().severity, ContradictionSeverity::Low);
    }

    // An identical value is never a contradiction.
    #[test]
    fn no_contradiction_for_same_value() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.95, &policy);

        let found = kb.check_contradiction("arch", "db", "PostgreSQL", &policy);
        assert!(found.is_none());
    }

    // A value that differs by a single extra word is not reported as a
    // contradiction.
    #[test]
    fn no_contradiction_for_similar_values() {
        let (mut kb, policy) = fixture("/tmp/test");
        kb.remember(
            "arch",
            "db",
            "PostgreSQL 16 production database server",
            "s1",
            0.95,
            &policy,
        );

        let found = kb.check_contradiction(
            "arch",
            "db",
            "PostgreSQL 16 production database server config",
            &policy,
        );
        assert!(found.is_none());
    }
}