// lean_ctx/core/knowledge.rs

1use chrono::{DateTime, Utc};
2use serde::{Deserialize, Serialize};
3use std::path::PathBuf;
4
5use crate::core::memory_boundary::FactPrivacy;
6use crate::core::memory_policy::MemoryPolicy;
7
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct ProjectKnowledge {
10    pub project_root: String,
11    pub project_hash: String,
12    pub facts: Vec<KnowledgeFact>,
13    pub patterns: Vec<ProjectPattern>,
14    pub history: Vec<ConsolidatedInsight>,
15    pub updated_at: DateTime<Utc>,
16}
17
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct KnowledgeFact {
20    pub category: String,
21    pub key: String,
22    pub value: String,
23    pub source_session: String,
24    pub confidence: f32,
25    pub created_at: DateTime<Utc>,
26    pub last_confirmed: DateTime<Utc>,
27    #[serde(default)]
28    pub retrieval_count: u32,
29    #[serde(default)]
30    pub last_retrieved: Option<DateTime<Utc>>,
31    #[serde(default)]
32    pub valid_from: Option<DateTime<Utc>>,
33    #[serde(default)]
34    pub valid_until: Option<DateTime<Utc>>,
35    #[serde(default)]
36    pub supersedes: Option<String>,
37    #[serde(default)]
38    pub confirmation_count: u32,
39    #[serde(default)]
40    pub feedback_up: u32,
41    #[serde(default)]
42    pub feedback_down: u32,
43    #[serde(default)]
44    pub last_feedback: Option<DateTime<Utc>>,
45    #[serde(default)]
46    pub privacy: FactPrivacy,
47}
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct Contradiction {
51    pub existing_key: String,
52    pub existing_value: String,
53    pub new_value: String,
54    pub category: String,
55    pub severity: ContradictionSeverity,
56    pub resolution: String,
57}
58
59#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
60pub enum ContradictionSeverity {
61    Low,
62    Medium,
63    High,
64}
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct ProjectPattern {
68    pub pattern_type: String,
69    pub description: String,
70    pub examples: Vec<String>,
71    pub source_session: String,
72    pub created_at: DateTime<Utc>,
73}
74
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct ConsolidatedInsight {
77    pub summary: String,
78    pub from_sessions: Vec<String>,
79    pub timestamp: DateTime<Utc>,
80}
81
82impl ProjectKnowledge {
83    pub fn run_memory_lifecycle(
84        &mut self,
85        policy: &MemoryPolicy,
86    ) -> crate::core::memory_lifecycle::LifecycleReport {
87        let cfg = crate::core::memory_lifecycle::LifecycleConfig {
88            max_facts: policy.knowledge.max_facts,
89            decay_rate_per_day: policy.lifecycle.decay_rate,
90            low_confidence_threshold: policy.lifecycle.low_confidence_threshold,
91            stale_days: policy.lifecycle.stale_days,
92            consolidation_similarity: policy.lifecycle.similarity_threshold,
93        };
94        crate::core::memory_lifecycle::run_lifecycle(&mut self.facts, &cfg)
95    }
96
97    pub fn new(project_root: &str) -> Self {
98        Self {
99            project_root: project_root.to_string(),
100            project_hash: hash_project_root(project_root),
101            facts: Vec::new(),
102            patterns: Vec::new(),
103            history: Vec::new(),
104            updated_at: Utc::now(),
105        }
106    }
107
108    pub fn check_contradiction(
109        &self,
110        category: &str,
111        key: &str,
112        new_value: &str,
113        policy: &MemoryPolicy,
114    ) -> Option<Contradiction> {
115        let existing = self
116            .facts
117            .iter()
118            .find(|f| f.category == category && f.key == key && f.is_current())?;
119
120        if existing.value.to_lowercase() == new_value.to_lowercase() {
121            return None;
122        }
123
124        let similarity = string_similarity(&existing.value, new_value);
125        if similarity > 0.8 {
126            return None;
127        }
128
129        let severity = if existing.confidence >= 0.9 && existing.confirmation_count >= 2 {
130            ContradictionSeverity::High
131        } else if existing.confidence >= policy.knowledge.contradiction_threshold {
132            ContradictionSeverity::Medium
133        } else {
134            ContradictionSeverity::Low
135        };
136
137        let resolution = match severity {
138            ContradictionSeverity::High => format!(
139                "High-confidence fact [{category}/{key}] changed: '{}' -> '{new_value}' (was confirmed {}x). Previous value archived.",
140                existing.value, existing.confirmation_count
141            ),
142            ContradictionSeverity::Medium => format!(
143                "Fact [{category}/{key}] updated: '{}' -> '{new_value}'",
144                existing.value
145            ),
146            ContradictionSeverity::Low => format!(
147                "Low-confidence fact [{category}/{key}] replaced: '{}' -> '{new_value}'",
148                existing.value
149            ),
150        };
151
152        Some(Contradiction {
153            existing_key: key.to_string(),
154            existing_value: existing.value.clone(),
155            new_value: new_value.to_string(),
156            category: category.to_string(),
157            severity,
158            resolution,
159        })
160    }
161
162    pub fn remember(
163        &mut self,
164        category: &str,
165        key: &str,
166        value: &str,
167        session_id: &str,
168        confidence: f32,
169        policy: &MemoryPolicy,
170    ) -> Option<Contradiction> {
171        let contradiction = self.check_contradiction(category, key, value, policy);
172
173        if let Some(existing) = self
174            .facts
175            .iter_mut()
176            .find(|f| f.category == category && f.key == key && f.is_current())
177        {
178            let now = Utc::now();
179            let same_value_ci = existing.value.to_lowercase() == value.to_lowercase();
180            let similarity = string_similarity(&existing.value, value);
181
182            if existing.value == value || same_value_ci || similarity > 0.8 {
183                existing.last_confirmed = now;
184                existing.source_session = session_id.to_string();
185                existing.confidence = f32::midpoint(existing.confidence, confidence);
186                existing.confirmation_count += 1;
187
188                if existing.value != value && similarity > 0.8 && value.len() > existing.value.len()
189                {
190                    // Prefer the more informative value when semantically equivalent.
191                    existing.value = value.to_string();
192                }
193            } else {
194                let superseded = fact_version_id_v1(existing);
195                existing.valid_until = Some(now);
196                existing.valid_from = existing.valid_from.or(Some(existing.created_at));
197
198                self.facts.push(KnowledgeFact {
199                    category: category.to_string(),
200                    key: key.to_string(),
201                    value: value.to_string(),
202                    source_session: session_id.to_string(),
203                    confidence,
204                    created_at: now,
205                    last_confirmed: now,
206                    retrieval_count: 0,
207                    last_retrieved: None,
208                    valid_from: Some(now),
209                    valid_until: None,
210                    supersedes: Some(superseded),
211                    confirmation_count: 1,
212                    feedback_up: 0,
213                    feedback_down: 0,
214                    last_feedback: None,
215                    privacy: FactPrivacy::default(),
216                });
217            }
218        } else {
219            let now = Utc::now();
220            self.facts.push(KnowledgeFact {
221                category: category.to_string(),
222                key: key.to_string(),
223                value: value.to_string(),
224                source_session: session_id.to_string(),
225                confidence,
226                created_at: now,
227                last_confirmed: now,
228                retrieval_count: 0,
229                last_retrieved: None,
230                valid_from: Some(now),
231                valid_until: None,
232                supersedes: None,
233                confirmation_count: 1,
234                feedback_up: 0,
235                feedback_down: 0,
236                last_feedback: None,
237                privacy: FactPrivacy::default(),
238            });
239        }
240
241        // No hard-prune: archive-only lifecycle will compact if needed.
242        if self.facts.len() > policy.knowledge.max_facts.saturating_mul(2) {
243            let _ = self.run_memory_lifecycle(policy);
244        }
245
246        self.updated_at = Utc::now();
247
248        let action = if contradiction.is_some() {
249            "contradict"
250        } else {
251            "remember"
252        };
253        crate::core::events::emit(crate::core::events::EventKind::KnowledgeUpdate {
254            category: category.to_string(),
255            key: key.to_string(),
256            action: action.to_string(),
257        });
258
259        contradiction
260    }
261
262    pub fn recall(&self, query: &str) -> Vec<&KnowledgeFact> {
263        let q = query.to_lowercase();
264        let terms: Vec<&str> = q.split_whitespace().collect();
265
266        let mut results: Vec<(&KnowledgeFact, f32)> = self
267            .facts
268            .iter()
269            .filter(|f| f.is_current())
270            .filter_map(|f| {
271                let searchable = format!(
272                    "{} {} {} {}",
273                    f.category.to_lowercase(),
274                    f.key.to_lowercase(),
275                    f.value.to_lowercase(),
276                    f.source_session
277                );
278                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
279                if match_count > 0 {
280                    let relevance = (match_count as f32 / terms.len() as f32) * f.quality_score();
281                    Some((f, relevance))
282                } else {
283                    None
284                }
285            })
286            .collect();
287
288        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
289        results.into_iter().map(|(f, _)| f).collect()
290    }
291
292    pub fn recall_by_category(&self, category: &str) -> Vec<&KnowledgeFact> {
293        self.facts
294            .iter()
295            .filter(|f| f.category == category && f.is_current())
296            .collect()
297    }
298
299    pub fn recall_at_time(&self, query: &str, at: DateTime<Utc>) -> Vec<&KnowledgeFact> {
300        let q = query.to_lowercase();
301        let terms: Vec<&str> = q.split_whitespace().collect();
302
303        let mut results: Vec<(&KnowledgeFact, f32)> = self
304            .facts
305            .iter()
306            .filter(|f| f.was_valid_at(at))
307            .filter_map(|f| {
308                let searchable = format!(
309                    "{} {} {}",
310                    f.category.to_lowercase(),
311                    f.key.to_lowercase(),
312                    f.value.to_lowercase(),
313                );
314                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
315                if match_count > 0 {
316                    Some((f, match_count as f32 / terms.len() as f32))
317                } else {
318                    None
319                }
320            })
321            .collect();
322
323        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
324        results.into_iter().map(|(f, _)| f).collect()
325    }
326
327    pub fn timeline(&self, category: &str) -> Vec<&KnowledgeFact> {
328        let mut facts: Vec<&KnowledgeFact> = self
329            .facts
330            .iter()
331            .filter(|f| f.category == category)
332            .collect();
333        facts.sort_by_key(|x| x.created_at);
334        facts
335    }
336
337    pub fn list_rooms(&self) -> Vec<(String, usize)> {
338        let mut categories: std::collections::BTreeMap<String, usize> =
339            std::collections::BTreeMap::new();
340        for f in &self.facts {
341            if f.is_current() {
342                *categories.entry(f.category.clone()).or_insert(0) += 1;
343            }
344        }
345        categories.into_iter().collect()
346    }
347
348    pub fn add_pattern(
349        &mut self,
350        pattern_type: &str,
351        description: &str,
352        examples: Vec<String>,
353        session_id: &str,
354        policy: &MemoryPolicy,
355    ) {
356        if let Some(existing) = self
357            .patterns
358            .iter_mut()
359            .find(|p| p.pattern_type == pattern_type && p.description == description)
360        {
361            for ex in &examples {
362                if !existing.examples.contains(ex) {
363                    existing.examples.push(ex.clone());
364                }
365            }
366            return;
367        }
368
369        self.patterns.push(ProjectPattern {
370            pattern_type: pattern_type.to_string(),
371            description: description.to_string(),
372            examples,
373            source_session: session_id.to_string(),
374            created_at: Utc::now(),
375        });
376
377        if self.patterns.len() > policy.knowledge.max_patterns {
378            self.patterns.truncate(policy.knowledge.max_patterns);
379        }
380        self.updated_at = Utc::now();
381    }
382
383    pub fn consolidate(&mut self, summary: &str, session_ids: Vec<String>, policy: &MemoryPolicy) {
384        self.history.push(ConsolidatedInsight {
385            summary: summary.to_string(),
386            from_sessions: session_ids,
387            timestamp: Utc::now(),
388        });
389
390        if self.history.len() > policy.knowledge.max_history {
391            self.history
392                .drain(0..self.history.len() - policy.knowledge.max_history);
393        }
394        self.updated_at = Utc::now();
395    }
396
397    pub fn remove_fact(&mut self, category: &str, key: &str) -> bool {
398        let before = self.facts.len();
399        self.facts
400            .retain(|f| !(f.category == category && f.key == key));
401        let removed = self.facts.len() < before;
402        if removed {
403            self.updated_at = Utc::now();
404        }
405        removed
406    }
407
408    pub fn format_summary(&self) -> String {
409        let mut out = String::new();
410        let current_facts: Vec<&KnowledgeFact> =
411            self.facts.iter().filter(|f| f.is_current()).collect();
412
413        if !current_facts.is_empty() {
414            out.push_str("PROJECT KNOWLEDGE:\n");
415            let mut rooms: Vec<(String, usize)> = self.list_rooms();
416            rooms.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
417
418            let total_rooms = rooms.len();
419            rooms.truncate(crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT);
420
421            for (cat, _count) in rooms {
422                out.push_str(&format!("  [{cat}]\n"));
423
424                let mut facts_in_cat: Vec<&KnowledgeFact> = current_facts
425                    .iter()
426                    .copied()
427                    .filter(|f| f.category == cat)
428                    .collect();
429                facts_in_cat.sort_by(|a, b| sort_fact_for_output(a, b));
430
431                let total_in_cat = facts_in_cat.len();
432                facts_in_cat.truncate(crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT);
433
434                for f in facts_in_cat {
435                    let key = crate::core::sanitize::neutralize_metadata(&f.key);
436                    let val = crate::core::sanitize::neutralize_metadata(&f.value);
437                    out.push_str(&format!(
438                        "    {}: {} (confidence: {:.0}%)\n",
439                        key,
440                        val,
441                        f.confidence * 100.0
442                    ));
443                }
444                if total_in_cat > crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT {
445                    out.push_str(&format!(
446                        "    … +{} more\n",
447                        total_in_cat - crate::core::budgets::KNOWLEDGE_SUMMARY_FACTS_PER_ROOM_LIMIT
448                    ));
449                }
450            }
451
452            if total_rooms > crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT {
453                out.push_str(&format!(
454                    "  … +{} more rooms\n",
455                    total_rooms - crate::core::budgets::KNOWLEDGE_SUMMARY_ROOMS_LIMIT
456                ));
457            }
458        }
459
460        if !self.patterns.is_empty() {
461            out.push_str("PROJECT PATTERNS:\n");
462            let mut patterns = self.patterns.clone();
463            patterns.sort_by(|a, b| {
464                b.created_at
465                    .cmp(&a.created_at)
466                    .then_with(|| a.pattern_type.cmp(&b.pattern_type))
467                    .then_with(|| a.description.cmp(&b.description))
468            });
469            let total = patterns.len();
470            patterns.truncate(crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT);
471            for p in &patterns {
472                let ty = crate::core::sanitize::neutralize_metadata(&p.pattern_type);
473                let desc = crate::core::sanitize::neutralize_metadata(&p.description);
474                out.push_str(&format!("  [{ty}] {desc}\n"));
475            }
476            if total > crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT {
477                out.push_str(&format!(
478                    "  … +{} more\n",
479                    total - crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT
480                ));
481            }
482        }
483
484        if out.is_empty() {
485            out
486        } else {
487            crate::core::sanitize::fence_content("project_knowledge", out.trim_end())
488        }
489    }
490
491    pub fn format_aaak(&self) -> String {
492        let current_facts: Vec<&KnowledgeFact> =
493            self.facts.iter().filter(|f| f.is_current()).collect();
494
495        if current_facts.is_empty() && self.patterns.is_empty() {
496            return String::new();
497        }
498
499        let mut out = String::new();
500
501        let mut rooms: Vec<(String, usize)> = self.list_rooms();
502        rooms.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
503        rooms.truncate(crate::core::budgets::KNOWLEDGE_AAAK_ROOMS_LIMIT);
504
505        for (cat, _count) in rooms {
506            let mut facts_in_cat: Vec<&KnowledgeFact> = current_facts
507                .iter()
508                .copied()
509                .filter(|f| f.category == cat)
510                .collect();
511            facts_in_cat.sort_by(|a, b| sort_fact_for_output(a, b));
512            facts_in_cat.truncate(crate::core::budgets::KNOWLEDGE_AAAK_FACTS_PER_ROOM_LIMIT);
513
514            let items: Vec<String> = facts_in_cat
515                .iter()
516                .map(|f| {
517                    let stars = confidence_stars(f.confidence);
518                    let key = crate::core::sanitize::neutralize_metadata(&f.key);
519                    let val = crate::core::sanitize::neutralize_metadata(&f.value);
520                    format!("{key}={val}{stars}")
521                })
522                .collect();
523            out.push_str(&format!(
524                "{}:{}\n",
525                crate::core::sanitize::neutralize_metadata(&cat.to_uppercase()),
526                items.join("|")
527            ));
528        }
529
530        if !self.patterns.is_empty() {
531            let mut patterns = self.patterns.clone();
532            patterns.sort_by(|a, b| {
533                b.created_at
534                    .cmp(&a.created_at)
535                    .then_with(|| a.pattern_type.cmp(&b.pattern_type))
536                    .then_with(|| a.description.cmp(&b.description))
537            });
538            patterns.truncate(crate::core::budgets::KNOWLEDGE_PATTERNS_LIMIT);
539            let pat_items: Vec<String> = patterns
540                .iter()
541                .map(|p| {
542                    let ty = crate::core::sanitize::neutralize_metadata(&p.pattern_type);
543                    let desc = crate::core::sanitize::neutralize_metadata(&p.description);
544                    format!("{ty}.{desc}")
545                })
546                .collect();
547            out.push_str(&format!("PAT:{}\n", pat_items.join("|")));
548        }
549
550        if out.is_empty() {
551            out
552        } else {
553            crate::core::sanitize::fence_content("project_memory_aaak", out.trim_end())
554        }
555    }
556
557    pub fn format_wakeup(&self) -> String {
558        let current_facts: Vec<&KnowledgeFact> = self
559            .facts
560            .iter()
561            .filter(|f| f.is_current() && f.confidence >= 0.7)
562            .collect();
563
564        if current_facts.is_empty() {
565            return String::new();
566        }
567
568        let mut top_facts: Vec<&KnowledgeFact> = current_facts;
569        top_facts.sort_by(|a, b| sort_fact_for_output(a, b));
570        top_facts.truncate(10);
571
572        let items: Vec<String> = top_facts
573            .iter()
574            .map(|f| {
575                let cat = crate::core::sanitize::neutralize_metadata(&f.category);
576                let key = crate::core::sanitize::neutralize_metadata(&f.key);
577                let val = crate::core::sanitize::neutralize_metadata(&f.value);
578                format!("{cat}/{key}={val}")
579            })
580            .collect();
581
582        crate::core::sanitize::fence_content(
583            "project_facts_wakeup",
584            &format!("FACTS:{}", items.join("|")),
585        )
586    }
587
588    pub fn save(&self) -> Result<(), String> {
589        let dir = knowledge_dir(&self.project_hash)?;
590        std::fs::create_dir_all(&dir).map_err(|e| e.to_string())?;
591
592        let path = dir.join("knowledge.json");
593        let json = serde_json::to_string_pretty(self).map_err(|e| e.to_string())?;
594        std::fs::write(&path, json).map_err(|e| e.to_string())
595    }
596
597    pub fn load(project_root: &str) -> Option<Self> {
598        let hash = hash_project_root(project_root);
599        let dir = knowledge_dir(&hash).ok()?;
600        let path = dir.join("knowledge.json");
601
602        if let Ok(content) = std::fs::read_to_string(&path) {
603            if let Ok(k) = serde_json::from_str::<Self>(&content) {
604                return Some(k);
605            }
606        }
607
608        let old_hash = crate::core::project_hash::hash_path_only(project_root);
609        if old_hash != hash {
610            crate::core::project_hash::migrate_if_needed(&old_hash, &hash, project_root);
611            if let Ok(content) = std::fs::read_to_string(&path) {
612                if let Ok(mut k) = serde_json::from_str::<Self>(&content) {
613                    k.project_hash = hash;
614                    let _ = k.save();
615                    return Some(k);
616                }
617            }
618        }
619
620        None
621    }
622
623    pub fn load_or_create(project_root: &str) -> Self {
624        Self::load(project_root).unwrap_or_else(|| Self::new(project_root))
625    }
626
627    /// Migrates legacy knowledge that was accidentally stored under an empty project_root ("")
628    /// into the given `target_root`. Keeps a timestamped backup of the legacy file.
629    pub fn migrate_legacy_empty_root(
630        target_root: &str,
631        policy: &MemoryPolicy,
632    ) -> Result<bool, String> {
633        if target_root.trim().is_empty() {
634            return Ok(false);
635        }
636
637        let Some(legacy) = Self::load("") else {
638            return Ok(false);
639        };
640
641        if !legacy.project_root.trim().is_empty() {
642            return Ok(false);
643        }
644        if legacy.facts.is_empty() && legacy.patterns.is_empty() && legacy.history.is_empty() {
645            return Ok(false);
646        }
647
648        let mut target = Self::load_or_create(target_root);
649
650        fn fact_key(f: &KnowledgeFact) -> String {
651            format!(
652                "{}|{}|{}|{}|{}",
653                f.category, f.key, f.value, f.source_session, f.created_at
654            )
655        }
656        fn pattern_key(p: &ProjectPattern) -> String {
657            format!(
658                "{}|{}|{}|{}",
659                p.pattern_type, p.description, p.source_session, p.created_at
660            )
661        }
662        fn history_key(h: &ConsolidatedInsight) -> String {
663            format!(
664                "{}|{}|{}",
665                h.summary,
666                h.from_sessions.join(","),
667                h.timestamp
668            )
669        }
670
671        let mut seen_facts: std::collections::HashSet<String> =
672            target.facts.iter().map(fact_key).collect();
673        for f in legacy.facts {
674            if seen_facts.insert(fact_key(&f)) {
675                target.facts.push(f);
676            }
677        }
678
679        let mut seen_patterns: std::collections::HashSet<String> =
680            target.patterns.iter().map(pattern_key).collect();
681        for p in legacy.patterns {
682            if seen_patterns.insert(pattern_key(&p)) {
683                target.patterns.push(p);
684            }
685        }
686
687        let mut seen_history: std::collections::HashSet<String> =
688            target.history.iter().map(history_key).collect();
689        for h in legacy.history {
690            if seen_history.insert(history_key(&h)) {
691                target.history.push(h);
692            }
693        }
694
695        // Enforce caps to avoid unbounded growth from migration.
696        target.facts.sort_by(|a, b| {
697            b.created_at
698                .cmp(&a.created_at)
699                .then_with(|| b.confidence.total_cmp(&a.confidence))
700        });
701        if target.facts.len() > policy.knowledge.max_facts {
702            target.facts.truncate(policy.knowledge.max_facts);
703        }
704        target
705            .patterns
706            .sort_by_key(|x| std::cmp::Reverse(x.created_at));
707        if target.patterns.len() > policy.knowledge.max_patterns {
708            target.patterns.truncate(policy.knowledge.max_patterns);
709        }
710        target
711            .history
712            .sort_by_key(|x| std::cmp::Reverse(x.timestamp));
713        if target.history.len() > policy.knowledge.max_history {
714            target.history.truncate(policy.knowledge.max_history);
715        }
716
717        target.updated_at = Utc::now();
718        target.save()?;
719
720        let legacy_hash = crate::core::project_hash::hash_path_only("");
721        let legacy_dir = knowledge_dir(&legacy_hash)?;
722        let legacy_path = legacy_dir.join("knowledge.json");
723        if legacy_path.exists() {
724            let ts = Utc::now().format("%Y%m%d-%H%M%S");
725            let backup = legacy_dir.join(format!("knowledge.legacy-empty-root.{ts}.json"));
726            std::fs::rename(&legacy_path, &backup).map_err(|e| e.to_string())?;
727        }
728
729        Ok(true)
730    }
731
732    pub fn recall_for_output(&mut self, query: &str, limit: usize) -> (Vec<KnowledgeFact>, usize) {
733        let q = query.to_lowercase();
734        let terms: Vec<&str> = q.split_whitespace().filter(|t| !t.is_empty()).collect();
735        if terms.is_empty() {
736            return (Vec::new(), 0);
737        }
738
739        struct Scored {
740            idx: usize,
741            relevance: f32,
742        }
743
744        let mut scored: Vec<Scored> = self
745            .facts
746            .iter()
747            .enumerate()
748            .filter(|(_, f)| f.is_current())
749            .filter_map(|(idx, f)| {
750                let searchable = format!(
751                    "{} {} {} {}",
752                    f.category.to_lowercase(),
753                    f.key.to_lowercase(),
754                    f.value.to_lowercase(),
755                    f.source_session
756                );
757                let match_count = terms.iter().filter(|t| searchable.contains(**t)).count();
758                if match_count > 0 {
759                    let relevance = (match_count as f32 / terms.len() as f32) * f.confidence;
760                    Some(Scored { idx, relevance })
761                } else {
762                    None
763                }
764            })
765            .collect();
766
767        scored.sort_by(|a, b| {
768            b.relevance
769                .partial_cmp(&a.relevance)
770                .unwrap_or(std::cmp::Ordering::Equal)
771                .then_with(|| sort_fact_for_output(&self.facts[a.idx], &self.facts[b.idx]))
772        });
773
774        let total = scored.len();
775        scored.truncate(limit);
776
777        let now = Utc::now();
778        let mut out: Vec<KnowledgeFact> = Vec::new();
779        for s in scored {
780            if let Some(f) = self.facts.get_mut(s.idx) {
781                f.retrieval_count = f.retrieval_count.saturating_add(1);
782                f.last_retrieved = Some(now);
783                out.push(f.clone());
784            }
785        }
786
787        (out, total)
788    }
789
790    pub fn recall_by_category_for_output(
791        &mut self,
792        category: &str,
793        limit: usize,
794    ) -> (Vec<KnowledgeFact>, usize) {
795        let mut idxs: Vec<usize> = self
796            .facts
797            .iter()
798            .enumerate()
799            .filter(|(_, f)| f.is_current() && f.category == category)
800            .map(|(i, _)| i)
801            .collect();
802
803        idxs.sort_by(|a, b| sort_fact_for_output(&self.facts[*a], &self.facts[*b]));
804
805        let total = idxs.len();
806        idxs.truncate(limit);
807
808        let now = Utc::now();
809        let mut out = Vec::new();
810        for idx in idxs {
811            if let Some(f) = self.facts.get_mut(idx) {
812                f.retrieval_count = f.retrieval_count.saturating_add(1);
813                f.last_retrieved = Some(now);
814                out.push(f.clone());
815            }
816        }
817
818        (out, total)
819    }
820}
821
822impl KnowledgeFact {
823    pub fn is_current(&self) -> bool {
824        self.valid_until.is_none()
825    }
826
827    /// Stable, intrinsic quality metric (0.0..1.0).
828    ///
829    /// Based only on confidence, confirmation count, and feedback balance.
830    /// Deliberately excludes volatile signals (retrieval count, recency) to
831    /// keep recall output deterministic. For display ordering use
832    /// `salience_score()` which adds recency and category weighting.
833    pub fn quality_score(&self) -> f32 {
834        let confidence = self.confidence.clamp(0.0, 1.0);
835        let confirmations_norm = (self.confirmation_count.min(5) as f32) / 5.0;
836        let balance = self.feedback_up as i32 - self.feedback_down as i32;
837        let feedback_effect = (balance as f32 / 4.0).tanh() * 0.1;
838
839        // IMPORTANT: quality_score must be stable across repeated recall calls.
840        // Retrieval signals (retrieval_count/last_retrieved) are persisted, but should not change
841        // the displayed "quality" score, otherwise recall output becomes non-deterministic.
842        (0.8 * confidence + 0.2 * confirmations_norm + feedback_effect).clamp(0.0, 1.0)
843    }
844
845    pub fn was_valid_at(&self, at: DateTime<Utc>) -> bool {
846        let after_start = self.valid_from.is_none_or(|from| at >= from);
847        let before_end = self.valid_until.is_none_or(|until| at <= until);
848        after_start && before_end
849    }
850}
851
/// Maps a confidence value to a one-to-five star rating string.
///
/// Thresholds are inclusive lower bounds: `0.95` → five stars, `0.85` → four,
/// `0.7` → three, `0.5` → two. Anything below `0.5` (including NaN, which
/// fails every comparison) yields a single star.
fn confidence_stars(confidence: f32) -> &'static str {
    // Ordered from the highest floor down; the first floor reached wins.
    const TIERS: [(f32, &str); 4] = [
        (0.95, "★★★★★"),
        (0.85, "★★★★"),
        (0.7, "★★★"),
        (0.5, "★★"),
    ];

    TIERS
        .iter()
        .find(|(floor, _)| confidence >= *floor)
        .map_or("★", |&(_, stars)| stars)
}
865
/// Case-insensitive Jaccard similarity over whitespace-separated words.
///
/// Both inputs are lowercased and split on whitespace into word sets. The
/// result is `|A ∩ B| / |A ∪ B|`, in the range `0.0..=1.0`. Two inputs with
/// no words at all (empty or whitespace-only) are treated as identical and
/// score `1.0`.
fn string_similarity(a: &str, b: &str) -> f32 {
    use std::collections::HashSet;

    let a_lower = a.to_lowercase();
    let b_lower = b.to_lowercase();
    let a_words: HashSet<&str> = a_lower.split_whitespace().collect();
    let b_words: HashSet<&str> = b_lower.split_whitespace().collect();

    let shared = a_words.intersection(&b_words).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|; avoids walking both sets a second time.
    let combined = a_words.len() + b_words.len() - shared;

    // The union is empty only when both word sets are empty, so this single
    // guard covers the "nothing to compare" case and the division by zero.
    if combined == 0 {
        return 1.0;
    }

    shared as f32 / combined as f32
}
885
886fn knowledge_dir(project_hash: &str) -> Result<PathBuf, String> {
887    Ok(crate::core::data_dir::lean_ctx_data_dir()?
888        .join("knowledge")
889        .join(project_hash))
890}
891
892fn sort_fact_for_output(a: &KnowledgeFact, b: &KnowledgeFact) -> std::cmp::Ordering {
893    salience_score(b)
894        .cmp(&salience_score(a))
895        .then_with(|| {
896            b.quality_score()
897                .partial_cmp(&a.quality_score())
898                .unwrap_or(std::cmp::Ordering::Equal)
899        })
900        .then_with(|| {
901            b.confidence
902                .partial_cmp(&a.confidence)
903                .unwrap_or(std::cmp::Ordering::Equal)
904        })
905        .then_with(|| b.confirmation_count.cmp(&a.confirmation_count))
906        .then_with(|| b.retrieval_count.cmp(&a.retrieval_count))
907        .then_with(|| b.last_retrieved.cmp(&a.last_retrieved))
908        .then_with(|| b.last_confirmed.cmp(&a.last_confirmed))
909        .then_with(|| a.category.cmp(&b.category))
910        .then_with(|| a.key.cmp(&b.key))
911        .then_with(|| a.value.cmp(&b.value))
912}
913
914/// Salience-based ranking for fact output ordering.
915///
916/// Unlike `quality_score()` (which is a stable, intrinsic measure of fact
917/// reliability based on confidence, confirmations, and feedback), salience
918/// combines category priority, quality, recency, and retrieval frequency
919/// into a single sort key for _display_ ordering. Salience is volatile and
920/// changes on every access; quality_score is deterministic and stable.
921fn salience_score(f: &KnowledgeFact) -> u32 {
922    let cat = f.category.to_lowercase();
923    let base: u32 = match cat.as_str() {
924        "decision" => 70,
925        "gotcha" => 75,
926        "architecture" | "arch" => 60,
927        "security" => 65,
928        "testing" | "tests" | "deployment" | "deploy" => 55,
929        "conventions" | "convention" => 45,
930        "finding" => 40,
931        _ => 30,
932    };
933
934    let quality_bonus = (f.quality_score() * 60.0) as u32;
935
936    let recency_bonus = f.last_retrieved.map_or(0u32, |t| {
937        let days = Utc::now().signed_duration_since(t).num_days();
938        if days <= 7 {
939            10u32
940        } else if days <= 30 {
941            5u32
942        } else {
943            0u32
944        }
945    });
946
947    base + quality_bonus + recency_bonus
948}
949
950fn hash_project_root(root: &str) -> String {
951    crate::core::project_hash::hash_project_root(root)
952}
953
954fn fact_version_id_v1(f: &KnowledgeFact) -> String {
955    use md5::{Digest, Md5};
956    let mut hasher = Md5::new();
957    hasher.update(f.category.as_bytes());
958    hasher.update(b"\n");
959    hasher.update(f.key.as_bytes());
960    hasher.update(b"\n");
961    hasher.update(f.value.as_bytes());
962    hasher.update(b"\n");
963    hasher.update(f.source_session.as_bytes());
964    hasher.update(b"\n");
965    hasher.update(f.created_at.to_rfc3339().as_bytes());
966    format!("{:x}", hasher.finalize())
967}
968
#[cfg(test)]
mod tests {
    use super::*;

    /// Policy used by every test: the crate defaults.
    fn default_policy() -> MemoryPolicy {
        MemoryPolicy::default()
    }

    /// Collects references to the facts that are still current.
    fn current_facts(kb: &ProjectKnowledge) -> Vec<&KnowledgeFact> {
        kb.facts.iter().filter(|fact| fact.is_current()).collect()
    }

    // Stored facts can be found again by key or by category/key words.
    #[test]
    fn remember_and_recall() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test-project");
        kb.remember(
            "architecture",
            "auth",
            "JWT RS256",
            "session-1",
            0.9,
            &policy,
        );
        kb.remember("api", "rate-limit", "100/min", "session-1", 0.8, &policy);

        let auth_hits = kb.recall("auth");
        assert_eq!(auth_hits.len(), 1);
        assert_eq!(auth_hits[0].value, "JWT RS256");

        let rate_hits = kb.recall("api rate");
        assert_eq!(rate_hits.len(), 1);
        assert_eq!(rate_hits[0].key, "rate-limit");
    }

    // Remembering the same category/key again leaves a single current fact.
    #[test]
    fn upsert_existing_fact() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.7, &policy);
        kb.remember(
            "arch",
            "db",
            "PostgreSQL 16 with pgvector",
            "s2",
            0.95,
            &policy,
        );

        let current = current_facts(&kb);
        assert_eq!(current.len(), 1);
        assert_eq!(current[0].value, "PostgreSQL 16 with pgvector");
    }

    // A well-confirmed, high-confidence fact yields a High severity conflict.
    #[test]
    fn contradiction_detection() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.95, &policy);
        kb.facts[0].confirmation_count = 3;

        let contradiction = kb.check_contradiction("arch", "db", "MySQL", &policy);
        assert!(contradiction.is_some());
        let found = contradiction.unwrap();
        assert_eq!(found.severity, ContradictionSeverity::High);
    }

    // Replacing a confirmed fact keeps the old version around as non-current.
    #[test]
    fn temporal_validity() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.95, &policy);
        kb.facts[0].confirmation_count = 3;

        kb.remember("arch", "db", "MySQL", "s2", 0.9, &policy);

        let current = current_facts(&kb);
        assert_eq!(current.len(), 1);
        assert_eq!(current[0].value, "MySQL");

        let db_versions = kb.facts.iter().filter(|fact| fact.key == "db").count();
        assert_eq!(db_versions, 2);
    }

    // Re-remembering an identical value bumps the confirmation counter.
    #[test]
    fn confirmation_count() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");

        kb.remember("arch", "db", "PostgreSQL", "s1", 0.9, &policy);
        assert_eq!(kb.facts[0].confirmation_count, 1);

        kb.remember("arch", "db", "PostgreSQL", "s2", 0.9, &policy);
        assert_eq!(kb.facts[0].confirmation_count, 2);
    }

    // Removal succeeds once and reports `false` when nothing is left.
    #[test]
    fn remove_fact() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.9, &policy);

        assert!(kb.remove_fact("arch", "db"));
        assert!(kb.facts.is_empty());
        assert!(!kb.remove_fact("arch", "db"));
    }

    // Three facts in two categories produce two rooms.
    #[test]
    fn list_rooms() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("architecture", "auth", "JWT", "s1", 0.9, &policy);
        kb.remember("architecture", "db", "PG", "s1", 0.9, &policy);
        kb.remember("deploy", "host", "AWS", "s1", 0.8, &policy);

        assert_eq!(kb.list_rooms().len(), 2);
    }

    // AAAK output has an upper-cased category header and key=value entries.
    #[test]
    fn aaak_format() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("architecture", "auth", "JWT RS256", "s1", 0.95, &policy);
        kb.remember("architecture", "db", "PostgreSQL", "s1", 0.7, &policy);

        let rendered = kb.format_aaak();
        assert!(rendered.contains("ARCHITECTURE:"));
        assert!(rendered.contains("auth=JWT RS256"));
    }

    // Consolidation records one history entry with its source sessions.
    #[test]
    fn consolidate_history() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.consolidate(
            "Migrated from REST to GraphQL",
            vec!["s1".into(), "s2".into()],
            &policy,
        );

        assert_eq!(kb.history.len(), 1);
        assert_eq!(kb.history[0].from_sessions.len(), 2);
    }

    // The summary lists both the knowledge and the patterns sections.
    #[test]
    fn format_summary_output() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("architecture", "auth", "JWT RS256", "s1", 0.9, &policy);
        kb.add_pattern(
            "naming",
            "snake_case for functions",
            vec!["get_user()".into()],
            "s1",
            &policy,
        );

        let rendered = kb.format_summary();
        assert!(rendered.contains("PROJECT KNOWLEDGE:"));
        assert!(rendered.contains("auth: JWT RS256"));
        assert!(rendered.contains("PROJECT PATTERNS:"));
    }

    // Point-in-time recall sees the value that was valid at that instant.
    #[test]
    fn temporal_recall_at_time() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.95, &policy);
        kb.facts[0].confirmation_count = 3;

        // Leave a gap so the snapshot instant is strictly before the change.
        let before_change = Utc::now();
        std::thread::sleep(std::time::Duration::from_millis(10));

        kb.remember("arch", "db", "MySQL", "s2", 0.9, &policy);

        let past = kb.recall_at_time("db", before_change);
        assert_eq!(past.len(), 1);
        assert_eq!(past[0].value, "PostgreSQL");

        let present = kb.recall_at_time("db", Utc::now());
        assert_eq!(present.len(), 1);
        assert_eq!(present[0].value, "MySQL");
    }

    // The timeline lists the superseded version before the current one.
    #[test]
    fn timeline_shows_history() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.95, &policy);
        kb.facts[0].confirmation_count = 3;
        kb.remember("arch", "db", "MySQL", "s2", 0.9, &policy);

        let versions = kb.timeline("arch");
        assert_eq!(versions.len(), 2);
        assert!(!versions[0].is_current());
        assert!(versions[1].is_current());
    }

    // Wake-up output is a FACTS: line with category/key=value items.
    #[test]
    fn wakeup_format() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("arch", "auth", "JWT", "s1", 0.95, &policy);
        kb.remember("arch", "db", "PG", "s1", 0.8, &policy);

        let wakeup = kb.format_wakeup();
        for expected in ["FACTS:", "arch/auth=JWT", "arch/db=PG"] {
            assert!(wakeup.contains(expected));
        }
    }

    // A decision outranks a finding even with slightly lower confidence.
    #[test]
    fn salience_prioritizes_decisions_over_findings_at_similar_confidence() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("finding", "f1", "some thing", "s1", 0.9, &policy);
        kb.remember("decision", "d1", "important", "s1", 0.85, &policy);

        let wakeup = kb.format_wakeup();
        let body = wakeup.strip_prefix("FACTS:").unwrap_or(&wakeup);
        let leading_item = body.split('|').next();
        assert!(
            leading_item.is_some_and(|item| item.contains("decision/d1=important")),
            "expected decision first in wakeup: {wakeup}"
        );
    }

    // Contradicting a low-confidence fact is only a Low severity conflict.
    #[test]
    fn low_confidence_contradiction() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.4, &policy);

        let conflict = kb.check_contradiction("arch", "db", "MySQL", &policy);
        assert!(conflict.is_some());
        assert_eq!(conflict.unwrap().severity, ContradictionSeverity::Low);
    }

    // Repeating the stored value is not a contradiction.
    #[test]
    fn no_contradiction_for_same_value() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember("arch", "db", "PostgreSQL", "s1", 0.95, &policy);

        let conflict = kb.check_contradiction("arch", "db", "PostgreSQL", &policy);
        assert!(conflict.is_none());
    }

    // A value that differs by one extra word is similar enough to pass.
    #[test]
    fn no_contradiction_for_similar_values() {
        let policy = default_policy();
        let mut kb = ProjectKnowledge::new("/tmp/test");
        kb.remember(
            "arch",
            "db",
            "PostgreSQL 16 production database server",
            "s1",
            0.95,
            &policy,
        );

        let conflict = kb.check_contradiction(
            "arch",
            "db",
            "PostgreSQL 16 production database server config",
            &policy,
        );
        assert!(conflict.is_none());
    }
}