lean_ctx/core/knowledge/
ranking.rs

1use chrono::Utc;
2
3use super::types::{JudgedPair, KnowledgeFact};
4
5pub(super) fn confidence_stars(confidence: f32) -> &'static str {
6    if confidence >= 0.95 {
7        "★★★★★"
8    } else if confidence >= 0.85 {
9        "★★★★"
10    } else if confidence >= 0.7 {
11        "★★★"
12    } else if confidence >= 0.5 {
13        "★★"
14    } else {
15        "★"
16    }
17}
18
19pub(super) fn string_similarity(a: &str, b: &str) -> f32 {
20    let a_lower = a.to_lowercase();
21    let b_lower = b.to_lowercase();
22    let a_words: std::collections::HashSet<&str> = a_lower.split_whitespace().collect();
23    let b_words: std::collections::HashSet<&str> = b_lower.split_whitespace().collect();
24
25    if a_words.is_empty() && b_words.is_empty() {
26        return 1.0;
27    }
28
29    let intersection = a_words.intersection(&b_words).count();
30    let union = a_words.union(&b_words).count();
31
32    if union == 0 {
33        return 0.0;
34    }
35
36    intersection as f32 / union as f32
37}
38
39pub(super) fn sort_fact_for_output(a: &KnowledgeFact, b: &KnowledgeFact) -> std::cmp::Ordering {
40    salience_score(b)
41        .cmp(&salience_score(a))
42        .then_with(|| {
43            b.quality_score()
44                .partial_cmp(&a.quality_score())
45                .unwrap_or(std::cmp::Ordering::Equal)
46        })
47        .then_with(|| {
48            b.confidence
49                .partial_cmp(&a.confidence)
50                .unwrap_or(std::cmp::Ordering::Equal)
51        })
52        .then_with(|| b.confirmation_count.cmp(&a.confirmation_count))
53        .then_with(|| b.retrieval_count.cmp(&a.retrieval_count))
54        .then_with(|| b.last_retrieved.cmp(&a.last_retrieved))
55        .then_with(|| b.last_confirmed.cmp(&a.last_confirmed))
56        .then_with(|| a.category.cmp(&b.category))
57        .then_with(|| a.key.cmp(&b.key))
58        .then_with(|| a.value.cmp(&b.value))
59}
60
61/// Salience-based ranking for fact output ordering.
62///
63/// Unlike `quality_score()` (which is a stable, intrinsic measure of fact
64/// reliability based on confidence, confirmations, and feedback), salience
65/// combines category priority, quality, recency, and retrieval frequency
66/// into a single sort key for _display_ ordering. Salience is volatile and
67/// changes on every access; quality_score is deterministic and stable.
68fn salience_score(f: &KnowledgeFact) -> u32 {
69    let cat = f.category.to_lowercase();
70    let base: u32 = match cat.as_str() {
71        "decision" => 70,
72        "gotcha" => 75,
73        "architecture" | "arch" => 60,
74        "security" => 65,
75        "testing" | "tests" | "deployment" | "deploy" => 55,
76        "conventions" | "convention" => 45,
77        "finding" => 40,
78        _ => 30,
79    };
80
81    let quality_bonus = (f.quality_score() * 60.0) as u32;
82
83    let recency_bonus = f.last_retrieved.map_or(0u32, |t| {
84        let days = Utc::now().signed_duration_since(t).num_days();
85        if days <= 7 {
86            10u32
87        } else if days <= 30 {
88            5u32
89        } else {
90            0u32
91        }
92    });
93
94    let archetype_bonus = f.archetype.salience_bonus();
95
96    let fidelity_bonus = f
97        .fidelity
98        .as_ref()
99        .map_or(0u32, |fi| (fi.structural * 10.0) as u32);
100
101    base + quality_bonus + recency_bonus + archetype_bonus + fidelity_bonus
102}
103
104pub(super) fn hash_project_root(root: &str) -> String {
105    crate::core::project_hash::hash_project_root(root)
106}
107
108pub(super) fn tokenize_lower(s: &str) -> impl Iterator<Item = String> + '_ {
109    s.to_lowercase()
110        .split(|c: char| c.is_whitespace() || c == '-' || c == '_' || c == '/' || c == '.')
111        .filter(|t| !t.is_empty())
112        .map(String::from)
113        .collect::<Vec<_>>()
114        .into_iter()
115}
116
117pub(super) fn build_token_index(
118    facts: &[KnowledgeFact],
119    include_session: bool,
120) -> std::collections::HashMap<String, Vec<usize>> {
121    let mut index: std::collections::HashMap<String, Vec<usize>> = std::collections::HashMap::new();
122    for (i, f) in facts.iter().enumerate() {
123        for token in tokenize_lower(&f.category) {
124            index.entry(token).or_default().push(i);
125        }
126        for token in tokenize_lower(&f.key) {
127            index.entry(token).or_default().push(i);
128        }
129        for token in tokenize_lower(&f.value) {
130            index.entry(token).or_default().push(i);
131        }
132        if include_session {
133            for token in tokenize_lower(&f.source_session) {
134                index.entry(token).or_default().push(i);
135            }
136        }
137    }
138    for indices in index.values_mut() {
139        indices.sort_unstable();
140        indices.dedup();
141    }
142    index
143}
144
/// An existing fact whose value resembles a newly supplied value, as reported
/// by `find_cross_key_similar`.
#[derive(Debug, Clone)]
pub struct SimilarFact {
    /// Category of the matching fact.
    pub category: String,
    /// Key of the matching fact.
    pub key: String,
    /// The matching fact's value, shortened and suffixed with `...` when it is
    /// longer than 60 bytes.
    pub value_preview: String,
    /// Similarity between the new value and this fact's value, as computed by
    /// `string_similarity`.
    pub similarity: f32,
}
152
153pub fn find_cross_key_similar(
154    new_category: &str,
155    new_key: &str,
156    new_value: &str,
157    all_facts: &[KnowledgeFact],
158    judged_pairs: &[JudgedPair],
159    limit: usize,
160) -> Vec<SimilarFact> {
161    let composite_key = format!("{new_category}/{new_key}");
162    let mut results: Vec<SimilarFact> = Vec::new();
163
164    for f in all_facts {
165        if !f.is_current() {
166            continue;
167        }
168        let other_key = format!("{}/{}", f.category, f.key);
169        if other_key == composite_key {
170            continue;
171        }
172
173        let already_judged = judged_pairs.iter().any(|jp| {
174            (jp.key_a == composite_key && jp.key_b == other_key)
175                || (jp.key_a == other_key && jp.key_b == composite_key)
176        });
177        if already_judged {
178            continue;
179        }
180
181        let sim = string_similarity(new_value, &f.value);
182        if sim > 0.35 {
183            let preview = if f.value.len() > 60 {
184                format!("{}...", &f.value[..57])
185            } else {
186                f.value.clone()
187            };
188            results.push(SimilarFact {
189                category: f.category.clone(),
190                key: f.key.clone(),
191                value_preview: preview,
192                similarity: sim,
193            });
194        }
195    }
196
197    results.sort_by(|a, b| {
198        b.similarity
199            .partial_cmp(&a.similarity)
200            .unwrap_or(std::cmp::Ordering::Equal)
201    });
202    results.truncate(limit);
203    results
204}
205
206pub(super) fn fact_version_id_v1(f: &KnowledgeFact) -> String {
207    use md5::{Digest, Md5};
208    let mut hasher = Md5::new();
209    hasher.update(f.category.as_bytes());
210    hasher.update(b"\n");
211    hasher.update(f.key.as_bytes());
212    hasher.update(b"\n");
213    hasher.update(f.value.as_bytes());
214    hasher.update(b"\n");
215    hasher.update(f.source_session.as_bytes());
216    hasher.update(b"\n");
217    hasher.update(f.created_at.to_rfc3339().as_bytes());
218    format!("{:x}", hasher.finalize())
219}
lean_ctx/core/knowledge/ranking.rs

lean_ctx/core/knowledge/
ranking.rs