Skip to main content

tsift_resolution/
scoring.rs

1use std::collections::{BTreeMap, BTreeSet, VecDeque};
2
3use serde::Serialize;
4use tsift_core::{GraphEdge, GraphNode};
5
6pub const COMMUNITY_MIN_HANDLE_COVERAGE_PCT: f64 = 95.0;
7pub const COMMUNITY_MIN_DUPLICATE_NAME_PRECISION: f64 = 0.99;
8
9#[derive(Clone, Debug, Serialize, PartialEq)]
10pub struct RankedNeighbor {
11    pub rank: usize,
12    pub node_id: String,
13    pub kind: String,
14    pub label: String,
15    pub score: i64,
16    #[serde(skip_serializing_if = "Option::is_none")]
17    pub depth: Option<usize>,
18    pub edge_kinds: Vec<String>,
19    pub community_co_membership: bool,
20    pub semantic_relation: bool,
21    pub source_handle_fresh: bool,
22    pub duplicate_name_precision: f64,
23    pub handle_coverage_pct: f64,
24    pub reasons: Vec<String>,
25}
26
27#[derive(Clone, Debug, Serialize, PartialEq)]
28pub struct NeighborhoodRankingGate {
29    pub status: String,
30    pub ranked_output_default: bool,
31    pub default_order: String,
32    pub default_change_gate: String,
33    pub required_workloads: Vec<String>,
34    pub required_metrics: Vec<String>,
35    pub max_duration_regression_percent: f64,
36    pub min_handle_coverage_pct: f64,
37    pub min_duplicate_name_precision: f64,
38    pub min_top_community_stability: f64,
39    pub diagnostics: Vec<String>,
40}
41
42pub fn ranked_neighbors(
43    center_id: &str,
44    nodes: &[GraphNode],
45    edges: &[GraphEdge],
46) -> Vec<RankedNeighbor> {
47    let node_by_id = nodes
48        .iter()
49        .map(|node| (node.id.clone(), node))
50        .collect::<BTreeMap<_, _>>();
51    let label_counts = nodes
52        .iter()
53        .fold(BTreeMap::<String, usize>::new(), |mut acc, node| {
54            *acc.entry(node.label.clone()).or_default() += 1;
55            acc
56        });
57    let mut outgoing = BTreeMap::<String, Vec<&GraphEdge>>::new();
58    let mut edge_kinds_by_node = BTreeMap::<String, BTreeSet<String>>::new();
59    let mut fresh_edge_by_node = BTreeSet::<String>::new();
60    for edge in edges {
61        outgoing.entry(edge.from_id.clone()).or_default().push(edge);
62        for endpoint in [&edge.from_id, &edge.to_id] {
63            if node_by_id.contains_key(endpoint) {
64                edge_kinds_by_node
65                    .entry(endpoint.clone())
66                    .or_default()
67                    .insert(edge.kind.clone());
68                if edge.freshness.as_ref().is_some_and(|freshness| {
69                    freshness.content_hash.is_some() || freshness.observed_at_unix.is_some()
70                }) {
71                    fresh_edge_by_node.insert(endpoint.clone());
72                }
73            }
74        }
75    }
76
77    let depths = neighborhood_depths(center_id, &node_by_id, &outgoing);
78    let page_handle_coverage_pct = page_handle_coverage_pct(nodes);
79    let mut ranked = Vec::new();
80
81    for node in nodes {
82        if node.id == center_id {
83            continue;
84        }
85        let edge_kinds = edge_kinds_by_node
86            .get(&node.id)
87            .map(|kinds| kinds.iter().cloned().collect::<Vec<_>>())
88            .unwrap_or_default();
89        let depth = depths.get(&node.id).copied();
90        let community_co_membership = has_community_signal(node, &edge_kinds);
91        let semantic_relation = has_semantic_signal(node, &edge_kinds);
92        let source_handle_fresh = source_handle_is_fresh(node)
93            || (node.kind == "source_handle" && fresh_edge_by_node.contains(&node.id));
94        let duplicate_name_precision = duplicate_name_precision(node, &label_counts);
95        let mut score = 0i64;
96        let mut reasons = Vec::new();
97
98        if let Some(depth) = depth {
99            let depth_score = 120i64
100                .saturating_sub((depth as i64).saturating_mul(18))
101                .max(0);
102            score += depth_score;
103            reasons.push(format!("depth:{depth}+{depth_score}"));
104        } else {
105            reasons.push("depth:unknown".to_string());
106        }
107
108        for edge_kind in &edge_kinds {
109            let edge_score = edge_kind_rank_score(edge_kind);
110            score += edge_score;
111            reasons.push(format!("edge_kind:{edge_kind}+{edge_score}"));
112        }
113        if community_co_membership {
114            score += 18;
115            reasons.push("community_co_membership+18".to_string());
116        }
117        if semantic_relation {
118            score += 24;
119            reasons.push("semantic_relation+24".to_string());
120        }
121        if source_handle_fresh {
122            score += 18;
123            reasons.push("source_handle_fresh+18".to_string());
124        }
125        if duplicate_name_precision + f64::EPSILON >= COMMUNITY_MIN_DUPLICATE_NAME_PRECISION {
126            score += 10;
127            reasons.push("duplicate_name_precision+10".to_string());
128        } else {
129            score -= 10;
130            reasons.push("duplicate_name_precision-10".to_string());
131        }
132        if page_handle_coverage_pct + f64::EPSILON >= COMMUNITY_MIN_HANDLE_COVERAGE_PCT {
133            score += 10;
134            reasons.push("handle_coverage+10".to_string());
135        } else {
136            score -= 10;
137            reasons.push("handle_coverage-10".to_string());
138        }
139
140        ranked.push(RankedNeighbor {
141            rank: 0,
142            node_id: node.id.clone(),
143            kind: node.kind.clone(),
144            label: node.label.clone(),
145            score,
146            depth,
147            edge_kinds,
148            community_co_membership,
149            semantic_relation,
150            source_handle_fresh,
151            duplicate_name_precision,
152            handle_coverage_pct: page_handle_coverage_pct,
153            reasons,
154        });
155    }
156
157    ranked.sort_by(|left, right| {
158        right
159            .score
160            .cmp(&left.score)
161            .then_with(|| {
162                left.depth
163                    .unwrap_or(usize::MAX)
164                    .cmp(&right.depth.unwrap_or(usize::MAX))
165            })
166            .then(left.kind.cmp(&right.kind))
167            .then(left.label.cmp(&right.label))
168            .then(left.node_id.cmp(&right.node_id))
169    });
170    for (idx, neighbor) in ranked.iter_mut().enumerate() {
171        neighbor.rank = idx + 1;
172    }
173    ranked
174}
175
176pub fn ranked_neighbors_capped(
177    center_id: &str,
178    nodes: &[GraphNode],
179    edges: &[GraphEdge],
180    cap: usize,
181) -> Vec<RankedNeighbor> {
182    let mut ranked = ranked_neighbors(center_id, nodes, edges);
183    if cap > 0 && ranked.len() > cap {
184        ranked.truncate(cap);
185    }
186    ranked
187}
188
189pub fn neighborhood_depths(
190    center_id: &str,
191    node_by_id: &BTreeMap<String, &GraphNode>,
192    outgoing: &BTreeMap<String, Vec<&GraphEdge>>,
193) -> BTreeMap<String, usize> {
194    if !node_by_id.contains_key(center_id) {
195        return BTreeMap::new();
196    }
197    let mut depths = BTreeMap::from([(center_id.to_string(), 0usize)]);
198    let mut queue = VecDeque::from([(center_id.to_string(), 0usize)]);
199    while let Some((current, depth)) = queue.pop_front() {
200        for edge in outgoing.get(&current).into_iter().flatten() {
201            if !node_by_id.contains_key(&edge.to_id) || depths.contains_key(&edge.to_id) {
202                continue;
203            }
204            let next_depth = depth + 1;
205            depths.insert(edge.to_id.clone(), next_depth);
206            queue.push_back((edge.to_id.clone(), next_depth));
207        }
208    }
209    depths
210}
211
212pub fn page_handle_coverage_pct(nodes: &[GraphNode]) -> f64 {
213    if nodes.is_empty() {
214        return 0.0;
215    }
216    let covered = nodes
217        .iter()
218        .filter(|node| node_has_handle_coverage(node))
219        .count();
220    (covered as f64 / nodes.len() as f64) * 100.0
221}
222
223pub fn node_has_handle_coverage(node: &GraphNode) -> bool {
224    !node.id.is_empty()
225        || node.properties.contains_key("handle")
226        || node.properties.contains_key("ref_id")
227        || node.properties.contains_key("tagpath_handle")
228}
229
230pub fn duplicate_name_precision(node: &GraphNode, label_counts: &BTreeMap<String, usize>) -> f64 {
231    let count = label_counts.get(&node.label).copied().unwrap_or(1);
232    if count <= 1 || node_has_handle_coverage(node) {
233        1.0
234    } else {
235        1.0 / count as f64
236    }
237}
238
239pub fn has_community_signal(node: &GraphNode, edge_kinds: &[String]) -> bool {
240    node.kind.contains("community")
241        || edge_kinds.iter().any(|kind| kind.contains("community"))
242        || node
243            .properties
244            .iter()
245            .any(|(key, value)| key.contains("community") || value.contains("community"))
246}
247
248pub fn has_semantic_signal(node: &GraphNode, edge_kinds: &[String]) -> bool {
249    node.kind.starts_with("semantic_")
250        || edge_kinds.iter().any(|kind| {
251            kind.contains("semantic") || kind.contains("concept") || kind.contains("entity")
252        })
253}
254
255pub fn source_handle_is_fresh(node: &GraphNode) -> bool {
256    node.kind == "source_handle"
257        && node.freshness.as_ref().is_some_and(|freshness| {
258            freshness.content_hash.is_some() || freshness.observed_at_unix.is_some()
259        })
260}
261
262pub fn edge_kind_rank_score(edge_kind: &str) -> i64 {
263    match edge_kind {
264        "semantic_relation" => 34,
265        "mentions_entity" | "mentions_concept" | "tagged_entity" | "tagged_concept"
266        | "related_concept" => 28,
267        "mentions" => 22,
268        "calls" => 20,
269        "requests_context" | "scopes_context" | "scopes_source" | "explains_result" => 18,
270        "defines" | "contains" | "belongs_to" => 12,
271        kind if kind.contains("community") => 20,
272        kind if kind.contains("semantic")
273            || kind.contains("concept")
274            || kind.contains("entity") =>
275        {
276            24
277        }
278        _ => 8,
279    }
280}
281
282pub fn default_neighborhood_ranking_gate() -> NeighborhoodRankingGate {
283    NeighborhoodRankingGate {
284        status: "active".to_string(),
285        ranked_output_default: true,
286        default_order: "score_desc".to_string(),
287        default_change_gate: "three_sample_regression".to_string(),
288        required_workloads: vec![
289            "real".to_string(),
290            "full_projection".to_string(),
291            "synthetic_high_degree".to_string(),
292            "synthetic_deep_chain".to_string(),
293        ],
294        required_metrics: vec![
295            "handle_coverage_pct".to_string(),
296            "duplicate_name_precision".to_string(),
297            "top_community_stability".to_string(),
298        ],
299        max_duration_regression_percent: 10.0,
300        min_handle_coverage_pct: COMMUNITY_MIN_HANDLE_COVERAGE_PCT,
301        min_duplicate_name_precision: COMMUNITY_MIN_DUPLICATE_NAME_PRECISION,
302        min_top_community_stability: 0.95,
303        diagnostics: vec![],
304    }
305}
306
307#[cfg(test)]
308mod tests {
309    use super::*;
310    use tsift_core::{GraphEdge, GraphFreshness, GraphNode};
311
312    #[test]
313    fn edge_kind_rank_score_semantic_relation() {
314        assert_eq!(edge_kind_rank_score("semantic_relation"), 34);
315    }
316
317    #[test]
318    fn edge_kind_rank_score_calls() {
319        assert_eq!(edge_kind_rank_score("calls"), 20);
320    }
321
322    #[test]
323    fn edge_kind_rank_score_unknown() {
324        assert_eq!(edge_kind_rank_score("unknown_kind"), 8);
325    }
326
327    #[test]
328    fn edge_kind_rank_score_community_prefix() {
329        assert_eq!(edge_kind_rank_score("community_member"), 20);
330    }
331
332    #[test]
333    fn edge_kind_rank_score_concept_prefix() {
334        assert_eq!(edge_kind_rank_score("concept_link"), 24);
335    }
336
337    #[test]
338    fn neighborhood_depths_basic() {
339        let nodes = [
340            GraphNode::new("a", "file", "a"),
341            GraphNode::new("b", "file", "b"),
342            GraphNode::new("c", "file", "c"),
343        ];
344        let edges = [
345            GraphEdge::new("a", "b", "calls"),
346            GraphEdge::new("b", "c", "calls"),
347        ];
348        let node_by_id = nodes.iter().map(|n| (n.id.clone(), n)).collect();
349        let outgoing: BTreeMap<String, Vec<&GraphEdge>> =
350            edges.iter().fold(BTreeMap::new(), |mut acc, e| {
351                acc.entry(e.from_id.clone()).or_default().push(e);
352                acc
353            });
354        let depths = neighborhood_depths("a", &node_by_id, &outgoing);
355        assert_eq!(depths.get("a"), Some(&0));
356        assert_eq!(depths.get("b"), Some(&1));
357        assert_eq!(depths.get("c"), Some(&2));
358    }
359
360    #[test]
361    fn neighborhood_depths_missing_center() {
362        let nodes = [GraphNode::new("a", "file", "a")];
363        let node_by_id = nodes.iter().map(|n| (n.id.clone(), n)).collect();
364        let outgoing = BTreeMap::new();
365        let depths = neighborhood_depths("missing", &node_by_id, &outgoing);
366        assert!(depths.is_empty());
367    }
368
369    #[test]
370    fn ranked_neighbors_sorts_by_score() {
371        let center = GraphNode::new("center", "file", "center");
372        let far = GraphNode::new("far", "symbol", "far_node");
373        let near = GraphNode::new("near", "symbol", "near_node");
374        let nodes = vec![center, far, near];
375        let edges = vec![
376            GraphEdge::new("center", "near", "calls"),
377            GraphEdge::new("near", "far", "calls"),
378        ];
379        let ranked = ranked_neighbors("center", &nodes, &edges);
380        assert_eq!(ranked.len(), 2);
381        assert!(ranked[0].score >= ranked[1].score);
382        assert_eq!(ranked[0].rank, 1);
383        assert_eq!(ranked[1].rank, 2);
384    }
385
386    #[test]
387    fn ranked_neighbors_skips_center() {
388        let center = GraphNode::new("center", "file", "center");
389        let other = GraphNode::new("other", "symbol", "other");
390        let nodes = vec![center, other];
391        let ranked = ranked_neighbors("center", &nodes, &[]);
392        assert_eq!(ranked.len(), 1);
393        assert_eq!(ranked[0].node_id, "other");
394    }
395
396    #[test]
397    fn ranked_neighbors_capped_truncates_after_scoring() {
398        let center = GraphNode::new("center", "file", "center");
399        let low = GraphNode::new("low", "symbol", "low");
400        let high = GraphNode::new("high", "source_handle", "high");
401        let nodes = vec![center, low, high];
402        let edges = vec![
403            GraphEdge::new("center", "low", "unknown"),
404            GraphEdge::new("center", "high", "mentions_concept"),
405        ];
406
407        let ranked = ranked_neighbors_capped("center", &nodes, &edges, 1);
408
409        assert_eq!(ranked.len(), 1);
410        assert_eq!(ranked[0].rank, 1);
411        assert_eq!(ranked[0].node_id, "high");
412    }
413
414    #[test]
415    fn page_handle_coverage_pct_empty() {
416        assert_eq!(page_handle_coverage_pct(&[]), 0.0);
417    }
418
419    #[test]
420    fn page_handle_coverage_pct_all_covered() {
421        let nodes = vec![
422            GraphNode::new("a", "file", "a").with_property("handle", "h:1"),
423            GraphNode::new("b", "file", "b").with_property("ref_id", "#1"),
424        ];
425        let pct = page_handle_coverage_pct(&nodes);
426        assert!(pct > 99.0);
427    }
428
429    #[test]
430    fn page_handle_coverage_pct_partial() {
431        let nodes = vec![
432            GraphNode::new("a", "file", "a").with_property("handle", "h:1"),
433            GraphNode::new("", "file", "b"),
434        ];
435        let pct = page_handle_coverage_pct(&nodes);
436        assert!((pct - 50.0).abs() < 1e-9);
437    }
438
439    #[test]
440    fn duplicate_name_precision_unique() {
441        let node = GraphNode::new("a", "symbol", "unique_name");
442        let counts = BTreeMap::from([("unique_name".to_string(), 1)]);
443        assert_eq!(duplicate_name_precision(&node, &counts), 1.0);
444    }
445
446    #[test]
447    fn duplicate_name_precision_duplicate_with_handle() {
448        let node = GraphNode::new("a", "symbol", "dup").with_property("handle", "h:1");
449        let counts = BTreeMap::from([("dup".to_string(), 5)]);
450        assert_eq!(duplicate_name_precision(&node, &counts), 1.0);
451    }
452
453    #[test]
454    fn duplicate_name_precision_duplicate_without_handle() {
455        let node = GraphNode::new("", "symbol", "dup");
456        let counts = BTreeMap::from([("dup".to_string(), 4)]);
457        assert!((duplicate_name_precision(&node, &counts) - 0.25).abs() < 1e-9);
458    }
459
460    #[test]
461    fn has_community_signal_kind() {
462        let node = GraphNode::new("a", "community_member", "a");
463        assert!(has_community_signal(&node, &[]));
464    }
465
466    #[test]
467    fn has_community_signal_edge_kind() {
468        let node = GraphNode::new("a", "file", "a");
469        assert!(has_community_signal(&node, &["community_link".to_string()]));
470    }
471
472    #[test]
473    fn has_community_signal_property() {
474        let node = GraphNode::new("a", "file", "a").with_property("community_id", "c1");
475        assert!(has_community_signal(&node, &[]));
476    }
477
478    #[test]
479    fn has_semantic_signal_kind() {
480        let node = GraphNode::new("a", "semantic_concept", "a");
481        assert!(has_semantic_signal(&node, &[]));
482    }
483
484    #[test]
485    fn has_semantic_signal_edge_kind() {
486        let node = GraphNode::new("a", "file", "a");
487        assert!(has_semantic_signal(&node, &["concept_link".to_string()]));
488    }
489
490    #[test]
491    fn source_handle_is_fresh_true() {
492        let node = GraphNode::new("a", "source_handle", "a")
493            .with_freshness(GraphFreshness::content_hash("abc"));
494        assert!(source_handle_is_fresh(&node));
495    }
496
497    #[test]
498    fn source_handle_is_fresh_wrong_kind() {
499        let node =
500            GraphNode::new("a", "file", "a").with_freshness(GraphFreshness::content_hash("abc"));
501        assert!(!source_handle_is_fresh(&node));
502    }
503
504    #[test]
505    fn source_handle_is_fresh_no_freshness() {
506        let node = GraphNode::new("a", "source_handle", "a");
507        assert!(!source_handle_is_fresh(&node));
508    }
509
510    #[test]
511    fn node_has_handle_coverage_handle() {
512        let node = GraphNode::new("a", "file", "a").with_property("handle", "h:1");
513        assert!(node_has_handle_coverage(&node));
514    }
515
516    #[test]
517    fn node_has_handle_coverage_ref_id() {
518        let node = GraphNode::new("a", "file", "a").with_property("ref_id", "#1");
519        assert!(node_has_handle_coverage(&node));
520    }
521
522    #[test]
523    fn node_has_handle_coverage_empty() {
524        let node = GraphNode::new("", "file", "a");
525        assert!(!node_has_handle_coverage(&node));
526    }
527
528    #[test]
529    fn default_neighborhood_ranking_gate_values() {
530        let gate = default_neighborhood_ranking_gate();
531        assert_eq!(gate.status, "active");
532        assert!(gate.ranked_output_default);
533        assert_eq!(
534            gate.min_handle_coverage_pct,
535            COMMUNITY_MIN_HANDLE_COVERAGE_PCT
536        );
537        assert_eq!(
538            gate.min_duplicate_name_precision,
539            COMMUNITY_MIN_DUPLICATE_NAME_PRECISION
540        );
541    }
542}