1use std::collections::{BTreeMap, BTreeSet, VecDeque};
2
3use serde::Serialize;
4use tsift_core::{GraphEdge, GraphNode};
5
6pub const COMMUNITY_MIN_HANDLE_COVERAGE_PCT: f64 = 95.0;
7pub const COMMUNITY_MIN_DUPLICATE_NAME_PRECISION: f64 = 0.99;
8
9#[derive(Clone, Debug, Serialize, PartialEq)]
10pub struct RankedNeighbor {
11 pub rank: usize,
12 pub node_id: String,
13 pub kind: String,
14 pub label: String,
15 pub score: i64,
16 #[serde(skip_serializing_if = "Option::is_none")]
17 pub depth: Option<usize>,
18 pub edge_kinds: Vec<String>,
19 pub community_co_membership: bool,
20 pub semantic_relation: bool,
21 pub source_handle_fresh: bool,
22 pub duplicate_name_precision: f64,
23 pub handle_coverage_pct: f64,
24 pub reasons: Vec<String>,
25}
26
27#[derive(Clone, Debug, Serialize, PartialEq)]
28pub struct NeighborhoodRankingGate {
29 pub status: String,
30 pub ranked_output_default: bool,
31 pub default_order: String,
32 pub default_change_gate: String,
33 pub required_workloads: Vec<String>,
34 pub required_metrics: Vec<String>,
35 pub max_duration_regression_percent: f64,
36 pub min_handle_coverage_pct: f64,
37 pub min_duplicate_name_precision: f64,
38 pub min_top_community_stability: f64,
39 pub diagnostics: Vec<String>,
40}
41
42pub fn ranked_neighbors(
43 center_id: &str,
44 nodes: &[GraphNode],
45 edges: &[GraphEdge],
46) -> Vec<RankedNeighbor> {
47 let node_by_id = nodes
48 .iter()
49 .map(|node| (node.id.clone(), node))
50 .collect::<BTreeMap<_, _>>();
51 let label_counts = nodes
52 .iter()
53 .fold(BTreeMap::<String, usize>::new(), |mut acc, node| {
54 *acc.entry(node.label.clone()).or_default() += 1;
55 acc
56 });
57 let mut outgoing = BTreeMap::<String, Vec<&GraphEdge>>::new();
58 let mut edge_kinds_by_node = BTreeMap::<String, BTreeSet<String>>::new();
59 let mut fresh_edge_by_node = BTreeSet::<String>::new();
60 for edge in edges {
61 outgoing.entry(edge.from_id.clone()).or_default().push(edge);
62 for endpoint in [&edge.from_id, &edge.to_id] {
63 if node_by_id.contains_key(endpoint) {
64 edge_kinds_by_node
65 .entry(endpoint.clone())
66 .or_default()
67 .insert(edge.kind.clone());
68 if edge.freshness.as_ref().is_some_and(|freshness| {
69 freshness.content_hash.is_some() || freshness.observed_at_unix.is_some()
70 }) {
71 fresh_edge_by_node.insert(endpoint.clone());
72 }
73 }
74 }
75 }
76
77 let depths = neighborhood_depths(center_id, &node_by_id, &outgoing);
78 let page_handle_coverage_pct = page_handle_coverage_pct(nodes);
79 let mut ranked = Vec::new();
80
81 for node in nodes {
82 if node.id == center_id {
83 continue;
84 }
85 let edge_kinds = edge_kinds_by_node
86 .get(&node.id)
87 .map(|kinds| kinds.iter().cloned().collect::<Vec<_>>())
88 .unwrap_or_default();
89 let depth = depths.get(&node.id).copied();
90 let community_co_membership = has_community_signal(node, &edge_kinds);
91 let semantic_relation = has_semantic_signal(node, &edge_kinds);
92 let source_handle_fresh = source_handle_is_fresh(node)
93 || (node.kind == "source_handle" && fresh_edge_by_node.contains(&node.id));
94 let duplicate_name_precision = duplicate_name_precision(node, &label_counts);
95 let mut score = 0i64;
96 let mut reasons = Vec::new();
97
98 if let Some(depth) = depth {
99 let depth_score = 120i64
100 .saturating_sub((depth as i64).saturating_mul(18))
101 .max(0);
102 score += depth_score;
103 reasons.push(format!("depth:{depth}+{depth_score}"));
104 } else {
105 reasons.push("depth:unknown".to_string());
106 }
107
108 for edge_kind in &edge_kinds {
109 let edge_score = edge_kind_rank_score(edge_kind);
110 score += edge_score;
111 reasons.push(format!("edge_kind:{edge_kind}+{edge_score}"));
112 }
113 if community_co_membership {
114 score += 18;
115 reasons.push("community_co_membership+18".to_string());
116 }
117 if semantic_relation {
118 score += 24;
119 reasons.push("semantic_relation+24".to_string());
120 }
121 if source_handle_fresh {
122 score += 18;
123 reasons.push("source_handle_fresh+18".to_string());
124 }
125 if duplicate_name_precision + f64::EPSILON >= COMMUNITY_MIN_DUPLICATE_NAME_PRECISION {
126 score += 10;
127 reasons.push("duplicate_name_precision+10".to_string());
128 } else {
129 score -= 10;
130 reasons.push("duplicate_name_precision-10".to_string());
131 }
132 if page_handle_coverage_pct + f64::EPSILON >= COMMUNITY_MIN_HANDLE_COVERAGE_PCT {
133 score += 10;
134 reasons.push("handle_coverage+10".to_string());
135 } else {
136 score -= 10;
137 reasons.push("handle_coverage-10".to_string());
138 }
139
140 ranked.push(RankedNeighbor {
141 rank: 0,
142 node_id: node.id.clone(),
143 kind: node.kind.clone(),
144 label: node.label.clone(),
145 score,
146 depth,
147 edge_kinds,
148 community_co_membership,
149 semantic_relation,
150 source_handle_fresh,
151 duplicate_name_precision,
152 handle_coverage_pct: page_handle_coverage_pct,
153 reasons,
154 });
155 }
156
157 ranked.sort_by(|left, right| {
158 right
159 .score
160 .cmp(&left.score)
161 .then_with(|| {
162 left.depth
163 .unwrap_or(usize::MAX)
164 .cmp(&right.depth.unwrap_or(usize::MAX))
165 })
166 .then(left.kind.cmp(&right.kind))
167 .then(left.label.cmp(&right.label))
168 .then(left.node_id.cmp(&right.node_id))
169 });
170 for (idx, neighbor) in ranked.iter_mut().enumerate() {
171 neighbor.rank = idx + 1;
172 }
173 ranked
174}
175
176pub fn ranked_neighbors_capped(
177 center_id: &str,
178 nodes: &[GraphNode],
179 edges: &[GraphEdge],
180 cap: usize,
181) -> Vec<RankedNeighbor> {
182 let mut ranked = ranked_neighbors(center_id, nodes, edges);
183 if cap > 0 && ranked.len() > cap {
184 ranked.truncate(cap);
185 }
186 ranked
187}
188
189pub fn neighborhood_depths(
190 center_id: &str,
191 node_by_id: &BTreeMap<String, &GraphNode>,
192 outgoing: &BTreeMap<String, Vec<&GraphEdge>>,
193) -> BTreeMap<String, usize> {
194 if !node_by_id.contains_key(center_id) {
195 return BTreeMap::new();
196 }
197 let mut depths = BTreeMap::from([(center_id.to_string(), 0usize)]);
198 let mut queue = VecDeque::from([(center_id.to_string(), 0usize)]);
199 while let Some((current, depth)) = queue.pop_front() {
200 for edge in outgoing.get(¤t).into_iter().flatten() {
201 if !node_by_id.contains_key(&edge.to_id) || depths.contains_key(&edge.to_id) {
202 continue;
203 }
204 let next_depth = depth + 1;
205 depths.insert(edge.to_id.clone(), next_depth);
206 queue.push_back((edge.to_id.clone(), next_depth));
207 }
208 }
209 depths
210}
211
212pub fn page_handle_coverage_pct(nodes: &[GraphNode]) -> f64 {
213 if nodes.is_empty() {
214 return 0.0;
215 }
216 let covered = nodes
217 .iter()
218 .filter(|node| node_has_handle_coverage(node))
219 .count();
220 (covered as f64 / nodes.len() as f64) * 100.0
221}
222
223pub fn node_has_handle_coverage(node: &GraphNode) -> bool {
224 !node.id.is_empty()
225 || node.properties.contains_key("handle")
226 || node.properties.contains_key("ref_id")
227 || node.properties.contains_key("tagpath_handle")
228}
229
230pub fn duplicate_name_precision(node: &GraphNode, label_counts: &BTreeMap<String, usize>) -> f64 {
231 let count = label_counts.get(&node.label).copied().unwrap_or(1);
232 if count <= 1 || node_has_handle_coverage(node) {
233 1.0
234 } else {
235 1.0 / count as f64
236 }
237}
238
239pub fn has_community_signal(node: &GraphNode, edge_kinds: &[String]) -> bool {
240 node.kind.contains("community")
241 || edge_kinds.iter().any(|kind| kind.contains("community"))
242 || node
243 .properties
244 .iter()
245 .any(|(key, value)| key.contains("community") || value.contains("community"))
246}
247
248pub fn has_semantic_signal(node: &GraphNode, edge_kinds: &[String]) -> bool {
249 node.kind.starts_with("semantic_")
250 || edge_kinds.iter().any(|kind| {
251 kind.contains("semantic") || kind.contains("concept") || kind.contains("entity")
252 })
253}
254
255pub fn source_handle_is_fresh(node: &GraphNode) -> bool {
256 node.kind == "source_handle"
257 && node.freshness.as_ref().is_some_and(|freshness| {
258 freshness.content_hash.is_some() || freshness.observed_at_unix.is_some()
259 })
260}
261
262pub fn edge_kind_rank_score(edge_kind: &str) -> i64 {
263 match edge_kind {
264 "semantic_relation" => 34,
265 "mentions_entity" | "mentions_concept" | "tagged_entity" | "tagged_concept"
266 | "related_concept" => 28,
267 "mentions" => 22,
268 "calls" => 20,
269 "requests_context" | "scopes_context" | "scopes_source" | "explains_result" => 18,
270 "defines" | "contains" | "belongs_to" => 12,
271 kind if kind.contains("community") => 20,
272 kind if kind.contains("semantic")
273 || kind.contains("concept")
274 || kind.contains("entity") =>
275 {
276 24
277 }
278 _ => 8,
279 }
280}
281
282pub fn default_neighborhood_ranking_gate() -> NeighborhoodRankingGate {
283 NeighborhoodRankingGate {
284 status: "active".to_string(),
285 ranked_output_default: true,
286 default_order: "score_desc".to_string(),
287 default_change_gate: "three_sample_regression".to_string(),
288 required_workloads: vec![
289 "real".to_string(),
290 "full_projection".to_string(),
291 "synthetic_high_degree".to_string(),
292 "synthetic_deep_chain".to_string(),
293 ],
294 required_metrics: vec![
295 "handle_coverage_pct".to_string(),
296 "duplicate_name_precision".to_string(),
297 "top_community_stability".to_string(),
298 ],
299 max_duration_regression_percent: 10.0,
300 min_handle_coverage_pct: COMMUNITY_MIN_HANDLE_COVERAGE_PCT,
301 min_duplicate_name_precision: COMMUNITY_MIN_DUPLICATE_NAME_PRECISION,
302 min_top_community_stability: 0.95,
303 diagnostics: vec![],
304 }
305}
306
307#[cfg(test)]
308mod tests {
309 use super::*;
310 use tsift_core::{GraphEdge, GraphFreshness, GraphNode};
311
312 #[test]
313 fn edge_kind_rank_score_semantic_relation() {
314 assert_eq!(edge_kind_rank_score("semantic_relation"), 34);
315 }
316
317 #[test]
318 fn edge_kind_rank_score_calls() {
319 assert_eq!(edge_kind_rank_score("calls"), 20);
320 }
321
322 #[test]
323 fn edge_kind_rank_score_unknown() {
324 assert_eq!(edge_kind_rank_score("unknown_kind"), 8);
325 }
326
327 #[test]
328 fn edge_kind_rank_score_community_prefix() {
329 assert_eq!(edge_kind_rank_score("community_member"), 20);
330 }
331
332 #[test]
333 fn edge_kind_rank_score_concept_prefix() {
334 assert_eq!(edge_kind_rank_score("concept_link"), 24);
335 }
336
337 #[test]
338 fn neighborhood_depths_basic() {
339 let nodes = [
340 GraphNode::new("a", "file", "a"),
341 GraphNode::new("b", "file", "b"),
342 GraphNode::new("c", "file", "c"),
343 ];
344 let edges = [
345 GraphEdge::new("a", "b", "calls"),
346 GraphEdge::new("b", "c", "calls"),
347 ];
348 let node_by_id = nodes.iter().map(|n| (n.id.clone(), n)).collect();
349 let outgoing: BTreeMap<String, Vec<&GraphEdge>> =
350 edges.iter().fold(BTreeMap::new(), |mut acc, e| {
351 acc.entry(e.from_id.clone()).or_default().push(e);
352 acc
353 });
354 let depths = neighborhood_depths("a", &node_by_id, &outgoing);
355 assert_eq!(depths.get("a"), Some(&0));
356 assert_eq!(depths.get("b"), Some(&1));
357 assert_eq!(depths.get("c"), Some(&2));
358 }
359
360 #[test]
361 fn neighborhood_depths_missing_center() {
362 let nodes = [GraphNode::new("a", "file", "a")];
363 let node_by_id = nodes.iter().map(|n| (n.id.clone(), n)).collect();
364 let outgoing = BTreeMap::new();
365 let depths = neighborhood_depths("missing", &node_by_id, &outgoing);
366 assert!(depths.is_empty());
367 }
368
369 #[test]
370 fn ranked_neighbors_sorts_by_score() {
371 let center = GraphNode::new("center", "file", "center");
372 let far = GraphNode::new("far", "symbol", "far_node");
373 let near = GraphNode::new("near", "symbol", "near_node");
374 let nodes = vec![center, far, near];
375 let edges = vec![
376 GraphEdge::new("center", "near", "calls"),
377 GraphEdge::new("near", "far", "calls"),
378 ];
379 let ranked = ranked_neighbors("center", &nodes, &edges);
380 assert_eq!(ranked.len(), 2);
381 assert!(ranked[0].score >= ranked[1].score);
382 assert_eq!(ranked[0].rank, 1);
383 assert_eq!(ranked[1].rank, 2);
384 }
385
386 #[test]
387 fn ranked_neighbors_skips_center() {
388 let center = GraphNode::new("center", "file", "center");
389 let other = GraphNode::new("other", "symbol", "other");
390 let nodes = vec![center, other];
391 let ranked = ranked_neighbors("center", &nodes, &[]);
392 assert_eq!(ranked.len(), 1);
393 assert_eq!(ranked[0].node_id, "other");
394 }
395
396 #[test]
397 fn ranked_neighbors_capped_truncates_after_scoring() {
398 let center = GraphNode::new("center", "file", "center");
399 let low = GraphNode::new("low", "symbol", "low");
400 let high = GraphNode::new("high", "source_handle", "high");
401 let nodes = vec![center, low, high];
402 let edges = vec![
403 GraphEdge::new("center", "low", "unknown"),
404 GraphEdge::new("center", "high", "mentions_concept"),
405 ];
406
407 let ranked = ranked_neighbors_capped("center", &nodes, &edges, 1);
408
409 assert_eq!(ranked.len(), 1);
410 assert_eq!(ranked[0].rank, 1);
411 assert_eq!(ranked[0].node_id, "high");
412 }
413
414 #[test]
415 fn page_handle_coverage_pct_empty() {
416 assert_eq!(page_handle_coverage_pct(&[]), 0.0);
417 }
418
419 #[test]
420 fn page_handle_coverage_pct_all_covered() {
421 let nodes = vec![
422 GraphNode::new("a", "file", "a").with_property("handle", "h:1"),
423 GraphNode::new("b", "file", "b").with_property("ref_id", "#1"),
424 ];
425 let pct = page_handle_coverage_pct(&nodes);
426 assert!(pct > 99.0);
427 }
428
429 #[test]
430 fn page_handle_coverage_pct_partial() {
431 let nodes = vec![
432 GraphNode::new("a", "file", "a").with_property("handle", "h:1"),
433 GraphNode::new("", "file", "b"),
434 ];
435 let pct = page_handle_coverage_pct(&nodes);
436 assert!((pct - 50.0).abs() < 1e-9);
437 }
438
439 #[test]
440 fn duplicate_name_precision_unique() {
441 let node = GraphNode::new("a", "symbol", "unique_name");
442 let counts = BTreeMap::from([("unique_name".to_string(), 1)]);
443 assert_eq!(duplicate_name_precision(&node, &counts), 1.0);
444 }
445
446 #[test]
447 fn duplicate_name_precision_duplicate_with_handle() {
448 let node = GraphNode::new("a", "symbol", "dup").with_property("handle", "h:1");
449 let counts = BTreeMap::from([("dup".to_string(), 5)]);
450 assert_eq!(duplicate_name_precision(&node, &counts), 1.0);
451 }
452
453 #[test]
454 fn duplicate_name_precision_duplicate_without_handle() {
455 let node = GraphNode::new("", "symbol", "dup");
456 let counts = BTreeMap::from([("dup".to_string(), 4)]);
457 assert!((duplicate_name_precision(&node, &counts) - 0.25).abs() < 1e-9);
458 }
459
460 #[test]
461 fn has_community_signal_kind() {
462 let node = GraphNode::new("a", "community_member", "a");
463 assert!(has_community_signal(&node, &[]));
464 }
465
466 #[test]
467 fn has_community_signal_edge_kind() {
468 let node = GraphNode::new("a", "file", "a");
469 assert!(has_community_signal(&node, &["community_link".to_string()]));
470 }
471
472 #[test]
473 fn has_community_signal_property() {
474 let node = GraphNode::new("a", "file", "a").with_property("community_id", "c1");
475 assert!(has_community_signal(&node, &[]));
476 }
477
478 #[test]
479 fn has_semantic_signal_kind() {
480 let node = GraphNode::new("a", "semantic_concept", "a");
481 assert!(has_semantic_signal(&node, &[]));
482 }
483
484 #[test]
485 fn has_semantic_signal_edge_kind() {
486 let node = GraphNode::new("a", "file", "a");
487 assert!(has_semantic_signal(&node, &["concept_link".to_string()]));
488 }
489
490 #[test]
491 fn source_handle_is_fresh_true() {
492 let node = GraphNode::new("a", "source_handle", "a")
493 .with_freshness(GraphFreshness::content_hash("abc"));
494 assert!(source_handle_is_fresh(&node));
495 }
496
497 #[test]
498 fn source_handle_is_fresh_wrong_kind() {
499 let node =
500 GraphNode::new("a", "file", "a").with_freshness(GraphFreshness::content_hash("abc"));
501 assert!(!source_handle_is_fresh(&node));
502 }
503
504 #[test]
505 fn source_handle_is_fresh_no_freshness() {
506 let node = GraphNode::new("a", "source_handle", "a");
507 assert!(!source_handle_is_fresh(&node));
508 }
509
510 #[test]
511 fn node_has_handle_coverage_handle() {
512 let node = GraphNode::new("a", "file", "a").with_property("handle", "h:1");
513 assert!(node_has_handle_coverage(&node));
514 }
515
516 #[test]
517 fn node_has_handle_coverage_ref_id() {
518 let node = GraphNode::new("a", "file", "a").with_property("ref_id", "#1");
519 assert!(node_has_handle_coverage(&node));
520 }
521
522 #[test]
523 fn node_has_handle_coverage_empty() {
524 let node = GraphNode::new("", "file", "a");
525 assert!(!node_has_handle_coverage(&node));
526 }
527
528 #[test]
529 fn default_neighborhood_ranking_gate_values() {
530 let gate = default_neighborhood_ranking_gate();
531 assert_eq!(gate.status, "active");
532 assert!(gate.ranked_output_default);
533 assert_eq!(
534 gate.min_handle_coverage_pct,
535 COMMUNITY_MIN_HANDLE_COVERAGE_PCT
536 );
537 assert_eq!(
538 gate.min_duplicate_name_precision,
539 COMMUNITY_MIN_DUPLICATE_NAME_PRECISION
540 );
541 }
542}