Skip to main content

bones_search/structural/
similarity.rs

1//! Structural similarity between work items.
2//!
3//! # Overview
4//!
5//! Structural similarity captures relatedness between two work items based on
6//! shared **structural properties** — labels, direct dependency neighbours,
7//! assignees, shared parent goal, and proximity inside the dependency graph —
8//! rather than lexical text overlap.
9//!
10//! ## Usage
11//!
12//! ```rust,ignore
13//! use std::collections::HashSet;
14//! use bones_search::structural::{StructuralScore, structural_similarity, jaccard};
15//! use petgraph::graph::DiGraph;
16//! use rusqlite::Connection;
17//!
18//! let conn: Connection = /* open projection db */;
19//! let graph: DiGraph<String, ()> = /* build from RawGraph */;
20//!
21//! let score = structural_similarity("bn-001", "bn-002", &conn, &graph)?;
22//! println!("label_sim={:.3}  dep_sim={:.3}  graph_proximity={:.3}",
23//!          score.label_sim, score.dep_sim, score.graph_proximity);
24//! ```
25
26#![allow(clippy::module_name_repetitions)]
27
28use std::collections::{HashMap, HashSet, VecDeque};
29
30use anyhow::{Context, Result};
31use petgraph::Direction;
32use petgraph::graph::{DiGraph, NodeIndex};
33use rusqlite::Connection;
34use rusqlite::OptionalExtension as _;
35
36// ---------------------------------------------------------------------------
37// StructuralScore
38// ---------------------------------------------------------------------------
39
40/// Per-feature structural similarity breakdown between two items.
41///
42/// All scores are in `[0.0, 1.0]` range.  The individual fields are kept
43/// separate so downstream consumers (e.g. RRF fusion) can weight them
44/// independently and show per-feature explanations to users.
45#[derive(Debug, Clone, PartialEq)]
46pub struct StructuralScore {
47    /// Jaccard similarity of the two items' label sets.
48    pub label_sim: f32,
49    /// Jaccard similarity of direct dependency-neighbour sets
50    /// (union of out-neighbours and in-neighbours in the graph).
51    pub dep_sim: f32,
52    /// Jaccard similarity of assignee/agent sets.
53    pub assignee_sim: f32,
54    /// `1.0` if both items share the same non-null parent goal, `0.0`
55    /// otherwise.  Could be extended to graded parent-chain overlap.
56    pub parent_sim: f32,
57    /// Graph proximity: `1.0 / (1.0 + shortest_path_distance)` using an
58    /// undirected BFS up to [`MAX_HOPS`] hops.  Items unreachable within
59    /// the hop limit, or not present in the graph, score `0.0`.
60    pub graph_proximity: f32,
61}
62
63impl StructuralScore {
64    /// Weighted average of all feature scores.
65    ///
66    /// Weights are uniform for now; callers may prefer their own aggregation.
67    #[must_use]
68    pub fn mean(&self) -> f32 {
69        (self.label_sim + self.dep_sim + self.assignee_sim + self.parent_sim + self.graph_proximity)
70            / 5.0
71    }
72}
73
74// ---------------------------------------------------------------------------
75// Constants
76// ---------------------------------------------------------------------------
77
78/// Maximum BFS hop distance considered for graph proximity.
79/// Items further apart than this are scored `0.0`.
80const MAX_HOPS: usize = 5;
81
82// ---------------------------------------------------------------------------
83// Public API
84// ---------------------------------------------------------------------------
85
86/// Generic Jaccard similarity: `|A ∩ B| / |A ∪ B|`.
87///
88/// Returns `0.0` if both sets are empty (to avoid 0/0).
89///
90/// # Examples
91///
92/// ```
93/// use std::collections::HashSet;
94/// use bones_search::structural::jaccard;
95///
96/// let a: HashSet<&str> = ["x", "y", "z"].into_iter().collect();
97/// let b: HashSet<&str> = ["y", "z", "w"].into_iter().collect();
98/// let sim = jaccard(&a, &b);
99/// // intersection = {"y","z"} (2), union = {"x","y","z","w"} (4)
100/// assert!((sim - 0.5).abs() < 1e-6);
101/// ```
102#[must_use]
103#[allow(clippy::implicit_hasher, clippy::cast_precision_loss)]
104pub fn jaccard<T: Eq + std::hash::Hash>(a: &HashSet<T>, b: &HashSet<T>) -> f32 {
105    if a.is_empty() && b.is_empty() {
106        return 0.0;
107    }
108    let intersection = a.intersection(b).count() as f32;
109    let union_size = a.union(b).count() as f32;
110    if union_size == 0.0 {
111        0.0
112    } else {
113        intersection / union_size
114    }
115}
116
117/// Compute structural similarity between two items.
118///
119/// Queries `SQLite` for item metadata (labels, assignees, parent goal) and uses
120/// the pre-built dependency graph for neighbour-set and BFS-proximity
121/// computations.
122///
123/// # Parameters
124///
125/// - `a`, `b` — item IDs (e.g. `"bn-001"`).
126/// - `db` — `SQLite` connection to the bones projection database.
127/// - `graph` — directed dependency graph where an edge `X → Y` means "X
128///   blocks Y".  Typically built with `bones_triage::graph::build::RawGraph`.
129///
130/// # Errors
131///
132/// Returns an error if any `SQLite` query fails.
133pub fn structural_similarity(
134    a: &str,
135    b: &str,
136    db: &Connection,
137    graph: &DiGraph<String, ()>,
138) -> Result<StructuralScore> {
139    structural_similarity_with_map(a, b, db, graph, None)
140}
141
142/// Compute structural similarity with an optional pre-computed node map.
143///
144/// Use this variant when performing many pairwise comparisons against the same
145/// graph to avoid rebuilding the `node_map` repeatedly (O(N) vs O(1)).
146///
147/// If `node_map` is `None`, it will be built internally (O(N)).
148///
149/// # Errors
150///
151/// Returns an error if any `SQLite` query fails.
152#[allow(
153    clippy::implicit_hasher,
154    clippy::cast_precision_loss,
155    clippy::option_if_let_else
156)]
157pub fn structural_similarity_with_map(
158    a: &str,
159    b: &str,
160    db: &Connection,
161    graph: &DiGraph<String, ()>,
162    node_map: Option<&HashMap<&str, NodeIndex>>,
163) -> Result<StructuralScore> {
164    // -----------------------------------------------------------------------
165    // 1. Labels — query item_labels table
166    // -----------------------------------------------------------------------
167    let labels_a = fetch_labels(db, a).with_context(|| format!("fetch labels for {a}"))?;
168    let labels_b = fetch_labels(db, b).with_context(|| format!("fetch labels for {b}"))?;
169    let label_sim = jaccard(&labels_a, &labels_b);
170
171    // -----------------------------------------------------------------------
172    // 2. Assignees — query item_assignees table
173    // -----------------------------------------------------------------------
174    let assignees_a = fetch_assignees(db, a).with_context(|| format!("fetch assignees for {a}"))?;
175    let assignees_b = fetch_assignees(db, b).with_context(|| format!("fetch assignees for {b}"))?;
176    let assignee_sim = jaccard(&assignees_a, &assignees_b);
177
178    // -----------------------------------------------------------------------
179    // 3. Parent goal — query items.parent_id
180    // -----------------------------------------------------------------------
181    let parent_a = fetch_parent(db, a).with_context(|| format!("fetch parent for {a}"))?;
182    let parent_b = fetch_parent(db, b).with_context(|| format!("fetch parent for {b}"))?;
183    let parent_sim = match (parent_a.as_deref(), parent_b.as_deref()) {
184        (Some(pa), Some(pb)) if pa == pb => 1.0_f32,
185        _ => 0.0_f32,
186    };
187
188    // -----------------------------------------------------------------------
189    // 4. Build node lookup from petgraph (if not provided)
190    // -----------------------------------------------------------------------
191    let built_map;
192    let map_ref = if let Some(m) = node_map {
193        m
194    } else {
195        built_map = graph
196            .node_indices()
197            .filter_map(|idx| graph.node_weight(idx).map(|s| (s.as_str(), idx)))
198            .collect();
199        &built_map
200    };
201
202    // -----------------------------------------------------------------------
203    // 5. Dep-neighbour Jaccard — undirected direct neighbours in the graph
204    // -----------------------------------------------------------------------
205    let neighbours_a = direct_neighbours(graph, map_ref, a);
206    let neighbours_b = direct_neighbours(graph, map_ref, b);
207    let dep_sim = jaccard(&neighbours_a, &neighbours_b);
208
209    // -----------------------------------------------------------------------
210    // 6. Graph proximity — BFS in undirected view, up to MAX_HOPS
211    // -----------------------------------------------------------------------
212    #[allow(clippy::cast_precision_loss)]
213    let graph_proximity = bfs_distance(graph, map_ref, a, b, MAX_HOPS)
214        .map_or(0.0_f32, |dist| 1.0_f32 / (1.0_f32 + dist as f32));
215
216    Ok(StructuralScore {
217        label_sim,
218        dep_sim,
219        assignee_sim,
220        parent_sim,
221        graph_proximity,
222    })
223}
224
225// ---------------------------------------------------------------------------
226// SQLite helpers
227// ---------------------------------------------------------------------------
228
229/// Fetch the label set for one item from `item_labels`.
230fn fetch_labels(db: &Connection, item_id: &str) -> Result<HashSet<String>> {
231    let mut stmt = db
232        .prepare_cached(
233            "SELECT label
234             FROM item_labels
235             WHERE item_id = ?1",
236        )
237        .context("prepare fetch_labels")?;
238
239    let labels = stmt
240        .query_map([item_id], |row| row.get::<_, String>(0))
241        .context("execute fetch_labels")?
242        .collect::<Result<HashSet<_>, _>>()
243        .context("collect labels")?;
244
245    Ok(labels)
246}
247
248/// Fetch the assignee set for one item from `item_assignees`.
249fn fetch_assignees(db: &Connection, item_id: &str) -> Result<HashSet<String>> {
250    let mut stmt = db
251        .prepare_cached(
252            "SELECT agent
253             FROM item_assignees
254             WHERE item_id = ?1",
255        )
256        .context("prepare fetch_assignees")?;
257
258    let agents = stmt
259        .query_map([item_id], |row| row.get::<_, String>(0))
260        .context("execute fetch_assignees")?
261        .collect::<Result<HashSet<_>, _>>()
262        .context("collect assignees")?;
263
264    Ok(agents)
265}
266
267/// Fetch the `parent_id` for one item from the `items` table.
268/// Returns `None` if the item has no parent or is not found.
269fn fetch_parent(db: &Connection, item_id: &str) -> Result<Option<String>> {
270    let mut stmt = db
271        .prepare_cached(
272            "SELECT parent_id
273             FROM items
274             WHERE item_id = ?1 AND is_deleted = 0",
275        )
276        .context("prepare fetch_parent")?;
277
278    let parent = stmt
279        .query_row([item_id], |row| row.get::<_, Option<String>>(0))
280        .optional()
281        .context("execute fetch_parent")?
282        .flatten(); // flatten Option<Option<String>> → Option<String>
283
284    Ok(parent)
285}
286
287// ---------------------------------------------------------------------------
288// Graph helpers
289// ---------------------------------------------------------------------------
290
291/// Collect the set of direct (undirected) neighbour IDs for one item.
292///
293/// Neighbour IDs are cloned from graph node weights and stored as `String`.
294/// Items not present in the graph produce an empty set.
295fn direct_neighbours<'g>(
296    graph: &'g DiGraph<String, ()>,
297    node_map: &HashMap<&str, NodeIndex>,
298    item_id: &str,
299) -> HashSet<&'g str> {
300    let Some(&idx) = node_map.get(item_id) else {
301        return HashSet::new();
302    };
303
304    // Collect out-neighbours (items this item blocks) and in-neighbours
305    // (items that block this item) for a symmetric neighbour set.
306    graph
307        .neighbors_directed(idx, Direction::Outgoing)
308        .chain(graph.neighbors_directed(idx, Direction::Incoming))
309        .filter_map(|n_idx| graph.node_weight(n_idx).map(String::as_str))
310        .collect()
311}
312
313/// BFS shortest-path distance in the **undirected** view of `graph`.
314///
315/// Explores both outgoing and incoming edges at each step so that directionality
316/// does not obscure structural proximity.  Stops after `max_hops` hops.
317///
318/// Returns `Some(0)` when `from == to`.
319/// Returns `None` when no path exists within `max_hops`.
320fn bfs_distance(
321    graph: &DiGraph<String, ()>,
322    node_map: &HashMap<&str, NodeIndex>,
323    from: &str,
324    to: &str,
325    max_hops: usize,
326) -> Option<usize> {
327    // Items not in the graph are unreachable.
328    let &start = node_map.get(from)?;
329    let &end = node_map.get(to)?;
330
331    if start == end {
332        return Some(0);
333    }
334
335    let mut visited: HashSet<NodeIndex> = HashSet::new();
336    let mut queue: VecDeque<(NodeIndex, usize)> = VecDeque::new();
337
338    visited.insert(start);
339    queue.push_back((start, 0));
340
341    while let Some((current, dist)) = queue.pop_front() {
342        if dist >= max_hops {
343            // No point exploring further — any neighbours would exceed the limit.
344            continue;
345        }
346
347        // Undirected BFS: follow edges in both directions.
348        let next_dist = dist + 1;
349        for neighbour in graph
350            .neighbors_directed(current, Direction::Outgoing)
351            .chain(graph.neighbors_directed(current, Direction::Incoming))
352        {
353            if neighbour == end {
354                return Some(next_dist);
355            }
356            if visited.insert(neighbour) {
357                queue.push_back((neighbour, next_dist));
358            }
359        }
360    }
361
362    None
363}
364
365// ---------------------------------------------------------------------------
366// Tests
367// ---------------------------------------------------------------------------
368
369#[cfg(test)]
370mod tests {
371    use super::*;
372    use bones_core::db::migrations;
373    use petgraph::graph::DiGraph;
374    use rusqlite::params;
375
376    // -----------------------------------------------------------------------
377    // Helpers
378    // -----------------------------------------------------------------------
379
380    fn setup_db() -> rusqlite::Connection {
381        let mut conn = rusqlite::Connection::open_in_memory().expect("in-memory db");
382        migrations::migrate(&mut conn).expect("migrate");
383        conn
384    }
385
386    fn insert_item(conn: &rusqlite::Connection, item_id: &str) {
387        conn.execute(
388            "INSERT INTO items (
389                item_id, title, kind, state, urgency, is_deleted,
390                created_at_us, updated_at_us
391             ) VALUES (?1, ?1, 'task', 'open', 'default', 0, 1000, 1000)",
392            params![item_id],
393        )
394        .expect("insert item");
395    }
396
397    fn insert_item_with_parent(conn: &rusqlite::Connection, item_id: &str, parent_id: &str) {
398        conn.execute(
399            "INSERT INTO items (
400                item_id, title, kind, state, urgency, is_deleted,
401                parent_id, created_at_us, updated_at_us
402             ) VALUES (?1, ?1, 'task', 'open', 'default', 0, ?2, 1000, 1000)",
403            params![item_id, parent_id],
404        )
405        .expect("insert item with parent");
406    }
407
408    fn insert_label(conn: &rusqlite::Connection, item_id: &str, label: &str) {
409        conn.execute(
410            "INSERT INTO item_labels (item_id, label, created_at_us) VALUES (?1, ?2, 1000)",
411            params![item_id, label],
412        )
413        .expect("insert label");
414    }
415
416    fn insert_assignee(conn: &rusqlite::Connection, item_id: &str, agent: &str) {
417        conn.execute(
418            "INSERT INTO item_assignees (item_id, agent, created_at_us) VALUES (?1, ?2, 1000)",
419            params![item_id, agent],
420        )
421        .expect("insert assignee");
422    }
423
424    fn insert_dep(conn: &rusqlite::Connection, blocked: &str, blocker: &str) {
425        conn.execute(
426            "INSERT INTO item_dependencies (item_id, depends_on_item_id, link_type, created_at_us)
427             VALUES (?1, ?2, 'blocks', 1000)",
428            params![blocked, blocker],
429        )
430        .expect("insert dep");
431    }
432
433    fn empty_graph() -> DiGraph<String, ()> {
434        DiGraph::new()
435    }
436
437    /// Build a graph from an edge list `(blocker, blocked)`.
438    fn graph_from_edges(edges: &[(&str, &str)]) -> DiGraph<String, ()> {
439        let mut graph = DiGraph::new();
440        let mut node_map: HashMap<String, NodeIndex> = HashMap::new();
441
442        for &(blocker, blocked) in edges {
443            let blocker_idx = *node_map
444                .entry(blocker.to_owned())
445                .or_insert_with(|| graph.add_node(blocker.to_owned()));
446            let blocked_idx = *node_map
447                .entry(blocked.to_owned())
448                .or_insert_with(|| graph.add_node(blocked.to_owned()));
449            if !graph.contains_edge(blocker_idx, blocked_idx) {
450                graph.add_edge(blocker_idx, blocked_idx, ());
451            }
452        }
453        graph
454    }
455
456    // -----------------------------------------------------------------------
457    // jaccard()
458    // -----------------------------------------------------------------------
459
460    #[test]
461    fn jaccard_empty_sets_returns_zero() {
462        let a: HashSet<&str> = HashSet::new();
463        let b: HashSet<&str> = HashSet::new();
464        assert_eq!(jaccard(&a, &b), 0.0);
465    }
466
467    #[test]
468    fn jaccard_identical_sets_returns_one() {
469        let a: HashSet<&str> = ["x", "y", "z"].into_iter().collect();
470        let b: HashSet<&str> = ["x", "y", "z"].into_iter().collect();
471        assert!((jaccard(&a, &b) - 1.0).abs() < 1e-6);
472    }
473
474    #[test]
475    fn jaccard_disjoint_sets_returns_zero() {
476        let a: HashSet<&str> = ["x"].into_iter().collect();
477        let b: HashSet<&str> = ["y"].into_iter().collect();
478        assert_eq!(jaccard(&a, &b), 0.0);
479    }
480
481    #[test]
482    fn jaccard_partial_overlap() {
483        // |{y,z}| / |{x,y,z,w}| = 2/4 = 0.5
484        let a: HashSet<&str> = ["x", "y", "z"].into_iter().collect();
485        let b: HashSet<&str> = ["y", "z", "w"].into_iter().collect();
486        assert!((jaccard(&a, &b) - 0.5).abs() < 1e-6);
487    }
488
489    #[test]
490    fn jaccard_one_empty_set_returns_zero() {
491        let a: HashSet<&str> = ["x", "y"].into_iter().collect();
492        let b: HashSet<&str> = HashSet::new();
493        assert_eq!(jaccard(&a, &b), 0.0);
494    }
495
496    // -----------------------------------------------------------------------
497    // label_sim
498    // -----------------------------------------------------------------------
499
500    #[test]
501    fn label_sim_shared_labels() {
502        let conn = setup_db();
503        insert_item(&conn, "bn-001");
504        insert_item(&conn, "bn-002");
505        insert_label(&conn, "bn-001", "auth");
506        insert_label(&conn, "bn-001", "backend");
507        insert_label(&conn, "bn-002", "auth");
508        insert_label(&conn, "bn-002", "frontend");
509
510        let score =
511            structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
512        // intersection={auth}(1), union={auth,backend,frontend}(3) → 1/3
513        assert!(
514            (score.label_sim - (1.0_f32 / 3.0)).abs() < 1e-6,
515            "{score:?}"
516        );
517    }
518
519    #[test]
520    fn label_sim_no_labels_is_zero() {
521        let conn = setup_db();
522        insert_item(&conn, "bn-001");
523        insert_item(&conn, "bn-002");
524
525        let score =
526            structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
527        assert_eq!(score.label_sim, 0.0);
528    }
529
530    #[test]
531    fn label_sim_identical_labels_is_one() {
532        let conn = setup_db();
533        insert_item(&conn, "bn-001");
534        insert_item(&conn, "bn-002");
535        insert_label(&conn, "bn-001", "auth");
536        insert_label(&conn, "bn-002", "auth");
537
538        let score =
539            structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
540        assert!((score.label_sim - 1.0).abs() < 1e-6);
541    }
542
543    // -----------------------------------------------------------------------
544    // assignee_sim
545    // -----------------------------------------------------------------------
546
547    #[test]
548    fn assignee_sim_shared_agent() {
549        let conn = setup_db();
550        insert_item(&conn, "bn-001");
551        insert_item(&conn, "bn-002");
552        insert_assignee(&conn, "bn-001", "alice");
553        insert_assignee(&conn, "bn-002", "alice");
554        insert_assignee(&conn, "bn-002", "bob");
555
556        let score =
557            structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
558        // intersection={alice}(1), union={alice,bob}(2) → 0.5
559        assert!((score.assignee_sim - 0.5).abs() < 1e-6);
560    }
561
562    #[test]
563    fn assignee_sim_no_assignees_is_zero() {
564        let conn = setup_db();
565        insert_item(&conn, "bn-001");
566        insert_item(&conn, "bn-002");
567
568        let score =
569            structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
570        assert_eq!(score.assignee_sim, 0.0);
571    }
572
573    // -----------------------------------------------------------------------
574    // parent_sim
575    // -----------------------------------------------------------------------
576
577    #[test]
578    fn parent_sim_shared_parent_is_one() {
579        let conn = setup_db();
580        insert_item(&conn, "bn-goal");
581        insert_item_with_parent(&conn, "bn-001", "bn-goal");
582        insert_item_with_parent(&conn, "bn-002", "bn-goal");
583
584        let score =
585            structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
586        assert!((score.parent_sim - 1.0).abs() < 1e-6);
587    }
588
589    #[test]
590    fn parent_sim_different_parents_is_zero() {
591        let conn = setup_db();
592        insert_item(&conn, "bn-goal-a");
593        insert_item(&conn, "bn-goal-b");
594        insert_item_with_parent(&conn, "bn-001", "bn-goal-a");
595        insert_item_with_parent(&conn, "bn-002", "bn-goal-b");
596
597        let score =
598            structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
599        assert_eq!(score.parent_sim, 0.0);
600    }
601
602    #[test]
603    fn parent_sim_no_parent_is_zero() {
604        let conn = setup_db();
605        insert_item(&conn, "bn-001");
606        insert_item(&conn, "bn-002");
607
608        let score =
609            structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
610        assert_eq!(score.parent_sim, 0.0);
611    }
612
613    // -----------------------------------------------------------------------
614    // dep_sim
615    // -----------------------------------------------------------------------
616
617    #[test]
618    fn dep_sim_shared_dependency_neighbour() {
619        let conn = setup_db();
620        insert_item(&conn, "bn-001");
621        insert_item(&conn, "bn-002");
622        insert_item(&conn, "bn-common");
623        // Both bn-001 and bn-002 block bn-common
624        insert_dep(&conn, "bn-common", "bn-001");
625        insert_dep(&conn, "bn-common", "bn-002");
626
627        // Graph: bn-001 → bn-common, bn-002 → bn-common
628        let graph = graph_from_edges(&[("bn-001", "bn-common"), ("bn-002", "bn-common")]);
629
630        let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
631        // neighbours(bn-001) = {bn-common}, neighbours(bn-002) = {bn-common}
632        // intersection=1, union=1 → dep_sim=1.0
633        assert!((score.dep_sim - 1.0).abs() < 1e-6, "{score:?}");
634    }
635
636    #[test]
637    fn dep_sim_no_shared_neighbours_is_zero() {
638        let conn = setup_db();
639        insert_item(&conn, "bn-001");
640        insert_item(&conn, "bn-002");
641        insert_item(&conn, "bn-dep-a");
642        insert_item(&conn, "bn-dep-b");
643        insert_dep(&conn, "bn-dep-a", "bn-001");
644        insert_dep(&conn, "bn-dep-b", "bn-002");
645
646        let graph = graph_from_edges(&[("bn-001", "bn-dep-a"), ("bn-002", "bn-dep-b")]);
647
648        let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
649        assert_eq!(score.dep_sim, 0.0);
650    }
651
652    // -----------------------------------------------------------------------
653    // graph_proximity
654    // -----------------------------------------------------------------------
655
656    #[test]
657    fn graph_proximity_direct_edge_is_half() {
658        let conn = setup_db();
659        insert_item(&conn, "bn-001");
660        insert_item(&conn, "bn-002");
661
662        // Direct edge: bn-001 → bn-002, distance=1 → 1/(1+1)=0.5
663        let graph = graph_from_edges(&[("bn-001", "bn-002")]);
664        let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
665        assert!((score.graph_proximity - 0.5).abs() < 1e-6, "{score:?}");
666    }
667
668    #[test]
669    fn graph_proximity_two_hops() {
670        let conn = setup_db();
671        insert_item(&conn, "bn-001");
672        insert_item(&conn, "bn-002");
673        insert_item(&conn, "bn-mid");
674
675        // bn-001 → bn-mid → bn-002, distance=2 → 1/(1+2) ≈ 0.333
676        let graph = graph_from_edges(&[("bn-001", "bn-mid"), ("bn-mid", "bn-002")]);
677        let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
678        assert!(
679            (score.graph_proximity - (1.0_f32 / 3.0)).abs() < 1e-6,
680            "{score:?}"
681        );
682    }
683
684    #[test]
685    fn graph_proximity_beyond_max_hops_is_zero() {
686        let conn = setup_db();
687        // Chain longer than MAX_HOPS (5): a→b→c→d→e→f→g (6 hops a to g)
688        let ids = [
689            "bn-a01", "bn-a02", "bn-a03", "bn-a04", "bn-a05", "bn-a06", "bn-a07",
690        ];
691        for id in ids {
692            insert_item(&conn, id);
693        }
694        let edges: Vec<(&str, &str)> = ids.windows(2).map(|w| (w[0], w[1])).collect();
695        let graph = graph_from_edges(&edges);
696
697        let score = structural_similarity("bn-a01", "bn-a07", &conn, &graph).expect("score");
698        assert_eq!(
699            score.graph_proximity, 0.0,
700            "6-hop path should exceed MAX_HOPS=5"
701        );
702    }
703
704    #[test]
705    fn graph_proximity_disconnected_is_zero() {
706        let conn = setup_db();
707        insert_item(&conn, "bn-001");
708        insert_item(&conn, "bn-002");
709
710        let graph = graph_from_edges(&[]);
711        // Nodes not in graph → proximity=0
712        let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
713        assert_eq!(score.graph_proximity, 0.0);
714    }
715
716    #[test]
717    fn graph_proximity_reversed_edge_reachable() {
718        let conn = setup_db();
719        insert_item(&conn, "bn-001");
720        insert_item(&conn, "bn-002");
721
722        // Edge goes bn-002 → bn-001 (reversed); BFS is undirected so still 1 hop
723        let graph = graph_from_edges(&[("bn-002", "bn-001")]);
724        let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
725        assert!((score.graph_proximity - 0.5).abs() < 1e-6, "{score:?}");
726    }
727
728    // -----------------------------------------------------------------------
729    // StructuralScore::mean()
730    // -----------------------------------------------------------------------
731
732    #[test]
733    fn mean_is_arithmetic_average() {
734        let s = StructuralScore {
735            label_sim: 1.0,
736            dep_sim: 0.5,
737            assignee_sim: 0.0,
738            parent_sim: 1.0,
739            graph_proximity: 0.5,
740        };
741        assert!((s.mean() - 0.6).abs() < 1e-6);
742    }
743
744    // -----------------------------------------------------------------------
745    // bfs_distance edge cases
746    // -----------------------------------------------------------------------
747
748    #[test]
749    fn bfs_same_item_is_distance_zero() {
750        let conn = setup_db();
751        insert_item(&conn, "bn-001");
752        let graph = graph_from_edges(&[("bn-001", "bn-002")]);
753        // Same item: score = 1/(1+0) = 1.0
754        let score = structural_similarity("bn-001", "bn-001", &conn, &graph).expect("score");
755        assert!((score.graph_proximity - 1.0).abs() < 1e-6);
756    }
757
758    #[test]
759    fn bfs_exactly_max_hops_is_reachable() {
760        let conn = setup_db();
761        // Chain of MAX_HOPS=5 hops: a→b→c→d→e→f (5 hops, a to f)
762        let ids = ["bn-b01", "bn-b02", "bn-b03", "bn-b04", "bn-b05", "bn-b06"];
763        for id in ids {
764            insert_item(&conn, id);
765        }
766        let edges: Vec<(&str, &str)> = ids.windows(2).map(|w| (w[0], w[1])).collect();
767        let graph = graph_from_edges(&edges);
768
769        let score = structural_similarity("bn-b01", "bn-b06", &conn, &graph).expect("score");
770        // distance=5 → 1/(1+5) ≈ 0.1667
771        assert!(
772            score.graph_proximity > 0.0,
773            "5-hop path (=MAX_HOPS) should be reachable, got {score:?}"
774        );
775        assert!((score.graph_proximity - (1.0_f32 / 6.0)).abs() < 1e-5);
776    }
777}