1#![allow(clippy::module_name_repetitions)]
27
28use std::collections::{HashMap, HashSet, VecDeque};
29
30use anyhow::{Context, Result};
31use petgraph::Direction;
32use petgraph::graph::{DiGraph, NodeIndex};
33use rusqlite::Connection;
34use rusqlite::OptionalExtension as _;
35
36#[derive(Debug, Clone, PartialEq)]
46pub struct StructuralScore {
47 pub label_sim: f32,
49 pub dep_sim: f32,
52 pub assignee_sim: f32,
54 pub parent_sim: f32,
57 pub graph_proximity: f32,
61}
62
63impl StructuralScore {
64 #[must_use]
68 pub fn mean(&self) -> f32 {
69 (self.label_sim + self.dep_sim + self.assignee_sim + self.parent_sim + self.graph_proximity)
70 / 5.0
71 }
72}
73
74const MAX_HOPS: usize = 5;
81
82#[must_use]
103#[allow(clippy::implicit_hasher, clippy::cast_precision_loss)]
104pub fn jaccard<T: Eq + std::hash::Hash>(a: &HashSet<T>, b: &HashSet<T>) -> f32 {
105 if a.is_empty() && b.is_empty() {
106 return 0.0;
107 }
108 let intersection = a.intersection(b).count() as f32;
109 let union_size = a.union(b).count() as f32;
110 if union_size == 0.0 {
111 0.0
112 } else {
113 intersection / union_size
114 }
115}
116
117pub fn structural_similarity(
134 a: &str,
135 b: &str,
136 db: &Connection,
137 graph: &DiGraph<String, ()>,
138) -> Result<StructuralScore> {
139 structural_similarity_with_map(a, b, db, graph, None)
140}
141
142#[allow(
153 clippy::implicit_hasher,
154 clippy::cast_precision_loss,
155 clippy::option_if_let_else
156)]
157pub fn structural_similarity_with_map(
158 a: &str,
159 b: &str,
160 db: &Connection,
161 graph: &DiGraph<String, ()>,
162 node_map: Option<&HashMap<&str, NodeIndex>>,
163) -> Result<StructuralScore> {
164 let labels_a = fetch_labels(db, a).with_context(|| format!("fetch labels for {a}"))?;
168 let labels_b = fetch_labels(db, b).with_context(|| format!("fetch labels for {b}"))?;
169 let label_sim = jaccard(&labels_a, &labels_b);
170
171 let assignees_a = fetch_assignees(db, a).with_context(|| format!("fetch assignees for {a}"))?;
175 let assignees_b = fetch_assignees(db, b).with_context(|| format!("fetch assignees for {b}"))?;
176 let assignee_sim = jaccard(&assignees_a, &assignees_b);
177
178 let parent_a = fetch_parent(db, a).with_context(|| format!("fetch parent for {a}"))?;
182 let parent_b = fetch_parent(db, b).with_context(|| format!("fetch parent for {b}"))?;
183 let parent_sim = match (parent_a.as_deref(), parent_b.as_deref()) {
184 (Some(pa), Some(pb)) if pa == pb => 1.0_f32,
185 _ => 0.0_f32,
186 };
187
188 let built_map;
192 let map_ref = if let Some(m) = node_map {
193 m
194 } else {
195 built_map = graph
196 .node_indices()
197 .filter_map(|idx| graph.node_weight(idx).map(|s| (s.as_str(), idx)))
198 .collect();
199 &built_map
200 };
201
202 let neighbours_a = direct_neighbours(graph, map_ref, a);
206 let neighbours_b = direct_neighbours(graph, map_ref, b);
207 let dep_sim = jaccard(&neighbours_a, &neighbours_b);
208
209 #[allow(clippy::cast_precision_loss)]
213 let graph_proximity = bfs_distance(graph, map_ref, a, b, MAX_HOPS)
214 .map_or(0.0_f32, |dist| 1.0_f32 / (1.0_f32 + dist as f32));
215
216 Ok(StructuralScore {
217 label_sim,
218 dep_sim,
219 assignee_sim,
220 parent_sim,
221 graph_proximity,
222 })
223}
224
225fn fetch_labels(db: &Connection, item_id: &str) -> Result<HashSet<String>> {
231 let mut stmt = db
232 .prepare_cached(
233 "SELECT label
234 FROM item_labels
235 WHERE item_id = ?1",
236 )
237 .context("prepare fetch_labels")?;
238
239 let labels = stmt
240 .query_map([item_id], |row| row.get::<_, String>(0))
241 .context("execute fetch_labels")?
242 .collect::<Result<HashSet<_>, _>>()
243 .context("collect labels")?;
244
245 Ok(labels)
246}
247
248fn fetch_assignees(db: &Connection, item_id: &str) -> Result<HashSet<String>> {
250 let mut stmt = db
251 .prepare_cached(
252 "SELECT agent
253 FROM item_assignees
254 WHERE item_id = ?1",
255 )
256 .context("prepare fetch_assignees")?;
257
258 let agents = stmt
259 .query_map([item_id], |row| row.get::<_, String>(0))
260 .context("execute fetch_assignees")?
261 .collect::<Result<HashSet<_>, _>>()
262 .context("collect assignees")?;
263
264 Ok(agents)
265}
266
267fn fetch_parent(db: &Connection, item_id: &str) -> Result<Option<String>> {
270 let mut stmt = db
271 .prepare_cached(
272 "SELECT parent_id
273 FROM items
274 WHERE item_id = ?1 AND is_deleted = 0",
275 )
276 .context("prepare fetch_parent")?;
277
278 let parent = stmt
279 .query_row([item_id], |row| row.get::<_, Option<String>>(0))
280 .optional()
281 .context("execute fetch_parent")?
282 .flatten(); Ok(parent)
285}
286
287fn direct_neighbours<'g>(
296 graph: &'g DiGraph<String, ()>,
297 node_map: &HashMap<&str, NodeIndex>,
298 item_id: &str,
299) -> HashSet<&'g str> {
300 let Some(&idx) = node_map.get(item_id) else {
301 return HashSet::new();
302 };
303
304 graph
307 .neighbors_directed(idx, Direction::Outgoing)
308 .chain(graph.neighbors_directed(idx, Direction::Incoming))
309 .filter_map(|n_idx| graph.node_weight(n_idx).map(String::as_str))
310 .collect()
311}
312
313fn bfs_distance(
321 graph: &DiGraph<String, ()>,
322 node_map: &HashMap<&str, NodeIndex>,
323 from: &str,
324 to: &str,
325 max_hops: usize,
326) -> Option<usize> {
327 let &start = node_map.get(from)?;
329 let &end = node_map.get(to)?;
330
331 if start == end {
332 return Some(0);
333 }
334
335 let mut visited: HashSet<NodeIndex> = HashSet::new();
336 let mut queue: VecDeque<(NodeIndex, usize)> = VecDeque::new();
337
338 visited.insert(start);
339 queue.push_back((start, 0));
340
341 while let Some((current, dist)) = queue.pop_front() {
342 if dist >= max_hops {
343 continue;
345 }
346
347 let next_dist = dist + 1;
349 for neighbour in graph
350 .neighbors_directed(current, Direction::Outgoing)
351 .chain(graph.neighbors_directed(current, Direction::Incoming))
352 {
353 if neighbour == end {
354 return Some(next_dist);
355 }
356 if visited.insert(neighbour) {
357 queue.push_back((neighbour, next_dist));
358 }
359 }
360 }
361
362 None
363}
364
365#[cfg(test)]
370mod tests {
371 use super::*;
372 use bones_core::db::migrations;
373 use petgraph::graph::DiGraph;
374 use rusqlite::params;
375
376 fn setup_db() -> rusqlite::Connection {
381 let mut conn = rusqlite::Connection::open_in_memory().expect("in-memory db");
382 migrations::migrate(&mut conn).expect("migrate");
383 conn
384 }
385
386 fn insert_item(conn: &rusqlite::Connection, item_id: &str) {
387 conn.execute(
388 "INSERT INTO items (
389 item_id, title, kind, state, urgency, is_deleted,
390 created_at_us, updated_at_us
391 ) VALUES (?1, ?1, 'task', 'open', 'default', 0, 1000, 1000)",
392 params![item_id],
393 )
394 .expect("insert item");
395 }
396
397 fn insert_item_with_parent(conn: &rusqlite::Connection, item_id: &str, parent_id: &str) {
398 conn.execute(
399 "INSERT INTO items (
400 item_id, title, kind, state, urgency, is_deleted,
401 parent_id, created_at_us, updated_at_us
402 ) VALUES (?1, ?1, 'task', 'open', 'default', 0, ?2, 1000, 1000)",
403 params![item_id, parent_id],
404 )
405 .expect("insert item with parent");
406 }
407
408 fn insert_label(conn: &rusqlite::Connection, item_id: &str, label: &str) {
409 conn.execute(
410 "INSERT INTO item_labels (item_id, label, created_at_us) VALUES (?1, ?2, 1000)",
411 params![item_id, label],
412 )
413 .expect("insert label");
414 }
415
416 fn insert_assignee(conn: &rusqlite::Connection, item_id: &str, agent: &str) {
417 conn.execute(
418 "INSERT INTO item_assignees (item_id, agent, created_at_us) VALUES (?1, ?2, 1000)",
419 params![item_id, agent],
420 )
421 .expect("insert assignee");
422 }
423
424 fn insert_dep(conn: &rusqlite::Connection, blocked: &str, blocker: &str) {
425 conn.execute(
426 "INSERT INTO item_dependencies (item_id, depends_on_item_id, link_type, created_at_us)
427 VALUES (?1, ?2, 'blocks', 1000)",
428 params![blocked, blocker],
429 )
430 .expect("insert dep");
431 }
432
433 fn empty_graph() -> DiGraph<String, ()> {
434 DiGraph::new()
435 }
436
437 fn graph_from_edges(edges: &[(&str, &str)]) -> DiGraph<String, ()> {
439 let mut graph = DiGraph::new();
440 let mut node_map: HashMap<String, NodeIndex> = HashMap::new();
441
442 for &(blocker, blocked) in edges {
443 let blocker_idx = *node_map
444 .entry(blocker.to_owned())
445 .or_insert_with(|| graph.add_node(blocker.to_owned()));
446 let blocked_idx = *node_map
447 .entry(blocked.to_owned())
448 .or_insert_with(|| graph.add_node(blocked.to_owned()));
449 if !graph.contains_edge(blocker_idx, blocked_idx) {
450 graph.add_edge(blocker_idx, blocked_idx, ());
451 }
452 }
453 graph
454 }
455
456 #[test]
461 fn jaccard_empty_sets_returns_zero() {
462 let a: HashSet<&str> = HashSet::new();
463 let b: HashSet<&str> = HashSet::new();
464 assert_eq!(jaccard(&a, &b), 0.0);
465 }
466
467 #[test]
468 fn jaccard_identical_sets_returns_one() {
469 let a: HashSet<&str> = ["x", "y", "z"].into_iter().collect();
470 let b: HashSet<&str> = ["x", "y", "z"].into_iter().collect();
471 assert!((jaccard(&a, &b) - 1.0).abs() < 1e-6);
472 }
473
474 #[test]
475 fn jaccard_disjoint_sets_returns_zero() {
476 let a: HashSet<&str> = ["x"].into_iter().collect();
477 let b: HashSet<&str> = ["y"].into_iter().collect();
478 assert_eq!(jaccard(&a, &b), 0.0);
479 }
480
481 #[test]
482 fn jaccard_partial_overlap() {
483 let a: HashSet<&str> = ["x", "y", "z"].into_iter().collect();
485 let b: HashSet<&str> = ["y", "z", "w"].into_iter().collect();
486 assert!((jaccard(&a, &b) - 0.5).abs() < 1e-6);
487 }
488
489 #[test]
490 fn jaccard_one_empty_set_returns_zero() {
491 let a: HashSet<&str> = ["x", "y"].into_iter().collect();
492 let b: HashSet<&str> = HashSet::new();
493 assert_eq!(jaccard(&a, &b), 0.0);
494 }
495
496 #[test]
501 fn label_sim_shared_labels() {
502 let conn = setup_db();
503 insert_item(&conn, "bn-001");
504 insert_item(&conn, "bn-002");
505 insert_label(&conn, "bn-001", "auth");
506 insert_label(&conn, "bn-001", "backend");
507 insert_label(&conn, "bn-002", "auth");
508 insert_label(&conn, "bn-002", "frontend");
509
510 let score =
511 structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
512 assert!(
514 (score.label_sim - (1.0_f32 / 3.0)).abs() < 1e-6,
515 "{score:?}"
516 );
517 }
518
519 #[test]
520 fn label_sim_no_labels_is_zero() {
521 let conn = setup_db();
522 insert_item(&conn, "bn-001");
523 insert_item(&conn, "bn-002");
524
525 let score =
526 structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
527 assert_eq!(score.label_sim, 0.0);
528 }
529
530 #[test]
531 fn label_sim_identical_labels_is_one() {
532 let conn = setup_db();
533 insert_item(&conn, "bn-001");
534 insert_item(&conn, "bn-002");
535 insert_label(&conn, "bn-001", "auth");
536 insert_label(&conn, "bn-002", "auth");
537
538 let score =
539 structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
540 assert!((score.label_sim - 1.0).abs() < 1e-6);
541 }
542
543 #[test]
548 fn assignee_sim_shared_agent() {
549 let conn = setup_db();
550 insert_item(&conn, "bn-001");
551 insert_item(&conn, "bn-002");
552 insert_assignee(&conn, "bn-001", "alice");
553 insert_assignee(&conn, "bn-002", "alice");
554 insert_assignee(&conn, "bn-002", "bob");
555
556 let score =
557 structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
558 assert!((score.assignee_sim - 0.5).abs() < 1e-6);
560 }
561
562 #[test]
563 fn assignee_sim_no_assignees_is_zero() {
564 let conn = setup_db();
565 insert_item(&conn, "bn-001");
566 insert_item(&conn, "bn-002");
567
568 let score =
569 structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
570 assert_eq!(score.assignee_sim, 0.0);
571 }
572
573 #[test]
578 fn parent_sim_shared_parent_is_one() {
579 let conn = setup_db();
580 insert_item(&conn, "bn-goal");
581 insert_item_with_parent(&conn, "bn-001", "bn-goal");
582 insert_item_with_parent(&conn, "bn-002", "bn-goal");
583
584 let score =
585 structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
586 assert!((score.parent_sim - 1.0).abs() < 1e-6);
587 }
588
589 #[test]
590 fn parent_sim_different_parents_is_zero() {
591 let conn = setup_db();
592 insert_item(&conn, "bn-goal-a");
593 insert_item(&conn, "bn-goal-b");
594 insert_item_with_parent(&conn, "bn-001", "bn-goal-a");
595 insert_item_with_parent(&conn, "bn-002", "bn-goal-b");
596
597 let score =
598 structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
599 assert_eq!(score.parent_sim, 0.0);
600 }
601
602 #[test]
603 fn parent_sim_no_parent_is_zero() {
604 let conn = setup_db();
605 insert_item(&conn, "bn-001");
606 insert_item(&conn, "bn-002");
607
608 let score =
609 structural_similarity("bn-001", "bn-002", &conn, &empty_graph()).expect("score");
610 assert_eq!(score.parent_sim, 0.0);
611 }
612
613 #[test]
618 fn dep_sim_shared_dependency_neighbour() {
619 let conn = setup_db();
620 insert_item(&conn, "bn-001");
621 insert_item(&conn, "bn-002");
622 insert_item(&conn, "bn-common");
623 insert_dep(&conn, "bn-common", "bn-001");
625 insert_dep(&conn, "bn-common", "bn-002");
626
627 let graph = graph_from_edges(&[("bn-001", "bn-common"), ("bn-002", "bn-common")]);
629
630 let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
631 assert!((score.dep_sim - 1.0).abs() < 1e-6, "{score:?}");
634 }
635
636 #[test]
637 fn dep_sim_no_shared_neighbours_is_zero() {
638 let conn = setup_db();
639 insert_item(&conn, "bn-001");
640 insert_item(&conn, "bn-002");
641 insert_item(&conn, "bn-dep-a");
642 insert_item(&conn, "bn-dep-b");
643 insert_dep(&conn, "bn-dep-a", "bn-001");
644 insert_dep(&conn, "bn-dep-b", "bn-002");
645
646 let graph = graph_from_edges(&[("bn-001", "bn-dep-a"), ("bn-002", "bn-dep-b")]);
647
648 let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
649 assert_eq!(score.dep_sim, 0.0);
650 }
651
652 #[test]
657 fn graph_proximity_direct_edge_is_half() {
658 let conn = setup_db();
659 insert_item(&conn, "bn-001");
660 insert_item(&conn, "bn-002");
661
662 let graph = graph_from_edges(&[("bn-001", "bn-002")]);
664 let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
665 assert!((score.graph_proximity - 0.5).abs() < 1e-6, "{score:?}");
666 }
667
668 #[test]
669 fn graph_proximity_two_hops() {
670 let conn = setup_db();
671 insert_item(&conn, "bn-001");
672 insert_item(&conn, "bn-002");
673 insert_item(&conn, "bn-mid");
674
675 let graph = graph_from_edges(&[("bn-001", "bn-mid"), ("bn-mid", "bn-002")]);
677 let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
678 assert!(
679 (score.graph_proximity - (1.0_f32 / 3.0)).abs() < 1e-6,
680 "{score:?}"
681 );
682 }
683
684 #[test]
685 fn graph_proximity_beyond_max_hops_is_zero() {
686 let conn = setup_db();
687 let ids = [
689 "bn-a01", "bn-a02", "bn-a03", "bn-a04", "bn-a05", "bn-a06", "bn-a07",
690 ];
691 for id in ids {
692 insert_item(&conn, id);
693 }
694 let edges: Vec<(&str, &str)> = ids.windows(2).map(|w| (w[0], w[1])).collect();
695 let graph = graph_from_edges(&edges);
696
697 let score = structural_similarity("bn-a01", "bn-a07", &conn, &graph).expect("score");
698 assert_eq!(
699 score.graph_proximity, 0.0,
700 "6-hop path should exceed MAX_HOPS=5"
701 );
702 }
703
704 #[test]
705 fn graph_proximity_disconnected_is_zero() {
706 let conn = setup_db();
707 insert_item(&conn, "bn-001");
708 insert_item(&conn, "bn-002");
709
710 let graph = graph_from_edges(&[]);
711 let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
713 assert_eq!(score.graph_proximity, 0.0);
714 }
715
716 #[test]
717 fn graph_proximity_reversed_edge_reachable() {
718 let conn = setup_db();
719 insert_item(&conn, "bn-001");
720 insert_item(&conn, "bn-002");
721
722 let graph = graph_from_edges(&[("bn-002", "bn-001")]);
724 let score = structural_similarity("bn-001", "bn-002", &conn, &graph).expect("score");
725 assert!((score.graph_proximity - 0.5).abs() < 1e-6, "{score:?}");
726 }
727
728 #[test]
733 fn mean_is_arithmetic_average() {
734 let s = StructuralScore {
735 label_sim: 1.0,
736 dep_sim: 0.5,
737 assignee_sim: 0.0,
738 parent_sim: 1.0,
739 graph_proximity: 0.5,
740 };
741 assert!((s.mean() - 0.6).abs() < 1e-6);
742 }
743
744 #[test]
749 fn bfs_same_item_is_distance_zero() {
750 let conn = setup_db();
751 insert_item(&conn, "bn-001");
752 let graph = graph_from_edges(&[("bn-001", "bn-002")]);
753 let score = structural_similarity("bn-001", "bn-001", &conn, &graph).expect("score");
755 assert!((score.graph_proximity - 1.0).abs() < 1e-6);
756 }
757
758 #[test]
759 fn bfs_exactly_max_hops_is_reachable() {
760 let conn = setup_db();
761 let ids = ["bn-b01", "bn-b02", "bn-b03", "bn-b04", "bn-b05", "bn-b06"];
763 for id in ids {
764 insert_item(&conn, id);
765 }
766 let edges: Vec<(&str, &str)> = ids.windows(2).map(|w| (w[0], w[1])).collect();
767 let graph = graph_from_edges(&edges);
768
769 let score = structural_similarity("bn-b01", "bn-b06", &conn, &graph).expect("score");
770 assert!(
772 score.graph_proximity > 0.0,
773 "5-hop path (=MAX_HOPS) should be reachable, got {score:?}"
774 );
775 assert!((score.graph_proximity - (1.0_f32 / 6.0)).abs() < 1e-5);
776 }
777}