1use crate::bundle::FindingBundle;
17use std::collections::HashSet;
18pub fn deterministic_links(bundles: &mut [FindingBundle]) -> usize {
29 let n = bundles.len();
30 if n < 2 {
31 return 0;
32 }
33
34 let entity_sets: Vec<HashSet<String>> = bundles
37 .iter()
38 .map(|b| {
39 let mut names = HashSet::new();
40 for e in &b.assertion.entities {
41 names.insert(e.name.to_lowercase());
42 for alias in &e.aliases {
43 names.insert(alias.to_lowercase());
44 }
45 }
46 names
47 })
48 .collect();
49
50 let dois: Vec<Option<String>> = bundles
52 .iter()
53 .map(|b| b.provenance.doi.as_ref().map(|d| d.to_lowercase()))
54 .collect();
55
56 struct PendingLink {
58 from_idx: usize,
59 to_id: String,
60 link_type: String,
61 note: String,
62 }
63
64 let mut pending: Vec<PendingLink> = Vec::new();
65
66 for i in 0..n {
67 for j in (i + 1)..n {
68 let shared: HashSet<&String> = entity_sets[i].intersection(&entity_sets[j]).collect();
69 if shared.is_empty() {
70 continue;
71 }
72
73 let same_paper = match (&dois[i], &dois[j]) {
74 (Some(a), Some(b)) => a == b,
75 _ => false,
76 };
77
78 if same_paper {
80 continue;
81 }
82
83 let shared_names: Vec<String> = shared.iter().map(|s| s.to_string()).collect();
84 let overlap_count = shared_names.len();
85 let overlap_label = shared_names.join(", ");
86 let strong = overlap_count >= 2;
87
88 let dir_i = bundles[i].assertion.direction.as_deref();
90 let dir_j = bundles[j].assertion.direction.as_deref();
91
92 let (link_type, note) = if is_opposite(dir_i, dir_j) {
93 (
94 "contradicts",
95 format!(
96 "Opposite directions on shared entit{}: {}{}",
97 if overlap_count == 1 { "y" } else { "ies" },
98 overlap_label,
99 if strong { " (strong overlap)" } else { "" }
100 ),
101 )
102 } else if is_same_direction(dir_i, dir_j) && could_supersede(bundles, i, j) {
103 let (newer, _older) = if supersede_order(bundles, i, j) {
104 (i, j)
105 } else {
106 (j, i)
107 };
108 let _is_i_newer = newer == i;
109 (
110 "supersedes",
111 format!(
112 "Newer/higher-confidence finding on shared entit{}: {}{}",
113 if overlap_count == 1 { "y" } else { "ies" },
114 overlap_label,
115 if strong { " (strong overlap)" } else { "" }
116 ),
117 )
118 } else {
119 (
120 "extends",
121 format!(
122 "Cross-paper shared entit{}: {}{}",
123 if overlap_count == 1 { "y" } else { "ies" },
124 overlap_label,
125 if strong { " (strong overlap)" } else { "" }
126 ),
127 )
128 };
129
130 if link_type == "supersedes" {
132 let (from_idx, to_idx) = if supersede_order(bundles, i, j) {
133 (i, j)
134 } else {
135 (j, i)
136 };
137 pending.push(PendingLink {
138 from_idx,
139 to_id: bundles[to_idx].id.clone(),
140 link_type: link_type.to_string(),
141 note,
142 });
143 } else {
144 pending.push(PendingLink {
146 from_idx: i,
147 to_id: bundles[j].id.clone(),
148 link_type: link_type.to_string(),
149 note,
150 });
151 }
152 }
153 }
154
155 let count = pending.len();
156 for pl in pending {
157 bundles[pl.from_idx].add_link_with_source(&pl.to_id, &pl.link_type, &pl.note, "compiler");
158 }
159
160 count
161}
162
/// Reports whether two direction labels are exact opposites.
///
/// Only `"positive"` paired with `"negative"` (in either order) counts; a
/// missing direction or any other label yields `false`.
fn is_opposite(a: Option<&str>, b: Option<&str>) -> bool {
    match (a, b) {
        (Some("positive"), Some(other)) => other == "negative",
        (Some("negative"), Some(other)) => other == "positive",
        _ => false,
    }
}
170
/// Reports whether both directions are present, equal, and not the literal
/// label `"null"`.
fn is_same_direction(a: Option<&str>, b: Option<&str>) -> bool {
    match a {
        // `b` must carry exactly the same label as `a`.
        Some(label) if label != "null" => b == Some(label),
        _ => false,
    }
}
178
179fn could_supersede(bundles: &[FindingBundle], i: usize, j: usize) -> bool {
182 let yi = bundles[i].provenance.year.unwrap_or(0);
183 let yj = bundles[j].provenance.year.unwrap_or(0);
184 let ci = bundles[i].confidence.score;
185 let cj = bundles[j].confidence.score;
186
187 (yi > yj && ci > cj) || (yj > yi && cj > ci)
189}
190
191fn supersede_order(bundles: &[FindingBundle], i: usize, j: usize) -> bool {
193 let yi = bundles[i].provenance.year.unwrap_or(0);
194 let yj = bundles[j].provenance.year.unwrap_or(0);
195 let ci = bundles[i].confidence.score;
196 let cj = bundles[j].confidence.score;
197 yi > yj && ci > cj
198}
199
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bundle::*;

    /// Entity list shared by most tests: a single NLRP3 protein.
    fn nlrp3() -> Vec<(&'static str, &'static str)> {
        vec![("NLRP3", "protein")]
    }

    /// Builds an entity with the given name, type and aliases; every other
    /// field is left empty or unset.
    fn make_entity(name: &str, etype: &str, aliases: &[&str]) -> Entity {
        Entity {
            name: name.into(),
            entity_type: etype.into(),
            identifiers: serde_json::Map::new(),
            canonical_id: None,
            candidates: vec![],
            aliases: aliases.iter().map(|alias| (*alias).into()).collect(),
            resolution_provenance: None,
            resolution_confidence: 1.0,
            resolution_method: None,
            species_context: None,
            needs_review: false,
        }
    }

    /// Builds a finding fixture; only the arguments vary between tests, all
    /// remaining fields are fixed filler values.
    fn make_finding(
        id: &str,
        entities: Vec<(&str, &str)>,
        direction: Option<&str>,
        doi: Option<&str>,
        year: i32,
        score: f64,
    ) -> FindingBundle {
        let assertion = Assertion {
            text: format!("Finding {id}"),
            assertion_type: "mechanism".into(),
            entities: entities
                .into_iter()
                .map(|(name, etype)| make_entity(name, etype, &[]))
                .collect(),
            relation: None,
            direction: direction.map(String::from),
            causal_claim: None,
            causal_evidence_grade: None,
        };

        let evidence = Evidence {
            evidence_type: "experimental".into(),
            model_system: String::new(),
            species: None,
            method: String::new(),
            sample_size: None,
            effect_size: None,
            p_value: None,
            replicated: false,
            replication_count: None,
            evidence_spans: vec![],
        };

        let conditions = Conditions {
            text: String::new(),
            species_verified: vec![],
            species_unverified: vec![],
            in_vitro: false,
            in_vivo: false,
            human_data: false,
            clinical_trial: false,
            concentration_range: None,
            duration: None,
            age_group: None,
            cell_type: None,
        };

        let provenance = Provenance {
            source_type: "published_paper".into(),
            doi: doi.map(String::from),
            pmid: None,
            pmc: None,
            openalex_id: None,
            url: None,
            title: "Test".into(),
            authors: vec![],
            year: Some(year),
            journal: None,
            license: None,
            publisher: None,
            funders: vec![],
            extraction: Extraction::default(),
            review: None,
            citation_count: None,
        };

        let flags = Flags {
            gap: false,
            negative_space: false,
            contested: false,
            retracted: false,
            declining: false,
            gravity_well: false,
            review_state: None,
            superseded: false,
            signature_threshold: None,
            jointly_accepted: false,
        };

        FindingBundle {
            id: id.into(),
            version: 1,
            previous_version: None,
            assertion,
            evidence,
            conditions,
            confidence: Confidence::raw(score, "seeded prior", 0.85),
            provenance,
            flags,
            links: vec![],
            annotations: vec![],
            attachments: vec![],
            created: String::new(),
            updated: None,

            access_tier: crate::access_tier::AccessTier::Public,
        }
    }

    #[test]
    fn shared_entity_creates_extends_link() {
        let mut bundles = vec![
            make_finding("f1", nlrp3(), None, Some("10.1/a"), 2020, 0.7),
            make_finding("f2", nlrp3(), None, Some("10.1/b"), 2021, 0.7),
        ];

        let added = deterministic_links(&mut bundles);

        assert_eq!(added, 1);
        assert_eq!(bundles[0].links.len(), 1);
        let link = &bundles[0].links[0];
        assert_eq!(link.link_type, "extends");
        assert_eq!(link.target, "f2");
    }

    #[test]
    fn opposite_directions_creates_contradicts_link() {
        let mut bundles = vec![
            make_finding("f1", nlrp3(), Some("positive"), Some("10.1/a"), 2020, 0.7),
            make_finding("f2", nlrp3(), Some("negative"), Some("10.1/b"), 2021, 0.7),
        ];

        let added = deterministic_links(&mut bundles);

        assert_eq!(added, 1);
        assert_eq!(bundles[0].links[0].link_type, "contradicts");
    }

    #[test]
    fn newer_higher_confidence_creates_supersedes() {
        let mut bundles = vec![
            make_finding("f1", nlrp3(), Some("positive"), Some("10.1/a"), 2018, 0.6),
            make_finding("f2", nlrp3(), Some("positive"), Some("10.1/b"), 2024, 0.9),
        ];

        let added = deterministic_links(&mut bundles);

        assert_eq!(added, 1);
        // The newer finding (f2) carries the link and points back at f1.
        assert_eq!(bundles[1].links.len(), 1);
        let link = &bundles[1].links[0];
        assert_eq!(link.link_type, "supersedes");
        assert_eq!(link.target, "f1");
    }

    #[test]
    fn no_shared_entities_no_link() {
        let mut bundles = vec![
            make_finding("f1", nlrp3(), None, Some("10.1/a"), 2020, 0.7),
            make_finding(
                "f2",
                vec![("APOE4", "gene")],
                None,
                Some("10.1/b"),
                2021,
                0.7,
            ),
        ];

        let added = deterministic_links(&mut bundles);

        assert_eq!(added, 0);
        assert!(bundles[0].links.is_empty());
        assert!(bundles[1].links.is_empty());
    }

    #[test]
    fn same_paper_skipped() {
        let mut bundles = vec![
            make_finding("f1", nlrp3(), None, Some("10.1/same"), 2020, 0.7),
            make_finding("f2", nlrp3(), None, Some("10.1/same"), 2020, 0.7),
        ];

        assert_eq!(deterministic_links(&mut bundles), 0);
    }

    #[test]
    fn single_bundle_no_links() {
        let mut bundles = vec![make_finding(
            "f1",
            nlrp3(),
            None,
            Some("10.1/a"),
            2020,
            0.7,
        )];

        assert_eq!(deterministic_links(&mut bundles), 0);
    }

    #[test]
    fn empty_bundles_no_links() {
        let mut bundles: Vec<FindingBundle> = vec![];

        assert_eq!(deterministic_links(&mut bundles), 0);
    }

    #[test]
    fn strong_overlap_noted() {
        let two_proteins = || vec![("NLRP3", "protein"), ("IL-1β", "protein")];
        let mut bundles = vec![
            make_finding("f1", two_proteins(), None, Some("10.1/a"), 2020, 0.7),
            make_finding("f2", two_proteins(), None, Some("10.1/b"), 2021, 0.7),
        ];

        let added = deterministic_links(&mut bundles);

        assert_eq!(added, 1);
        assert!(bundles[0].links[0].note.contains("strong overlap"));
    }

    #[test]
    fn alias_matching_works() {
        let mut bundles = vec![
            make_finding("f1", vec![], None, Some("10.1/a"), 2020, 0.7),
            make_finding("f2", vec![], None, Some("10.1/b"), 2021, 0.7),
        ];
        // f1 knows NLRP3 under the alias that f2 uses as its primary name.
        bundles[0]
            .assertion
            .entities
            .push(make_entity("NLRP3", "protein", &["cryopyrin"]));
        bundles[1]
            .assertion
            .entities
            .push(make_entity("cryopyrin", "protein", &[]));

        assert_eq!(deterministic_links(&mut bundles), 1);
    }

    #[test]
    fn link_inferred_by_is_compiler() {
        let mut bundles = vec![
            make_finding("f1", nlrp3(), None, Some("10.1/a"), 2020, 0.7),
            make_finding("f2", nlrp3(), None, Some("10.1/b"), 2021, 0.7),
        ];

        deterministic_links(&mut bundles);

        assert_eq!(bundles[0].links[0].inferred_by, "compiler");
    }

    #[test]
    fn is_opposite_helper() {
        let cases = [
            (Some("positive"), Some("negative"), true),
            (Some("negative"), Some("positive"), true),
            (Some("positive"), Some("positive"), false),
            (None, Some("negative"), false),
            (None, None, false),
        ];
        for (a, b, expected) in cases {
            assert_eq!(is_opposite(a, b), expected, "is_opposite({a:?}, {b:?})");
        }
    }

    #[test]
    fn is_same_direction_helper() {
        let cases = [
            (Some("positive"), Some("positive"), true),
            (Some("positive"), Some("negative"), false),
            (None, None, false),
            (Some("null"), Some("null"), false),
        ];
        for (a, b, expected) in cases {
            assert_eq!(
                is_same_direction(a, b),
                expected,
                "is_same_direction({a:?}, {b:?})"
            );
        }
    }

    #[test]
    fn valid_link_types_list() {
        for known in ["supports", "contradicts", "extends", "supersedes"] {
            assert!(VALID_LINK_TYPES.contains(&known));
        }
        assert!(!VALID_LINK_TYPES.contains(&"invalidtype"));
    }
}