1use crate::bundle::{Entity, ResolutionMethod, ResolvedId};
18use crate::project::Project;
19
/// One row of the bundled ontology table: a known entity together with the
/// external identifier it resolves to.
struct OntologyEntry {
    /// Entity kind (for example `"protein"`). `lookup` requires this to be
    /// exactly equal to the `entity_type` of the entity being resolved.
    entity_type: &'static str,
    /// Preferred display name; stored as the `matched_name` of the
    /// `ResolvedId` written when this row is applied.
    canonical_name: &'static str,
    /// Accepted spellings. They are compared verbatim against the normalized
    /// entity name, so each one must already be lowercase and single-spaced.
    match_names: &'static [&'static str],
    /// Name of the external database that issued `id` (for example `"UniProt"`).
    source: &'static str,
    /// Identifier of the entity inside `source`.
    id: &'static str,
}
29
30const TABLE: &[OntologyEntry] = &[
35 OntologyEntry {
37 canonical_name: "amyloid-beta",
38 entity_type: "protein",
39 match_names: &["amyloid-beta", "amyloid beta", "abeta", "aβ", "ab"],
40 source: "UniProt",
41 id: "P05067", },
43 OntologyEntry {
44 canonical_name: "APP",
45 entity_type: "protein",
46 match_names: &["app", "amyloid precursor protein"],
47 source: "UniProt",
48 id: "P05067",
49 },
50 OntologyEntry {
51 canonical_name: "BACE1",
52 entity_type: "protein",
53 match_names: &["bace1", "β-secretase 1", "beta-secretase 1"],
54 source: "UniProt",
55 id: "P56817",
56 },
57 OntologyEntry {
58 canonical_name: "tau",
59 entity_type: "protein",
60 match_names: &["tau", "mapt", "microtubule-associated protein tau"],
61 source: "UniProt",
62 id: "P10636",
63 },
64 OntologyEntry {
65 canonical_name: "TREM2",
66 entity_type: "protein",
67 match_names: &["trem2", "triggering receptor expressed on myeloid cells 2"],
68 source: "UniProt",
69 id: "Q9NZC2",
70 },
71 OntologyEntry {
72 canonical_name: "ApoE",
73 entity_type: "protein",
74 match_names: &["apoe", "apolipoprotein e"],
75 source: "UniProt",
76 id: "P02649",
77 },
78 OntologyEntry {
79 canonical_name: "PSEN1",
80 entity_type: "protein",
81 match_names: &["psen1", "presenilin-1", "presenilin 1"],
82 source: "UniProt",
83 id: "P49768",
84 },
85 OntologyEntry {
86 canonical_name: "PSEN2",
87 entity_type: "protein",
88 match_names: &["psen2", "presenilin-2", "presenilin 2"],
89 source: "UniProt",
90 id: "P49810",
91 },
92 OntologyEntry {
94 canonical_name: "PSEN1",
95 entity_type: "gene",
96 match_names: &["psen1"],
97 source: "NCBI Gene",
98 id: "5663",
99 },
100 OntologyEntry {
101 canonical_name: "APOE",
102 entity_type: "gene",
103 match_names: &["apoe"],
104 source: "NCBI Gene",
105 id: "348",
106 },
107 OntologyEntry {
109 canonical_name: "Alzheimer's disease",
110 entity_type: "disease",
111 match_names: &[
112 "alzheimer's disease",
113 "alzheimer disease",
114 "alzheimers disease",
115 "ad",
116 ],
117 source: "MeSH",
118 id: "D000544",
119 },
120 OntologyEntry {
121 canonical_name: "mild cognitive impairment",
122 entity_type: "disease",
123 match_names: &["mild cognitive impairment", "mci"],
124 source: "MeSH",
125 id: "D060825",
126 },
127 OntologyEntry {
129 canonical_name: "Lecanemab",
130 entity_type: "compound",
131 match_names: &["lecanemab", "leqembi"],
132 source: "DrugBank",
133 id: "DB16703",
134 },
135 OntologyEntry {
136 canonical_name: "Aducanumab",
137 entity_type: "compound",
138 match_names: &["aducanumab", "aduhelm"],
139 source: "DrugBank",
140 id: "DB12274",
141 },
142 OntologyEntry {
143 canonical_name: "Donanemab",
144 entity_type: "compound",
145 match_names: &["donanemab", "kisunla"],
146 source: "DrugBank",
147 id: "DB17791",
148 },
149 OntologyEntry {
150 canonical_name: "Verubecestat",
151 entity_type: "compound",
152 match_names: &["verubecestat", "mk-8931"],
153 source: "DrugBank",
154 id: "DB12089",
155 },
156 OntologyEntry {
157 canonical_name: "Liraglutide",
158 entity_type: "compound",
159 match_names: &["liraglutide", "victoza", "saxenda"],
160 source: "DrugBank",
161 id: "DB06655",
162 },
163 OntologyEntry {
164 canonical_name: "Semaglutide",
165 entity_type: "compound",
166 match_names: &["semaglutide", "ozempic", "wegovy"],
167 source: "DrugBank",
168 id: "DB13928",
169 },
170 OntologyEntry {
171 canonical_name: "Exendin-4",
172 entity_type: "compound",
173 match_names: &["exendin-4", "exenatide", "byetta"],
174 source: "DrugBank",
175 id: "DB01276",
176 },
177 OntologyEntry {
178 canonical_name: "Xenon",
179 entity_type: "compound",
180 match_names: &["xenon", "xe"],
181 source: "ChEBI",
182 id: "CHEBI:49957",
183 },
184 OntologyEntry {
186 canonical_name: "microglia",
187 entity_type: "cell_type",
188 match_names: &["microglia", "microglial cell"],
189 source: "Cell Ontology",
190 id: "CL:0000129",
191 },
192 OntologyEntry {
194 canonical_name: "blood-brain barrier",
195 entity_type: "anatomical_structure",
196 match_names: &["blood-brain barrier", "bbb"],
197 source: "MeSH",
198 id: "D001812",
199 },
200 OntologyEntry {
202 canonical_name: "Homo sapiens",
203 entity_type: "organism",
204 match_names: &["homo sapiens", "human"],
205 source: "NCBI Taxonomy",
206 id: "9606",
207 },
208 OntologyEntry {
209 canonical_name: "Mus musculus",
210 entity_type: "organism",
211 match_names: &["mus musculus", "mouse", "house mouse"],
212 source: "NCBI Taxonomy",
213 id: "10090",
214 },
215 OntologyEntry {
217 canonical_name: "WIMP",
218 entity_type: "particle",
219 match_names: &["wimp", "weakly interacting massive particle"],
220 source: "PDG",
221 id: "WIMP",
222 },
223 OntologyEntry {
224 canonical_name: "XENONnT",
225 entity_type: "instrument",
226 match_names: &["xenonnt", "xenon nt"],
227 source: "ROR",
228 id: "https://ror.org/03wkt5x30",
229 },
230 OntologyEntry {
231 canonical_name: "LZ",
232 entity_type: "instrument",
233 match_names: &["lz", "lux-zeplin", "lux zeplin"],
234 source: "ROR",
235 id: "https://ror.org/04xeg9z08",
236 },
237];
238
/// Canonicalizes an entity name so it can be compared against table aliases.
///
/// The result has leading/trailing whitespace removed, every internal run of
/// Unicode whitespace collapsed to a single ASCII space, and all letters
/// lowercased. Typographic punctuation is folded to its ASCII form so that
/// text copied from publications still matches the ASCII aliases:
///
/// * curly single quotes (U+2018, U+2019) become `'`
/// * hyphen, non-breaking hyphen and en dash (U+2010, U+2011, U+2013) become `-`
///
/// An empty or all-whitespace input yields an empty string.
fn normalize(s: &str) -> String {
    let mut collapsed = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so a non-empty buffer means a word precedes this one.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.extend(word.chars().map(|c| match c {
            '\u{2018}' | '\u{2019}' => '\'',
            '\u{2010}' | '\u{2011}' | '\u{2013}' => '-',
            other => other,
        }));
    }
    // Lowercase the whole string at once so context-sensitive mappings see full words.
    collapsed.to_lowercase()
}
246
247fn lookup(entity: &Entity) -> Option<&'static OntologyEntry> {
250 let n = normalize(&entity.name);
251 TABLE.iter().find(|row| {
252 row.entity_type == entity.entity_type && row.match_names.iter().any(|m| *m == n)
253 })
254}
255
256#[derive(Debug, Clone, serde::Serialize)]
258pub struct FindingResolutionReport {
259 pub finding_id: String,
260 pub resolved: usize,
261 pub unresolved: Vec<String>,
262 pub already_resolved: usize,
263}
264
265#[derive(Debug, Clone, serde::Serialize)]
267pub struct ResolveReport {
268 pub frontier: String,
269 pub total_entities: usize,
270 pub resolved: usize,
271 pub already_resolved: usize,
272 pub unresolved_count: usize,
273 pub findings_touched: usize,
274 pub per_finding: Vec<FindingResolutionReport>,
275}
276
277pub fn resolve_frontier(project: &mut Project, force: bool) -> ResolveReport {
281 let frontier_name = project.project.name.clone();
282 let mut total = 0usize;
283 let mut resolved = 0usize;
284 let mut already = 0usize;
285 let mut unresolved_count = 0usize;
286 let mut findings_touched = 0usize;
287 let mut per_finding: Vec<FindingResolutionReport> = Vec::new();
288
289 for finding in project.findings.iter_mut() {
290 let mut f_resolved = 0usize;
291 let mut f_unresolved: Vec<String> = Vec::new();
292 let mut f_already = 0usize;
293 for entity in finding.assertion.entities.iter_mut() {
294 total += 1;
295 if entity.canonical_id.is_some() && !force {
296 already += 1;
297 f_already += 1;
298 continue;
299 }
300 match lookup(entity) {
301 Some(row) => {
302 entity.canonical_id = Some(ResolvedId {
303 source: row.source.to_string(),
304 id: row.id.to_string(),
305 confidence: 0.95,
306 matched_name: Some(row.canonical_name.to_string()),
307 });
308 entity.resolution_method = Some(ResolutionMethod::Manual);
309 entity.resolution_confidence = 0.95;
310 entity.resolution_provenance =
311 Some("vela_entity_resolve_v0_19_bundled_table".to_string());
312 entity.needs_review = false;
313 resolved += 1;
314 f_resolved += 1;
315 }
316 None => {
317 unresolved_count += 1;
318 f_unresolved.push(format!("{}:{}", entity.name, entity.entity_type));
319 }
320 }
321 }
322 if f_resolved > 0 || !f_unresolved.is_empty() || f_already > 0 {
323 if f_resolved > 0 {
324 findings_touched += 1;
325 }
326 per_finding.push(FindingResolutionReport {
327 finding_id: finding.id.clone(),
328 resolved: f_resolved,
329 unresolved: f_unresolved,
330 already_resolved: f_already,
331 });
332 }
333 }
334
335 crate::project::recompute_stats(project);
337
338 ResolveReport {
339 frontier: frontier_name,
340 total_entities: total,
341 resolved,
342 already_resolved: already,
343 unresolved_count,
344 findings_touched,
345 per_finding,
346 }
347}
348
349pub fn bundled_entry_count() -> usize {
352 TABLE.len()
353}
354
355pub fn iter_bundled()
357-> impl Iterator<Item = (&'static str, &'static str, &'static str, &'static str)> {
358 TABLE
359 .iter()
360 .map(|r| (r.canonical_name, r.entity_type, r.source, r.id))
361}
362
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an entity as it looks before resolution: the given name and
    /// type, no canonical id, and flagged for review.
    fn unresolved_entity(name: &str, kind: &str) -> Entity {
        Entity {
            name: name.to_owned(),
            entity_type: kind.to_owned(),
            identifiers: serde_json::Map::new(),
            canonical_id: None,
            candidates: Vec::new(),
            aliases: Vec::new(),
            resolution_provenance: Some(String::from("manual_state_transition")),
            resolution_confidence: 0.6,
            resolution_method: None,
            species_context: None,
            needs_review: true,
        }
    }

    #[test]
    fn lookup_amyloid_beta_protein() {
        let hit = lookup(&unresolved_entity("amyloid-beta", "protein"))
            .expect("amyloid-beta should resolve");
        assert_eq!((hit.source, hit.id), ("UniProt", "P05067"));
    }

    #[test]
    fn lookup_alzheimers_disease_with_apostrophe_variants() {
        let spellings = [
            "Alzheimer's disease",
            "alzheimers disease",
            "ALZHEIMER'S DISEASE",
        ];
        for n in spellings {
            let found = lookup(&unresolved_entity(n, "disease"));
            assert!(found.is_some(), "should resolve '{n}'");
        }
    }

    #[test]
    fn lookup_respects_entity_type() {
        let as_instrument = unresolved_entity("LZ", "instrument");
        // Same name under a type the table does not list it for must miss.
        let as_compound = unresolved_entity("LZ", "compound");
        assert!(lookup(&as_instrument).is_some());
        assert!(lookup(&as_compound).is_none());
    }

    #[test]
    fn unmatched_name_returns_none() {
        let stranger = unresolved_entity("totally made-up entity name", "protein");
        assert!(lookup(&stranger).is_none());
    }
}