1#![forbid(unsafe_code)]
14
15pub mod cache;
16pub mod download;
17pub mod parser;
18
19pub use cache::{
20 BranchRoot, BuildError, InstallError, InstalledOntology, KnownOntology, OntologyCache,
21 OntologyDescriptor, SourceFormat, VerifyError,
22};
23
24use std::borrow::Cow;
25use std::collections::{BTreeMap, BTreeSet};
26use std::fs;
27use std::io;
28use std::path::Path;
29use std::sync::OnceLock;
30
/// Bundled ontology fact table, embedded at compile time from
/// `../data/sbol3_ontology_facts.tsv`.
const FACTS: &str = include_str!("../data/sbol3_ontology_facts.tsv");
/// Bundled ontology source/provenance table, embedded at compile time from
/// `../data/ontology_sources.tsv`.
const SOURCES: &str = include_str!("../data/ontology_sources.tsv");

/// Snapshot format version this build accepts; a fact table only loads when
/// its `# format_version:` header line carries exactly this value.
pub const TSV_FORMAT_VERSION: u32 = 1;
37
/// Coarse family assigned to a Component type term.
///
/// The derived ordering follows declaration order, so the variant order is
/// part of the type's observable behaviour.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ComponentTypeFamily {
    /// Written `nucleic_acid` in the fact table.
    NucleicAcid,
    /// Written `protein` in the fact table.
    Protein,
    /// Written `simple_chemical` in the fact table.
    SimpleChemical,
    /// Written `complex` in the fact table.
    Complex,
    /// Written `functional` in the fact table.
    Functional,
}
47
/// Coarse family assigned to a Sequence encoding term.
///
/// The derived ordering follows declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SequenceEncodingFamily {
    /// Written `nucleic_acid` in the fact table.
    NucleicAcid,
    /// Written `protein` in the fact table.
    Protein,
    /// Written `simple_chemical` in the fact table.
    SimpleChemical,
    /// Written `other_textual` in the fact table.
    OtherTextual,
}
56
/// Namespace (ontology prefix) a term belongs to.
///
/// The derived ordering follows declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum OntologyNamespace {
    /// Terms recorded with namespace `EDAM`.
    Edam,
    /// Terms recorded with namespace `SBO`.
    Sbo,
    /// Terms recorded with namespace `SO`.
    So,
    /// Terms recorded with namespace `GO`.
    Go,
    /// Terms recorded with namespace `CHEBI`.
    Chebi,
    /// Terms recorded with namespace `CL`.
    Cl,
    /// Terms recorded with namespace `NCIT`.
    Ncit,
}
68
/// Role a term plays, as recorded in the `role` column of the fact table.
///
/// The derived ordering follows declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TermRole {
    /// Written `sequence_encoding` in the fact table.
    SequenceEncoding,
    /// Written `component_type` in the fact table.
    ComponentType,
    /// Written `component_type_modifier` in the fact table.
    ComponentTypeModifier,
    /// Written `interaction_type` in the fact table.
    InteractionType,
    /// Written `participation_role` in the fact table.
    ParticipationRole,
    /// Written `feature_role` in the fact table.
    FeatureRole,
    /// Written `other` in the fact table.
    Other,
}
80
/// One row of the ontology sources table.
///
/// The fields mirror the eight tab-separated columns of a source line, in
/// column order; every value is stored verbatim as text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OntologyProvenance {
    /// Column 1: ontology name.
    pub ontology: String,
    /// Column 2: source URL.
    pub source_url: String,
    /// Column 3: version string.
    pub version: String,
    /// Column 4: license text.
    pub license: String,
    /// Column 5: retrieval stamp.
    pub retrieved: String,
    /// Column 6: SHA-256 recorded for the raw source.
    pub raw_sha256: String,
    /// Column 7: SHA-256 recorded for the derived facts.
    pub fact_sha256: String,
    /// Column 8: free-form notes.
    pub notes: String,
}
93
94#[derive(Clone, Debug, PartialEq, Eq)]
96pub struct Ontology {
97 terms: BTreeMap<String, TermFact>,
98 aliases: BTreeMap<String, String>,
99 branches: BTreeSet<(String, String)>,
100 compatibilities: BTreeSet<(String, String)>,
101 conflicts: BTreeSet<(String, String)>,
102 component_role_terms: BTreeSet<String>,
103 component_role_compatibilities: BTreeSet<(String, String)>,
104 participation_compatibilities: BTreeSet<(String, String)>,
105 provenance: Vec<OntologyProvenance>,
106}
107
108#[derive(Clone, Debug, PartialEq, Eq)]
109struct TermFact {
110 iri: String,
111 label: String,
112 parents: Vec<String>,
113 namespace: OntologyNamespace,
114 role: TermRole,
115 component_family: Option<ComponentTypeFamily>,
116 sequence_family: Option<SequenceEncodingFamily>,
117 table_1_sequence_encoding: bool,
118 table_2_component_type: bool,
119}
120
121impl Ontology {
122 pub fn bundled() -> &'static Self {
124 static ONTOLOGY: OnceLock<Ontology> = OnceLock::new();
125 ONTOLOGY.get_or_init(|| {
126 Ontology::from_tsv(FACTS, SOURCES)
127 .expect("bundled SBOL ontology facts must parse successfully")
128 })
129 }
130
131 pub fn provenance(&self) -> &[OntologyProvenance] {
133 &self.provenance
134 }
135
136 pub fn canonical_id(&self, term: &str) -> Option<String> {
138 if let Some(canonical) = self.aliases.get(term) {
139 return Some(canonical.clone());
140 }
141 let candidate = normalize_term_id(term)?;
142 self.terms.contains_key(&candidate).then_some(candidate)
143 }
144
145 pub fn canonical_iri(&self, term: &str) -> Option<&str> {
147 let canonical = self.canonical_id(term)?;
148 self.terms.get(&canonical).map(|fact| fact.iri.as_str())
149 }
150
151 pub fn contains_term(&self, term: &str) -> bool {
153 self.canonical_id(term).is_some()
154 }
155
156 pub fn label(&self, term: &str) -> Option<&str> {
158 let canonical = self.canonical_id(term)?;
159 self.terms.get(&canonical).map(|fact| fact.label.as_str())
160 }
161
162 pub fn namespace(&self, term: &str) -> Option<OntologyNamespace> {
164 let canonical = self.canonical_id(term)?;
165 self.terms.get(&canonical).map(|fact| fact.namespace)
166 }
167
168 pub fn term_role(&self, term: &str) -> Option<TermRole> {
170 let canonical = self.canonical_id(term)?;
171 self.terms.get(&canonical).map(|fact| fact.role)
172 }
173
174 pub fn is_sequence_encoding_term(&self, term: &str) -> Option<bool> {
178 self.term_role(term)
179 .map(|role| role == TermRole::SequenceEncoding)
180 }
181
182 pub fn is_component_type_term(&self, term: &str) -> Option<bool> {
188 self.term_role(term).map(|role| {
189 matches!(
190 role,
191 TermRole::ComponentType | TermRole::ComponentTypeModifier
192 )
193 })
194 }
195
196 pub fn is_feature_role_term(&self, term: &str) -> Option<bool> {
199 self.term_role(term)
200 .map(|role| role == TermRole::FeatureRole)
201 }
202
203 pub fn is_component_role_term(&self, term: &str) -> Option<bool> {
206 let canonical = self.canonical_id(term)?;
207 if self.component_role_terms.contains(&canonical) {
208 return Some(true);
209 }
210 self.terms
211 .get(&canonical)
212 .map(|fact| fact.role == TermRole::FeatureRole)
213 }
214
215 pub fn is_sequence_feature_role_term(&self, term: &str) -> Option<bool> {
217 self.contains_term(term)
218 .then(|| self.is_in_branch(term, "SO:0000110"))
219 }
220
221 pub fn is_cell_type_term(&self, term: &str) -> Option<bool> {
225 self.contains_term(term)
226 .then(|| self.is_in_branch(term, "CL:0000000"))
227 }
228
229 pub fn is_interaction_type_term(&self, term: &str) -> Option<bool> {
232 self.term_role(term)
233 .map(|role| role == TermRole::InteractionType)
234 }
235
236 pub fn is_participation_role_term(&self, term: &str) -> Option<bool> {
239 self.term_role(term)
240 .map(|role| role == TermRole::ParticipationRole)
241 }
242
243 pub fn is_table_1_sequence_encoding(&self, term: &str) -> bool {
245 let Some(canonical) = self.canonical_id(term) else {
246 return false;
247 };
248 self.terms
249 .get(&canonical)
250 .is_some_and(|fact| fact.table_1_sequence_encoding)
251 }
252
253 pub fn is_table_2_component_type(&self, term: &str) -> bool {
255 let Some(canonical) = self.canonical_id(term) else {
256 return false;
257 };
258 self.terms
259 .get(&canonical)
260 .is_some_and(|fact| fact.table_2_component_type)
261 }
262
263 pub fn is_descendant(&self, term: &str, ancestor: &str) -> bool {
265 let Some(term) = self.canonical_id(term) else {
266 return false;
267 };
268 let Some(ancestor) = self.canonical_id(ancestor) else {
269 return false;
270 };
271 if term == ancestor {
272 return false;
273 }
274 self.has_ancestor(&term, &ancestor)
275 }
276
277 pub fn is_equivalent_or_descendant(&self, term: &str, ancestor: &str) -> bool {
279 let Some(term) = self.canonical_id(term) else {
280 return false;
281 };
282 let Some(ancestor) = self.canonical_id(ancestor) else {
283 return false;
284 };
285 term == ancestor || self.has_ancestor(&term, &ancestor)
286 }
287
288 pub fn is_in_branch(&self, term: &str, branch_root: &str) -> bool {
290 let Some(term) = self.canonical_id(term) else {
291 return false;
292 };
293 let Some(branch_root) = self.canonical_id(branch_root) else {
294 return false;
295 };
296 term == branch_root
297 || self.branches.contains(&(term.clone(), branch_root.clone()))
298 || self.has_ancestor(&term, &branch_root)
299 }
300
301 pub fn terms_conflict(&self, left: &str, right: &str) -> Option<bool> {
305 let left = self.canonical_id(left)?;
306 let right = self.canonical_id(right)?;
307 if left == right {
308 return Some(false);
309 }
310 if self.conflicts.contains(&ordered_pair(&left, &right)) {
311 return Some(true);
312 }
313 let left_fact = self.terms.get(&left)?;
314 let right_fact = self.terms.get(&right)?;
315 if let (Some(left_family), Some(right_family)) =
316 (left_fact.component_family, right_fact.component_family)
317 {
318 return Some(left_family != right_family);
319 }
320 if let (Some(left_family), Some(right_family)) =
321 (left_fact.sequence_family, right_fact.sequence_family)
322 {
323 return Some(left_family != right_family);
324 }
325 Some(false)
326 }
327
328 pub fn participation_role_compatible_with_interaction_type(
331 &self,
332 role: &str,
333 interaction_type: &str,
334 ) -> Option<bool> {
335 let role = self.canonical_id(role)?;
336 let interaction_type = self.canonical_id(interaction_type)?;
337 let role_fact = self.terms.get(&role)?;
338 let interaction_fact = self.terms.get(&interaction_type)?;
339 if role_fact.role != TermRole::ParticipationRole
340 || interaction_fact.role != TermRole::InteractionType
341 {
342 return None;
343 }
344 Some(
345 self.participation_compatibilities
346 .contains(&(interaction_type, role)),
347 )
348 }
349
350 pub fn component_role_compatible_with_component_type(
355 &self,
356 role: &str,
357 component_type: &str,
358 ) -> Option<bool> {
359 let role = self.canonical_id(role)?;
360 let component_type = self.canonical_id(component_type)?;
361 let role_fact = self.terms.get(&role)?;
362 let component_fact = self.terms.get(&component_type)?;
363 if role_fact.role != TermRole::FeatureRole || component_fact.role != TermRole::ComponentType
364 {
365 return None;
366 }
367 if self
368 .component_role_compatibilities
369 .contains(&(role.clone(), component_type.clone()))
370 {
371 return Some(true);
372 }
373
374 let component_family = component_fact.component_family?;
375 if self.is_in_branch(&role, "SO:0000110") {
376 return Some(component_family == ComponentTypeFamily::NucleicAcid);
377 }
378 if self.is_in_branch(&role, "GO:0003674") {
379 return Some(component_family == ComponentTypeFamily::Protein);
380 }
381 if self.is_in_branch(&role, "CHEBI:50906") {
382 return Some(component_family == ComponentTypeFamily::SimpleChemical);
383 }
384 None
385 }
386
387 pub fn encoding_compatible_with_component_type(
392 &self,
393 encoding: &str,
394 component_type: &str,
395 ) -> Option<bool> {
396 let encoding = self.canonical_id(encoding)?;
397 let component_type = self.canonical_id(component_type)?;
398 let encoding_fact = self.terms.get(&encoding)?;
399 let component_fact = self.terms.get(&component_type)?;
400 if encoding_fact.role != TermRole::SequenceEncoding
401 || component_fact.role != TermRole::ComponentType
402 {
403 return None;
404 }
405 if self
406 .compatibilities
407 .contains(&(encoding.clone(), component_type.clone()))
408 {
409 return Some(true);
410 }
411 let encoding_family = encoding_fact.sequence_family?;
412 let component_family = component_fact.component_family?;
413 Some(matches!(
414 (encoding_family, component_family),
415 (
416 SequenceEncodingFamily::NucleicAcid,
417 ComponentTypeFamily::NucleicAcid
418 ) | (
419 SequenceEncodingFamily::Protein,
420 ComponentTypeFamily::Protein
421 ) | (
422 SequenceEncodingFamily::SimpleChemical,
423 ComponentTypeFamily::SimpleChemical
424 )
425 ))
426 }
427
428 pub fn recommended_sequence_encoding_for_component_type(
430 &self,
431 component_type: &str,
432 ) -> Option<&str> {
433 self.compatible_sequence_encodings_for_component_type(component_type)
434 .into_iter()
435 .next()
436 }
437
438 pub fn compatible_sequence_encodings_for_component_type(
440 &self,
441 component_type: &str,
442 ) -> Vec<&str> {
443 let Some(component_type) = self.canonical_id(component_type) else {
444 return Vec::new();
445 };
446 self.compatibilities
447 .iter()
448 .filter_map(|(encoding, compatible_component)| {
449 (compatible_component == &component_type)
450 .then(|| self.terms.get(encoding).map(|fact| fact.iri.as_str()))
451 .flatten()
452 })
453 .collect()
454 }
455
456 pub fn component_type_family(&self, component_type: &str) -> Option<ComponentTypeFamily> {
458 let canonical = self.canonical_id(component_type)?;
459 self.terms
460 .get(&canonical)
461 .and_then(|fact| fact.component_family)
462 }
463
464 pub fn sequence_encoding_family(&self, encoding: &str) -> Option<SequenceEncodingFamily> {
466 let canonical = self.canonical_id(encoding)?;
467 self.terms
468 .get(&canonical)
469 .and_then(|fact| fact.sequence_family)
470 }
471
472 fn has_ancestor(&self, term: &str, ancestor: &str) -> bool {
473 let Some(fact) = self.terms.get(term) else {
474 return false;
475 };
476 fact.parents
477 .iter()
478 .any(|parent| parent == ancestor || self.has_ancestor(parent, ancestor))
479 }
480
481 pub fn from_tsv_str(facts: &str) -> Result<Self, String> {
485 Self::from_tsv(facts, "")
486 }
487
488 pub fn from_tsv_path(path: impl AsRef<Path>) -> Result<Self, io::Error> {
490 let path = path.as_ref();
491 let text = fs::read_to_string(path)?;
492 Self::from_tsv_str(&text)
493 .map_err(|message| io::Error::new(io::ErrorKind::InvalidData, message))
494 }
495
496 pub fn set_provenance(&mut self, provenance: Vec<OntologyProvenance>) {
498 self.provenance = provenance;
499 }
500
501 pub fn extend_with(&mut self, other: Ontology) {
505 for (id, fact) in other.terms {
506 self.terms.entry(id).or_insert(fact);
507 }
508 for (alias, canonical) in other.aliases {
509 self.aliases.entry(alias).or_insert(canonical);
510 }
511 self.branches.extend(other.branches);
512 self.compatibilities.extend(other.compatibilities);
513 self.conflicts.extend(other.conflicts);
514 self.component_role_terms.extend(other.component_role_terms);
515 self.component_role_compatibilities
516 .extend(other.component_role_compatibilities);
517 self.participation_compatibilities
518 .extend(other.participation_compatibilities);
519 self.provenance.extend(other.provenance);
520 }
521
522 fn from_tsv(facts: &str, sources: &str) -> Result<Self, String> {
523 let mut ontology = Self {
524 terms: BTreeMap::new(),
525 aliases: BTreeMap::new(),
526 branches: BTreeSet::new(),
527 compatibilities: BTreeSet::new(),
528 conflicts: BTreeSet::new(),
529 component_role_terms: BTreeSet::new(),
530 component_role_compatibilities: BTreeSet::new(),
531 participation_compatibilities: BTreeSet::new(),
532 provenance: parse_sources(sources)?,
533 };
534
535 let mut format_version: Option<u32> = None;
536 for (line_number, line) in facts.lines().enumerate() {
537 let trimmed = line.trim_start();
538 if let Some(rest) = trimmed.strip_prefix("# format_version:") {
539 let value = rest.trim();
540 let parsed = value.parse::<u32>().map_err(|_| {
541 format!(
542 "ontology snapshot has unparseable format_version `{value}` on line {}",
543 line_number + 1
544 )
545 })?;
546 format_version = Some(parsed);
547 continue;
548 }
549 if line.trim().is_empty() || line.starts_with('#') {
550 continue;
551 }
552 let columns = line.split('\t').collect::<Vec<_>>();
553 match columns.first().copied() {
554 Some("term") => ontology.insert_term(&columns, line_number + 1)?,
555 Some("branch") => ontology.insert_branch(&columns, line_number + 1)?,
556 Some("compat") => ontology.insert_compatibility(&columns, line_number + 1)?,
557 Some("conflict") => ontology.insert_conflict(&columns, line_number + 1)?,
558 Some("component_role") => {
559 ontology.insert_component_role_term(&columns, line_number + 1)?
560 }
561 Some("component_role_compat") => {
562 ontology.insert_component_role_compatibility(&columns, line_number + 1)?
563 }
564 Some("participation_compat") => {
565 ontology.insert_participation_compatibility(&columns, line_number + 1)?
566 }
567 Some(other) => {
568 return Err(format!(
569 "unknown ontology fact kind `{other}` on line {line_number}"
570 ));
571 }
572 None => {}
573 }
574 }
575
576 match format_version {
577 Some(version) if version == TSV_FORMAT_VERSION => Ok(ontology),
578 Some(version) => Err(format!(
579 "ontology snapshot uses format_version {version} but this build only supports {TSV_FORMAT_VERSION}",
580 )),
581 None => Err(format!(
582 "ontology snapshot is missing the `# format_version: {TSV_FORMAT_VERSION}` header line",
583 )),
584 }
585 }
586
587 fn insert_term(&mut self, columns: &[&str], line_number: usize) -> Result<(), String> {
588 if columns.len() != 12 {
589 return Err(format!(
590 "term line {line_number} has {} columns",
591 columns.len()
592 ));
593 }
594 let id = columns[1].to_owned();
595 let aliases = split_list(columns[4]);
596 let parents = split_list(columns[5]);
597 let fact = TermFact {
598 iri: columns[2].to_owned(),
599 label: columns[3].to_owned(),
600 parents,
601 namespace: parse_namespace(columns[6])?,
602 role: parse_role(columns[7])?,
603 component_family: parse_component_family(columns[8])?,
604 sequence_family: parse_sequence_family(columns[9])?,
605 table_1_sequence_encoding: parse_bool(columns[10])?,
606 table_2_component_type: parse_bool(columns[11])?,
607 };
608
609 self.aliases.insert(id.clone(), id.clone());
610 self.aliases.insert(fact.iri.clone(), id.clone());
611 for alias in aliases {
612 self.aliases.insert(alias, id.clone());
613 }
614 self.terms.insert(id, fact);
615 Ok(())
616 }
617
618 fn insert_compatibility(&mut self, columns: &[&str], line_number: usize) -> Result<(), String> {
619 if columns.len() != 3 {
620 return Err(format!(
621 "compatibility line {line_number} has {} columns",
622 columns.len()
623 ));
624 }
625 self.compatibilities
626 .insert((columns[1].to_owned(), columns[2].to_owned()));
627 Ok(())
628 }
629
630 fn insert_branch(&mut self, columns: &[&str], line_number: usize) -> Result<(), String> {
631 if columns.len() != 3 {
632 return Err(format!(
633 "branch line {line_number} has {} columns",
634 columns.len()
635 ));
636 }
637 self.branches
638 .insert((columns[1].to_owned(), columns[2].to_owned()));
639 Ok(())
640 }
641
642 fn insert_conflict(&mut self, columns: &[&str], line_number: usize) -> Result<(), String> {
643 if columns.len() != 3 {
644 return Err(format!(
645 "conflict line {line_number} has {} columns",
646 columns.len()
647 ));
648 }
649 self.conflicts.insert(ordered_pair(columns[1], columns[2]));
650 Ok(())
651 }
652
653 fn insert_component_role_term(
654 &mut self,
655 columns: &[&str],
656 line_number: usize,
657 ) -> Result<(), String> {
658 if columns.len() != 2 {
659 return Err(format!(
660 "component role line {line_number} has {} columns",
661 columns.len()
662 ));
663 }
664 self.component_role_terms.insert(columns[1].to_owned());
665 Ok(())
666 }
667
668 fn insert_component_role_compatibility(
669 &mut self,
670 columns: &[&str],
671 line_number: usize,
672 ) -> Result<(), String> {
673 if columns.len() != 3 {
674 return Err(format!(
675 "component role compatibility line {line_number} has {} columns",
676 columns.len()
677 ));
678 }
679 self.component_role_compatibilities
680 .insert((columns[1].to_owned(), columns[2].to_owned()));
681 Ok(())
682 }
683
684 fn insert_participation_compatibility(
685 &mut self,
686 columns: &[&str],
687 line_number: usize,
688 ) -> Result<(), String> {
689 if columns.len() != 3 {
690 return Err(format!(
691 "participation compatibility line {line_number} has {} columns",
692 columns.len()
693 ));
694 }
695 self.participation_compatibilities
696 .insert((columns[1].to_owned(), columns[2].to_owned()));
697 Ok(())
698 }
699}
700
701fn parse_sources(sources: &str) -> Result<Vec<OntologyProvenance>, String> {
702 let mut provenance = Vec::new();
703 for (line_number, line) in sources.lines().enumerate() {
704 if line.trim().is_empty() || line.starts_with('#') {
705 continue;
706 }
707 let columns = line.split('\t').collect::<Vec<_>>();
708 if columns.len() != 8 {
709 return Err(format!(
710 "ontology source line {line_number} has {} columns",
711 columns.len()
712 ));
713 }
714 provenance.push(OntologyProvenance {
715 ontology: columns[0].to_owned(),
716 source_url: columns[1].to_owned(),
717 version: columns[2].to_owned(),
718 license: columns[3].to_owned(),
719 retrieved: columns[4].to_owned(),
720 raw_sha256: columns[5].to_owned(),
721 fact_sha256: columns[6].to_owned(),
722 notes: columns[7].to_owned(),
723 });
724 }
725 Ok(provenance)
726}
727
/// Splits a `|`-separated list column into owned entries.
///
/// The placeholder `-` denotes an empty list. Empty segments, including a
/// wholly empty column, are skipped so that a blank field never produces
/// an empty-string alias or parent.
fn split_list(value: &str) -> Vec<String> {
    if value == "-" {
        return Vec::new();
    }
    value
        .split('|')
        .filter(|entry| !entry.is_empty())
        .map(ToOwned::to_owned)
        .collect()
}
734
/// Parses the literal `true` or `false`; anything else is an error.
fn parse_bool(value: &str) -> Result<bool, String> {
    if value == "true" {
        return Ok(true);
    }
    if value == "false" {
        return Ok(false);
    }
    Err(format!("invalid boolean `{value}`"))
}
742
743fn parse_namespace(value: &str) -> Result<OntologyNamespace, String> {
744 match value {
745 "EDAM" => Ok(OntologyNamespace::Edam),
746 "SBO" => Ok(OntologyNamespace::Sbo),
747 "SO" => Ok(OntologyNamespace::So),
748 "GO" => Ok(OntologyNamespace::Go),
749 "CHEBI" => Ok(OntologyNamespace::Chebi),
750 "CL" => Ok(OntologyNamespace::Cl),
751 "NCIT" => Ok(OntologyNamespace::Ncit),
752 _ => Err(format!("unknown ontology namespace `{value}`")),
753 }
754}
755
756fn parse_role(value: &str) -> Result<TermRole, String> {
757 match value {
758 "sequence_encoding" => Ok(TermRole::SequenceEncoding),
759 "component_type" => Ok(TermRole::ComponentType),
760 "component_type_modifier" => Ok(TermRole::ComponentTypeModifier),
761 "interaction_type" => Ok(TermRole::InteractionType),
762 "participation_role" => Ok(TermRole::ParticipationRole),
763 "feature_role" => Ok(TermRole::FeatureRole),
764 "other" => Ok(TermRole::Other),
765 _ => Err(format!("unknown term role `{value}`")),
766 }
767}
768
769fn parse_component_family(value: &str) -> Result<Option<ComponentTypeFamily>, String> {
770 match value {
771 "-" => Ok(None),
772 "nucleic_acid" => Ok(Some(ComponentTypeFamily::NucleicAcid)),
773 "protein" => Ok(Some(ComponentTypeFamily::Protein)),
774 "simple_chemical" => Ok(Some(ComponentTypeFamily::SimpleChemical)),
775 "complex" => Ok(Some(ComponentTypeFamily::Complex)),
776 "functional" => Ok(Some(ComponentTypeFamily::Functional)),
777 _ => Err(format!("unknown Component type family `{value}`")),
778 }
779}
780
781fn parse_sequence_family(value: &str) -> Result<Option<SequenceEncodingFamily>, String> {
782 match value {
783 "-" => Ok(None),
784 "nucleic_acid" => Ok(Some(SequenceEncodingFamily::NucleicAcid)),
785 "protein" => Ok(Some(SequenceEncodingFamily::Protein)),
786 "simple_chemical" => Ok(Some(SequenceEncodingFamily::SimpleChemical)),
787 "other_textual" => Ok(Some(SequenceEncodingFamily::OtherTextual)),
788 _ => Err(format!("unknown Sequence encoding family `{value}`")),
789 }
790}
791
792pub fn normalize_term_id(value: &str) -> Option<String> {
794 if let Some((prefix, local)) = value.split_once(':')
795 && !value.starts_with("http://")
796 && !value.starts_with("https://")
797 {
798 return Some(format!("{}:{local}", normalize_prefix(prefix)?));
799 }
800
801 if let Some(rest) = value
802 .strip_prefix("https://identifiers.org/")
803 .or_else(|| value.strip_prefix("http://identifiers.org/"))
804 {
805 let (prefix, local) = rest.split_once(':')?;
806 return Some(format!("{}:{local}", normalize_prefix(prefix)?));
807 }
808
809 if let Some(local) = value.strip_prefix("http://edamontology.org/") {
810 return Some(format!("EDAM:{local}"));
811 }
812
813 if let Some(local) = value
814 .strip_prefix("http://biomodels.net/SBO/SBO_")
815 .or_else(|| value.strip_prefix("https://biomodels.net/SBO/SBO_"))
816 {
817 return Some(format!("SBO:{local}"));
818 }
819
820 if let Some(local) = value.strip_prefix("http://purl.obolibrary.org/obo/") {
821 let (prefix, suffix) = local.split_once('_')?;
822 return Some(format!("{}:{suffix}", normalize_prefix(prefix)?));
823 }
824
825 None
826}
827
/// Returns the two strings as an owned pair with the smaller one first.
fn ordered_pair(left: &str, right: &str) -> (String, String) {
    let (low, high) = if right < left {
        (right, left)
    } else {
        (left, right)
    };
    (low.to_owned(), high.to_owned())
}
835
836#[derive(Clone, Debug)]
845pub struct OntologyRegistry {
846 inner: Cow<'static, Ontology>,
847}
848
849impl OntologyRegistry {
850 pub fn bundled_only() -> Self {
852 Self {
853 inner: Cow::Borrowed(Ontology::bundled()),
854 }
855 }
856
857 pub fn bundled_with<I>(extensions: I) -> Self
860 where
861 I: IntoIterator<Item = Ontology>,
862 {
863 let mut iter = extensions.into_iter();
864 let Some(first) = iter.next() else {
865 return Self::bundled_only();
866 };
867 let mut merged = Ontology::bundled().clone();
868 merged.extend_with(first);
869 for ext in iter {
870 merged.extend_with(ext);
871 }
872 Self {
873 inner: Cow::Owned(merged),
874 }
875 }
876
877 pub fn with_extension(mut self, extension: Ontology) -> Self {
879 let merged = self.inner.to_mut();
880 merged.extend_with(extension);
881 self
882 }
883
884 pub fn ontology(&self) -> &Ontology {
886 self.inner.as_ref()
887 }
888}
889
890impl Default for OntologyRegistry {
891 fn default() -> Self {
892 Self::bundled_only()
893 }
894}
895
896impl AsRef<Ontology> for OntologyRegistry {
897 fn as_ref(&self) -> &Ontology {
898 self.ontology()
899 }
900}
901
/// Maps a prefix, compared ASCII case-insensitively, to its canonical
/// upper-case spelling; unrecognised prefixes yield `None`.
fn normalize_prefix(prefix: &str) -> Option<&'static str> {
    const KNOWN_PREFIXES: [&str; 7] = ["EDAM", "SBO", "SO", "GO", "CHEBI", "CL", "NCIT"];
    KNOWN_PREFIXES
        .iter()
        .copied()
        .find(|known| known.eq_ignore_ascii_case(prefix))
}
914
#[cfg(test)]
mod tests {
    use super::*;

    /// Extension snapshot with one synthetic CL term and its branch fact.
    fn synthetic_extension_tsv() -> &'static str {
        "# format_version: 1\n# kind\tid\tiri\tlabel\taliases\tparents\tontology\trole\tcomponent_family\tsequence_family\ttable1\ttable2\n\
         term\tCL:9999999\thttps://identifiers.org/CL:9999999\tlab-only synthetic cell\t-\tCL:0000540\tCL\tcomponent_type\t-\t-\tfalse\tfalse\n\
         branch\tCL:9999999\tCL:0000000\n"
    }

    #[test]
    fn bundled_ontology_loads_core_terms() {
        let ontology = Ontology::bundled();
        let core_terms = [
            "https://identifiers.org/edam:format_1207",
            "https://identifiers.org/SBO:0000251",
            "https://identifiers.org/SO:0000987",
            "https://identifiers.org/GO:0003700",
            "https://identifiers.org/CHEBI:35224",
            "https://identifiers.org/CL:0000540",
        ];

        for iri in core_terms {
            assert!(ontology.contains_term(iri), "missing core term {}", iri);
        }
        assert!(!ontology.provenance().is_empty());
    }

    #[test]
    fn from_tsv_str_rejects_missing_format_version() {
        let headerless =
            "term\tFOO:1\thttps://example.org/foo\tfoo\t-\t-\tEDAM\tother\t-\t-\tfalse\tfalse\n";
        let result = Ontology::from_tsv_str(headerless);
        assert!(
            result.is_err(),
            "expected missing-header error, got {result:?}"
        );
    }

    #[test]
    fn from_tsv_str_rejects_unknown_format_version() {
        let bumped = "# format_version: 9999\n# kind\tid\tiri\tlabel\taliases\tparents\tontology\trole\tcomponent_family\tsequence_family\ttable1\ttable2\n\
                      term\tEDAM:format_1915\thttps://identifiers.org/edam:format_1915\tFormat\t-\t-\tEDAM\tother\t-\t-\tfalse\tfalse\n";
        let err = Ontology::from_tsv_str(bumped).unwrap_err();
        assert!(
            err.contains("format_version 9999"),
            "unexpected error `{err}`"
        );
    }

    #[test]
    fn ontology_registry_layers_extensions_over_bundled() {
        let extension = Ontology::from_tsv_str(synthetic_extension_tsv()).unwrap();
        let registry = OntologyRegistry::bundled_with([extension]);
        let ontology = registry.ontology();

        assert!(ontology.contains_term("CL:9999999"));
        for term in ["CL:9999999", "CL:0000540"] {
            assert_eq!(ontology.is_cell_type_term(term), Some(true));
        }
    }

    #[test]
    fn ontology_registry_bundled_only_borrows_static() {
        let first = OntologyRegistry::bundled_only();
        let second = OntologyRegistry::bundled_only();
        // Both registries must point at the very same static instance.
        assert!(std::ptr::eq(first.ontology(), second.ontology()));
    }

    #[test]
    fn cell_ontology_terms_resolve_via_branch_root() {
        let ontology = Ontology::bundled();
        let expectations = [
            ("CL:0000540", Some(true)),
            ("CL:0000084", Some(true)),
            ("CL:0000000", Some(true)),
            ("http://purl.obolibrary.org/obo/CL_0000540", Some(true)),
            ("SO:0000316", Some(false)),
            ("https://example.org/custom", None),
        ];

        for (term, expected) in expectations {
            assert_eq!(ontology.is_cell_type_term(term), expected);
        }
        assert_eq!(
            ontology.namespace("CL:0000540"),
            Some(OntologyNamespace::Cl)
        );
    }

    #[test]
    fn normalizes_identifiers_org_obo_purls_and_native_edam_iris() {
        let ontology = Ontology::bundled();
        let expectations = [
            ("http://edamontology.org/format_1207", "EDAM:format_1207"),
            ("http://purl.obolibrary.org/obo/SBO_0000251", "SBO:0000251"),
            ("https://identifiers.org/SO:0000987", "SO:0000987"),
        ];

        for (input, canonical) in expectations {
            assert_eq!(ontology.canonical_id(input), Some(canonical.to_owned()));
        }
    }

    #[test]
    fn branch_queries_follow_parent_links() {
        let ontology = Ontology::bundled();

        assert!(ontology.is_descendant("EDAM:format_1207", "EDAM:format_2330"));

        let in_branch = [
            ("EDAM:format_3752", "EDAM:format_2330"),
            ("SBO:0000243", "SBO:0000236"),
            ("SBO:0000176", "SBO:0000231"),
            ("SBO:0000010", "SBO:0000003"),
            ("EDAM:format_1207", "EDAM:format_1915"),
            ("SO:0000987", "SO:0000986"),
            ("SO:0000984", "SO:0000983"),
            ("SO:0000167", "SO:0000110"),
            ("GO:0001216", "GO:0003674"),
            ("GO:0003700", "GO:0003674"),
            ("CHEBI:35224", "CHEBI:50906"),
        ];
        for (term, root) in in_branch {
            assert!(
                ontology.is_in_branch(term, root),
                "{} should be in branch {}",
                term,
                root
            );
        }

        assert!(!ontology.is_in_branch("SO:0000987", "SO:0000983"));
        assert!(ontology.is_equivalent_or_descendant("EDAM:format_2330", "EDAM:format_2330"));
        assert!(!ontology.is_descendant("EDAM:format_2330", "EDAM:format_2330"));
    }

    #[test]
    fn compatibility_and_conflict_queries_distinguish_unknowns() {
        let ontology = Ontology::bundled();
        let unknown = "https://example.org/custom";

        let encoding_cases = [
            ("EDAM:format_1207", Some(true)),
            ("EDAM:format_1208", Some(false)),
            (unknown, None),
        ];
        for (encoding, expected) in encoding_cases {
            assert_eq!(
                ontology.encoding_compatible_with_component_type(encoding, "SBO:0000251"),
                expected
            );
        }

        let conflict_cases = [
            ("SBO:0000251", "SBO:0000252", Some(true)),
            ("SBO:0000169", "SBO:0000170", Some(true)),
            ("SBO:0000251", "SO:0000987", Some(false)),
            ("SBO:0000251", unknown, None),
        ];
        for (left, right, expected) in conflict_cases {
            assert_eq!(ontology.terms_conflict(left, right), expected);
        }

        assert_eq!(ontology.is_component_type_term(unknown), None);
        assert_eq!(ontology.is_component_role_term(unknown), None);
        assert_eq!(
            ontology.component_role_compatible_with_component_type(unknown, "SBO:0000251"),
            None
        );
    }

    #[test]
    fn role_queries_cover_sbol_feature_interaction_and_participation_terms() {
        let ontology = Ontology::bundled();

        assert_eq!(ontology.is_feature_role_term("SO:0000167"), Some(true));
        for term in ["SO:0000167", "SBO:0000289", "SBO:0000290"] {
            assert_eq!(ontology.is_component_role_term(term), Some(true));
        }
        assert_eq!(
            ontology.is_sequence_feature_role_term("SO:0000167"),
            Some(true)
        );
        assert_eq!(ontology.is_feature_role_term("SBO:0000176"), Some(false));
        for term in ["SBO:0000243", "SBO:0000290"] {
            assert_eq!(ontology.is_component_type_term(term), Some(true));
        }
        assert_eq!(ontology.is_interaction_type_term("SBO:0000176"), Some(true));
        assert_eq!(
            ontology.is_participation_role_term("SBO:0000010"),
            Some(true)
        );

        let participation_cases = [
            ("SBO:0000010", "SBO:0000176", Some(true)),
            ("SBO:0000459", "SBO:0000169", Some(false)),
        ];
        for (role, interaction, expected) in participation_cases {
            assert_eq!(
                ontology.participation_role_compatible_with_interaction_type(role, interaction),
                expected
            );
        }

        let component_role_cases = [
            ("SO:0000167", "SBO:0000251", Some(true)),
            ("GO:0003700", "SBO:0000252", Some(true)),
            ("CHEBI:35224", "SBO:0000247", Some(true)),
            ("SO:0000167", "SBO:0000252", Some(false)),
        ];
        for (role, component_type, expected) in component_role_cases {
            assert_eq!(
                ontology.component_role_compatible_with_component_type(role, component_type),
                expected
            );
        }
    }
}