use std::collections::HashSet;
use std::path::Path;
use crate::BoxError;
/// A single axiom, tagged with the normal form it was stored under.
///
/// Every variant corresponds to one of the per-form vectors of `ElDataset` and
/// carries its operands as owned strings in the same order as the stored tuple
/// (this is exactly how `ElDataset::iter` builds them).
///
/// The logical readings given below are the conventional EL++ ones and are
/// NOT established by this file — NOTE(review): confirm against the producer
/// of the axiom files.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Axiom {
/// Built from `nf1`; all three operands are class names.
/// Presumably `C1 ⊓ C2 ⊑ D` — TODO confirm.
Intersection(String, String, String),
/// Built from `nf2`; both operands are class names.
/// Presumably `C ⊑ D` — TODO confirm.
Subsumption(String, String),
/// Built from `nf3`; operands are `(class, role, class)`.
/// Presumably `C ⊑ ∃r.D` — TODO confirm.
Existential(String, String, String),
/// Built from `nf4`; operands are `(role, class, class)`.
/// Presumably `∃r.C ⊑ D` — TODO confirm.
InverseExistential(String, String, String),
/// Built from `ri6`; both operands are role names.
/// Presumably `r ⊑ s` — TODO confirm.
RoleInclusion(String, String),
/// Built from `ri7`; all three operands are role names.
/// Presumably `r ∘ s ⊑ t` — TODO confirm.
RoleChain(String, String, String),
/// Built from `disj`; both operands are class names.
Disjointness(String, String),
}
/// A set of axioms grouped by normal form, one vector per line tag accepted by
/// `parse_el_axioms`.
///
/// Operands are kept in the order they appear on the input line (after the
/// tag). `Default` yields a dataset with all vectors empty.
#[derive(Debug, Clone, Default)]
pub struct ElDataset {
/// `NF1` lines: three class names.
pub nf1: Vec<(String, String, String)>,
/// `NF2` lines: two class names.
pub nf2: Vec<(String, String)>,
/// `NF3` lines: `(class, role, class)`.
pub nf3: Vec<(String, String, String)>,
/// `NF4` lines: `(role, class, class)`.
pub nf4: Vec<(String, String, String)>,
/// `RI6` lines: two role names.
pub ri6: Vec<(String, String)>,
/// `RI7` lines: three role names.
pub ri7: Vec<(String, String, String)>,
/// `DISJ` lines: two class names.
pub disj: Vec<(String, String)>,
}
impl ElDataset {
    /// Returns the total number of axioms, summed over all seven vectors.
    pub fn len(&self) -> usize {
        let per_form = [
            self.nf1.len(),
            self.nf2.len(),
            self.nf3.len(),
            self.nf4.len(),
            self.ri6.len(),
            self.ri7.len(),
            self.disj.len(),
        ];
        per_form.iter().sum()
    }

    /// Returns `true` when the dataset holds no axioms at all.
    pub fn is_empty(&self) -> bool {
        self.nf1.is_empty()
            && self.nf2.is_empty()
            && self.nf3.is_empty()
            && self.nf4.is_empty()
            && self.ri6.is_empty()
            && self.ri7.is_empty()
            && self.disj.is_empty()
    }

    /// Returns the distinct names that occur in a class position.
    ///
    /// Class positions are: every operand of `nf1`, `nf2` and `disj`, the
    /// first and last operand of `nf3`, and the last two operands of `nf4`.
    /// The returned slices borrow from the dataset.
    pub fn classes(&self) -> HashSet<&str> {
        let from_nf1 = self.nf1.iter().flat_map(|(a, b, c)| [a, b, c]);
        // `nf2` and `disj` have the same shape: both operands are classes.
        let from_pairs = self
            .nf2
            .iter()
            .chain(&self.disj)
            .flat_map(|(a, b)| [a, b]);
        let from_nf3 = self.nf3.iter().flat_map(|(c, _, d)| [c, d]);
        let from_nf4 = self.nf4.iter().flat_map(|(_, c, d)| [c, d]);

        from_nf1
            .chain(from_pairs)
            .chain(from_nf3)
            .chain(from_nf4)
            .map(String::as_str)
            .collect()
    }

    /// Returns the distinct names that occur in a role position.
    ///
    /// Role positions are: the middle operand of `nf3`, the first operand of
    /// `nf4`, and every operand of `ri6` and `ri7`. The returned slices borrow
    /// from the dataset.
    pub fn roles(&self) -> HashSet<&str> {
        let from_nf3 = self.nf3.iter().map(|(_, r, _)| r);
        let from_nf4 = self.nf4.iter().map(|(r, _, _)| r);
        let from_ri6 = self.ri6.iter().flat_map(|(r, s)| [r, s]);
        let from_ri7 = self.ri7.iter().flat_map(|(r, s, t)| [r, s, t]);

        from_nf3
            .chain(from_nf4)
            .chain(from_ri6)
            .chain(from_ri7)
            .map(String::as_str)
            .collect()
    }

    /// Iterates over all axioms as owned [`Axiom`] values.
    ///
    /// Axioms are yielded form by form, in the order `nf1`, `nf2`, `nf3`,
    /// `nf4`, `ri6`, `ri7`, `disj`, keeping insertion order within each form.
    /// Every yielded axiom is a fresh clone of the stored strings.
    pub fn iter(&self) -> impl Iterator<Item = Axiom> + '_ {
        let intersections = self
            .nf1
            .iter()
            .cloned()
            .map(|(a, b, c)| Axiom::Intersection(a, b, c));
        let subsumptions = self
            .nf2
            .iter()
            .cloned()
            .map(|(a, b)| Axiom::Subsumption(a, b));
        let existentials = self
            .nf3
            .iter()
            .cloned()
            .map(|(c, r, d)| Axiom::Existential(c, r, d));
        let inverse_existentials = self
            .nf4
            .iter()
            .cloned()
            .map(|(r, c, d)| Axiom::InverseExistential(r, c, d));
        let role_inclusions = self
            .ri6
            .iter()
            .cloned()
            .map(|(r, s)| Axiom::RoleInclusion(r, s));
        let role_chains = self
            .ri7
            .iter()
            .cloned()
            .map(|(r, s, t)| Axiom::RoleChain(r, s, t));
        let disjointness = self
            .disj
            .iter()
            .cloned()
            .map(|(a, b)| Axiom::Disjointness(a, b));

        intersections
            .chain(subsumptions)
            .chain(existentials)
            .chain(inverse_existentials)
            .chain(role_inclusions)
            .chain(role_chains)
            .chain(disjointness)
    }
}
/// Reads the file at `path` as UTF-8 text and parses it with [`parse_el_axioms`].
///
/// # Errors
///
/// Returns [`BoxError::Io`] carrying the I/O error's display text when the
/// file cannot be read, and otherwise whatever error [`parse_el_axioms`]
/// reports for the file's contents.
pub fn load_el_axioms<P: AsRef<Path>>(path: P) -> Result<ElDataset, BoxError> {
    match std::fs::read_to_string(path.as_ref()) {
        Ok(text) => parse_el_axioms(&text),
        Err(io_err) => Err(BoxError::Io(io_err.to_string())),
    }
}
/// Parses axioms from tab-separated text, one axiom per line.
///
/// Each line is trimmed of surrounding whitespace first; blank lines and lines
/// starting with `#` are skipped. Every remaining line must consist of a tag
/// followed by the exact number of tab-separated operands for that tag:
///
/// | Tag    | Operands | Stored in      |
/// |--------|----------|----------------|
/// | `NF1`  | 3        | `dataset.nf1`  |
/// | `NF2`  | 2        | `dataset.nf2`  |
/// | `NF3`  | 3        | `dataset.nf3`  |
/// | `NF4`  | 3        | `dataset.nf4`  |
/// | `RI6`  | 2        | `dataset.ri6`  |
/// | `RI7`  | 3        | `dataset.ri7`  |
/// | `DISJ` | 2        | `dataset.disj` |
///
/// Operands are stored verbatim, in the order they appear on the line.
///
/// # Errors
///
/// Returns [`BoxError::Internal`] naming the 1-based line number and the
/// trimmed line when the tag is unknown, the operand count does not match the
/// tag, or any operand is empty (for example because of a doubled tab).
/// Parsing stops at the first such line.
pub fn parse_el_axioms(text: &str) -> Result<ElDataset, BoxError> {
    let mut dataset = ElDataset::default();

    for (line_num, raw_line) in text.lines().enumerate() {
        let line = raw_line.trim();
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        let invalid = || {
            BoxError::Internal(format!(
                "line {}: invalid axiom '{}'",
                line_num + 1,
                line
            ))
        };

        // `split` always yields at least one field, so `parts` is never empty.
        let parts: Vec<&str> = line.split('\t').collect();

        // A missing trailing operand already fails the arity check below
        // (trailing tabs are trimmed away); reject interior empty operands too
        // instead of silently recording an entity with an empty name.
        if parts.iter().any(|field| field.is_empty()) {
            return Err(invalid());
        }

        // Tag and arity are checked together by the slice patterns.
        match parts[..] {
            ["NF1", a, b, c] => dataset.nf1.push((a.into(), b.into(), c.into())),
            ["NF2", a, b] => dataset.nf2.push((a.into(), b.into())),
            ["NF3", c, r, d] => dataset.nf3.push((c.into(), r.into(), d.into())),
            ["NF4", r, c, d] => dataset.nf4.push((r.into(), c.into(), d.into())),
            ["RI6", r, s] => dataset.ri6.push((r.into(), s.into())),
            ["RI7", r, s, t] => dataset.ri7.push((r.into(), s.into(), t.into())),
            ["DISJ", a, b] => dataset.disj.push((a.into(), b.into())),
            _ => return Err(invalid()),
        }
    }

    Ok(dataset)
}
// Unit tests for the axiom parser and the `ElDataset` accessors.
#[cfg(test)]
mod tests {
use super::*;
// Parses one fixture containing every supported tag and checks the per-form
// counts, the total length, and that class names and role names end up in the
// right sets (a role never shows up as a class and vice versa).
#[test]
fn test_parse_all_forms() {
// The literal below is left-aligned on purpose: only the first line break is
// escaped, so any indentation would become part of the parsed text.
let text = "\
# Gene Ontology subset (normalized)
NF1\tHeartDisease\tGenetic\tInheritedHeartDisease
NF2\tDog\tAnimal
NF2\tCat\tAnimal
NF3\tHeart\tpartOf\tBody
NF4\thasParent\tHuman\tHuman
RI6\thasChild\thasDescendant
RI7\thasParent\thasSibling\thasUncle
DISJ\tCat\tDog
";
let ds = parse_el_axioms(text).unwrap();
assert_eq!(ds.nf1.len(), 1);
assert_eq!(ds.nf2.len(), 2);
assert_eq!(ds.nf3.len(), 1);
assert_eq!(ds.nf4.len(), 1);
assert_eq!(ds.ri6.len(), 1);
assert_eq!(ds.ri7.len(), 1);
assert_eq!(ds.disj.len(), 1);
assert_eq!(ds.len(), 8);
let classes = ds.classes();
assert!(classes.contains("Dog"));
assert!(classes.contains("Animal"));
assert!(classes.contains("HeartDisease"));
assert!(!classes.contains("partOf"));
let roles = ds.roles();
assert!(roles.contains("partOf"));
assert!(roles.contains("hasParent"));
assert!(!roles.contains("Dog")); }
// Input made only of comments and blank lines parses to an empty dataset.
#[test]
fn test_empty_and_comments() {
let text = "# comment\n\n# another comment\n";
let ds = parse_el_axioms(text).unwrap();
assert!(ds.is_empty());
}
// A line whose tag is not one of the supported ones is an error.
#[test]
fn test_invalid_tag() {
let text = "INVALID\tFoo\tBar";
assert!(parse_el_axioms(text).is_err());
}
// A known tag with too few operands (`NF2` needs two) is an error.
#[test]
fn test_wrong_field_count() {
let text = "NF2\tDog";
assert!(parse_el_axioms(text).is_err());
}
// `iter()` yields axioms grouped by form (nf2 before nf3 before disj here),
// with operands in the same order as on the input line.
#[test]
fn test_iter_roundtrip() {
let text = "NF2\tA\tB\nNF3\tC\tr\tD\nDISJ\tE\tF\n";
let ds = parse_el_axioms(text).unwrap();
let axioms: Vec<Axiom> = ds.iter().collect();
assert_eq!(axioms.len(), 3);
assert!(matches!(&axioms[0], Axiom::Subsumption(a, b) if a == "A" && b == "B"));
assert!(
matches!(&axioms[1], Axiom::Existential(c, r, d) if c == "C" && r == "r" && d == "D")
);
assert!(matches!(&axioms[2], Axiom::Disjointness(e, f) if e == "E" && f == "F"));
}
}