use crate::containers::ControlInfo;
use crate::four_sect_dict::{DictErr, IdKind};
use crate::header::Header;
use crate::predicate_object_iter::PredicateObjectIter;
use crate::triples::{BitmapIter, TripleId, TriplesBitmap};
use crate::FourSectDict;
use bytesize::ByteSize;
use log::{debug, error};
use std::io;
use std::marker::PhantomData;
use thiserror::Error;
/// In-memory representation of the data loaded from an HDT stream.
///
/// Holds the dictionary and the triples section; the query methods combine both to
/// return triples whose components are of the string-like type `T`.
#[derive(Debug)]
pub struct Hdt<T: Data> {
/// Dictionary used to translate between term strings and numeric IDs.
pub dict: FourSectDict,
/// Triples section, addressed by numeric IDs.
pub triples: TriplesBitmap,
/// Marker for `T`, which only occurs in method return types and is never stored.
phantom: PhantomData<T>,
}
/// String-like type in which the query methods of [`Hdt`] return triple components.
///
/// A candidate type must be readable as `&str`, constructible from `Box<str>`, `String`
/// and `&'static str`, and support `Clone`, `Eq` and `Hash`.
/// The tests in this file use `Rc<str>`.
pub trait Data:
AsRef<str> + Clone + Eq + From<std::boxed::Box<str>> + From<String> + From<&'static str> + std::hash::Hash
{
}
// Blanket implementation: every type that satisfies the bounds is a `Data` type automatically,
// so callers never have to implement the trait by hand.
impl<T> Data for T where
T: AsRef<str> + Clone + Eq + From<std::boxed::Box<str>> + From<String> + From<&'static str> + std::hash::Hash
{
}
/// A triple in string form, ordered as (subject, predicate, object).
type StringTriple<T> = (T, T, T);
/// Error describing a [`TripleId`] that could not be translated into a string triple.
#[derive(Error, Debug)]
#[error("Cannot translate triple ID {t:?} to string triple: {e}")]
pub struct TranslateErr {
/// Dictionary error that made the translation fail.
#[source]
e: DictErr,
/// The ID triple whose translation failed.
t: TripleId,
}
impl<T: Data> Hdt<T> {
pub fn new<R: std::io::BufRead>(mut reader: R) -> io::Result<Self> {
ControlInfo::read(&mut reader)?;
Header::read(&mut reader)?;
let mut dict = FourSectDict::read(&mut reader)?;
let triples = TriplesBitmap::read_sect(&mut reader)?;
dict.validate()?;
let hdt = Hdt { dict, triples, phantom: PhantomData {} };
debug!("HDT size in memory {}, details:", ByteSize(hdt.size_in_bytes() as u64));
Ok(hdt)
}
/// Returns the sum of the sizes in bytes reported by the dictionary and the triples section.
pub fn size_in_bytes(&self) -> usize {
    let dict_bytes = self.dict.size_in_bytes();
    let triples_bytes = self.triples.size_in_bytes();
    dict_bytes + triples_bytes
}
fn translate_id(&self, t: TripleId) -> Result<StringTriple<T>, TranslateErr> {
let s = self.dict.id_to_string(t.subject_id, &IdKind::Subject).map_err(|e| TranslateErr { e, t })?;
let p = self.dict.id_to_string(t.predicate_id, &IdKind::Predicate).map_err(|e| TranslateErr { e, t })?;
let o = self.dict.id_to_string(t.object_id, &IdKind::Object).map_err(|e| TranslateErr { e, t })?;
Ok((s.into(), p.into(), o.into()))
}
pub fn triples(&self) -> impl Iterator<Item = StringTriple<T>> + '_ {
self.triples.into_iter().map(|id| self.translate_id(id).unwrap())
}
pub fn triples_with(&self, s: &str, kind: &'static IdKind) -> Box<dyn Iterator<Item = StringTriple<T>> + '_> {
debug_assert_ne!("", s);
let id = self.dict.string_to_id(s, kind);
if id == 0 {
return Box::new(std::iter::empty());
}
let owned = s.to_owned();
Box::new(
self.triples
.triples_with_id(id, kind)
.map(move |tid| self.translate_id(tid))
.filter_map(move |r| r.map_err(|e| error!("Error on triple with {kind:?} {owned}: {e}")).ok()),
)
}
pub fn objects_with_sp(&self, s: &str, p: &str) -> Box<dyn Iterator<Item = String> + '_> {
let sid = self.dict.string_to_id(s, &IdKind::Subject);
let pid = self.dict.string_to_id(p, &IdKind::Predicate);
if sid == 0 || pid == 0 {
return Box::new(std::iter::empty());
}
let s_owned = s.to_owned();
let p_owned = p.to_owned();
Box::new(
BitmapIter::with_pattern(&self.triples, &TripleId::new(sid, pid, 0))
.map(move |tid| self.dict.id_to_string(tid.object_id, &IdKind::Object))
.filter_map(move |r| {
r.map_err(|e| error!("Error on triple with subject {s_owned} and property {p_owned}: {e}"))
.ok()
}),
)
}
/// Returns all triples with subject `s` and predicate `p` as string triples.
///
/// Thin wrapper around `objects_with_sp` that puts the given subject and predicate
/// back in front of every object found.
pub fn triples_with_sp<'a>(&'a self, s: &'a str, p: &'a str) -> impl Iterator<Item = StringTriple<T>> + '_ {
    // Converted once up front; every yielded triple clones these two values.
    let subject: T = s.to_owned().into();
    let predicate: T = p.to_owned().into();
    let objects = self.objects_with_sp(s, p);
    objects.map(move |object| (subject.clone(), predicate.clone(), T::from(object)))
}
pub fn triples_with_so(&self, s: &str, o: &str) -> Box<dyn Iterator<Item = StringTriple<T>> + '_> {
let sid = self.dict.string_to_id(s, &IdKind::Subject);
let oid = self.dict.string_to_id(o, &IdKind::Object);
if sid == 0 || oid == 0 {
return Box::new(std::iter::empty());
}
let st = T::from(s.to_owned());
let ot = T::from(o.to_owned());
Box::new(
self.triples
.triples_with_id(sid, &IdKind::Subject)
.filter(move |tid| tid.object_id == oid)
.map(move |tid| self.dict.id_to_string(tid.predicate_id, &IdKind::Predicate))
.filter_map(move |r| {
r.map_err(|e| error!("Error on triple: {e}")).ok()
})
.map(move |ps| (st.clone(), T::from(ps), ot.clone())),
)
}
pub fn subjects_with_po(&self, p: &str, o: &str) -> Box<dyn Iterator<Item = String> + '_> {
let pid = self.dict.string_to_id(p, &IdKind::Predicate);
let oid = self.dict.string_to_id(o, &IdKind::Object);
if pid == 0 || oid == 0 {
return Box::new(std::iter::empty());
}
let p_owned = p.to_owned();
let o_owned = o.to_owned();
Box::new(
PredicateObjectIter::new(&self.triples, pid, oid)
.map(move |sid| self.dict.id_to_string(sid, &IdKind::Subject))
.filter_map(move |r| {
r.map_err(|e| error!("Error on triple with property {p_owned} and object {o_owned}: {e}")).ok()
}),
)
}
/// Returns all triples with predicate `p` and object `o` as string triples.
///
/// Thin wrapper around `subjects_with_po` that appends the given predicate and object
/// to every subject found.
pub fn triples_with_po<'a>(&'a self, p: &'a str, o: &'a str) -> impl Iterator<Item = StringTriple<T>> + 'a {
    // Converted once up front; every yielded triple clones these two values.
    let predicate: T = p.to_owned().into();
    let object: T = o.to_owned().into();
    let subjects = self.subjects_with_po(p, o);
    subjects.map(move |subject| (T::from(subject), predicate.clone(), object.clone()))
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::init;
    use pretty_assertions::{assert_eq, assert_ne};
    use std::fs::File;
    use std::rc::Rc;

    /// Loads the `snikmeta.hdt` fixture and cross-checks the pattern queries against a full scan.
    #[test]
    fn triples() {
        init();
        let filename = "tests/resources/snikmeta.hdt";
        let file = File::open(filename).expect("error opening file");
        let hdt = Hdt::<Rc<str>>::new(std::io::BufReader::new(file)).unwrap();
        let triples = hdt.triples();
        let v: Vec<StringTriple<_>> = triples.collect();
        assert_eq!(v.len(), 327);
        assert_ne!(0, hdt.dict.string_to_id("http://www.snik.eu/ontology/meta", &IdKind::Subject));
        // The subject query must agree with filtering the full scan, including for an unknown URI.
        for uri in ["http://www.snik.eu/ontology/meta/Top", "http://www.snik.eu/ontology/meta", "doesnotexist"] {
            // Filter by reference and clone only the matches, not the whole vector per URI.
            let filtered: Vec<_> = v.iter().filter(|triple| triple.0.as_ref() == uri).cloned().collect();
            let with_s: Vec<_> = hdt.triples_with(uri, &IdKind::Subject).collect();
            assert_eq!(filtered, with_s, "different results between triples() and triples_with(_, Subject) for {}", uri);
        }
        // All two-term patterns must find the same single triple.
        let s = "http://www.snik.eu/ontology/meta/Top";
        let p = "http://www.w3.org/2000/01/rdf-schema#label";
        let o = "\"top class\"@en";
        let triple_vec = vec![(Rc::from(s), Rc::from(p), Rc::from(o))];
        assert_eq!(triple_vec, hdt.triples_with_sp(s, p).collect::<Vec<_>>());
        assert_eq!(triple_vec, hdt.triples_with_so(s, o).collect::<Vec<_>>());
        assert_eq!(triple_vec, hdt.triples_with_po(p, o).collect::<Vec<_>>());
        // Expected counts for the fixture.
        let et = "http://www.snik.eu/ontology/meta/EntityType";
        assert_eq!(4, hdt.subjects_with_po("http://www.w3.org/2000/01/rdf-schema#subClassOf", et).count());
        assert_eq!(
            12,
            hdt.triples_with("http://www.w3.org/2000/01/rdf-schema#subClassOf", &IdKind::Predicate).count()
        );
        assert_eq!(20, hdt.triples_with(et, &IdKind::Object).count());
    }
}