#[cfg(feature = "sophia")]
use crate::four_sect_dict::IdKind;
use crate::hdt::Hdt;
use crate::predicate_object_iter::PredicateObjectIter;
use crate::triples::{BitmapIter, TripleId};
use log::debug;
use log::error;
use sophia::graph::*;
use sophia::term::iri::Iri;
use sophia::term::literal::Literal;
use sophia::term::*;
use sophia::triple::stream::*;
use sophia::triple::streaming_mode::*;
use std::convert::From;
use std::convert::Infallible;
use std::hash::Hash;
use std::marker::PhantomData;
pub trait Data:
AsRef<str> + Clone + Eq + From<std::boxed::Box<str>> + From<String> + From<&'static str> + Hash
{
}
impl<T> Data for T where
T: AsRef<str> + Clone + Eq + From<std::boxed::Box<str>> + From<String> + From<&'static str> + Hash
{
}
pub trait DisplayData: Data + std::fmt::Debug + std::fmt::Display {}
impl<T> DisplayData for T where T: Data + std::fmt::Debug + std::fmt::Display {}
pub struct HdtGraph<T: DisplayData> {
hdt: Hdt<T>,
phantom: PhantomData<T>,
}
impl<T: DisplayData> HdtGraph<T> {
pub const fn new(hdt: Hdt<T>) -> Self {
HdtGraph { hdt, phantom: PhantomData {} }
}
pub fn size_in_bytes(&self) -> usize {
self.hdt.size_in_bytes()
}
}
const XSD_STRING: &str = "http://www.w3.org/2001/XMLSchema#string";
fn auto_term<T: DisplayData>(s: &str) -> Result<Term<T>, TermError> {
match s.chars().next() {
None => Err(TermError::InvalidIri(String::new())),
Some('"') => match s.rfind('"') {
None => Err(TermError::UnsupportedDatatype(s.to_owned())),
Some(index) => {
let lex = s[1..index].to_owned();
let rest = &s[index + 1..];
if rest.is_empty() {
let dt_string = Iri::<&str>::new_unchecked(XSD_STRING);
return Ok(Term::<T>::from(Literal::new_dt(lex, dt_string)));
}
if let Some(tag_index) = rest.find('@') {
return Ok(Term::<T>::from(Literal::new_lang_unchecked(
lex,
rest[tag_index + 1..].to_owned(),
)));
}
let mut dt_split = rest.split("^^");
dt_split.next(); match dt_split.next() {
Some(dt) => {
let unquoted = dt[1..dt.len() - 1].to_owned().into_boxed_str();
let dt = Iri::<Box<str>>::new_unchecked(unquoted);
Ok(Term::<T>::from(Literal::new_dt(lex, dt)))
}
None => Err(TermError::UnsupportedDatatype(s.to_owned())),
}
}
},
Some('_') => Term::<T>::new_bnode(s[2..].to_owned()),
_ => Term::<T>::new_iri(s.to_owned()),
}
}
fn triple_source<'s, T: DisplayData>(
triples: impl Iterator<Item = (T, T, T)> + 's,
) -> GTripleSource<'s, HdtGraph<T>> {
Box::new(
triples
.map(|(s, p, o)| -> Result<_> {
debug_assert_ne!("", s.as_ref(), "triple_source subject is empty ({s}, {p}, {o})");
debug_assert_ne!("", p.as_ref(), "triple_source predicate is empty ({s}, {p}, {o})");
debug_assert_ne!("", o.as_ref(), "triple_source object is empty ({s}, {p}, {o})");
Ok(StreamedTriple::by_value([
auto_term(s.as_ref())?,
auto_term(p.as_ref())?,
auto_term(o.as_ref())?,
]))
})
.filter_map(|r| r.map_err(|e| error!("{e}")).ok())
.into_triple_source(),
)
}
fn term_string(t: &(impl TTerm + ?Sized)) -> String {
match t.kind() {
TermKind::BlankNode => "_:".to_owned() + &t.value(),
TermKind::Iri => t.value().to_string(),
TermKind::Literal => {
let value = format!("\"{}\"", t.value());
if let Some(lang) = t.language() {
return format!("{value}@{lang}");
}
if let Some(dt) = t.datatype() {
if dt.value() != "http://www.w3.org/2001/XMLSchema#string" {
return format!("{value}^^{dt}");
}
}
value
}
TermKind::Variable => {
panic!("Variable term strings are not supported.");
}
}
}
impl<T: DisplayData> Graph for HdtGraph<T> {
type Triple = ByValue<[Term<T>; 3]>;
type Error = Infallible;
fn triples(&self) -> GTripleSource<Self> {
debug!("Iterating through ALL triples in the HDT Graph. This can be inefficient for large graphs.");
triple_source(self.hdt.triples())
}
fn triples_with_s<'s, TS: TTerm + ?Sized>(&'s self, s: &'s TS) -> GTripleSource<'s, Self> {
let ss = term_string(s);
let sid = self.hdt.dict.string_to_id(&ss, &IdKind::Subject);
if sid == 0 {
return Box::new(std::iter::empty());
}
let s = auto_term(&ss).unwrap();
Box::new(
BitmapIter::with_pattern(&self.hdt.triples, &TripleId::new(sid, 0, 0))
.map(move |tid| -> Result<_> {
Ok(StreamedTriple::by_value([
s.clone(),
Term::new_iri_unchecked(
self.hdt.dict.id_to_string(tid.predicate_id, &IdKind::Predicate).unwrap(),
),
auto_term(&self.hdt.dict.id_to_string(tid.object_id, &IdKind::Object).unwrap())?,
]))
})
.filter_map(|r| r.map_err(|e| error!("{e}")).ok())
.into_triple_source(),
)
}
fn triples_with_p<'s, TS: TTerm + ?Sized>(&'s self, p: &'s TS) -> GTripleSource<'s, Self> {
triple_source(self.hdt.triples_with(&p.value(), &IdKind::Predicate))
}
fn triples_with_o<'s, TS: TTerm + ?Sized>(&'s self, o: &'s TS) -> GTripleSource<'s, Self> {
triple_source(self.hdt.triples_with(&term_string(o), &IdKind::Object))
}
fn triples_with_sp<'s, TS: TTerm + ?Sized, TP: TTerm + ?Sized>(
&'s self, s: &'s TS, p: &'s TP,
) -> GTripleSource<'s, Self> {
let ss = term_string(s);
let ps = term_string(p);
let sid = self.hdt.dict.string_to_id(&ss, &IdKind::Subject);
let pid = self.hdt.dict.string_to_id(&ps, &IdKind::Predicate);
if sid == 0 || pid == 0 {
return Box::new(std::iter::empty());
}
let s = auto_term(&ss).unwrap();
let p = auto_term(&ps).unwrap();
Box::new(
BitmapIter::with_pattern(&self.hdt.triples, &TripleId::new(sid, pid, 0))
.map(move |tid| -> Result<_> {
Ok(StreamedTriple::by_value([
s.clone(),
p.clone(),
auto_term(&self.hdt.dict.id_to_string(tid.object_id, &IdKind::Object).unwrap())?,
]))
})
.filter_map(|r| r.map_err(|e| error!("{e}")).ok())
.into_triple_source(),
)
}
fn triples_with_so<'s, TS: TTerm + ?Sized, TO: TTerm + ?Sized>(
&'s self, s: &'s TS, o: &'s TO,
) -> GTripleSource<'s, Self> {
triple_source(self.hdt.triples_with_so(&term_string(s), &term_string(o)))
}
fn triples_with_po<'s, TP: TTerm + ?Sized, TO: TTerm + ?Sized>(
&'s self, p: &'s TP, o: &'s TO,
) -> GTripleSource<'s, Self> {
let ps = term_string(p);
let os = term_string(o);
let pid = self.hdt.dict.string_to_id(&ps, &IdKind::Predicate);
let oid = self.hdt.dict.string_to_id(&os, &IdKind::Object);
let p = Term::new_iri_unchecked(ps);
let o = auto_term(&os).unwrap();
Box::new(
PredicateObjectIter::new(&self.hdt.triples, pid, oid)
.map(move |sid| -> Result<_> {
Ok(StreamedTriple::by_value([
Term::new_iri_unchecked(self.hdt.dict.id_to_string(sid, &IdKind::Subject).unwrap()),
p.clone(),
o.clone(),
]))
})
.filter_map(|r| r.map_err(|e| error!("{e}")).ok())
.into_triple_source(),
)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::tests::init;
use sophia::triple::Triple;
use std::fs::File;
use std::rc::Rc;
#[test]
fn test_graph() {
type T = Rc<str>;
init();
let file = File::open("tests/resources/snikmeta.hdt").expect("error opening file");
let hdt = Hdt::<Rc<str>>::new(std::io::BufReader::new(file)).unwrap();
let graph = HdtGraph::new(hdt);
let triples: Vec<_> = graph.triples().collect();
assert_eq!(triples.len(), 327);
assert!(graph
.triples_with_s(&Term::<T>::new_iri_unchecked("http://www.snik.eu/ontology/meta".to_owned()))
.next()
.is_some());
for uri in ["http://www.snik.eu/ontology/meta/Top", "http://www.snik.eu/ontology/meta", "doesnotexist"] {
let term = Term::<T>::new_iri_unchecked(uri.to_owned());
let filtered: Vec<_> = triples
.iter()
.map(|triple| triple.as_ref().unwrap())
.filter(|triple| triple.s().value() == uri)
.collect();
let with_s: Vec<_> = graph.triples_with_s(&term).map(std::result::Result::unwrap).collect();
let filtered_string = format!("{filtered:?}");
let with_s_string = format!("{with_s:?}");
assert_eq!(
filtered_string, with_s_string,
"different results between triples() and triples_with_s() for {uri}"
);
}
let s = Term::<T>::new_iri_unchecked("http://www.snik.eu/ontology/meta/Top".to_owned());
let p = Term::<T>::new_iri_unchecked("http://www.w3.org/2000/01/rdf-schema#label".to_owned());
let o = Term::<T>::from(Literal::new_lang_unchecked("top class", "en"));
assert!(graph.triples_with_o(&o).next().is_some());
let triple = (&s, &p, &o);
let sp = graph.triples_with_sp(&s, &p).map(std::result::Result::unwrap).collect::<Vec<_>>();
let so = graph.triples_with_so(&s, &o).map(std::result::Result::unwrap).collect::<Vec<_>>();
let po = graph.triples_with_po(&p, &o).map(std::result::Result::unwrap).collect::<Vec<_>>();
for vec in [sp, so, po] {
assert_eq!(1, vec.len());
let e = &vec[0];
assert_eq!(e.s(), triple.0);
assert_eq!(e.p(), triple.1);
assert_eq!(e.o(), triple.2);
}
assert!(graph.triples_with_o(&Term::<T>::from("22.10".to_owned())).count() == 1);
let date = &Term::<T>::from(Literal::new_dt(
"2022-10-20",
Iri::<&str>::new_unchecked("http://www.w3.org/2001/XMLSchema#date"),
));
assert!(graph.triples_with_o(date).count() == 1);
}
}