use std::collections::BTreeMap;
use std::fmt;
use serde::{Deserialize, Serialize};
use crate::model::{StarTerm, StarTriple};
use crate::{StarError, StarResult};
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct TermKey(pub String);
impl TermKey {
pub fn from_term(term: &StarTerm) -> Self {
TermKey(term_to_string(term))
}
pub fn wildcard() -> Self {
TermKey(String::new())
}
pub fn is_wildcard(&self) -> bool {
self.0.is_empty()
}
}
impl fmt::Display for TermKey {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.0)
}
}
fn term_to_string(term: &StarTerm) -> String {
match term {
StarTerm::NamedNode(n) => format!("<{}>", n.iri),
StarTerm::BlankNode(b) => format!("_:{}", b.id),
StarTerm::Literal(l) => {
if let Some(lang) = &l.language {
format!("{:?}@{lang}", l.value)
} else if let Some(dt) = &l.datatype {
format!("{:?}^^<{}>", l.value, dt.iri)
} else {
format!("{:?}", l.value)
}
}
StarTerm::QuotedTriple(t) => {
format!(
"<<{} {} {}>>",
term_to_string(&t.subject),
term_to_string(&t.predicate),
term_to_string(&t.object),
)
}
StarTerm::Variable(v) => format!("?{}", v.name),
}
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct CompoundKey {
pub outer_s: TermKey,
pub outer_p: TermKey,
pub outer_o: TermKey,
pub inner_s: TermKey,
pub inner_p: TermKey,
pub inner_o: TermKey,
}
impl CompoundKey {
pub fn from_triple(outer: &StarTriple) -> Self {
let (inner_s, inner_p, inner_o) = match &outer.subject {
StarTerm::QuotedTriple(inner) => (
TermKey::from_term(&inner.subject),
TermKey::from_term(&inner.predicate),
TermKey::from_term(&inner.object),
),
other => (
TermKey::from_term(other),
TermKey::wildcard(),
TermKey::wildcard(),
),
};
Self {
outer_s: TermKey::from_term(&outer.subject),
outer_p: TermKey::from_term(&outer.predicate),
outer_o: TermKey::from_term(&outer.object),
inner_s,
inner_p,
inner_o,
}
}
}
#[derive(Debug, Clone, Default)]
pub struct CompoundPattern {
pub outer_s: Option<TermKey>,
pub outer_p: Option<TermKey>,
pub outer_o: Option<TermKey>,
pub inner_s: Option<TermKey>,
pub inner_p: Option<TermKey>,
pub inner_o: Option<TermKey>,
}
impl CompoundPattern {
pub fn wildcard() -> Self {
Self::default()
}
pub fn from_triple_pattern(pattern: &StarTriple) -> Self {
Self {
outer_s: if is_variable(&pattern.subject) {
None
} else {
Some(TermKey::from_term(&pattern.subject))
},
outer_p: if is_variable(&pattern.predicate) {
None
} else {
Some(TermKey::from_term(&pattern.predicate))
},
outer_o: if is_variable(&pattern.object) {
None
} else {
Some(TermKey::from_term(&pattern.object))
},
inner_s: extract_inner_s(&pattern.subject),
inner_p: extract_inner_p(&pattern.subject),
inner_o: extract_inner_o(&pattern.subject),
}
}
pub fn matches(&self, key: &CompoundKey) -> bool {
matches_component(&self.outer_s, &key.outer_s)
&& matches_component(&self.outer_p, &key.outer_p)
&& matches_component(&self.outer_o, &key.outer_o)
&& matches_component(&self.inner_s, &key.inner_s)
&& matches_component(&self.inner_p, &key.inner_p)
&& matches_component(&self.inner_o, &key.inner_o)
}
}
fn is_variable(term: &StarTerm) -> bool {
matches!(term, StarTerm::Variable(_))
}
fn extract_inner_s(term: &StarTerm) -> Option<TermKey> {
if let StarTerm::QuotedTriple(inner) = term {
if !is_variable(&inner.subject) {
return Some(TermKey::from_term(&inner.subject));
}
}
None
}
fn extract_inner_p(term: &StarTerm) -> Option<TermKey> {
if let StarTerm::QuotedTriple(inner) = term {
if !is_variable(&inner.predicate) {
return Some(TermKey::from_term(&inner.predicate));
}
}
None
}
fn extract_inner_o(term: &StarTerm) -> Option<TermKey> {
if let StarTerm::QuotedTriple(inner) = term {
if !is_variable(&inner.object) {
return Some(TermKey::from_term(&inner.object));
}
}
None
}
fn matches_component(pattern: &Option<TermKey>, key: &TermKey) -> bool {
match pattern {
None => true, Some(p) => p == key,
}
}
#[derive(Debug, Default)]
pub struct RdfStarIndex {
btree: BTreeMap<CompoundKey, StarTriple>,
}
impl RdfStarIndex {
pub fn new() -> Self {
Self::default()
}
pub fn insert(&mut self, triple: StarTriple) -> bool {
let key = CompoundKey::from_triple(&triple);
let was_absent = !self.btree.contains_key(&key);
self.btree.insert(key, triple);
was_absent
}
pub fn remove(&mut self, triple: &StarTriple) -> bool {
let key = CompoundKey::from_triple(triple);
self.btree.remove(&key).is_some()
}
pub fn contains(&self, triple: &StarTriple) -> bool {
let key = CompoundKey::from_triple(triple);
self.btree.contains_key(&key)
}
pub fn len(&self) -> usize {
self.btree.len()
}
pub fn is_empty(&self) -> bool {
self.btree.is_empty()
}
pub fn prefix_scan(&self, pattern: &CompoundPattern) -> Vec<StarTriple> {
self.btree
.iter()
.filter(|(key, _)| pattern.matches(key))
.map(|(_, triple)| triple.clone())
.collect()
}
pub fn scan_by_quoted_subject(&self, inner: &StarTriple) -> Vec<StarTriple> {
let inner_s = TermKey::from_term(&inner.subject);
let inner_p = TermKey::from_term(&inner.predicate);
let inner_o = TermKey::from_term(&inner.object);
let pattern = CompoundPattern {
outer_s: None,
outer_p: None,
outer_o: None,
inner_s: Some(inner_s),
inner_p: Some(inner_p),
inner_o: Some(inner_o),
};
self.prefix_scan(&pattern)
}
pub fn iter(&self) -> impl Iterator<Item = &StarTriple> {
self.btree.values()
}
pub fn clear(&mut self) {
self.btree.clear();
}
pub fn validate(&self) -> StarResult<()> {
for (key, triple) in &self.btree {
let expected = CompoundKey::from_triple(triple);
if *key != expected {
return Err(StarError::QueryError {
message: format!(
"Index corruption: key {key:?} does not match triple {triple:?}"
),
query_fragment: None,
position: None,
suggestion: None,
});
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::model::{StarTerm, StarTriple, Variable};
fn iri(s: &str) -> StarTerm {
StarTerm::iri(s).expect("valid IRI")
}
fn lit(s: &str) -> StarTerm {
StarTerm::literal(s).expect("ok")
}
fn triple(s: &str, p: &str, o: &str) -> StarTriple {
StarTriple::new(iri(s), iri(p), iri(o))
}
fn var(name: &str) -> StarTerm {
StarTerm::Variable(Variable { name: name.into() })
}
#[test]
fn test_index_insert_and_contains() {
let mut idx = RdfStarIndex::new();
let t = triple("http://ex.org/s", "http://ex.org/p", "http://ex.org/o");
assert!(idx.insert(t.clone()));
assert!(idx.contains(&t));
assert_eq!(idx.len(), 1);
}
#[test]
fn test_index_insert_duplicate() {
let mut idx = RdfStarIndex::new();
let t = triple("http://ex.org/s", "http://ex.org/p", "http://ex.org/o");
assert!(idx.insert(t.clone())); assert!(!idx.insert(t.clone())); assert_eq!(idx.len(), 1);
}
#[test]
fn test_index_remove() {
let mut idx = RdfStarIndex::new();
let t = triple("http://ex.org/s", "http://ex.org/p", "http://ex.org/o");
idx.insert(t.clone());
assert!(idx.remove(&t));
assert!(!idx.contains(&t));
assert!(idx.is_empty());
}
#[test]
fn test_index_remove_nonexistent() {
let mut idx = RdfStarIndex::new();
let t = triple("http://ex.org/s", "http://ex.org/p", "http://ex.org/o");
assert!(!idx.remove(&t));
}
#[test]
fn test_index_quoted_triple_subject() {
let mut idx = RdfStarIndex::new();
let inner = triple(
"http://ex.org/alice",
"http://ex.org/age",
"http://ex.org/30",
);
let meta = StarTriple::new(
StarTerm::quoted_triple(inner.clone()),
iri("http://ex.org/certainty"),
lit("0.9"),
);
assert!(idx.insert(meta.clone()));
assert!(idx.contains(&meta));
assert_eq!(idx.len(), 1);
}
#[test]
fn test_prefix_scan_wildcard_returns_all() {
let mut idx = RdfStarIndex::new();
idx.insert(triple(
"http://ex.org/s1",
"http://ex.org/p",
"http://ex.org/o1",
));
idx.insert(triple(
"http://ex.org/s2",
"http://ex.org/p",
"http://ex.org/o2",
));
idx.insert(triple(
"http://ex.org/s3",
"http://ex.org/p",
"http://ex.org/o3",
));
let results = idx.prefix_scan(&CompoundPattern::wildcard());
assert_eq!(results.len(), 3);
}
#[test]
fn test_prefix_scan_bound_predicate() {
let mut idx = RdfStarIndex::new();
idx.insert(triple(
"http://ex.org/s1",
"http://ex.org/knows",
"http://ex.org/o1",
));
idx.insert(triple(
"http://ex.org/s2",
"http://ex.org/age",
"http://ex.org/o2",
));
idx.insert(triple(
"http://ex.org/s3",
"http://ex.org/knows",
"http://ex.org/o3",
));
let pattern = CompoundPattern {
outer_p: Some(TermKey::from_term(&iri("http://ex.org/knows"))),
..CompoundPattern::default()
};
let results = idx.prefix_scan(&pattern);
assert_eq!(results.len(), 2);
}
#[test]
fn test_prefix_scan_no_match() {
let mut idx = RdfStarIndex::new();
idx.insert(triple(
"http://ex.org/s",
"http://ex.org/p",
"http://ex.org/o",
));
let pattern = CompoundPattern {
outer_s: Some(TermKey::from_term(&iri("http://ex.org/nobody"))),
..CompoundPattern::default()
};
let results = idx.prefix_scan(&pattern);
assert!(results.is_empty());
}
#[test]
fn test_scan_by_quoted_subject() {
let mut idx = RdfStarIndex::new();
let inner = triple(
"http://ex.org/alice",
"http://ex.org/age",
"http://ex.org/30",
);
let meta1 = StarTriple::new(
StarTerm::quoted_triple(inner.clone()),
iri("http://ex.org/certainty"),
lit("0.9"),
);
let meta2 = StarTriple::new(
StarTerm::quoted_triple(inner.clone()),
iri("http://ex.org/source"),
iri("http://ex.org/db"),
);
let unrelated = triple("http://ex.org/bob", "http://ex.org/age", "http://ex.org/25");
idx.insert(meta1.clone());
idx.insert(meta2.clone());
idx.insert(unrelated.clone());
let results = idx.scan_by_quoted_subject(&inner);
assert_eq!(results.len(), 2);
}
#[test]
fn test_pattern_from_triple_pattern_bound_s() {
let pattern = StarTriple::new(iri("http://ex.org/s"), var("p"), var("o"));
let cp = CompoundPattern::from_triple_pattern(&pattern);
assert!(cp.outer_s.is_some());
assert!(cp.outer_p.is_none()); assert!(cp.outer_o.is_none()); }
#[test]
fn test_pattern_from_triple_pattern_all_wildcards() {
let pattern = StarTriple::new(var("s"), var("p"), var("o"));
let cp = CompoundPattern::from_triple_pattern(&pattern);
assert!(cp.outer_s.is_none());
assert!(cp.outer_p.is_none());
assert!(cp.outer_o.is_none());
}
#[test]
fn test_pattern_from_quoted_subject() {
let inner = triple("http://ex.org/a", "http://ex.org/b", "http://ex.org/c");
let pattern = StarTriple::new(StarTerm::quoted_triple(inner), var("p"), var("o"));
let cp = CompoundPattern::from_triple_pattern(&pattern);
assert!(cp.inner_s.is_some());
assert!(cp.inner_p.is_some());
assert!(cp.inner_o.is_some());
}
#[test]
fn test_validate_clean_index() {
let mut idx = RdfStarIndex::new();
idx.insert(triple(
"http://ex.org/s",
"http://ex.org/p",
"http://ex.org/o",
));
assert!(idx.validate().is_ok());
}
#[test]
fn test_clear() {
let mut idx = RdfStarIndex::new();
idx.insert(triple(
"http://ex.org/s",
"http://ex.org/p",
"http://ex.org/o",
));
idx.clear();
assert!(idx.is_empty());
}
#[test]
fn test_term_key_ordering() {
let a = TermKey::from_term(&iri("http://ex.org/a"));
let b = TermKey::from_term(&iri("http://ex.org/b"));
assert!(a < b);
}
#[test]
fn test_wildcard_is_lexicographically_smallest() {
let wc = TermKey::wildcard();
let any = TermKey::from_term(&iri("http://ex.org/any"));
assert!(wc < any);
}
}