use std::collections::HashMap;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct QuotedId(pub u64);
impl QuotedId {
pub const NULL: Self = Self(0);
pub fn is_null(self) -> bool {
self.0 == 0
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct QuotedTriple {
pub subject: String,
pub predicate: String,
pub object: String,
}
impl QuotedTriple {
pub fn new(
subject: impl Into<String>,
predicate: impl Into<String>,
object: impl Into<String>,
) -> Self {
Self {
subject: subject.into(),
predicate: predicate.into(),
object: object.into(),
}
}
pub fn to_turtle_star(&self) -> String {
format!("<< {} {} {} >>", self.subject, self.predicate, self.object)
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct QuotedGraphStats {
pub unique_quoted: usize,
pub total_assertions: usize,
}
#[derive(Debug, Default)]
pub struct QuotedGraph {
triples_by_id: HashMap<QuotedId, QuotedTriple>,
id_by_triple: HashMap<QuotedTriple, QuotedId>,
by_subject: HashMap<String, Vec<QuotedId>>,
by_predicate: HashMap<String, Vec<QuotedId>>,
by_object: HashMap<String, Vec<QuotedId>>,
assertions: HashMap<QuotedId, Vec<(String, String)>>,
next_id: u64,
}
impl QuotedGraph {
pub fn new() -> Self {
Self {
next_id: 1,
..Default::default()
}
}
pub fn intern(&mut self, triple: QuotedTriple) -> QuotedId {
if let Some(&existing_id) = self.id_by_triple.get(&triple) {
return existing_id;
}
let id = QuotedId(self.next_id);
self.next_id += 1;
self.by_subject
.entry(triple.subject.clone())
.or_default()
.push(id);
self.by_predicate
.entry(triple.predicate.clone())
.or_default()
.push(id);
self.by_object
.entry(triple.object.clone())
.or_default()
.push(id);
self.id_by_triple.insert(triple.clone(), id);
self.triples_by_id.insert(id, triple);
id
}
pub fn lookup(&self, id: QuotedId) -> Option<&QuotedTriple> {
self.triples_by_id.get(&id)
}
pub fn find_by_subject(&self, s: &str) -> Vec<QuotedId> {
self.by_subject.get(s).cloned().unwrap_or_default()
}
pub fn find_by_predicate(&self, p: &str) -> Vec<QuotedId> {
self.by_predicate.get(p).cloned().unwrap_or_default()
}
pub fn find_by_object(&self, o: &str) -> Vec<QuotedId> {
self.by_object.get(o).cloned().unwrap_or_default()
}
pub fn add_assertion(
&mut self,
quoted_id: QuotedId,
predicate: impl Into<String>,
object: impl Into<String>,
) {
self.assertions
.entry(quoted_id)
.or_default()
.push((predicate.into(), object.into()));
}
pub fn get_assertions(&self, quoted_id: QuotedId) -> Vec<(String, String)> {
self.assertions.get("ed_id).cloned().unwrap_or_default()
}
pub fn stats(&self) -> QuotedGraphStats {
let total_assertions = self.assertions.values().map(Vec::len).sum();
QuotedGraphStats {
unique_quoted: self.triples_by_id.len(),
total_assertions,
}
}
pub fn len(&self) -> usize {
self.triples_by_id.len()
}
pub fn is_empty(&self) -> bool {
self.triples_by_id.is_empty()
}
pub fn clear(&mut self) {
self.triples_by_id.clear();
self.id_by_triple.clear();
self.by_subject.clear();
self.by_predicate.clear();
self.by_object.clear();
self.assertions.clear();
self.next_id = 1;
}
pub fn contains(&self, triple: &QuotedTriple) -> bool {
self.id_by_triple.contains_key(triple)
}
pub fn get_id(&self, triple: &QuotedTriple) -> Option<QuotedId> {
self.id_by_triple.get(triple).copied()
}
pub fn all_ids(&self) -> Vec<QuotedId> {
let mut ids: Vec<QuotedId> = self.triples_by_id.keys().copied().collect();
ids.sort();
ids
}
pub fn find_by_subject_and_object(&self, s: &str, o: &str) -> Vec<QuotedId> {
let by_s: std::collections::HashSet<QuotedId> =
self.find_by_subject(s).into_iter().collect();
self.find_by_object(o)
.into_iter()
.filter(|id| by_s.contains(id))
.collect()
}
pub fn find_pattern(&self, s: Option<&str>, p: Option<&str>, o: Option<&str>) -> Vec<QuotedId> {
let s_set: Option<std::collections::HashSet<QuotedId>> =
s.map(|v| self.find_by_subject(v).into_iter().collect());
let p_set: Option<std::collections::HashSet<QuotedId>> =
p.map(|v| self.find_by_predicate(v).into_iter().collect());
let o_set: Option<std::collections::HashSet<QuotedId>> =
o.map(|v| self.find_by_object(v).into_iter().collect());
let mut result: Vec<QuotedId> = self.all_ids();
if let Some(s_ids) = &s_set {
result.retain(|id| s_ids.contains(id));
}
if let Some(p_ids) = &p_set {
result.retain(|id| p_ids.contains(id));
}
if let Some(o_ids) = &o_set {
result.retain(|id| o_ids.contains(id));
}
result
}
}
#[cfg(test)]
mod tests {
use super::*;
fn qt(s: &str, p: &str, o: &str) -> QuotedTriple {
QuotedTriple::new(s, p, o)
}
#[test]
fn test_quoted_id_null() {
assert!(QuotedId::NULL.is_null());
assert_eq!(QuotedId::NULL.0, 0);
}
#[test]
fn test_quoted_id_non_null() {
assert!(!QuotedId(1).is_null());
}
#[test]
fn test_quoted_id_ordering() {
assert!(QuotedId(1) < QuotedId(2));
assert!(QuotedId(10) > QuotedId(5));
}
#[test]
fn test_quoted_triple_new() {
let t = qt("s", "p", "o");
assert_eq!(t.subject, "s");
assert_eq!(t.predicate, "p");
assert_eq!(t.object, "o");
}
#[test]
fn test_quoted_triple_turtle_star() {
let t = qt("s", "p", "o");
let turtle = t.to_turtle_star();
assert!(turtle.contains("<<"));
assert!(turtle.contains(">>"));
assert!(turtle.contains("s"));
}
#[test]
fn test_quoted_triple_equality() {
let a = qt("s", "p", "o");
let b = qt("s", "p", "o");
assert_eq!(a, b);
}
#[test]
fn test_intern_new_triple() {
let mut g = QuotedGraph::new();
let id = g.intern(qt("s", "p", "o"));
assert!(!id.is_null());
assert_eq!(id.0, 1);
}
#[test]
fn test_intern_duplicate_returns_same_id() {
let mut g = QuotedGraph::new();
let id1 = g.intern(qt("s", "p", "o"));
let id2 = g.intern(qt("s", "p", "o"));
assert_eq!(id1, id2);
}
#[test]
fn test_intern_different_triples_different_ids() {
let mut g = QuotedGraph::new();
let id1 = g.intern(qt("s", "p", "o1"));
let id2 = g.intern(qt("s", "p", "o2"));
assert_ne!(id1, id2);
}
#[test]
fn test_lookup_existing() {
let mut g = QuotedGraph::new();
let triple = qt("subj", "pred", "obj");
let id = g.intern(triple.clone());
let found = g.lookup(id).expect("should exist");
assert_eq!(found, &triple);
}
#[test]
fn test_lookup_missing() {
let g = QuotedGraph::new();
assert!(g.lookup(QuotedId(99)).is_none());
}
#[test]
fn test_len() {
let mut g = QuotedGraph::new();
assert_eq!(g.len(), 0);
g.intern(qt("s", "p", "o"));
assert_eq!(g.len(), 1);
g.intern(qt("s", "p", "o")); assert_eq!(g.len(), 1);
g.intern(qt("s", "p", "o2"));
assert_eq!(g.len(), 2);
}
#[test]
fn test_is_empty() {
let mut g = QuotedGraph::new();
assert!(g.is_empty());
g.intern(qt("s", "p", "o"));
assert!(!g.is_empty());
}
#[test]
fn test_contains() {
let mut g = QuotedGraph::new();
let triple = qt("s", "p", "o");
assert!(!g.contains(&triple));
g.intern(triple.clone());
assert!(g.contains(&triple));
}
#[test]
fn test_get_id() {
let mut g = QuotedGraph::new();
let triple = qt("s", "p", "o");
assert!(g.get_id(&triple).is_none());
let id = g.intern(triple.clone());
assert_eq!(g.get_id(&triple), Some(id));
}
#[test]
fn test_find_by_subject() {
let mut g = QuotedGraph::new();
let id1 = g.intern(qt("alice", "age", "30"));
let id2 = g.intern(qt("alice", "name", "Alice"));
let _id3 = g.intern(qt("bob", "age", "25"));
let mut found = g.find_by_subject("alice");
found.sort();
assert!(found.contains(&id1));
assert!(found.contains(&id2));
assert_eq!(found.len(), 2);
}
#[test]
fn test_find_by_predicate() {
let mut g = QuotedGraph::new();
let id1 = g.intern(qt("alice", "age", "30"));
let id2 = g.intern(qt("bob", "age", "25"));
let _id3 = g.intern(qt("alice", "name", "Alice"));
let found = g.find_by_predicate("age");
assert_eq!(found.len(), 2);
assert!(found.contains(&id1));
assert!(found.contains(&id2));
}
#[test]
fn test_find_by_object() {
let mut g = QuotedGraph::new();
let id1 = g.intern(qt("alice", "type", "Person"));
let id2 = g.intern(qt("bob", "type", "Person"));
let _id3 = g.intern(qt("corp", "type", "Organization"));
let found = g.find_by_object("Person");
assert_eq!(found.len(), 2);
assert!(found.contains(&id1));
assert!(found.contains(&id2));
}
#[test]
fn test_find_by_subject_none() {
let mut g = QuotedGraph::new();
g.intern(qt("alice", "age", "30"));
let found = g.find_by_subject("nobody");
assert!(found.is_empty());
}
#[test]
fn test_add_assertion_basic() {
let mut g = QuotedGraph::new();
let id = g.intern(qt("alice", "age", "30"));
g.add_assertion(id, "certainty", "0.9");
let assertions = g.get_assertions(id);
assert_eq!(assertions.len(), 1);
assert_eq!(assertions[0], ("certainty".to_string(), "0.9".to_string()));
}
#[test]
fn test_add_assertion_multiple() {
let mut g = QuotedGraph::new();
let id = g.intern(qt("alice", "age", "30"));
g.add_assertion(id, "certainty", "0.9");
g.add_assertion(id, "source", "http://example.org/census");
let assertions = g.get_assertions(id);
assert_eq!(assertions.len(), 2);
}
#[test]
fn test_get_assertions_no_assertions() {
let mut g = QuotedGraph::new();
let id = g.intern(qt("s", "p", "o"));
let assertions = g.get_assertions(id);
assert!(assertions.is_empty());
}
#[test]
fn test_assertions_per_quoted_id() {
let mut g = QuotedGraph::new();
let id1 = g.intern(qt("a", "p", "o"));
let id2 = g.intern(qt("b", "p", "o"));
g.add_assertion(id1, "k", "v1");
g.add_assertion(id2, "k", "v2");
assert_eq!(g.get_assertions(id1)[0].1, "v1");
assert_eq!(g.get_assertions(id2)[0].1, "v2");
}
#[test]
fn test_stats_empty() {
let g = QuotedGraph::new();
let s = g.stats();
assert_eq!(s.unique_quoted, 0);
assert_eq!(s.total_assertions, 0);
}
#[test]
fn test_stats_after_insert() {
let mut g = QuotedGraph::new();
let id = g.intern(qt("s", "p", "o"));
g.add_assertion(id, "k1", "v1");
g.add_assertion(id, "k2", "v2");
let id2 = g.intern(qt("s2", "p", "o"));
g.add_assertion(id2, "k", "v");
let s = g.stats();
assert_eq!(s.unique_quoted, 2);
assert_eq!(s.total_assertions, 3);
}
#[test]
fn test_all_ids_sorted() {
let mut g = QuotedGraph::new();
g.intern(qt("a", "p", "o1"));
g.intern(qt("a", "p", "o2"));
g.intern(qt("a", "p", "o3"));
let ids = g.all_ids();
assert_eq!(ids.len(), 3);
assert!(ids.windows(2).all(|w| w[0] < w[1]));
}
#[test]
fn test_clear() {
let mut g = QuotedGraph::new();
let id = g.intern(qt("s", "p", "o"));
g.add_assertion(id, "k", "v");
g.clear();
assert!(g.is_empty());
assert!(g.get_assertions(id).is_empty());
let new_id = g.intern(qt("s", "p", "o"));
assert_eq!(new_id.0, 1);
}
#[test]
fn test_find_by_subject_and_object() {
let mut g = QuotedGraph::new();
let id1 = g.intern(qt("alice", "knows", "bob"));
let _id2 = g.intern(qt("alice", "age", "30"));
let _id3 = g.intern(qt("carol", "knows", "bob"));
let found = g.find_by_subject_and_object("alice", "bob");
assert_eq!(found, vec![id1]);
}
#[test]
fn test_find_pattern_wildcard() {
let mut g = QuotedGraph::new();
let id1 = g.intern(qt("a", "p", "o"));
let id2 = g.intern(qt("b", "p", "o"));
let mut found = g.find_pattern(None, None, None);
found.sort();
assert!(found.contains(&id1));
assert!(found.contains(&id2));
}
#[test]
fn test_find_pattern_by_predicate() {
let mut g = QuotedGraph::new();
let id1 = g.intern(qt("s1", "rdf:type", "Person"));
let _id2 = g.intern(qt("s2", "other", "val"));
let found = g.find_pattern(None, Some("rdf:type"), None);
assert_eq!(found, vec![id1]);
}
#[test]
fn test_find_pattern_full_triple() {
let mut g = QuotedGraph::new();
let id1 = g.intern(qt("s", "p", "o"));
let _id2 = g.intern(qt("s", "p", "o2"));
let found = g.find_pattern(Some("s"), Some("p"), Some("o"));
assert_eq!(found, vec![id1]);
}
#[test]
fn test_find_pattern_no_match() {
let mut g = QuotedGraph::new();
g.intern(qt("s", "p", "o"));
let found = g.find_pattern(Some("nobody"), None, None);
assert!(found.is_empty());
}
#[test]
fn test_large_insert() {
let mut g = QuotedGraph::new();
for i in 0..100 {
let id = g.intern(qt(&format!("subj_{i}"), "rdf:type", "Entity"));
g.add_assertion(id, "confidence", format!("{}", 0.5 + i as f64 * 0.005));
}
let s = g.stats();
assert_eq!(s.unique_quoted, 100);
assert_eq!(s.total_assertions, 100);
let by_pred = g.find_by_predicate("rdf:type");
assert_eq!(by_pred.len(), 100);
}
}