use super::permutation::SuccinctPermutation;
use crate::codec::succinct::WaveletTree;
use crate::graph::rdf::{Term, Triple, TriplePattern};
use hashbrown::HashMap;
use std::sync::Arc;
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct TermDictionary {
term_to_id: HashMap<Arc<Term>, u32, foldhash::fast::RandomState>,
id_to_term: Vec<Arc<Term>>,
}
impl TermDictionary {
#[must_use]
pub fn new() -> Self {
Self {
term_to_id: HashMap::with_hasher(foldhash::fast::RandomState::default()),
id_to_term: Vec::new(),
}
}
#[must_use]
pub fn with_capacity(capacity: usize) -> Self {
Self {
term_to_id: HashMap::with_capacity_and_hasher(
capacity,
foldhash::fast::RandomState::default(),
),
id_to_term: Vec::with_capacity(capacity),
}
}
#[must_use]
pub fn len(&self) -> usize {
self.id_to_term.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.id_to_term.is_empty()
}
pub fn get_or_insert(&mut self, term: Term) -> u32 {
let term = Arc::new(term);
if let Some(&id) = self.term_to_id.get(&term) {
return id;
}
let id = self.id_to_term.len() as u32;
self.id_to_term.push(Arc::clone(&term));
self.term_to_id.insert(term, id);
id
}
#[must_use]
pub fn get_term(&self, id: u32) -> Option<&Term> {
self.id_to_term.get(id as usize).map(Arc::as_ref)
}
#[must_use]
pub fn get_id(&self, term: &Term) -> Option<u32> {
self.term_to_id.get(term).copied()
}
#[must_use]
pub fn size_bytes(&self) -> usize {
let base = std::mem::size_of::<Self>();
let terms: usize = self
.id_to_term
.iter()
.map(|t| std::mem::size_of_val(t.as_ref()) + std::mem::size_of::<Arc<Term>>())
.sum();
let map_overhead = self.term_to_id.capacity()
* (std::mem::size_of::<Arc<Term>>() + std::mem::size_of::<u32>());
base + terms + map_overhead
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct CompactTriple {
subject: u32,
predicate: u32,
object: u32,
}
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct TripleRing {
dict: TermDictionary,
num_triples: usize,
subjects: WaveletTree,
predicates: WaveletTree,
objects: WaveletTree,
spo_to_pos: SuccinctPermutation,
spo_to_osp: SuccinctPermutation,
}
impl TripleRing {
#[must_use]
pub fn from_triples(triples: impl Iterator<Item = Triple>) -> Self {
let mut dict = TermDictionary::new();
let mut compact_triples: Vec<CompactTriple> = Vec::new();
for triple in triples {
let (s, p, o) = triple.into_parts();
let compact = CompactTriple {
subject: dict.get_or_insert(s),
predicate: dict.get_or_insert(p),
object: dict.get_or_insert(o),
};
compact_triples.push(compact);
}
if compact_triples.is_empty() {
return Self {
dict,
num_triples: 0,
subjects: WaveletTree::new(&[]),
predicates: WaveletTree::new(&[]),
objects: WaveletTree::new(&[]),
spo_to_pos: SuccinctPermutation::default(),
spo_to_osp: SuccinctPermutation::default(),
};
}
compact_triples.sort_by_key(|t| (t.subject, t.predicate, t.object));
compact_triples.dedup();
let n = compact_triples.len();
let subjects: Vec<u64> = compact_triples.iter().map(|t| t.subject as u64).collect();
let predicates: Vec<u64> = compact_triples.iter().map(|t| t.predicate as u64).collect();
let objects: Vec<u64> = compact_triples.iter().map(|t| t.object as u64).collect();
let subjects_wt = WaveletTree::new(&subjects);
let predicates_wt = WaveletTree::new(&predicates);
let objects_wt = WaveletTree::new(&objects);
let mut pos_order: Vec<usize> = (0..n).collect();
pos_order.sort_by_key(|&i| {
let t = &compact_triples[i];
(t.predicate, t.object, t.subject)
});
let mut spo_to_pos_arr = vec![0usize; n];
for (pos_idx, &spo_idx) in pos_order.iter().enumerate() {
spo_to_pos_arr[spo_idx] = pos_idx;
}
let mut osp_order: Vec<usize> = (0..n).collect();
osp_order.sort_by_key(|&i| {
let t = &compact_triples[i];
(t.object, t.subject, t.predicate)
});
let mut spo_to_osp_arr = vec![0usize; n];
for (osp_idx, &spo_idx) in osp_order.iter().enumerate() {
spo_to_osp_arr[spo_idx] = osp_idx;
}
Self {
dict,
num_triples: n,
subjects: subjects_wt,
predicates: predicates_wt,
objects: objects_wt,
spo_to_pos: SuccinctPermutation::new(&spo_to_pos_arr),
spo_to_osp: SuccinctPermutation::new(&spo_to_osp_arr),
}
}
#[must_use]
pub fn len(&self) -> usize {
self.num_triples
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.num_triples == 0
}
#[must_use]
pub fn num_terms(&self) -> usize {
self.dict.len()
}
#[must_use]
pub fn get_spo(&self, index: usize) -> Option<Triple> {
if index >= self.num_triples {
return None;
}
let s_id = self.subjects.access(index) as u32;
let p_id = self.predicates.access(index) as u32;
let o_id = self.objects.access(index) as u32;
let s = self.dict.get_term(s_id)?.clone();
let p = self.dict.get_term(p_id)?.clone();
let o = self.dict.get_term(o_id)?.clone();
Some(Triple::new_unchecked(s, p, o))
}
#[must_use]
pub fn subjects_wt(&self) -> &WaveletTree {
&self.subjects
}
#[must_use]
pub fn predicates_wt(&self) -> &WaveletTree {
&self.predicates
}
#[must_use]
pub fn objects_wt(&self) -> &WaveletTree {
&self.objects
}
#[must_use]
pub fn pos_to_spo(&self, pos_index: usize) -> Option<usize> {
self.spo_to_pos.apply_inverse(pos_index)
}
#[must_use]
pub fn osp_to_spo(&self, osp_index: usize) -> Option<usize> {
self.spo_to_osp.apply_inverse(osp_index)
}
#[must_use]
pub fn spo_to_pos(&self, spo_index: usize) -> Option<usize> {
self.spo_to_pos.apply(spo_index)
}
#[must_use]
pub fn spo_to_osp(&self, spo_index: usize) -> Option<usize> {
self.spo_to_osp.apply(spo_index)
}
pub fn find<'a>(&'a self, pattern: &'a TriplePattern) -> impl Iterator<Item = Triple> + 'a {
RingPatternIterator {
ring: self,
pattern,
current: 0,
}
}
#[must_use]
pub fn count(&self, pattern: &TriplePattern) -> usize {
if let (Some(s), Some(p), Some(o)) = (&pattern.subject, &pattern.predicate, &pattern.object)
{
let Some(s_id) = self.dict.get_id(s) else {
return 0;
};
let Some(p_id) = self.dict.get_id(p) else {
return 0;
};
let Some(o_id) = self.dict.get_id(o) else {
return 0;
};
return usize::from(self.contains_ids(s_id, p_id, o_id));
}
match (&pattern.subject, &pattern.predicate, &pattern.object) {
(Some(s), None, None) => {
if let Some(s_id) = self.dict.get_id(s) {
self.subjects.count(s_id as u64)
} else {
0
}
}
(None, Some(p), None) => {
if let Some(p_id) = self.dict.get_id(p) {
self.predicates.count(p_id as u64)
} else {
0
}
}
(None, None, Some(o)) => {
if let Some(o_id) = self.dict.get_id(o) {
self.objects.count(o_id as u64)
} else {
0
}
}
(None, None, None) => self.num_triples,
_ => {
self.find(pattern).count()
}
}
}
fn contains_ids(&self, s_id: u32, p_id: u32, o_id: u32) -> bool {
let s_count = self.subjects.count(s_id as u64);
if s_count == 0 {
return false;
}
for rank in 0..s_count {
if let Some(pos) = self.subjects.select(s_id as u64, rank) {
let p = self.predicates.access(pos);
let o = self.objects.access(pos);
if p as u32 == p_id && o as u32 == o_id {
return true;
}
}
}
false
}
#[must_use]
pub fn dictionary(&self) -> &TermDictionary {
&self.dict
}
#[must_use]
pub fn size_bytes(&self) -> usize {
let base = std::mem::size_of::<Self>();
let dict = self.dict.size_bytes();
let subjects = self.subjects.size_bytes();
let predicates = self.predicates.size_bytes();
let objects = self.objects.size_bytes();
let spo_to_pos = self.spo_to_pos.size_bytes();
let spo_to_osp = self.spo_to_osp.size_bytes();
base + dict + subjects + predicates + objects + spo_to_pos + spo_to_osp
}
pub fn save(&self, mut writer: impl std::io::Write) -> std::io::Result<()> {
bincode::serde::encode_into_std_write(self, &mut writer, bincode::config::standard())
.map_err(|e| std::io::Error::other(e.to_string()))?;
Ok(())
}
fn validate(&self) -> std::io::Result<()> {
let n = self.num_triples;
if self.dict.term_to_id.len() != self.dict.id_to_term.len() {
return Err(std::io::Error::other(format!(
"term dictionary inconsistent: term_to_id has {} entries, id_to_term has {}",
self.dict.term_to_id.len(),
self.dict.id_to_term.len()
)));
}
if self.subjects.len() != n {
return Err(std::io::Error::other(format!(
"subjects wavelet tree length {} != num_triples {n}",
self.subjects.len()
)));
}
if self.predicates.len() != n {
return Err(std::io::Error::other(format!(
"predicates wavelet tree length {} != num_triples {n}",
self.predicates.len()
)));
}
if self.objects.len() != n {
return Err(std::io::Error::other(format!(
"objects wavelet tree length {} != num_triples {n}",
self.objects.len()
)));
}
self.subjects
.validate()
.map_err(|e| std::io::Error::other(format!("subjects wavelet tree: {e}")))?;
self.predicates
.validate()
.map_err(|e| std::io::Error::other(format!("predicates wavelet tree: {e}")))?;
self.objects
.validate()
.map_err(|e| std::io::Error::other(format!("objects wavelet tree: {e}")))?;
let dict_len = self.dict.len() as u64;
for sym in self.subjects.alphabet() {
if sym >= dict_len {
return Err(std::io::Error::other(format!(
"subjects wavelet tree contains symbol {sym} >= dict size {dict_len}"
)));
}
}
for sym in self.predicates.alphabet() {
if sym >= dict_len {
return Err(std::io::Error::other(format!(
"predicates wavelet tree contains symbol {sym} >= dict size {dict_len}"
)));
}
}
for sym in self.objects.alphabet() {
if sym >= dict_len {
return Err(std::io::Error::other(format!(
"objects wavelet tree contains symbol {sym} >= dict size {dict_len}"
)));
}
}
if self.spo_to_pos.len() != n {
return Err(std::io::Error::other(format!(
"spo_to_pos permutation length {} != num_triples {n}",
self.spo_to_pos.len()
)));
}
if self.spo_to_osp.len() != n {
return Err(std::io::Error::other(format!(
"spo_to_osp permutation length {} != num_triples {n}",
self.spo_to_osp.len()
)));
}
Ok(())
}
pub fn load(mut reader: impl std::io::Read) -> std::io::Result<Self> {
let ring: Self =
bincode::serde::decode_from_std_read(&mut reader, bincode::config::standard())
.map_err(|e| std::io::Error::other(e.to_string()))?;
ring.validate()?;
Ok(ring)
}
pub fn save_to_file(&self, path: impl AsRef<std::path::Path>) -> std::io::Result<()> {
let file = std::fs::File::create(path)?;
let writer = std::io::BufWriter::new(file);
self.save(writer)
}
pub fn load_from_file(path: impl AsRef<std::path::Path>) -> std::io::Result<Self> {
let file = std::fs::File::open(path)?;
let reader = std::io::BufReader::new(file);
Self::load(reader)
}
pub fn save_to_bytes(&self) -> std::io::Result<Vec<u8>> {
bincode::serde::encode_to_vec(self, bincode::config::standard())
.map_err(|e| std::io::Error::other(e.to_string()))
}
pub fn load_from_bytes(data: &[u8]) -> std::io::Result<Self> {
let (ring, _bytes_read): (Self, _) =
bincode::serde::decode_from_slice(data, bincode::config::standard())
.map_err(|e| std::io::Error::other(e.to_string()))?;
ring.validate()?;
Ok(ring)
}
}
struct RingPatternIterator<'a> {
ring: &'a TripleRing,
pattern: &'a TriplePattern,
current: usize,
}
impl Iterator for RingPatternIterator<'_> {
type Item = Triple;
fn next(&mut self) -> Option<Self::Item> {
while self.current < self.ring.num_triples {
let idx = self.current;
self.current += 1;
if let Some(triple) = self.ring.get_spo(idx)
&& self.pattern.matches(&triple)
{
return Some(triple);
}
}
None
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_triple(s: &str, p: &str, o: &str) -> Triple {
Triple::new(Term::iri(s), Term::iri(p), Term::iri(o))
}
#[test]
fn test_empty() {
let ring = TripleRing::from_triples(std::iter::empty());
assert!(ring.is_empty());
assert_eq!(ring.len(), 0);
assert_eq!(ring.num_terms(), 0);
}
#[test]
fn test_single_triple() {
let triples = vec![make_triple("s1", "p1", "o1")];
let ring = TripleRing::from_triples(triples.into_iter());
assert_eq!(ring.len(), 1);
assert_eq!(ring.num_terms(), 3);
let retrieved = ring.get_spo(0).unwrap();
assert_eq!(retrieved.subject(), &Term::iri("s1"));
assert_eq!(retrieved.predicate(), &Term::iri("p1"));
assert_eq!(retrieved.object(), &Term::iri("o1"));
}
#[test]
fn test_multiple_triples() {
let triples = vec![
make_triple("s1", "p1", "o1"),
make_triple("s1", "p2", "o2"),
make_triple("s2", "p1", "o1"),
make_triple("s2", "p1", "o3"),
];
let ring = TripleRing::from_triples(triples.into_iter());
assert_eq!(ring.len(), 4);
assert_eq!(ring.num_terms(), 7);
}
#[test]
fn test_deduplication() {
let triples = vec![
make_triple("s1", "p1", "o1"),
make_triple("s1", "p1", "o1"), make_triple("s2", "p1", "o1"),
];
let ring = TripleRing::from_triples(triples.into_iter());
assert_eq!(ring.len(), 2);
}
#[test]
fn test_find_by_subject() {
let triples = vec![
make_triple("alix", "knows", "gus"),
make_triple("alix", "knows", "harm"),
make_triple("gus", "knows", "harm"),
];
let ring = TripleRing::from_triples(triples.into_iter());
let pattern = TriplePattern::with_subject(Term::iri("alix"));
let results: Vec<Triple> = ring.find(&pattern).collect();
assert_eq!(results.len(), 2);
for triple in &results {
assert_eq!(triple.subject(), &Term::iri("alix"));
}
}
#[test]
fn test_find_by_predicate() {
let triples = vec![
make_triple("s1", "type", "Person"),
make_triple("s2", "type", "Place"),
make_triple("s1", "name", "Alix"),
];
let ring = TripleRing::from_triples(triples.into_iter());
let pattern = TriplePattern::with_predicate(Term::iri("type"));
let results: Vec<Triple> = ring.find(&pattern).collect();
assert_eq!(results.len(), 2);
}
#[test]
fn test_find_by_object() {
let triples = vec![
make_triple("s1", "p1", "shared"),
make_triple("s2", "p2", "shared"),
make_triple("s3", "p3", "other"),
];
let ring = TripleRing::from_triples(triples.into_iter());
let pattern = TriplePattern::with_object(Term::iri("shared"));
let results: Vec<Triple> = ring.find(&pattern).collect();
assert_eq!(results.len(), 2);
}
#[test]
fn test_count() {
let triples = vec![
make_triple("s1", "p1", "o1"),
make_triple("s1", "p2", "o2"),
make_triple("s2", "p1", "o1"),
make_triple("s2", "p1", "o3"),
];
let ring = TripleRing::from_triples(triples.into_iter());
assert_eq!(ring.count(&TriplePattern::with_subject(Term::iri("s1"))), 2);
assert_eq!(ring.count(&TriplePattern::with_subject(Term::iri("s2"))), 2);
assert_eq!(
ring.count(&TriplePattern::with_predicate(Term::iri("p1"))),
3
);
assert_eq!(
ring.count(&TriplePattern::with_predicate(Term::iri("p2"))),
1
);
assert_eq!(ring.count(&TriplePattern::with_object(Term::iri("o1"))), 2);
assert_eq!(ring.count(&TriplePattern::any()), 4);
}
#[test]
fn test_permutation_consistency() {
let triples = vec![
make_triple("a", "x", "1"),
make_triple("a", "y", "2"),
make_triple("b", "x", "1"),
make_triple("b", "y", "3"),
];
let ring = TripleRing::from_triples(triples.into_iter());
for spo_idx in 0..ring.len() {
if let Some(pos_idx) = ring.spo_to_pos(spo_idx) {
let back = ring.pos_to_spo(pos_idx);
assert_eq!(back, Some(spo_idx), "POS roundtrip failed for {}", spo_idx);
}
if let Some(osp_idx) = ring.spo_to_osp(spo_idx) {
let back = ring.osp_to_spo(osp_idx);
assert_eq!(back, Some(spo_idx), "OSP roundtrip failed for {}", spo_idx);
}
}
}
#[test]
fn test_size_bytes() {
let triples: Vec<Triple> = (0..100)
.map(|i| make_triple(&format!("s{}", i % 10), "knows", &format!("o{}", i % 20)))
.collect();
let ring = TripleRing::from_triples(triples.into_iter());
let size = ring.size_bytes();
assert!(size > 0);
assert!(size < 100_000, "Size {} seems too large", size);
}
#[test]
fn test_term_dictionary_with_capacity() {
let mut dict = TermDictionary::with_capacity(100);
assert!(dict.is_empty());
assert_eq!(dict.len(), 0);
let id1 = dict.get_or_insert(Term::iri("test1"));
let id2 = dict.get_or_insert(Term::iri("test2"));
assert_eq!(id1, 0);
assert_eq!(id2, 1);
assert_eq!(dict.len(), 2);
}
#[test]
fn test_term_dictionary_size_bytes() {
let mut dict = TermDictionary::new();
let empty_size = dict.size_bytes();
assert!(empty_size > 0);
dict.get_or_insert(Term::iri("some_long_term_name"));
let size_with_term = dict.size_bytes();
assert!(size_with_term > empty_size);
}
#[test]
fn test_term_dictionary_get_existing() {
let mut dict = TermDictionary::new();
let term = Term::iri("test");
let id1 = dict.get_or_insert(term.clone());
let id2 = dict.get_or_insert(term.clone());
assert_eq!(id1, id2);
assert_eq!(dict.len(), 1);
}
#[test]
fn test_term_dictionary_get_term_not_found() {
let dict = TermDictionary::new();
assert!(dict.get_term(999).is_none());
}
#[test]
fn test_term_dictionary_get_id_not_found() {
let dict = TermDictionary::new();
assert!(dict.get_id(&Term::iri("nonexistent")).is_none());
}
#[test]
fn test_get_spo_out_of_bounds() {
let triples = vec![make_triple("s", "p", "o")];
let ring = TripleRing::from_triples(triples.into_iter());
assert!(ring.get_spo(0).is_some());
assert!(ring.get_spo(1).is_none());
assert!(ring.get_spo(100).is_none());
}
#[test]
fn test_count_exact_match() {
let triples = vec![
make_triple("s1", "p1", "o1"),
make_triple("s1", "p1", "o2"),
make_triple("s2", "p1", "o1"),
];
let ring = TripleRing::from_triples(triples.into_iter());
let pattern = TriplePattern {
subject: Some(Term::iri("s1")),
predicate: Some(Term::iri("p1")),
object: Some(Term::iri("o1")),
};
assert_eq!(ring.count(&pattern), 1);
let pattern_missing = TriplePattern {
subject: Some(Term::iri("s1")),
predicate: Some(Term::iri("p1")),
object: Some(Term::iri("o3")),
};
assert_eq!(ring.count(&pattern_missing), 0);
}
#[test]
fn test_count_two_components_bound() {
let triples = vec![
make_triple("s1", "p1", "o1"),
make_triple("s1", "p1", "o2"),
make_triple("s1", "p2", "o1"),
make_triple("s2", "p1", "o1"),
];
let ring = TripleRing::from_triples(triples.into_iter());
let pattern_sp = TriplePattern {
subject: Some(Term::iri("s1")),
predicate: Some(Term::iri("p1")),
object: None,
};
assert_eq!(ring.count(&pattern_sp), 2);
let pattern_so = TriplePattern {
subject: Some(Term::iri("s1")),
predicate: None,
object: Some(Term::iri("o1")),
};
assert_eq!(ring.count(&pattern_so), 2);
let pattern_po = TriplePattern {
subject: None,
predicate: Some(Term::iri("p1")),
object: Some(Term::iri("o1")),
};
assert_eq!(ring.count(&pattern_po), 2);
}
#[test]
fn test_count_nonexistent_term() {
let triples = vec![make_triple("s1", "p1", "o1")];
let ring = TripleRing::from_triples(triples.into_iter());
assert_eq!(
ring.count(&TriplePattern::with_subject(Term::iri("nonexistent"))),
0
);
assert_eq!(
ring.count(&TriplePattern::with_predicate(Term::iri("nonexistent"))),
0
);
assert_eq!(
ring.count(&TriplePattern::with_object(Term::iri("nonexistent"))),
0
);
}
#[test]
fn test_count_exact_match_nonexistent_subject() {
let triples = vec![make_triple("s1", "p1", "o1")];
let ring = TripleRing::from_triples(triples.into_iter());
let pattern = TriplePattern {
subject: Some(Term::iri("nonexistent")),
predicate: Some(Term::iri("p1")),
object: Some(Term::iri("o1")),
};
assert_eq!(ring.count(&pattern), 0);
}
#[test]
fn test_count_exact_match_nonexistent_predicate() {
let triples = vec![make_triple("s1", "p1", "o1")];
let ring = TripleRing::from_triples(triples.into_iter());
let pattern = TriplePattern {
subject: Some(Term::iri("s1")),
predicate: Some(Term::iri("nonexistent")),
object: Some(Term::iri("o1")),
};
assert_eq!(ring.count(&pattern), 0);
}
#[test]
fn test_count_exact_match_nonexistent_object() {
let triples = vec![make_triple("s1", "p1", "o1")];
let ring = TripleRing::from_triples(triples.into_iter());
let pattern = TriplePattern {
subject: Some(Term::iri("s1")),
predicate: Some(Term::iri("p1")),
object: Some(Term::iri("nonexistent")),
};
assert_eq!(ring.count(&pattern), 0);
}
#[test]
fn test_dictionary_accessor() {
let triples = vec![
make_triple("alix", "knows", "gus"),
make_triple("alix", "likes", "vincent"),
];
let ring = TripleRing::from_triples(triples.into_iter());
let dict = ring.dictionary();
assert!(!dict.is_empty());
assert_eq!(dict.len(), 5);
assert!(dict.get_id(&Term::iri("alix")).is_some());
assert!(dict.get_id(&Term::iri("knows")).is_some());
assert!(dict.get_id(&Term::iri("gus")).is_some());
}
#[test]
fn test_same_term_multiple_positions() {
let triples = vec![
make_triple("same", "same", "same"),
make_triple("same", "other", "different"),
];
let ring = TripleRing::from_triples(triples.into_iter());
assert_eq!(ring.num_terms(), 3);
assert_eq!(ring.len(), 2);
let pattern_s = TriplePattern::with_subject(Term::iri("same"));
assert_eq!(ring.count(&pattern_s), 2);
let pattern_p = TriplePattern::with_predicate(Term::iri("same"));
assert_eq!(ring.count(&pattern_p), 1);
let pattern_o = TriplePattern::with_object(Term::iri("same"));
assert_eq!(ring.count(&pattern_o), 1);
}
#[test]
fn test_find_no_matches() {
let triples = vec![make_triple("s1", "p1", "o1")];
let ring = TripleRing::from_triples(triples.into_iter());
let pattern = TriplePattern::with_subject(Term::iri("nonexistent"));
let results: Vec<Triple> = ring.find(&pattern).collect();
assert!(results.is_empty());
}
#[test]
fn test_find_all_triples() {
let triples = vec![
make_triple("s1", "p1", "o1"),
make_triple("s2", "p2", "o2"),
make_triple("s3", "p3", "o3"),
];
let ring = TripleRing::from_triples(triples.into_iter());
let pattern = TriplePattern::any();
let results: Vec<Triple> = ring.find(&pattern).collect();
assert_eq!(results.len(), 3);
}
#[test]
fn test_wavelet_tree_accessors() {
let triples = vec![make_triple("s", "p", "o")];
let ring = TripleRing::from_triples(triples.into_iter());
let subjects_wt = ring.subjects_wt();
let predicates_wt = ring.predicates_wt();
let objects_wt = ring.objects_wt();
assert_eq!(subjects_wt.len(), 1);
assert_eq!(predicates_wt.len(), 1);
assert_eq!(objects_wt.len(), 1);
}
#[test]
fn test_permutation_out_of_bounds() {
let triples = vec![make_triple("s", "p", "o")];
let ring = TripleRing::from_triples(triples.into_iter());
assert!(ring.spo_to_pos(0).is_some());
assert!(ring.spo_to_osp(0).is_some());
assert!(ring.spo_to_pos(100).is_none());
assert!(ring.spo_to_osp(100).is_none());
assert!(ring.pos_to_spo(100).is_none());
assert!(ring.osp_to_spo(100).is_none());
}
#[test]
fn test_contains_ids_no_match() {
let triples = vec![make_triple("s1", "p1", "o1"), make_triple("s1", "p2", "o2")];
let ring = TripleRing::from_triples(triples.into_iter());
let pattern = TriplePattern {
subject: Some(Term::iri("s1")),
predicate: Some(Term::iri("p1")),
object: Some(Term::iri("o2")),
};
assert_eq!(ring.count(&pattern), 0);
}
#[test]
fn test_empty_ring_operations() {
let ring = TripleRing::from_triples(std::iter::empty());
assert!(ring.is_empty());
assert_eq!(ring.len(), 0);
assert!(ring.get_spo(0).is_none());
assert_eq!(ring.count(&TriplePattern::any()), 0);
assert_eq!(ring.count(&TriplePattern::with_subject(Term::iri("s"))), 0);
assert!(ring.spo_to_pos(0).is_none());
assert!(ring.osp_to_spo(0).is_none());
let results: Vec<Triple> = ring.find(&TriplePattern::any()).collect();
assert!(results.is_empty());
}
#[test]
fn test_serialization_roundtrip() {
let triples = vec![
Triple::new(
Term::iri("http://ex.org/alix"),
Term::iri("http://xmlns.com/foaf/0.1/name"),
Term::literal("Alix"),
),
Triple::new(
Term::iri("http://ex.org/gus"),
Term::iri("http://xmlns.com/foaf/0.1/name"),
Term::literal("Gus"),
),
Triple::new(
Term::iri("http://ex.org/alix"),
Term::iri("http://xmlns.com/foaf/0.1/knows"),
Term::iri("http://ex.org/gus"),
),
];
let ring = TripleRing::from_triples(triples.into_iter());
assert_eq!(ring.len(), 3);
let mut buf = Vec::new();
ring.save(&mut buf).expect("save should succeed");
assert!(!buf.is_empty());
let loaded = TripleRing::load(&buf[..]).expect("load should succeed");
assert_eq!(loaded.len(), ring.len());
assert_eq!(loaded.num_terms(), ring.num_terms());
let original: Vec<Triple> = ring.find(&TriplePattern::any()).collect();
let restored: Vec<Triple> = loaded.find(&TriplePattern::any()).collect();
assert_eq!(original.len(), restored.len());
let name_pattern = TriplePattern {
subject: None,
predicate: Some(Term::iri("http://xmlns.com/foaf/0.1/name")),
object: None,
};
assert_eq!(loaded.count(&name_pattern), 2);
}
#[test]
fn test_save_load_file() {
let triples = vec![
Triple::new(
Term::iri("http://ex.org/a"),
Term::iri("http://ex.org/p"),
Term::iri("http://ex.org/b"),
),
Triple::new(
Term::iri("http://ex.org/b"),
Term::iri("http://ex.org/p"),
Term::iri("http://ex.org/c"),
),
];
let ring = TripleRing::from_triples(triples.into_iter());
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("test.ring");
ring.save_to_file(&path).expect("save_to_file");
let loaded = TripleRing::load_from_file(&path).expect("load_from_file");
assert_eq!(loaded.len(), 2);
assert_eq!(loaded.count(&TriplePattern::any()), 2);
}
#[test]
fn test_load_rejects_truncated_data() {
let triples = vec![make_triple("s1", "p1", "o1")];
let ring = TripleRing::from_triples(triples.into_iter());
let bytes = ring.save_to_bytes().expect("save_to_bytes");
let truncated = &bytes[..bytes.len() / 2];
let result = TripleRing::load_from_bytes(truncated);
assert!(result.is_err(), "truncated data should fail to load");
}
#[test]
fn test_load_rejects_empty_bytes() {
let result = TripleRing::load_from_bytes(&[]);
assert!(result.is_err(), "empty bytes should fail to load");
}
#[test]
fn test_load_rejects_garbage_bytes() {
let garbage = vec![0xFF; 256];
let result = TripleRing::load_from_bytes(&garbage);
assert!(result.is_err(), "garbage bytes should fail to load");
}
#[test]
fn test_load_from_bytes_roundtrip() {
let triples = vec![make_triple("s1", "p1", "o1"), make_triple("s2", "p2", "o2")];
let ring = TripleRing::from_triples(triples.into_iter());
let bytes = ring.save_to_bytes().expect("save_to_bytes");
let loaded = TripleRing::load_from_bytes(&bytes).expect("load_from_bytes");
assert_eq!(loaded.len(), ring.len());
assert_eq!(loaded.num_terms(), ring.num_terms());
}
#[test]
fn test_validate_passes_for_valid_ring() {
let triples = vec![
make_triple("alix", "knows", "gus"),
make_triple("alix", "likes", "vincent"),
make_triple("gus", "knows", "vincent"),
];
let ring = TripleRing::from_triples(triples.into_iter());
assert!(ring.validate().is_ok());
}
#[test]
fn test_validate_passes_for_empty_ring() {
let ring = TripleRing::from_triples(std::iter::empty());
assert!(ring.validate().is_ok());
}
#[test]
fn test_save_load_bytes_roundtrip() {
let triples = vec![
make_triple("alix", "knows", "gus"),
make_triple("alix", "likes", "vincent"),
make_triple("gus", "knows", "vincent"),
];
let ring = TripleRing::from_triples(triples.into_iter());
let bytes = ring.save_to_bytes().unwrap();
let loaded = TripleRing::load_from_bytes(&bytes).unwrap();
assert_eq!(loaded.len(), ring.len());
let pattern = TriplePattern {
subject: None,
predicate: None,
object: None,
};
assert_eq!(loaded.count(&pattern), ring.count(&pattern));
}
#[test]
fn test_save_load_writer_reader_roundtrip() {
let triples = vec![
make_triple("alix", "knows", "gus"),
make_triple("gus", "likes", "vincent"),
];
let ring = TripleRing::from_triples(triples.into_iter());
let mut buf = Vec::new();
ring.save(&mut buf).unwrap();
let loaded = TripleRing::load(&buf[..]).unwrap();
assert_eq!(loaded.len(), ring.len());
}
#[test]
fn test_load_from_bytes_corrupt_data_fails() {
let result = TripleRing::load_from_bytes(&[0xFF, 0xFE, 0xFD, 0xFC]);
assert!(result.is_err());
}
#[test]
fn test_save_load_file_roundtrip() {
let triples = vec![
make_triple("alix", "knows", "gus"),
make_triple("gus", "knows", "vincent"),
];
let ring = TripleRing::from_triples(triples.into_iter());
let dir = tempfile::tempdir().expect("tempdir");
let path = dir.path().join("ring_roundtrip.bin");
ring.save_to_file(&path).unwrap();
let loaded = TripleRing::load_from_file(&path).unwrap();
assert_eq!(loaded.len(), ring.len());
}
#[test]
fn test_save_load_empty_ring() {
let ring = TripleRing::from_triples(std::iter::empty());
let bytes = ring.save_to_bytes().unwrap();
let loaded = TripleRing::load_from_bytes(&bytes).unwrap();
assert_eq!(loaded.len(), 0);
}
}