use bytes::Bytes;
use crate::index::ring::{
PackedDictError, PackedPermutationError, PackedTermDictionary, PackedWaveletError,
SuccinctPermutation, TripleRing, deserialize_permutation, deserialize_wavelet_tree,
serialize_permutation, serialize_wavelet_tree,
};
const MAGIC: &[u8; 4] = b"GRFR";
const VERSION: u8 = 2;
const HEADER_SIZE: usize = 64;
const TRAILER_SIZE: usize = 4;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PackedRingError {
TruncatedHeader,
BadMagic,
UnsupportedVersion(u8),
OffsetOutOfBounds {
section: &'static str,
offset: u64,
},
ChecksumMismatch {
expected: u32,
actual: u32,
},
Dict(PackedDictError),
Wavelet(PackedWaveletError),
Permutation(PackedPermutationError),
NumTriplesMismatch {
declared: usize,
observed: usize,
},
RingInvariantViolation(super::triple_ring::TripleRingInvariantError),
}
impl std::fmt::Display for PackedRingError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::TruncatedHeader => write!(f, "ring v2 header truncated"),
Self::BadMagic => write!(f, "ring v2 bad magic (expected 'GRFR')"),
Self::UnsupportedVersion(v) => write!(f, "ring v2 unsupported version {v}"),
Self::OffsetOutOfBounds { section, offset } => write!(
f,
"ring v2 offset out of bounds: section '{section}' at {offset}"
),
Self::ChecksumMismatch { expected, actual } => write!(
f,
"ring v2 CRC mismatch: expected {expected:#010X}, got {actual:#010X}"
),
Self::Dict(e) => write!(f, "ring v2 dictionary parse error: {e}"),
Self::Wavelet(e) => write!(f, "ring v2 wavelet parse error: {e}"),
Self::Permutation(e) => write!(f, "ring v2 permutation parse error: {e}"),
Self::NumTriplesMismatch { declared, observed } => write!(
f,
"ring v2 num_triples mismatch: declared {declared}, observed {observed}"
),
Self::RingInvariantViolation(e) => {
write!(f, "ring v2 ring invariant violation: {e}")
}
}
}
}
impl std::error::Error for PackedRingError {}
impl From<PackedDictError> for PackedRingError {
fn from(e: PackedDictError) -> Self {
Self::Dict(e)
}
}
impl From<PackedWaveletError> for PackedRingError {
fn from(e: PackedWaveletError) -> Self {
Self::Wavelet(e)
}
}
impl From<PackedPermutationError> for PackedRingError {
fn from(e: PackedPermutationError) -> Self {
Self::Permutation(e)
}
}
impl From<super::triple_ring::TripleRingInvariantError> for PackedRingError {
fn from(e: super::triple_ring::TripleRingInvariantError) -> Self {
Self::RingInvariantViolation(e)
}
}
#[must_use]
pub fn serialize_triple_ring(ring: &TripleRing) -> Vec<u8> {
let dict_bytes = PackedTermDictionary::from_term_dict(ring.dictionary()).to_bytes();
let subj_bytes = serialize_wavelet_tree(ring.subjects_wt());
let pred_bytes = serialize_wavelet_tree(ring.predicates_wt());
let obj_bytes = serialize_wavelet_tree(ring.objects_wt());
let pos_bytes = serialize_permutation(ring.spo_to_pos_perm());
let osp_bytes = serialize_permutation(ring.spo_to_osp_perm());
let dict_offset = HEADER_SIZE as u64;
let subj_offset = dict_offset + dict_bytes.len() as u64;
let pred_offset = subj_offset + subj_bytes.len() as u64;
let obj_offset = pred_offset + pred_bytes.len() as u64;
let pos_offset = obj_offset + obj_bytes.len() as u64;
let osp_offset = pos_offset + pos_bytes.len() as u64;
let body_end = osp_offset + osp_bytes.len() as u64;
let total = usize::try_from(body_end).unwrap_or(usize::MAX) + TRAILER_SIZE;
let mut buf = Vec::with_capacity(total);
buf.extend_from_slice(MAGIC); buf.push(VERSION); buf.extend_from_slice(&[0u8; 3]); buf.extend_from_slice(&(ring.len() as u64).to_le_bytes()); buf.extend_from_slice(&dict_offset.to_le_bytes()); buf.extend_from_slice(&subj_offset.to_le_bytes()); buf.extend_from_slice(&pred_offset.to_le_bytes()); buf.extend_from_slice(&obj_offset.to_le_bytes()); buf.extend_from_slice(&pos_offset.to_le_bytes()); buf.extend_from_slice(&osp_offset.to_le_bytes());
buf.extend_from_slice(&dict_bytes);
buf.extend_from_slice(&subj_bytes);
buf.extend_from_slice(&pred_bytes);
buf.extend_from_slice(&obj_bytes);
buf.extend_from_slice(&pos_bytes);
buf.extend_from_slice(&osp_bytes);
let crc = crc32fast::hash(&buf);
buf.extend_from_slice(&crc.to_le_bytes());
buf
}
pub fn deserialize_triple_ring(data: Bytes) -> Result<TripleRing, PackedRingError> {
if data.len() < HEADER_SIZE + TRAILER_SIZE {
return Err(PackedRingError::TruncatedHeader);
}
if &data[0..4] != MAGIC {
return Err(PackedRingError::BadMagic);
}
let version = data[4];
if version != VERSION {
return Err(PackedRingError::UnsupportedVersion(version));
}
let body_end = data.len() - TRAILER_SIZE;
let trailer: [u8; 4] = data[body_end..].try_into().expect("4-byte trailer slice");
let expected_crc = u32::from_le_bytes(trailer);
let actual_crc = crc32fast::hash(&data[..body_end]);
if actual_crc != expected_crc {
return Err(PackedRingError::ChecksumMismatch {
expected: expected_crc,
actual: actual_crc,
});
}
let num_triples_raw = u64::from_le_bytes(data[8..16].try_into().expect("8-byte slice"));
let num_triples =
usize::try_from(num_triples_raw).map_err(|_| PackedRingError::OffsetOutOfBounds {
section: "num_triples",
offset: num_triples_raw,
})?;
let dict_offset = u64::from_le_bytes(data[16..24].try_into().expect("8-byte slice"));
let subj_offset = u64::from_le_bytes(data[24..32].try_into().expect("8-byte slice"));
let pred_offset = u64::from_le_bytes(data[32..40].try_into().expect("8-byte slice"));
let obj_offset = u64::from_le_bytes(data[40..48].try_into().expect("8-byte slice"));
let pos_offset = u64::from_le_bytes(data[48..56].try_into().expect("8-byte slice"));
let osp_offset = u64::from_le_bytes(data[56..64].try_into().expect("8-byte slice"));
let body_end_u64 = body_end as u64;
for (section, offset) in [
("dict", dict_offset),
("subjects", subj_offset),
("predicates", pred_offset),
("objects", obj_offset),
("spo_to_pos", pos_offset),
("spo_to_osp", osp_offset),
] {
if offset > body_end_u64 {
return Err(PackedRingError::OffsetOutOfBounds { section, offset });
}
}
let offsets = [
dict_offset,
subj_offset,
pred_offset,
obj_offset,
pos_offset,
osp_offset,
];
for window in offsets.windows(2) {
if window[0] >= window[1] {
return Err(PackedRingError::OffsetOutOfBounds {
section: "(non-monotonic offsets)",
offset: window[1],
});
}
}
let to_usize = |o: u64| -> Result<usize, PackedRingError> {
usize::try_from(o).map_err(|_| PackedRingError::OffsetOutOfBounds {
section: "(offset overflow)",
offset: o,
})
};
let dict_slice = data.slice(to_usize(dict_offset)?..to_usize(subj_offset)?);
let subj_slice = data.slice(to_usize(subj_offset)?..to_usize(pred_offset)?);
let pred_slice = data.slice(to_usize(pred_offset)?..to_usize(obj_offset)?);
let obj_slice = data.slice(to_usize(obj_offset)?..to_usize(pos_offset)?);
let pos_slice = data.slice(to_usize(pos_offset)?..to_usize(osp_offset)?);
let osp_slice = data.slice(to_usize(osp_offset)?..body_end);
let dict = PackedTermDictionary::from_bytes(dict_slice)?;
let subjects = deserialize_wavelet_tree(subj_slice)?;
let predicates = deserialize_wavelet_tree(pred_slice)?;
let objects = deserialize_wavelet_tree(obj_slice)?;
let spo_to_pos: SuccinctPermutation = deserialize_permutation(pos_slice)?;
let spo_to_osp: SuccinctPermutation = deserialize_permutation(osp_slice)?;
if subjects.len() != num_triples {
return Err(PackedRingError::NumTriplesMismatch {
declared: num_triples,
observed: subjects.len(),
});
}
TripleRing::from_packed_parts(
dict,
num_triples,
subjects,
predicates,
objects,
spo_to_pos,
spo_to_osp,
)
.map_err(PackedRingError::RingInvariantViolation)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::rdf::{Term, Triple, TriplePattern};
fn build_test_ring() -> TripleRing {
let triples = vec![
Triple::new(
Term::iri("http://ex.org/alix"),
Term::iri("http://xmlns.com/foaf/0.1/name"),
Term::literal("Alix"),
),
Triple::new(
Term::iri("http://ex.org/gus"),
Term::iri("http://xmlns.com/foaf/0.1/name"),
Term::literal("Gus"),
),
Triple::new(
Term::iri("http://ex.org/alix"),
Term::iri("http://xmlns.com/foaf/0.1/knows"),
Term::iri("http://ex.org/gus"),
),
];
TripleRing::from_triples(triples.into_iter())
}
#[test]
fn alix_packed_ring_roundtrip() {
let ring = build_test_ring();
let bytes = serialize_triple_ring(&ring);
let restored = deserialize_triple_ring(Bytes::from(bytes)).expect("deserialize");
assert_eq!(restored.len(), ring.len());
assert_eq!(restored.num_terms(), ring.num_terms());
let pattern = TriplePattern {
subject: None,
predicate: Some(Term::iri("http://xmlns.com/foaf/0.1/name")),
object: None,
};
assert_eq!(restored.count(&pattern), ring.count(&pattern));
assert_eq!(restored.count(&pattern), 2);
}
#[test]
fn gus_packed_ring_empty() {
let ring = TripleRing::from_triples(std::iter::empty());
let bytes = serialize_triple_ring(&ring);
let restored = deserialize_triple_ring(Bytes::from(bytes)).expect("deserialize empty");
assert_eq!(restored.len(), 0);
assert!(restored.is_empty());
}
#[test]
fn vincent_packed_ring_bad_magic_rejected() {
let bad = Bytes::from(vec![0u8; HEADER_SIZE + TRAILER_SIZE]);
assert_eq!(
deserialize_triple_ring(bad).unwrap_err(),
PackedRingError::BadMagic
);
}
#[test]
fn jules_packed_ring_truncated_header_rejected() {
let short = Bytes::from(vec![b'G', b'R', b'F', b'R']);
assert_eq!(
deserialize_triple_ring(short).unwrap_err(),
PackedRingError::TruncatedHeader
);
}
#[test]
fn mia_packed_ring_unsupported_version_rejected() {
let mut buf = vec![0u8; HEADER_SIZE + TRAILER_SIZE];
buf[..4].copy_from_slice(MAGIC);
buf[4] = 99;
let crc = crc32fast::hash(&buf[..buf.len() - TRAILER_SIZE]);
let len = buf.len();
buf[len - 4..].copy_from_slice(&crc.to_le_bytes());
assert_eq!(
deserialize_triple_ring(Bytes::from(buf)).unwrap_err(),
PackedRingError::UnsupportedVersion(99)
);
}
#[test]
fn shosanna_packed_ring_corrupted_byte_caught_by_crc() {
let ring = build_test_ring();
let mut bytes = serialize_triple_ring(&ring);
let len = bytes.len();
bytes[len / 2] ^= 0x01;
let result = deserialize_triple_ring(Bytes::from(bytes));
assert!(matches!(
result.unwrap_err(),
PackedRingError::ChecksumMismatch { .. }
));
}
#[test]
fn beatrix_packed_ring_query_correctness() {
let triples = vec![
Triple::new(
Term::iri("http://ex.org/a"),
Term::iri("http://ex.org/p"),
Term::iri("http://ex.org/x"),
),
Triple::new(
Term::iri("http://ex.org/a"),
Term::iri("http://ex.org/p"),
Term::iri("http://ex.org/y"),
),
Triple::new(
Term::iri("http://ex.org/b"),
Term::iri("http://ex.org/p"),
Term::iri("http://ex.org/x"),
),
Triple::new(
Term::iri("http://ex.org/b"),
Term::iri("http://ex.org/q"),
Term::iri("http://ex.org/y"),
),
];
let ring = TripleRing::from_triples(triples.into_iter());
let bytes = serialize_triple_ring(&ring);
let restored = deserialize_triple_ring(Bytes::from(bytes)).expect("deserialize");
let all = TriplePattern {
subject: None,
predicate: None,
object: None,
};
assert_eq!(restored.count(&all), 4);
let p_only = TriplePattern {
subject: None,
predicate: Some(Term::iri("http://ex.org/p")),
object: None,
};
assert_eq!(restored.count(&p_only), 3);
assert_eq!(restored.count(&p_only), ring.count(&p_only));
let x_only = TriplePattern {
subject: None,
predicate: None,
object: Some(Term::iri("http://ex.org/x")),
};
assert_eq!(restored.count(&x_only), 2);
let a_p = TriplePattern {
subject: Some(Term::iri("http://ex.org/a")),
predicate: Some(Term::iri("http://ex.org/p")),
object: None,
};
assert_eq!(restored.count(&a_p), 2);
}
#[test]
fn hans_packed_ring_size_smaller_than_bincode() {
let triples: Vec<Triple> = (0..100u32)
.flat_map(|i| {
let s = format!("http://ex.org/s-{i}");
(0..5u32).map(move |j| {
Triple::new(
Term::iri(s.clone()),
Term::iri(format!("http://ex.org/p-{}", j)),
Term::literal(format!("value-{}-{}", i, j)),
)
})
})
.collect();
let ring = TripleRing::from_triples(triples.into_iter());
let v2_bytes = serialize_triple_ring(&ring);
let v1_bytes = ring.save_to_bytes().expect("v1 save");
eprintln!(
"v1 bincode: {} bytes, v2 packed: {} bytes",
v1_bytes.len(),
v2_bytes.len()
);
assert!(
v2_bytes.len() < v1_bytes.len(),
"v2 packed must be smaller than v1 bincode (v1={}, v2={})",
v1_bytes.len(),
v2_bytes.len()
);
}
}