#[cfg(test)]
mod hdt_tests {
use std::io::Cursor;
use crate::hdt::{
dictionary::{parse_plain_dictionary, DictionarySection, HdtDictionary},
format::{compute_crc16, compute_crc32, read_vbyte, write_vbyte},
triples::{bitmap_access, bitmap_rank, HdtTriplesSection},
HdtError, HdtHeader, HdtReader, HdtTriple,
};
/// Assembles an in-memory byte image used as input by the reader tests.
///
/// Bytes are written in this order:
/// 1. the 5-byte magic `$HDT\x01`;
/// 2. the header: its byte length as a little-endian `u64`, then the raw `hdr_props` bytes;
/// 3. four dictionary sections (`shared`, `subjects`, `predicates`, `objects`), each a
///    little-endian `u32` byte length followed by every term terminated with a NUL byte;
/// 4. `triples_bytes`, appended verbatim.
///
/// # Panics
///
/// Panics if the header or a dictionary section is too large for its length prefix,
/// rather than silently writing a truncated length.
fn build_minimal_hdt(
    hdr_props: &str,
    shared: &[&str],
    subjects: &[&str],
    predicates: &[&str],
    objects: &[&str],
    triples_bytes: &[u8],
) -> Vec<u8> {
    // Function-local import: `TryFrom` only joined the prelude in edition 2021.
    use std::convert::TryFrom;

    let header = hdr_props.as_bytes();
    let header_len = u64::try_from(header.len()).expect("header length exceeds u64::MAX");

    let mut image = Vec::with_capacity(5 + 8 + header.len() + triples_bytes.len());
    image.extend_from_slice(b"$HDT\x01");
    image.extend_from_slice(&header_len.to_le_bytes());
    image.extend_from_slice(header);

    for terms in [shared, subjects, predicates, objects].iter() {
        // Each term contributes its bytes plus one NUL terminator.
        let payload_len: usize = terms.iter().map(|term| term.len() + 1).sum();
        let prefix =
            u32::try_from(payload_len).expect("dictionary section exceeds u32::MAX bytes");
        image.extend_from_slice(&prefix.to_le_bytes());
        for term in terms.iter() {
            image.extend_from_slice(term.as_bytes());
            image.push(0u8);
        }
    }

    image.extend_from_slice(triples_bytes);
    image
}
/// Serialises `triples` into the flat little-endian `u32` layout fed to
/// `HdtTriplesSection::parse` by the tests.
///
/// Words are written in this order: the triple count twice, every predicate ID,
/// every object ID, then `2 * triples.len()` words that are all `1`.
///
/// NOTE(review): the subject component of each tuple is never written, and the trailing
/// words are always `1` regardless of input. Presumably those words are the bitmap
/// entries marking subject/predicate group boundaries, in which case inputs that repeat a
/// subject are not encoded faithfully — confirm against the parser.
///
/// # Panics
///
/// Panics if `triples.len()` does not fit in a `u32`, rather than silently writing a
/// truncated count.
fn build_flat_triples(triples: &[(u32, u32, u32)]) -> Vec<u8> {
    // Function-local import: `TryFrom` only joined the prelude in edition 2021.
    use std::convert::TryFrom;

    let count = u32::try_from(triples.len()).expect("triple count exceeds u32::MAX");
    let count_word = count.to_le_bytes();

    // Two count words plus four words per triple, four bytes each.
    let mut out = Vec::with_capacity(8 + 16 * triples.len());
    out.extend_from_slice(&count_word);
    out.extend_from_slice(&count_word);

    for &(_, predicate, _) in triples {
        out.extend_from_slice(&predicate.to_le_bytes());
    }
    for &(_, _, object) in triples {
        out.extend_from_slice(&object.to_le_bytes());
    }

    // Two marker arrays of `triples.len()` words each, every word set to 1.
    let marker = 1u32.to_le_bytes();
    for _ in 0..2 * triples.len() {
        out.extend_from_slice(&marker);
    }
    out
}
/// The sampled values 0, 1, 42 and 127 must each encode to a single byte equal to the value.
#[test]
fn test_vbyte_encode_single_byte() {
    let cases: [(u64, u8); 4] = [(0, 0x00), (1, 0x01), (42, 0x2A), (127, 0x7F)];
    for &(value, byte) in cases.iter() {
        assert_eq!(write_vbyte(value), vec![byte]);
    }
}
/// Values of 128 and above must spill into additional bytes.
///
/// The 128 and 300 cases pin the byte layout: low 7-bit group first, with the high bit
/// set on every byte except the last. The 16384 case (`1 << 14`) is checked against that
/// same layout instead of only by length.
#[test]
fn test_vbyte_encode_multi_byte() {
    assert_eq!(write_vbyte(128), vec![0x80, 0x01]);
    assert_eq!(write_vbyte(300), vec![0b1010_1100, 0b0000_0010]);
    let enc = write_vbyte(16384);
    assert_eq!(enc.len(), 3);
    // Two empty low groups carrying the continuation bit, then the final group holding bit 14.
    assert_eq!(enc, vec![0x80, 0x80, 0x01]);
}
/// Encoding then decoding must return the original value for samples placed around the
/// 7-, 14- and 21-bit boundaries.
#[test]
fn test_vbyte_roundtrip() {
    let samples = [0u64, 1, 63, 64, 127, 128, 255, 1000, 16383, 16384, 2_097_151, 2_097_152];
    for &original in samples.iter() {
        let bytes = write_vbyte(original);
        let mut reader = Cursor::new(&bytes);
        let restored = read_vbyte(&mut reader).expect("vbyte roundtrip");
        assert_eq!(restored, original, "roundtrip failed for {}", original);
    }
}
/// Checks `compute_crc16` against a fixed expected value for the ASCII digits "123456789".
#[test]
fn test_crc16_known_value() {
    let input = b"123456789";
    let checksum = compute_crc16(input);
    assert_eq!(checksum, 0x29B1);
}
/// Checks `compute_crc32` against a fixed expected value for the ASCII digits "123456789".
#[test]
fn test_crc32_known_value() {
    let input = b"123456789";
    let checksum = compute_crc32(input);
    assert_eq!(checksum, 0xCBF4_3926);
}
/// A plain section made of NUL-terminated strings must yield its terms in input order.
#[test]
fn test_dictionary_section_plain() {
    let raw = b"apple\0banana\0cherry\0";
    let parsed = DictionarySection::from_plain(raw).expect("parse plain");
    let expected = vec!["apple", "banana", "cherry"];
    assert_eq!(parsed.terms, expected);
}
/// Decodes a front-coded section with the second argument set to 2.
///
/// Input is `"abc\0"`, then byte `0x02` followed by `"d\0"`, then `"xyz\0"`; the expected
/// terms are `abc`, `abd`, `xyz`. Presumably `0x02` is the length of the prefix shared
/// with the previous term — inferred from the expected output, confirm against the decoder.
#[test]
fn test_dictionary_section_front_coded() {
    let mut encoded = b"abc\0".to_vec();
    encoded.push(0x02);
    encoded.extend_from_slice(b"d\0xyz\0");
    let parsed = DictionarySection::from_front_coded(&encoded, 2).expect("parse front-coded");
    let expected = vec!["abc", "abd", "xyz"];
    assert_eq!(parsed.terms, expected);
}
/// `id_to_term` is expected to map IDs 1..=3 to the three terms in order and to return
/// `None` for 0 and for 4.
#[test]
fn test_dictionary_id_to_term() {
    let section = DictionarySection::from_plain(b"alpha\0beta\0gamma\0").expect("parse");
    let expectations = [
        (1, Some("alpha")),
        (2, Some("beta")),
        (3, Some("gamma")),
        (0, None),
        (4, None),
    ];
    for &(id, term) in expectations.iter() {
        assert_eq!(section.id_to_term(id), term);
    }
}
/// `term_to_id` is expected to return IDs 1..=3 for the stored terms and `None` for a
/// term that is absent.
#[test]
fn test_dictionary_term_to_id() {
    let section = DictionarySection::from_plain(b"alpha\0beta\0gamma\0").expect("parse");
    let expectations = [
        ("alpha", Some(1)),
        ("beta", Some(2)),
        ("gamma", Some(3)),
        ("delta", None),
    ];
    for &(term, id) in expectations.iter() {
        assert_eq!(section.term_to_id(term), id);
    }
}
/// Probes a two-word bitmap with bits 0 and 63 set in word 0 and bit 0 set in word 1.
///
/// Positions 0, 63 and 64 must read as set; 1 and 65 as clear; 128, which lies past the
/// end of the two words, must also read as clear.
#[test]
fn test_bitmap_access() {
    let words: Vec<u64> = vec![1u64 | (1u64 << 63), 1u64];
    for &set_pos in [0, 63, 64].iter() {
        assert!(bitmap_access(&words, set_pos));
    }
    for &clear_pos in [1, 65, 128].iter() {
        assert!(!bitmap_access(&words, clear_pos));
    }
}
/// Checks `bitmap_rank` on a single word with bits 0, 2 and 4 set.
///
/// The expected values match the number of set bits strictly below the queried position.
#[test]
fn test_bitmap_rank() {
    let words = vec![0b0001_0101u64];
    let expectations = [(0, 0), (1, 1), (3, 2), (5, 3), (64, 3)];
    for &(pos, ones) in expectations.iter() {
        assert_eq!(bitmap_rank(&words, pos), ones);
    }
}
/// Values from `1 << 21` upward must encode to at least four bytes and decode back unchanged.
#[test]
fn test_read_write_vbyte_large() {
    let samples = [2_097_152u64, 10_000_000, u32::MAX as u64, u64::MAX / 2];
    for &original in samples.iter() {
        let bytes = write_vbyte(original);
        assert!(bytes.len() >= 4, "expected >= 4 bytes for {}", original);
        let mut reader = Cursor::new(&bytes);
        let restored = read_vbyte(&mut reader).expect("decode large vbyte");
        assert_eq!(restored, original);
    }
}
/// Input that does not start with the expected magic must be rejected with
/// `HdtError::InvalidMagic`.
#[test]
fn test_hdt_reader_invalid_magic() {
    let garbage = b"not-hdt-data at all".to_vec();
    let outcome = HdtReader::from_bytes(garbage);
    let err = outcome.expect_err("should fail with invalid magic");
    let is_invalid_magic = matches!(err, HdtError::InvalidMagic { .. });
    assert!(is_invalid_magic, "expected InvalidMagic, got {:?}", err);
}
/// Shared terms must resolve through both the subject and the object lookup under the
/// same ID, while a subject-only term (ID 3 here) must not resolve as an object.
#[test]
fn test_dictionary_shared_so_lookup() {
    let alice = "<http://example.org/Alice>";
    let bob = "<http://example.org/Bob>";
    let charlie = "<http://example.org/Charlie>";

    let mut dict = HdtDictionary::new();
    dict.shared.push(alice.to_owned());
    dict.shared.push(bob.to_owned());
    dict.subjects.push(charlie.to_owned());

    for &(id, term) in [(1, alice), (2, bob)].iter() {
        assert_eq!(dict.lookup_subject(id), Some(term));
        assert_eq!(dict.lookup_object(id), Some(term));
    }
    assert_eq!(dict.lookup_subject(3), Some(charlie));
    assert_eq!(dict.lookup_object(3), None);
}
/// `HdtReader::stats` must report the counts declared in the header properties.
///
/// The dictionary and triples sections are left empty, so the expected numbers can only
/// come from the `key=value` header text.
#[test]
fn test_subject_count_from_stats() {
    let props = "triples=5\nsubjects=3\npredicates=2\nobjects=4\nshared=1\nformat=hdt/plain\n";
    let empty_triples = build_flat_triples(&[]);
    let image = build_minimal_hdt(props, &[], &[], &[], &[], &empty_triples);

    let reader = HdtReader::from_bytes(image).expect("parse");
    let summary = reader.stats();

    assert_eq!(summary.triple_count, 5);
    assert_eq!(summary.distinct_subjects, 3);
    assert_eq!(summary.distinct_predicates, 2);
    assert_eq!(summary.distinct_objects, 4);
    assert_eq!(summary.shared_so_count, 1);
}
/// Decodes a front-coded section with the second argument set to 4.
///
/// Input is `"abcde\0"`, three entries of the form `4, <letter>, 0`, then `"xyz\0"`; the
/// expected output is `abcde`, `abcdf`, `abcdg`, `abcdh`, `xyz`.
#[test]
fn test_front_coding_k4() {
    let mut encoded = b"abcde\0".to_vec();
    for &last_letter in [b'f', b'g', b'h'].iter() {
        encoded.push(4);
        encoded.push(last_letter);
        encoded.push(0);
    }
    encoded.extend_from_slice(b"xyz\0");

    let section = DictionarySection::from_front_coded(&encoded, 4).expect("k=4 decode");

    assert_eq!(section.terms.len(), 5);
    let expected = ["abcde", "abcdf", "abcdg", "abcdh", "xyz"];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(section.terms[idx], *want);
    }
}
/// Eight arbitrary bytes (0 through 7) must be rejected with `HdtError::InvalidMagic`.
#[test]
fn test_hdt_magic_bytes() {
    let not_hdt: Vec<u8> = vec![0, 1, 2, 3, 4, 5, 6, 7];
    let failure = HdtReader::from_bytes(not_hdt).expect_err("bad magic");
    let is_invalid_magic = matches!(failure, HdtError::InvalidMagic { .. });
    assert!(is_invalid_magic);
}
/// Terms pushed into `shared` must be reachable under the same ID from both the subject
/// and the object lookup.
#[test]
fn test_dictionary_lookup_shared() {
    let terms = ["<http://example.org/Alice>", "<http://example.org/Bob>"];

    let mut dict = HdtDictionary::new();
    for term in terms.iter() {
        dict.shared.push((*term).to_owned());
    }

    assert_eq!(dict.lookup_subject(1), Some(terms[0]));
    assert_eq!(dict.lookup_subject(2), Some(terms[1]));
    assert_eq!(dict.lookup_object(1), Some(terms[0]));
    assert_eq!(dict.lookup_object(2), Some(terms[1]));
}
/// With one shared term present, the first subject-only term is expected at subject ID 2.
#[test]
fn test_dictionary_lookup_subject_only() {
    let mut dict = HdtDictionary::new();
    dict.shared.push("<http://shared>".to_owned());
    dict.subjects.push("<http://subject-only>".to_owned());

    let found = dict.lookup_subject(2);
    assert_eq!(found, Some("<http://subject-only>"));
}
/// Predicates are expected to be numbered from 1 in insertion order.
#[test]
fn test_dictionary_lookup_predicate() {
    let rdf_type = "<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>";
    let schema_name = "<http://schema.org/name>";

    let mut dict = HdtDictionary::new();
    dict.predicates.push(rdf_type.to_owned());
    dict.predicates.push(schema_name.to_owned());

    assert_eq!(dict.lookup_predicate(1), Some(rdf_type));
    assert_eq!(dict.lookup_predicate(2), Some(schema_name));
}
/// With one shared term present, the first object-only term is expected at object ID 2.
#[test]
fn test_dictionary_lookup_object_only() {
    let mut dict = HdtDictionary::new();
    dict.shared.push("<http://shared>".to_owned());
    dict.objects.push("\"Alice\"".to_owned());

    let found = dict.lookup_object(2);
    assert_eq!(found, Some("\"Alice\""));
}
/// ID 0 must resolve to nothing in every lookup, even when the dictionary is non-empty.
#[test]
fn test_dictionary_invalid_id_zero() {
    let mut dict = HdtDictionary::new();
    dict.shared.push("<http://x>".to_owned());

    let id = 0;
    assert_eq!(dict.lookup_subject(id), None);
    assert_eq!(dict.lookup_predicate(id), None);
    assert_eq!(dict.lookup_object(id), None);
}
/// An ID far beyond the stored terms must resolve to nothing in every lookup.
#[test]
fn test_dictionary_out_of_range() {
    let mut dict = HdtDictionary::new();
    dict.shared.push("<http://x>".to_owned());

    let id = 999;
    assert_eq!(dict.lookup_subject(id), None);
    assert_eq!(dict.lookup_predicate(id), None);
    assert_eq!(dict.lookup_object(id), None);
}
/// With two shared terms and one subject-only term, the subject count is expected to be 3
/// and the object count 2.
#[test]
fn test_dictionary_shared_count() {
    let mut dict = HdtDictionary::new();
    for term in ["s1", "s2"].iter() {
        dict.shared.push((*term).to_owned());
    }
    dict.subjects.push("so1".to_owned());

    let subject_total = dict.subject_count();
    let object_total = dict.object_count();
    assert_eq!(subject_total, 3);
    assert_eq!(object_total, 2);
}
/// A single NUL-terminated string must parse to a one-element list.
#[test]
fn test_parse_plain_dictionary_single() {
    let parsed = parse_plain_dictionary(b"hello\0").expect("parse");
    let expected = vec![String::from("hello")];
    assert_eq!(parsed, expected);
}
/// Several NUL-terminated strings must parse to the same terms in input order.
#[test]
fn test_parse_plain_dictionary_multiple() {
    let parsed = parse_plain_dictionary(b"alpha\0beta\0gamma\0").expect("parse");
    let expected = vec!["alpha", "beta", "gamma"];
    assert_eq!(parsed, expected);
}
/// Empty input must parse successfully and yield no terms.
#[test]
fn test_parse_plain_dictionary_empty() {
    let parsed = parse_plain_dictionary(b"").expect("parse empty");
    let has_no_terms = parsed.is_empty();
    assert!(has_no_terms);
}
/// Parses a two-triple flat section and checks the decoded predicate and object IDs.
///
/// Subject IDs are not asserted; `build_flat_triples` does not write them.
#[test]
fn test_triples_iter_basic() {
    let encoded = build_flat_triples(&[(1, 1, 1), (2, 1, 2)]);
    let section = HdtTriplesSection::parse(&encoded).expect("parse");
    let decoded: Vec<(u32, u32, u32)> = section.iter_ids().collect();

    assert_eq!(decoded.len(), 2);
    let (_, first_predicate, first_object) = decoded[0];
    let (_, _, second_object) = decoded[1];
    assert_eq!(first_predicate, 1);
    assert_eq!(first_object, 1);
    assert_eq!(second_object, 2);
}
/// Encodes three triples and checks that parsing yields the same number of triples.
///
/// Only the count is compared. NOTE(review): `build_flat_triples` ignores the subject
/// component, so a value-level comparison of this input (which repeats subject 1) would
/// presumably not hold — confirm before strengthening the assertion.
#[test]
fn test_triples_section_round_trip() {
    let source: Vec<(u32, u32, u32)> = vec![(1, 1, 1), (1, 2, 3), (2, 1, 2)];
    let encoded = build_flat_triples(&source);
    let section = HdtTriplesSection::parse(&encoded).expect("parse");
    let decoded: Vec<(u32, u32, u32)> = section.iter_ids().collect();
    assert_eq!(decoded.len(), source.len());
}
/// Every `key=value` line of the header properties must surface in the matching
/// `HdtHeader` field.
#[test]
fn test_header_triple_count() {
    let props = "triples=42\nsubjects=10\npredicates=5\nobjects=30\nshared=3\nformat=hdt/plain\n";
    let empty_triples = build_flat_triples(&[]);
    let image = build_minimal_hdt(props, &[], &[], &[], &[], &empty_triples);

    let reader = HdtReader::from_bytes(image).expect("parse");
    let header = reader.header();

    assert_eq!(header.triples_count, 42);
    assert_eq!(header.subjects_count, 10);
    assert_eq!(header.predicates_count, 5);
    assert_eq!(header.objects_count, 30);
    assert_eq!(header.shared_count, 3);
    assert_eq!(header.format, "hdt/plain");
}
/// An empty buffer must be rejected with `HdtError::InvalidMagic`.
#[test]
fn test_hdt_reader_from_bytes_empty() {
    let nothing: Vec<u8> = Vec::new();
    let failure = HdtReader::from_bytes(nothing).expect_err("empty should fail");
    let is_invalid_magic = matches!(failure, HdtError::InvalidMagic { .. });
    assert!(is_invalid_magic);
}
/// Builds the fixture shared by the lookup tests: no shared terms, two subjects, one
/// predicate, two objects, and the flat triples section for `(1, 1, 1)` and `(2, 1, 2)`.
fn build_two_triple_hdt() -> Vec<u8> {
    let flat_triples = build_flat_triples(&[(1, 1, 1), (2, 1, 2)]);
    build_minimal_hdt(
        "triples=2\nsubjects=2\npredicates=1\nobjects=2\nshared=0\nformat=hdt/plain\n",
        &[],
        &["<http://s1>", "<http://s2>"],
        &["<http://p>"],
        &["<http://o1>", "<http://o2>"],
        &flat_triples,
    )
}
/// Subject IDs 1 and 2 of the two-triple fixture must resolve to its two subject terms.
#[test]
fn test_triple_lookup_subject() {
    let reader = HdtReader::from_bytes(build_two_triple_hdt()).expect("parse");
    for &(id, term) in [(1, "<http://s1>"), (2, "<http://s2>")].iter() {
        assert_eq!(reader.lookup_subject(id).expect("lookup"), term);
    }
}
/// Predicate ID 1 of the two-triple fixture must resolve to its only predicate term.
#[test]
fn test_triple_lookup_predicate() {
    let reader = HdtReader::from_bytes(build_two_triple_hdt()).expect("parse");
    let term = reader.lookup_predicate(1).expect("lookup");
    assert_eq!(term, "<http://p>");
}
/// Object IDs 1 and 2 of the two-triple fixture must resolve to its two object terms.
#[test]
fn test_triple_lookup_object() {
    let reader = HdtReader::from_bytes(build_two_triple_hdt()).expect("parse");
    for &(id, term) in [(1, "<http://o1>"), (2, "<http://o2>")].iter() {
        assert_eq!(reader.lookup_object(id).expect("lookup"), term);
    }
}
/// Iterating `reader.triples()` over the two-triple fixture must yield two resolved
/// triples, both carrying the fixture's only predicate term.
#[test]
fn test_triples_iterator_resolves_strings() {
    let reader = HdtReader::from_bytes(build_two_triple_hdt()).expect("parse");
    let resolved = reader
        .triples()
        .collect::<Result<Vec<HdtTriple>, _>>()
        .expect("resolve");

    assert_eq!(resolved.len(), 2);
    for triple in resolved.iter() {
        assert_eq!(triple.predicate, "<http://p>");
    }
}
}