use super::HdtError;
/// Returns whether bit `pos` of the packed bitmap is set.
///
/// Bits are stored 64 per word, least-significant bit first: bit `pos`
/// lives in word `pos / 64` at bit offset `pos % 64`. Positions that fall
/// beyond the end of `bitmap` read as `false` rather than panicking.
pub fn bitmap_access(bitmap: &[u64], pos: usize) -> bool {
    match bitmap.get(pos / 64) {
        Some(&word) => word & (1u64 << (pos % 64)) != 0,
        // Out-of-range positions are treated as unset bits.
        None => false,
    }
}
/// Counts the set bits in positions `0..pos` (exclusive of `pos`).
///
/// Words and bits that lie beyond the end of `bitmap` contribute nothing,
/// so a `pos` past the end yields the total number of set bits.
pub fn bitmap_rank(bitmap: &[u64], pos: usize) -> u64 {
    let whole_words = pos / 64;
    let tail_bits = pos % 64;

    // Every word entirely below `pos` counts in full.
    let leading: u64 = bitmap
        .iter()
        .take(whole_words)
        .map(|w| u64::from(w.count_ones()))
        .sum();

    // The word containing `pos` only contributes its low `tail_bits` bits.
    let partial = match bitmap.get(whole_words) {
        Some(&w) if tail_bits > 0 => {
            // `tail_bits` is in 1..=63 here, so the shift cannot overflow.
            let low_mask = (1u64 << tail_bits) - 1;
            u64::from((w & low_mask).count_ones())
        }
        _ => 0,
    };

    leading + partial
}
/// Bitmap-encoded triple store: two id arrays plus two packed bitmaps.
///
/// The four fields are handed together to `materialise_triples_from_bitmap`
/// by [`TriplesBitmap::iterate_spo`]; the field notes below describe how that
/// function interprets them. Bitmaps are packed 64 bits per `u64` word,
/// least-significant bit first (see `bitmap_access`).
#[derive(Debug, Clone)]
pub struct TriplesBitmap {
/// Bit `i` set means the subject id is advanced after `array_y[i]` has
/// been processed.
pub bitmap_y: Vec<u64>,
/// Bit `i` set means `array_z[i]` is the last entry consumed for the
/// current `array_y` entry.
pub bitmap_z: Vec<u64>,
/// Emitted as the second (predicate) component of each triple.
pub array_y: Vec<u32>,
/// Emitted as the third (object) component of each triple.
pub array_z: Vec<u32>,
}
impl TriplesBitmap {
    /// Decodes a length-prefixed bitmap from `data`.
    ///
    /// Layout: a little-endian `u32` word count, followed by that many
    /// little-endian `u64` words. Bytes after the last word are ignored.
    ///
    /// # Errors
    ///
    /// Returns [`HdtError::TripleDecodeError`] when `data` is shorter than the
    /// 4-byte count field, when the declared word count cannot be expressed
    /// as a byte length on this target, or when `data` holds fewer bytes than
    /// the count requires.
    pub fn read_bitmap(data: &[u8]) -> Result<Vec<u64>, HdtError> {
        if data.len() < 4 {
            return Err(HdtError::TripleDecodeError {
                msg: "bitmap: truncated before count field".to_owned(),
            });
        }
        let (header, payload) = data.split_at(4);
        let mut count_bytes = [0u8; 4];
        count_bytes.copy_from_slice(header);
        let count = u32::from_le_bytes(count_bytes) as usize;

        // The count is untrusted input: on 32-bit targets `4 + count * 8`
        // can overflow `usize`, which would defeat the length check below.
        let needed = count
            .checked_mul(8)
            .and_then(|bytes| bytes.checked_add(4))
            .ok_or_else(|| HdtError::TripleDecodeError {
                msg: format!("bitmap: word count {} overflows addressable size", count),
            })?;
        if data.len() < needed {
            return Err(HdtError::TripleDecodeError {
                msg: format!("bitmap: need {} bytes, have {}", needed, data.len()),
            });
        }

        // `payload` holds at least `needed - 4` bytes (checked above), and
        // `chunks_exact(8)` guarantees every chunk is exactly 8 bytes long.
        let words: Vec<u64> = payload[..needed - 4]
            .chunks_exact(8)
            .map(|chunk| {
                let mut raw = [0u8; 8];
                raw.copy_from_slice(chunk);
                u64::from_le_bytes(raw)
            })
            .collect();
        Ok(words)
    }

    /// Iterates over every `(subject, predicate, object)` id triple.
    ///
    /// All triples are materialised into a `Vec` on each call before
    /// iteration starts.
    pub fn iterate_spo(&self) -> impl Iterator<Item = (u32, u32, u32)> + '_ {
        let triples = materialise_triples_from_bitmap(
            &self.array_y,
            &self.array_z,
            &self.bitmap_y,
            &self.bitmap_z,
        );
        triples.into_iter()
    }

    /// Returns every triple whose subject id equals `s_id`.
    ///
    /// Implemented as a filter over [`Self::iterate_spo`], so each call
    /// materialises and scans the full triple list.
    pub fn lookup_by_subject(&self, s_id: u32) -> Vec<(u32, u32, u32)> {
        self.iterate_spo()
            .filter(|(s, _, _)| *s == s_id)
            .collect()
    }

    /// Returns every triple whose predicate id equals `p_id`.
    ///
    /// Implemented as a filter over [`Self::iterate_spo`], so each call
    /// materialises and scans the full triple list.
    pub fn lookup_by_predicate(&self, p_id: u32) -> Vec<(u32, u32, u32)> {
        self.iterate_spo()
            .filter(|(_, p, _)| *p == p_id)
            .collect()
    }
}
/// Expands the bitmap-encoded arrays into explicit `(subject, predicate, object)` triples.
///
/// Subject ids start at 1. For each entry of `array_y` (a predicate id),
/// consecutive entries of `array_z` (object ids) are emitted until one whose
/// bit in `bitmap_z` is set, which closes the run for that predicate. After a
/// predicate has been handled, a set bit at its index in `bitmap_y` advances
/// the subject id for the entries that follow.
///
/// Once `array_z` is exhausted, the remaining `array_y` entries emit no
/// triples.
fn materialise_triples_from_bitmap(
    array_y: &[u32],
    array_z: &[u32],
    bitmap_y: &[u64],
    bitmap_z: &[u64],
) -> Vec<(u32, u32, u32)> {
    let mut out = Vec::new();
    let mut subject: u32 = 1;
    // Position of the next unread object in `array_z`; shared across predicates.
    let mut cursor: usize = 0;

    for (y_pos, &predicate) in array_y.iter().enumerate() {
        while let Some(&object) = array_z.get(cursor) {
            out.push((subject, predicate, object));
            let closes_run = bitmap_access(bitmap_z, cursor);
            cursor += 1;
            if closes_run {
                break;
            }
        }
        if bitmap_access(bitmap_y, y_pos) {
            subject += 1;
        }
    }

    out
}
/// A parsed triples section: the bitmap-encoded arrays plus the triples
/// already expanded from them.
///
/// Built by [`HdtTriplesSection::parse`], which fills the private `triples`
/// field by running `materialise_triples_from_bitmap` over the other four.
/// Bitmaps are packed 64 bits per `u64` word, least-significant bit first.
#[derive(Debug, Clone)]
pub struct HdtTriplesSection {
/// Bit `i` set means the subject id is advanced after `array_y[i]` has
/// been processed.
pub bitmap_y: Vec<u64>,
/// Bit `i` set means `array_z[i]` is the last entry consumed for the
/// current `array_y` entry.
pub bitmap_z: Vec<u64>,
/// Emitted as the second (predicate) component of each triple.
pub array_y: Vec<u32>,
/// Emitted as the third (object) component of each triple.
pub array_z: Vec<u32>,
// Triples expanded once at parse time; served by `iter_ids`.
triples: Vec<(u32, u32, u32)>,
}
impl HdtTriplesSection {
    /// Parses a triples section from its raw byte encoding.
    ///
    /// Layout (all values little-endian `u32`):
    ///
    /// 1. `count_sy`, then `count_z`;
    /// 2. `count_sy` values for `array_y`;
    /// 3. `count_z` values for `array_z`;
    /// 4. `count_sy` flags for `bitmap_y` (one `u32` per bit, non-zero = set);
    /// 5. `count_z` flags for `bitmap_z` (same encoding).
    ///
    /// The flags are packed into `u64` words and the full triple list is
    /// materialised eagerly. Bytes after the last flag are ignored.
    ///
    /// # Errors
    ///
    /// Returns [`HdtError::TripleDecodeError`] if `data` ends before either
    /// count or before any of the four arrays is complete.
    pub fn parse(data: &[u8]) -> Result<Self, HdtError> {
        let mut off = 0usize;
        let count_sy = read_u32_le(data, &mut off)
            .ok_or_else(|| HdtError::TripleDecodeError { msg: "cannot read count_sy".to_owned() })?
            as usize;
        let count_z = read_u32_le(data, &mut off)
            .ok_or_else(|| HdtError::TripleDecodeError { msg: "cannot read count_z".to_owned() })?
            as usize;

        let array_y = Self::read_u32_array(data, &mut off, count_sy, "array_y")?;
        let array_z = Self::read_u32_array(data, &mut off, count_z, "array_z")?;
        let bitmap_y_raw = Self::read_u32_array(data, &mut off, count_sy, "bitmap_y")?;
        let bitmap_z_raw = Self::read_u32_array(data, &mut off, count_z, "bitmap_z")?;

        let bitmap_y = pack_bitmap_from_raw(&bitmap_y_raw);
        let bitmap_z = pack_bitmap_from_raw(&bitmap_z_raw);
        let triples =
            materialise_triples_from_bitmap(&array_y, &array_z, &bitmap_y, &bitmap_z);

        Ok(HdtTriplesSection {
            bitmap_y,
            bitmap_z,
            array_y,
            array_z,
            triples,
        })
    }

    /// Reads `count` consecutive little-endian `u32` values starting at `*off`,
    /// advancing `*off` past each value read. `label` names the array in the
    /// truncation error message.
    fn read_u32_array(
        data: &[u8],
        off: &mut usize,
        count: usize,
        label: &str,
    ) -> Result<Vec<u32>, HdtError> {
        // `count` comes straight from the input, so never reserve more slots
        // than the remaining bytes could possibly fill; otherwise a tiny
        // malformed header could request a multi-gigabyte allocation.
        let available = data.len().saturating_sub(*off) / 4;
        let mut values = Vec::with_capacity(count.min(available));
        for i in 0..count {
            let v = read_u32_le(data, off).ok_or_else(|| HdtError::TripleDecodeError {
                msg: format!("{} truncated at index {}", label, i),
            })?;
            values.push(v);
        }
        Ok(values)
    }

    /// Iterates over the `(subject, predicate, object)` id triples that were
    /// materialised at parse time.
    pub fn iter_ids(&self) -> impl Iterator<Item = (u32, u32, u32)> + '_ {
        self.triples.iter().copied()
    }

    /// Builds a [`TriplesBitmap`] holding clones of this section's bitmaps
    /// and arrays.
    pub fn to_bitmap(&self) -> TriplesBitmap {
        TriplesBitmap {
            bitmap_y: self.bitmap_y.clone(),
            bitmap_z: self.bitmap_z.clone(),
            array_y: self.array_y.clone(),
            array_z: self.array_z.clone(),
        }
    }
}
/// Reads a little-endian `u32` at `*offset` and advances `*offset` by 4.
///
/// Returns `None`, leaving `*offset` untouched, when fewer than four bytes
/// remain in `data` from that position.
fn read_u32_le(data: &[u8], offset: &mut usize) -> Option<u32> {
    let start = *offset;
    let end = start + 4;
    if end > data.len() {
        return None;
    }
    let mut raw = [0u8; 4];
    raw.copy_from_slice(&data[start..end]);
    *offset = end;
    Some(u32::from_le_bytes(raw))
}
/// Packs one-flag-per-`u32` input into 64-bit words.
///
/// Flag `i` becomes bit `i % 64` of word `i / 64`; any non-zero flag sets
/// its bit. The result has `ceil(flags.len() / 64)` words, so an empty input
/// yields an empty vector.
fn pack_bitmap_from_raw(flags: &[u32]) -> Vec<u64> {
    flags
        .chunks(64)
        .map(|group| {
            // Each chunk holds at most 64 flags, so `bit` is always < 64.
            group
                .iter()
                .enumerate()
                .filter(|&(_, &flag)| flag != 0)
                .fold(0u64, |word, (bit, _)| word | (1u64 << bit))
        })
        .collect()
}