use crate::BLOCK_MAGIC;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BlockBoundary {
pub bit_offset: u64,
}
impl BlockBoundary {
#[inline]
pub fn byte_offset(self) -> usize {
(self.bit_offset / 8) as usize
}
#[inline]
pub fn bit_within_byte(self) -> u8 {
(self.bit_offset % 8) as u8
}
}
pub fn find_next_block(buf: &[u8], start_bit: u64) -> Option<BlockBoundary> {
let start_byte = (start_bit / 8) as usize;
let start_bit_off = (start_bit % 8) as u32;
if buf.len() < start_byte + 8 {
return None;
}
let search = &buf[start_byte..];
let mut byte_idx = 0usize;
while byte_idx + 8 <= search.len() {
let window = u64::from_be_bytes(
search[byte_idx..byte_idx + 8].try_into().unwrap(),
);
for shift in 0u32..16 {
let candidate = (window >> (16 - shift)) & 0xFFFF_FFFF_FFFF;
if candidate == BLOCK_MAGIC {
let abs_bit = (start_byte + byte_idx) as u64 * 8 + shift as u64;
if abs_bit >= start_bit {
return Some(BlockBoundary { bit_offset: abs_bit });
}
}
}
byte_idx += 2;
}
None
}
pub fn find_all_blocks(buf: &[u8]) -> Vec<BlockBoundary> {
let mut result = Vec::new();
let mut bit = 0u64;
while let Some(b) = find_next_block(buf, bit) {
result.push(b);
bit = b.bit_offset + 48; }
result
}
pub fn find_all_blocks_parallel(buf: &[u8], n_threads: usize) -> Vec<BlockBoundary> {
use rayon::prelude::*;
let n = n_threads.max(1);
if buf.len() < 16 || n == 1 {
return find_all_blocks(buf);
}
let chunk_size = buf.len() / n;
let mut per_thread: Vec<Vec<BlockBoundary>> = (0..n)
.into_par_iter()
.map(|i| {
let start_byte = i * chunk_size;
let end_byte = if i + 1 < n {
((i + 1) * chunk_size) + 8
} else {
buf.len()
};
let start_bit = start_byte as u64 * 8;
let end_bit = end_byte as u64 * 8;
let mut result = Vec::new();
let mut bit = start_bit;
while bit < end_bit {
match find_next_block(buf, bit) {
Some(b) if b.bit_offset < end_bit => {
result.push(b);
bit = b.bit_offset + 48;
}
_ => break,
}
}
result
})
.collect();
let mut all: Vec<BlockBoundary> = per_thread.drain(..).flatten().collect();
all.sort_by_key(|b| b.bit_offset);
all.dedup_by_key(|b| b.bit_offset);
all
}
pub fn split_boundaries(buf: &[u8], n_splits: usize) -> Vec<BlockBoundary> {
if n_splits <= 1 || buf.is_empty() {
return Vec::new();
}
let total_bits = buf.len() as u64 * 8;
let mut boundaries = Vec::with_capacity(n_splits - 1);
for i in 1..n_splits {
let nominal_bit = total_bits * i as u64 / n_splits as u64;
if let Some(b) = find_next_block(buf, nominal_bit) {
if boundaries.last().map_or(true, |prev: &BlockBoundary| prev.bit_offset != b.bit_offset) {
boundaries.push(b);
}
}
}
boundaries
}
#[inline]
fn quick_verify_block(buf: &[u8], magic_bit_offset: u64, max_blocksize: u32) -> bool {
use crate::bitreader::BitReader;
let total_bits = buf.len() as u64 * 8;
let after_magic = magic_bit_offset + 48;
if after_magic + 73 > total_bits {
return false;
}
let mut reader = BitReader::from_bit_offset(buf, after_magic as usize);
if reader.read_u32(32).is_none() { return false; }
match reader.read_bit() {
Some(false) => {}
_ => return false,
}
match reader.read_u32(24) {
Some(v) if v < max_blocksize => {}
_ => return false,
}
match reader.read_u16(16) {
Some(v) if v != 0 => {}
_ => return false,
}
true
}
pub fn find_quick_boundary(
buf: &[u8],
start_bit: u64,
max_blocksize: u32,
) -> Option<BlockBoundary> {
let total_bits = buf.len() as u64 * 8;
let mut search_bit = start_bit;
loop {
let candidate = find_next_block(buf, search_bit)?;
if candidate.bit_offset + 48 + 73 > total_bits {
return None;
}
if quick_verify_block(buf, candidate.bit_offset, max_blocksize) {
return Some(candidate);
}
search_bit = candidate.bit_offset + 1;
}
}
pub fn split_boundaries_parallel(
buf: &[u8],
n_splits: usize,
max_blocksize: u32,
) -> Vec<BlockBoundary> {
use rayon::prelude::*;
if n_splits <= 1 || buf.is_empty() {
return Vec::new();
}
let total_bits = buf.len() as u64 * 8;
let mut boundaries: Vec<Option<BlockBoundary>> = (1..n_splits)
.into_par_iter()
.map(|i| {
let nominal_bit = total_bits * i as u64 / n_splits as u64;
find_quick_boundary(buf, nominal_bit, max_blocksize)
})
.collect();
let mut result: Vec<BlockBoundary> = boundaries.drain(..).flatten().collect();
result.sort_by_key(|b| b.bit_offset);
result.dedup_by_key(|b| b.bit_offset);
result
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_find_magic_byte_aligned() {
let mut buf = vec![0u8; 64];
let magic_bytes: [u8; 6] = [0x31, 0x41, 0x59, 0x26, 0x53, 0x59];
buf[10..16].copy_from_slice(&magic_bytes);
let b = find_next_block(&buf, 0).unwrap();
assert_eq!(b.bit_offset, 80);
assert_eq!(b.byte_offset(), 10);
assert_eq!(b.bit_within_byte(), 0);
}
#[test]
fn test_find_magic_bit_shifted() {
let mut buf = vec![0u8; 64];
let magic: u64 = BLOCK_MAGIC;
let shifted = magic << (64 - 48 - 3); let bytes = shifted.to_be_bytes();
for (i, &b) in bytes.iter().enumerate() {
if 10 + i < buf.len() {
buf[10 + i] |= b;
}
}
let b = find_next_block(&buf, 0).unwrap();
assert_eq!(b.bit_offset, 83);
assert_eq!(b.byte_offset(), 10);
assert_eq!(b.bit_within_byte(), 3);
}
#[test]
fn test_no_magic() {
let buf = vec![0xFFu8; 64];
assert!(find_next_block(&buf, 0).is_none());
}
}