use crate::alloc::vec;
use crate::alloc::vec::Vec;
pub fn bwt_transform(data: &[u8]) -> (Vec<u8>, usize) {
let n = data.len();
if n == 0 {
return (vec![], 0);
}
let mut indices: Vec<usize> = (0..n).collect();
indices.sort_by(|&a, &b| {
for i in 0..n {
let char_a = data[(a + i) % n];
let char_b = data[(b + i) % n];
if char_a != char_b {
return char_a.cmp(&char_b);
}
}
a.cmp(&b)
});
let mut bwt = Vec::with_capacity(n);
let mut primary_index = 0;
for (i, &idx) in indices.iter().enumerate() {
if idx == 0 {
primary_index = i;
bwt.push(data[n - 1]);
} else {
bwt.push(data[idx - 1]);
}
}
(bwt, primary_index)
}
pub const BWT_BLOCK_SIZE: usize = 32 * 1024;
pub fn bwt_transform_blocked(data: &[u8]) -> (Vec<u8>, Vec<usize>) {
if data.is_empty() {
return (Vec::new(), Vec::new());
}
let mut out = Vec::with_capacity(data.len());
let mut indices = Vec::new();
for chunk in data.chunks(BWT_BLOCK_SIZE) {
let (chunk_bwt, idx) = bwt_transform(chunk);
out.extend_from_slice(&chunk_bwt);
indices.push(idx);
}
(out, indices)
}
pub fn bwt_inverse_blocked(data: &[u8], indices: &[usize]) -> Vec<u8> {
if data.is_empty() {
return Vec::new();
}
let mut out = Vec::with_capacity(data.len());
let mut idx_iter = indices.iter();
for chunk in data.chunks(BWT_BLOCK_SIZE) {
if let Some(&idx) = idx_iter.next() {
let chunk_orig = bwt_inverse(chunk, idx);
out.extend_from_slice(&chunk_orig);
} else {
#[cfg(feature = "std")]
std::println!("[BWT Inverse Blocked] Error: Not enough indices");
break;
}
}
out
}
pub fn bwt_inverse(bwt: &[u8], primary_index: usize) -> Vec<u8> {
let n = bwt.len();
if n == 0 {
return vec![];
}
let mut count = [0usize; 256];
for &b in bwt {
count[b as usize] += 1;
}
let mut sum = 0;
let mut first = [0usize; 256];
for i in 0..256 {
first[i] = sum;
sum += count[i];
}
let mut t = vec![0usize; n];
let mut char_count = [0usize; 256];
for (i, &b) in bwt.iter().enumerate() {
t[first[b as usize] + char_count[b as usize]] = i;
char_count[b as usize] += 1;
}
let mut out = Vec::with_capacity(n);
let mut curr = t[primary_index];
for _ in 0..n {
out.push(bwt[curr]);
curr = t[curr];
}
out
}
pub fn bwt_encode_safe(data: &[u8]) -> (Vec<u8>, usize) {
let n = data.len();
if n == 0 {
return (vec![], 0);
}
let mut out = Vec::with_capacity(n + n / BWT_BLOCK_SIZE * 8 + 4);
let chunks = data.chunks(BWT_BLOCK_SIZE);
let num_chunks = chunks.len() as u32;
out.extend_from_slice(&num_chunks.to_le_bytes());
for chunk in chunks {
let (bwt_chunk, idx) = bwt_transform(chunk);
let len = bwt_chunk.len() as u32;
let idx = idx as u32;
out.extend_from_slice(&len.to_le_bytes());
out.extend_from_slice(&idx.to_le_bytes());
out.extend_from_slice(&bwt_chunk);
}
(out, 0) }
pub fn bwt_decode_safe(data: &[u8], _ignore_idx: usize) -> Vec<u8> {
if data.len() < 4 {
#[cfg(feature = "std")]
std::println!("[BWT Decode] Error: data too short ({})", data.len());
return vec![];
}
let mut pos = 0;
let num_chunks_bytes: [u8; 4] = data[pos..pos + 4].try_into().unwrap_or([0; 4]);
let num_chunks = u32::from_le_bytes(num_chunks_bytes);
pos += 4;
#[cfg(feature = "std")]
std::println!(
"[BWT Decode] data_len={}, num_chunks={}, first_bytes={:02x?}",
data.len(),
num_chunks,
&data[..data.len().min(16)]
);
let mut out = Vec::new();
for i in 0..num_chunks {
if pos + 8 > data.len() {
#[cfg(feature = "std")]
std::println!(
"[BWT Decode] Break: pos={} + 8 > data_len={}",
pos,
data.len()
);
break; }
let len_bytes: [u8; 4] = data[pos..pos + 4].try_into().unwrap_or([0; 4]);
let len = u32::from_le_bytes(len_bytes) as usize;
pos += 4;
let idx_bytes: [u8; 4] = data[pos..pos + 4].try_into().unwrap_or([0; 4]);
let idx = u32::from_le_bytes(idx_bytes) as usize;
pos += 4;
#[cfg(feature = "std")]
std::println!(
"[BWT Decode] Chunk {}: len={}, idx={}, pos={}, remaining={}",
i,
len,
idx,
pos,
data.len() - pos
);
if pos + len > data.len() {
#[cfg(feature = "std")]
std::println!(
"[BWT Decode] Break: pos={} + len={} > data_len={}",
pos,
len,
data.len()
);
break; }
let chunk_data = &data[pos..pos + len];
pos += len;
let decoded_chunk = bwt_inverse(chunk_data, idx);
out.extend_from_slice(&decoded_chunk);
}
#[cfg(feature = "std")]
std::println!("[BWT Decode] Final output: {} bytes", out.len());
out
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_bwt_roundtrip_simple() {
let data = b"banana";
let (bwt, idx) = bwt_transform(data);
let restored = bwt_inverse(&bwt, idx);
assert_eq!(data.to_vec(), restored, "Simple roundtrip failed");
}
#[test]
fn test_bwt_roundtrip_8kb() {
let base = b"Compression Test Data. ";
let mut data = Vec::with_capacity(8192);
while data.len() < 8192 {
data.extend_from_slice(base);
}
data.truncate(8192);
let (bwt, idx) = bwt_transform(&data);
let restored = bwt_inverse(&bwt, idx);
assert_eq!(data, restored, "8KB roundtrip failed");
}
}