use crate::bitmap::Bitmap;
use crate::zp_impl::encoder::ZpEncoder;
#[cfg(not(feature = "std"))]
use alloc::collections::BTreeMap;
#[cfg(feature = "std")]
use std::collections::BTreeMap;
/// Growable binary tree of adaptive contexts used by the number coder.
///
/// Nodes live in three parallel vectors indexed by node id. Id `0` doubles as
/// the "child not allocated yet" marker and id `1` is the root, which is why a
/// fresh tree already contains two slots.
struct NumContext {
    /// One adaptive context byte per node.
    ctx: Vec<u8>,
    /// Left-child id of each node (`0` = none yet).
    left: Vec<u32>,
    /// Right-child id of each node (`0` = none yet).
    right: Vec<u32>,
}

impl NumContext {
    /// Builds a tree holding only the reserved slot `0` and the root slot `1`.
    fn new() -> Self {
        Self {
            ctx: vec![0; 2],
            left: vec![0; 2],
            right: vec![0; 2],
        }
    }

    /// Id of the root node.
    fn root(&self) -> usize {
        1
    }

    /// Returns the left child of `node`, allocating it on first use.
    fn get_left(&mut self, node: usize) -> usize {
        if self.left[node] == 0 {
            let fresh = self.append_node();
            self.left[node] = fresh;
        }
        self.left[node] as usize
    }

    /// Returns the right child of `node`, allocating it on first use.
    fn get_right(&mut self, node: usize) -> usize {
        if self.right[node] == 0 {
            let fresh = self.append_node();
            self.right[node] = fresh;
        }
        self.right[node] as usize
    }

    /// Appends one zeroed, childless node to all three vectors and returns its id.
    fn append_node(&mut self) -> u32 {
        let id = self.ctx.len() as u32;
        self.ctx.push(0);
        self.left.push(0);
        self.right.push(0);
        id
    }
}
/// Encodes the integer `val`, expected to lie in `low..=high`, as a sequence
/// of binary decisions coded with `zp`.
///
/// Every decision walks one step down the `ctx` tree (right child for `true`,
/// left child for `false`). A decision is only written to the coder when the
/// bounds leave it open; decisions already implied by `low`/`high` just
/// advance the tree.
///
/// The decisions are made in three phases:
/// 1. sign: `val >= 0`? A negative value is mirrored (`v -> -v - 1`) together
///    with its bounds so the rest works on a non-negative number;
/// 2. magnitude bracket: `cutoff` grows 1, 3, 7, ... until the value is below it;
/// 3. binary search inside the bracket until `range` collapses to 1.
fn encode_num(zp: &mut ZpEncoder, ctx: &mut NumContext, low: i32, high: i32, val: i32) {
let mut low = low;
let mut high = high;
let mut val_inner = val;
// Threshold the current decision compares against.
let mut cutoff: i32 = 0;
let mut phase: u32 = 1;
// Width of the remaining search interval; the loop ends when it reaches 1.
let mut range: u32 = 0xffff_ffff;
let mut node = ctx.root();
while range != 1 {
let decision = if low >= cutoff {
// Whole interval is at or above the cutoff: decision is implied, nothing is coded.
let child = ctx.get_right(node);
node = child;
true
} else if high >= cutoff {
// Cutoff splits the interval: the decision carries information and is coded.
let bit = val_inner >= cutoff;
// The child is allocated first, but the bit is coded with the context of
// the node we are leaving (`node` is only updated afterwards).
let child = if bit {
ctx.get_right(node)
} else {
ctx.get_left(node)
};
zp.encode_bit(&mut ctx.ctx[node], bit);
node = child;
bit
} else {
// Whole interval is below the cutoff: decision is implied, nothing is coded.
let child = ctx.get_left(node);
node = child;
false
};
match phase {
1 => {
// Sign phase: a `false` decision means the value is negative.
let negative = !decision;
if negative {
// Mirror value and bounds onto the non-negative side.
let temp = -low - 1;
low = -high - 1;
high = temp;
val_inner = -val_inner - 1;
}
phase = 2;
cutoff = 1;
}
2 => {
if !decision {
// Value is below the current cutoff: the bracket is found, start bisecting it.
phase = 3;
range = ((cutoff + 1) / 2) as u32;
if range <= 1 {
range = 1;
cutoff = 0;
} else {
cutoff -= (range / 2) as i32;
}
} else {
// Still at or above the cutoff: try the next bracket (1, 3, 7, 15, ...).
cutoff = cutoff * 2 + 1;
}
}
3 => {
// Bisection: halve the interval and move the cutoff towards the value.
range /= 2;
if range == 0 {
range = 1;
}
if range != 1 {
if !decision {
cutoff -= (range / 2) as i32;
} else {
cutoff += (range / 2) as i32;
}
} else if !decision {
cutoff -= 1;
}
}
// `phase` is only ever assigned 1, 2 or 3 above.
_ => unreachable!(),
}
}
}
/// Encodes every pixel of `bm`, row by row from `y = 0`, using a 10-bit
/// context built from already-coded neighbours.
///
/// The context index is laid out as
/// `[3 px two rows up | 5 px one row up | 2 px to the left]`, so `ctx` must
/// hold exactly 1024 adaptive context bytes. Pixels outside the bitmap read
/// as 0.
///
/// # Panics
///
/// Panics if `ctx.len() != 1024`. This used to be a debug-only assertion in
/// front of an unchecked index, which made a short slice undefined behaviour
/// in release builds.
fn encode_bitmap_direct(zp: &mut ZpEncoder, ctx: &mut [u8], bm: &Bitmap) {
    assert_eq!(
        ctx.len(),
        1024,
        "direct bitmap coder needs exactly 1024 contexts"
    );
    let w = bm.width as usize;
    let h = bm.height as usize;

    // Unpack into one byte per pixel with two blank rows on top and four
    // blank columns on the right, so the context windows can read past the
    // edges without bounds logic. The missing columns on the left are
    // modelled by the zero-initialised shift registers below.
    let pw = w + 4;
    let mut pixels = vec![0u8; (h + 2) * pw];
    for y in 0..h {
        for x in 0..w {
            pixels[(y + 2) * pw + x] = bm.get(x as u32, y as u32) as u8;
        }
    }

    for bm_y in 0..h {
        let row_p2 = &pixels[bm_y * pw..(bm_y + 1) * pw];
        let row_p1 = &pixels[(bm_y + 1) * pw..(bm_y + 2) * pw];
        let row_cur = &pixels[(bm_y + 2) * pw..(bm_y + 3) * pw];

        // Sliding windows, primed for column 0.
        let mut r2 = (row_p2[0] as u32) << 1 | row_p2[1] as u32;
        let mut r1 = (row_p1[0] as u32) << 2 | (row_p1[1] as u32) << 1 | row_p1[2] as u32;
        let mut r0: u32 = 0;

        for col in 0..w {
            // r2 <= 7, r1 <= 31 and r0 <= 3, so the index is already below
            // 1024; the mask is a no-op that lets the optimiser drop the
            // bounds check after the length assertion above.
            let idx = (((r2 << 7) | (r1 << 2) | r0) as usize) & 0x3ff;
            let bit = row_cur[col] != 0;
            zp.encode_bit(&mut ctx[idx], bit);

            // Slide each window one column to the right.
            r2 = ((r2 << 1) & 0b111) | row_p2[col + 2] as u32;
            r1 = ((r1 << 1) & 0b11111) | row_p1[col + 3] as u32;
            r0 = ((r0 << 1) & 0b11) | bit as u32;
        }
    }
}
/// Encodes every pixel of `cbm`, conditioning each bit on already-coded
/// pixels of `cbm` and on the neighbourhood of the matching position in the
/// reference bitmap `mbm`.
///
/// The two bitmaps are aligned on `((width - 1) >> 1, (height - 1) >> 1)`.
/// The 11-bit context index is laid out as
/// `[3 px above in cbm | 1 px left in cbm | 1 px above in mbm |
///   3 px same row in mbm | 3 px below in mbm]`,
/// so `ctx` is expected to hold 2048 context bytes. Pixels outside either
/// bitmap read as 0. Nothing is coded when `cbm` has a non-positive dimension.
fn encode_bitmap_ref(zp: &mut ZpEncoder, ctx: &mut [u8], cbm: &Bitmap, mbm: &Bitmap) {
    /// Pixel `(x, y)` of `bm` as 0/1; anything outside `w` x `h` reads as 0.
    fn sample(bm: &Bitmap, w: i32, h: i32, y: i32, x: i32) -> u32 {
        if (0..h).contains(&y) && (0..w).contains(&x) {
            bm.get(x as u32, y as u32) as u32
        } else {
            0
        }
    }

    debug_assert_eq!(ctx.len(), 2048);
    let (cur_w, cur_h) = (cbm.width as i32, cbm.height as i32);
    if cur_w <= 0 || cur_h <= 0 {
        return;
    }
    let (ref_w, ref_h) = (mbm.width as i32, mbm.height as i32);

    // Offset that maps a `cbm` coordinate onto the aligned `mbm` coordinate.
    let cur_mid_row = (cur_h - 1) >> 1;
    let cur_mid_col = (cur_w - 1) >> 1;
    let ref_mid_row = (ref_h - 1) >> 1;
    let ref_mid_col = (ref_w - 1) >> 1;
    let dy = ref_mid_row - cur_mid_row;
    let dx = ref_mid_col - cur_mid_col;

    let cur_px = |y: i32, x: i32| sample(cbm, cur_w, cur_h, y, x);
    let ref_px = |y: i32, x: i32| sample(mbm, ref_w, ref_h, y, x);
    // Three horizontally adjacent reference pixels centred on `x`, MSB first.
    let ref_triple =
        |y: i32, x: i32| (ref_px(y, x - 1) << 2) | (ref_px(y, x) << 1) | ref_px(y, x + 1);

    for row in 0..cur_h {
        let ref_row = row + dy;

        // Sliding windows, primed for column 0.
        let mut above = (cur_px(row - 1, 0) << 1) | cur_px(row - 1, 1);
        let mut left: u32 = 0;
        let mut ref_same = ref_triple(ref_row, dx);
        let mut ref_below = ref_triple(ref_row + 1, dx);

        for col in 0..cur_w {
            let ref_above = ref_px(ref_row - 1, col + dx);
            let slot = ((above << 8)
                | (left << 7)
                | (ref_above << 6)
                | (ref_same << 3)
                | ref_below)
                & 2047;
            let bit = cur_px(row, col) != 0;
            zp.encode_bit(&mut ctx[slot as usize], bit);

            // Slide every window one column to the right.
            above = ((above << 1) & 0b111) | cur_px(row - 1, col + 2);
            left = bit as u32;
            ref_same = ((ref_same << 1) & 0b111) | ref_px(ref_row, col + dx + 2);
            ref_below = ((ref_below << 1) & 0b111) | ref_px(ref_row + 1, col + dx + 2);
        }
    }
}
/// Encodes `bitmap` as a JB2 stream without a symbol dictionary: the image is
/// cut into tiles of at most 1024x1024 pixels and every tile is written as
/// one directly-coded, image-only symbol placed at its absolute position.
///
/// Returns an empty vector when either dimension is zero.
///
/// Stream layout: a start record (type 0) with the image size and a `false`
/// flag bit, one type-3 record per tile (tile size, tile pixels, location),
/// then the end record (type 11).
pub fn encode_jb2(bitmap: &Bitmap) -> Vec<u8> {
    let w = bitmap.width as i32;
    let h = bitmap.height as i32;
    if w == 0 || h == 0 {
        return Vec::new();
    }
    let mut zp = ZpEncoder::new();
    let mut record_type_ctx = NumContext::new();
    let mut image_size_ctx = NumContext::new();
    let mut symbol_width_ctx = NumContext::new();
    let mut symbol_height_ctx = NumContext::new();
    let mut hoff_ctx = NumContext::new();
    let mut voff_ctx = NumContext::new();
    let mut direct_bitmap_ctx = vec![0u8; 1024];
    let mut offset_type_ctx: u8 = 0;
    let mut flag_ctx: u8 = 0;

    // Start-of-image record.
    encode_num(&mut zp, &mut record_type_ctx, 0, 11, 0);
    encode_num(&mut zp, &mut image_size_ctx, 0, 262142, w);
    encode_num(&mut zp, &mut image_size_ctx, 0, 262142, h);
    zp.encode_bit(&mut flag_ctx, false);

    const TILE: u32 = 1024;
    // Position of the previous "new line" symbol. Locations are coded
    // relative to it, in bottom-up coordinates (row 0 is the bottom row).
    let mut first_left: i32 = -1;
    let mut first_bottom: i32 = h - 1;

    for ty in (0..bitmap.height).step_by(TILE as usize) {
        let th = TILE.min(bitmap.height - ty);
        for tx in (0..bitmap.width).step_by(TILE as usize) {
            let tw = TILE.min(bitmap.width - tx);
            encode_num(&mut zp, &mut record_type_ctx, 0, 11, 3);
            encode_num(&mut zp, &mut symbol_width_ctx, 0, 262142, tw as i32);
            encode_num(&mut zp, &mut symbol_height_ctx, 0, 262142, th as i32);

            // A tile that covers the whole image is the image itself: code it
            // in place instead of cloning the full bitmap first.
            if tw == bitmap.width && th == bitmap.height {
                encode_bitmap_direct(&mut zp, &mut direct_bitmap_ctx, bitmap);
            } else {
                let tile_bm = crop_bitmap(bitmap, tx, ty, tw, th);
                encode_bitmap_direct(&mut zp, &mut direct_bitmap_ctx, &tile_bm);
            }

            // Every tile uses the "new line" form: left edge and top edge
            // relative to the previous tile's left and bottom edges.
            let hoff = tx as i32 - first_left;
            let voff = h - 1 - ty as i32 - first_bottom;
            zp.encode_bit(&mut offset_type_ctx, true);
            encode_num(&mut zp, &mut hoff_ctx, -262143, 262142, hoff);
            encode_num(&mut zp, &mut voff_ctx, -262143, 262142, voff);
            first_left = tx as i32;
            first_bottom = h - th as i32 - ty as i32;
        }
    }

    // End-of-data record.
    encode_num(&mut zp, &mut record_type_ctx, 0, 11, 11);
    zp.finish()
}
/// Copies the `w` x `h` window of `src` whose top-left corner is `(x0, y0)`
/// into a freshly created bitmap.
fn crop_bitmap(src: &Bitmap, x0: u32, y0: u32, w: u32, h: u32) -> Bitmap {
    let mut window = Bitmap::new(w, h);
    (0..h)
        .flat_map(|dy| (0..w).map(move |dx| (dx, dy)))
        .filter(|&(dx, dy)| src.get(x0 + dx, y0 + dy))
        .for_each(|(dx, dy)| window.set_black(dx, dy));
    window
}
/// One connected component found by `extract_ccs`.
struct Cc {
/// Left edge of the component's bounding box, in page pixels.
x: u32,
/// Smallest `y` of the component's bounding box, in page pixels.
y: u32,
/// The component's pixels, cropped to its bounding box.
bitmap: Bitmap,
}
/// Splits `bitmap` into its 8-connected components of set pixels.
///
/// Components are returned in the order their first pixel is met by a
/// row-major scan. Each one carries its bounding-box origin and a bitmap
/// cropped to that box. An image with a zero dimension yields no components.
fn extract_ccs(bitmap: &Bitmap) -> Vec<Cc> {
    let (w, h) = (bitmap.width as usize, bitmap.height as usize);
    if w == 0 || h == 0 {
        return Vec::new();
    }

    // Row-major flags: `true` while a set pixel has not been claimed yet.
    let mut pending = vec![false; w * h];
    for (row, flags) in pending.chunks_mut(w).enumerate() {
        for (col, flag) in flags.iter_mut().enumerate() {
            *flag = bitmap.get(col as u32, row as u32);
        }
    }

    let mut components = Vec::new();
    // Both buffers are reused across components to avoid reallocating.
    let mut frontier: Vec<(u32, u32)> = Vec::new();
    let mut members: Vec<(u32, u32)> = Vec::new();

    for seed in 0..w * h {
        if !pending[seed] {
            continue;
        }
        let (seed_x, seed_y) = (seed % w, seed / w);
        frontier.clear();
        members.clear();
        frontier.push((seed_x as u32, seed_y as u32));
        pending[seed] = false;

        // Bounding box of the component grown so far.
        let (mut left, mut right) = (seed_x, seed_x);
        let (mut top, mut bottom) = (seed_y, seed_y);

        // Depth-first flood fill; pixels are claimed when pushed so each one
        // enters the stack exactly once.
        while let Some((px, py)) = frontier.pop() {
            members.push((px, py));
            let (ux, uy) = (px as usize, py as usize);
            left = left.min(ux);
            right = right.max(ux);
            top = top.min(uy);
            bottom = bottom.max(uy);

            // 3x3 neighbourhood clamped to the image.
            for ny in uy.saturating_sub(1)..=(uy + 1).min(h - 1) {
                for nx in ux.saturating_sub(1)..=(ux + 1).min(w - 1) {
                    let slot = &mut pending[ny * w + nx];
                    if *slot {
                        *slot = false;
                        frontier.push((nx as u32, ny as u32));
                    }
                }
            }
        }

        let mut glyph = Bitmap::new((right - left + 1) as u32, (bottom - top + 1) as u32);
        for &(px, py) in &members {
            glyph.set(px - left as u32, py - top as u32, true);
        }
        components.push(Cc {
            x: left as u32,
            y: top as u32,
            bitmap: glyph,
        });
    }
    components
}
/// Counts the bits that differ between two equally long byte buffers.
///
/// Debug builds assert that the lengths match; otherwise the extra tail of
/// the longer buffer is ignored.
fn packed_hamming(a: &[u8], b: &[u8]) -> u32 {
    debug_assert_eq!(a.len(), b.len());
    a.iter()
        .zip(b)
        .map(|(&lhs, &rhs)| (lhs ^ rhs).count_ones())
        .sum()
}
/// Largest Hamming distance accepted for a refinement match, expressed as a
/// percentage of the candidate's `width * height` (the value is divided by
/// 100 where it is used, so despite the name it is a percentage).
const REFINEMENT_DIFF_FRACTION: u32 = 4;
/// Candidates with fewer than this many pixels (`width * height`) are never
/// coded as refinements.
const REFINEMENT_MIN_PIXELS: u64 = 32;
/// Picks the dictionary entry that `cand` should be refined from, if any.
///
/// `same_size_indices` lists the indices into `dict_entries` whose bitmaps
/// have exactly `cand`'s dimensions. The result is the index with the
/// smallest Hamming distance to `cand` (the earliest one on ties), provided
/// that distance does not exceed `REFINEMENT_DIFF_FRACTION` percent of the
/// candidate's pixel count. Returns `None` when there are no candidates, when
/// `cand` has fewer than `REFINEMENT_MIN_PIXELS` pixels, or when every
/// candidate is too different.
fn find_refinement_ref(
    cand: &Bitmap,
    dict_entries: &[Bitmap],
    same_size_indices: &[usize],
) -> Option<usize> {
    if same_size_indices.is_empty() {
        return None;
    }
    let area = u64::from(cand.width) * u64::from(cand.height);
    if area < REFINEMENT_MIN_PIXELS {
        return None;
    }
    let budget = ((area * REFINEMENT_DIFF_FRACTION as u64) / 100) as u32;

    same_size_indices
        .iter()
        .map(|&entry| {
            let reference = &dict_entries[entry];
            debug_assert_eq!(reference.width, cand.width);
            debug_assert_eq!(reference.height, cand.height);
            (entry, packed_hamming(&cand.data, &reference.data))
        })
        .filter(|&(_, distance)| distance <= budget)
        // `min_by_key` keeps the first of several equal minima, matching a
        // strict "replace only if smaller" scan.
        .min_by_key(|&(_, distance)| distance)
        .map(|(entry, _)| entry)
}
/// Encodes `bitmap` with the connected-component dictionary encoder, without
/// any inherited shared symbols.
///
/// Equivalent to `encode_jb2_dict_with_shared(bitmap, &[])`.
pub fn encode_jb2_dict(bitmap: &Bitmap) -> Vec<u8> {
encode_jb2_dict_with_shared(bitmap, &[])
}
/// Encodes `bitmap` as a JB2 stream built from its connected components,
/// reusing symbols through a dictionary.
///
/// `shared_symbols` pre-populates the dictionary (indices `0..len`) and, when
/// non-empty, is announced with a type-9 record carrying its length before
/// the start record. The symbols themselves are not written to this stream.
///
/// Each connected component is coded as one of:
/// * an exact copy of a dictionary entry (record type 7),
/// * a refinement of a same-sized, similar entry (record type 6; the result
///   is not added to the dictionary), or
/// * a new directly-coded symbol (record type 1; added to the dictionary),
///
/// followed by its position, either relative to the previous symbol on the
/// same text line or as the start of a new line.
///
/// Returns an empty vector when either dimension of `bitmap` is zero.
pub fn encode_jb2_dict_with_shared(bitmap: &Bitmap, shared_symbols: &[Bitmap]) -> Vec<u8> {
let w = bitmap.width as i32;
let h = bitmap.height as i32;
if w == 0 || h == 0 {
return Vec::new();
}
let ccs = extract_ccs(bitmap);
// Visit components roughly in reading order: bucket by the bottom edge
// (bucket height = baseline tolerance), then left to right in each bucket.
let mut order: Vec<usize> = (0..ccs.len()).collect();
let bucket = (SAME_LINE_BASELINE_TOL.max(1)) as u32;
order.sort_by_key(|&i| {
let cc = &ccs[i];
let bottom = cc.y + cc.bitmap.height;
(bottom / bucket, cc.x)
});
let mut zp = ZpEncoder::new();
let mut record_type_ctx = NumContext::new();
let mut image_size_ctx = NumContext::new();
let mut symbol_width_ctx = NumContext::new();
let mut symbol_height_ctx = NumContext::new();
let mut symbol_width_diff_ctx = NumContext::new();
let mut symbol_height_diff_ctx = NumContext::new();
let mut symbol_index_ctx = NumContext::new();
let mut inherit_dict_size_ctx = NumContext::new();
let mut hoff_ctx = NumContext::new();
let mut voff_ctx = NumContext::new();
let mut shoff_ctx = NumContext::new();
let mut svoff_ctx = NumContext::new();
let mut direct_bitmap_ctx = vec![0u8; 1024];
let mut refinement_bitmap_ctx = vec![0u8; 2048];
let mut offset_type_ctx: u8 = 0;
let mut flag_ctx: u8 = 0;
// Announce the inherited dictionary (record type 9 + its size) if there is one.
if !shared_symbols.is_empty() {
encode_num(&mut zp, &mut record_type_ctx, 0, 11, 9);
encode_num(
&mut zp,
&mut inherit_dict_size_ctx,
0,
262142,
shared_symbols.len() as i32,
);
}
// Start record: type 0, image width and height, then a `false` flag bit.
encode_num(&mut zp, &mut record_type_ctx, 0, 11, 0);
encode_num(&mut zp, &mut image_size_ctx, 0, 262142, w);
encode_num(&mut zp, &mut image_size_ctx, 0, 262142, h);
zp.encode_bit(&mut flag_ctx, false);
let mut layout = EncoderLayout::new(h);
// Exact-match lookup: (width, height, packed pixels) -> dictionary index.
let mut dedup: BTreeMap<(u32, u32, Vec<u8>), usize> = BTreeMap::new();
// Dictionary contents in index order (shared symbols first).
let mut dict_entries: Vec<Bitmap> = Vec::new();
// Refinement candidates: (width, height) -> dictionary indices of that size.
let mut by_size: BTreeMap<(u32, u32), Vec<usize>> = BTreeMap::new();
for sym in shared_symbols {
let idx = dict_entries.len();
dedup.insert((sym.width, sym.height, sym.data.clone()), idx);
by_size
.entry((sym.width, sym.height))
.or_default()
.push(idx);
dict_entries.push(sym.clone());
}
for &cc_idx in &order {
let cc = &ccs[cc_idx];
let cc_w = cc.bitmap.width as i32;
let cc_h = cc.bitmap.height as i32;
// Position in stream coordinates: x unchanged, y counted from the bottom
// of the image to the bottom edge of the component.
let x_jb2 = cc.x as i32;
let y_jb2 = h - cc.y as i32 - cc_h;
let key = (cc.bitmap.width, cc.bitmap.height, cc.bitmap.data.clone());
let exact_match = dedup.get(&key).copied();
enum Action {
New,
Copy(usize),
Refine(usize),
}
// Prefer an exact copy, then a refinement of a close match, else a new symbol.
let action = if let Some(idx) = exact_match {
Action::Copy(idx)
} else {
let candidates = by_size
.get(&(cc.bitmap.width, cc.bitmap.height))
.map(|v| v.as_slice())
.unwrap_or(&[]);
match find_refinement_ref(&cc.bitmap, &dict_entries, candidates) {
Some(ref_idx) => Action::Refine(ref_idx),
None => Action::New,
}
};
// Dictionary size before this record; bounds the symbol index coded below.
// `Copy`/`Refine` can only be chosen when the dictionary is non-empty.
let dict_size = dict_entries.len();
match &action {
Action::New => {
// Record type 1: symbol size followed by directly coded pixels.
encode_num(&mut zp, &mut record_type_ctx, 0, 11, 1);
encode_num(&mut zp, &mut symbol_width_ctx, 0, 262142, cc_w);
encode_num(&mut zp, &mut symbol_height_ctx, 0, 262142, cc_h);
encode_bitmap_direct(&mut zp, &mut direct_bitmap_ctx, &cc.bitmap);
}
Action::Copy(dict_idx) => {
// Record type 7: just the index of the identical dictionary entry.
encode_num(&mut zp, &mut record_type_ctx, 0, 11, 7);
encode_num(
&mut zp,
&mut symbol_index_ctx,
0,
(dict_size - 1) as i32,
*dict_idx as i32,
);
}
Action::Refine(ref_idx) => {
// Record type 6: reference index, zero size deltas (candidates always
// have the same dimensions), then pixels coded against the reference.
encode_num(&mut zp, &mut record_type_ctx, 0, 11, 6);
encode_num(
&mut zp,
&mut symbol_index_ctx,
0,
(dict_size - 1) as i32,
*ref_idx as i32,
);
encode_num(&mut zp, &mut symbol_width_diff_ctx, -262143, 262142, 0);
encode_num(&mut zp, &mut symbol_height_diff_ctx, -262143, 262142, 0);
encode_bitmap_ref(
&mut zp,
&mut refinement_bitmap_ctx,
&cc.bitmap,
&dict_entries[*ref_idx],
);
}
}
// Location. Try the "same line" form first: offsets from the previous
// symbol's right edge and from the running median baseline.
let shoff = x_jb2 - layout.last_right;
let svoff = y_jb2 - layout.baseline_get();
let same_line = layout.same_line_seen
&& svoff.abs() <= SAME_LINE_BASELINE_TOL
&& (-SAME_LINE_OVERLAP_TOL..=SAME_LINE_GAP_MAX).contains(&shoff);
if same_line {
zp.encode_bit(&mut offset_type_ctx, false);
encode_num(&mut zp, &mut shoff_ctx, -262143, 262142, shoff);
encode_num(&mut zp, &mut svoff_ctx, -262143, 262142, svoff);
// Recompute the position from the coded offsets and update the layout state.
let nx = layout.last_right + shoff;
let ny = layout.baseline_get() + svoff;
layout.baseline_add(ny);
layout.last_right = nx + cc_w - 1;
} else {
// "New line" form: left edge relative to the previous line's left edge,
// top edge relative to the previous line's bottom edge.
zp.encode_bit(&mut offset_type_ctx, true);
let hoff = x_jb2 - layout.first_left;
let voff = y_jb2 + cc_h - 1 - layout.first_bottom;
encode_num(&mut zp, &mut hoff_ctx, -262143, 262142, hoff);
encode_num(&mut zp, &mut voff_ctx, -262143, 262142, voff);
let nx = layout.first_left + hoff;
let ny = layout.first_bottom + voff - cc_h + 1;
layout.first_left = nx;
layout.first_bottom = ny;
// A new line resets the baseline history to this symbol's bottom edge.
layout.baseline_fill(ny);
layout.baseline_add(ny);
layout.last_right = nx + cc_w - 1;
layout.same_line_seen = true;
}
// Only brand-new symbols extend the dictionary.
if matches!(action, Action::New) {
let next_idx = dict_entries.len();
dedup.insert(key, next_idx);
by_size
.entry((cc.bitmap.width, cc.bitmap.height))
.or_default()
.push(next_idx);
dict_entries.push(cc.bitmap.clone());
}
}
// End-of-data record.
encode_num(&mut zp, &mut record_type_ctx, 0, 11, 11);
zp.finish()
}
/// Largest absolute vertical offset from the running baseline for which a
/// symbol is still coded with the "same line" location form. Also used as the
/// bucket height when components are sorted into reading order.
const SAME_LINE_BASELINE_TOL: i32 = 16;
/// How far a symbol's left edge may lie to the left of the previous symbol's
/// right edge and still be coded with the "same line" form.
const SAME_LINE_OVERLAP_TOL: i32 = 16;
/// Largest horizontal offset from the previous symbol's right edge that is
/// still coded with the "same line" form.
const SAME_LINE_GAP_MAX: i32 = 1000;
/// Position-tracking state used while coding symbol locations.
struct EncoderLayout {
    /// Left edge of the symbol that started the current line.
    first_left: i32,
    /// Bottom edge of the symbol that started the current line.
    first_bottom: i32,
    /// Right edge of the most recently placed symbol.
    last_right: i32,
    /// The three most recent baseline values, used as a ring buffer.
    baseline: [i32; 3],
    /// Ring-buffer slot written last (`-1` before the first write).
    baseline_idx: i32,
    /// Whether at least one symbol has been placed yet.
    same_line_seen: bool,
}

impl EncoderLayout {
    /// Initial state for an image `image_height` pixels tall.
    fn new(image_height: i32) -> Self {
        EncoderLayout {
            first_left: -1,
            first_bottom: image_height - 1,
            last_right: 0,
            baseline: [0; 3],
            baseline_idx: -1,
            same_line_seen: false,
        }
    }

    /// Overwrites the whole baseline history with `val`.
    fn baseline_fill(&mut self, val: i32) {
        self.baseline = [val; 3];
    }

    /// Stores `val` in the next ring-buffer slot, wrapping after the third.
    fn baseline_add(&mut self, val: i32) {
        let next = self.baseline_idx + 1;
        self.baseline_idx = if next == 3 { 0 } else { next };
        self.baseline[self.baseline_idx as usize] = val;
    }

    /// Median of the three stored baseline values.
    fn baseline_get(&self) -> i32 {
        let [a, b, c] = self.baseline;
        // median(a, b, c) = max(min(a, b), min(max(a, b), c))
        a.min(b).max(a.max(b).min(c))
    }
}
/// Encodes `symbols` as a stand-alone JB2 dictionary stream.
///
/// The stream is a start record with a 0 x 0 image size and a `false` flag
/// bit, then one type-2 record per symbol (width, height, directly coded
/// pixels, no position), then the end record. An empty `symbols` slice still
/// produces the start and end records.
pub fn encode_jb2_djbz(symbols: &[Bitmap]) -> Vec<u8> {
    // Record-type values and numeric bounds used by this stream.
    const REC_START: i32 = 0;
    const REC_DICT_SYMBOL: i32 = 2;
    const REC_END: i32 = 11;
    const REC_MAX: i32 = 11;
    const SIZE_MAX: i32 = 262142;

    let mut coder = ZpEncoder::new();
    let mut record_ctx = NumContext::new();
    let mut size_ctx = NumContext::new();
    let mut width_ctx = NumContext::new();
    let mut height_ctx = NumContext::new();
    let mut pixel_ctx = vec![0u8; 1024];
    let mut flag: u8 = 0;

    encode_num(&mut coder, &mut record_ctx, 0, REC_MAX, REC_START);
    // A dictionary has no page: both image dimensions are coded as zero.
    for _dimension in 0..2 {
        encode_num(&mut coder, &mut size_ctx, 0, SIZE_MAX, 0);
    }
    coder.encode_bit(&mut flag, false);

    for symbol in symbols {
        let (sym_w, sym_h) = (symbol.width as i32, symbol.height as i32);
        encode_num(&mut coder, &mut record_ctx, 0, REC_MAX, REC_DICT_SYMBOL);
        encode_num(&mut coder, &mut width_ctx, 0, SIZE_MAX, sym_w);
        encode_num(&mut coder, &mut height_ctx, 0, SIZE_MAX, sym_h);
        encode_bitmap_direct(&mut coder, &mut pixel_ctx, symbol);
    }

    encode_num(&mut coder, &mut record_ctx, 0, REC_MAX, REC_END);
    coder.finish()
}
/// Collects the connected-component shapes that occur on at least
/// `page_threshold` different pages, for use as a shared symbol dictionary.
///
/// Shapes are compared exactly (same width, height and packed pixel data).
/// The result is ordered by first appearance (page index, then component
/// index within the page). Returns an empty vector when `page_threshold < 2`
/// or there are fewer pages than the threshold.
pub(crate) fn cluster_shared_symbols(pages: &[Bitmap], page_threshold: usize) -> Vec<Bitmap> {
    if page_threshold < 2 || pages.len() < page_threshold {
        return Vec::new();
    }
    type Key = (u32, u32, Vec<u8>);
    struct ClusterEntry {
        /// Number of distinct pages the shape has been seen on.
        page_count: usize,
        /// Most recent page the shape was counted for.
        last_page: Option<usize>,
        /// (page index, component index) of the first occurrence.
        first_seen: (usize, usize),
        bitmap: Bitmap,
    }
    let mut seen: BTreeMap<Key, ClusterEntry> = BTreeMap::new();
    for (page_idx, page) in pages.iter().enumerate() {
        // Components are consumed so a first occurrence can move its bitmap
        // into the table instead of cloning it.
        for (cc_idx, cc) in extract_ccs(page).into_iter().enumerate() {
            let key = (cc.bitmap.width, cc.bitmap.height, cc.bitmap.data.clone());
            let entry = seen.entry(key).or_insert_with(|| ClusterEntry {
                page_count: 0,
                last_page: None,
                first_seen: (page_idx, cc_idx),
                bitmap: cc.bitmap,
            });
            // Pages are visited in increasing order, so "already counted for
            // this page" is simply "last counted page equals this page" --
            // no scan over the pages seen so far is needed.
            if entry.last_page != Some(page_idx) {
                entry.last_page = Some(page_idx);
                entry.page_count += 1;
            }
        }
    }
    let mut promoted: Vec<ClusterEntry> = seen
        .into_values()
        .filter(|e| e.page_count >= page_threshold)
        .collect();
    // `first_seen` is unique per entry, so an unstable sort gives the same order.
    promoted.sort_unstable_by_key(|e| e.first_seen);
    promoted.into_iter().map(|e| e.bitmap).collect()
}
/// Encodes `pages` as one bundled multi-page `AT&T FORM:DJVM` document.
///
/// Shapes that occur on at least `shared_dict_page_threshold` pages are
/// factored out into a shared `FORM:DJVI` component (`dict0001.djvi`) holding
/// a `Djbz` chunk. Every page becomes a `FORM:DJVU` component (`pNNNN.djvu`)
/// with an `INFO` chunk, an `INCL` chunk naming the shared dictionary (only
/// when one exists) and an `Sjbz` chunk. A `DIRM` chunk listing all
/// components precedes them.
///
/// Sizes, offsets, the component count and the page dimensions are written
/// with plain `as` casts (`u32`, `u16`) and therefore wrap silently if a value
/// does not fit.
pub fn encode_djvm_bundle_jb2(pages: &[Bitmap], shared_dict_page_threshold: usize) -> Vec<u8> {
    use crate::iff;

    /// Appends one IFF chunk: 4-byte id, big-endian 32-bit payload length,
    /// the payload, and a zero pad byte when the payload length is odd.
    fn push_chunk(out: &mut Vec<u8>, id: &[u8; 4], payload: &[u8]) {
        out.extend_from_slice(id);
        out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
        out.extend_from_slice(payload);
        if payload.len() % 2 == 1 {
            out.push(0);
        }
    }

    const DICT_ID: &str = "dict0001.djvi";

    let shared = cluster_shared_symbols(pages, shared_dict_page_threshold);

    // One entry per component: (FORM body, is_page, component id).
    let mut components: Vec<(Vec<u8>, bool, String)> = Vec::with_capacity(pages.len() + 1);

    // The shared dictionary is only encoded when it is non-empty; it must
    // come before the pages that include it.
    if !shared.is_empty() {
        let mut djvi_body = Vec::new();
        djvi_body.extend_from_slice(b"DJVI");
        push_chunk(&mut djvi_body, b"Djbz", &encode_jb2_djbz(&shared));
        components.push((djvi_body, false, DICT_ID.to_string()));
    }

    for (page_idx, page) in pages.iter().enumerate() {
        let sjbz = encode_jb2_dict_with_shared(page, &shared);

        // INFO payload: width and height (big-endian u16) followed by six
        // fixed bytes. NOTE(review): presumably minor version 24, major
        // version 0, 100 dpi (little-endian), gamma byte 1 and flags 0 --
        // confirm against the INFO chunk definition.
        let mut info = Vec::with_capacity(10);
        info.extend_from_slice(&(page.width as u16).to_be_bytes());
        info.extend_from_slice(&(page.height as u16).to_be_bytes());
        info.extend_from_slice(&[24, 0, 100, 0, 1, 0]);

        let mut djvu_body = Vec::new();
        djvu_body.extend_from_slice(b"DJVU");
        push_chunk(&mut djvu_body, b"INFO", &info);
        if !shared.is_empty() {
            push_chunk(&mut djvu_body, b"INCL", DICT_ID.as_bytes());
        }
        push_chunk(&mut djvu_body, b"Sjbz", &sjbz);

        let pid = format!("p{:04}.djvu", page_idx + 1);
        components.push((djvu_body, true, pid));
    }

    let n = components.len();

    // Directory metadata (stored BZZ-compressed inside DIRM): per-component
    // 24-bit sizes, then per-component flag bytes, then string tables.
    let mut meta = Vec::new();
    for (body, _, _) in &components {
        // Size of the component including its 8-byte FORM header.
        let total = body.len() + 8;
        meta.extend_from_slice(&(total as u32).to_be_bytes()[1..4]);
    }
    for (_, is_page, _) in &components {
        meta.push(if *is_page { 1u8 } else { 0u8 });
    }
    // The NUL-terminated id list is written twice and followed by `n` empty
    // strings. NOTE(review): presumably these are the id, name and title
    // tables, with every name equal to its id and every title empty --
    // confirm against the DIRM reader before changing this layout.
    for _table in 0..2 {
        for (_, _, id) in &components {
            meta.extend_from_slice(id.as_bytes());
            meta.push(0);
        }
    }
    meta.extend(core::iter::repeat_n(0u8, n));
    let bzz_meta = crate::bzz_encode::bzz_encode(&meta);

    // DIRM payload: flags/version byte, component count, one 32-bit offset
    // per component, then the compressed metadata.
    let dirm_size = 1 + 2 + 4 * n + bzz_meta.len();
    let dirm_chunk_total = 8 + dirm_size + (dirm_size & 1);

    // Component offsets are counted from the start of the file:
    // "AT&T" + "FORM" + size + "DJVM" = 16 bytes, then the DIRM chunk.
    let mut next_offset = 16 + dirm_chunk_total;
    let mut comp_offsets: Vec<u32> = Vec::with_capacity(n);
    for (body, _, _) in &components {
        comp_offsets.push(next_offset as u32);
        let framed = body.len() + 8;
        next_offset += framed + (framed & 1);
    }
    // The outer FORM size excludes "AT&T", "FORM" and the size field itself.
    let form_body_size = next_offset - 12;

    let mut dirm = Vec::with_capacity(dirm_size);
    dirm.push(0x81);
    dirm.extend_from_slice(&(n as u16).to_be_bytes());
    for off in &comp_offsets {
        dirm.extend_from_slice(&off.to_be_bytes());
    }
    dirm.extend_from_slice(&bzz_meta);
    debug_assert_eq!(dirm.len(), dirm_size);

    let mut out = Vec::with_capacity(12 + form_body_size);
    out.extend_from_slice(b"AT&T");
    out.extend_from_slice(b"FORM");
    out.extend_from_slice(&(form_body_size as u32).to_be_bytes());
    out.extend_from_slice(b"DJVM");
    push_chunk(&mut out, b"DIRM", &dirm);
    for (body, _, _) in &components {
        push_chunk(&mut out, b"FORM", body);
    }

    // Keeps the `iff` import (and `parse_form`) referenced from this function.
    let _ = iff::parse_form;
    out
}
// Unit tests: round-trips through the crate's JB2 decoder for the tiled
// encoder, the dictionary encoder, refinement coding, shared dictionaries
// and the bundled DJVM writer.
#[cfg(test)]
mod tests {
use super::*;
use crate::bitmap::Bitmap;
use crate::jb2;
// Builds a `w` x `h` bitmap whose pixel (x, y) is set exactly when `f(x, y)` is true.
fn make_bitmap(w: u32, h: u32, f: impl Fn(u32, u32) -> bool) -> Bitmap {
let mut bm = Bitmap::new(w, h);
for y in 0..h {
for x in 0..w {
bm.set(x, y, f(x, y));
}
}
bm
}
// Encodes with the tiled encoder and decodes the result again.
fn roundtrip(bm: &Bitmap) -> Bitmap {
let encoded = encode_jb2(bm);
jb2::decode(&encoded, None).expect("decode failed")
}
// --- Tiled encoder (`encode_jb2`) ---
#[test]
fn all_white_roundtrip() {
let src = Bitmap::new(32, 32);
let decoded = roundtrip(&src);
assert_eq!(decoded.width, 32);
assert_eq!(decoded.height, 32);
for y in 0..32u32 {
for x in 0..32u32 {
assert!(!decoded.get(x, y), "expected white at ({x},{y})");
}
}
}
#[test]
fn all_black_roundtrip() {
let src = make_bitmap(32, 32, |_, _| true);
let decoded = roundtrip(&src);
for y in 0..32u32 {
for x in 0..32u32 {
assert!(decoded.get(x, y), "expected black at ({x},{y})");
}
}
}
#[test]
fn checkerboard_roundtrip() {
let src = make_bitmap(16, 16, |x, y| (x + y) % 2 == 0);
let decoded = roundtrip(&src);
for y in 0..16u32 {
for x in 0..16u32 {
assert_eq!(decoded.get(x, y), (x + y) % 2 == 0, "mismatch at ({x},{y})");
}
}
}
#[test]
fn single_pixel_roundtrip() {
let src = make_bitmap(1, 1, |_, _| true);
let decoded = roundtrip(&src);
assert_eq!(decoded.width, 1);
assert_eq!(decoded.height, 1);
assert!(decoded.get(0, 0));
}
#[test]
fn larger_image_roundtrip() {
let src = make_bitmap(64, 64, |x, y| (x * 17 + y * 31) % 5 != 0);
let decoded = roundtrip(&src);
assert_eq!(decoded.width, 64);
assert_eq!(decoded.height, 64);
let mut mismatches = 0u32;
for y in 0..64u32 {
for x in 0..64u32 {
if decoded.get(x, y) != src.get(x, y) {
mismatches += 1;
}
}
}
assert_eq!(
mismatches, 0,
"{mismatches} pixel mismatches in 64×64 roundtrip"
);
}
#[test]
fn encoded_is_nonempty() {
let src = Bitmap::new(8, 8);
let encoded = encode_jb2(&src);
assert!(!encoded.is_empty());
}
// A zero width or height short-circuits to an empty stream.
#[test]
fn zero_dimension_returns_empty() {
assert!(encode_jb2(&Bitmap::new(0, 0)).is_empty());
assert!(encode_jb2(&Bitmap::new(8, 0)).is_empty());
assert!(encode_jb2(&Bitmap::new(0, 8)).is_empty());
}
// 2048 x 2048 is exactly 2 x 2 full tiles.
#[test]
fn tiled_2048x2048_roundtrip() {
let src = make_bitmap(2048, 2048, |x, y| {
((x.wrapping_mul(2654435761)) ^ y.wrapping_mul(40503)) & 7 == 0
});
let encoded = encode_jb2(&src);
let decoded = jb2::decode(&encoded, None).expect("decode failed");
assert_eq!(decoded.width, 2048);
assert_eq!(decoded.height, 2048);
for y in 0..2048u32 {
for x in 0..2048u32 {
assert_eq!(decoded.get(x, y), src.get(x, y), "mismatch at ({x},{y})");
}
}
}
// 1500 x 1100 exercises partial tiles on the right and bottom edges.
#[test]
fn tiled_irregular_size_roundtrip() {
let src = make_bitmap(1500, 1100, |x, y| (x * 13 + y * 7) % 11 == 0);
let encoded = encode_jb2(&src);
let decoded = jb2::decode(&encoded, None).expect("decode failed");
assert_eq!(decoded.width, 1500);
assert_eq!(decoded.height, 1100);
let mut mismatches = 0u32;
for y in 0..1100u32 {
for x in 0..1500u32 {
if decoded.get(x, y) != src.get(x, y) {
mismatches += 1;
}
}
}
assert_eq!(mismatches, 0);
}
#[test]
fn tiled_1x1_roundtrip() {
for &px in &[false, true] {
let src = make_bitmap(1, 1, |_, _| px);
let encoded = encode_jb2(&src);
let decoded = jb2::decode(&encoded, None).expect("decode failed");
assert_eq!(decoded.width, 1);
assert_eq!(decoded.height, 1);
assert_eq!(decoded.get(0, 0), px, "1x1 pixel mismatch px={px}");
}
}
#[test]
fn tiled_100x100_roundtrip() {
let src = make_bitmap(100, 100, |x, y| (x ^ y) & 1 == 0);
let encoded = encode_jb2(&src);
let decoded = jb2::decode(&encoded, None).expect("decode failed");
assert_eq!(decoded.width, 100);
assert_eq!(decoded.height, 100);
for y in 0..100u32 {
for x in 0..100u32 {
assert_eq!(decoded.get(x, y), src.get(x, y), "mismatch at ({x},{y})");
}
}
}
#[test]
#[ignore = "16 MP pixel-by-pixel verify is slow; enable with --ignored"]
fn tiled_4096x4096_roundtrip() {
let src = make_bitmap(4096, 4096, |x, y| {
((x.wrapping_mul(2654435761)) ^ y.wrapping_mul(40503)) & 31 == 0
});
let encoded = encode_jb2(&src);
let decoded = jb2::decode(&encoded, None).expect("decode failed");
assert_eq!(decoded.width, 4096);
assert_eq!(decoded.height, 4096);
for y in 0..4096u32 {
for x in 0..4096u32 {
assert_eq!(decoded.get(x, y), src.get(x, y), "mismatch at ({x},{y})");
}
}
}
// --- Dictionary encoder (`encode_jb2_dict`) ---
// Encodes with the dictionary encoder and decodes the result again.
fn roundtrip_dict(bm: &Bitmap) -> Bitmap {
let encoded = encode_jb2_dict(bm);
jb2::decode(&encoded, None).expect("dict decode failed")
}
// Asserts equal dimensions and pixels, listing every mismatching pixel on failure.
fn assert_bitmaps_eq(a: &Bitmap, b: &Bitmap) {
assert_eq!(a.width, b.width, "width mismatch");
assert_eq!(a.height, b.height, "height mismatch");
let mut mismatches = Vec::new();
for y in 0..a.height {
for x in 0..a.width {
if a.get(x, y) != b.get(x, y) {
mismatches.push((x, y, a.get(x, y), b.get(x, y)));
}
}
}
assert!(
mismatches.is_empty(),
"{} pixel mismatches: {:?}",
mismatches.len(),
mismatches
);
}
#[test]
fn dict_all_white_roundtrip() {
let src = Bitmap::new(32, 32);
let decoded = roundtrip_dict(&src);
assert_bitmaps_eq(&src, &decoded);
}
#[test]
fn dict_single_pixel_roundtrip() {
let src = make_bitmap(16, 16, |x, y| x == 4 && y == 7);
let decoded = roundtrip_dict(&src);
assert_bitmaps_eq(&src, &decoded);
}
// Two identical 1 x 1 components: the second can be coded as a copy of the first.
#[test]
fn dict_two_dots_dedup() {
let src = make_bitmap(32, 32, |x, y| (x == 3 && y == 5) || (x == 20 && y == 25));
let decoded = roundtrip_dict(&src);
assert_bitmaps_eq(&src, &decoded);
let ccs = extract_ccs(&src);
assert_eq!(ccs.len(), 2);
}
#[test]
fn dict_letter_like_shapes() {
let src = make_bitmap(32, 32, |x, y| {
(x < 3 && y < 5) || (x >= 20 && x < 23 && y >= 10 && y < 15)
});
let decoded = roundtrip_dict(&src);
assert_bitmaps_eq(&src, &decoded);
}
#[test]
fn dict_checkerboard_many_ccs() {
let src = make_bitmap(8, 8, |x, y| (x + y) % 2 == 0);
let decoded = roundtrip_dict(&src);
assert_bitmaps_eq(&src, &decoded);
}
// Four 2 x 2 squares and four 1 x 3 bars, spread over two rows of the page.
#[test]
fn dict_two_different_shapes_multiple_occurrences() {
let src = make_bitmap(64, 64, |x, y| {
let in_a = |ax: u32, ay: u32| x >= ax && x < ax + 2 && y >= ay && y < ay + 2;
let in_b = |bx: u32, by: u32| x == bx && y >= by && y < by + 3;
in_a(0, 0)
|| in_a(30, 0)
|| in_a(0, 30)
|| in_a(30, 30)
|| in_b(10, 5)
|| in_b(40, 5)
|| in_b(10, 45)
|| in_b(40, 45)
});
let decoded = roundtrip_dict(&src);
assert_bitmaps_eq(&src, &decoded);
let ccs = extract_ccs(&src);
assert_eq!(ccs.len(), 8, "expected 4+4 CCs");
}
#[test]
fn dict_dimension_encoded_correctly() {
let src = make_bitmap(13, 7, |x, y| (x * 3 + y) % 5 == 0);
let decoded = roundtrip_dict(&src);
assert_bitmaps_eq(&src, &decoded);
}
#[test]
fn dict_zero_dimension_returns_empty() {
assert!(encode_jb2_dict(&Bitmap::new(0, 0)).is_empty());
assert!(encode_jb2_dict(&Bitmap::new(8, 0)).is_empty());
assert!(encode_jb2_dict(&Bitmap::new(0, 8)).is_empty());
}
// --- Connected-component extraction ---
#[test]
fn dict_extract_ccs_counts() {
let src = make_bitmap(30, 30, |x, y| {
(x < 3 && y < 3)
|| (x >= 10 && x < 13 && y >= 10 && y < 13)
|| (x >= 25 && x < 28 && y >= 25 && y < 28)
});
let ccs = extract_ccs(&src);
assert_eq!(ccs.len(), 3);
for cc in &ccs {
assert_eq!(cc.bitmap.width, 3);
assert_eq!(cc.bitmap.height, 3);
}
}
// Diagonally touching pixels belong to the same component.
#[test]
fn dict_extract_ccs_8connected() {
let src = make_bitmap(4, 4, |x, y| (x == 0 && y == 0) || (x == 1 && y == 1));
let ccs = extract_ccs(&src);
assert_eq!(ccs.len(), 1);
assert_eq!(ccs[0].bitmap.width, 2);
assert_eq!(ccs[0].bitmap.height, 2);
}
// --- Refinement coding ---
// Two 5 x 5 blocks, the second with one corner pixel missing.
#[test]
fn refine_near_duplicate_glyphs_roundtrip() {
let src = make_bitmap(40, 12, |x, y| {
let in_a = (2..7).contains(&x) && (2..7).contains(&y);
let in_b = (20..25).contains(&x) && (2..7).contains(&y) && !(x == 24 && y == 6);
in_a || in_b
});
let decoded = roundtrip_dict(&src);
assert_bitmaps_eq(&src, &decoded);
}
// A cross-shaped glyph repeated every 12 columns, with a different single
// pixel toggled in repeats 1 to 4.
#[test]
fn refine_text_like_repeats_roundtrip() {
let src = make_bitmap(80, 12, |x, y| {
let local_x = x % 12;
let local_y = y;
let glyph_idx = x / 12;
let base = (local_x == 3 && (1..8).contains(&local_y))
|| (local_y == 4 && (1..7).contains(&local_x));
let perturbed = match glyph_idx {
1 => local_x == 0 && local_y == 0,
2 => local_x == 6 && local_y == 8,
3 => local_x == 6 && local_y == 0,
4 => local_x == 0 && local_y == 8,
_ => false,
};
base ^ perturbed
});
let decoded = roundtrip_dict(&src);
assert_bitmaps_eq(&src, &decoded);
}
// A filled 5 x 5 block next to a 5 x 5 "X": same size but very different pixels.
#[test]
fn refine_far_glyph_falls_back_to_new() {
let src = make_bitmap(40, 12, |x, y| {
let in_block = (2..7).contains(&x) && (2..7).contains(&y);
let in_x = (20..25).contains(&x)
&& (2..7).contains(&y)
&& (x - 20 == y - 2 || x - 20 == 6 - (y - 2));
in_block || in_x
});
let decoded = roundtrip_dict(&src);
assert_bitmaps_eq(&src, &decoded);
}
#[test]
fn refine_packed_hamming_basic() {
let a = vec![0b1010_1010u8, 0b0000_1111u8];
let b = vec![0b1010_1011u8, 0b0000_1111u8];
assert_eq!(packed_hamming(&a, &b), 1);
let c = vec![0u8; 2];
let d = vec![0xff; 2];
assert_eq!(packed_hamming(&c, &d), 16);
}
// --- Shared dictionary and DJVM bundle ---
// Draws `glyph` (rows of bytes, `#` = set pixel) with its top-left corner at (x, y).
fn render_glyph(bm: &mut Bitmap, x: u32, y: u32, glyph: &[&[u8]]) {
for (gy, row) in glyph.iter().enumerate() {
for (gx, &c) in row.iter().enumerate() {
if c == b'#' {
bm.set(x + gx as u32, y + gy as u32, true);
}
}
}
}
// 4 x 5 glyph shaped like the letter "A".
fn glyph_a() -> Vec<&'static [u8]> {
vec![
b" ## " as &[u8],
b"# #" as &[u8],
b"####" as &[u8],
b"# #" as &[u8],
b"# #" as &[u8],
]
}
// 4 x 5 glyph shaped like the letter "B".
fn glyph_b() -> Vec<&'static [u8]> {
vec![
b"### " as &[u8],
b"# #" as &[u8],
b"### " as &[u8],
b"# #" as &[u8],
b"### " as &[u8],
]
}
// Renders `words` (made of `A`/`B`; other bytes are skipped) on one line of
// an 80 x 30 page: 6 px advance per letter, 4 extra px after each word.
fn make_text_page(words: &[&[u8]]) -> Bitmap {
let mut bm = Bitmap::new(80, 30);
let mut x = 4;
for word in words {
for &letter in *word {
let g = match letter {
b'A' => glyph_a(),
b'B' => glyph_b(),
_ => continue,
};
render_glyph(&mut bm, x, 8, &g);
x += 6;
}
x += 4;
}
bm
}
// Asserts equal dimensions and pixels, reporting only the mismatch count.
fn assert_decoded_eq(src: &Bitmap, decoded: &Bitmap) {
assert_eq!(src.width, decoded.width, "width mismatch");
assert_eq!(src.height, decoded.height, "height mismatch");
let mut mismatches = 0u32;
for y in 0..src.height {
for x in 0..src.width {
if src.get(x, y) != decoded.get(x, y) {
mismatches += 1;
}
}
}
assert_eq!(mismatches, 0, "{mismatches} pixel mismatches");
}
// A page encoded against shared symbols decodes correctly when the decoder
// is given the dictionary decoded from the matching Djbz stream.
#[test]
fn djbz_roundtrip_two_glyphs() {
let mut a = Bitmap::new(4, 5);
render_glyph(&mut a, 0, 0, &glyph_a());
let mut b = Bitmap::new(4, 5);
render_glyph(&mut b, 0, 0, &glyph_b());
let djbz = encode_jb2_djbz(&[a.clone(), b.clone()]);
assert!(!djbz.is_empty());
let dict = jb2::decode_dict(&djbz, None).expect("decode_dict");
let mut page = Bitmap::new(20, 8);
render_glyph(&mut page, 2, 2, &glyph_a());
render_glyph(&mut page, 10, 2, &glyph_b());
let sjbz = encode_jb2_dict_with_shared(&page, &[a, b]);
let decoded = jb2::decode(&sjbz, Some(&dict)).expect("decode");
assert_decoded_eq(&page, &decoded);
}
// The bundle must parse and reproduce both pages, and the JB2 payloads
// (Djbz + both Sjbz) must be smaller than two independent encodings.
#[test]
fn shared_dict_smaller_than_independent_for_repeated_pages() {
let p1 = make_text_page(&[b"AABB", b"BABA"]);
let p2 = make_text_page(&[b"AABB", b"BABA"]);
let independent_total = encode_jb2_dict(&p1).len() + encode_jb2_dict(&p2).len();
let bundle = encode_djvm_bundle_jb2(&[p1.clone(), p2.clone()], 2);
assert!(!bundle.is_empty());
let doc = crate::djvu_document::DjVuDocument::parse(&bundle).expect("parse DJVM");
assert_eq!(doc.page_count(), 2);
let d1 = doc
.page(0)
.expect("page 0")
.extract_mask()
.expect("extract_mask 0")
.expect("mask 0 present");
let d2 = doc
.page(1)
.expect("page 1")
.extract_mask()
.expect("extract_mask 1")
.expect("mask 1 present");
assert_decoded_eq(&p1, &d1);
assert_decoded_eq(&p2, &d2);
let shared = cluster_shared_symbols(&[p1.clone(), p2.clone()], 2);
assert!(
!shared.is_empty(),
"two identical pages should produce shared symbols"
);
let djbz = encode_jb2_djbz(&shared);
let sjbz1 = encode_jb2_dict_with_shared(&p1, &shared);
let sjbz2 = encode_jb2_dict_with_shared(&p2, &shared);
let shared_jb2_total = djbz.len() + sjbz1.len() + sjbz2.len();
assert!(
shared_jb2_total < independent_total,
"expected shared jb2 < independent: shared={} independent={}",
shared_jb2_total,
independent_total
);
}
// Only the glyph present on both pages reaches the two-page threshold.
#[test]
fn cluster_promotes_only_repeated_glyphs() {
let mut p1 = Bitmap::new(20, 10);
render_glyph(&mut p1, 2, 2, &glyph_a());
render_glyph(&mut p1, 10, 2, &glyph_b());
let mut p2 = Bitmap::new(20, 10);
render_glyph(&mut p2, 2, 2, &glyph_a());
let shared = cluster_shared_symbols(&[p1, p2], 2);
assert_eq!(shared.len(), 1, "only A should cross the threshold");
assert_eq!(shared[0].width, 4);
assert_eq!(shared[0].height, 5);
}
// With nothing shared, the bundle has no dictionary component and pages
// carry no INCL chunk; it must still parse and decode.
#[test]
fn djvm_bundle_with_no_repeats_still_round_trips() {
let mut p1 = Bitmap::new(20, 10);
render_glyph(&mut p1, 2, 2, &glyph_a());
let mut p2 = Bitmap::new(20, 10);
render_glyph(&mut p2, 2, 2, &glyph_b());
let bundle = encode_djvm_bundle_jb2(&[p1.clone(), p2.clone()], 2);
let doc = crate::djvu_document::DjVuDocument::parse(&bundle).expect("parse DJVM");
assert_eq!(doc.page_count(), 2);
let d1 = doc
.page(0)
.expect("page 0")
.extract_mask()
.expect("extract_mask 0")
.expect("mask 0 present");
let d2 = doc
.page(1)
.expect("page 1")
.extract_mask()
.expect("extract_mask 1")
.expect("mask 1 present");
assert_decoded_eq(&p1, &d1);
assert_decoded_eq(&p2, &d2);
}
}