#[cfg(not(feature = "std"))]
use alloc::{format, string::String, vec, vec::Vec};
use crate::djvu_document::DjVuDocument;
use crate::error::IffError;
use crate::iff;
/// Errors returned by the DJVM operations in this module (`merge`, `split`,
/// `create_indirect`).
#[derive(Debug, thiserror::Error)]
pub enum DjvmError {
/// Failure reported by the IFF layer; converted automatically from
/// [`IffError`] when `?` is applied to an IFF parsing call.
#[error("IFF parse error: {0}")]
Iff(#[from] IffError),
/// Failure reported by the document layer; converted automatically from
/// `DocError` when `?` is applied to a `DjVuDocument` call.
#[error("document error: {0}")]
Doc(#[from] crate::djvu_document::DocError),
/// Nothing to assemble: the input list was empty, or no component could be
/// collected from the inputs.
#[error("no pages to merge")]
EmptyMerge,
/// The half-open page range `start..end` passed to `split` is empty,
/// reversed, or reaches past the last page.
#[error("page range {start}..{end} is out of bounds (document has {count} pages)")]
PageRangeOutOfBounds {
// Requested first page (0-based, inclusive).
start: usize,
// Requested end of the range (0-based, exclusive).
end: usize,
// Number of pages the document reported.
count: usize,
},
}
/// Combine several DjVu files into one bundled `FORM:DJVM` document.
///
/// Every input is parsed with `iff::parse_form` and handled by form type:
///
/// * `DJVU` — the input bytes are copied whole and registered as a page
///   (flag `1`) under the id `p{NNNN}.djvu`.
/// * `DJVM` — every nested `FORM` chunk with at least four payload bytes is
///   re-wrapped as a standalone file and registered under the id
///   `d{doc}p{NNNN}.djvu`; `DJVI` forms get flag `0`, all other forms flag `1`.
/// * any other form type contributes nothing.
///
/// `NNNN` is the running 1-based component number across all inputs and
/// `doc` is the 0-based index of the input document.
///
/// # Errors
///
/// * [`DjvmError::EmptyMerge`] if `documents` is empty or no component was
///   collected from it.
/// * [`DjvmError::Iff`] if any input fails IFF parsing.
///
/// NOTE(review): component ids are generated here rather than taken from the
/// source directory, so references that use the original ids presumably no
/// longer resolve after merging — confirm against the DIRM/INCL handling.
pub fn merge(documents: &[&[u8]]) -> Result<Vec<u8>, DjvmError> {
    /// Wrap a nested `FORM` payload as a standalone `AT&T`-prefixed IFF file.
    fn standalone_form(payload: &[u8]) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(4 + 4 + 4 + payload.len());
        bytes.extend_from_slice(b"AT&T");
        bytes.extend_from_slice(b"FORM");
        bytes.extend_from_slice(&(payload.len() as u32).to_be_bytes());
        bytes.extend_from_slice(payload);
        bytes
    }

    if documents.is_empty() {
        return Err(DjvmError::EmptyMerge);
    }

    // Three parallel lists: serialized component, its directory id, its flag.
    let mut blobs: Vec<Vec<u8>> = Vec::new();
    let mut names: Vec<String> = Vec::new();
    let mut kinds: Vec<u8> = Vec::new();

    for (doc_idx, &doc_data) in documents.iter().enumerate() {
        let form = iff::parse_form(doc_data)?;

        // Single-page file: embed it as-is.
        if &form.form_type == b"DJVU" {
            blobs.push(doc_data.to_vec());
            names.push(format!("p{:04}.djvu", blobs.len()));
            kinds.push(1);
            continue;
        }

        if &form.form_type != b"DJVM" {
            continue;
        }

        // Multi-page file: lift out each nested FORM that is long enough to
        // carry a four-byte form type.
        let nested = form
            .chunks
            .iter()
            .filter(|c| &c.id == b"FORM" && c.data.len() >= 4);
        for chunk in nested {
            let kind = if chunk.data.starts_with(b"DJVI") { 0 } else { 1 };
            blobs.push(standalone_form(chunk.data));
            names.push(format!("d{}p{:04}.djvu", doc_idx, blobs.len()));
            kinds.push(kind);
        }
    }

    if blobs.is_empty() {
        return Err(DjvmError::EmptyMerge);
    }
    build_djvm(&blobs, &names, &kinds)
}
/// Extract the pages `start..end` (0-based, half-open) from a DjVu document.
///
/// * A `DJVU` input asked for `0..1` is returned unchanged.
/// * A one-page range from a `DJVM` input returns that page's `FORM:DJVU`
///   re-wrapped as a standalone file.
/// * Otherwise a new bundled `DJVM` is built that contains every nested
///   `FORM:DJVI` of the input (ids `shared{N}.djvi`, flag `0`) followed by the
///   selected pages (ids `p{NNNN}.djvu` using the original 1-based page
///   number, flag `1`).
///
/// # Errors
///
/// * [`DjvmError::Doc`] / [`DjvmError::Iff`] if the input cannot be parsed.
/// * [`DjvmError::PageRangeOutOfBounds`] if the range is empty, reversed, or
///   extends past the page count reported by `DjVuDocument`.
///
/// NOTE(review): shared components receive freshly generated ids, and the
/// one-page shortcut returns the page without any shared components —
/// presumably pages that reference shared data by id need follow-up; confirm.
pub fn split(doc_data: &[u8], start: usize, end: usize) -> Result<Vec<u8>, DjvmError> {
    /// Wrap a nested `FORM` payload as a standalone `AT&T`-prefixed IFF file.
    fn standalone_form(payload: &[u8]) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(4 + 4 + 4 + payload.len());
        bytes.extend_from_slice(b"AT&T");
        bytes.extend_from_slice(b"FORM");
        bytes.extend_from_slice(&(payload.len() as u32).to_be_bytes());
        bytes.extend_from_slice(payload);
        bytes
    }

    let count = DjVuDocument::parse(doc_data)?.page_count();
    let range_ok = start < end && end <= count;
    if !range_ok {
        return Err(DjvmError::PageRangeOutOfBounds { start, end, count });
    }

    let form = iff::parse_form(doc_data)?;

    // A single-page file asked for its only page: nothing to do.
    if &form.form_type == b"DJVU" && start == 0 && end == 1 {
        return Ok(doc_data.to_vec());
    }

    // Exactly one page out of a bundle: hand back the bare page form.
    if end - start == 1 && &form.form_type == b"DJVM" {
        let wanted = form
            .chunks
            .iter()
            .filter(|c| &c.id == b"FORM" && c.data.starts_with(b"DJVU"))
            .nth(start);
        if let Some(page) = wanted {
            return Ok(standalone_form(page.data));
        }
    }

    let mut blobs: Vec<Vec<u8>> = Vec::new();
    let mut names: Vec<String> = Vec::new();
    let mut kinds: Vec<u8> = Vec::new();

    // Shared components go first so they precede the pages in the bundle.
    let shared = form
        .chunks
        .iter()
        .filter(|c| &c.id == b"FORM" && c.data.starts_with(b"DJVI"));
    for chunk in shared {
        blobs.push(standalone_form(chunk.data));
        names.push(format!("shared{}.djvi", blobs.len()));
        kinds.push(0);
    }

    // Then the requested pages, numbered by their position in the source.
    let selected = form
        .chunks
        .iter()
        .filter(|c| &c.id == b"FORM" && c.data.starts_with(b"DJVU"))
        .enumerate()
        .skip(start)
        .take(end - start);
    for (page_idx, chunk) in selected {
        blobs.push(standalone_form(chunk.data));
        names.push(format!("p{:04}.djvu", page_idx + 1));
        kinds.push(1);
    }

    build_djvm(&blobs, &names, &kinds)
}
/// Serialize a bundled `FORM:DJVM` file from already-encoded components.
///
/// Layout written: `AT&T`, `FORM` + length, `DJVM`, one `DIRM` chunk produced
/// by `build_dirm`, then every component in order. A leading `AT&T` magic on
/// a component is dropped so only its `FORM` chunk is embedded, and every
/// chunk is padded to an even length.
///
/// `build_dirm` emits the per-component offset table as zero placeholders;
/// this function computes where each component's `FORM` tag lands (absolute
/// byte offset from the start of the output, magic included) and writes those
/// values into the table before emitting the directory.
///
/// * `components` — serialized component files, with or without `AT&T`.
/// * `ids` / `flags` — directory id and flag byte for each component, passed
///   through to `build_dirm`.
///
/// Lengths and offsets are stored as 32-bit big-endian values and are
/// truncated if the output exceeds that range.
///
/// NOTE(review): the component sizes inside the compressed part of the
/// directory are still whatever `build_dirm` writes; it does not receive the
/// component lengths.
fn build_djvm(components: &[Vec<u8>], ids: &[String], flags: &[u8]) -> Result<Vec<u8>, DjvmError> {
    /// Bytes of `comp` that get embedded: everything after an `AT&T` magic.
    fn embedded(comp: &[u8]) -> &[u8] {
        if comp.len() >= 4 && &comp[..4] == b"AT&T" {
            &comp[4..]
        } else {
            comp
        }
    }

    /// `len` rounded up to the even boundary IFF chunks are aligned to.
    fn padded(len: usize) -> usize {
        len + (len & 1)
    }

    // `AT&T` + `FORM` + 32-bit length: not counted in the FORM body size.
    const OUTER_HEADER_LEN: usize = 12;
    // Chunk id + 32-bit length.
    const CHUNK_HEADER_LEN: usize = 8;
    // DIRM payload prefix: flags/version byte + 16-bit component count.
    const DIRM_PREFIX_LEN: usize = 3;

    let n = components.len();
    let mut dirm_data = build_dirm(n, flags, ids);

    // Walk the final layout once to learn every component's position. The
    // directory length does not depend on the offset values, so this can be
    // done before the table is filled in.
    let mut cursor = OUTER_HEADER_LEN + 4 + CHUNK_HEADER_LEN + padded(dirm_data.len());
    let mut offsets: Vec<u32> = Vec::with_capacity(n);
    for comp in components {
        offsets.push(cursor as u32);
        cursor += padded(embedded(comp).len());
    }
    let body_size = cursor - OUTER_HEADER_LEN;

    // Fill the offset table, but only if the directory really has the
    // bundled layout (bundled bit set, room for `n` four-byte slots).
    let table_end = DIRM_PREFIX_LEN + 4 * n;
    let is_bundled = matches!(dirm_data.first(), Some(b) if b & 0x80 != 0);
    if is_bundled && dirm_data.len() >= table_end {
        let table = &mut dirm_data[DIRM_PREFIX_LEN..table_end];
        for (slot, offset) in table.chunks_exact_mut(4).zip(&offsets) {
            slot.copy_from_slice(&offset.to_be_bytes());
        }
    }

    let mut output = Vec::with_capacity(OUTER_HEADER_LEN + body_size);
    output.extend_from_slice(b"AT&T");
    output.extend_from_slice(b"FORM");
    output.extend_from_slice(&(body_size as u32).to_be_bytes());
    output.extend_from_slice(b"DJVM");

    output.extend_from_slice(b"DIRM");
    output.extend_from_slice(&(dirm_data.len() as u32).to_be_bytes());
    output.extend_from_slice(&dirm_data);
    output.resize(output.len() + (dirm_data.len() & 1), 0);

    for comp in components {
        let payload = embedded(comp);
        output.extend_from_slice(payload);
        output.resize(output.len() + (payload.len() & 1), 0);
    }

    Ok(output)
}
/// Build the index file of an indirect DjVu document.
///
/// The result is a `FORM:DJVM` that contains only a `DIRM` chunk produced by
/// `build_dirm_indirect`; the pages themselves live in separate files named
/// by `page_names`. Every entry is registered as a page (flag `1`), in the
/// order given.
///
/// # Errors
///
/// Returns [`DjvmError::EmptyMerge`] when `page_names` is empty.
pub fn create_indirect(page_names: &[&str]) -> Result<Vec<u8>, DjvmError> {
    if page_names.is_empty() {
        return Err(DjvmError::EmptyMerge);
    }

    let count = page_names.len();
    // `String::from` rather than `.to_string()`: the latter needs the
    // `ToString` trait in scope, which the no-`std` import list of this file
    // does not provide.
    let ids: Vec<String> = page_names.iter().map(|&name| String::from(name)).collect();
    let flags: Vec<u8> = vec![1u8; count];
    let dirm_data = build_dirm_indirect(count, &flags, &ids);

    // IFF chunks are aligned to even offsets; an odd payload gets one pad byte.
    let dirm_pad = dirm_data.len() & 1;
    // Form type + DIRM chunk header + DIRM payload (+ pad).
    let body_size = 4 + 8 + dirm_data.len() + dirm_pad;

    let mut output = Vec::with_capacity(4 + 4 + 4 + body_size);
    output.extend_from_slice(b"AT&T");
    output.extend_from_slice(b"FORM");
    output.extend_from_slice(&(body_size as u32).to_be_bytes());
    output.extend_from_slice(b"DJVM");
    output.extend_from_slice(b"DIRM");
    output.extend_from_slice(&(dirm_data.len() as u32).to_be_bytes());
    output.extend_from_slice(&dirm_data);
    if dirm_pad == 1 {
        output.push(0);
    }
    Ok(output)
}
/// Build the `DIRM` payload for an indirect (non-bundled) document.
///
/// The payload starts with three plain bytes — `0x00` (bundled bit clear)
/// and `count` as a 16-bit big-endian number — followed by the output of
/// `bzz_encode` over this record:
///
/// 1. three zero bytes per component,
/// 2. the `flags` bytes,
/// 3. every id as a NUL-terminated string,
/// 4. every id again as a NUL-terminated string,
/// 5. `count` zero bytes.
///
/// Only the low 16 bits of `count` are stored in the header.
fn build_dirm_indirect(count: usize, flags: &[u8], ids: &[String]) -> Vec<u8> {
    // Record that goes through the compressor.
    let mut meta: Vec<u8> = vec![0u8; 3 * count];
    meta.extend_from_slice(flags);
    for _pass in 0..2 {
        for id in ids {
            meta.extend_from_slice(id.as_bytes());
            meta.push(0);
        }
    }
    meta.resize(meta.len() + count, 0);

    // Plain header, then the compressed record.
    let mut data = vec![0x00, (count >> 8) as u8, count as u8];
    let packed = crate::bzz_encode::bzz_encode(&meta);
    data.extend_from_slice(&packed);
    data
}
/// Build the `DIRM` payload for a bundled document.
///
/// The uncompressed head is `0x80` (bundled bit set), `count` as a 16-bit
/// big-endian number, and one four-byte offset slot per component. The slots
/// are written as zeros here. The head is followed by the output of
/// `bzz_encode` over this record:
///
/// 1. three zero bytes per component,
/// 2. the `flags` bytes,
/// 3. every id as a NUL-terminated string,
/// 4. every id again as a NUL-terminated string,
/// 5. `count` zero bytes.
///
/// Only the low 16 bits of `count` are stored in the header.
fn build_dirm(count: usize, flags: &[u8], ids: &[String]) -> Vec<u8> {
    // Plain head: flag byte, component count, zeroed offset table.
    let mut data = vec![0x80, (count >> 8) as u8, count as u8];
    data.resize(data.len() + 4 * count, 0);

    // Record that goes through the compressor.
    let mut meta: Vec<u8> = vec![0u8; 3 * count];
    meta.extend_from_slice(flags);
    for _pass in 0..2 {
        for id in ids {
            meta.extend_from_slice(id.as_bytes());
            meta.push(0);
        }
    }
    meta.resize(meta.len() + count, 0);

    let packed = crate::bzz_encode::bzz_encode(&meta);
    data.extend_from_slice(&packed);
    data
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute path of a file under the crate's `tests/fixtures` directory.
    fn fixture_path(name: &str) -> std::path::PathBuf {
        std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("tests/fixtures")
            .join(name)
    }

    /// Read a fixture file, or return `None` (with a note on stderr) when it
    /// is not present so fixture-dependent tests can skip visibly.
    fn load_fixture(name: &str) -> Option<Vec<u8>> {
        let path = fixture_path(name);
        if !path.exists() {
            eprintln!("skipping: fixture {name} not found");
            return None;
        }
        Some(std::fs::read(&path).expect("read fixture"))
    }

    #[test]
    fn merge_empty_returns_error() {
        assert!(matches!(merge(&[]), Err(DjvmError::EmptyMerge)));
    }

    #[test]
    fn split_single_page_from_multipage() {
        let Some(data) = load_fixture("DjVu3Spec_bundled.djvu") else {
            return;
        };
        let doc = DjVuDocument::parse(&data).expect("parse");
        assert!(doc.page_count() > 1, "need multipage fixture");

        let page0 = split(&data, 0, 1).expect("split page 0");
        let form = iff::parse_form(&page0).expect("parse split page");
        assert_eq!(&form.form_type, b"DJVU");
    }

    #[test]
    fn merge_two_single_page_files() {
        let Some(irish) = load_fixture("irish.djvu") else {
            return;
        };
        let data = merge(&[&irish, &irish]).expect("merge");
        let form = iff::parse_form(&data).expect("parse merged");
        assert_eq!(&form.form_type, b"DJVM");
    }

    #[test]
    fn split_out_of_bounds() {
        let Some(data) = load_fixture("irish.djvu") else {
            return;
        };
        // Check the variant, not just `is_err()`, so an unrelated parse
        // failure cannot make this test pass by accident.
        assert!(matches!(
            split(&data, 0, 5),
            Err(DjvmError::PageRangeOutOfBounds {
                start: 0,
                end: 5,
                ..
            })
        ));
    }

    #[test]
    fn create_indirect_empty_returns_error() {
        assert!(matches!(create_indirect(&[]), Err(DjvmError::EmptyMerge)));
    }

    #[test]
    fn create_indirect_parses_with_resolver() {
        let indirect_bytes = create_indirect(&["chicken.djvu"]).expect("create_indirect");
        let form = iff::parse_form(&indirect_bytes).expect("parse form");
        assert_eq!(&form.form_type, b"DJVM");
        let dirm = form.chunks.iter().find(|c| &c.id == b"DIRM").expect("DIRM");
        assert_eq!(
            dirm.data[0] & 0x80,
            0,
            "indirect DIRM must not have bundled bit set"
        );

        // The structural checks above need no fixture; resolving does.
        let Some(chicken_data) = load_fixture("chicken.djvu") else {
            return;
        };
        let doc = DjVuDocument::parse_with_resolver(
            &indirect_bytes,
            Some(
                move |name: &str| -> Result<Vec<u8>, crate::djvu_document::DocError> {
                    if name == "chicken.djvu" {
                        Ok(chicken_data.clone())
                    } else {
                        Err(crate::djvu_document::DocError::IndirectResolve(
                            name.to_string(),
                        ))
                    }
                },
            ),
        )
        .expect("parse indirect with resolver");
        assert_eq!(doc.page_count(), 1);
        let page = doc.page(0).unwrap();
        assert_eq!(page.width(), 181);
        assert_eq!(page.height(), 240);
    }

    #[test]
    fn create_indirect_multipage() {
        let indirect_bytes =
            create_indirect(&["page1.djvu", "page2.djvu", "page3.djvu"]).expect("create_indirect");
        let form = iff::parse_form(&indirect_bytes).expect("parse");
        assert_eq!(&form.form_type, b"DJVM");
        let dirm = form.chunks.iter().find(|c| &c.id == b"DIRM").expect("DIRM");
        let nfiles = u16::from_be_bytes([dirm.data[1], dirm.data[2]]) as usize;
        assert_eq!(nfiles, 3);
    }

    #[test]
    fn parse_from_dir_indirect() {
        let chicken_path = fixture_path("chicken.djvu");
        if !chicken_path.exists() {
            eprintln!("skipping: fixture chicken.djvu not found");
            return;
        }

        // Per-process directory so concurrent test runs do not share files.
        let tmp =
            std::env::temp_dir().join(format!("djvu_indirect_test_{}", std::process::id()));
        std::fs::create_dir_all(&tmp).unwrap();

        let component_name = "p0001.djvu";
        std::fs::copy(&chicken_path, tmp.join(component_name)).unwrap();

        let index_bytes = create_indirect(&[component_name]).expect("create_indirect");
        let index_path = tmp.join("index.djvu");
        std::fs::write(&index_path, &index_bytes).unwrap();

        let index_data = std::fs::read(&index_path).unwrap();
        let doc = DjVuDocument::parse_from_dir(&index_data, &tmp).expect("parse_from_dir");
        assert_eq!(doc.page_count(), 1);
        assert_eq!(doc.page(0).unwrap().width(), 181);

        // Best-effort cleanup, done last in case components are read lazily.
        let _ = std::fs::remove_dir_all(&tmp);
    }
}