1#![cfg_attr(not(feature = "std"), no_std)]
29#![deny(unsafe_code)]
30
31#[cfg(not(feature = "std"))]
32extern crate alloc;
33
34#[cfg(not(feature = "std"))]
35use alloc::{string::String, vec::Vec};
36#[cfg(feature = "std")]
37use std::{string::String, vec::Vec};
38
/// Errors reported by the borrowing parser [`parse_form`] and its helpers.
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum IffError {
    /// The input is shorter than the 16 bytes `parse_form` requires before it
    /// looks at any field.
    #[error("input is too short to be a valid IFF file")]
    TooShort,

    /// The first four bytes are not `AT&T`; `got` holds the bytes found instead.
    #[error("bad magic bytes: expected AT&T, got {got:?}")]
    BadMagic { got: [u8; 4] },

    /// A FORM type that is not recognised; `id` is the offending type.
    ///
    /// NOTE(review): the parsing code visible in this file never constructs
    /// this variant (`parse_form` accepts any form type) — confirm whether it
    /// is still needed.
    #[error("unknown FORM type: {id:?}")]
    UnknownFormType { id: [u8; 4] },

    /// A chunk header declares more payload bytes than remain in the input.
    #[error(
        "chunk {:?} claims {} bytes but only {} are available",
        id,
        claimed,
        available
    )]
    ChunkTooLong {
        /// Four-byte id of the chunk whose length is too large.
        id: [u8; 4],
        /// Payload length read from the chunk header.
        claimed: u32,
        /// Number of bytes that actually follow the chunk header.
        available: usize,
    },

    /// A read ran past the end of the input, or the container header is
    /// malformed (`parse_form` also uses this when the second tag is not
    /// `FORM` or the FORM length is below 4).
    #[error("unexpected end of input (truncated IFF data)")]
    Truncated,
}
76
/// Error type returned by the owning parser [`parse`]; also exported under the
/// name `Error`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LegacyError {
    /// The input ended before a complete chunk header or payload could be read.
    UnexpectedEof,
    /// Displayed as "invalid magic number".
    InvalidMagic,
    /// A length field is invalid; the chunk parser reports this for a `FORM`
    /// whose declared length is below the 4 bytes of its secondary id.
    InvalidLength,
    /// A required chunk is missing; the payload is shown as the chunk name.
    MissingChunk(&'static str),
    /// The input uses something the parser does not support; the payload is a
    /// static description.
    Unsupported(&'static str),
    /// Any other format problem, described by an owned message.
    FormatError(String),
}
93
94impl core::fmt::Display for LegacyError {
95 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
96 match self {
97 LegacyError::UnexpectedEof => write!(f, "unexpected end of input"),
98 LegacyError::InvalidMagic => write!(f, "invalid magic number"),
99 LegacyError::InvalidLength => write!(f, "invalid length"),
100 LegacyError::MissingChunk(id) => write!(f, "missing required chunk: {}", id),
101 LegacyError::Unsupported(msg) => write!(f, "unsupported: {}", msg),
102 LegacyError::FormatError(msg) => write!(f, "format error: {}", msg),
103 }
104 }
105}
106
// Marks `LegacyError` as a standard error type. The impl is compiled only when
// the `std` feature is enabled, because it names `std::error::Error`.
#[cfg(feature = "std")]
impl std::error::Error for LegacyError {}
109
/// `Error` is another name for [`LegacyError`]; the owning parser's signatures
/// use this shorter name.
pub use LegacyError as Error;
112
/// Four-byte chunk identifier, stored exactly as it appears in the input
/// (for example `b"FORM"`).
pub type ChunkId = [u8; 4];
117
118#[derive(Debug, Clone)]
120pub enum Chunk {
121 Form {
123 secondary_id: ChunkId,
125 length: u32,
128 children: Vec<Chunk>,
130 },
131 Leaf {
133 id: ChunkId,
135 data: Vec<u8>,
137 },
138}
139
140impl Chunk {
141 pub fn data(&self) -> &[u8] {
143 match self {
144 Chunk::Form { .. } => &[],
145 Chunk::Leaf { data, .. } => data,
146 }
147 }
148
149 pub fn children(&self) -> &[Chunk] {
151 match self {
152 Chunk::Form { children, .. } => children,
153 Chunk::Leaf { .. } => &[],
154 }
155 }
156
157 pub fn payload_length(&self) -> u32 {
163 match self {
164 Chunk::Form { length, .. } => *length,
165 Chunk::Leaf { data, .. } => data.len() as u32,
166 }
167 }
168
169 pub fn find_first(&self, target_id: &[u8; 4]) -> Option<&Chunk> {
171 self.children().iter().find(|c| match c {
172 Chunk::Leaf { id, .. } => id == target_id,
173 _ => false,
174 })
175 }
176
177 pub fn find_all(&self, target_id: &[u8; 4]) -> Vec<&Chunk> {
179 self.children()
180 .iter()
181 .filter(|c| match c {
182 Chunk::Leaf { id, .. } => id == target_id,
183 _ => false,
184 })
185 .collect()
186 }
187}
188
/// An owned, fully parsed file as produced by [`parse`] and consumed by
/// [`emit`].
#[derive(Debug, Clone)]
pub struct DjvuFile {
    /// The top-level chunk of the file.
    pub root: Chunk,
}
194
195pub fn parse(data: &[u8]) -> Result<DjvuFile, Error> {
199 if data.len() < 4 {
200 return Err(Error::UnexpectedEof);
201 }
202 let (magic, rest) = if &data[..4] == b"AT&T" {
204 (&data[..4], &data[4..])
205 } else {
206 (&data[..0], data)
208 };
209 let _ = magic;
210
211 let (root, _) = parse_chunk(rest, 0)?;
212 Ok(DjvuFile { root })
213}
214
215fn parse_chunk(data: &[u8], offset: usize) -> Result<(Chunk, usize), Error> {
218 if offset + 8 > data.len() {
219 return Err(Error::UnexpectedEof);
220 }
221
222 let id: ChunkId = [
223 data[offset],
224 data[offset + 1],
225 data[offset + 2],
226 data[offset + 3],
227 ];
228 let length = u32::from_be_bytes([
229 data[offset + 4],
230 data[offset + 5],
231 data[offset + 6],
232 data[offset + 7],
233 ]);
234
235 let payload_start = offset + 8;
236 let payload_end = payload_start + length as usize;
237
238 if payload_end > data.len() {
239 return Err(Error::UnexpectedEof);
240 }
241
242 let total = 8 + length as usize;
244 let padded_total = total + (total % 2);
245
246 if &id == b"FORM" {
247 if length < 4 {
248 return Err(Error::InvalidLength);
249 }
250 let secondary_id: ChunkId = [
251 data[payload_start],
252 data[payload_start + 1],
253 data[payload_start + 2],
254 data[payload_start + 3],
255 ];
256
257 let children_start = payload_start + 4;
258 let children = parse_children(data, children_start, payload_end)?;
259
260 Ok((
261 Chunk::Form {
262 secondary_id,
263 length,
264 children,
265 },
266 padded_total,
267 ))
268 } else {
269 let chunk_data = data[payload_start..payload_end].to_vec();
270 Ok((
271 Chunk::Leaf {
272 id,
273 data: chunk_data,
274 },
275 padded_total,
276 ))
277 }
278}
279
280fn parse_children(data: &[u8], start: usize, end: usize) -> Result<Vec<Chunk>, Error> {
282 let mut chunks = Vec::new();
283 let mut pos = start;
284
285 while pos < end {
286 if pos + 8 > end {
287 break;
289 }
290 let (chunk, consumed) = parse_chunk(data, pos)?;
291 chunks.push(chunk);
292 pos += consumed;
293 }
294
295 Ok(chunks)
296}
297
298pub fn emit(file: &DjvuFile) -> Vec<u8> {
309 let mut out = Vec::with_capacity(64);
310 out.extend_from_slice(b"AT&T");
311 emit_chunk(&file.root, &mut out);
312 out
313}
314
/// Appends the encoding of `chunk` to `out`, with padding enabled for the
/// chunk itself (delegates to `emit_chunk_inner` with the suppress flag off).
fn emit_chunk(chunk: &Chunk, out: &mut Vec<u8>) {
    emit_chunk_inner(chunk, out, false);
}
318
319fn emit_chunk_inner(chunk: &Chunk, out: &mut Vec<u8>, suppress_inner_pad: bool) {
320 match chunk {
321 Chunk::Form {
322 secondary_id,
323 length: stored_length,
324 children,
325 } => {
326 let suppress_last_pad = (*stored_length & 1) == 1;
335 let mut payload: Vec<u8> = Vec::new();
336 payload.extend_from_slice(secondary_id);
337 let n = children.len();
338 for (i, child) in children.iter().enumerate() {
339 let last = i + 1 == n;
340 emit_chunk_inner(child, &mut payload, last && suppress_last_pad);
341 }
342 let len = payload.len() as u32;
343 out.extend_from_slice(b"FORM");
344 out.extend_from_slice(&len.to_be_bytes());
345 out.extend_from_slice(&payload);
346 let total = 8 + payload.len();
349 if !suppress_inner_pad && total % 2 == 1 {
350 out.push(0);
351 }
352 }
353 Chunk::Leaf { id, data } => {
354 let len = data.len() as u32;
355 out.extend_from_slice(id);
356 out.extend_from_slice(&len.to_be_bytes());
357 out.extend_from_slice(data);
358 let total = 8 + data.len();
359 if !suppress_inner_pad && total % 2 == 1 {
360 out.push(0);
361 }
362 }
363 }
364}
365
/// A chunk that borrows its payload from the input buffer instead of copying
/// it.
#[derive(Debug, Clone, Copy)]
pub struct IffChunk<'a> {
    /// Four-byte chunk identifier.
    pub id: [u8; 4],
    /// Payload bytes exactly as declared by the chunk length (no pad byte).
    pub data: &'a [u8],
}
380
/// Result of [`parse_form`]: the outer FORM's type and its chunks, borrowed
/// from the input.
#[derive(Debug)]
pub struct Form<'a> {
    /// The four bytes that follow the outer FORM's length field.
    pub form_type: [u8; 4],
    /// Chunks found directly inside the outer FORM, in input order. Chunks are
    /// not expanded recursively; every chunk is kept as id plus raw payload.
    pub chunks: Vec<IffChunk<'a>>,
}
389
390pub fn parse_form(data: &[u8]) -> Result<Form<'_>, IffError> {
402 if data.len() < 16 {
404 return Err(IffError::TooShort);
405 }
406
407 let magic = read_4(data, 0)?;
409 if &magic != b"AT&T" {
410 return Err(IffError::BadMagic { got: magic });
411 }
412
413 let form_id = read_4(data, 4)?;
415 if &form_id != b"FORM" {
416 return Err(IffError::Truncated);
417 }
418
419 let form_len = read_u32_be(data, 8)? as usize;
421
422 let form_data_end = 12_usize.checked_add(form_len).ok_or(IffError::Truncated)?;
424 if form_data_end > data.len() {
425 return Err(IffError::ChunkTooLong {
426 id: *b"FORM",
427 claimed: form_len as u32,
428 available: data.len().saturating_sub(12),
429 });
430 }
431
432 if form_len < 4 {
434 return Err(IffError::Truncated);
435 }
436 let form_type = read_4(data, 12)?;
437
438 let body = data.get(16..form_data_end).ok_or(IffError::Truncated)?;
440
441 let chunks = parse_iff_chunks(body)?;
442
443 Ok(Form { form_type, chunks })
444}
445
446fn parse_iff_chunks(mut buf: &[u8]) -> Result<Vec<IffChunk<'_>>, IffError> {
451 let mut chunks = Vec::new();
452
453 while buf.len() >= 8 {
454 let id = read_4(buf, 0)?;
455 let data_len = read_u32_be(buf, 4)? as usize;
456
457 let data_start = 8_usize;
458 let data_end = data_start
459 .checked_add(data_len)
460 .ok_or(IffError::Truncated)?;
461
462 if data_end > buf.len() {
463 return Err(IffError::ChunkTooLong {
464 id,
465 claimed: data_len as u32,
466 available: buf.len().saturating_sub(data_start),
467 });
468 }
469
470 let chunk_data = buf.get(data_start..data_end).ok_or(IffError::Truncated)?;
471 chunks.push(IffChunk {
472 id,
473 data: chunk_data,
474 });
475
476 let padded_len = data_len + (data_len & 1);
478 let next = data_start
479 .checked_add(padded_len)
480 .ok_or(IffError::Truncated)?;
481
482 buf = buf.get(next.min(buf.len())..).ok_or(IffError::Truncated)?;
484 }
485
486 Ok(chunks)
487}
488
489#[inline]
491fn read_4(data: &[u8], offset: usize) -> Result<[u8; 4], IffError> {
492 data.get(offset..offset + 4)
493 .and_then(|s| s.try_into().ok())
494 .ok_or(IffError::Truncated)
495}
496
497#[inline]
499fn read_u32_be(data: &[u8], offset: usize) -> Result<u32, IffError> {
500 let b = read_4(data, offset)?;
501 Ok(u32::from_be_bytes(b))
502}
503
/// Renders the chunk tree of `file` as text, one line per chunk, starting at
/// indentation depth 1. Test builds only.
#[cfg(test)]
pub fn dump(file: &DjvuFile) -> String {
    let mut text = String::new();
    dump_chunk(&file.root, 1, &mut text);
    text
}
513
/// Appends one line for `chunk` to `out`, then one line for each descendant at
/// increasing depth. Test builds only.
///
/// A `Form` is printed as `FORM:<secondary id> [<stored length>]`, a leaf as
/// `<id> [<payload size>]`. Ids that are not valid UTF-8 are shown as `????`.
/// Every line is prefixed by the indent string repeated `depth` times.
#[cfg(test)]
fn dump_chunk(chunk: &Chunk, depth: usize, out: &mut String) {
    let indent = " ".repeat(depth);

    // Print this chunk's own line and pick the children to descend into.
    let nested: &[Chunk] = match chunk {
        Chunk::Leaf { id, data } => {
            let name = std::str::from_utf8(id).unwrap_or("????");
            out.push_str(&format!("{}{} [{}] \n", indent, name, data.len()));
            &[]
        }
        Chunk::Form {
            secondary_id,
            length,
            children,
        } => {
            let name = std::str::from_utf8(secondary_id).unwrap_or("????");
            out.push_str(&format!("{}FORM:{} [{}] \n", indent, name, length));
            children.as_slice()
        }
    };

    for child in nested {
        dump_chunk(child, depth + 1, out);
    }
}
535
// Tests for both parsers: structure comparisons against golden dump files for
// the owning parser, and unit tests on synthetic and real inputs for the
// borrowing parser.
#[cfg(test)]
mod tests {
    use super::*;

    /// Directory with the sample `.djvu` files, relative to the crate manifest
    /// directory.
    fn assets_path() -> std::path::PathBuf {
        std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("../../references/djvujs/library/assets")
    }

    /// Directory with the expected structure dumps, relative to the crate
    /// manifest directory.
    fn golden_path() -> std::path::PathBuf {
        std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../tests/golden/iff")
    }

    /// Reduces a dump to comparable lines: blank lines are dropped, trailing
    /// whitespace is removed, and everything after the first `]` on a line is
    /// cut off.
    fn normalize_dump(input: &str) -> Vec<String> {
        input
            .lines()
            .filter(|l| !l.trim().is_empty())
            .map(|line| {
                let trimmed = line.trim_end();
                if let Some(bracket_end) = trimmed.find(']') {
                    let structural = &trimmed[..=bracket_end];
                    structural.trim_end().to_string()
                } else {
                    trimmed.to_string()
                }
            })
            .collect()
    }

    /// Parses `djvu_file` from the assets directory, dumps it, and asserts
    /// that the normalized dump equals the normalized contents of
    /// `golden_file`, line by line.
    fn assert_structure_matches(djvu_file: &str, golden_file: &str) {
        let data = std::fs::read(assets_path().join(djvu_file)).unwrap();
        let file = parse(&data).unwrap();
        let actual = dump(&file);
        let expected = std::fs::read_to_string(golden_path().join(golden_file)).unwrap();

        let actual_lines = normalize_dump(&actual);
        let expected_lines = normalize_dump(&expected);

        // Compare counts first so a missing or extra line is reported clearly.
        assert_eq!(
            actual_lines.len(),
            expected_lines.len(),
            "Line count mismatch for {} ({} vs {})",
            djvu_file,
            actual_lines.len(),
            expected_lines.len()
        );

        for (i, (a, e)) in actual_lines.iter().zip(expected_lines.iter()).enumerate() {
            assert_eq!(
                a,
                e,
                "Line {} mismatch for {}\n actual: {:?}\n expected: {:?}",
                i + 1,
                djvu_file,
                a,
                e
            );
        }
    }

    /// The owning parser yields a `FORM:DJVU` root with two children for
    /// `boy_jb2.djvu`.
    #[test]
    fn parse_boy_jb2_legacy() {
        let data = std::fs::read(assets_path().join("boy_jb2.djvu")).unwrap();
        let file = parse(&data).unwrap();

        match &file.root {
            Chunk::Form {
                secondary_id,
                children,
                ..
            } => {
                assert_eq!(secondary_id, b"DJVU");
                assert_eq!(children.len(), 2);
            }
            _ => panic!("expected FORM root"),
        }
    }

    // Golden-structure tests: one sample file each.

    #[test]
    fn structure_boy_jb2() {
        assert_structure_matches("boy_jb2.djvu", "boy_jb2.dump");
    }

    #[test]
    fn structure_boy() {
        assert_structure_matches("boy.djvu", "boy.dump");
    }

    #[test]
    fn structure_chicken() {
        assert_structure_matches("chicken.djvu", "chicken.dump");
    }

    #[test]
    fn structure_carte() {
        assert_structure_matches("carte.djvu", "carte.dump");
    }

    #[test]
    fn structure_navm_fgbz() {
        assert_structure_matches("navm_fgbz.djvu", "navm_fgbz.dump");
    }

    #[test]
    fn structure_colorbook() {
        assert_structure_matches("colorbook.djvu", "colorbook.dump");
    }

    #[test]
    fn structure_djvu3spec_bundled() {
        assert_structure_matches("DjVu3Spec_bundled.djvu", "djvu3spec_bundled.dump");
    }

    #[test]
    fn structure_big_scanned_page() {
        assert_structure_matches("big-scanned-page.djvu", "big_scanned_page.dump");
    }

    /// Builds a small in-memory file: `AT&T`, then a `FORM` of type `DJVU`
    /// holding a single `INFO` chunk with a 10-byte payload.
    fn minimal_djvu_bytes() -> Vec<u8> {
        let info_data: &[u8] = &[
            0x00, 0xB5, 0x00, 0xF0, 0x18, 0x00, 0x64, 0x00, 0x16, 0x00,
        ];
        let info_len = info_data.len() as u32;

        // The INFO chunk: id, big-endian length, payload.
        let mut chunk = Vec::new();
        chunk.extend_from_slice(b"INFO");
        chunk.extend_from_slice(&info_len.to_be_bytes());
        chunk.extend_from_slice(info_data);

        // The FORM payload: form type followed by the chunk.
        let mut form_body = Vec::new();
        form_body.extend_from_slice(b"DJVU");
        form_body.extend_from_slice(&chunk);

        let form_len = form_body.len() as u32;

        let mut file = Vec::new();
        file.extend_from_slice(b"AT&T");
        file.extend_from_slice(b"FORM");
        file.extend_from_slice(&form_len.to_be_bytes());
        file.extend_from_slice(&form_body);

        file
    }

    #[test]
    fn empty_input_is_error() {
        let result = parse_form(&[]);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), IffError::TooShort);
    }

    #[test]
    fn short_input_is_error() {
        let result = parse_form(&[0u8; 10]);
        assert!(result.is_err());
        assert_eq!(result.unwrap_err(), IffError::TooShort);
    }

    #[test]
    fn bad_magic_is_error() {
        // Overwrite the four magic bytes.
        let mut data = minimal_djvu_bytes();
        data[0] = 0xFF;
        data[1] = 0xFF;
        data[2] = 0xFF;
        data[3] = 0xFF;

        let result = parse_form(&data);
        assert!(result.is_err());
        assert_eq!(
            result.unwrap_err(),
            IffError::BadMagic {
                got: [0xFF, 0xFF, 0xFF, 0xFF]
            }
        );
    }

    #[test]
    fn valid_single_page_parses() {
        let data = minimal_djvu_bytes();
        let form = parse_form(&data).expect("should parse successfully");

        assert_eq!(&form.form_type, b"DJVU");
        assert_eq!(form.chunks.len(), 1);
        assert_eq!(&form.chunks[0].id, b"INFO");
        assert_eq!(form.chunks[0].data.len(), 10);
    }

    #[test]
    fn truncated_chunk_is_error() {
        // Drop the last four bytes so the declared lengths overrun the input.
        let mut data = minimal_djvu_bytes();
        let new_len = data.len() - 4;
        data.truncate(new_len);

        let result = parse_form(&data);
        assert!(result.is_err());
        match result.unwrap_err() {
            IffError::ChunkTooLong { .. } | IffError::Truncated => {}
            other => panic!("expected ChunkTooLong or Truncated, got {:?}", other),
        }
    }

    #[test]
    fn unknown_form_type_allowed() {
        // Bytes 12..16 hold the form type.
        let mut data = minimal_djvu_bytes();
        data[12] = b'X';
        data[13] = b'X';
        data[14] = b'X';
        data[15] = b'X';

        let form = parse_form(&data).expect("unknown form type should still parse");
        assert_eq!(&form.form_type, b"XXXX");
    }

    #[test]
    fn real_chicken_djvu_parses() {
        let path = assets_path().join("chicken.djvu");
        let data = std::fs::read(&path).expect("chicken.djvu must exist");
        let form = parse_form(&data).expect("chicken.djvu should parse");

        assert_eq!(&form.form_type, b"DJVU");
        assert!(!form.chunks.is_empty(), "must have at least one chunk");
        assert_eq!(&form.chunks[0].id, b"INFO");
        assert!(form.chunks[0].data.len() >= 10);
    }

    #[test]
    fn real_multipage_djvu_parses() {
        let path = assets_path().join("navm_fgbz.djvu");
        let data = std::fs::read(&path).expect("navm_fgbz.djvu must exist");
        let form = parse_form(&data).expect("navm_fgbz.djvu should parse");

        assert_eq!(&form.form_type, b"DJVM");
        assert!(!form.chunks.is_empty());
    }

    /// A 5-byte chunk followed by its pad byte must not shift the chunk that
    /// comes after it.
    #[test]
    fn odd_length_chunk_padding() {
        let chunk1_data: &[u8] = &[0xAA, 0xBB, 0xCC, 0xDD, 0xEE];
        let chunk2_data: &[u8] = &[0x01, 0x02];
        let mut form_body: Vec<u8> = Vec::new();
        form_body.extend_from_slice(b"DJVU");

        form_body.extend_from_slice(b"TST1");
        form_body.extend_from_slice(&5u32.to_be_bytes());
        form_body.extend_from_slice(chunk1_data);
        // Pad byte after the odd-length payload.
        form_body.push(0x00);
        form_body.extend_from_slice(b"TST2");
        form_body.extend_from_slice(&2u32.to_be_bytes());
        form_body.extend_from_slice(chunk2_data);

        let form_len = form_body.len() as u32;

        let mut file: Vec<u8> = Vec::new();
        file.extend_from_slice(b"AT&T");
        file.extend_from_slice(b"FORM");
        file.extend_from_slice(&form_len.to_be_bytes());
        file.extend_from_slice(&form_body);

        let form = parse_form(&file).expect("should parse padded chunk");
        assert_eq!(form.chunks.len(), 2);
        assert_eq!(&form.chunks[0].id, b"TST1");
        assert_eq!(form.chunks[0].data, chunk1_data);
        assert_eq!(&form.chunks[1].id, b"TST2");
        assert_eq!(form.chunks[1].data, chunk2_data);
    }
}