Skip to main content

honzo_core/
parse.rs

1use crate::types::{
2    Compression, CoverType, FontEmbedding, HonzoHead, MarkupType, MathType, PmapEntry, TocEntry,
3};
4use crate::HonzoError;
5
/// Four-byte signature expected at the very start of the buffer.
const MAGIC: &[u8; 4] = b"HONO";

/// Size in bytes of the fixed header that follows the magic, written as the sum of its
/// fields in the order they are read: version major/minor (u8 each), minimum reader
/// version (u16), flags (u32), chunk count (u32), four section sizes (u64 each) and a
/// reserved u32.
const HEAD_SIZE: usize = 1 + 1 + 2 + 4 + 4 + 4 * 8 + 4;
8
9#[derive(Debug)]
10pub struct HonzoParser<'buf> {
11    buf: &'buf [u8],
12    head: HonzoHead,
13    toc_offset: usize,
14    toc_entries: u32,
15    data_offset: usize,
16    extra_offset: usize,
17    meta_offset: usize,
18    pmap_offset: usize,
19    pmap_entries: u32,
20}
21
22impl<'buf> HonzoParser<'buf> {
23    pub fn new(buf: &'buf [u8], reader_version: u16) -> Result<Self, HonzoError> {
24        if buf.len() < 4 + HEAD_SIZE {
25            return Err(HonzoError::BufferTooShort);
26        }
27        if &buf[..4] != MAGIC {
28            return Err(HonzoError::InvalidMagic);
29        }
30
31        let end = buf.len();
32        let mut cursor = 4;
33        let version_major = read_u8(buf, &mut cursor, end)?;
34        let version_minor = read_u8(buf, &mut cursor, end)?;
35        let min_reader_version = read_u16(buf, &mut cursor, end)?;
36        let flags = read_u32(buf, &mut cursor, end)?;
37        let chunk_count = read_u32(buf, &mut cursor, end)?;
38        let toc_size = read_u64(buf, &mut cursor, end)?;
39        let data_size = read_u64(buf, &mut cursor, end)?;
40        let extra_size = read_u64(buf, &mut cursor, end)?;
41        let meta_size = read_u64(buf, &mut cursor, end)?;
42        let _reserved = read_u32(buf, &mut cursor, end)?;
43
44        if reader_version < min_reader_version {
45            return Err(HonzoError::ReaderVersionTooOld {
46                required: min_reader_version,
47                have: reader_version,
48            });
49        }
50
51        let toc_offset = 4 + HEAD_SIZE;
52        let data_offset = toc_offset + toc_size as usize;
53        let extra_offset = data_offset + data_size as usize;
54        let meta_offset = extra_offset + extra_size as usize;
55
56        if buf.len() < meta_offset + meta_size as usize {
57            return Err(HonzoError::BufferTooShort);
58        }
59
60        let toc_end = toc_offset + toc_size as usize;
61        if toc_end > buf.len() {
62            return Err(HonzoError::BufferTooShort);
63        }
64
65        let (toc_entries, pmap_offset, pmap_entries) =
66            validate_toc(buf, toc_offset, toc_end, chunk_count)?;
67
68        Ok(Self {
69            buf,
70            head: HonzoHead {
71                version_major,
72                version_minor,
73                min_reader_version,
74                flags,
75                chunk_count,
76                toc_size,
77                data_size,
78                extra_size,
79                meta_size,
80            },
81            toc_offset,
82            toc_entries,
83            data_offset,
84            extra_offset,
85            meta_offset,
86            pmap_offset,
87            pmap_entries,
88        })
89    }
90
91    pub fn head(&self) -> &HonzoHead {
92        &self.head
93    }
94
95    pub fn toc_entries(&self) -> TocEntryIter<'buf> {
96        TocEntryIter::new(self.buf, self.toc_offset, self.toc_entries)
97    }
98
99    pub fn pmap_entries(&self) -> PmapEntryIter<'buf> {
100        PmapEntryIter::new(self.buf, self.pmap_offset, self.pmap_entries)
101    }
102
103    pub fn chunk_bytes(&self, entry: &TocEntry) -> Result<&'buf [u8], HonzoError> {
104        if entry.is_encrypted() {
105            return Err(HonzoError::EncryptedChunk {
106                chunk_id: entry.chunk_id,
107            });
108        }
109        self.chunk_bytes_unchecked(entry)
110    }
111
112    /// Read raw chunk bytes regardless of the encrypted flag.
113    /// Upper layers (honzo-io) handle decryption when a key is provided.
114    pub fn chunk_bytes_unchecked(&self, entry: &TocEntry) -> Result<&'buf [u8], HonzoError> {
115        let start = self.data_offset + entry.offset as usize;
116        let end = start + entry.size_compressed as usize;
117        if end > self.data_offset + self.head.data_size as usize {
118            return Err(HonzoError::Truncated);
119        }
120        if end > self.buf.len() {
121            return Err(HonzoError::BufferTooShort);
122        }
123        Ok(&self.buf[start..end])
124    }
125
126    pub fn meta_bytes(&self) -> Result<&'buf [u8], HonzoError> {
127        let start = self.meta_offset;
128        let end = start + self.head.meta_size as usize;
129        if end > self.buf.len() {
130            return Err(HonzoError::BufferTooShort);
131        }
132        Ok(&self.buf[start..end])
133    }
134
135    pub fn extra_bytes(&self) -> Result<&'buf [u8], HonzoError> {
136        let start = self.extra_offset;
137        let end = start + self.head.extra_size as usize;
138        if end > self.buf.len() {
139            return Err(HonzoError::BufferTooShort);
140        }
141        Ok(&self.buf[start..end])
142    }
143
144    pub fn find_chunk(&self, tag: &[u8; 4]) -> Option<TocEntry<'buf>> {
145        self.toc_entries().find(|entry| &entry.chunk_type == tag)
146    }
147
148    pub fn find_chunk_by_id(&self, id: u32) -> Option<TocEntry<'buf>> {
149        self.toc_entries().find(|entry| entry.chunk_id == id)
150    }
151}
152
153pub struct TocEntryIter<'buf> {
154    buf: &'buf [u8],
155    cursor: usize,
156    remaining: u32,
157}
158
159impl<'buf> TocEntryIter<'buf> {
160    fn new(buf: &'buf [u8], toc_offset: usize, chunk_count: u32) -> Self {
161        let mut cursor = toc_offset;
162        let _ = read_u32(buf, &mut cursor, buf.len()).ok();
163        Self {
164            buf,
165            cursor,
166            remaining: chunk_count,
167        }
168    }
169}
170
171impl<'buf> Iterator for TocEntryIter<'buf> {
172    type Item = TocEntry<'buf>;
173
174    fn next(&mut self) -> Option<Self::Item> {
175        if self.remaining == 0 {
176            return None;
177        }
178
179        let end = self.buf.len();
180        let start = self.cursor;
181        let mut cursor = self.cursor;
182        let chunk_type = read_tag(self.buf, &mut cursor, end).ok()?;
183        if !is_known_chunk(&chunk_type) {
184            return None;
185        }
186        let chunk_id = read_u32(self.buf, &mut cursor, end).ok()?;
187        let offset = read_u64(self.buf, &mut cursor, end).ok()?;
188        let size_compressed = read_u32(self.buf, &mut cursor, end).ok()?;
189        let size_raw = read_u32(self.buf, &mut cursor, end).ok()?;
190        let compression = read_u8(self.buf, &mut cursor, end).ok()?;
191        let content_type_kind = read_u8(self.buf, &mut cursor, end).ok()?;
192        let content_type_value = read_u8(self.buf, &mut cursor, end).ok()?;
193        let cover_type = read_u8(self.buf, &mut cursor, end).ok()?;
194        let flags = read_u8(self.buf, &mut cursor, end).ok()?;
195        let crc32 = read_u32(self.buf, &mut cursor, end).ok()?;
196        let alt_text_len = read_u16(self.buf, &mut cursor, end).ok()? as usize;
197
198        let alt_text = if alt_text_len > 0 {
199            let bytes = read_bytes(self.buf, &mut cursor, alt_text_len, end).ok()?;
200            core::str::from_utf8(bytes).ok()
201        } else {
202            None
203        };
204
205        let mut font_embedding = None;
206        let mut font_license_url = None;
207
208        if &chunk_type == b"FONT" {
209            let embedding = read_u8(self.buf, &mut cursor, end).ok()?;
210            font_embedding = Some(FontEmbedding::from_u8(embedding).ok()?);
211            let url_len = read_u16(self.buf, &mut cursor, end).ok()? as usize;
212            if url_len > 0 {
213                let bytes = read_bytes(self.buf, &mut cursor, url_len, end).ok()?;
214                font_license_url = core::str::from_utf8(bytes).ok();
215            }
216        }
217
218        self.cursor = cursor;
219        self.remaining -= 1;
220
221        let entry = TocEntry {
222            chunk_type,
223            chunk_id,
224            offset,
225            size_compressed,
226            size_raw,
227            compression: Compression::from_u8(compression).ok()?,
228            content_type_kind,
229            content_type_value,
230            cover_type: CoverType::from_u8(cover_type).ok()?,
231            flags,
232            crc32,
233            alt_text,
234            font_embedding,
235            font_license_url,
236        };
237
238        let min_len = cursor - start;
239        if min_len == 0 {
240            return None;
241        }
242        Some(entry)
243    }
244}
245
246pub struct PmapEntryIter<'buf> {
247    buf: &'buf [u8],
248    cursor: usize,
249    remaining: u32,
250}
251
252impl<'buf> PmapEntryIter<'buf> {
253    fn new(buf: &'buf [u8], pmap_offset: usize, pmap_count: u32) -> Self {
254        Self {
255            buf,
256            cursor: pmap_offset,
257            remaining: pmap_count,
258        }
259    }
260}
261
262impl<'buf> Iterator for PmapEntryIter<'buf> {
263    type Item = PmapEntry;
264
265    fn next(&mut self) -> Option<Self::Item> {
266        if self.remaining == 0 {
267            return None;
268        }
269        let end = self.buf.len();
270        let mut cursor = self.cursor;
271        let print_page = read_u32(self.buf, &mut cursor, end).ok()?;
272        let chunk_id = read_u32(self.buf, &mut cursor, end).ok()?;
273        let byte_offset = read_u32(self.buf, &mut cursor, end).ok()?;
274        self.cursor = cursor;
275        self.remaining -= 1;
276        Some(PmapEntry {
277            print_page,
278            chunk_id,
279            byte_offset,
280        })
281    }
282}
283
/// Reports whether `tag` is one of the chunk types this parser accepts.
fn is_known_chunk(tag: &[u8; 4]) -> bool {
    const KNOWN_TAGS: [&[u8; 4]; 9] = [
        b"CHAP", b"IMG_", b"CSS_", b"FONT", b"COVR", b"COVT", b"NOTE", b"SIDX", b"MATH",
    ];
    KNOWN_TAGS.iter().any(|known| *known == tag)
}
290
291fn validate_toc(
292    buf: &[u8],
293    toc_offset: usize,
294    toc_end: usize,
295    expected_entries: u32,
296) -> Result<(u32, usize, u32), HonzoError> {
297    let mut cursor = toc_offset;
298    let num_entries = read_u32(buf, &mut cursor, toc_end)?;
299    if num_entries != expected_entries {
300        return Err(HonzoError::Truncated);
301    }
302    for _ in 0..num_entries {
303        let chunk_type = read_tag(buf, &mut cursor, toc_end)?;
304        if !is_known_chunk(&chunk_type) {
305            return Err(HonzoError::InvalidChunkType);
306        }
307        let _ = read_u32(buf, &mut cursor, toc_end)?;
308        let _ = read_u64(buf, &mut cursor, toc_end)?;
309        let _ = read_u32(buf, &mut cursor, toc_end)?;
310        let _ = read_u32(buf, &mut cursor, toc_end)?;
311        let compression = read_u8(buf, &mut cursor, toc_end)?;
312        let content_type_kind = read_u8(buf, &mut cursor, toc_end)?;
313        let content_type_value = read_u8(buf, &mut cursor, toc_end)?;
314        let cover_type = read_u8(buf, &mut cursor, toc_end)?;
315        let _ = read_u8(buf, &mut cursor, toc_end)?;
316        let _ = read_u32(buf, &mut cursor, toc_end)?;
317        let alt_text_len = read_u16(buf, &mut cursor, toc_end)? as usize;
318        if alt_text_len > 0 {
319            let bytes = read_bytes(buf, &mut cursor, alt_text_len, toc_end)?;
320            if core::str::from_utf8(bytes).is_err() {
321                return Err(HonzoError::Truncated);
322            }
323        }
324
325        Compression::from_u8(compression)?;
326        match &chunk_type {
327            b"CHAP" | b"NOTE" => {
328                if content_type_kind != 1 {
329                    return Err(HonzoError::UnknownMarkupType(content_type_kind));
330                }
331                MarkupType::from_u8(content_type_value)?;
332            }
333            b"MATH" => {
334                if content_type_kind != 2 {
335                    return Err(HonzoError::UnknownMathType(content_type_kind));
336                }
337                MathType::from_u8(content_type_value)?;
338            }
339            _ => {
340                // for other chunk types we expect kind==1 and value==0
341                if content_type_kind != 1 || content_type_value != 0 {
342                    return Err(HonzoError::Truncated);
343                }
344            }
345        }
346        CoverType::from_u8(cover_type)?;
347
348        if &chunk_type == b"FONT" {
349            let embedding = read_u8(buf, &mut cursor, toc_end)?;
350            FontEmbedding::from_u8(embedding)?;
351            let url_len = read_u16(buf, &mut cursor, toc_end)? as usize;
352            if url_len > 0 {
353                let bytes = read_bytes(buf, &mut cursor, url_len, toc_end)?;
354                if core::str::from_utf8(bytes).is_err() {
355                    return Err(HonzoError::Truncated);
356                }
357            }
358        }
359    }
360
361    let pmap_offset = cursor;
362    let num_pmap_entries = read_u32(buf, &mut cursor, toc_end)?;
363    for _ in 0..num_pmap_entries {
364        let _ = read_u32(buf, &mut cursor, toc_end)?;
365        let _ = read_u32(buf, &mut cursor, toc_end)?;
366        let _ = read_u32(buf, &mut cursor, toc_end)?;
367    }
368
369    Ok((num_entries, pmap_offset + 4, num_pmap_entries))
370}
371
372fn read_bytes<'a>(
373    buf: &'a [u8],
374    cursor: &mut usize,
375    len: usize,
376    limit: usize,
377) -> Result<&'a [u8], HonzoError> {
378    let end = *cursor + len;
379    if end > limit {
380        return Err(HonzoError::Truncated);
381    }
382    if end > buf.len() {
383        return Err(HonzoError::BufferTooShort);
384    }
385    let out = &buf[*cursor..end];
386    *cursor = end;
387    Ok(out)
388}
389
390fn read_tag(buf: &[u8], cursor: &mut usize, limit: usize) -> Result<[u8; 4], HonzoError> {
391    let bytes = read_bytes(buf, cursor, 4, limit)?;
392    let mut tag = [0u8; 4];
393    tag.copy_from_slice(bytes);
394    Ok(tag)
395}
396
397fn read_u8(buf: &[u8], cursor: &mut usize, limit: usize) -> Result<u8, HonzoError> {
398    let bytes = read_bytes(buf, cursor, 1, limit)?;
399    Ok(bytes[0])
400}
401
402fn read_u16(buf: &[u8], cursor: &mut usize, limit: usize) -> Result<u16, HonzoError> {
403    let bytes = read_bytes(buf, cursor, 2, limit)?;
404    Ok(u16::from_le_bytes([bytes[0], bytes[1]]))
405}
406
407fn read_u32(buf: &[u8], cursor: &mut usize, limit: usize) -> Result<u32, HonzoError> {
408    let bytes = read_bytes(buf, cursor, 4, limit)?;
409    Ok(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
410}
411
412fn read_u64(buf: &[u8], cursor: &mut usize, limit: usize) -> Result<u64, HonzoError> {
413    let bytes = read_bytes(buf, cursor, 8, limit)?;
414    Ok(u64::from_le_bytes([
415        bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
416    ]))
417}