1use crate::types::{
2 Compression, CoverType, FontEmbedding, HonzoHead, MarkupType, MathType, PmapEntry, TocEntry,
3};
4use crate::HonzoError;
5
/// Four-byte signature that must appear at the very start of a container buffer.
const MAGIC: &[u8; 4] = b"HONO";
/// Size in bytes of the fixed header that follows the magic. The fields decoded in
/// `HonzoParser::new` add up to 1 + 1 + 2 + 4 + 4 + (4 * 8) + 4 = 48 bytes.
const HEAD_SIZE: usize = 48;
8
/// Borrowed view over a serialized container held in a byte buffer.
///
/// The parser never copies the input: it stores the slice it was given together with
/// the decoded header and the absolute byte offsets (into `buf`) of each section.
/// As computed in `HonzoParser::new`, the sections follow one another in the order
/// magic, header, TOC, data, extra, meta.
#[derive(Debug)]
pub struct HonzoParser<'buf> {
    /// The complete input buffer, starting at the magic bytes.
    buf: &'buf [u8],
    /// Decoded fixed-size header fields.
    head: HonzoHead,
    /// Offset of the TOC section (`4 + HEAD_SIZE`, i.e. right after magic and header).
    toc_offset: usize,
    /// Number of TOC entries, as read from the count word at the start of the TOC.
    toc_entries: u32,
    /// Offset of the data section (`toc_offset + toc_size`).
    data_offset: usize,
    /// Offset of the extra section (`data_offset + data_size`).
    extra_offset: usize,
    /// Offset of the meta section (`extra_offset + extra_size`).
    meta_offset: usize,
    /// Offset of the first page-map record, i.e. just past the page-map count word
    /// that follows the last TOC entry.
    pmap_offset: usize,
    /// Number of page-map records, as read from the page-map count word.
    pmap_entries: u32,
}
21
22impl<'buf> HonzoParser<'buf> {
23 pub fn new(buf: &'buf [u8], reader_version: u16) -> Result<Self, HonzoError> {
24 if buf.len() < 4 + HEAD_SIZE {
25 return Err(HonzoError::BufferTooShort);
26 }
27 if &buf[..4] != MAGIC {
28 return Err(HonzoError::InvalidMagic);
29 }
30
31 let end = buf.len();
32 let mut cursor = 4;
33 let version_major = read_u8(buf, &mut cursor, end)?;
34 let version_minor = read_u8(buf, &mut cursor, end)?;
35 let min_reader_version = read_u16(buf, &mut cursor, end)?;
36 let flags = read_u32(buf, &mut cursor, end)?;
37 let chunk_count = read_u32(buf, &mut cursor, end)?;
38 let toc_size = read_u64(buf, &mut cursor, end)?;
39 let data_size = read_u64(buf, &mut cursor, end)?;
40 let extra_size = read_u64(buf, &mut cursor, end)?;
41 let meta_size = read_u64(buf, &mut cursor, end)?;
42 let _reserved = read_u32(buf, &mut cursor, end)?;
43
44 if reader_version < min_reader_version {
45 return Err(HonzoError::ReaderVersionTooOld {
46 required: min_reader_version,
47 have: reader_version,
48 });
49 }
50
51 let toc_offset = 4 + HEAD_SIZE;
52 let data_offset = toc_offset + toc_size as usize;
53 let extra_offset = data_offset + data_size as usize;
54 let meta_offset = extra_offset + extra_size as usize;
55
56 if buf.len() < meta_offset + meta_size as usize {
57 return Err(HonzoError::BufferTooShort);
58 }
59
60 let toc_end = toc_offset + toc_size as usize;
61 if toc_end > buf.len() {
62 return Err(HonzoError::BufferTooShort);
63 }
64
65 let (toc_entries, pmap_offset, pmap_entries) =
66 validate_toc(buf, toc_offset, toc_end, chunk_count)?;
67
68 Ok(Self {
69 buf,
70 head: HonzoHead {
71 version_major,
72 version_minor,
73 min_reader_version,
74 flags,
75 chunk_count,
76 toc_size,
77 data_size,
78 extra_size,
79 meta_size,
80 },
81 toc_offset,
82 toc_entries,
83 data_offset,
84 extra_offset,
85 meta_offset,
86 pmap_offset,
87 pmap_entries,
88 })
89 }
90
91 pub fn head(&self) -> &HonzoHead {
92 &self.head
93 }
94
95 pub fn toc_entries(&self) -> TocEntryIter<'buf> {
96 TocEntryIter::new(self.buf, self.toc_offset, self.toc_entries)
97 }
98
99 pub fn pmap_entries(&self) -> PmapEntryIter<'buf> {
100 PmapEntryIter::new(self.buf, self.pmap_offset, self.pmap_entries)
101 }
102
103 pub fn chunk_bytes(&self, entry: &TocEntry) -> Result<&'buf [u8], HonzoError> {
104 if entry.is_encrypted() {
105 return Err(HonzoError::EncryptedChunk {
106 chunk_id: entry.chunk_id,
107 });
108 }
109 self.chunk_bytes_unchecked(entry)
110 }
111
112 pub fn chunk_bytes_unchecked(&self, entry: &TocEntry) -> Result<&'buf [u8], HonzoError> {
115 let start = self.data_offset + entry.offset as usize;
116 let end = start + entry.size_compressed as usize;
117 if end > self.data_offset + self.head.data_size as usize {
118 return Err(HonzoError::Truncated);
119 }
120 if end > self.buf.len() {
121 return Err(HonzoError::BufferTooShort);
122 }
123 Ok(&self.buf[start..end])
124 }
125
126 pub fn meta_bytes(&self) -> Result<&'buf [u8], HonzoError> {
127 let start = self.meta_offset;
128 let end = start + self.head.meta_size as usize;
129 if end > self.buf.len() {
130 return Err(HonzoError::BufferTooShort);
131 }
132 Ok(&self.buf[start..end])
133 }
134
135 pub fn extra_bytes(&self) -> Result<&'buf [u8], HonzoError> {
136 let start = self.extra_offset;
137 let end = start + self.head.extra_size as usize;
138 if end > self.buf.len() {
139 return Err(HonzoError::BufferTooShort);
140 }
141 Ok(&self.buf[start..end])
142 }
143
144 pub fn find_chunk(&self, tag: &[u8; 4]) -> Option<TocEntry<'buf>> {
145 self.toc_entries().find(|entry| &entry.chunk_type == tag)
146 }
147
148 pub fn find_chunk_by_id(&self, id: u32) -> Option<TocEntry<'buf>> {
149 self.toc_entries().find(|entry| entry.chunk_id == id)
150 }
151}
152
/// Iterator over the table-of-contents entries stored in a container buffer.
///
/// Created by `HonzoParser::toc_entries`. Derives `Debug` and `Clone` like other public
/// types so that it can be logged and so that a scan position can be saved cheaply
/// (the iterator only holds a borrowed slice and two integers).
#[derive(Debug, Clone)]
pub struct TocEntryIter<'buf> {
    /// The complete container buffer.
    buf: &'buf [u8],
    /// Absolute offset in `buf` of the next entry to decode.
    cursor: usize,
    /// Number of entries that have not been yielded yet.
    remaining: u32,
}
158
159impl<'buf> TocEntryIter<'buf> {
160 fn new(buf: &'buf [u8], toc_offset: usize, chunk_count: u32) -> Self {
161 let mut cursor = toc_offset;
162 let _ = read_u32(buf, &mut cursor, buf.len()).ok();
163 Self {
164 buf,
165 cursor,
166 remaining: chunk_count,
167 }
168 }
169}
170
171impl<'buf> Iterator for TocEntryIter<'buf> {
172 type Item = TocEntry<'buf>;
173
174 fn next(&mut self) -> Option<Self::Item> {
175 if self.remaining == 0 {
176 return None;
177 }
178
179 let end = self.buf.len();
180 let start = self.cursor;
181 let mut cursor = self.cursor;
182 let chunk_type = read_tag(self.buf, &mut cursor, end).ok()?;
183 if !is_known_chunk(&chunk_type) {
184 return None;
185 }
186 let chunk_id = read_u32(self.buf, &mut cursor, end).ok()?;
187 let offset = read_u64(self.buf, &mut cursor, end).ok()?;
188 let size_compressed = read_u32(self.buf, &mut cursor, end).ok()?;
189 let size_raw = read_u32(self.buf, &mut cursor, end).ok()?;
190 let compression = read_u8(self.buf, &mut cursor, end).ok()?;
191 let content_type_kind = read_u8(self.buf, &mut cursor, end).ok()?;
192 let content_type_value = read_u8(self.buf, &mut cursor, end).ok()?;
193 let cover_type = read_u8(self.buf, &mut cursor, end).ok()?;
194 let flags = read_u8(self.buf, &mut cursor, end).ok()?;
195 let crc32 = read_u32(self.buf, &mut cursor, end).ok()?;
196 let alt_text_len = read_u16(self.buf, &mut cursor, end).ok()? as usize;
197
198 let alt_text = if alt_text_len > 0 {
199 let bytes = read_bytes(self.buf, &mut cursor, alt_text_len, end).ok()?;
200 core::str::from_utf8(bytes).ok()
201 } else {
202 None
203 };
204
205 let mut font_embedding = None;
206 let mut font_license_url = None;
207
208 if &chunk_type == b"FONT" {
209 let embedding = read_u8(self.buf, &mut cursor, end).ok()?;
210 font_embedding = Some(FontEmbedding::from_u8(embedding).ok()?);
211 let url_len = read_u16(self.buf, &mut cursor, end).ok()? as usize;
212 if url_len > 0 {
213 let bytes = read_bytes(self.buf, &mut cursor, url_len, end).ok()?;
214 font_license_url = core::str::from_utf8(bytes).ok();
215 }
216 }
217
218 self.cursor = cursor;
219 self.remaining -= 1;
220
221 let entry = TocEntry {
222 chunk_type,
223 chunk_id,
224 offset,
225 size_compressed,
226 size_raw,
227 compression: Compression::from_u8(compression).ok()?,
228 content_type_kind,
229 content_type_value,
230 cover_type: CoverType::from_u8(cover_type).ok()?,
231 flags,
232 crc32,
233 alt_text,
234 font_embedding,
235 font_license_url,
236 };
237
238 let min_len = cursor - start;
239 if min_len == 0 {
240 return None;
241 }
242 Some(entry)
243 }
244}
245
/// Iterator over the page-map records stored after the table-of-contents entries.
///
/// Created by `HonzoParser::pmap_entries`. Derives `Debug` and `Clone` like other public
/// types; the iterator only holds a borrowed slice and two integers, so cloning is cheap.
#[derive(Debug, Clone)]
pub struct PmapEntryIter<'buf> {
    /// The complete container buffer.
    buf: &'buf [u8],
    /// Absolute offset in `buf` of the next record to decode.
    cursor: usize,
    /// Number of records that have not been yielded yet.
    remaining: u32,
}
251
252impl<'buf> PmapEntryIter<'buf> {
253 fn new(buf: &'buf [u8], pmap_offset: usize, pmap_count: u32) -> Self {
254 Self {
255 buf,
256 cursor: pmap_offset,
257 remaining: pmap_count,
258 }
259 }
260}
261
262impl<'buf> Iterator for PmapEntryIter<'buf> {
263 type Item = PmapEntry;
264
265 fn next(&mut self) -> Option<Self::Item> {
266 if self.remaining == 0 {
267 return None;
268 }
269 let end = self.buf.len();
270 let mut cursor = self.cursor;
271 let print_page = read_u32(self.buf, &mut cursor, end).ok()?;
272 let chunk_id = read_u32(self.buf, &mut cursor, end).ok()?;
273 let byte_offset = read_u32(self.buf, &mut cursor, end).ok()?;
274 self.cursor = cursor;
275 self.remaining -= 1;
276 Some(PmapEntry {
277 print_page,
278 chunk_id,
279 byte_offset,
280 })
281 }
282}
283
/// Reports whether `tag` is one of the four-byte chunk types this parser accepts.
fn is_known_chunk(tag: &[u8; 4]) -> bool {
    const KNOWN_TAGS: [&[u8; 4]; 9] = [
        b"CHAP", b"IMG_", b"CSS_", b"FONT", b"COVR", b"COVT", b"NOTE", b"SIDX", b"MATH",
    ];
    KNOWN_TAGS.iter().any(|known| *known == tag)
}
290
291fn validate_toc(
292 buf: &[u8],
293 toc_offset: usize,
294 toc_end: usize,
295 expected_entries: u32,
296) -> Result<(u32, usize, u32), HonzoError> {
297 let mut cursor = toc_offset;
298 let num_entries = read_u32(buf, &mut cursor, toc_end)?;
299 if num_entries != expected_entries {
300 return Err(HonzoError::Truncated);
301 }
302 for _ in 0..num_entries {
303 let chunk_type = read_tag(buf, &mut cursor, toc_end)?;
304 if !is_known_chunk(&chunk_type) {
305 return Err(HonzoError::InvalidChunkType);
306 }
307 let _ = read_u32(buf, &mut cursor, toc_end)?;
308 let _ = read_u64(buf, &mut cursor, toc_end)?;
309 let _ = read_u32(buf, &mut cursor, toc_end)?;
310 let _ = read_u32(buf, &mut cursor, toc_end)?;
311 let compression = read_u8(buf, &mut cursor, toc_end)?;
312 let content_type_kind = read_u8(buf, &mut cursor, toc_end)?;
313 let content_type_value = read_u8(buf, &mut cursor, toc_end)?;
314 let cover_type = read_u8(buf, &mut cursor, toc_end)?;
315 let _ = read_u8(buf, &mut cursor, toc_end)?;
316 let _ = read_u32(buf, &mut cursor, toc_end)?;
317 let alt_text_len = read_u16(buf, &mut cursor, toc_end)? as usize;
318 if alt_text_len > 0 {
319 let bytes = read_bytes(buf, &mut cursor, alt_text_len, toc_end)?;
320 if core::str::from_utf8(bytes).is_err() {
321 return Err(HonzoError::Truncated);
322 }
323 }
324
325 Compression::from_u8(compression)?;
326 match &chunk_type {
327 b"CHAP" | b"NOTE" => {
328 if content_type_kind != 1 {
329 return Err(HonzoError::UnknownMarkupType(content_type_kind));
330 }
331 MarkupType::from_u8(content_type_value)?;
332 }
333 b"MATH" => {
334 if content_type_kind != 2 {
335 return Err(HonzoError::UnknownMathType(content_type_kind));
336 }
337 MathType::from_u8(content_type_value)?;
338 }
339 _ => {
340 if content_type_kind != 1 || content_type_value != 0 {
342 return Err(HonzoError::Truncated);
343 }
344 }
345 }
346 CoverType::from_u8(cover_type)?;
347
348 if &chunk_type == b"FONT" {
349 let embedding = read_u8(buf, &mut cursor, toc_end)?;
350 FontEmbedding::from_u8(embedding)?;
351 let url_len = read_u16(buf, &mut cursor, toc_end)? as usize;
352 if url_len > 0 {
353 let bytes = read_bytes(buf, &mut cursor, url_len, toc_end)?;
354 if core::str::from_utf8(bytes).is_err() {
355 return Err(HonzoError::Truncated);
356 }
357 }
358 }
359 }
360
361 let pmap_offset = cursor;
362 let num_pmap_entries = read_u32(buf, &mut cursor, toc_end)?;
363 for _ in 0..num_pmap_entries {
364 let _ = read_u32(buf, &mut cursor, toc_end)?;
365 let _ = read_u32(buf, &mut cursor, toc_end)?;
366 let _ = read_u32(buf, &mut cursor, toc_end)?;
367 }
368
369 Ok((num_entries, pmap_offset + 4, num_pmap_entries))
370}
371
372fn read_bytes<'a>(
373 buf: &'a [u8],
374 cursor: &mut usize,
375 len: usize,
376 limit: usize,
377) -> Result<&'a [u8], HonzoError> {
378 let end = *cursor + len;
379 if end > limit {
380 return Err(HonzoError::Truncated);
381 }
382 if end > buf.len() {
383 return Err(HonzoError::BufferTooShort);
384 }
385 let out = &buf[*cursor..end];
386 *cursor = end;
387 Ok(out)
388}
389
390fn read_tag(buf: &[u8], cursor: &mut usize, limit: usize) -> Result<[u8; 4], HonzoError> {
391 let bytes = read_bytes(buf, cursor, 4, limit)?;
392 let mut tag = [0u8; 4];
393 tag.copy_from_slice(bytes);
394 Ok(tag)
395}
396
397fn read_u8(buf: &[u8], cursor: &mut usize, limit: usize) -> Result<u8, HonzoError> {
398 let bytes = read_bytes(buf, cursor, 1, limit)?;
399 Ok(bytes[0])
400}
401
402fn read_u16(buf: &[u8], cursor: &mut usize, limit: usize) -> Result<u16, HonzoError> {
403 let bytes = read_bytes(buf, cursor, 2, limit)?;
404 Ok(u16::from_le_bytes([bytes[0], bytes[1]]))
405}
406
407fn read_u32(buf: &[u8], cursor: &mut usize, limit: usize) -> Result<u32, HonzoError> {
408 let bytes = read_bytes(buf, cursor, 4, limit)?;
409 Ok(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
410}
411
412fn read_u64(buf: &[u8], cursor: &mut usize, limit: usize) -> Result<u64, HonzoError> {
413 let bytes = read_bytes(buf, cursor, 8, limit)?;
414 Ok(u64::from_le_bytes([
415 bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
416 ]))
417}