1#[cfg(not(feature = "std"))]
25use alloc::{
26 string::{String, ToString},
27 vec::Vec,
28};
29
30use crate::{bzz_new::bzz_decode, error::BzzError, info::Rotation};
31
32#[derive(Debug, thiserror::Error)]
36pub enum TextError {
37 #[error("bzz decode failed: {0}")]
39 Bzz(#[from] BzzError),
40
41 #[error("text layer data too short")]
43 TooShort,
44
45 #[error("text length overflows data")]
47 TextOverflow,
48
49 #[error("invalid UTF-8 in text layer")]
51 InvalidUtf8,
52
53 #[error("zone record truncated at offset {0}")]
55 ZoneTruncated(usize),
56
57 #[error("unknown zone type {0}")]
59 UnknownZoneType(u8),
60}
61
/// Granularity of a [`TextZone`], listed from coarsest to finest.
///
/// On the wire the variants are encoded as the type bytes `1` (`Page`)
/// through `7` (`Character`), in declaration order.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TextZoneKind {
    /// Type byte `1`.
    Page,
    /// Type byte `2`.
    Column,
    /// Type byte `3`.
    Region,
    /// Type byte `4`.
    Para,
    /// Type byte `5`.
    Line,
    /// Type byte `6`.
    Word,
    /// Type byte `7`.
    Character,
}
76
/// Axis-aligned rectangle with unsigned integer coordinates.
///
/// NOTE(review): the parser in this file fills `y` with a value measured
/// from the top of the page (top-left origin) — confirm for other producers.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Rect {
    /// Horizontal position of the rectangle's origin corner.
    pub x: u32,
    /// Vertical position of the rectangle's origin corner.
    pub y: u32,
    /// Horizontal extent.
    pub width: u32,
    /// Vertical extent.
    pub height: u32,
}
86
87#[derive(Debug, Clone)]
89#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
90pub struct TextZone {
91 pub kind: TextZoneKind,
93 pub rect: Rect,
95 pub text: String,
97 pub children: Vec<TextZone>,
99}
100
101#[derive(Debug, Clone)]
103#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
104pub struct TextLayer {
105 pub text: String,
107 pub zones: Vec<TextZone>,
109}
110
111impl TextLayer {
112 pub fn transform(
123 &self,
124 page_w: u32,
125 page_h: u32,
126 rotation: Rotation,
127 render_w: u32,
128 render_h: u32,
129 ) -> Self {
130 let (disp_w, disp_h) = match rotation {
131 Rotation::Cw90 | Rotation::Ccw90 => (page_h, page_w),
132 _ => (page_w, page_h),
133 };
134 let t = ZoneTransform {
135 page_w,
136 page_h,
137 rotation,
138 disp_w,
139 disp_h,
140 render_w,
141 render_h,
142 };
143 let zones = self.zones.iter().map(|z| transform_zone(z, &t)).collect();
144 TextLayer {
145 text: self.text.clone(),
146 zones,
147 }
148 }
149}
150
151impl Rect {
154 pub fn rotate(&self, page_w: u32, page_h: u32, rotation: Rotation) -> Self {
160 match rotation {
161 Rotation::None => self.clone(),
162 Rotation::Rot180 => Rect {
163 x: page_w.saturating_sub(self.x.saturating_add(self.width)),
164 y: page_h.saturating_sub(self.y.saturating_add(self.height)),
165 width: self.width,
166 height: self.height,
167 },
168 Rotation::Cw90 => Rect {
171 x: page_h.saturating_sub(self.y.saturating_add(self.height)),
172 y: self.x,
173 width: self.height,
174 height: self.width,
175 },
176 Rotation::Ccw90 => Rect {
179 x: self.y,
180 y: page_w.saturating_sub(self.x.saturating_add(self.width)),
181 width: self.height,
182 height: self.width,
183 },
184 }
185 }
186
187 pub fn scale(&self, from_w: u32, from_h: u32, to_w: u32, to_h: u32) -> Self {
189 if from_w == 0 || from_h == 0 {
190 return self.clone();
191 }
192 Rect {
193 x: (self.x as u64 * to_w as u64 / from_w as u64) as u32,
194 y: (self.y as u64 * to_h as u64 / from_h as u64) as u32,
195 width: (self.width as u64 * to_w as u64 / from_w as u64) as u32,
196 height: (self.height as u64 * to_h as u64 / from_h as u64) as u32,
197 }
198 }
199}
200
201struct ZoneTransform {
204 page_w: u32,
205 page_h: u32,
206 rotation: Rotation,
207 disp_w: u32,
208 disp_h: u32,
209 render_w: u32,
210 render_h: u32,
211}
212
213fn transform_zone(zone: &TextZone, t: &ZoneTransform) -> TextZone {
214 let rotated = zone.rect.rotate(t.page_w, t.page_h, t.rotation);
215 let scaled = rotated.scale(t.disp_w, t.disp_h, t.render_w, t.render_h);
216 let children = zone.children.iter().map(|c| transform_zone(c, t)).collect();
217 TextZone {
218 kind: zone.kind,
219 rect: scaled,
220 text: zone.text.clone(),
221 children,
222 }
223}
224
225pub fn parse_text_layer(data: &[u8], page_height: u32) -> Result<TextLayer, TextError> {
231 parse_text_layer_inner(data, page_height)
232}
233
234pub fn parse_text_layer_bzz(data: &[u8], page_height: u32) -> Result<TextLayer, TextError> {
238 let decoded = bzz_decode(data)?;
239 parse_text_layer_inner(&decoded, page_height)
240}
241
242fn parse_text_layer_inner(data: &[u8], page_height: u32) -> Result<TextLayer, TextError> {
245 if data.len() < 3 {
246 return Err(TextError::TooShort);
247 }
248
249 let mut pos = 0usize;
250
251 let text_len = read_u24(data, &mut pos).ok_or(TextError::TooShort)?;
253
254 let text_end = pos.checked_add(text_len).ok_or(TextError::TextOverflow)?;
256 if text_end > data.len() {
257 return Err(TextError::TextOverflow);
258 }
259 let text = core::str::from_utf8(data.get(pos..text_end).ok_or(TextError::TextOverflow)?)
260 .map_err(|_| TextError::InvalidUtf8)?
261 .to_string();
262 pos = text_end;
263
264 if pos < data.len() {
266 pos += 1; }
268
269 let mut zones = Vec::new();
271 if pos < data.len() {
272 let zone = parse_zone(data, &mut pos, None, None, &text, page_height)?;
273 zones.push(zone);
274 }
275
276 Ok(TextLayer { text, zones })
277}
278
/// Absolute geometry and text span of an already-decoded zone.
///
/// Zone records store their fields relative to a neighbour; the parser keeps
/// one of these for the parent and for the previous sibling so that the next
/// record can be resolved against it.
#[derive(Clone)]
struct ZoneCtx {
    /// Resolved horizontal position.
    x: i32,
    /// Resolved vertical position, in the file's own orientation (before the
    /// flip against the page height that produces the output rectangle).
    y: i32,
    /// Width as read from the record.
    width: i32,
    /// Height as read from the record.
    height: i32,
    /// Resolved start offset of the zone's text.
    text_start: i32,
    /// Length of the zone's text as read from the record.
    text_len: i32,
}
291
292fn parse_zone(
293 data: &[u8],
294 pos: &mut usize,
295 parent: Option<&ZoneCtx>,
296 prev: Option<&ZoneCtx>,
297 full_text: &str,
298 page_height: u32,
299) -> Result<TextZone, TextError> {
300 if *pos >= data.len() {
301 return Err(TextError::ZoneTruncated(*pos));
302 }
303
304 let type_byte = *data.get(*pos).ok_or(TextError::ZoneTruncated(*pos))?;
305 *pos += 1;
306
307 let kind = match type_byte {
308 1 => TextZoneKind::Page,
309 2 => TextZoneKind::Column,
310 3 => TextZoneKind::Region,
311 4 => TextZoneKind::Para,
312 5 => TextZoneKind::Line,
313 6 => TextZoneKind::Word,
314 7 => TextZoneKind::Character,
315 other => return Err(TextError::UnknownZoneType(other)),
316 };
317
318 let mut x = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
319 let mut y = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
320 let width = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
321 let height = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
322 let mut text_start = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
323 let text_len = read_i24(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
324
325 if let Some(prev) = prev {
327 match type_byte {
328 1 | 4 | 5 => {
329 x += prev.x;
331 y = prev.y - (y + height);
332 }
333 _ => {
334 x += prev.x + prev.width;
336 y += prev.y;
337 }
338 }
339 text_start += prev.text_start + prev.text_len;
340 } else if let Some(parent) = parent {
341 x += parent.x;
342 y = parent.y + parent.height - (y + height);
343 text_start += parent.text_start;
344 }
345
346 let tl_y = (page_height as i32)
349 .saturating_sub(y.saturating_add(height))
350 .max(0) as u32;
351 let tl_x = x.max(0) as u32;
352 let tl_w = width.max(0) as u32;
353 let tl_h = height.max(0) as u32;
354
355 let rect = Rect {
356 x: tl_x,
357 y: tl_y,
358 width: tl_w,
359 height: tl_h,
360 };
361
362 let ts = text_start.max(0) as usize;
364 let tl = text_len.max(0) as usize;
365 let zone_text = extract_text_slice(full_text, ts, tl);
366
367 let children_count = read_i24(data, pos)
368 .ok_or(TextError::ZoneTruncated(*pos))?
369 .max(0) as usize;
370
371 let ctx = ZoneCtx {
372 x,
373 y,
374 width,
375 height,
376 text_start,
377 text_len,
378 };
379
380 let mut children = Vec::with_capacity(children_count);
381 let mut prev_child: Option<ZoneCtx> = None;
382
383 for _ in 0..children_count {
384 let child = parse_zone(
385 data,
386 pos,
387 Some(&ctx),
388 prev_child.as_ref(),
389 full_text,
390 page_height,
391 )?;
392 prev_child = Some(ZoneCtx {
393 x: child.rect.x as i32,
394 y: {
395 (page_height as i32).saturating_sub(child.rect.y as i32 + child.rect.height as i32)
398 },
399 width: child.rect.width as i32,
400 height: child.rect.height as i32,
401 text_start: ts as i32,
402 text_len: tl as i32,
403 });
404 children.push(child);
405 }
406
407 Ok(TextZone {
408 kind,
409 rect,
410 text: zone_text,
411 children,
412 })
413}
414
/// Returns the part of `full_text` covered by the byte range
/// `start..start + len`, as an owned string.
///
/// The range is clamped to the text. If an end of the range falls inside a
/// multi-byte character, the range is widened to include that whole
/// character (start moves backwards, end moves forwards), so the result is
/// always valid UTF-8 and this function never panics.
fn extract_text_slice(full_text: &str, start: usize, len: usize) -> String {
    let total = full_text.len();
    let hi = start.saturating_add(len).min(total);
    let lo = start.min(hi);

    // Offset 0 is always a boundary, so this walk terminates.
    let mut begin = lo;
    while !full_text.is_char_boundary(begin) {
        begin -= 1;
    }

    // `total` is always a boundary, so this walk terminates.
    let mut finish = hi;
    while !full_text.is_char_boundary(finish) {
        finish += 1;
    }

    full_text[begin..finish].to_string()
}
431
/// Reads a big-endian unsigned 24-bit integer at `*pos` and advances `*pos`
/// by three bytes.
///
/// Returns `None`, leaving `*pos` untouched, when fewer than three bytes
/// remain.
fn read_u24(data: &[u8], pos: &mut usize) -> Option<usize> {
    match data.get(*pos..)? {
        &[hi, mid, lo, ..] => {
            *pos += 3;
            Some(u32::from_be_bytes([0, hi, mid, lo]) as usize)
        }
        _ => None,
    }
}
442
/// Reads a big-endian 16-bit value at `*pos`, subtracts the bias `0x8000`,
/// and advances `*pos` by two bytes.
///
/// The result therefore lies in `-0x8000..=0x7FFF`. Returns `None`, leaving
/// `*pos` untouched, when fewer than two bytes remain.
fn read_i16_biased(data: &[u8], pos: &mut usize) -> Option<i32> {
    let window = data.get(*pos..)?.get(..2)?;
    let stored = u16::from_be_bytes([window[0], window[1]]);
    *pos += 2;
    Some(i32::from(stored) - 0x8000)
}
451
/// Reads a big-endian 24-bit integer at `*pos` into an `i32` and advances
/// `*pos` by three bytes.
///
/// No sign extension is performed, so the result is always in
/// `0..=0xFF_FFFF`. Returns `None`, leaving `*pos` untouched, when fewer
/// than three bytes remain.
fn read_i24(data: &[u8], pos: &mut usize) -> Option<i32> {
    match data.get(*pos..)? {
        &[hi, mid, lo, ..] => {
            *pos += 3;
            Some((i32::from(hi) << 16) | (i32::from(mid) << 8) | i32::from(lo))
        }
        _ => None,
    }
}
460
#[cfg(test)]
mod tests {
    use super::*;

    // ---- fixture helpers ---------------------------------------------------

    /// Encodes the low 24 bits of `value` as three big-endian bytes.
    fn u24(value: u32) -> [u8; 3] {
        let [_, b0, b1, b2] = value.to_be_bytes();
        [b0, b1, b2]
    }

    /// Encodes one zone record: type byte, the five biased 16-bit fields
    /// `[x, y, width, height, text_start]`, then the 24-bit text length and
    /// child count.
    fn zone_record(kind: u8, fields: [i32; 5], text_len: u32, children: u32) -> Vec<u8> {
        let mut out = vec![kind];
        for value in fields.iter() {
            out.extend_from_slice(&((*value + 0x8000) as u16).to_be_bytes());
        }
        out.extend_from_slice(&u24(text_len));
        out.extend_from_slice(&u24(children));
        out
    }

    /// Builds a full payload: text length, text, the skipped byte, zone bytes.
    fn layer_bytes(text: &[u8], zones: &[u8]) -> Vec<u8> {
        let mut out = Vec::new();
        out.extend_from_slice(&u24(text.len() as u32));
        out.extend_from_slice(text);
        out.push(0x00);
        out.extend_from_slice(zones);
        out
    }

    /// A layer with a single page zone at the given rectangle.
    fn make_layer(x: u32, y: u32, w: u32, h: u32) -> TextLayer {
        TextLayer {
            text: "test".to_string(),
            zones: vec![TextZone {
                kind: TextZoneKind::Page,
                rect: Rect {
                    x,
                    y,
                    width: w,
                    height: h,
                },
                text: "test".to_string(),
                children: vec![],
            }],
        }
    }

    /// Rectangle of the first top-level zone.
    fn rect0(layer: &TextLayer) -> &Rect {
        &layer.zones[0].rect
    }

    // ---- primitive readers -------------------------------------------------

    #[test]
    fn test_read_u24() {
        let data = [0x01, 0x02, 0x03];
        let mut pos = 0;
        assert_eq!(read_u24(&data, &mut pos), Some(0x010203));
        assert_eq!(pos, 3);
    }

    #[test]
    fn test_read_u24_truncated() {
        let data = [0x01, 0x02];
        let mut pos = 0;
        assert_eq!(read_u24(&data, &mut pos), None);
    }

    #[test]
    fn test_read_i16_biased() {
        // 0x8000 is the bias itself, i.e. zero.
        let data = [0x80, 0x00];
        let mut pos = 0;
        assert_eq!(read_i16_biased(&data, &mut pos), Some(0));
        assert_eq!(pos, 2);
    }

    #[test]
    fn test_read_i16_biased_negative() {
        // Raw zero is the most negative representable value.
        let data = [0x00, 0x00];
        let mut pos = 0;
        assert_eq!(read_i16_biased(&data, &mut pos), Some(-0x8000));
    }

    #[test]
    fn test_read_i16_biased_truncated() {
        let data = [0x80];
        let mut pos = 0;
        assert_eq!(read_i16_biased(&data, &mut pos), None);
    }

    #[test]
    fn test_read_i24() {
        let data = [0x00, 0x01, 0x00];
        let mut pos = 0;
        assert_eq!(read_i24(&data, &mut pos), Some(256));
    }

    // ---- extract_text_slice ------------------------------------------------

    #[test]
    fn test_extract_text_slice_basic() {
        assert_eq!(extract_text_slice("hello world", 0, 5), "hello");
        assert_eq!(extract_text_slice("hello world", 6, 5), "world");
    }

    #[test]
    fn test_extract_text_slice_out_of_bounds() {
        assert_eq!(extract_text_slice("hello", 10, 5), "");
        assert_eq!(extract_text_slice("hello", 0, 100), "hello");
    }

    #[test]
    fn test_extract_text_slice_utf8_boundary() {
        // Two 2-byte characters; bytes 1..3 cut through both of them, so the
        // slice must widen to cover both whole characters.
        let s = "\u{00e9}\u{00e8}";
        let result = extract_text_slice(s, 1, 2);
        assert_eq!(result, s);
    }

    #[test]
    fn test_extract_text_slice_empty() {
        assert_eq!(extract_text_slice("", 0, 0), "");
        assert_eq!(extract_text_slice("abc", 1, 0), "");
    }

    // ---- parse errors ------------------------------------------------------

    #[test]
    fn test_too_short_data() {
        assert!(matches!(
            parse_text_layer(&[0x00], 100),
            Err(TextError::TooShort)
        ));
        assert!(matches!(
            parse_text_layer(&[], 100),
            Err(TextError::TooShort)
        ));
    }

    #[test]
    fn test_text_overflow() {
        // Declares 255 bytes of text but supplies only one.
        let data = [0x00, 0x00, 0xFF, 0x41];
        assert!(matches!(
            parse_text_layer(&data, 100),
            Err(TextError::TextOverflow)
        ));
    }

    #[test]
    fn test_invalid_utf8() {
        // Two text bytes that are not valid UTF-8.
        let data = [0x00, 0x00, 0x02, 0xFF, 0xFE];
        assert!(matches!(
            parse_text_layer(&data, 100),
            Err(TextError::InvalidUtf8)
        ));
    }

    #[test]
    fn test_unknown_zone_type() {
        // Text "A", skipped byte, then a zone whose type byte is 99.
        let data = layer_bytes(b"A", &[99]);
        assert!(matches!(
            parse_text_layer(&data, 100),
            Err(TextError::UnknownZoneType(99))
        ));
    }

    #[test]
    fn test_zone_truncated() {
        // Text "A", skipped byte, page zone that ends right after its x field.
        let data = layer_bytes(b"A", &[0x01, 0x80, 0x00]);
        assert!(matches!(
            parse_text_layer(&data, 100),
            Err(TextError::ZoneTruncated(_))
        ));
    }

    // ---- successful parses -------------------------------------------------

    #[test]
    fn test_empty_text_no_zones() {
        // Only the length header, declaring zero bytes of text.
        let data = [0x00, 0x00, 0x00];
        let result = parse_text_layer(&data, 100).unwrap();
        assert_eq!(result.text, "");
        assert!(result.zones.is_empty());
    }

    #[test]
    fn test_text_only_no_zones() {
        // Text followed by the skipped byte and nothing else.
        let data = layer_bytes(b"Hello", &[]);
        let result = parse_text_layer(&data, 100).unwrap();
        assert_eq!(result.text, "Hello");
        assert!(result.zones.is_empty());
    }

    #[test]
    fn test_single_word_zone() {
        // One page zone at the origin, 100 x 50, covering both text bytes.
        let data = layer_bytes(b"Hi", &zone_record(1, [0, 0, 100, 50, 0], 2, 0));

        let result = parse_text_layer(&data, 100).unwrap();
        assert_eq!(result.text, "Hi");
        assert_eq!(result.zones.len(), 1);
        assert_eq!(result.zones[0].kind, TextZoneKind::Page);
        assert_eq!(result.zones[0].text, "Hi");
        assert_eq!(result.zones[0].rect.width, 100);
        assert_eq!(result.zones[0].rect.height, 50);
        // The vertical axis is flipped against the page height of 100.
        assert_eq!(
            result.zones[0].rect,
            Rect {
                x: 0,
                y: 50,
                width: 100,
                height: 50
            }
        );
        assert!(result.zones[0].children.is_empty());
    }

    #[test]
    fn test_first_child_is_relative_to_parent() {
        // Page 100 x 100 with one line child offset by (10, 20) from the
        // page's top-left corner.
        let mut zones = zone_record(1, [0, 0, 100, 100, 0], 2, 1);
        zones.extend(zone_record(5, [10, 20, 50, 30, 0], 2, 0));
        let data = layer_bytes(b"Hi", &zones);

        let result = parse_text_layer(&data, 100).unwrap();
        let page = &result.zones[0];
        assert_eq!(page.children.len(), 1);

        let line = &page.children[0];
        assert_eq!(line.kind, TextZoneKind::Line);
        assert_eq!(line.text, "Hi");
        assert_eq!(
            line.rect,
            Rect {
                x: 10,
                y: 20,
                width: 50,
                height: 30
            }
        );
    }

    // ---- TextLayer::transform ----------------------------------------------

    #[test]
    fn transform_none_identity() {
        let layer = make_layer(10, 20, 30, 40);
        // Same page and render size, no rotation: nothing changes.
        let out = layer.transform(100, 200, Rotation::None, 100, 200);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 10,
                y: 20,
                width: 30,
                height: 40
            }
        );
    }

    #[test]
    fn transform_none_scale_2x() {
        let layer = make_layer(10, 20, 30, 40);
        let out = layer.transform(100, 200, Rotation::None, 200, 400);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 20,
                y: 40,
                width: 60,
                height: 80
            }
        );
    }

    #[test]
    fn transform_rot180() {
        let layer = make_layer(10, 20, 30, 40);
        // x = 100 - (10 + 30), y = 200 - (20 + 40).
        let out = layer.transform(100, 200, Rotation::Rot180, 100, 200);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 60,
                y: 140,
                width: 30,
                height: 40
            }
        );
    }

    #[test]
    fn transform_cw90() {
        let layer = make_layer(10, 20, 30, 40);
        // Displayed page is 200 x 100; x = 200 - (20 + 40), y = old x.
        let out = layer.transform(100, 200, Rotation::Cw90, 200, 100);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 140,
                y: 10,
                width: 40,
                height: 30
            }
        );
    }

    #[test]
    fn transform_ccw90() {
        let layer = make_layer(10, 20, 30, 40);
        // Displayed page is 200 x 100; x = old y, y = 100 - (10 + 30).
        let out = layer.transform(100, 200, Rotation::Ccw90, 200, 100);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 20,
                y: 60,
                width: 40,
                height: 30
            }
        );
    }

    #[test]
    fn transform_cw90_then_scale() {
        let layer = make_layer(10, 20, 30, 40);
        // Same rotation as `transform_cw90`, then scaled 2x on both axes.
        let out = layer.transform(100, 200, Rotation::Cw90, 400, 200);
        assert_eq!(
            *rect0(&out),
            Rect {
                x: 280,
                y: 20,
                width: 80,
                height: 60
            }
        );
    }

    #[test]
    fn transform_text_preserved() {
        let layer = make_layer(0, 0, 10, 10);
        let out = layer.transform(100, 100, Rotation::Cw90, 100, 100);
        assert_eq!(out.text, "test");
        assert_eq!(out.zones[0].text, "test");
    }
}