#[cfg(not(feature = "std"))]
use alloc::{
string::{String, ToString},
vec::Vec,
};
use crate::{bzz_new::bzz_decode, error::BzzError, info::Rotation};
/// Errors reported while decoding a text layer.
#[derive(Debug, thiserror::Error)]
pub enum TextError {
/// Decoding the BZZ-wrapped input failed; wraps the underlying [`BzzError`].
#[error("bzz decode failed: {0}")]
Bzz(#[from] BzzError),
/// The buffer is too short to contain the 3-byte text length prefix.
#[error("text layer data too short")]
TooShort,
/// The declared text length reaches past the end of the buffer.
#[error("text length overflows data")]
TextOverflow,
/// The text bytes are not valid UTF-8.
#[error("invalid UTF-8 in text layer")]
InvalidUtf8,
/// A zone record ended before one of its fields could be read.
/// The payload is the byte offset at which the failed read started.
#[error("zone record truncated at offset {0}")]
ZoneTruncated(usize),
/// A zone record carried a type byte outside `1..=7`; the payload is that byte.
#[error("unknown zone type {0}")]
UnknownZoneType(u8),
}
/// Kind of a text zone, decoded from the first byte of a zone record.
///
/// `parse_zone` maps the type bytes `1..=7` to the variants below, in declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextZoneKind {
/// Type byte `1`.
Page,
/// Type byte `2`.
Column,
/// Type byte `3`.
Region,
/// Type byte `4`.
Para,
/// Type byte `5`.
Line,
/// Type byte `6`.
Word,
/// Type byte `7`.
Character,
}
/// Axis-aligned rectangle with unsigned integer coordinates.
///
/// For rectangles produced by `parse_zone`, `y` holds `page_height - (y + height)` of the
/// decoded zone, i.e. the vertical coordinate flipped against the page height, and all
/// four fields are clamped to be non-negative.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Rect {
/// Horizontal position of the rectangle.
pub x: u32,
/// Vertical position of the rectangle.
pub y: u32,
/// Horizontal extent.
pub width: u32,
/// Vertical extent.
pub height: u32,
}
/// One node of the zone tree: a rectangle, the text it covers, and its nested zones.
#[derive(Debug, Clone)]
pub struct TextZone {
/// Kind of zone, decoded from the record's type byte.
pub kind: TextZoneKind,
/// Bounding rectangle of the zone.
pub rect: Rect,
/// The part of the layer text selected by the zone's text start/length fields.
pub text: String,
/// Child zones, in the order they appear in the encoded data.
pub children: Vec<TextZone>,
}
/// A decoded text layer: the full text plus the zone tree that locates it on the page.
#[derive(Debug, Clone)]
pub struct TextLayer {
/// The complete UTF-8 text of the layer.
pub text: String,
/// Top-level zones. The parser in this file stores at most one root zone here;
/// it is empty when no zone data follows the text.
pub zones: Vec<TextZone>,
}
impl TextLayer {
    /// Returns a copy of this layer whose zone rectangles are rotated and then rescaled.
    ///
    /// * `page_w`, `page_h` - page dimensions the stored rectangles refer to; they are
    ///   passed to [`Rect::rotate`].
    /// * `rotation` - rotation applied to every rectangle.
    /// * `render_w`, `render_h` - target dimensions. Rectangles are scaled from the
    ///   rotated page size (width and height swapped for the two 90-degree rotations)
    ///   to these dimensions.
    ///
    /// The text of the layer and of each zone is cloned unchanged.
    pub fn transform(
        &self,
        page_w: u32,
        page_h: u32,
        rotation: Rotation,
        render_w: u32,
        render_h: u32,
    ) -> Self {
        // A quarter turn swaps the displayed width and height of the page.
        let quarter_turn = matches!(rotation, Rotation::Cw90 | Rotation::Ccw90);
        let (disp_w, disp_h) = if quarter_turn {
            (page_h, page_w)
        } else {
            (page_w, page_h)
        };

        let params = ZoneTransform {
            page_w,
            page_h,
            rotation,
            disp_w,
            disp_h,
            render_w,
            render_h,
        };

        let mut zones = Vec::with_capacity(self.zones.len());
        for zone in &self.zones {
            zones.push(transform_zone(zone, &params));
        }

        TextLayer {
            text: self.text.clone(),
            zones,
        }
    }
}
impl Rect {
    /// Returns this rectangle mapped into the coordinate system of a page rotated by
    /// `rotation`.
    ///
    /// `page_w` and `page_h` are the dimensions of the page *before* rotation. For the
    /// two 90-degree rotations the rectangle's width and height are swapped. All
    /// subtractions saturate at zero, so a rectangle that sticks out of the page is
    /// pinned to the edge instead of wrapping around.
    pub fn rotate(&self, page_w: u32, page_h: u32, rotation: Rotation) -> Self {
        match rotation {
            Rotation::None => self.clone(),
            Rotation::Rot180 => Rect {
                x: page_w.saturating_sub(self.x.saturating_add(self.width)),
                y: page_h.saturating_sub(self.y.saturating_add(self.height)),
                width: self.width,
                height: self.height,
            },
            Rotation::Cw90 => Rect {
                x: page_h.saturating_sub(self.y.saturating_add(self.height)),
                y: self.x,
                width: self.height,
                height: self.width,
            },
            Rotation::Ccw90 => Rect {
                x: self.y,
                y: page_w.saturating_sub(self.x.saturating_add(self.width)),
                width: self.height,
                height: self.width,
            },
        }
    }

    /// Returns this rectangle rescaled from a `from_w` x `from_h` space to a
    /// `to_w` x `to_h` space.
    ///
    /// Horizontal fields are multiplied by `to_w / from_w` and vertical fields by
    /// `to_h / from_h`, using integer arithmetic that rounds towards zero. If either
    /// source dimension is zero the rectangle is returned unchanged. Results that do
    /// not fit in a `u32` saturate at `u32::MAX` rather than wrapping.
    pub fn scale(&self, from_w: u32, from_h: u32, to_w: u32, to_h: u32) -> Self {
        if from_w == 0 || from_h == 0 {
            return self.clone();
        }
        Rect {
            x: Self::scale_axis(self.x, from_w, to_w),
            y: Self::scale_axis(self.y, from_h, to_h),
            width: Self::scale_axis(self.width, from_w, to_w),
            height: Self::scale_axis(self.height, from_h, to_h),
        }
    }

    /// Computes `value * to / from` in 64-bit arithmetic and saturates the result to `u32`.
    ///
    /// `from` must be non-zero; `scale` guarantees this before calling.
    fn scale_axis(value: u32, from: u32, to: u32) -> u32 {
        // The product of two u32 values always fits in a u64, so only the final
        // narrowing can lose information.
        let scaled = u64::from(value) * u64::from(to) / u64::from(from);
        // Lossless after the clamp.
        scaled.min(u64::from(u32::MAX)) as u32
    }
}
// Parameters shared by every zone while a `TextLayer` is being transformed.
struct ZoneTransform {
// Page dimensions before rotation; passed to `Rect::rotate`.
page_w: u32,
page_h: u32,
// Rotation applied to every rectangle.
rotation: Rotation,
// Page dimensions after rotation (width/height swapped for the 90-degree rotations);
// the source size for `Rect::scale`.
disp_w: u32,
disp_h: u32,
// Target dimensions for `Rect::scale`.
render_w: u32,
render_h: u32,
}
/// Rotates and rescales `zone` and all of its descendants according to `t`.
///
/// The rectangle is rotated within the unrotated page first and then scaled from the
/// rotated page size to the render size. Kind and text are copied unchanged.
fn transform_zone(zone: &TextZone, t: &ZoneTransform) -> TextZone {
    let rect = zone
        .rect
        .rotate(t.page_w, t.page_h, t.rotation)
        .scale(t.disp_w, t.disp_h, t.render_w, t.render_h);

    let mut children = Vec::with_capacity(zone.children.len());
    for child in &zone.children {
        children.push(transform_zone(child, t));
    }

    TextZone {
        kind: zone.kind,
        rect,
        text: zone.text.clone(),
        children,
    }
}
/// Parses a text layer from already-decoded bytes.
///
/// `data` starts with a 3-byte big-endian text length followed by the UTF-8 text and,
/// optionally, zone data. `page_height` is forwarded to the zone parser, which uses it
/// to flip the vertical coordinate of every zone rectangle.
///
/// # Errors
///
/// Returns whatever [`TextError`] `parse_text_layer_inner` reports for malformed input.
pub fn parse_text_layer(data: &[u8], page_height: u32) -> Result<TextLayer, TextError> {
parse_text_layer_inner(data, page_height)
}
pub fn parse_text_layer_bzz(data: &[u8], page_height: u32) -> Result<TextLayer, TextError> {
let decoded = bzz_decode(data)?;
parse_text_layer_inner(&decoded, page_height)
}
/// Parses the text and the optional root zone from decoded text-layer bytes.
///
/// Layout read from `data`: a 3-byte big-endian text length, that many bytes of UTF-8
/// text, one byte that is skipped, then (if any bytes remain) a single root zone record.
/// Bytes after the root zone are ignored.
///
/// # Errors
///
/// * [`TextError::TooShort`] - fewer than three bytes are available for the length.
/// * [`TextError::TextOverflow`] - the declared length reaches past the end of `data`.
/// * [`TextError::InvalidUtf8`] - the text bytes are not valid UTF-8.
/// * Any error returned by `parse_zone` for the root zone.
fn parse_text_layer_inner(data: &[u8], page_height: u32) -> Result<TextLayer, TextError> {
    let mut cursor = 0usize;
    // `read_u24` yields `None` whenever fewer than three bytes are present.
    let declared_len = read_u24(data, &mut cursor).ok_or(TextError::TooShort)?;

    let text_end = cursor
        .checked_add(declared_len)
        .ok_or(TextError::TextOverflow)?;
    // `get` rejects a range ending past the buffer, which is the overflow case.
    let text_bytes = data.get(cursor..text_end).ok_or(TextError::TextOverflow)?;
    let text = match core::str::from_utf8(text_bytes) {
        Ok(valid) => valid.to_string(),
        Err(_) => return Err(TextError::InvalidUtf8),
    };

    // One byte between the text and the zone data is skipped. `text_end` never
    // exceeds `data.len()`, so the addition cannot overflow.
    let mut zone_pos = text_end + 1;
    let mut zones = Vec::new();
    if zone_pos < data.len() {
        zones.push(parse_zone(
            data,
            &mut zone_pos,
            None,
            None,
            &text,
            page_height,
        )?);
    }

    Ok(TextLayer { text, zones })
}
// Context a zone record is decoded relative to: `parse_zone` receives one for the
// parent zone and one for the previous sibling and adds their values to the raw
// (delta-encoded) fields of the record being read.
#[derive(Clone)]
struct ZoneCtx {
// Resolved position and size. These stay signed because intermediate values may be
// negative before they are clamped into a `Rect`.
x: i32,
y: i32, width: i32,
height: i32,
// Resolved text range: start offset and length.
text_start: i32,
text_len: i32,
}
fn parse_zone(
data: &[u8],
pos: &mut usize,
parent: Option<&ZoneCtx>,
prev: Option<&ZoneCtx>,
full_text: &str,
page_height: u32,
) -> Result<TextZone, TextError> {
if *pos >= data.len() {
return Err(TextError::ZoneTruncated(*pos));
}
let type_byte = *data.get(*pos).ok_or(TextError::ZoneTruncated(*pos))?;
*pos += 1;
let kind = match type_byte {
1 => TextZoneKind::Page,
2 => TextZoneKind::Column,
3 => TextZoneKind::Region,
4 => TextZoneKind::Para,
5 => TextZoneKind::Line,
6 => TextZoneKind::Word,
7 => TextZoneKind::Character,
other => return Err(TextError::UnknownZoneType(other)),
};
let mut x = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
let mut y = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
let width = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
let height = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
let mut text_start = read_i16_biased(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
let text_len = read_i24(data, pos).ok_or(TextError::ZoneTruncated(*pos))?;
if let Some(prev) = prev {
match type_byte {
1 | 4 | 5 => {
x += prev.x;
y = prev.y - (y + height);
}
_ => {
x += prev.x + prev.width;
y += prev.y;
}
}
text_start += prev.text_start + prev.text_len;
} else if let Some(parent) = parent {
x += parent.x;
y = parent.y + parent.height - (y + height);
text_start += parent.text_start;
}
let tl_y = (page_height as i32)
.saturating_sub(y.saturating_add(height))
.max(0) as u32;
let tl_x = x.max(0) as u32;
let tl_w = width.max(0) as u32;
let tl_h = height.max(0) as u32;
let rect = Rect {
x: tl_x,
y: tl_y,
width: tl_w,
height: tl_h,
};
let ts = text_start.max(0) as usize;
let tl = text_len.max(0) as usize;
let zone_text = extract_text_slice(full_text, ts, tl);
let children_count = read_i24(data, pos)
.ok_or(TextError::ZoneTruncated(*pos))?
.max(0) as usize;
let ctx = ZoneCtx {
x,
y,
width,
height,
text_start,
text_len,
};
let mut children = Vec::with_capacity(children_count);
let mut prev_child: Option<ZoneCtx> = None;
for _ in 0..children_count {
let child = parse_zone(
data,
pos,
Some(&ctx),
prev_child.as_ref(),
full_text,
page_height,
)?;
prev_child = Some(ZoneCtx {
x: child.rect.x as i32,
y: {
(page_height as i32).saturating_sub(child.rect.y as i32 + child.rect.height as i32)
},
width: child.rect.width as i32,
height: child.rect.height as i32,
text_start: ts as i32,
text_len: tl as i32,
});
children.push(child);
}
Ok(TextZone {
kind,
rect,
text: zone_text,
children,
})
}
/// Returns the part of `full_text` covering the byte range `start..start + len`.
///
/// The range is first clipped to the string. If either end falls inside a multi-byte
/// character, the start is moved back and the end is moved forward to the nearest
/// character boundary, so the result is always valid UTF-8 and the call never panics.
fn extract_text_slice(full_text: &str, start: usize, len: usize) -> String {
    let total = full_text.len();
    let mut hi = start.saturating_add(len).min(total);
    let mut lo = start.min(hi);

    // Offsets 0 and `total` are always character boundaries, so both loops terminate.
    while !full_text.is_char_boundary(lo) {
        lo -= 1;
    }
    while !full_text.is_char_boundary(hi) {
        hi += 1;
    }

    String::from(&full_text[lo..hi])
}
/// Reads a 3-byte big-endian unsigned integer at `*pos` and advances `*pos` by three.
///
/// Returns `None`, leaving `*pos` unchanged, when fewer than three bytes are available.
fn read_u24(data: &[u8], pos: &mut usize) -> Option<usize> {
    let start = *pos;
    let bytes = data.get(start..)?.get(..3)?;
    let value = bytes
        .iter()
        .fold(0usize, |acc, &byte| (acc << 8) | usize::from(byte));
    *pos = start + 3;
    Some(value)
}
/// Reads a 2-byte big-endian value at `*pos`, subtracts the bias `0x8000`, and advances
/// `*pos` by two.
///
/// The result lies in `-0x8000..=0x7FFF`. Returns `None`, leaving `*pos` unchanged, when
/// fewer than two bytes are available.
fn read_i16_biased(data: &[u8], pos: &mut usize) -> Option<i32> {
    let start = *pos;
    let bytes = data.get(start..)?.get(..2)?;
    let raw = (u16::from(bytes[0]) << 8) | u16::from(bytes[1]);
    *pos = start + 2;
    Some(i32::from(raw) - 0x8000)
}
/// Reads a 3-byte big-endian value at `*pos` as a non-negative `i32` and advances
/// `*pos` by three.
///
/// No sign extension is performed, so the result lies in `0..=0xFF_FFFF`. Returns
/// `None`, leaving `*pos` unchanged, when fewer than three bytes are available.
fn read_i24(data: &[u8], pos: &mut usize) -> Option<i32> {
    let start = *pos;
    let bytes = data.get(start..)?.get(..3)?;
    let value = bytes
        .iter()
        .fold(0i32, |acc, &byte| (acc << 8) | i32::from(byte));
    *pos = start + 3;
    Some(value)
}
// Unit tests for the byte readers, text slicing, layer parsing and rectangle transforms.
#[cfg(test)]
mod tests {
use super::*;
// --- Low-level readers ---------------------------------------------------------------
// Three bytes are combined big-endian and the cursor advances by three.
#[test]
fn test_read_u24() {
let data = [0x01, 0x02, 0x03];
let mut pos = 0;
assert_eq!(read_u24(&data, &mut pos), Some(0x010203));
assert_eq!(pos, 3);
}
// Fewer than three bytes yields `None`.
#[test]
fn test_read_u24_truncated() {
let data = [0x01, 0x02];
let mut pos = 0;
assert_eq!(read_u24(&data, &mut pos), None);
}
// Raw 0x8000 is the bias itself and therefore decodes to zero.
#[test]
fn test_read_i16_biased() {
let data = [0x80, 0x00]; let mut pos = 0;
assert_eq!(read_i16_biased(&data, &mut pos), Some(0));
assert_eq!(pos, 2);
}
// Raw 0x0000 is the smallest encodable value.
#[test]
fn test_read_i16_biased_negative() {
let data = [0x00, 0x00]; let mut pos = 0;
assert_eq!(read_i16_biased(&data, &mut pos), Some(-0x8000));
}
// A single byte is not enough for a 16-bit field.
#[test]
fn test_read_i16_biased_truncated() {
let data = [0x80];
let mut pos = 0;
assert_eq!(read_i16_biased(&data, &mut pos), None);
}
// Big-endian: the middle byte contributes 0x01 << 8.
#[test]
fn test_read_i24() {
let data = [0x00, 0x01, 0x00];
let mut pos = 0;
assert_eq!(read_i24(&data, &mut pos), Some(256));
}
// --- extract_text_slice ---------------------------------------------------------------
#[test]
fn test_extract_text_slice_basic() {
assert_eq!(extract_text_slice("hello world", 0, 5), "hello");
assert_eq!(extract_text_slice("hello world", 6, 5), "world");
}
// Ranges starting or ending past the string are clipped instead of panicking.
#[test]
fn test_extract_text_slice_out_of_bounds() {
assert_eq!(extract_text_slice("hello", 10, 5), "");
assert_eq!(extract_text_slice("hello", 0, 100), "hello");
}
// Both characters are two bytes long in UTF-8, so byte offset 1 falls inside the first
// one; the call must not panic. The assertion itself only checks offset 0 of the result.
#[test]
fn test_extract_text_slice_utf8_boundary() {
let s = "\u{00e9}\u{00e8}"; let result = extract_text_slice(s, 1, 2);
assert!(result.is_char_boundary(0));
}
#[test]
fn test_extract_text_slice_empty() {
assert_eq!(extract_text_slice("", 0, 0), "");
assert_eq!(extract_text_slice("abc", 1, 0), "");
}
// --- parse_text_layer: error paths ----------------------------------------------------
// Fewer than three bytes cannot hold the text length prefix.
#[test]
fn test_too_short_data() {
assert!(matches!(
parse_text_layer(&[0x00], 100),
Err(TextError::TooShort)
));
assert!(matches!(
parse_text_layer(&[], 100),
Err(TextError::TooShort)
));
}
// Declared text length is 255 but only one byte follows.
#[test]
fn test_text_overflow() {
let data = [0x00, 0x00, 0xFF, 0x41];
assert!(matches!(
parse_text_layer(&data, 100),
Err(TextError::TextOverflow)
));
}
// The two text bytes 0xFF 0xFE are not valid UTF-8.
#[test]
fn test_invalid_utf8() {
let data = [0x00, 0x00, 0x02, 0xFF, 0xFE];
assert!(matches!(
parse_text_layer(&data, 100),
Err(TextError::InvalidUtf8)
));
}
// Layout: length 1, text "A", one skipped byte, then zone type byte 99.
#[test]
fn test_unknown_zone_type() {
let data = [
0x00, 0x00, 0x01, b'A', 0x00, 99, ];
assert!(matches!(
parse_text_layer(&data, 100),
Err(TextError::UnknownZoneType(99))
));
}
// Layout: length 1, text "A", one skipped byte, zone type 1, then only the first
// 16-bit field; the record ends before the remaining fields.
#[test]
fn test_zone_truncated() {
let data = [
0x00, 0x00, 0x01, b'A', 0x00, 0x01, 0x80, 0x00, ];
assert!(matches!(
parse_text_layer(&data, 100),
Err(TextError::ZoneTruncated(_))
));
}
// --- parse_text_layer: layers without zones -------------------------------------------
// A zero text length with nothing after it is a valid, empty layer.
#[test]
fn test_empty_text_no_zones() {
let data = [0x00, 0x00, 0x00];
let result = parse_text_layer(&data, 100).unwrap();
assert_eq!(result.text, "");
assert!(result.zones.is_empty());
}
// The single byte after the text is skipped and no zone data remains.
#[test]
fn test_text_only_no_zones() {
let data = [
0x00, 0x00, 0x05, b'H', b'e', b'l', b'l', b'o', 0x00, ];
let result = parse_text_layer(&data, 100).unwrap();
assert_eq!(result.text, "Hello");
assert!(result.zones.is_empty());
}
// --- TextLayer::transform -------------------------------------------------------------
// Builds a layer with one childless Page zone at the given rectangle.
fn make_layer(x: u32, y: u32, w: u32, h: u32) -> TextLayer {
TextLayer {
text: "test".to_string(),
zones: vec![TextZone {
kind: TextZoneKind::Page,
rect: Rect {
x,
y,
width: w,
height: h,
},
text: "test".to_string(),
children: vec![],
}],
}
}
// Rectangle of the first top-level zone.
fn rect0(layer: &TextLayer) -> &Rect {
&layer.zones[0].rect
}
// No rotation and a render size equal to the page size leaves the rectangle unchanged.
#[test]
fn transform_none_identity() {
let layer = make_layer(10, 20, 30, 40);
let out = layer.transform(100, 200, Rotation::None, 100, 200);
assert_eq!(
*rect0(&out),
Rect {
x: 10,
y: 20,
width: 30,
height: 40
}
);
}
// Rendering at twice the page size doubles every field.
#[test]
fn transform_none_scale_2x() {
let layer = make_layer(10, 20, 30, 40);
let out = layer.transform(100, 200, Rotation::None, 200, 400);
assert_eq!(
*rect0(&out),
Rect {
x: 20,
y: 40,
width: 60,
height: 80
}
);
}
// x = 100 - (10 + 30), y = 200 - (20 + 40); the size is unchanged.
#[test]
fn transform_rot180() {
let layer = make_layer(10, 20, 30, 40);
let out = layer.transform(100, 200, Rotation::Rot180, 100, 200);
assert_eq!(
*rect0(&out),
Rect {
x: 60,
y: 140,
width: 30,
height: 40
}
);
}
// x = 200 - (20 + 40), y = 10; width and height swap. The render size equals the
// rotated page size (200 x 100), so no scaling takes place.
#[test]
fn transform_cw90() {
let layer = make_layer(10, 20, 30, 40);
let out = layer.transform(100, 200, Rotation::Cw90, 200, 100);
assert_eq!(
*rect0(&out),
Rect {
x: 140,
y: 10,
width: 40,
height: 30
}
);
}
// x = 20, y = 100 - (10 + 30); width and height swap.
#[test]
fn transform_ccw90() {
let layer = make_layer(10, 20, 30, 40);
let out = layer.transform(100, 200, Rotation::Ccw90, 200, 100);
assert_eq!(
*rect0(&out),
Rect {
x: 20,
y: 60,
width: 40,
height: 30
}
);
}
// Same rotation as `transform_cw90`, rendered at twice the rotated page size.
#[test]
fn transform_cw90_then_scale() {
let layer = make_layer(10, 20, 30, 40);
let out = layer.transform(100, 200, Rotation::Cw90, 400, 200);
assert_eq!(
*rect0(&out),
Rect {
x: 280,
y: 20,
width: 80,
height: 60
}
);
}
// Transforming only touches geometry; layer and zone text are carried over.
#[test]
fn transform_text_preserved() {
let layer = make_layer(0, 0, 10, 10);
let out = layer.transform(100, 100, Rotation::Cw90, 100, 100);
assert_eq!(out.text, "test");
assert_eq!(out.zones[0].text, "test");
}
// --- parse_text_layer: a single zone --------------------------------------------------
// Despite the name, the one zone encoded here has type byte 1 (Page).
// Encoded layout: text length 2, "Hi", one skipped byte, type 1, then the biased
// fields x = 0, y = 0, width = 100, height = 50, text start = 0, followed by
// text length 2 and child count 0.
#[test]
fn test_single_word_zone() {
let text = b"Hi";
let mut data = Vec::new();
data.extend_from_slice(&[0x00, 0x00, 0x02]);
data.extend_from_slice(text);
data.push(0x00);
data.push(0x01);
data.extend_from_slice(&0x8000u16.to_be_bytes()); data.extend_from_slice(&0x8000u16.to_be_bytes()); data.extend_from_slice(&(100u16 + 0x8000u16).wrapping_add(0).to_be_bytes()); let h_val = 50i32 + 0x8000;
data.extend_from_slice(&(h_val as u16).to_be_bytes()); data.extend_from_slice(&0x8000u16.to_be_bytes()); data.extend_from_slice(&[0x00, 0x00, 0x02]);
data.extend_from_slice(&[0x00, 0x00, 0x00]);
let result = parse_text_layer(&data, 100).unwrap();
assert_eq!(result.text, "Hi");
assert_eq!(result.zones.len(), 1);
assert_eq!(result.zones[0].kind, TextZoneKind::Page);
assert_eq!(result.zones[0].text, "Hi");
assert_eq!(result.zones[0].rect.width, 100);
assert_eq!(result.zones[0].rect.height, 50);
}
}