use serde::{Deserialize, Serialize};
use super::bbox::{BoundingBox, Vertex};
use super::enums::{PdfLayer, TextFormat, TextType};
/// A piece of text together with its bounding box, font attributes and
/// classification fields.
///
/// The type is (de)serializable through serde. All fields take part in
/// serialization; `mcid` is the only one that is omitted from the output when
/// it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextChunk {
/// The text content. Lengths reported by `text_length` are counted in
/// `char`s of this string, not bytes.
pub value: String,
/// Bounding box of the chunk. Its `left_x` / `right_x` serve as fallback
/// symbol coordinates and its `width()` feeds `average_symbol_width`.
pub bbox: BoundingBox,
/// Font name (the test fixture in this file uses `"Helvetica"`).
pub font_name: String,
/// Font size. NOTE(review): unit is not visible here, presumably points; confirm.
pub font_size: f64,
/// Font weight (the test fixture in this file uses `400.0`).
pub font_weight: f64,
/// Italic angle of the font. NOTE(review): unit and sign convention are not visible here.
pub italic_angle: f64,
/// Font color as a string (the test fixture in this file uses `"#000000"`).
pub font_color: String,
/// Contrast ratio. NOTE(review): presumably text against its background; confirm with the producer.
pub contrast_ratio: f64,
/// Per-symbol end coordinates: entry `i` is what `symbol_end_coordinate(i)`
/// returns, and entry `i - 1` is what `symbol_start_coordinate(i)` returns.
/// May be empty, in which case those methods fall back to the bbox edges.
pub symbol_ends: Vec<f64>,
/// Text format classification (see `TextFormat`).
pub text_format: TextFormat,
/// Text type classification (see `TextType`).
pub text_type: TextType,
/// PDF layer classification (see `PdfLayer`).
pub pdf_layer: PdfLayer,
/// NOTE(review): presumably whether the chunk's optional content group is visible; confirm.
pub ocg_visible: bool,
/// Optional index of the chunk. Note this is `Option<usize>`, whereas the
/// other chunk types in this file use `Option<u32>`.
pub index: Option<usize>,
/// Optional page number (the test fixture in this file uses `Some(1)`).
pub page_number: Option<u32>,
/// Optional level label. NOTE(review): meaning is not visible in this file.
pub level: Option<String>,
/// NOTE(review): presumably the PDF marked-content identifier; confirm.
/// Left out of the serialized output entirely when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub mcid: Option<i64>,
}
impl TextChunk {
    /// Reports whether `value` consists solely of Unicode whitespace.
    ///
    /// An empty `value` yields `true`, because it contains no
    /// non-whitespace character.
    pub fn is_white_space_chunk(&self) -> bool {
        self.value.chars().all(char::is_whitespace)
    }

    /// Collapses every run of consecutive space characters (`' '`, U+0020)
    /// in `value` into a single space.
    ///
    /// Only the plain space is collapsed. Any other character, including
    /// tabs and newlines, is copied through unchanged and ends the current
    /// run of spaces.
    pub fn compress_spaces(&mut self) {
        let mut previous: Option<char> = None;
        let mut compacted = String::with_capacity(self.value.len());
        for current in self.value.chars() {
            // Drop a space only when the character right before it was also a space.
            let repeated_space = current == ' ' && previous == Some(' ');
            if !repeated_space {
                compacted.push(current);
            }
            previous = Some(current);
        }
        self.value = compacted;
    }

    /// Returns the number of `char`s (Unicode scalar values) in `value`.
    ///
    /// This is a character count, not a byte length.
    pub fn text_length(&self) -> usize {
        self.value.chars().count()
    }

    /// Returns the bounding-box width divided by the number of characters.
    ///
    /// Returns `0.0` for an empty `value` instead of dividing by zero.
    pub fn average_symbol_width(&self) -> f64 {
        match self.text_length() {
            0 => 0.0,
            count => self.bbox.width() / count as f64,
        }
    }

    /// Returns the coordinate at which the symbol at position `idx` starts.
    ///
    /// * `idx == 0`: the left edge of the bounding box.
    /// * `1 <= idx <= symbol_ends.len()`: the recorded end of the preceding
    ///   symbol, `symbol_ends[idx - 1]`.
    /// * otherwise: the right edge of the bounding box.
    pub fn symbol_start_coordinate(&self, idx: usize) -> f64 {
        match idx.checked_sub(1) {
            // `idx` is zero: there is no preceding symbol.
            None => self.bbox.left_x,
            Some(preceding) => self
                .symbol_ends
                .get(preceding)
                .copied()
                .unwrap_or(self.bbox.right_x),
        }
    }

    /// Returns the coordinate at which the symbol at position `idx` ends.
    ///
    /// Uses `symbol_ends[idx]` when that entry exists and falls back to the
    /// right edge of the bounding box otherwise.
    pub fn symbol_end_coordinate(&self, idx: usize) -> f64 {
        self.symbol_ends
            .get(idx)
            .copied()
            .unwrap_or(self.bbox.right_x)
    }
}
/// An image region, described only by its bounding box plus optional
/// bookkeeping fields. (De)serializable through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageChunk {
/// Bounding box of the image.
pub bbox: BoundingBox,
/// Optional index of the chunk.
pub index: Option<u32>,
/// Optional level label. NOTE(review): meaning is not visible in this file.
pub level: Option<String>,
}
/// A single line segment with its bounding box, end points, width and
/// orientation flags. (De)serializable through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LineChunk {
/// Bounding box of the line.
pub bbox: BoundingBox,
/// Optional index of the chunk.
pub index: Option<u32>,
/// Optional level label. NOTE(review): meaning is not visible in this file.
pub level: Option<String>,
/// Start vertex of the line.
pub start: Vertex,
/// End vertex of the line.
pub end: Vertex,
/// Line width. NOTE(review): presumably the stroke width; unit not visible here.
pub width: f64,
/// Flag marking the line as horizontal. The flag is stored, not derived, in this file.
pub is_horizontal_line: bool,
/// Flag marking the line as vertical. The flag is stored, not derived, in this file.
pub is_vertical_line: bool,
/// Flag marking the line as square. NOTE(review): the exact criterion is
/// decided by whoever constructs the value; confirm at the producer.
pub is_square: bool,
}
/// A group of `LineChunk`s treated as one line-art element, with a bounding
/// box for the whole group. (De)serializable through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LineArtChunk {
/// Bounding box of the line art. NOTE(review): presumably encloses all
/// `line_chunks`; this file does not enforce it.
pub bbox: BoundingBox,
/// Optional index of the chunk.
pub index: Option<u32>,
/// Optional level label. NOTE(review): meaning is not visible in this file.
pub level: Option<String>,
/// The line segments that make up this line art.
pub line_chunks: Vec<LineChunk>,
}
/// Size tolerance for line art, fixed at `1.0`.
///
/// NOTE(review): nothing in this part of the file reads this constant, so its
/// unit and exact use must be confirmed at the call sites.
pub const LINE_ART_SIZE_EPSILON: f64 = 1.0;
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `TextChunk` fixture holding `value`.
    ///
    /// Every other field gets a fixed placeholder; `symbol_ends` starts empty
    /// so tests that need per-symbol coordinates must set it themselves.
    fn make_text_chunk(value: &str) -> TextChunk {
        TextChunk {
            value: value.to_string(),
            bbox: BoundingBox::new(Some(1), 0.0, 0.0, 100.0, 12.0),
            font_name: "Helvetica".to_string(),
            font_size: 12.0,
            font_weight: 400.0,
            italic_angle: 0.0,
            font_color: "#000000".to_string(),
            contrast_ratio: 21.0,
            symbol_ends: vec![],
            text_format: TextFormat::Normal,
            text_type: TextType::Regular,
            pdf_layer: PdfLayer::Main,
            ocg_visible: true,
            index: None,
            page_number: Some(1),
            level: None,
            mcid: None,
        }
    }

    #[test]
    fn test_is_white_space_chunk() {
        assert!(make_text_chunk("   ").is_white_space_chunk());
        assert!(make_text_chunk(" \t\n").is_white_space_chunk());
        assert!(!make_text_chunk("hello").is_white_space_chunk());
        assert!(!make_text_chunk(" a ").is_white_space_chunk());
        // An empty value has no non-whitespace character, so it counts too.
        assert!(make_text_chunk("").is_white_space_chunk());
    }

    #[test]
    fn test_compress_spaces() {
        // The input must contain real runs of spaces; otherwise the test
        // would also pass for an implementation that does nothing.
        let mut chunk = make_text_chunk("hello   world  test");
        chunk.compress_spaces();
        assert_eq!(chunk.value, "hello world test");
    }

    #[test]
    fn test_compress_spaces_keeps_edges_and_other_whitespace() {
        // Leading/trailing runs shrink to one space; they are not trimmed.
        let mut chunk = make_text_chunk("  padded  ");
        chunk.compress_spaces();
        assert_eq!(chunk.value, " padded ");

        // Only U+0020 is collapsed; tabs pass through and end a run.
        let mut chunk = make_text_chunk("a \t  b\t\tc");
        chunk.compress_spaces();
        assert_eq!(chunk.value, "a \t b\t\tc");

        let mut chunk = make_text_chunk("");
        chunk.compress_spaces();
        assert_eq!(chunk.value, "");
    }

    #[test]
    fn test_text_length() {
        assert_eq!(make_text_chunk("hello").text_length(), 5);
        assert_eq!(make_text_chunk("").text_length(), 0);
        // Length is counted in chars, not bytes.
        assert_eq!(make_text_chunk("héllo").text_length(), 5);
    }

    #[test]
    fn test_average_symbol_width() {
        let chunk = make_text_chunk("hello");
        assert!((chunk.average_symbol_width() - 20.0).abs() < 0.01);
    }

    #[test]
    fn test_average_symbol_width_empty() {
        // No characters: must return 0.0 rather than divide by zero.
        assert_eq!(make_text_chunk("").average_symbol_width(), 0.0);
    }

    #[test]
    fn test_symbol_coordinates() {
        let mut chunk = make_text_chunk("abc");
        chunk.symbol_ends = vec![30.0, 60.0, 90.0];
        // Read the edges from the bbox itself so the test does not depend on
        // the argument order of `BoundingBox::new`.
        let (left, right) = (chunk.bbox.left_x, chunk.bbox.right_x);

        assert_eq!(chunk.symbol_start_coordinate(0), left);
        assert_eq!(chunk.symbol_start_coordinate(1), 30.0);
        assert_eq!(chunk.symbol_start_coordinate(3), 90.0);
        assert_eq!(chunk.symbol_start_coordinate(4), right);

        assert_eq!(chunk.symbol_end_coordinate(0), 30.0);
        assert_eq!(chunk.symbol_end_coordinate(2), 90.0);
        assert_eq!(chunk.symbol_end_coordinate(3), right);
    }

    #[test]
    fn test_symbol_coordinates_without_symbol_ends() {
        let chunk = make_text_chunk("abc");
        let (left, right) = (chunk.bbox.left_x, chunk.bbox.right_x);

        assert_eq!(chunk.symbol_start_coordinate(0), left);
        assert_eq!(chunk.symbol_start_coordinate(1), right);
        assert_eq!(chunk.symbol_end_coordinate(0), right);
    }
}