use crate::extractors::text::ArtifactType;
use crate::geometry::{Point, Rect};
use crate::layout::{Color, FontWeight, TextSpan};
/// A run of text bundled with its font, style, and optional placement metadata.
///
/// Convertible to and from [`TextSpan`] through the `From` impls in this file.
#[derive(Debug, Clone, Default)]
pub struct TextContent {
/// The text itself.
pub text: String,
/// Bounding rectangle associated with the text.
pub bbox: Rect,
/// Font name and size.
pub font: FontSpec,
/// Weight, italic flag, color, and underline/strikethrough flags.
pub style: TextStyle,
/// Optional position in reading order; filled from `TextSpan::sequence` when converting
/// from a span, and mapped back to `sequence` (defaulting to 0) when converting to one.
pub reading_order: Option<usize>,
/// Optional artifact classification, carried through unchanged in both span conversions.
pub artifact_type: Option<ArtifactType>,
/// Optional origin point; not populated by the `TextSpan` conversion.
pub origin: Option<Point>,
/// Optional rotation in degrees; not populated by the `TextSpan` conversion.
pub rotation_degrees: Option<f32>,
/// Optional six-element matrix (presumably an affine transform `[a, b, c, d, e, f]` — confirm
/// against the producer); not populated by the `TextSpan` conversion.
pub matrix: Option<[f32; 6]>,
}
impl TextContent {
    /// Creates text content from its required parts.
    ///
    /// `reading_order`, `artifact_type`, `origin`, `rotation_degrees` and `matrix` all
    /// start out as `None`; use the `with_*` builders to set them.
    pub fn new(text: impl Into<String>, bbox: Rect, font: FontSpec, style: TextStyle) -> Self {
        let text = text.into();
        Self {
            text,
            bbox,
            font,
            style,
            reading_order: None,
            artifact_type: None,
            origin: None,
            rotation_degrees: None,
            matrix: None,
        }
    }

    /// Returns `self` with `reading_order` set to `Some(order)`.
    pub fn with_reading_order(self, order: usize) -> Self {
        Self {
            reading_order: Some(order),
            ..self
        }
    }

    /// Returns `self` with `artifact_type` set to `Some(artifact_type)`.
    pub fn with_artifact_type(self, artifact_type: ArtifactType) -> Self {
        Self {
            artifact_type: Some(artifact_type),
            ..self
        }
    }

    /// Reports whether the style's weight considers itself bold.
    pub fn is_bold(&self) -> bool {
        let style = &self.style;
        style.weight.is_bold()
    }

    /// Reports the style's italic flag.
    pub fn is_italic(&self) -> bool {
        let style = &self.style;
        style.italic
    }

    /// Returns the size stored in the font spec.
    pub fn font_size(&self) -> f32 {
        let font = &self.font;
        font.size
    }

    /// Returns `self` with `matrix` set to `Some(matrix)`.
    pub fn with_matrix(self, matrix: [f32; 6]) -> Self {
        Self {
            matrix: Some(matrix),
            ..self
        }
    }

    /// Returns `self` with `origin` set to `Some(origin)`.
    pub fn with_origin(self, origin: Point) -> Self {
        Self {
            origin: Some(origin),
            ..self
        }
    }

    /// Returns `self` with `rotation_degrees` set to `Some(degrees)`.
    pub fn with_rotation(self, degrees: f32) -> Self {
        Self {
            rotation_degrees: Some(degrees),
            ..self
        }
    }

    /// Reports whether a rotation is present and its magnitude exceeds 0.1 degrees.
    ///
    /// A missing rotation counts as not rotated.
    pub fn is_rotated(&self) -> bool {
        matches!(self.rotation_degrees, Some(degrees) if degrees.abs() > 0.1)
    }

    /// Returns the rotation converted from degrees to radians, if one is set.
    pub fn rotation_radians(&self) -> Option<f32> {
        self.rotation_degrees.map(f32::to_radians)
    }

    /// Returns a copy of the stored matrix, if one is set.
    pub fn get_matrix(&self) -> Option<[f32; 6]> {
        self.matrix
    }
}
impl From<TextSpan> for TextContent {
fn from(span: TextSpan) -> Self {
TextContent {
text: span.text,
bbox: span.bbox,
font: FontSpec {
name: span.font_name,
size: span.font_size,
},
style: TextStyle {
weight: span.font_weight,
italic: span.is_italic,
color: span.color,
underline: false,
strikethrough: false,
},
reading_order: Some(span.sequence),
artifact_type: span.artifact_type,
origin: None,
rotation_degrees: None,
matrix: None,
}
}
}
impl From<TextContent> for TextSpan {
fn from(content: TextContent) -> Self {
TextSpan {
text: content.text,
bbox: content.bbox,
font_name: content.font.name,
font_size: content.font.size,
font_weight: content.style.weight,
is_italic: content.style.italic,
is_monospace: false,
color: content.style.color,
mcid: None,
sequence: content.reading_order.unwrap_or(0),
split_boundary_before: false,
offset_semantic: false,
char_spacing: 0.0,
word_spacing: 0.0,
horizontal_scaling: 100.0,
primary_detected: false,
artifact_type: content.artifact_type,
char_widths: vec![],
}
}
}
/// A font described by its name and a size.
#[derive(Debug, Clone)]
pub struct FontSpec {
    /// Font name, e.g. `"Helvetica"`.
    pub name: String,
    /// Font size (units are not stated in this file; presumably points).
    pub size: f32,
}

impl Default for FontSpec {
    /// Returns Helvetica at size 12.0.
    fn default() -> Self {
        Self::new("Helvetica", 12.0)
    }
}

impl FontSpec {
    /// Creates a font spec from any string-like name and a size.
    pub fn new(name: impl Into<String>, size: f32) -> Self {
        let name = name.into();
        Self { name, size }
    }

    /// Creates a spec named `"Helvetica"` at the given size.
    pub fn helvetica(size: f32) -> Self {
        Self {
            name: String::from("Helvetica"),
            size,
        }
    }

    /// Creates a spec named `"Times-Roman"` at the given size.
    pub fn times(size: f32) -> Self {
        Self {
            name: String::from("Times-Roman"),
            size,
        }
    }

    /// Creates a spec named `"Courier"` at the given size.
    pub fn courier(size: f32) -> Self {
        Self {
            name: String::from("Courier"),
            size,
        }
    }
}
/// Slant style of a font.
///
/// NOTE(review): nothing else in the visible part of this file uses this enum;
/// `TextStyle` records italics as a plain `bool` instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum FontStyle {
/// Upright text; this is the `Default` variant.
#[default]
Normal,
/// Italic text.
Italic,
/// Oblique text.
Oblique,
}
/// Visual styling applied to a piece of text.
///
/// `Default` yields the default `FontWeight` and `Color` with every flag `false`.
#[derive(Debug, Clone, Default)]
pub struct TextStyle {
/// Font weight.
pub weight: FontWeight,
/// Whether the text is italic.
pub italic: bool,
/// Text color.
pub color: Color,
/// Whether the text is underlined.
pub underline: bool,
/// Whether the text is struck through.
pub strikethrough: bool,
}
impl TextStyle {
    /// Creates the default style; identical to `TextStyle::default()`.
    pub fn new() -> Self {
        Default::default()
    }

    /// Default style with the weight set to `FontWeight::Bold`.
    pub fn bold() -> Self {
        Self::default().with_weight(FontWeight::Bold)
    }

    /// Default style with the italic flag turned on.
    pub fn italic() -> Self {
        Self::default().with_italic(true)
    }

    /// Default style with both bold weight and the italic flag.
    pub fn bold_italic() -> Self {
        Self::default()
            .with_weight(FontWeight::Bold)
            .with_italic(true)
    }

    /// Returns `self` with its weight replaced by `weight`.
    pub fn with_weight(self, weight: FontWeight) -> Self {
        Self { weight, ..self }
    }

    /// Returns `self` with its italic flag replaced by `italic`.
    pub fn with_italic(self, italic: bool) -> Self {
        Self { italic, ..self }
    }

    /// Returns `self` with its color replaced by `color`.
    pub fn with_color(self, color: Color) -> Self {
        Self { color, ..self }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built `TextContent` keeps its text and reports the default font and style.
    #[test]
    fn test_text_content_creation() {
        let bbox = Rect::new(0.0, 0.0, 50.0, 12.0);
        let content = TextContent::new("Hello", bbox, FontSpec::default(), TextStyle::default());

        assert_eq!(content.text, "Hello");
        assert_eq!(content.font_size(), 12.0);
        assert!(!content.is_bold());
        assert!(!content.is_italic());
    }

    /// Converting a `TextSpan` carries over text, font, weight and sequence.
    #[test]
    fn test_text_span_conversion() {
        let source = TextSpan {
            text: "Test".to_string(),
            bbox: Rect::new(10.0, 20.0, 40.0, 12.0),
            font_name: "Times".to_string(),
            font_size: 12.0,
            font_weight: FontWeight::Bold,
            is_italic: false,
            is_monospace: false,
            color: Color::black(),
            mcid: None,
            sequence: 3,
            split_boundary_before: false,
            offset_semantic: false,
            char_spacing: 0.0,
            word_spacing: 0.0,
            horizontal_scaling: 100.0,
            primary_detected: false,
            artifact_type: None,
            char_widths: vec![],
        };

        let converted = TextContent::from(source);

        assert_eq!(converted.text, "Test");
        assert_eq!(converted.font.name, "Times");
        assert_eq!(converted.font.size, 12.0);
        assert!(converted.is_bold());
        assert_eq!(converted.reading_order, Some(3));
    }
}