use crate::fitz::geometry::{Matrix, Point, Rect};
use crate::fitz::text::{BidiDirection, TextItem, TextLanguage, TextSpan};
use std::fmt;
/// Structured text extracted for one page: an ordered list of blocks plus the
/// page rectangle the page was created with.
#[derive(Debug, Clone)]
pub struct STextPage {
    /// Page rectangle handed to [`STextPage::new`]; stored verbatim.
    pub media_box: Rect,
    /// Blocks in insertion order.
    pub blocks: Vec<STextBlock>,
}

impl STextPage {
    /// Creates a page with the given media box and no blocks.
    pub fn new(media_box: Rect) -> Self {
        Self {
            media_box,
            blocks: Vec::new(),
        }
    }

    /// Appends `block` after all blocks already on the page.
    pub fn add_block(&mut self, block: STextBlock) {
        self.blocks.push(block);
    }

    /// Returns the text of every block in order, each followed by `'\n'`.
    ///
    /// Block text already terminates each of its lines with `'\n'`, so
    /// non-empty blocks end up separated by a blank line.
    pub fn get_text(&self) -> String {
        let mut result = String::new();
        for block in &self.blocks {
            result.push_str(&block.get_text());
            result.push('\n');
        }
        result
    }

    /// Returns the text found inside `rect`.
    ///
    /// Only blocks whose bounding box intersects `rect` contribute; each
    /// contributing block's clipped text is followed by `'\n'`.
    pub fn get_text_in_rect(&self, rect: &Rect) -> String {
        let mut result = String::new();
        for block in self.blocks.iter().filter(|b| b.bbox.intersects(rect)) {
            result.push_str(&block.get_text_in_rect(rect));
            result.push('\n');
        }
        result
    }

    /// Returns the bounding box of every block whose text contains `needle`.
    ///
    /// Matching is done per block on the block's full text (lines joined by
    /// `'\n'`), so a hit is reported at block granularity and a needle that
    /// spans a line break only matches if it contains the newline itself.
    /// When `case_sensitive` is `false`, both sides are compared after
    /// `str::to_lowercase`.
    ///
    /// An empty `needle` yields no hits.
    pub fn search(&self, needle: &str, case_sensitive: bool) -> Vec<Rect> {
        // `str::contains("")` is always true, which would report every block
        // on the page as a match; an empty query has nothing to find.
        if needle.is_empty() {
            return Vec::new();
        }

        // `None` means "compare verbatim"; this avoids copying the needle in
        // the case-sensitive path.
        let folded_needle = if case_sensitive {
            None
        } else {
            Some(needle.to_lowercase())
        };

        self.blocks
            .iter()
            .filter(|block| {
                let text = block.get_text();
                match &folded_needle {
                    Some(folded) => text.to_lowercase().contains(folded.as_str()),
                    None => text.contains(needle),
                }
            })
            .map(|block| block.bbox)
            .collect()
    }

    /// Total number of characters across all blocks.
    pub fn char_count(&self) -> usize {
        self.blocks.iter().map(|b| b.char_count()).sum()
    }

    /// Returns references to the blocks whose `block_type` equals
    /// `block_type`, preserving page order.
    pub fn get_blocks_of_type(&self, block_type: STextBlockType) -> Vec<&STextBlock> {
        self.blocks
            .iter()
            .filter(|b| b.block_type == block_type)
            .collect()
    }
}
/// Kind tag stored on every `STextBlock` and used by
/// `STextPage::get_blocks_of_type` to filter blocks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum STextBlockType {
/// The only variant that `STextBuilder` in this file creates.
Text,
/// Not produced by any code visible in this file.
Image,
/// Not produced by any code visible in this file.
List,
/// Not produced by any code visible in this file.
Table,
}
#[derive(Debug, Clone)]
pub struct STextBlock {
pub block_type: STextBlockType,
pub bbox: Rect,
pub lines: Vec<STextLine>,
}
impl STextBlock {
pub fn new(block_type: STextBlockType, bbox: Rect) -> Self {
Self {
block_type,
bbox,
lines: Vec::new(),
}
}
pub fn add_line(&mut self, line: STextLine) {
self.bbox = self.bbox.union(&line.bbox);
self.lines.push(line);
}
pub fn get_text(&self) -> String {
let mut result = String::new();
for line in &self.lines {
result.push_str(&line.get_text());
result.push('\n');
}
result
}
pub fn get_text_in_rect(&self, rect: &Rect) -> String {
let mut result = String::new();
for line in &self.lines {
if line.bbox.intersects(rect) {
result.push_str(&line.get_text_in_rect(rect));
result.push('\n');
}
}
result
}
pub fn line_count(&self) -> usize {
self.lines.len()
}
pub fn char_count(&self) -> usize {
self.lines.iter().map(|l| l.char_count()).sum()
}
}
/// Direction in which the characters of a line are laid out.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WritingMode {
    HorizontalLtr,
    HorizontalRtl,
    VerticalTtb,
    VerticalBtt,
}

impl WritingMode {
    /// `true` for the two `Horizontal*` variants.
    pub fn is_horizontal(&self) -> bool {
        match self {
            WritingMode::HorizontalLtr | WritingMode::HorizontalRtl => true,
            WritingMode::VerticalTtb | WritingMode::VerticalBtt => false,
        }
    }

    /// `true` for the two `Vertical*` variants; always the negation of
    /// [`WritingMode::is_horizontal`].
    pub fn is_vertical(&self) -> bool {
        matches!(self, WritingMode::VerticalTtb | WritingMode::VerticalBtt)
    }

    /// `true` only for [`WritingMode::HorizontalRtl`].
    pub fn is_rtl(&self) -> bool {
        *self == WritingMode::HorizontalRtl
    }
}
/// A run of characters sharing one baseline and writing mode.
#[derive(Debug, Clone)]
pub struct STextLine {
    /// Writing mode given at construction.
    pub wmode: WritingMode,
    /// Bounding rectangle of all characters added so far; `Rect::EMPTY`
    /// until the first character arrives.
    pub bbox: Rect,
    /// Baseline coordinate given at construction.
    pub baseline: f32,
    /// Direction vector; [`STextLine::new`] always sets it to `(1.0, 0.0)`,
    /// regardless of `wmode`.
    pub dir: Point,
    /// Characters in insertion order.
    pub chars: Vec<STextChar>,
}

impl STextLine {
    /// Creates an empty line with the given writing mode and baseline.
    pub fn new(wmode: WritingMode, baseline: f32) -> Self {
        let dir = Point::new(1.0, 0.0);
        Self {
            wmode,
            bbox: Rect::EMPTY,
            baseline,
            dir,
            chars: Vec::new(),
        }
    }

    /// Appends `ch` and grows the line's bounding box to cover it.
    ///
    /// The first character's rectangle replaces the initial empty box
    /// outright; later characters are merged in with `Rect::union`.
    pub fn add_char(&mut self, ch: STextChar) {
        let glyph_box = ch.quad.to_rect();
        self.bbox = match self.chars.first() {
            None => glyph_box,
            Some(_) => self.bbox.union(&glyph_box),
        };
        self.chars.push(ch);
    }

    /// Concatenates the code points of all characters, in order.
    pub fn get_text(&self) -> String {
        let mut text = String::new();
        for glyph in &self.chars {
            text.push(glyph.c);
        }
        text
    }

    /// Concatenates the code points of the characters whose bounding
    /// rectangle intersects `rect`.
    pub fn get_text_in_rect(&self, rect: &Rect) -> String {
        let mut text = String::new();
        for glyph in &self.chars {
            if glyph.quad.to_rect().intersects(rect) {
                text.push(glyph.c);
            }
        }
        text
    }

    /// Number of characters on the line.
    pub fn char_count(&self) -> usize {
        self.chars.len()
    }

    /// Splits the line's text on Unicode whitespace and returns the
    /// non-empty pieces as owned strings.
    pub fn get_words(&self) -> Vec<String> {
        self.get_text()
            .split_whitespace()
            .map(str::to_owned)
            .collect()
    }

    /// Height of the line's bounding box.
    pub fn height(&self) -> f32 {
        self.bbox.height()
    }

    /// Width of the line's bounding box.
    pub fn width(&self) -> f32 {
        self.bbox.width()
    }
}
/// A quadrilateral described by four named corners.
///
/// The corners are expected to be listed so that `ll -> lr -> ur -> ul`
/// walks the outline, which is what [`Quad::from_rect`] and
/// [`Quad::transform`] produce.
#[derive(Debug, Clone, Copy)]
pub struct Quad {
    pub ll: Point,
    pub lr: Point,
    pub ul: Point,
    pub ur: Point,
}

impl Quad {
    /// Builds a quad from four explicit corners.
    pub fn new(ll: Point, lr: Point, ul: Point, ur: Point) -> Self {
        Self { ll, lr, ul, ur }
    }

    /// Builds an axis-aligned quad from `rect`.
    ///
    /// The mapping is `ll = (x0, y0)`, `lr = (x1, y0)`, `ul = (x0, y1)`,
    /// `ur = (x1, y1)`.
    // NOTE(review): with a y-down coordinate system this puts `ll`/`lr` on
    // the visually upper edge - confirm the intended convention with callers.
    pub fn from_rect(rect: &Rect) -> Self {
        Self {
            ll: Point::new(rect.x0, rect.y0),
            lr: Point::new(rect.x1, rect.y0),
            ul: Point::new(rect.x0, rect.y1),
            ur: Point::new(rect.x1, rect.y1),
        }
    }

    /// Returns the smallest axis-aligned rectangle covering all four corners.
    pub fn to_rect(&self) -> Rect {
        let min_x = self.ll.x.min(self.lr.x).min(self.ul.x).min(self.ur.x);
        let min_y = self.ll.y.min(self.lr.y).min(self.ul.y).min(self.ur.y);
        let max_x = self.ll.x.max(self.lr.x).max(self.ul.x).max(self.ur.x);
        let max_y = self.ll.y.max(self.lr.y).max(self.ul.y).max(self.ur.y);
        Rect::new(min_x, min_y, max_x, max_y)
    }

    /// Returns whether `p` lies inside the quad.
    ///
    /// The point must first pass `Rect::contains` on the bounding rectangle
    /// (so edge inclusivity follows that method), and must then lie on one
    /// consistent side of all four edges. For axis-aligned quads the second
    /// test never rejects anything the first one accepted; for rotated or
    /// sheared quads it removes the corner regions of the bounding box that
    /// are outside the actual outline.
    pub fn contains_point(&self, p: Point) -> bool {
        if !self.to_rect().contains(p.x, p.y) {
            return false;
        }

        // Signed area of (a, b, p): its sign says which side of edge a->b the
        // point is on; zero means the point is on the edge's line.
        let side = |a: Point, b: Point| (b.x - a.x) * (p.y - a.y) - (b.y - a.y) * (p.x - a.x);
        let sides = [
            side(self.ll, self.lr),
            side(self.lr, self.ur),
            side(self.ur, self.ul),
            side(self.ul, self.ll),
        ];

        // Accept either winding order: the point is inside a convex outline
        // when no two edges disagree about the side it is on.
        let any_negative = sides.iter().any(|&s| s < 0.0);
        let any_positive = sides.iter().any(|&s| s > 0.0);
        !(any_negative && any_positive)
    }

    /// Returns a new quad with every corner mapped through `ctm`.
    pub fn transform(&self, ctm: &Matrix) -> Self {
        Self {
            ll: ctm.transform_point(self.ll),
            lr: ctm.transform_point(self.lr),
            ul: ctm.transform_point(self.ul),
            ur: ctm.transform_point(self.ur),
        }
    }
}
/// One extracted character together with its geometry and font attributes.
#[derive(Debug, Clone)]
pub struct STextChar {
    /// The character's code point.
    pub c: char,
    /// Outline of the character on the page.
    pub quad: Quad,
    /// Font size recorded for the character.
    pub size: f32,
    /// Name of the font the character was set in.
    pub font_name: String,
    /// Glyph id; `0` when built through [`STextChar::new`].
    pub gid: u16,
    /// Three-byte colour; `[0, 0, 0]` when built through [`STextChar::new`].
    // presumably RGB - TODO confirm channel order with the producer.
    pub color: [u8; 3],
    /// Origin point; `quad.ll` when built through [`STextChar::new`].
    pub origin: Point,
}

impl STextChar {
    /// Creates a character with default details: glyph id `0`, colour
    /// `[0, 0, 0]`, and the quad's `ll` corner as origin.
    pub fn new(c: char, quad: Quad, size: f32, font_name: String) -> Self {
        Self::with_details(c, quad, size, font_name, 0, [0, 0, 0], quad.ll)
    }

    /// Creates a character with every field given explicitly.
    pub fn with_details(
        c: char,
        quad: Quad,
        size: f32,
        font_name: String,
        gid: u16,
        color: [u8; 3],
        origin: Point,
    ) -> Self {
        STextChar {
            c,
            quad,
            size,
            font_name,
            gid,
            color,
            origin,
        }
    }

    /// Whether the code point is Unicode whitespace (`char::is_whitespace`).
    pub fn is_whitespace(&self) -> bool {
        char::is_whitespace(self.c)
    }

    /// Axis-aligned bounding rectangle of the character's quad.
    pub fn bbox(&self) -> Rect {
        let outline = &self.quad;
        outline.to_rect()
    }
}
/// Flags carried by `STextBuilder`.
///
/// NOTE(review): no code visible in this file reads these flags after they
/// are stored, so their exact effect cannot be stated here - confirm against
/// the consumer before relying on them.
#[derive(Debug, Clone, Copy)]
pub struct STextOptions {
    pub preserve_ligatures: bool,
    pub preserve_whitespace: bool,
    pub preserve_images: bool,
    pub detect_paragraphs: bool,
    pub dehyphenate: bool,
}

impl Default for STextOptions {
    /// Defaults: ligatures, paragraph detection and dehyphenation are on;
    /// whitespace and image preservation are off.
    fn default() -> Self {
        let (on, off) = (true, false);
        STextOptions {
            preserve_ligatures: on,
            preserve_whitespace: off,
            preserve_images: off,
            detect_paragraphs: on,
            dehyphenate: on,
        }
    }
}
/// Incrementally assembles an [`STextPage`] from text spans.
///
/// Characters are appended to the line in progress; a line is closed when the
/// baseline jumps, and closed lines are grouped into blocks by vertical
/// spacing. Call [`STextBuilder::finish`] to flush pending state and obtain
/// the page.
pub struct STextBuilder {
    /// Page being filled with finished blocks.
    page: STextPage,
    /// Block that finished lines are currently being appended to, if any.
    current_block: Option<STextBlock>,
    /// Line that incoming characters are currently being appended to, if any.
    current_line: Option<STextLine>,
    /// Options supplied by the caller. Stored only: no method in this impl
    /// reads them yet.
    options: STextOptions,
}

impl STextBuilder {
    /// Creates a builder for a page with the given media box and options.
    pub fn new(media_box: Rect, options: STextOptions) -> Self {
        Self {
            page: STextPage::new(media_box),
            current_block: None,
            current_line: None,
            options,
        }
    }

    /// Creates a builder using `STextOptions::default()`.
    pub fn with_defaults(media_box: Rect) -> Self {
        Self::new(media_box, STextOptions::default())
    }

    /// Feeds every item of `span` into the builder.
    ///
    /// The writing mode is chosen once per span: vertical if `span.wmode` is
    /// set, otherwise right-to-left if the span's markup direction is RTL,
    /// otherwise left-to-right.
    pub fn add_span(&mut self, span: &TextSpan) {
        let wmode = if span.wmode {
            WritingMode::VerticalTtb
        } else if span.markup_dir == BidiDirection::Rtl {
            WritingMode::HorizontalRtl
        } else {
            WritingMode::HorizontalLtr
        };
        for item in span.items() {
            self.add_text_item(item, span, wmode);
        }
    }

    /// Converts one text item into an [`STextChar`] and appends it to the
    /// current line, starting a new line first when the baseline has moved.
    fn add_text_item(&mut self, item: &TextItem, span: &TextSpan, wmode: WritingMode) {
        // Negative or otherwise invalid code points are rendered as '?'.
        let c = if item.ucs >= 0 {
            char::from_u32(item.ucs as u32).unwrap_or('?')
        } else {
            '?'
        };

        // Size estimate from the span matrix: the larger of the two
        // coefficient-pair magnitudes (|a|+|b| vs |c|+|d|).
        let size = (span.trm.a.abs() + span.trm.b.abs()).max(span.trm.c.abs() + span.trm.d.abs());
        let origin = Point::new(item.x, item.y);
        let advance = item.advance;

        // The glyph box spans one `size` from the origin's y and one advance
        // along x.
        let quad = Quad::from_rect(&Rect::new(
            origin.x,
            origin.y - size,
            origin.x + advance,
            origin.y,
        ));

        let mut ch = STextChar::new(c, quad, size, span.font.name().to_string());
        // `STextChar::new` falls back to `quad.ll`, i.e. (x, y - size), as the
        // origin. The true origin is known here, so record it instead of
        // losing it.
        ch.origin = origin;

        // A baseline shift of more than 30% of the glyph size ends the line.
        let needs_new_line = match self.current_line.as_ref() {
            Some(line) => (item.y - line.baseline).abs() > size * 0.3,
            None => true,
        };
        if needs_new_line {
            // `finish_line` does nothing when no line is in progress.
            self.finish_line();
            self.start_line(wmode, item.y);
        }

        if let Some(line) = self.current_line.as_mut() {
            line.add_char(ch);
        }
    }

    /// Replaces the line in progress with a fresh empty one.
    fn start_line(&mut self, wmode: WritingMode, baseline: f32) {
        self.current_line = Some(STextLine::new(wmode, baseline));
    }

    /// Moves the line in progress (if any) into a block.
    ///
    /// The line joins the current block when the gap between its `bbox.y0`
    /// and the previous line's `bbox.y1` is under 1.5 line heights; otherwise
    /// the current block is flushed to the page and the line opens a new one.
    fn finish_line(&mut self) {
        let line = match self.current_line.take() {
            Some(line) => line,
            None => return,
        };

        let continues_block = match self.current_block.as_ref() {
            Some(block) => {
                let prev_bbox = block.lines.last().map(|l| l.bbox).unwrap_or(Rect::EMPTY);
                let spacing = (line.bbox.y0 - prev_bbox.y1).abs();
                spacing < line.height() * 1.5
            }
            None => false,
        };

        if !continues_block {
            // `finish_block` does nothing when no block is open.
            self.finish_block();
            self.start_block();
        }
        if let Some(block) = self.current_block.as_mut() {
            block.add_line(line);
        }
    }

    /// Replaces the block in progress with a fresh empty text block.
    fn start_block(&mut self) {
        self.current_block = Some(STextBlock::new(STextBlockType::Text, Rect::EMPTY));
    }

    /// Moves the block in progress (if any) onto the page; blocks without
    /// lines are dropped.
    fn finish_block(&mut self) {
        if let Some(block) = self.current_block.take() {
            if !block.lines.is_empty() {
                self.page.add_block(block);
            }
        }
    }

    /// Flushes the pending line and block and returns the finished page.
    pub fn finish(mut self) -> STextPage {
        self.finish_line();
        self.finish_block();
        self.page
    }
}
/// Displays a page as its plain text, exactly as returned by `get_text`.
impl fmt::Display for STextPage {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = self.get_text();
        f.write_str(&text)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a 10x10 "Arial" glyph sitting on baseline 100 at horizontal
    /// offset `x`.
    fn arial_glyph(c: char, x: f32) -> STextChar {
        let cell = Rect::new(x, 90.0, x + 10.0, 100.0);
        STextChar::new(c, Quad::from_rect(&cell), 10.0, "Arial".to_string())
    }

    // A new page has no blocks and no characters.
    #[test]
    fn test_stext_page_creation() {
        let letter = Rect::new(0.0, 0.0, 612.0, 792.0);
        let page = STextPage::new(letter);
        assert_eq!(page.blocks.len(), 0);
        assert_eq!(page.char_count(), 0);
    }

    // A new block has no lines and no characters.
    #[test]
    fn test_stext_block_creation() {
        let bounds = Rect::new(0.0, 0.0, 100.0, 50.0);
        let block = STextBlock::new(STextBlockType::Text, bounds);
        assert_eq!(block.line_count(), 0);
        assert_eq!(block.char_count(), 0);
    }

    // A new line keeps its baseline and starts empty.
    #[test]
    fn test_stext_line_creation() {
        let line = STextLine::new(WritingMode::HorizontalLtr, 100.0);
        assert_eq!(line.baseline, 100.0);
        assert_eq!(line.char_count(), 0);
    }

    // The basic constructor stores the code point and size it was given.
    #[test]
    fn test_stext_char_creation() {
        let cell = Rect::new(0.0, 0.0, 10.0, 12.0);
        let ch = STextChar::new('A', Quad::from_rect(&cell), 12.0, "Times".to_string());
        assert_eq!(ch.c, 'A');
        assert_eq!(ch.size, 12.0);
        assert!(!ch.is_whitespace());
    }

    // Rect -> Quad -> Rect round-trips the four coordinates.
    #[test]
    fn test_quad_to_rect() {
        let source = Rect::new(10.0, 20.0, 30.0, 40.0);
        let rect = Quad::from_rect(&source).to_rect();
        assert_eq!(rect.x0, 10.0);
        assert_eq!(rect.y0, 20.0);
        assert_eq!(rect.x1, 30.0);
        assert_eq!(rect.y1, 40.0);
    }

    // Orientation predicates on a sample of variants.
    #[test]
    fn test_writing_mode() {
        let ltr = WritingMode::HorizontalLtr;
        let rtl = WritingMode::HorizontalRtl;
        let ttb = WritingMode::VerticalTtb;
        assert!(ltr.is_horizontal());
        assert!(!ttb.is_horizontal());
        assert!(ttb.is_vertical());
        assert!(rtl.is_rtl());
    }

    // Adding one character makes it the line's whole text.
    #[test]
    fn test_stext_line_add_char() {
        let mut line = STextLine::new(WritingMode::HorizontalLtr, 100.0);
        line.add_char(arial_glyph('H', 0.0));
        assert_eq!(line.char_count(), 1);
        assert_eq!(line.get_text(), "H");
    }

    // Words are split on the space between "Hello" and "World".
    #[test]
    fn test_stext_line_get_words() {
        let mut line = STextLine::new(WritingMode::HorizontalLtr, 100.0);
        for (i, c) in "Hello World".chars().enumerate() {
            line.add_char(arial_glyph(c, i as f32 * 10.0));
        }
        let words = line.get_words();
        assert_eq!(words.len(), 2);
        assert_eq!(words[0], "Hello");
        assert_eq!(words[1], "World");
    }
}