use super::headings::FontSignature;
use super::zone::{BBox, Bounded};
use crate::ImageEmitter;
use crate::PdfFigure as Figure;
/// A text element on a page: its string content, font signature and the four
/// edges of its bounding box.
///
/// NOTE(review): the coordinate units, and whether `top` is numerically larger
/// or smaller than `bottom`, are not established in this file — confirm against
/// the code that constructs these values.
pub(super) struct TextElement {
/// The text content of this element.
pub(super) text: String,
/// Font signature associated with this text (type comes from the sibling
/// `headings` module).
pub(super) sig: FontSignature,
/// Left edge of the bounding box.
pub(super) left: f32,
/// Right edge of the bounding box.
pub(super) right: f32,
/// Top edge of the bounding box.
pub(super) top: f32,
/// Bottom edge of the bounding box.
pub(super) bottom: f32,
}
impl TextElement {
    /// Vertical midpoint of the element: the arithmetic mean of `top` and `bottom`.
    pub(super) fn y_center(&self) -> f32 {
        let vertical_sum = self.top + self.bottom;
        vertical_sum / 2.0
    }

    /// Horizontal extent `right - left`, clamped so it is never negative.
    pub(super) fn width(&self) -> f32 {
        let signed_span = self.right - self.left;
        // An inverted box (right < left) reports zero width rather than a negative one.
        f32::max(signed_span, 0.0)
    }
}
/// An image element on a page: the figure payload plus the four edges of its
/// bounding box.
///
/// NOTE(review): coordinate units and vertical orientation are not established
/// in this file — presumably the same convention as `TextElement`; confirm.
pub(super) struct ImageElement {
/// The figure carried by this element (`crate::PdfFigure`, imported as `Figure`).
pub(super) figure: Figure,
/// Left edge of the bounding box.
pub(super) left: f32,
/// Right edge of the bounding box.
pub(super) right: f32,
/// Top edge of the bounding box.
pub(super) top: f32,
/// Bottom edge of the bounding box.
pub(super) bottom: f32,
}
/// A single positioned item on a page: either text or an image.
pub(super) enum PageElement {
/// A text element with its content, font signature and bounds.
Text(TextElement),
/// An image element with its figure and bounds.
Image(ImageElement),
}
impl Bounded for PageElement {
    /// Bounding box of the element, copied edge-for-edge from the wrapped
    /// text or image element.
    fn bbox(&self) -> BBox {
        // Both variants store the same four edges; pull them out first so the
        // box is assembled in one place.
        let (left, right, top, bottom) = match self {
            Self::Text(text) => (text.left, text.right, text.top, text.bottom),
            Self::Image(image) => (image.left, image.right, image.top, image.bottom),
        };
        BBox {
            left,
            right,
            top,
            bottom,
        }
    }

    /// Number of Unicode scalar values (`char`s) in a text element; images
    /// always count as zero.
    fn char_count(&self) -> usize {
        match self {
            Self::Image(_) => 0,
            Self::Text(text) => text.text.chars().count(),
        }
    }
}
/// Per-page measurements and thresholds derived from the page's text elements,
/// bundled with the image emitter supplied by the caller.
///
/// NOTE(review): how the three thresholds are consumed is not visible in this
/// file; the descriptions below state only how `from_elements` computes them.
pub(super) struct PageCtx {
/// Median of (element width / character count) over the page's text
/// elements; `from_elements` falls back to 5.0 when no element qualifies.
pub(super) median_char_width: f32,
/// Median of `|top - bottom|` over the page's text elements;
/// `from_elements` falls back to 10.0 when no element qualifies.
pub(super) median_line_height: f32,
/// Set by `from_elements` to 4.0 × `median_char_width`.
pub(super) x_gap_line_break: f32,
/// Set by `from_elements` to 2.0 × `median_char_width`.
pub(super) indent_threshold: f32,
/// Set by `from_elements` to 0.25 × `median_char_width`.
pub(super) space_threshold: f32,
/// The image emitter passed to `from_elements`, stored unchanged.
pub(super) image_emitter: ImageEmitter,
}
impl PageCtx {
    /// Builds the page context from the page's elements.
    ///
    /// Only text elements contribute to the statistics:
    /// - line height samples are `|top - bottom|`, kept when strictly positive;
    /// - character width samples are `width / char count`, kept when the
    ///   element has at least one character and a strictly positive width.
    ///
    /// The medians of those samples become `median_line_height` and
    /// `median_char_width`, defaulting to 10.0 and 5.0 respectively when there
    /// are no samples. The three thresholds are fixed multiples of the median
    /// character width. `image_emitter` is stored as given.
    pub(super) fn from_elements(elements: &[PageElement], image_emitter: ImageEmitter) -> Self {
        // Fresh iterator over just the text elements each time it is called.
        let text_runs = || {
            elements.iter().filter_map(|element| match element {
                PageElement::Text(text) => Some(text),
                PageElement::Image(_) => None,
            })
        };

        let mut heights: Vec<f32> = text_runs()
            .map(|text| (text.top - text.bottom).abs())
            .filter(|&height| height > 0.0)
            .collect();

        let mut widths_per_char: Vec<f32> = text_runs()
            .filter_map(|text| {
                let glyphs = text.text.chars().count();
                let span = text.width();
                (glyphs > 0 && span > 0.0).then(|| span / glyphs as f32)
            })
            .collect();

        let median_char_width = median(&mut widths_per_char).unwrap_or(5.0);
        let median_line_height = median(&mut heights).unwrap_or(10.0);

        Self {
            median_char_width,
            median_line_height,
            x_gap_line_break: 4.0 * median_char_width,
            indent_threshold: 2.0 * median_char_width,
            space_threshold: 0.25 * median_char_width,
            image_emitter,
        }
    }
}
/// Returns the median of `values`, or `None` when the slice is empty.
///
/// The slice is sorted in place as a side effect. For an even number of
/// elements the result is the mean of the two middle values.
///
/// Ordering uses [`f32::total_cmp`], which is a genuine total order: NaNs are
/// placed at the ends of the slice (positive NaN last) instead of comparing
/// "equal" to everything. A comparator that treats NaN as equal to every value
/// is not a total order; it can leave the slice unsorted, and the standard
/// sorts are documented as possibly panicking on such comparators.
pub(super) fn median(values: &mut [f32]) -> Option<f32> {
    if values.is_empty() {
        return None;
    }
    // Elements that compare equal under `total_cmp` are bit-identical, so an
    // unstable sort cannot change the result.
    values.sort_unstable_by(f32::total_cmp);
    let mid = values.len() / 2;
    Some(if values.len().is_multiple_of(2) {
        (values[mid - 1] + values[mid]) / 2.0
    } else {
        values[mid]
    })
}