use crate::layout::{FontWeight, TextSpan};
/// Coarse classification of a text span based on the characters in its text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpanType {
    /// The text has at least one character and none of them is whitespace.
    Word,
    /// The text has no non-whitespace characters (this includes empty text).
    Space,
    /// The text contains both whitespace and non-whitespace characters.
    Mixed,
}

impl SpanType {
    /// Classifies `span` by scanning the characters of `span.text`.
    ///
    /// Whitespace is decided by [`char::is_whitespace`]. Empty text is
    /// reported as [`SpanType::Space`].
    pub fn from_span(span: &TextSpan) -> Self {
        let mut saw_whitespace = false;
        let mut saw_visible = false;

        for ch in span.text.chars() {
            if ch.is_whitespace() {
                saw_whitespace = true;
            } else {
                saw_visible = true;
            }
            // Once both kinds have been seen the answer cannot change.
            if saw_whitespace && saw_visible {
                return SpanType::Mixed;
            }
        }

        if saw_visible {
            SpanType::Word
        } else {
            // Covers whitespace-only text as well as empty text.
            SpanType::Space
        }
    }
}
/// A span's text together with its classification and font weights.
#[derive(Debug, Clone)]
pub struct NormalizedSpan {
    /// Copy of the source span's text.
    pub text: String,
    /// Classification produced by `SpanType::from_span` for the source span.
    pub span_type: SpanType,
    /// Font weight exactly as recorded on the source span.
    pub font_weight: FontWeight,
    /// Font weight after normalization: `FontWeight::Normal` for
    /// `SpanType::Space` spans, otherwise equal to `font_weight`.
    pub effective_font_weight: FontWeight,
}

impl NormalizedSpan {
    /// Builds a normalized view of `span`.
    ///
    /// The original weight is kept in `font_weight`; `effective_font_weight`
    /// is forced to `FontWeight::Normal` when the span classifies as
    /// `SpanType::Space`.
    pub fn from_span(span: &TextSpan) -> Self {
        let declared_weight = span.font_weight;
        let kind = SpanType::from_span(span);

        let effective_weight = match kind {
            SpanType::Space => FontWeight::Normal,
            SpanType::Word | SpanType::Mixed => declared_weight,
        };

        Self {
            text: span.text.clone(),
            span_type: kind,
            font_weight: declared_weight,
            effective_font_weight: effective_weight,
        }
    }

    /// Returns `true` unless this span's `span_type` is `SpanType::Space`.
    pub fn can_be_bold(&self) -> bool {
        match self.span_type {
            SpanType::Space => false,
            SpanType::Word | SpanType::Mixed => true,
        }
    }

    /// Returns `true` when `text` contains no non-whitespace character
    /// (empty text included).
    pub fn is_whitespace_only(&self) -> bool {
        self.text.chars().all(char::is_whitespace)
    }
}
/// Stateless holder for the font-weight normalization helpers below.
pub struct FontWeightNormalizer;

impl FontWeightNormalizer {
    /// Converts every span with `NormalizedSpan::from_span`, preserving order.
    pub fn normalize_spans(spans: &[TextSpan]) -> Vec<NormalizedSpan> {
        spans.iter().map(NormalizedSpan::from_span).collect()
    }

    /// Returns an owned copy of `normalized`.
    ///
    /// NOTE(review): despite the name, no weight is propagated between spans
    /// here; the input is cloned unchanged. Confirm whether this is an
    /// intentional placeholder.
    pub fn propagate_bold(normalized: &[NormalizedSpan]) -> Vec<NormalizedSpan> {
        normalized.to_owned()
    }

    /// Checks that no `SpanType::Space` span has a bold effective weight.
    ///
    /// # Errors
    ///
    /// Returns a message naming the index of the first offending span.
    pub fn validate_space_formatting(normalized: &[NormalizedSpan]) -> Result<(), String> {
        let first_offender = normalized.iter().position(|candidate| {
            candidate.span_type == SpanType::Space && candidate.effective_font_weight.is_bold()
        });

        match first_offender {
            Some(idx) => Err(format!("Span {} violates PDF spec: space has bold formatting", idx)),
            None => Ok(()),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::geometry::Rect;
    use crate::layout::Color;

    /// Builds a `TextSpan` fixture with the given text and weight; every
    /// other field is a fixed placeholder value.
    fn make_span(text: &str, bold: bool) -> TextSpan {
        let font_weight = if bold {
            FontWeight::Bold
        } else {
            FontWeight::Normal
        };

        TextSpan {
            artifact_type: None,
            text: text.to_owned(),
            bbox: Rect::new(0.0, 0.0, 10.0, 10.0),
            font_name: "Helvetica".to_owned(),
            font_size: 12.0,
            font_weight,
            is_italic: false,
            is_monospace: false,
            color: Color::black(),
            mcid: Some(0),
            sequence: 0,
            split_boundary_before: false,
            offset_semantic: false,
            char_spacing: 0.0,
            word_spacing: 0.0,
            horizontal_scaling: 100.0,
            primary_detected: false,
            char_widths: Vec::new(),
        }
    }

    #[test]
    fn test_span_type_classification() {
        // (input text, expected classification)
        let cases = [
            ("hello", SpanType::Word),
            (" ", SpanType::Space),
            ("hello ", SpanType::Mixed),
            ("", SpanType::Space),
        ];

        for &(text, expected) in cases.iter() {
            let span = make_span(text, false);
            assert_eq!(SpanType::from_span(&span), expected);
        }
    }

    #[test]
    fn test_space_never_bold() {
        // A bold-flagged space must come out with a normal effective weight.
        let bold_space = make_span(" ", true);
        let result = NormalizedSpan::from_span(&bold_space);

        assert_eq!(result.effective_font_weight, FontWeight::Normal);
        assert!(!result.can_be_bold());
    }

    #[test]
    fn test_word_can_be_bold() {
        let bold_word = make_span("hello", true);
        let result = NormalizedSpan::from_span(&bold_word);

        assert_eq!(result.effective_font_weight, FontWeight::Bold);
        assert!(result.can_be_bold());
    }

    #[test]
    fn test_normalization_prevents_space_bold() {
        let input = vec![
            make_span("hello", true),
            make_span(" ", true),
            make_span("world", true),
        ];

        let output = FontWeightNormalizer::normalize_spans(&input);
        assert_eq!(output[1].effective_font_weight, FontWeight::Normal);

        let verdict = FontWeightNormalizer::validate_space_formatting(&output);
        assert!(verdict.is_ok());
    }
}