#![forbid(unsafe_code)]
use serde::{Deserialize, Serialize};
/// Reading-order class assigned to a text region by `classify_region`.
///
/// Variants are (de)serialized by serde under their snake_case names
/// (for example `dramatic_script`), as requested by `rename_all`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ReadingOrderClass {
/// Fallback returned when none of the detectors in this file fires.
Default,
/// Returned when `detect_dramatic_script` reports aligned, short,
/// period-terminated tokens at the start of several rows.
DramaticScript,
/// Returned when `detect_dense_single_line` reports one dominant row
/// containing an unusually wide horizontal gap.
DenseSingleLine,
/// Returned when `detect_sub_super_glyphs` reports a glyph moderately
/// offset from the mean vertical position of the region.
SubSuperBaselineReattach,
/// Returned when `detect_narrow_tracked` reports a median inter-glyph gap
/// that is large relative to the average font size.
NarrowTrackedJustified,
}
/// Minimal per-glyph geometry consumed by the detectors in this file.
///
/// NOTE(review): coordinate units and the direction of the y axis are not
/// established anywhere in this file — confirm against the producer.
#[derive(Debug, Clone, Copy)]
pub struct DetectorGlyph {
/// Horizontal position. `detect_narrow_tracked` treats `x + width` as the
/// end of the glyph, so this is used as the glyph's starting edge.
pub x: f32,
/// Vertical position. `detect_dense_single_line` groups glyphs whose `y`
/// values differ by less than 0.5 into the same row.
pub y: f32,
/// Horizontal extent, added to `x` when measuring the gap to the next glyph.
pub width: f32,
/// Font size; the detectors average it to scale their thresholds.
pub font_size: f32,
/// Not read by any detector in this file.
/// Presumably the length of the text carried by the glyph — TODO confirm.
pub text_len: usize,
}
/// Detects the layout of a dramatic script: several rows that each start with
/// a short speaker tag (such as `Ham.`) at the same horizontal position.
///
/// A row is a speaker row when both of the following hold:
/// * after trimming leading whitespace, the text up to and including the first
///   `.` is at most 12 characters long (characters, not bytes, so non-ASCII
///   names are measured fairly);
/// * the row's first glyph lies within 2.0 units of the `x` of the first row
///   that satisfied the text condition.
///
/// # Parameters
/// * `row_first_glyphs` - the first glyph of every row, parallel to `row_texts`.
/// * `row_texts` - the text of every row.
///
/// # Returns
/// `true` when at least three speaker rows are found. Returns `false` when
/// fewer than three rows are supplied or when the two slices differ in length.
pub fn detect_dramatic_script(row_first_glyphs: &[DetectorGlyph], row_texts: &[&str]) -> bool {
    const MAX_TAG_CHARS: usize = 12;
    const ALIGNMENT_TOLERANCE: f32 = 2.0;
    const MIN_SPEAKER_ROWS: usize = 3;

    if row_texts.len() < MIN_SPEAKER_ROWS || row_first_glyphs.len() != row_texts.len() {
        return false;
    }

    let mut speaker_row_count = 0usize;
    let mut anchor_x: Option<f32> = None;

    for (first_glyph, text) in row_first_glyphs.iter().zip(row_texts) {
        let trimmed = text.trim_start();
        let Some(dot_pos) = trimmed.find('.') else {
            continue;
        };
        // `.` is a single byte, so the inclusive slice always ends on a char boundary.
        let tag = &trimmed[..=dot_pos];
        if tag.chars().count() > MAX_TAG_CHARS {
            continue;
        }

        match anchor_x {
            // The first speaker row defines the alignment anchor and is itself a
            // speaker row, so it must count toward the total; otherwise three
            // aligned speaker rows could never satisfy the threshold.
            None => {
                anchor_x = Some(first_glyph.x);
                speaker_row_count += 1;
            }
            Some(x0) if (x0 - first_glyph.x).abs() < ALIGNMENT_TOLERANCE => {
                speaker_row_count += 1;
            }
            Some(_) => {}
        }
    }

    speaker_row_count >= MIN_SPEAKER_ROWS
}
/// Detects a region that is essentially one line of text containing an
/// unusually wide horizontal gap.
///
/// Steps:
/// 1. Glyphs are grouped into rows; a glyph joins the first existing row whose
///    reference `y` is within 0.5 of its own `y`, otherwise it starts a new row.
/// 2. The most populated row must hold at least 80% of all glyphs.
/// 3. The finite `x` positions of that row are sorted and the gaps between
///    consecutive positions are computed.
/// 4. The result is `true` when the largest gap exceeds four times the median
///    gap, or exceeds 5.0 when the median gap is not positive.
///
/// Glyphs whose `x` is NaN or infinite are ignored in step 3, so malformed
/// geometry cannot panic the sort or fabricate an infinite gap.
///
/// # Returns
/// `false` when fewer than eight glyphs are supplied, when no row dominates,
/// or when fewer than two usable `x` positions remain.
pub fn detect_dense_single_line(glyphs: &[DetectorGlyph]) -> bool {
    const MIN_GLYPHS: usize = 8;
    const ROW_TOLERANCE: f32 = 0.5;
    const DOMINANT_SHARE: f32 = 0.8;
    const GAP_RATIO: f32 = 4.0;
    const ABSOLUTE_GAP: f32 = 5.0;

    if glyphs.len() < MIN_GLYPHS {
        return false;
    }

    // (reference y, glyph count) per row, in order of first appearance.
    let mut rows: Vec<(f32, usize)> = Vec::new();
    for g in glyphs {
        match rows.iter_mut().find(|row| (row.0 - g.y).abs() < ROW_TOLERANCE) {
            Some(row) => row.1 += 1,
            None => rows.push((g.y, 1)),
        }
    }

    let Some(&(dominant_y, dominant_count)) = rows.iter().max_by_key(|(_, count)| *count) else {
        return false;
    };
    if (dominant_count as f32) / (glyphs.len() as f32) < DOMINANT_SHARE {
        return false;
    }

    let mut xs: Vec<f32> = glyphs
        .iter()
        .filter(|g| (g.y - dominant_y).abs() < ROW_TOLERANCE)
        .map(|g| g.x)
        .filter(|x| x.is_finite())
        .collect();
    // `total_cmp` is a total order, so sorting cannot panic on unusual floats.
    xs.sort_by(f32::total_cmp);

    let mut gaps: Vec<f32> = xs.windows(2).map(|pair| pair[1] - pair[0]).collect();
    if gaps.is_empty() {
        return false;
    }

    let max_gap = gaps.iter().copied().fold(0.0f32, f32::max);
    gaps.sort_by(f32::total_cmp);
    // Upper median for even-length inputs.
    let median_gap = gaps[gaps.len() / 2];

    if median_gap > 0.0 {
        max_gap > GAP_RATIO * median_gap
    } else {
        // All-overlapping glyphs: fall back to an absolute threshold.
        max_gap > ABSOLUTE_GAP
    }
}
/// Reports whether any glyph sits moderately off the region's mean `y`.
///
/// The mean `y` of all glyphs is used as the baseline and the mean font size
/// as the scale. A glyph counts as a candidate when its absolute vertical
/// offset from the baseline is strictly greater than 0.2 and strictly less
/// than 0.8 of the mean font size.
///
/// # Returns
/// `true` when at least one glyph is a candidate; `false` otherwise, and
/// always `false` for fewer than two glyphs.
pub fn detect_sub_super_glyphs(glyphs: &[DetectorGlyph]) -> bool {
    let glyph_count = glyphs.len();
    if glyph_count < 2 {
        return false;
    }

    // Accumulate both totals in one pass, in input order.
    let (total_y, total_font_size) = glyphs
        .iter()
        .fold((0.0f32, 0.0f32), |(acc_y, acc_fs), g| (acc_y + g.y, acc_fs + g.font_size));

    let divisor = glyph_count as f32;
    let mean_y = total_y / divisor;
    let mean_font_size = total_font_size / divisor;

    let min_offset = 0.2 * mean_font_size;
    let max_offset = 0.8 * mean_font_size;

    for g in glyphs {
        let offset = (g.y - mean_y).abs();
        if offset > min_offset && offset < max_offset {
            return true;
        }
    }
    false
}
pub fn detect_narrow_tracked(glyphs: &[DetectorGlyph]) -> bool {
if glyphs.len() < 6 {
return false;
}
let mut sorted = glyphs.to_vec();
sorted.sort_by(|a, b| a.x.partial_cmp(&b.x).unwrap());
let mut gaps: Vec<f32> = sorted
.windows(2)
.map(|w| (w[1].x - (w[0].x + w[0].width)).max(0.0))
.collect();
if gaps.is_empty() {
return false;
}
gaps.sort_by(|a, b| a.partial_cmp(b).unwrap());
let median = gaps[gaps.len() / 2];
let avg_fs: f32 = sorted.iter().map(|g| g.font_size).sum::<f32>() / sorted.len() as f32;
let expected_intra = 0.08 * avg_fs;
median > 1.5 * expected_intra
}
/// Picks the reading-order class for a region by running the detectors in a
/// fixed precedence order and returning the class of the first one that fires.
///
/// Precedence: dramatic script, dense single line, sub/superscript, narrow
/// tracked. Later detectors are not evaluated once an earlier one matches.
///
/// # Parameters
/// * `glyphs` - the glyphs passed to the three geometry-based detectors.
/// * `row_first_glyphs` - first glyph of each row, used only by the dramatic
///   script detector.
/// * `row_texts` - text of each row, used only by the dramatic script detector.
///
/// # Returns
/// The matching class, or `ReadingOrderClass::Default` when nothing fires.
pub fn classify_region(
    glyphs: &[DetectorGlyph],
    row_first_glyphs: &[DetectorGlyph],
    row_texts: &[&str],
) -> ReadingOrderClass {
    if detect_dramatic_script(row_first_glyphs, row_texts) {
        ReadingOrderClass::DramaticScript
    } else if detect_dense_single_line(glyphs) {
        ReadingOrderClass::DenseSingleLine
    } else if detect_sub_super_glyphs(glyphs) {
        ReadingOrderClass::SubSuperBaselineReattach
    } else if detect_narrow_tracked(glyphs) {
        ReadingOrderClass::NarrowTrackedJustified
    } else {
        ReadingOrderClass::Default
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a glyph with the given geometry and a `text_len` of 1.
    fn glyph(x: f32, y: f32, width: f32, font_size: f32) -> DetectorGlyph {
        DetectorGlyph { x, y, width, font_size, text_len: 1 }
    }

    /// First glyphs of `rows` left-aligned rows: x = 50, y stepping down from
    /// 100 by 10 per row, width 5, font size 10.
    fn aligned_row_starts(rows: usize) -> Vec<DetectorGlyph> {
        (0..rows)
            .map(|row| glyph(50.0, 100.0 - 10.0 * row as f32, 5.0, 10.0))
            .collect()
    }

    #[test]
    fn dramatic_script_fires_on_macbeth_shape() {
        let starts = aligned_row_starts(4);
        let lines = vec![
            "First Witch. I ask you.",
            "Sec. Witch. Speak.",
            "Third Witch. Demand.",
            "All. We'll answer.",
        ];

        assert!(detect_dramatic_script(&starts, &lines));

        let class = classify_region(&starts, &starts, &lines);
        assert_eq!(class, ReadingOrderClass::DramaticScript);
    }

    #[test]
    fn dramatic_script_skips_prose() {
        let starts = aligned_row_starts(3);
        let lines = vec![
            "The first paragraph of a novel begins here.",
            "And continues with more text.",
            "This is plain prose, no speaker tags.",
        ];

        assert!(!detect_dramatic_script(&starts, &lines));
    }

    #[test]
    fn dense_single_line_detects_sec_proxy_shape() {
        // Two tight clusters on one row, separated by a wide gap.
        let mut row = Vec::new();
        for x in [
            100.0, 105.0, 110.0, 115.0, 120.0, 125.0, 170.0, 175.0, 180.0, 185.0, 190.0, 195.0,
        ] {
            row.push(glyph(x, 584.39, 2.0, 8.0));
        }

        assert!(detect_dense_single_line(&row));
    }

    #[test]
    fn dense_single_line_skips_multi_line() {
        // Four rows of two glyphs each: no single row dominates.
        let mut block = Vec::new();
        for y in [100.0, 90.0, 80.0, 70.0] {
            for x in [50.0, 60.0] {
                block.push(glyph(x, y, 2.0, 8.0));
            }
        }

        assert!(!detect_dense_single_line(&block));
    }

    #[test]
    fn sub_super_detects_subscript_y_offset() {
        let mut line: Vec<DetectorGlyph> = (0..4)
            .map(|i| glyph(50.0 + 5.0 * i as f32, 100.0, 5.0, 10.0))
            .collect();
        // One trailing glyph shifted by 4 units.
        line.push(glyph(70.0, 104.0, 5.0, 10.0));

        assert!(detect_sub_super_glyphs(&line));
    }

    #[test]
    fn sub_super_skips_uniform_baseline() {
        let line: Vec<DetectorGlyph> = (0..3)
            .map(|i| glyph(50.0 + 5.0 * i as f32, 100.0, 5.0, 10.0))
            .collect();

        assert!(!detect_sub_super_glyphs(&line));
    }

    #[test]
    fn narrow_tracked_detects_stretched_justification() {
        let line: Vec<DetectorGlyph> = (0..10)
            .map(|i| glyph(50.0 + (i as f32) * 8.0, 100.0, 5.0, 10.0))
            .collect();

        assert!(detect_narrow_tracked(&line));
    }

    #[test]
    fn narrow_tracked_skips_normal_spacing() {
        let line: Vec<DetectorGlyph> = (0..10)
            .map(|i| glyph(50.0 + (i as f32) * 5.1, 100.0, 5.0, 10.0))
            .collect();

        assert!(!detect_narrow_tracked(&line));
    }

    #[test]
    fn classify_default_when_no_pattern_matches() {
        let pair = vec![glyph(50.0, 100.0, 5.0, 10.0), glyph(56.0, 100.0, 5.0, 10.0)];
        let class = classify_region(&pair, &[], &[]);

        assert_eq!(class, ReadingOrderClass::Default);
    }
}