use crate::error::Result;
use crate::hybrid::complexity_estimator::{Complexity, ComplexityEstimator};
use crate::layout::text_block::TextBlock;
/// Heading classification assigned to a single text block.
///
/// `H1` through `H4` name heading levels and `Body` marks ordinary text.
/// `SmartLayoutAnalyzer::detect_headings` currently reports `Body` for every
/// block.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HeadingLevel {
    /// Level-1 heading.
    H1,
    /// Level-2 heading.
    H2,
    /// Level-3 heading.
    H3,
    /// Level-4 heading.
    H4,
    /// Regular (non-heading) text.
    Body,
}
/// Layout analyzer that produces a reading order and heading levels for the
/// text blocks of a page.
///
/// The only state is the configured complexity threshold. In this file the
/// threshold is logged and reported through `capabilities()`; it is not
/// consulted when choosing how blocks are ordered.
///
/// `Debug` and `Clone` are derived so the analyzer can be logged, embedded in
/// other `Debug` types, and duplicated by callers.
#[derive(Debug, Clone)]
pub struct SmartLayoutAnalyzer {
    /// Complexity threshold supplied at construction time.
    complexity_threshold: Complexity,
}
impl SmartLayoutAnalyzer {
    /// Creates an analyzer whose threshold is [`Complexity::Moderate`].
    pub fn new() -> Self {
        Self::with_threshold(Complexity::Moderate)
    }

    /// Creates an analyzer that stores `threshold` as its complexity threshold.
    pub fn with_threshold(threshold: Complexity) -> Self {
        Self {
            complexity_threshold: threshold,
        }
    }

    /// Computes the order in which `blocks` should be read.
    ///
    /// The page complexity is estimated from `blocks`, `page_width` and
    /// `page_height` and logged next to the configured threshold, but it does
    /// not affect the outcome: the classical ordering is always used.
    ///
    /// Returns indices into `blocks`. An empty slice yields an empty vector
    /// without estimating or logging anything. As written, this method never
    /// produces an `Err`.
    pub fn determine_reading_order(
        &self,
        blocks: &[TextBlock],
        page_width: f32,
        page_height: f32,
    ) -> Result<Vec<usize>> {
        if blocks.is_empty() {
            return Ok(Vec::new());
        }

        let estimated =
            ComplexityEstimator::estimate_page_complexity(blocks, page_width, page_height);
        log::debug!(
            "Page complexity: {:?} (threshold: {:?})",
            estimated,
            self.complexity_threshold
        );
        log::info!("Using classical reading order (complexity: {:?})", estimated);

        let reading_order = self.classical_reading_order(blocks);
        Ok(reading_order)
    }

    /// Classifies every block as [`HeadingLevel::Body`].
    ///
    /// Heading detection itself has been removed; for non-empty input a
    /// warning saying so is logged on each call. The returned vector has one
    /// entry per input block. As written, this method never produces an `Err`.
    pub fn detect_headings(&self, blocks: &[TextBlock]) -> Result<Vec<HeadingLevel>> {
        if blocks.is_empty() {
            return Ok(Vec::new());
        }

        log::warn!(
            "Heading detection has been removed (non-PDF-spec-compliant). All blocks will be treated as body text."
        );

        let levels: Vec<HeadingLevel> = blocks.iter().map(|_| HeadingLevel::Body).collect();
        Ok(levels)
    }

    /// Reports what this analyzer can do.
    ///
    /// All ML-related flags are `false`; the configured threshold is copied
    /// into the result.
    pub fn capabilities(&self) -> AnalyzerCapabilities {
        let complexity_threshold = self.complexity_threshold;
        AnalyzerCapabilities {
            has_ml_reading_order: false,
            has_ml_heading_detection: false,
            ml_models_loaded: false,
            complexity_threshold,
        }
    }

    /// Orders block indices by `bbox.y`, breaking ties by `bbox.x`.
    ///
    /// Comparisons go through `crate::utils::safe_float_cmp`. The sort is
    /// stable, so blocks that compare equal on both keys keep their input
    /// order.
    fn classical_reading_order(&self, blocks: &[TextBlock]) -> Vec<usize> {
        let mut indices = (0..blocks.len()).collect::<Vec<usize>>();
        indices.sort_by(|&lhs, &rhs| {
            let (first, second) = (&blocks[lhs].bbox, &blocks[rhs].bbox);
            // Both comparisons are evaluated up front; the x result only
            // matters when the y comparison reports equality.
            let vertical = crate::utils::safe_float_cmp(first.y, second.y);
            let horizontal = crate::utils::safe_float_cmp(first.x, second.x);
            vertical.then(horizontal)
        });
        indices
    }
}
impl Default for SmartLayoutAnalyzer {
fn default() -> Self {
Self::new()
}
}
/// Snapshot of what a layout analyzer is able to do.
///
/// `SmartLayoutAnalyzer::capabilities` fills in every ML flag as `false`.
#[derive(Clone, Debug)]
pub struct AnalyzerCapabilities {
    /// ML reading-order flag; when set without loaded models, `description()`
    /// reports ML as compiled but not loaded.
    pub has_ml_reading_order: bool,
    /// ML heading-detection flag; treated by `description()` the same way as
    /// `has_ml_reading_order`.
    pub has_ml_heading_detection: bool,
    /// Whether ML models are loaded; this alone drives `has_any_ml()`.
    pub ml_models_loaded: bool,
    /// Complexity threshold copied from the analyzer.
    pub complexity_threshold: Complexity,
}
impl AnalyzerCapabilities {
    /// Returns `true` exactly when ML models are loaded.
    ///
    /// The two `has_ml_*` flags are not consulted here.
    pub fn has_any_ml(&self) -> bool {
        self.ml_models_loaded
    }

    /// Builds a human-readable summary of the analyzer mode.
    ///
    /// Loaded models take precedence; otherwise the text depends on whether
    /// either `has_ml_*` flag is set.
    pub fn description(&self) -> String {
        let ml_compiled = self.has_ml_reading_order || self.has_ml_heading_detection;
        match (self.ml_models_loaded, ml_compiled) {
            (true, _) => format!("ML-enhanced (threshold: {:?})", self.complexity_threshold),
            (false, true) => String::from("ML compiled but models not loaded (using classical)"),
            (false, false) => String::from("Classical only (ML feature not enabled)"),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::geometry::Rect;
    use crate::layout::text_block::{Color, FontWeight, TextBlock, TextChar};

    /// Page width handed to `determine_reading_order` in these tests.
    const PAGE_WIDTH: f32 = 612.0;
    /// Page height handed to `determine_reading_order` in these tests.
    const PAGE_HEIGHT: f32 = 792.0;

    /// Builds a block positioned at (`x`, `y`) that holds `text` and a single
    /// placeholder character.
    fn create_test_block(x: f32, y: f32, text: &str) -> TextBlock {
        let glyph_box = Rect {
            x,
            y,
            width: 10.0,
            height: 10.0,
        };
        let glyph = TextChar {
            char: 'A',
            bbox: glyph_box,
            font_name: String::from("Arial"),
            font_size: 12.0,
            font_weight: FontWeight::Normal,
            is_italic: false,
            is_monospace: false,
            color: Color::black(),
            mcid: None,
            origin_x: glyph_box.x,
            origin_y: glyph_box.y,
            rotation_degrees: 0.0,
            advance_width: glyph_box.width,
            matrix: None,
        };
        let block_box = Rect {
            x,
            y,
            width: 100.0,
            height: 20.0,
        };

        TextBlock {
            chars: vec![glyph],
            bbox: block_box,
            text: text.to_owned(),
            avg_font_size: 12.0,
            dominant_font: String::from("Arial"),
            is_bold: false,
            is_italic: false,
            mcid: None,
        }
    }

    /// A freshly built analyzer describes itself with non-empty text.
    #[test]
    fn test_create_analyzer() {
        let description = SmartLayoutAnalyzer::new().capabilities().description();
        assert!(!description.is_empty());
    }

    /// Blocks in one column come back ordered by their `y` coordinate.
    #[test]
    fn test_reading_order() {
        let blocks = [
            create_test_block(100.0, 200.0, "third"),
            create_test_block(100.0, 100.0, "first"),
            create_test_block(100.0, 150.0, "second"),
        ];

        let order = SmartLayoutAnalyzer::new()
            .determine_reading_order(&blocks, PAGE_WIDTH, PAGE_HEIGHT)
            .unwrap();

        assert_eq!(order, [1, 2, 0]);
    }

    /// Heading detection yields one entry per input block.
    #[test]
    fn test_heading_detection() {
        let blocks = [
            create_test_block(100.0, 100.0, "Test"),
            create_test_block(100.0, 130.0, "More text"),
        ];

        let headings = SmartLayoutAnalyzer::new().detect_headings(&blocks).unwrap();

        assert_eq!(headings.len(), 2);
    }

    /// Empty input produces empty output from both entry points.
    #[test]
    fn test_empty_blocks() {
        let analyzer = SmartLayoutAnalyzer::new();

        let order = analyzer
            .determine_reading_order(&[], PAGE_WIDTH, PAGE_HEIGHT)
            .unwrap();
        assert!(order.is_empty());

        let headings = analyzer.detect_headings(&[]).unwrap();
        assert!(headings.is_empty());
    }

    /// The threshold given to `with_threshold` is visible via `capabilities`.
    #[test]
    fn test_with_threshold() {
        let caps = SmartLayoutAnalyzer::with_threshold(Complexity::Complex).capabilities();
        assert_eq!(caps.complexity_threshold, Complexity::Complex);
    }

    /// Every ML-related capability flag is off.
    #[test]
    fn test_capabilities() {
        let caps = SmartLayoutAnalyzer::new().capabilities();
        let ml_flags = [
            caps.has_ml_reading_order,
            caps.has_ml_heading_detection,
            caps.ml_models_loaded,
        ];
        for flag in ml_flags.iter() {
            assert!(!*flag);
        }
    }

    /// A 2x2 grid already in row-major order keeps that order.
    #[test]
    fn test_classical_reading_order() {
        let blocks = [
            create_test_block(50.0, 100.0, "top-left"),
            create_test_block(400.0, 100.0, "top-right"),
            create_test_block(50.0, 200.0, "bottom-left"),
            create_test_block(400.0, 200.0, "bottom-right"),
        ];

        let order = SmartLayoutAnalyzer::new().classical_reading_order(&blocks);

        assert_eq!(order[..4], [0, 1, 2, 3]);
    }
}