use super::keyvalue;
use super::layout;
use super::table;
use super::types::{StructuredDataConfig, StructuredDataResult};
use crate::text::extraction::TextFragment;
/// Entry point for structured-data detection over extracted text fragments.
///
/// The detector owns a [`StructuredDataConfig`]; its `detect_tables`,
/// `detect_key_value` and `detect_multi_column` flags decide which detection
/// passes are run.
#[derive(Clone, Debug)]
pub struct StructuredDataDetector {
    /// Settings consulted on every detection call.
    config: StructuredDataConfig,
}
impl StructuredDataDetector {
    /// Creates a detector that uses the given configuration.
    pub fn new(config: StructuredDataConfig) -> Self {
        Self { config }
    }

    // NOTE: there is deliberately no inherent `default()` method here. The
    // type implements the `Default` trait, so `StructuredDataDetector::default()`
    // keeps working for callers, and a second inherent method with the same
    // name would only shadow the trait method (clippy::should_implement_trait).

    /// Runs every enabled detection pass over `fragments`.
    ///
    /// An empty `fragments` slice short-circuits and yields a freshly created
    /// [`StructuredDataResult`] without invoking any detection pass. Otherwise
    /// each pass is run only if its flag is set in the configuration:
    ///
    /// * `detect_tables` fills `tables` via `table::detect_tables`,
    /// * `detect_key_value` fills `key_value_pairs` via
    ///   `keyvalue::detect_key_value_pairs`,
    /// * `detect_multi_column` fills `column_sections` via
    ///   `layout::detect_column_layout`.
    ///
    /// # Errors
    ///
    /// This method itself never constructs an `Err`; the `Result` return type
    /// is kept so the signature stays stable for existing callers.
    pub fn detect(&self, fragments: &[TextFragment]) -> Result<StructuredDataResult, String> {
        let mut result = StructuredDataResult::new();

        // Nothing to analyse: hand back the untouched result.
        if fragments.is_empty() {
            return Ok(result);
        }

        if self.config.detect_tables {
            result.tables = table::detect_tables(fragments, &self.config);
        }

        if self.config.detect_key_value {
            result.key_value_pairs = keyvalue::detect_key_value_pairs(fragments, &self.config);
        }

        if self.config.detect_multi_column {
            result.column_sections = layout::detect_column_layout(fragments, &self.config);
        }

        Ok(result)
    }

    /// Returns a shared reference to the active configuration.
    pub fn config(&self) -> &StructuredDataConfig {
        &self.config
    }

    /// Replaces the active configuration; later `detect` calls use the new one.
    pub fn set_config(&mut self, config: StructuredDataConfig) {
        self.config = config;
    }
}
impl Default for StructuredDataDetector {
    /// Builds a detector backed by `StructuredDataConfig::default()`.
    fn default() -> Self {
        let config = StructuredDataConfig::default();
        Self::new(config)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A default-constructed detector has all three detection flags enabled.
    #[test]
    fn test_detector_creation() {
        let detector = StructuredDataDetector::default();
        let cfg = detector.config();

        assert!(cfg.detect_tables);
        assert!(cfg.detect_key_value);
        assert!(cfg.detect_multi_column);
    }

    /// Detecting on an empty slice succeeds and reports nothing.
    #[test]
    fn test_detector_empty_input() {
        let detector = StructuredDataDetector::default();
        let no_fragments: &[TextFragment] = &[];

        let outcome = detector
            .detect(no_fragments)
            .expect("detector should handle empty input");

        assert_eq!(outcome.tables.len(), 0);
        assert_eq!(outcome.key_value_pairs.len(), 0);
        assert_eq!(outcome.column_sections.len(), 0);
    }

    /// `set_config` swaps the configuration seen through `config()`.
    #[test]
    fn test_detector_config_update() {
        let mut detector = StructuredDataDetector::default();

        let tables_disabled = {
            let mut cfg = StructuredDataConfig::default();
            cfg.detect_tables = false;
            cfg
        };
        detector.set_config(tables_disabled);

        assert!(!detector.config().detect_tables);
    }

    /// With table and multi-column detection switched off, those result
    /// collections stay empty even for non-empty input.
    #[test]
    fn test_detector_selective_detection() {
        let detector = StructuredDataDetector::new(
            StructuredDataConfig::default()
                .with_table_detection(false)
                .with_key_value_detection(true)
                .with_multi_column_detection(false),
        );

        let fragment = TextFragment {
            text: String::from("Name: John"),
            x: 100.0,
            y: 700.0,
            width: 50.0,
            height: 12.0,
            font_size: 12.0,
            font_name: None,
            is_bold: false,
            is_italic: false,
            color: None,
            space_decisions: Vec::new(),
        };
        let fragments = [fragment];

        let outcome = detector
            .detect(&fragments)
            .expect("detect should succeed with valid input");

        assert_eq!(outcome.tables.len(), 0);
        assert_eq!(outcome.column_sections.len(), 0);
    }
}