oxidize_pdf/text/structured/
detector.rs1use super::keyvalue;
4use super::layout;
5use super::table;
6use super::types::{StructuredDataConfig, StructuredDataResult};
7use crate::text::extraction::TextFragment;
8
9#[derive(Debug, Clone)]
35pub struct StructuredDataDetector {
36 config: StructuredDataConfig,
37}
38
39impl StructuredDataDetector {
40 pub fn new(config: StructuredDataConfig) -> Self {
42 Self { config }
43 }
44
45 pub fn default() -> Self {
47 Self::new(StructuredDataConfig::default())
48 }
49
50 pub fn detect(&self, fragments: &[TextFragment]) -> Result<StructuredDataResult, String> {
67 let mut result = StructuredDataResult::new();
68
69 if fragments.is_empty() {
71 return Ok(result);
72 }
73
74 if self.config.detect_tables {
76 result.tables = table::detect_tables(fragments, &self.config);
77 }
78
79 if self.config.detect_key_value {
81 result.key_value_pairs = keyvalue::detect_key_value_pairs(fragments, &self.config);
82 }
83
84 if self.config.detect_multi_column {
86 result.column_sections = layout::detect_column_layout(fragments, &self.config);
87 }
88
89 Ok(result)
90 }
91
92 pub fn config(&self) -> &StructuredDataConfig {
94 &self.config
95 }
96
97 pub fn set_config(&mut self, config: StructuredDataConfig) {
99 self.config = config;
100 }
101}
102
103impl Default for StructuredDataDetector {
104 fn default() -> Self {
105 Self::new(StructuredDataConfig::default())
106 }
107}
108
#[cfg(test)]
mod tests {
    use super::*;

    /// A default-constructed detector is expected to report all three
    /// detection flags as enabled.
    #[test]
    fn test_detector_creation() {
        let detector = StructuredDataDetector::default();
        let settings = detector.config();

        assert!(settings.detect_tables);
        assert!(settings.detect_key_value);
        assert!(settings.detect_multi_column);
    }

    /// Detection over an empty slice succeeds and every result collection is empty.
    #[test]
    fn test_detector_empty_input() {
        let no_fragments: &[TextFragment] = &[];

        let outcome = StructuredDataDetector::default()
            .detect(no_fragments)
            .expect("detector should handle empty input");

        assert_eq!(outcome.tables.len(), 0);
        assert_eq!(outcome.key_value_pairs.len(), 0);
        assert_eq!(outcome.column_sections.len(), 0);
    }

    /// `set_config` swaps in the new configuration, observable through `config()`.
    #[test]
    fn test_detector_config_update() {
        let mut replacement = StructuredDataConfig::default();
        replacement.detect_tables = false;

        let mut detector = StructuredDataDetector::default();
        detector.set_config(replacement);

        assert!(!detector.config().detect_tables);
    }

    /// With table and multi-column detection switched off via the builder
    /// methods, a detection run leaves `tables` and `column_sections` empty.
    #[test]
    fn test_detector_selective_detection() {
        let detector = StructuredDataDetector::new(
            StructuredDataConfig::default()
                .with_table_detection(false)
                .with_key_value_detection(true)
                .with_multi_column_detection(false),
        );

        let label_and_value = TextFragment {
            text: String::from("Name: John"),
            x: 100.0,
            y: 700.0,
            width: 50.0,
            height: 12.0,
            font_size: 12.0,
            font_name: None,
            is_bold: false,
            is_italic: false,
            color: None,
        };
        let fragments = vec![label_and_value];

        let outcome = detector
            .detect(&fragments)
            .expect("detect should succeed with valid input");

        assert_eq!(outcome.tables.len(), 0);
        assert_eq!(outcome.column_sections.len(), 0);
    }
}