1use serde::{Deserialize, Serialize};
4
/// Output formats that can be requested through [`ProcessingConfig::formats`].
///
/// The serde derives carry no rename attributes, so serialized values use the
/// variant identifiers exactly as written here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum OutputFormat {
    /// JSON output; the only format selected by `ProcessingConfig::default()`.
    Json,
    /// Plain-text output.
    Text,
    /// HTML output.
    Html,
    /// PDF output.
    Pdf,
    /// Markdown output.
    Markdown,
    /// Markdown variant; presumably Markdown that may embed raw HTML — TODO confirm with the writer.
    MarkdownWithHtml,
    /// Markdown variant; presumably Markdown that references extracted images — TODO confirm with the writer.
    MarkdownWithImages,
}
23
/// Table handling method selected by [`ProcessingConfig::table_method`].
///
/// Serialized by serde under the variant identifiers as written (no rename
/// attributes are applied).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum TableMethod {
    /// The standard method; this is what `ProcessingConfig::default()` selects.
    Default,
    /// Alternative method; presumably cluster-based table detection — TODO confirm with the consumer.
    Cluster,
}
32
/// Reading-order strategy selected by [`ProcessingConfig::reading_order`].
///
/// Serialized by serde under the variant identifiers as written (no rename
/// attributes are applied).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ReadingOrder {
    /// No reading-order strategy is applied.
    Off,
    /// The strategy selected by `ProcessingConfig::default()`; presumably the
    /// XY-cut page segmentation algorithm — TODO confirm with the implementation.
    XyCut,
}
41
/// Image output mode selected by [`ProcessingConfig::image_output`].
///
/// Serialized by serde under the variant identifiers as written (no rename
/// attributes are applied).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ImageOutput {
    /// Image output is turned off.
    Off,
    /// Presumably images are embedded directly in the generated output — TODO confirm.
    Embedded,
    /// The mode selected by `ProcessingConfig::default()`; presumably images are
    /// written as separate files (see `ProcessingConfig::image_dir`) — TODO confirm.
    External,
}
52
/// Image encoding selected by [`ProcessingConfig::image_format`].
///
/// Serialized by serde under the variant identifiers as written (no rename
/// attributes are applied).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ImageFormat {
    /// PNG; the format selected by `ProcessingConfig::default()`.
    Png,
    /// JPEG.
    Jpeg,
}
61
/// Hybrid backend selected by [`ProcessingConfig::hybrid`].
///
/// `ProcessingConfig::hybrid_enabled` reports `true` for every variant except
/// [`HybridBackend::Off`].
///
/// Serialized by serde under the variant identifiers as written (no rename
/// attributes are applied).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum HybridBackend {
    /// Hybrid processing is disabled; this is what `ProcessingConfig::default()` selects.
    Off,
    /// Enables hybrid processing; presumably backed by a Docling "fast" service — TODO confirm.
    DoclingFast,
}
70
/// Hybrid processing mode selected by [`ProcessingConfig::hybrid_mode`].
///
/// Serialized by serde under the variant identifiers as written (no rename
/// attributes are applied).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum HybridMode {
    /// The mode selected by `ProcessingConfig::default()`; presumably lets the
    /// pipeline decide how much work goes to the backend — TODO confirm.
    Auto,
    /// Presumably routes all applicable work to the backend — TODO confirm.
    Full,
}
79
/// Options controlling a processing run.
///
/// Every field is public. Start from `ProcessingConfig::default()` and
/// override the fields that need to differ.
///
/// NOTE(review): the derived `Debug` and `Serialize` impls include `password`
/// verbatim (no skip or redaction attribute is applied), so a populated config
/// should not be logged or persisted without scrubbing it first.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessingConfig {
    /// Output directory, if one was specified. Defaults to `None`.
    pub output_dir: Option<String>,
    /// Password for the input document, if any (presumably for encrypted
    /// inputs — TODO confirm). Defaults to `None`.
    pub password: Option<String>,
    /// Output formats to produce. Defaults to `[OutputFormat::Json]`.
    pub formats: Vec<OutputFormat>,
    /// Quiet flag; presumably suppresses non-essential console output — TODO
    /// confirm against the caller. Defaults to `false`.
    pub quiet: bool,
    /// Filter settings, defined in `super::filter`. Defaults to
    /// `FilterConfig::default()`.
    pub filter_config: super::filter::FilterConfig,
    /// Sanitization toggle; what gets sanitized is decided by the consumer and
    /// is not visible in this module. Defaults to `false`.
    pub sanitize: bool,
    /// Whether line breaks are kept (presumably those of the source text —
    /// TODO confirm). Defaults to `false`.
    pub keep_line_breaks: bool,
    /// Replacement string for invalid characters. Defaults to a single space.
    pub replace_invalid_chars: String,
    /// Whether to use the document's structure tree (presumably the tagged
    /// structure tree of the input — TODO confirm). Defaults to `false`.
    pub use_struct_tree: bool,
    /// Table handling method. Defaults to `TableMethod::Default`.
    pub table_method: TableMethod,
    /// Reading-order strategy. Defaults to `ReadingOrder::XyCut`.
    pub reading_order: ReadingOrder,
    /// Optional page separator for Markdown output. Defaults to `None`.
    pub markdown_page_separator: Option<String>,
    /// Optional page separator for text output. Defaults to `None`.
    pub text_page_separator: Option<String>,
    /// Optional page separator for HTML output. Defaults to `None`.
    pub html_page_separator: Option<String>,
    /// Image output mode. Defaults to `ImageOutput::External`.
    pub image_output: ImageOutput,
    /// Image encoding. Defaults to `ImageFormat::Png`.
    pub image_format: ImageFormat,
    /// Directory for images, if one was specified (presumably only relevant
    /// for `ImageOutput::External` — TODO confirm). Defaults to `None`.
    pub image_dir: Option<String>,
    /// Requests OCR for raster tables. Defaults to `true`, but the request is
    /// only honoured while a hybrid backend is enabled; use
    /// `raster_table_ocr_enabled()` to read the effective value.
    pub raster_table_ocr: bool,
    /// Optional page selection, stored as an unparsed string; the accepted
    /// syntax is defined by whoever parses it. Defaults to `None`.
    pub pages: Option<String>,
    /// Whether headers and footers are included (presumably page headers and
    /// footers in the generated output — TODO confirm). Defaults to `false`.
    pub include_header_footer: bool,
    /// Hybrid backend selection. Defaults to `HybridBackend::Off`, which
    /// makes `hybrid_enabled()` return `false`.
    pub hybrid: HybridBackend,
    /// Hybrid processing mode. Defaults to `HybridMode::Auto`.
    pub hybrid_mode: HybridMode,
    /// Optional URL for the hybrid backend (presumably the service
    /// endpoint — TODO confirm). Defaults to `None`.
    pub hybrid_url: Option<String>,
    /// Timeout for hybrid processing. Defaults to `30000`; the unit is
    /// presumably milliseconds — TODO confirm against the client code.
    pub hybrid_timeout: u64,
    /// Fallback toggle for hybrid processing (presumably whether to continue
    /// without the backend when it fails — TODO confirm). Defaults to `false`.
    pub hybrid_fallback: bool,
}
134
135impl Default for ProcessingConfig {
136 fn default() -> Self {
137 Self {
138 output_dir: None,
139 password: None,
140 formats: vec![OutputFormat::Json],
141 quiet: false,
142 filter_config: super::filter::FilterConfig::default(),
143 sanitize: false,
144 keep_line_breaks: false,
145 replace_invalid_chars: " ".to_string(),
146 use_struct_tree: false,
147 table_method: TableMethod::Default,
148 reading_order: ReadingOrder::XyCut,
149 markdown_page_separator: None,
150 text_page_separator: None,
151 html_page_separator: None,
152 image_output: ImageOutput::External,
153 image_format: ImageFormat::Png,
154 image_dir: None,
155 raster_table_ocr: true,
156 pages: None,
157 include_header_footer: false,
158 hybrid: HybridBackend::Off,
159 hybrid_mode: HybridMode::Auto,
160 hybrid_url: None,
161 hybrid_timeout: 30000,
162 hybrid_fallback: false,
163 }
164 }
165}
166
167impl ProcessingConfig {
168 pub fn hybrid_enabled(&self) -> bool {
170 !matches!(self.hybrid, HybridBackend::Off)
171 }
172
173 pub fn raster_table_ocr_enabled(&self) -> bool {
178 self.raster_table_ocr && self.hybrid_enabled()
179 }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the values produced by `ProcessingConfig::default()` so that an
    /// accidental change to a default shows up as a test failure.
    #[test]
    fn test_default_config() {
        let defaults = ProcessingConfig::default();

        // Output selection and general toggles.
        assert_eq!(defaults.formats, vec![OutputFormat::Json]);
        assert!(!defaults.quiet);
        assert!(!defaults.sanitize);

        // Layout analysis.
        assert_eq!(defaults.reading_order, ReadingOrder::XyCut);
        assert_eq!(defaults.table_method, TableMethod::Default);

        // Images.
        assert_eq!(defaults.image_output, ImageOutput::External);
        assert_eq!(defaults.image_format, ImageFormat::Png);

        // Raster OCR flag and hybrid backend.
        assert!(defaults.raster_table_ocr);
        assert_eq!(defaults.hybrid, HybridBackend::Off);
        assert_eq!(defaults.hybrid_timeout, 30000);
    }

    /// The raster-table OCR flag only takes effect while a hybrid backend is
    /// enabled, and clearing the flag disables it regardless of the backend.
    #[test]
    fn test_raster_table_ocr_requires_hybrid_mode() {
        // Defaults: flag set, backend off.
        let backend_off = ProcessingConfig::default();
        assert!(!backend_off.hybrid_enabled());
        assert!(!backend_off.raster_table_ocr_enabled());

        // Flag set, backend on.
        let backend_on = ProcessingConfig {
            hybrid: HybridBackend::DoclingFast,
            ..ProcessingConfig::default()
        };
        assert!(backend_on.hybrid_enabled());
        assert!(backend_on.raster_table_ocr_enabled());

        // Flag cleared, backend still on.
        let flag_cleared = ProcessingConfig {
            raster_table_ocr: false,
            ..backend_on
        };
        assert!(!flag_cleared.raster_table_ocr_enabled());
    }
}