use serde::{Deserialize, Serialize};
/// Output formats that can be requested through `ProcessingConfig::formats`.
///
/// A data-less tag type. No `#[serde(...)]` attributes are applied, so serde
/// identifies variants by their Rust names and declaration order; renaming or
/// reordering them changes the serialized form.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Serialize, Deserialize)]
pub enum OutputFormat {
    /// JSON output; the only format in `ProcessingConfig::default()`.
    Json,
    /// Plain-text output.
    Text,
    /// HTML output.
    Html,
    /// PDF output.
    Pdf,
    /// Markdown output.
    Markdown,
    /// Markdown variant; the name suggests embedded HTML is allowed — confirm
    /// against the writer.
    MarkdownWithHtml,
    /// Markdown variant; the name suggests images are included — confirm
    /// against the writer.
    MarkdownWithImages,
}
/// Selector stored in `ProcessingConfig::table_method`.
///
/// Serde identifies the variants by their Rust names and declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Serialize, Deserialize)]
pub enum TableMethod {
    /// The method chosen by `ProcessingConfig::default()`.
    Default,
    /// Alternative method; the name suggests a clustering-based approach —
    /// confirm against the table detector.
    Cluster,
}
/// Selector stored in `ProcessingConfig::reading_order`.
///
/// Serde identifies the variants by their Rust names and declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Serialize, Deserialize)]
pub enum ReadingOrder {
    /// No reading-order algorithm selected.
    Off,
    /// The value chosen by `ProcessingConfig::default()`; presumably the
    /// XY-cut page segmentation algorithm — confirm against the implementation.
    XyCut,
}
/// Selector stored in `ProcessingConfig::image_output`.
///
/// Serde identifies the variants by their Rust names and declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Serialize, Deserialize)]
pub enum ImageOutput {
    /// Image output disabled.
    Off,
    /// Presumably images are inlined into the generated output — confirm.
    Embedded,
    /// The value chosen by `ProcessingConfig::default()`; presumably images
    /// are written as separate files — confirm.
    External,
}
/// Image encoding stored in `ProcessingConfig::image_format`.
///
/// Serde identifies the variants by their Rust names and declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Serialize, Deserialize)]
pub enum ImageFormat {
    /// PNG; the value chosen by `ProcessingConfig::default()`.
    Png,
    /// JPEG.
    Jpeg,
}
/// Hybrid backend selector stored in `ProcessingConfig::hybrid`.
///
/// Serde identifies the variants by their Rust names and declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Serialize, Deserialize)]
pub enum HybridBackend {
    /// No hybrid backend; `ProcessingConfig::hybrid_enabled()` returns `false`
    /// for this value. This is the default.
    Off,
    /// Hybrid backend named "docling-fast"; any value other than `Off` makes
    /// `ProcessingConfig::hybrid_enabled()` return `true`.
    DoclingFast,
}
/// Mode selector stored in `ProcessingConfig::hybrid_mode`.
///
/// Serde identifies the variants by their Rust names and declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Serialize, Deserialize)]
pub enum HybridMode {
    /// The value chosen by `ProcessingConfig::default()`.
    Auto,
    /// Alternative mode; exact semantics are defined by the hybrid backend
    /// code, not here.
    Full,
}
/// Options for a processing run, gathered into one serializable value.
///
/// Start from [`ProcessingConfig::default`] and override individual fields;
/// the per-field notes below quote those defaults.
///
/// `Debug` is implemented by hand (directly below) instead of being derived,
/// so that the value of [`password`](Self::password) never ends up in logs or
/// panic messages. Note that `Serialize` still emits the password unchanged.
#[derive(Clone, Serialize, Deserialize)]
pub struct ProcessingConfig {
    /// Output directory, if one was given. Default: `None`.
    pub output_dir: Option<String>,
    /// Password, if any (presumably for opening protected input — confirm
    /// with the loader). Default: `None`. Redacted in `Debug` output.
    pub password: Option<String>,
    /// Output formats to produce. Default: `[OutputFormat::Json]`.
    pub formats: Vec<OutputFormat>,
    /// Quiet flag (presumably suppresses console output — confirm).
    /// Default: `false`.
    pub quiet: bool,
    /// Filter settings, defined in the sibling `filter` module.
    pub filter_config: super::filter::FilterConfig,
    /// Sanitization flag. Default: `false`.
    pub sanitize: bool,
    /// Line-break preservation flag. Default: `false`.
    pub keep_line_breaks: bool,
    /// Replacement text for invalid characters. Default: a single space.
    pub replace_invalid_chars: String,
    /// Structure-tree flag (presumably: use the document's tagged structure
    /// tree when present — confirm). Default: `false`.
    pub use_struct_tree: bool,
    /// Table method selector. Default: [`TableMethod::Default`].
    pub table_method: TableMethod,
    /// Reading-order selector. Default: [`ReadingOrder::XyCut`].
    pub reading_order: ReadingOrder,
    /// Page separator for Markdown output, if any. Default: `None`.
    pub markdown_page_separator: Option<String>,
    /// Page separator for text output, if any. Default: `None`.
    pub text_page_separator: Option<String>,
    /// Page separator for HTML output, if any. Default: `None`.
    pub html_page_separator: Option<String>,
    /// Image output selector. Default: [`ImageOutput::External`].
    pub image_output: ImageOutput,
    /// Image encoding. Default: [`ImageFormat::Png`].
    pub image_format: ImageFormat,
    /// Image directory, if one was given. Default: `None`.
    pub image_dir: Option<String>,
    /// Raw raster-table OCR flag. Default: `true`, but it only takes effect
    /// when a hybrid backend is selected; query
    /// `raster_table_ocr_enabled()` for the effective value.
    pub raster_table_ocr: bool,
    /// Page selection as an unparsed string, if any (syntax is defined by
    /// whichever code parses it). Default: `None`.
    pub pages: Option<String>,
    /// Header/footer inclusion flag. Default: `false`.
    pub include_header_footer: bool,
    /// Hybrid backend selector. Default: [`HybridBackend::Off`].
    pub hybrid: HybridBackend,
    /// Hybrid mode selector. Default: [`HybridMode::Auto`].
    pub hybrid_mode: HybridMode,
    /// URL of the hybrid backend, if any. Default: `None`.
    pub hybrid_url: Option<String>,
    /// Timeout for the hybrid backend. Default: `30000`; the unit is not
    /// stated in this file — presumably milliseconds, TODO confirm.
    pub hybrid_timeout: u64,
    /// Hybrid fallback flag. Default: `false`.
    pub hybrid_fallback: bool,
}

/// Same layout as the derived `Debug` output, except that a present password
/// is printed as `Some("<redacted>")`.
impl std::fmt::Debug for ProcessingConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive destructuring: adding a field to the struct without
        // listing it here becomes a compile error rather than a silent gap.
        let Self {
            output_dir,
            password,
            formats,
            quiet,
            filter_config,
            sanitize,
            keep_line_breaks,
            replace_invalid_chars,
            use_struct_tree,
            table_method,
            reading_order,
            markdown_page_separator,
            text_page_separator,
            html_page_separator,
            image_output,
            image_format,
            image_dir,
            raster_table_ocr,
            pages,
            include_header_footer,
            hybrid,
            hybrid_mode,
            hybrid_url,
            hybrid_timeout,
            hybrid_fallback,
        } = self;

        // Keep the Some/None distinction, never the secret itself.
        let password = password.as_ref().map(|_| "<redacted>");

        f.debug_struct("ProcessingConfig")
            .field("output_dir", output_dir)
            .field("password", &password)
            .field("formats", formats)
            .field("quiet", quiet)
            .field("filter_config", filter_config)
            .field("sanitize", sanitize)
            .field("keep_line_breaks", keep_line_breaks)
            .field("replace_invalid_chars", replace_invalid_chars)
            .field("use_struct_tree", use_struct_tree)
            .field("table_method", table_method)
            .field("reading_order", reading_order)
            .field("markdown_page_separator", markdown_page_separator)
            .field("text_page_separator", text_page_separator)
            .field("html_page_separator", html_page_separator)
            .field("image_output", image_output)
            .field("image_format", image_format)
            .field("image_dir", image_dir)
            .field("raster_table_ocr", raster_table_ocr)
            .field("pages", pages)
            .field("include_header_footer", include_header_footer)
            .field("hybrid", hybrid)
            .field("hybrid_mode", hybrid_mode)
            .field("hybrid_url", hybrid_url)
            .field("hybrid_timeout", hybrid_timeout)
            .field("hybrid_fallback", hybrid_fallback)
            .finish()
    }
}
impl Default for ProcessingConfig {
    /// Returns the baseline configuration: JSON output only, XY-cut reading
    /// order, external PNG images, and no hybrid backend.
    fn default() -> Self {
        Self {
            // General run settings.
            output_dir: None,
            password: None,
            formats: Vec::from([OutputFormat::Json]),
            quiet: false,
            filter_config: super::filter::FilterConfig::default(),

            // Text handling.
            sanitize: false,
            keep_line_breaks: false,
            replace_invalid_chars: String::from(" "),

            // Layout analysis.
            use_struct_tree: false,
            table_method: TableMethod::Default,
            reading_order: ReadingOrder::XyCut,

            // Per-format page separators; none configured by default.
            markdown_page_separator: None,
            text_page_separator: None,
            html_page_separator: None,

            // Images.
            image_output: ImageOutput::External,
            image_format: ImageFormat::Png,
            image_dir: None,

            // The raw OCR flag is on, but it is gated on the hybrid backend
            // (off below), so the effective default is "disabled".
            raster_table_ocr: true,
            pages: None,
            include_header_footer: false,

            // Hybrid backend.
            hybrid: HybridBackend::Off,
            hybrid_mode: HybridMode::Auto,
            hybrid_url: None,
            hybrid_timeout: 30_000,
            hybrid_fallback: false,
        }
    }
}
impl ProcessingConfig {
    /// Reports whether a hybrid backend is selected, i.e. `hybrid` is
    /// anything other than `HybridBackend::Off`.
    pub fn hybrid_enabled(&self) -> bool {
        self.hybrid != HybridBackend::Off
    }

    /// Effective raster-table OCR setting.
    ///
    /// The raw `raster_table_ocr` flag only counts while a hybrid backend is
    /// selected; without one this always returns `false`.
    pub fn raster_table_ocr_enabled(&self) -> bool {
        if !self.hybrid_enabled() {
            return false;
        }
        self.raster_table_ocr
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the default values so they cannot change unnoticed.
    #[test]
    fn test_default_config() {
        let ProcessingConfig {
            formats,
            quiet,
            sanitize,
            reading_order,
            table_method,
            image_output,
            image_format,
            raster_table_ocr,
            hybrid,
            hybrid_timeout,
            ..
        } = ProcessingConfig::default();

        assert_eq!(formats, vec![OutputFormat::Json]);
        assert!(!quiet);
        assert!(!sanitize);
        assert_eq!(reading_order, ReadingOrder::XyCut);
        assert_eq!(table_method, TableMethod::Default);
        assert_eq!(image_output, ImageOutput::External);
        assert_eq!(image_format, ImageFormat::Png);
        assert!(raster_table_ocr);
        assert_eq!(hybrid, HybridBackend::Off);
        assert_eq!(hybrid_timeout, 30000);
    }

    /// The raw OCR flag is only effective while a hybrid backend is selected.
    #[test]
    fn test_raster_table_ocr_requires_hybrid_mode() {
        // Defaults: raw flag on, hybrid off -> effective OCR off.
        let baseline = ProcessingConfig::default();
        assert!(!baseline.hybrid_enabled());
        assert!(!baseline.raster_table_ocr_enabled());

        // Selecting a backend makes the raw flag take effect.
        let with_hybrid = ProcessingConfig {
            hybrid: HybridBackend::DoclingFast,
            ..baseline
        };
        assert!(with_hybrid.hybrid_enabled());
        assert!(with_hybrid.raster_table_ocr_enabled());

        // Clearing the raw flag disables OCR even with a backend selected.
        let ocr_cleared = ProcessingConfig {
            raster_table_ocr: false,
            ..with_hybrid
        };
        assert!(!ocr_cleared.raster_table_ocr_enabled());
    }
}