use serde::{Deserialize, Serialize};
/// PDF-specific settings; only compiled when the `pdf` feature is enabled.
///
/// Deserialization is lenient: the container-level `#[serde(default)]` fills
/// every field that is missing from the input with the corresponding field of
/// [`PdfConfig::default`]. The `Default` impl is therefore the single place
/// where this type's default values are defined.
#[cfg(feature = "pdf")]
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct PdfConfig {
    /// Image-extraction flag. Defaults to `false`.
    pub extract_images: bool,
    /// Optional list of passwords. Defaults to `None`.
    ///
    /// NOTE(review): how and in which order the passwords are tried is decided
    /// by the consumer of this config and is not visible here.
    pub passwords: Option<Vec<String>>,
    /// Metadata-extraction flag. Defaults to `true` (the only flag here that
    /// is on by default).
    pub extract_metadata: bool,
    /// Optional [`HierarchyConfig`]. Defaults to `None`.
    pub hierarchy: Option<HierarchyConfig>,
    /// Annotation-extraction flag. Defaults to `false`.
    pub extract_annotations: bool,
    /// Optional top-margin fraction. Defaults to `None`.
    ///
    /// NOTE(review): presumably a fraction of the page height in `0.0..=1.0`;
    /// confirm against the consumer. This type performs no range validation.
    pub top_margin_fraction: Option<f32>,
    /// Optional bottom-margin fraction. Defaults to `None`.
    ///
    /// NOTE(review): presumably a fraction of the page height in `0.0..=1.0`;
    /// confirm against the consumer. This type performs no range validation.
    pub bottom_margin_fraction: Option<f32>,
    /// Flag permitting single-column tables. Defaults to `false`.
    pub allow_single_column_tables: bool,
}
/// Settings for hierarchy handling, embedded optionally in the PDF config.
///
/// Each field names a serde default function, so any field absent from the
/// input is filled in during deserialization instead of causing an error.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct HierarchyConfig {
    /// Master switch for this feature. Missing in input => `true`.
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Cluster count. Missing in input => `3`.
    ///
    /// NOTE(review): what is being clustered is decided by the consumer of
    /// this config and is not visible here.
    #[serde(default = "default_k_clusters")]
    pub k_clusters: usize,

    /// Bounding-box inclusion flag. Missing in input => `true`.
    #[serde(default = "default_true")]
    pub include_bbox: bool,

    /// Optional OCR coverage threshold. Missing in input => `None`.
    ///
    /// NOTE(review): the unit/range (likely a `0.0..=1.0` ratio) is an
    /// assumption; confirm against the consumer.
    #[serde(default = "default_ocr_coverage_threshold")]
    pub ocr_coverage_threshold: Option<f32>,
}
#[cfg(feature = "pdf")]
impl Default for PdfConfig {
    /// Builds the baseline PDF configuration: metadata extraction on, every
    /// other flag off, and all optional settings unset.
    fn default() -> Self {
        Self {
            // The single opt-out flag: enabled unless explicitly turned off.
            extract_metadata: default_true(),

            // Opt-in flags start disabled.
            extract_images: false,
            extract_annotations: false,
            allow_single_column_tables: false,

            // Optional settings start unset.
            passwords: None,
            hierarchy: None,
            top_margin_fraction: None,
            bottom_margin_fraction: None,
        }
    }
}
impl Default for HierarchyConfig {
fn default() -> Self {
Self {
enabled: true,
k_clusters: 3,
include_bbox: true,
ocr_coverage_threshold: None,
}
}
}
/// Serde default provider that always yields `true`.
///
/// Named from `#[serde(default = "default_true")]` attributes so that boolean
/// fields absent from the input deserialize as enabled.
fn default_true() -> bool {
    const ON_BY_DEFAULT: bool = true;
    ON_BY_DEFAULT
}
/// Serde default provider for `HierarchyConfig::k_clusters`.
///
/// Returns the cluster count used when the input does not specify one.
fn default_k_clusters() -> usize {
    const DEFAULT_K_CLUSTERS: usize = 3;
    DEFAULT_K_CLUSTERS
}
/// Serde default provider for `HierarchyConfig::ocr_coverage_threshold`.
///
/// Returns `None`: the threshold stays unset unless the input supplies one.
fn default_ocr_coverage_threshold() -> Option<f32> {
    Option::<f32>::None
}
#[cfg(test)]
mod tests {
    // Every test is gated on the `pdf` feature and imports just the type it
    // needs inside its own body, so a build without the feature leaves this
    // module empty (no stray imports at module level).

    /// `HierarchyConfig::default()` is enabled, uses 3 clusters, includes
    /// bounding boxes, and has no OCR coverage threshold.
    #[test]
    #[cfg(feature = "pdf")]
    fn test_hierarchy_config_default() {
        use super::HierarchyConfig;

        let HierarchyConfig {
            enabled,
            k_clusters,
            include_bbox,
            ocr_coverage_threshold,
        } = HierarchyConfig::default();

        assert!(enabled);
        assert_eq!(k_clusters, 3);
        assert!(include_bbox);
        assert!(ocr_coverage_threshold.is_none());
    }

    /// Explicitly supplied values are stored as given.
    #[test]
    #[cfg(feature = "pdf")]
    fn test_hierarchy_config_disabled() {
        use super::HierarchyConfig;

        let threshold = Some(0.7);
        let config = HierarchyConfig {
            ocr_coverage_threshold: threshold,
            include_bbox: false,
            k_clusters: 3,
            enabled: false,
        };

        assert!(!config.enabled);
        assert_eq!(config.k_clusters, 3);
        assert!(!config.include_bbox);
        assert_eq!(config.ocr_coverage_threshold, Some(0.7));
    }

    /// Custom margin fractions are stored as given; every other field keeps
    /// its default value.
    #[test]
    #[cfg(feature = "pdf")]
    fn test_pdf_config_custom_margins() {
        use super::PdfConfig;

        let config = PdfConfig {
            top_margin_fraction: Some(0.10),
            bottom_margin_fraction: Some(0.08),
            ..PdfConfig::default()
        };

        assert_eq!(config.top_margin_fraction, Some(0.10));
        assert_eq!(config.bottom_margin_fraction, Some(0.08));
    }
}