use serde::{Deserialize, Serialize};
/// Serializable option set for PDF handling.
///
/// The type is only compiled when the crate is built with the `pdf` feature.
/// Every field declares a serde fallback, so a field that is missing from the
/// input takes the value noted on it instead of causing a deserialization
/// error.
#[cfg(feature = "pdf")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PdfConfig {
    /// Image-extraction flag. Missing from the input: `false`.
    #[serde(default)]
    pub extract_images: bool,

    /// Optional list of passwords. Missing from the input: `None`.
    ///
    /// NOTE(review): presumably these are tried against encrypted documents;
    /// confirm with the code that consumes this config.
    #[serde(default)]
    pub passwords: Option<Vec<String>>,

    /// Metadata-extraction flag. Missing from the input: `true`
    /// (supplied by `default_true`).
    #[serde(default = "default_true")]
    pub extract_metadata: bool,

    /// Optional hierarchy settings. Missing from the input: `None`.
    ///
    /// NOTE(review): how `None` is interpreted (e.g. as "disabled") is decided
    /// by the consumer and is not visible here.
    #[serde(default)]
    pub hierarchy: Option<HierarchyConfig>,
}
/// Serializable settings for hierarchy handling.
///
/// Each field names a serde fallback function, so a field that is missing
/// from the input takes the value noted on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HierarchyConfig {
    /// On/off switch. Missing from the input: `true`
    /// (supplied by `default_true`).
    #[serde(default = "default_true")]
    pub enabled: bool,

    /// Number of clusters. Missing from the input: `6`
    /// (supplied by `default_k_clusters`).
    ///
    /// NOTE(review): what is being clustered is determined by the consumer of
    /// this config and is not visible here.
    #[serde(default = "default_k_clusters")]
    pub k_clusters: usize,

    /// Flag for including "bbox" data (presumably bounding boxes; confirm
    /// against the consumer). Missing from the input: `true`
    /// (supplied by `default_true`).
    #[serde(default = "default_true")]
    pub include_bbox: bool,

    /// Optional OCR coverage threshold. Missing from the input: `None`
    /// (supplied by `default_ocr_coverage_threshold`).
    ///
    /// NOTE(review): the valid range and the meaning of `None` are not
    /// established in this file.
    #[serde(default = "default_ocr_coverage_threshold")]
    pub ocr_coverage_threshold: Option<f32>,
}
impl Default for HierarchyConfig {
    /// Returns the baseline configuration: enabled, `6` clusters, bbox data
    /// included, and no OCR coverage threshold.
    fn default() -> Self {
        // Keep these literals equal to what the serde fallback functions
        // named on the struct fields return, so a programmatic default and a
        // config deserialized from empty input agree.
        Self {
            ocr_coverage_threshold: None,
            include_bbox: true,
            k_clusters: 6,
            enabled: true,
        }
    }
}
/// Serde fallback provider for boolean fields whose missing value is `true`.
///
/// Referenced by name from `#[serde(default = "default_true")]` attributes.
fn default_true() -> bool {
    true
}
/// Serde fallback provider for `HierarchyConfig::k_clusters`; returns `6`.
fn default_k_clusters() -> usize {
    // Named so the fallback value is not a bare magic number.
    const FALLBACK_CLUSTER_COUNT: usize = 6;
    FALLBACK_CLUSTER_COUNT
}
/// Serde fallback provider for `HierarchyConfig::ocr_coverage_threshold`.
///
/// Always returns `None`, i.e. no threshold value is supplied when the field
/// is missing from the input.
fn default_ocr_coverage_threshold() -> Option<f32> {
    None
}
#[cfg(test)]
mod tests {
    // Imports stay inside each test function: both tests are compiled only
    // with the `pdf` feature, and a module-level import would be unused
    // whenever that feature is off.

    /// `HierarchyConfig::default()` yields: enabled, 6 clusters, bbox
    /// included, no OCR coverage threshold.
    #[test]
    #[cfg(feature = "pdf")]
    fn test_hierarchy_config_default() {
        use super::*;

        // Exhaustive destructuring: adding a field to the struct forces this
        // test to be revisited.
        let HierarchyConfig {
            enabled,
            k_clusters,
            include_bbox,
            ocr_coverage_threshold,
        } = HierarchyConfig::default();

        assert!(enabled);
        assert_eq!(k_clusters, 6);
        assert!(include_bbox);
        assert!(ocr_coverage_threshold.is_none());
    }

    /// Explicitly assigned, non-default values are stored unchanged.
    #[test]
    #[cfg(feature = "pdf")]
    fn test_hierarchy_config_disabled() {
        use super::*;

        let custom = HierarchyConfig {
            enabled: false,
            k_clusters: 3,
            include_bbox: false,
            ocr_coverage_threshold: Some(0.7),
        };

        assert!(!custom.enabled);
        assert_eq!(custom.k_clusters, 3);
        assert!(!custom.include_bbox);
        assert_eq!(custom.ocr_coverage_threshold, Some(0.7));
    }
}