use crate::pipeline::partition::ReadingOrderStrategy;
use crate::pipeline::PartitionConfig;
use crate::text::extraction::ExtractionOptions;
/// Named presets of extraction and partitioning settings.
///
/// Each variant maps to a fixed [`ProfileConfig`] through
/// [`ExtractionProfile::config`]. The enum is comparable and hashable so it
/// can be matched with `==` and used as a key in maps or sets.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub enum ExtractionProfile {
    /// The preset returned by `ExtractionProfile::default()`.
    #[default]
    Standard,
    /// Preset that turns column detection on.
    Academic,
    /// Preset with the narrowest header/footer zones and the highest
    /// title font ratio.
    Form,
    /// Preset with a slightly raised space threshold and header/footer zones.
    Government,
    /// Preset with the lowest space threshold.
    Dense,
    /// Preset with the widest header/footer zones and the highest space
    /// threshold.
    Presentation,
    /// Preset that additionally selects an XY-cut reading order and sets a
    /// minimum table confidence.
    Rag,
}
/// Pair of option sets returned by [`ExtractionProfile::config`].
#[derive(Debug, Clone)]
pub struct ProfileConfig {
/// Text-extraction options chosen for the profile.
pub extraction: ExtractionOptions,
/// Partitioning options chosen for the profile.
pub partition: PartitionConfig,
}
impl ExtractionProfile {
pub fn config(&self) -> ProfileConfig {
match self {
ExtractionProfile::Standard => ProfileConfig {
extraction: ExtractionOptions {
space_threshold: 0.3,
detect_columns: false,
..ExtractionOptions::default()
},
partition: PartitionConfig {
title_min_font_ratio: 1.3,
header_zone: 0.05,
footer_zone: 0.05,
..PartitionConfig::default()
},
},
ExtractionProfile::Academic => ProfileConfig {
extraction: ExtractionOptions {
space_threshold: 0.25,
detect_columns: true,
..ExtractionOptions::default()
},
partition: PartitionConfig {
title_min_font_ratio: 1.4,
header_zone: 0.08,
footer_zone: 0.08,
..PartitionConfig::default()
},
},
ExtractionProfile::Form => ProfileConfig {
extraction: ExtractionOptions {
space_threshold: 0.3,
detect_columns: false,
..ExtractionOptions::default()
},
partition: PartitionConfig {
title_min_font_ratio: 1.5,
header_zone: 0.03,
footer_zone: 0.03,
..PartitionConfig::default()
},
},
ExtractionProfile::Government => ProfileConfig {
extraction: ExtractionOptions {
space_threshold: 0.35,
detect_columns: false,
..ExtractionOptions::default()
},
partition: PartitionConfig {
title_min_font_ratio: 1.3,
header_zone: 0.06,
footer_zone: 0.06,
..PartitionConfig::default()
},
},
ExtractionProfile::Dense => ProfileConfig {
extraction: ExtractionOptions {
space_threshold: 0.2,
detect_columns: false,
..ExtractionOptions::default()
},
partition: PartitionConfig {
title_min_font_ratio: 1.3,
header_zone: 0.05,
footer_zone: 0.05,
..PartitionConfig::default()
},
},
ExtractionProfile::Presentation => ProfileConfig {
extraction: ExtractionOptions {
space_threshold: 0.4,
detect_columns: false,
..ExtractionOptions::default()
},
partition: PartitionConfig {
title_min_font_ratio: 1.2,
header_zone: 0.10,
footer_zone: 0.10,
..PartitionConfig::default()
},
},
ExtractionProfile::Rag => ProfileConfig {
extraction: ExtractionOptions {
space_threshold: 0.3,
detect_columns: false,
..ExtractionOptions::default()
},
partition: PartitionConfig {
title_min_font_ratio: 1.3,
header_zone: 0.05,
footer_zone: 0.05,
reading_order: ReadingOrderStrategy::XYCut { min_gap: 20.0 },
min_table_confidence: 0.65,
..PartitionConfig::default()
},
},
}
}
}