pub struct FileExtractionConfig {
pub enable_quality_processing: Option<bool>,
pub ocr: Option<OcrConfig>,
pub force_ocr: Option<bool>,
pub force_ocr_pages: Option<Vec<usize>>,
pub chunking: Option<ChunkingConfig>,
pub images: Option<ImageExtractionConfig>,
pub token_reduction: Option<TokenReductionConfig>,
pub language_detection: Option<LanguageDetectionConfig>,
pub pages: Option<PageConfig>,
pub postprocessor: Option<PostProcessorConfig>,
pub result_format: Option<OutputFormat>,
pub output_format: Option<OutputFormat>,
pub include_document_structure: Option<bool>,
pub timeout_secs: Option<u64>,
}
Per-file extraction configuration overrides for batch processing.
All fields are Option<T> — None means “use the batch-level default.”
This type is used with crate::batch_extract_file and
crate::batch_extract_bytes to allow heterogeneous
extraction settings within a single batch.
§Excluded Fields
The following super::ExtractionConfig fields are batch-level only and
cannot be overridden per file:
- max_concurrent_extractions — controls batch parallelism
- use_cache — global caching policy
- acceleration — shared ONNX execution provider
- security_limits — global archive security policy
§Example
use kreuzberg::FileExtractionConfig;
// Override just OCR forcing for a specific file
let config = FileExtractionConfig {
force_ocr: Some(true),
..Default::default()
};
Fields§
enable_quality_processing: Option<bool>
Override quality post-processing for this file.
ocr: Option<OcrConfig>
Override OCR configuration for this file (None means use the batch-level default).
force_ocr: Option<bool>
Override force OCR for this file.
force_ocr_pages: Option<Vec<usize>>
Override force OCR pages for this file (1-indexed page numbers).
chunking: Option<ChunkingConfig>
Override chunking configuration for this file.
images: Option<ImageExtractionConfig>
Override image extraction configuration for this file.
token_reduction: Option<TokenReductionConfig>
Override token reduction for this file.
language_detection: Option<LanguageDetectionConfig>
Override language detection for this file.
pages: Option<PageConfig>
Override page extraction for this file.
postprocessor: Option<PostProcessorConfig>
Override post-processor for this file.
result_format: Option<OutputFormat>
Override result format for this file.
output_format: Option<OutputFormat>
Override output content format for this file.
include_document_structure: Option<bool>
Override document structure output for this file.
timeout_secs: Option<u64>
Override per-file extraction timeout in seconds.
When set, the extraction for this file will be canceled after the specified duration. A timed-out file produces an error result without affecting other files in the batch.
Trait Implementations§
Source§impl Clone for FileExtractionConfig
impl Clone for FileExtractionConfig
Source§fn clone(&self) -> FileExtractionConfig
fn clone(&self) -> FileExtractionConfig
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for FileExtractionConfig
impl Debug for FileExtractionConfig
Source§impl Default for FileExtractionConfig
impl Default for FileExtractionConfig
Source§fn default() -> FileExtractionConfig
fn default() -> FileExtractionConfig
Source§impl<'de> Deserialize<'de> for FileExtractionConfig
where
    FileExtractionConfig: Default,
impl<'de> Deserialize<'de> for FileExtractionConfig
where
    FileExtractionConfig: Default,
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
    __D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
    __D: Deserializer<'de>,
Auto Trait Implementations§
impl Freeze for FileExtractionConfig
impl RefUnwindSafe for FileExtractionConfig
impl Send for FileExtractionConfig
impl Sync for FileExtractionConfig
impl Unpin for FileExtractionConfig
impl UnsafeUnpin for FileExtractionConfig
impl UnwindSafe for FileExtractionConfig
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> CloneToUninit for T
where
    T: Clone,
impl<T> CloneToUninit for T
where
    T: Clone,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more