use crate::{KreuzbergError, Result};
use super::super::ocr::OcrConfig;
use super::super::processing::ChunkingConfig;
use super::core::ExtractionConfig;
use super::types::TokenReductionConfig;
impl ExtractionConfig {
pub fn apply_env_overrides(&mut self) -> Result<()> {
use crate::core::config_validation::{
validate_chunking_params, validate_language_code, validate_ocr_backend, validate_token_reduction_level,
};
if let Ok(lang) = std::env::var("KREUZBERG_OCR_LANGUAGE") {
validate_language_code(&lang)?;
if self.ocr.is_none() {
self.ocr = Some(OcrConfig::default());
}
if let Some(ref mut ocr) = self.ocr {
ocr.language = lang;
}
}
if let Ok(backend) = std::env::var("KREUZBERG_OCR_BACKEND") {
validate_ocr_backend(&backend)?;
if self.ocr.is_none() {
self.ocr = Some(OcrConfig::default());
}
if let Some(ref mut ocr) = self.ocr {
ocr.backend = backend;
}
}
if let Ok(max_chars_str) = std::env::var("KREUZBERG_CHUNKING_MAX_CHARS") {
let max_chars: usize = max_chars_str.parse().map_err(|_| KreuzbergError::Validation {
message: format!(
"Invalid value for KREUZBERG_CHUNKING_MAX_CHARS: '{}'. Must be a positive integer.",
max_chars_str
),
source: None,
})?;
if max_chars == 0 {
return Err(KreuzbergError::Validation {
message: "KREUZBERG_CHUNKING_MAX_CHARS must be greater than 0".to_string(),
source: None,
});
}
if self.chunking.is_none() {
self.chunking = Some(ChunkingConfig {
max_characters: 1000,
overlap: 200,
trim: true,
chunker_type: super::super::processing::ChunkerType::Text,
embedding: None,
preset: None,
});
}
if let Some(ref mut chunking) = self.chunking {
validate_chunking_params(max_chars, chunking.overlap)?;
chunking.max_characters = max_chars;
}
}
if let Ok(max_overlap_str) = std::env::var("KREUZBERG_CHUNKING_MAX_OVERLAP") {
let max_overlap: usize = max_overlap_str.parse().map_err(|_| KreuzbergError::Validation {
message: format!(
"Invalid value for KREUZBERG_CHUNKING_MAX_OVERLAP: '{}'. Must be a non-negative integer.",
max_overlap_str
),
source: None,
})?;
if self.chunking.is_none() {
self.chunking = Some(ChunkingConfig {
max_characters: 1000,
overlap: 200,
trim: true,
chunker_type: super::super::processing::ChunkerType::Text,
embedding: None,
preset: None,
});
}
if let Some(ref mut chunking) = self.chunking {
validate_chunking_params(chunking.max_characters, max_overlap)?;
chunking.overlap = max_overlap;
}
}
if let Ok(cache_str) = std::env::var("KREUZBERG_CACHE_ENABLED") {
let cache_enabled = match cache_str.to_lowercase().as_str() {
"true" => true,
"false" => false,
_ => {
return Err(KreuzbergError::Validation {
message: format!(
"Invalid value for KREUZBERG_CACHE_ENABLED: '{}'. Must be 'true' or 'false'.",
cache_str
),
source: None,
});
}
};
self.use_cache = cache_enabled;
}
if let Ok(mode) = std::env::var("KREUZBERG_TOKEN_REDUCTION_MODE") {
validate_token_reduction_level(&mode)?;
if self.token_reduction.is_none() {
self.token_reduction = Some(TokenReductionConfig {
mode: "off".to_string(),
preserve_important_words: true,
});
}
if let Some(ref mut token_reduction) = self.token_reduction {
token_reduction.mode = mode;
}
}
if let Ok(val) = std::env::var("KREUZBERG_OUTPUT_FORMAT") {
self.output_format = val.parse().map_err(|e: String| KreuzbergError::Validation {
message: format!("Invalid value for KREUZBERG_OUTPUT_FORMAT: {}", e),
source: None,
})?;
}
Ok(())
}
}