use crate::plugins::DocumentExtractor;
use crate::types::{ErrorMetadata, ExtractionResult, Metadata};
use crate::utils::{PoolSizeHint, estimate_pool_size};
use crate::{KreuzbergError, Result};
use std::borrow::Cow;
use std::sync::Arc;
/// Looks up the document extractor registered for `mime_type`.
///
/// Acquires a read lock on the global document extractor registry and delegates
/// the lookup to the registry's `get` method.
///
/// # Errors
///
/// Returns `KreuzbergError::Other` when the registry lock is poisoned; otherwise
/// propagates whatever result the registry lookup produces for `mime_type`.
pub(in crate::core::extractor) fn get_extractor(mime_type: &str) -> Result<Arc<dyn DocumentExtractor>> {
    let registry = crate::plugins::registry::get_document_extractor_registry();

    // Bind the guard in its own statement so it is released before `registry` goes away.
    let guard = match registry.read() {
        Ok(guard) => guard,
        Err(poisoned) => {
            return Err(KreuzbergError::Other(format!(
                "Document extractor registry lock poisoned: {}",
                poisoned
            )));
        }
    };

    guard.get(mime_type)
}
/// Builds an `ExtractionResult` that describes a failed extraction.
///
/// The error `e` is recorded in two places:
/// - `metadata.error`, holding the `Debug` rendering of `e` as `error_type` and its
///   `Display` rendering as `message`;
/// - `content`, which is the `Display` rendering prefixed with `"Error: "`.
///
/// `elapsed_ms` is stored as `metadata.extraction_duration_ms`, and every remaining
/// metadata field takes its `Default` value. The result's MIME type is `text/plain`;
/// all other result fields are empty vectors or `None`.
pub(crate) fn error_extraction_result(e: &KreuzbergError, elapsed_ms: Option<u64>) -> ExtractionResult {
    let error_details = ErrorMetadata {
        error_type: format!("{:?}", e),
        message: e.to_string(),
    };
    let content = format!("Error: {}", e);

    ExtractionResult {
        content,
        mime_type: Cow::Borrowed("text/plain"),
        metadata: Metadata {
            error: Some(error_details),
            extraction_duration_ms: elapsed_ms,
            ..Default::default()
        },
        tables: Vec::new(),
        detected_languages: None,
        chunks: None,
        images: None,
        djot_content: None,
        pages: None,
        elements: None,
        ocr_elements: None,
        document: None,
        // This field only exists when one of the keyword-extraction features is enabled.
        #[cfg(any(feature = "keywords-yake", feature = "keywords-rake"))]
        extracted_keywords: None,
        quality_score: None,
        processing_warnings: Vec::new(),
        annotations: None,
    }
}
/// Returns a pool sizing hint for a document with the given size and MIME type.
///
/// This is a thin public wrapper: both arguments are forwarded unchanged to
/// `estimate_pool_size`, and its `PoolSizeHint` is returned as-is.
///
/// NOTE(review): `file_size` is presumably expressed in bytes — confirm against
/// `estimate_pool_size`.
#[inline]
pub fn get_pool_sizing_hint(file_size: u64, mime_type: &str) -> PoolSizeHint {
estimate_pool_size(file_size, mime_type)
}