use crate::core::{Error, ExtractionResult, Result};
use crate::detection::TypeDetector;
use crate::parsers::ParserRegistry;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
/// Front door for content extraction: pairs a type detector with a parser
/// registry so callers can hand over a path or a byte slice and receive an
/// `ExtractionResult`.
pub struct Extractor {
/// Used by the extraction methods to determine the MIME type of the input.
pub(crate) detector: TypeDetector,
/// Queried with a MIME type to obtain the parser that handles it.
pub(crate) registry: ParserRegistry,
}
impl Extractor {
    /// Creates an extractor backed by a new `TypeDetector` and the default
    /// `ParserRegistry`.
    pub fn new() -> Self {
        Self {
            detector: TypeDetector::new(),
            registry: ParserRegistry::default(),
        }
    }

    /// Detects the type of the file at `path`, selects the parser registered
    /// for the detected MIME type, and parses the file through a buffered
    /// reader.
    ///
    /// The returned result's `detection_confidence` and `mime_type` are
    /// overwritten with the values reported by the detector.
    ///
    /// # Errors
    ///
    /// * Any error returned by the detector for `path`.
    /// * `Error::UnsupportedFormat` (carrying the detected MIME type) when the
    ///   registry has no parser for it.
    /// * The error produced when the file cannot be opened.
    /// * Any error returned by the parser itself.
    pub fn extract_from_path(&self, path: impl AsRef<Path>) -> Result<ExtractionResult> {
        let path = path.as_ref();
        let detection = self.detector.detect_from_path(path)?;

        // Resolve the parser before touching the file so an unsupported
        // format is reported without opening it.
        let parser = self
            .registry
            .get_parser(&detection.mime_type)
            .ok_or_else(|| Error::UnsupportedFormat(detection.mime_type.clone()))?;

        let mut reader = BufReader::new(File::open(path)?);
        let mut result = parser.parse_stream(&mut reader, &detection.mime_type)?;

        result.detection_confidence = detection.confidence;
        result.mime_type = detection.mime_type;
        Ok(result)
    }

    /// Parses an in-memory buffer.
    ///
    /// When `mime_hint` is `Some`, the hint is used as the MIME type and no
    /// detection is performed; the result's `detection_confidence` is left as
    /// the parser produced it. When `mime_hint` is `None`, the MIME type is
    /// detected from `data` and the detector's confidence is recorded on the
    /// result, matching what [`Extractor::extract_from_path`] does.
    ///
    /// In both cases the result's `mime_type` is set to the MIME type that
    /// was used to select the parser.
    ///
    /// # Errors
    ///
    /// * `Error::UnsupportedFormat` (carrying the MIME type) when the registry
    ///   has no parser for it.
    /// * Any error returned by the parser itself.
    pub fn extract_from_bytes(
        &self,
        data: &[u8],
        mime_hint: Option<&str>,
    ) -> Result<ExtractionResult> {
        // `confidence` is `Some` only when detection actually ran; a caller
        // supplied hint carries no detector confidence to report.
        let (mime_type, confidence) = match mime_hint {
            Some(hint) => (hint.to_string(), None),
            None => {
                let detection = self.detector.detect_from_bytes(data);
                (detection.mime_type, Some(detection.confidence))
            }
        };

        let parser = self
            .registry
            .get_parser(&mime_type)
            .ok_or_else(|| Error::UnsupportedFormat(mime_type.clone()))?;

        let mut result = parser.parse(data, &mime_type)?;
        if let Some(confidence) = confidence {
            result.detection_confidence = confidence;
        }
        result.mime_type = mime_type;
        Ok(result)
    }
}
impl Default for Extractor {
fn default() -> Self {
Self::new()
}
}