use std::io::SeekFrom;
use std::path::Path;
use std::sync::{
Mutex,
PoisonError,
};
use magika::{
ContentType,
SyncInput,
};
use qubit_io::ReadSeek;
use qubit_mime::{
DetectionSource,
MimeConfig,
MimeDetectionPolicy,
MimeDetector,
MimeDetectorCore,
MimeError,
MimeRepository,
MimeResult,
RepositoryMimeDetector,
};
/// MIME detector that combines filename-based guesses with Magika content identification.
///
/// Filename candidates come from a [`RepositoryMimeDetector`], content candidates come from a
/// Magika session, and the [`MimeDetectorCore`] picks the final result from both candidate lists.
#[derive(Debug)]
pub struct MagikaMimeDetector {
/// Shared detector core used to select and refine the final MIME type.
core: MimeDetectorCore,
/// Detector used for every filename-based guess and for repository access.
filename_detector: RepositoryMimeDetector<'static>,
/// Magika session; it is locked for the duration of each content identification call.
session: Mutex<magika::Session>,
}
impl MagikaMimeDetector {
#[inline]
pub fn new() -> MimeResult<Self> {
Self::from_mime_config(MimeConfig::default())
}
pub fn from_mime_config(config: MimeConfig) -> MimeResult<Self> {
let session = magika::Session::new().map_err(map_magika_error)?;
Ok(Self {
core: MimeDetectorCore::from_mime_config(config.clone()),
filename_detector: RepositoryMimeDetector::from_mime_config(config),
session: Mutex::new(session),
})
}
#[inline]
pub fn core(&self) -> &MimeDetectorCore {
&self.core
}
#[inline]
pub fn core_mut(&mut self) -> &mut MimeDetectorCore {
&mut self.core
}
#[inline]
pub fn repository(&self) -> &MimeRepository {
self.filename_detector.repository()
}
#[inline]
fn guess_from_filename(&self, filename: &str) -> Vec<String> {
self.filename_detector.guess_from_filename(filename)
}
fn guess_from_magika_input<I>(&self, input: I) -> MimeResult<Vec<String>>
where
I: SyncInput,
{
let mut session = self.session.lock().map_err(map_session_lock_error)?;
let file_type = session
.identify_content_sync(input)
.map_err(map_magika_error)?;
Ok(file_type
.content_type()
.and_then(content_type_to_mime)
.into_iter()
.collect())
}
fn guess_from_magika_file(&self, file: &Path) -> MimeResult<Vec<String>> {
let mut session = self.session.lock().map_err(map_session_lock_error)?;
let file_type = session.identify_file_sync(file).map_err(map_magika_error)?;
Ok(file_type
.content_type()
.and_then(content_type_to_mime)
.into_iter()
.collect())
}
}
impl MimeDetector for MagikaMimeDetector {
    /// Detects a MIME type from the filename alone by delegating to the filename detector.
    #[inline]
    fn detect_by_filename(&self, filename: &str) -> Option<String> {
        self.filename_detector.detect_by_filename(filename)
    }

    /// Detects a MIME type from in-memory content using Magika.
    ///
    /// Returns `None` when Magika fails or yields no candidate; otherwise the first candidate is
    /// refined by the detector core with `content` as the detection source.
    fn detect_by_content(&self, content: &[u8]) -> Option<String> {
        // The trait signature has no error channel, so backend failures become `None`.
        let candidates = self.guess_from_magika_input(content).ok()?;
        let first = candidates.first()?;
        let refined =
            self.core
                .refine_detected_mime_type(first, None, DetectionSource::Content(content));
        Some(refined)
    }

    /// Detects a MIME type from in-memory content and an optional filename.
    ///
    /// Content identification is skipped when `policy` is
    /// [`MimeDetectionPolicy::PreferFilename`] and the filename produced exactly one candidate.
    /// A failing content identification is treated as "no content candidates".
    fn detect(
        &self,
        content: &[u8],
        filename: Option<&str>,
        policy: MimeDetectionPolicy,
    ) -> Option<String> {
        let from_filename = filename.map_or_else(Vec::new, |name| self.guess_from_filename(name));
        let filename_is_decisive =
            policy == MimeDetectionPolicy::PreferFilename && from_filename.len() == 1;
        let from_content = if filename_is_decisive {
            Vec::new()
        } else {
            self.guess_from_magika_input(content).unwrap_or_default()
        };
        self.core.select_result(
            &from_filename,
            &from_content,
            filename,
            policy,
            DetectionSource::Content(content),
        )
    }

    /// Detects a MIME type from a seekable reader and an optional filename.
    ///
    /// The reader is left untouched when `policy` is [`MimeDetectionPolicy::PreferFilename`] and
    /// the filename produced exactly one candidate. The core receives
    /// [`DetectionSource::None`] as the detection source.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while identifying the reader's content.
    fn detect_reader(
        &self,
        reader: &mut dyn ReadSeek,
        filename: Option<&str>,
        policy: MimeDetectionPolicy,
    ) -> MimeResult<Option<String>> {
        let from_filename = filename.map_or_else(Vec::new, |name| self.guess_from_filename(name));
        let filename_is_decisive =
            policy == MimeDetectionPolicy::PreferFilename && from_filename.len() == 1;
        let from_content = if filename_is_decisive {
            Vec::new()
        } else {
            self.guess_from_reader(reader)?
        };
        let selected = self.core.select_result(
            &from_filename,
            &from_content,
            filename,
            policy,
            DetectionSource::None,
        );
        Ok(selected)
    }

    /// Detects a MIME type for a file on disk.
    ///
    /// The lossily converted path string serves as the filename for filename-based guessing.
    /// Magika is not consulted when `policy` is [`MimeDetectionPolicy::PreferFilename`] and the
    /// filename produced exactly one candidate.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while Magika identifies the file.
    fn detect_file(&self, file: &Path, policy: MimeDetectionPolicy) -> MimeResult<Option<String>> {
        let filename = file.to_string_lossy();
        let from_filename = self.guess_from_filename(&filename);
        let filename_is_decisive =
            policy == MimeDetectionPolicy::PreferFilename && from_filename.len() == 1;
        let from_content = if filename_is_decisive {
            Vec::new()
        } else {
            self.guess_from_magika_file(file)?
        };
        let selected = self.core.select_result(
            &from_filename,
            &from_content,
            Some(&filename),
            policy,
            DetectionSource::Path(file),
        );
        Ok(selected)
    }
}
impl MagikaMimeDetector {
    /// Identifies the content behind `reader` with Magika and restores the reader position.
    ///
    /// The stream length is measured by seeking to the end, and Magika reads at absolute
    /// offsets. After identification the reader is sought back to the position it had on entry.
    ///
    /// # Errors
    ///
    /// Fails when the initial position or length cannot be determined, when identification
    /// fails, or when the position cannot be restored. A restore failure takes precedence over
    /// the identification outcome and is reported as [`MimeError::Io`].
    fn guess_from_reader(&self, reader: &mut dyn ReadSeek) -> MimeResult<Vec<String>> {
        let original_position = reader.stream_position()?;
        let length = reader.seek(SeekFrom::End(0))?;
        let mut input = ReadSeekInput { reader, length };
        let outcome = self.guess_from_magika_input(&mut input);
        // Always attempt the restore, whether or not identification succeeded.
        match input.reader.seek(SeekFrom::Start(original_position)) {
            Ok(_) => outcome,
            Err(restore_error) => Err(MimeError::Io(restore_error)),
        }
    }
}
/// Adapter that exposes a borrowed seekable reader through Magika's [`SyncInput`] trait.
struct ReadSeekInput<'a> {
/// Borrowed reader that serves the positioned reads.
reader: &'a mut dyn ReadSeek,
/// Stream length reported to Magika; supplied by whoever constructs the adapter.
length: u64,
}
impl SyncInput for ReadSeekInput<'_> {
    /// Reports the stream length captured when the adapter was built.
    #[inline]
    fn length(&self) -> magika::Result<u64> {
        Ok(self.length)
    }

    /// Fills `buffer` completely with the bytes starting at absolute `offset`.
    ///
    /// Fails when the seek fails or when the reader cannot supply enough bytes to fill the
    /// whole buffer.
    #[inline]
    fn read_at(&mut self, buffer: &mut [u8], offset: u64) -> magika::Result<()> {
        let start = SeekFrom::Start(offset);
        self.reader.seek(start)?;
        Ok(self.reader.read_exact(buffer)?)
    }
}
/// Converts a Magika content type into an owned MIME type string.
///
/// Returns `None` when the MIME name is empty or equals `application/undefined`.
#[inline]
fn content_type_to_mime(content_type: ContentType) -> Option<String> {
    let mime_type = content_type.info().mime_type;
    let is_unusable = mime_type.is_empty() || mime_type == "application/undefined";
    (!is_unusable).then(|| mime_type.to_owned())
}
/// Converts a poisoned session lock into a `"magika"` detector-backend error.
///
/// The poison error's display text is appended to the message.
#[inline]
fn map_session_lock_error<T>(error: PoisonError<T>) -> MimeError {
    let message = format!("session lock poisoned: {error}");
    MimeError::detector_backend("magika", message)
}
#[inline]
fn map_magika_error(error: magika::Error) -> MimeError {
match error {
magika::Error::IOError(error) => MimeError::Io(error),
error => MimeError::detector_backend("magika", error.to_string()),
}
}
/// Coverage-only hook that runs the session-lock error mapping on a synthetic poison error.
#[cfg(coverage)]
pub fn coverage_map_session_lock_error() -> MimeError {
    let poisoned = PoisonError::new(());
    map_session_lock_error(poisoned)
}
/// Coverage-only hook that runs the MIME conversion on [`ContentType::Undefined`].
#[cfg(coverage)]
pub fn coverage_undefined_content_type_to_mime() -> Option<String> {
    let undefined = ContentType::Undefined;
    content_type_to_mime(undefined)
}
/// Coverage-only hook that runs the Magika error mapping on a synthetic non-I/O error.
///
/// Only compiled when both the `coverage` cfg and the `ort` feature are enabled.
#[cfg(all(coverage, feature = "ort"))]
pub fn coverage_map_non_io_magika_error() -> MimeError {
    let ort_error = ort::Error::new("coverage non-io error");
    map_magika_error(magika::Error::OrtError(ort_error))
}