Skip to main content

qubit_mime/detector/
stream_based_mime_detector.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Helpers for stream-backed MIME detectors.
11
12use std::fmt::Debug;
13use std::fs::File;
14use std::io::SeekFrom;
15use std::path::Path;
16
17use qubit_io::ReadSeek;
18
19use crate::{
20    MimeDetectorCore,
21    MimeResult,
22};
23
24/// Core implementation contract for detectors that can inspect content bytes.
25pub trait StreamBasedMimeDetector: Debug + Send + Sync {
26    /// Gets the shared detector core.
27    ///
28    /// # Returns
29    /// Shared detector configuration and merge/refinement behavior.
30    fn core(&self) -> &MimeDetectorCore;
31
32    /// Gets the maximum number of bytes needed for content inspection.
33    ///
34    /// # Returns
35    /// Content prefix length to read from files and readers.
36    fn max_test_bytes(&self) -> usize;
37
38    /// Guesses MIME type names from a filename.
39    ///
40    /// # Parameters
41    /// - `filename`: File path or basename.
42    ///
43    /// # Returns
44    /// Candidate MIME type names ordered by backend confidence.
45    fn guess_from_filename(&self, filename: &str) -> Vec<String>;
46
47    /// Guesses MIME type names from content bytes.
48    ///
49    /// # Parameters
50    /// - `content`: Content bytes.
51    ///
52    /// # Returns
53    /// Candidate MIME type names ordered by backend confidence.
54    ///
55    /// # Errors
56    /// Returns an error when a backend cannot inspect the supplied content.
57    fn guess_from_content_bytes(&self, content: &[u8]) -> MimeResult<Vec<String>>;
58
59    /// Guesses MIME type names from a seekable reader.
60    ///
61    /// # Parameters
62    /// - `reader`: Reader to inspect. The original position is restored.
63    ///
64    /// # Returns
65    /// Candidate MIME type names and the content prefix used for refinement.
66    ///
67    /// # Errors
68    /// Returns an error when reading, seeking, or backend inspection fails.
69    fn guess_from_reader_stream(
70        &self,
71        reader: &mut dyn ReadSeek,
72    ) -> MimeResult<(Vec<String>, Vec<u8>)> {
73        let content = read_prefix(reader, self.max_test_bytes())?;
74        let candidates = self.guess_from_content_bytes(&content)?;
75        Ok((candidates, content))
76    }
77
78    /// Guesses MIME type names from a local file.
79    ///
80    /// # Parameters
81    /// - `file`: Local file path.
82    ///
83    /// # Returns
84    /// Candidate MIME type names and the content prefix used for refinement.
85    ///
86    /// # Errors
87    /// Returns an error when opening, reading, seeking, or backend inspection fails.
88    fn guess_from_file_stream(&self, file: &Path) -> MimeResult<(Vec<String>, Vec<u8>)> {
89        let mut reader = File::open(file)?;
90        self.guess_from_reader_stream(&mut reader)
91    }
92}
93
94/// Reads a prefix from a stream and restores the original position.
95///
96/// # Parameters
97/// - `reader`: Stream to inspect.
98/// - `max_bytes`: Maximum number of bytes to read.
99///
100/// # Returns
101/// Bytes read from the stream.
102///
103/// # Errors
104/// Returns [`MimeError::Io`](crate::MimeError::Io) when reading or seeking fails.
105pub(crate) fn read_prefix(reader: &mut dyn ReadSeek, max_bytes: usize) -> MimeResult<Vec<u8>> {
106    let position = reader.stream_position()?;
107    let mut buffer = vec![0; max_bytes];
108    let bytes_read = reader.read(&mut buffer)?;
109    buffer.truncate(bytes_read);
110    reader.seek(SeekFrom::Start(position))?;
111    Ok(buffer)
112}