Skip to main content

qubit_mime/detector/
repository_mime_detector.rs

/*******************************************************************************
 *
 *    Copyright (c) 2026 Haixing Hu.
 *
 *    SPDX-License-Identifier: Apache-2.0
 *
 *    Licensed under the Apache License, Version 2.0.
 *
 ******************************************************************************/
//! Repository-backed MIME detector.
11
12use std::path::Path;
13use std::sync::OnceLock;
14
15use crate::{
16    MimeConfig,
17    MimeDetectionPolicy,
18    MimeDetector,
19    MimeDetectorCore,
20    MimeRepository,
21    MimeResult,
22    StreamBasedMimeDetector,
23};
24
25const DEFAULT_DATABASE: &str = include_str!("../../resources/freedesktop.org-v2.4.xml");
26
27static DEFAULT_REPOSITORY: OnceLock<MimeRepository> = OnceLock::new();
28
29/// MIME detector backed by a [`MimeRepository`].
30#[derive(Debug, Clone)]
31pub struct RepositoryMimeDetector<'a> {
32    /// The shared detector core.
33    core: MimeDetectorCore,
34    /// The repository used for all detections.
35    repository: &'a MimeRepository,
36}
37
38impl RepositoryMimeDetector<'static> {
39    /// Creates a detector using the embedded freedesktop MIME repository.
40    ///
41    /// # Returns
42    /// A repository-backed detector.
43    ///
44    /// # Errors
45    /// The embedded database is parsed from crate resources and is expected to
46    /// be valid; this method keeps a `Result` return type for API consistency.
47    pub fn new() -> MimeResult<Self> {
48        Ok(Self::with_repository(default_repository()))
49    }
50
51    /// Creates a detector using the embedded repository and explicit config.
52    ///
53    /// # Parameters
54    /// - `config`: MIME detector configuration.
55    ///
56    /// # Returns
57    /// A repository-backed detector.
58    pub fn from_mime_config(config: MimeConfig) -> Self {
59        Self::with_repository_and_config(default_repository(), config)
60    }
61}
62
63impl Default for RepositoryMimeDetector<'static> {
64    fn default() -> Self {
65        Self::new().expect("embedded MIME repository should parse")
66    }
67}
68
69impl<'a> RepositoryMimeDetector<'a> {
70    /// Creates a detector using an explicit repository.
71    ///
72    /// # Parameters
73    /// - `repository`: Repository used for all detections.
74    ///
75    /// # Returns
76    /// A detector borrowing `repository`.
77    pub fn with_repository(repository: &'a MimeRepository) -> Self {
78        Self::with_repository_and_config(repository, MimeConfig::default())
79    }
80
81    /// Creates a detector using an explicit repository and config.
82    ///
83    /// # Parameters
84    /// - `repository`: Repository used for all detections.
85    /// - `config`: MIME detector configuration.
86    ///
87    /// # Returns
88    /// A detector borrowing `repository`.
89    pub fn with_repository_and_config(repository: &'a MimeRepository, config: MimeConfig) -> Self {
90        Self {
91            core: MimeDetectorCore::from_mime_config(config),
92            repository,
93        }
94    }
95
96    /// Gets the shared detector core.
97    ///
98    /// # Returns
99    /// Shared detector core.
100    pub fn core(&self) -> &MimeDetectorCore {
101        &self.core
102    }
103
104    /// Gets mutable shared detector core.
105    ///
106    /// # Returns
107    /// Mutable shared detector core.
108    pub fn core_mut(&mut self) -> &mut MimeDetectorCore {
109        &mut self.core
110    }
111
112    /// Gets the underlying repository.
113    ///
114    /// # Returns
115    /// Repository used by this detector.
116    pub fn repository(&self) -> &'a MimeRepository {
117        self.repository
118    }
119
120    /// Detects a MIME type from a filename.
121    ///
122    /// # Parameters
123    /// - `filename`: Path or basename to inspect.
124    ///
125    /// # Returns
126    /// First MIME type matched by filename, or `None`.
127    pub fn detect_by_filename(&self, filename: &str) -> Option<String> {
128        <Self as MimeDetector>::detect_by_filename(self, filename)
129    }
130
131    /// Detects a MIME type from content bytes.
132    ///
133    /// # Parameters
134    /// - `bytes`: Content prefix to inspect.
135    ///
136    /// # Returns
137    /// First MIME type matched by magic, or `None`.
138    pub fn detect_by_content(&self, bytes: &[u8]) -> Option<String> {
139        <Self as MimeDetector>::detect_by_content(self, bytes)
140    }
141
142    /// Detects a MIME type from content bytes and an optional filename.
143    ///
144    /// # Parameters
145    /// - `bytes`: Content prefix to inspect.
146    /// - `filename`: Optional path or basename used for glob detection.
147    /// - `policy`: Strategy for resolving filename and content results.
148    ///
149    /// # Returns
150    /// Selected MIME type name, or `None`.
151    pub fn detect_bytes(
152        &self,
153        bytes: &[u8],
154        filename: Option<&str>,
155        policy: MimeDetectionPolicy,
156    ) -> Option<String> {
157        self.detect(bytes, filename, policy)
158    }
159
160    /// Detects a MIME type from a seekable reader without consuming its position.
161    ///
162    /// # Parameters
163    /// - `reader`: Reader to inspect. The original stream position is restored.
164    /// - `filename`: Optional path or basename used for glob detection.
165    /// - `policy`: Strategy for resolving filename and content results.
166    ///
167    /// # Returns
168    /// Selected MIME type name, or `None`.
169    ///
170    /// # Errors
171    /// Returns [`MimeError::Io`](crate::MimeError::Io) when reading or seeking fails.
172    pub fn detect_reader(
173        &self,
174        reader: &mut dyn qubit_io::ReadSeek,
175        filename: Option<&str>,
176        policy: MimeDetectionPolicy,
177    ) -> MimeResult<Option<String>> {
178        <Self as MimeDetector>::detect_reader(self, reader, filename, policy)
179    }
180
181    /// Detects a MIME type from a local file.
182    ///
183    /// # Parameters
184    /// - `file`: Local file path to open.
185    /// - `policy`: Strategy for resolving filename and content results.
186    ///
187    /// # Returns
188    /// Selected MIME type name, or `None`.
189    ///
190    /// # Errors
191    /// Returns [`MimeError::Io`](crate::MimeError::Io) when the file cannot be opened or read.
192    pub fn detect_file(
193        &self,
194        file: &Path,
195        policy: MimeDetectionPolicy,
196    ) -> MimeResult<Option<String>> {
197        <Self as MimeDetector>::detect_file(self, file, policy)
198    }
199
200    /// Guesses MIME type names from filename rules.
201    ///
202    /// # Parameters
203    /// - `filename`: Filename or path.
204    ///
205    /// # Returns
206    /// Candidate MIME type names.
207    pub fn guess_from_filename(&self, filename: &str) -> Vec<String> {
208        self.repository
209            .detect_by_filename(filename)
210            .into_iter()
211            .map(|mime_type| mime_type.name().to_owned())
212            .collect()
213    }
214
215    /// Guesses MIME type names from content magic rules.
216    ///
217    /// # Parameters
218    /// - `bytes`: Content bytes to inspect.
219    ///
220    /// # Returns
221    /// Candidate MIME type names.
222    pub fn guess_from_content(&self, bytes: &[u8]) -> Vec<String> {
223        self.repository
224            .detect_by_content(bytes)
225            .into_iter()
226            .map(|mime_type| mime_type.name().to_owned())
227            .collect()
228    }
229}
230
231/// Gets the embedded default repository.
232///
233/// # Returns
234/// Shared parsed repository.
235///
236pub(crate) fn default_repository() -> &'static MimeRepository {
237    DEFAULT_REPOSITORY.get_or_init(|| {
238        MimeRepository::from_xml(DEFAULT_DATABASE)
239            .expect("embedded freedesktop MIME database should parse")
240    })
241}
242
243impl<'a> StreamBasedMimeDetector for RepositoryMimeDetector<'a> {
244    /// Gets the shared detector core.
245    fn core(&self) -> &MimeDetectorCore {
246        &self.core
247    }
248
249    /// Gets the maximum content prefix length from the repository.
250    fn max_test_bytes(&self) -> usize {
251        self.repository.max_test_bytes()
252    }
253
254    /// Guesses MIME type names from filename rules.
255    fn guess_from_filename(&self, filename: &str) -> Vec<String> {
256        RepositoryMimeDetector::guess_from_filename(self, filename)
257    }
258
259    /// Guesses MIME type names from content magic rules.
260    fn guess_from_content_bytes(&self, content: &[u8]) -> MimeResult<Vec<String>> {
261        Ok(RepositoryMimeDetector::guess_from_content(self, content))
262    }
263}