qubit_mime/detector/repository_mime_detector.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Repository-backed MIME detector.
11
12use std::path::Path;
13use std::sync::OnceLock;
14
15use crate::{
16 MimeConfig,
17 MimeDetectionPolicy,
18 MimeDetector,
19 MimeDetectorCore,
20 MimeRepository,
21 MimeResult,
22 StreamBasedMimeDetector,
23};
24
25const DEFAULT_DATABASE: &str = include_str!("../../resources/freedesktop.org-v2.4.xml");
26
27static DEFAULT_REPOSITORY: OnceLock<MimeRepository> = OnceLock::new();
28
29/// MIME detector backed by a [`MimeRepository`].
30#[derive(Debug, Clone)]
31pub struct RepositoryMimeDetector<'a> {
32 /// The shared detector core.
33 core: MimeDetectorCore,
34 /// The repository used for all detections.
35 repository: &'a MimeRepository,
36}
37
38impl RepositoryMimeDetector<'static> {
39 /// Creates a detector using the embedded freedesktop MIME repository.
40 ///
41 /// # Returns
42 /// A repository-backed detector.
43 ///
44 /// # Errors
45 /// The embedded database is parsed from crate resources and is expected to
46 /// be valid; this method keeps a `Result` return type for API consistency.
47 pub fn new() -> MimeResult<Self> {
48 Ok(Self::with_repository(default_repository()))
49 }
50
51 /// Creates a detector using the embedded repository and explicit config.
52 ///
53 /// # Parameters
54 /// - `config`: MIME detector configuration.
55 ///
56 /// # Returns
57 /// A repository-backed detector.
58 pub fn from_mime_config(config: MimeConfig) -> Self {
59 Self::with_repository_and_config(default_repository(), config)
60 }
61}
62
63impl Default for RepositoryMimeDetector<'static> {
64 fn default() -> Self {
65 Self::new().expect("embedded MIME repository should parse")
66 }
67}
68
69impl<'a> RepositoryMimeDetector<'a> {
70 /// Creates a detector using an explicit repository.
71 ///
72 /// # Parameters
73 /// - `repository`: Repository used for all detections.
74 ///
75 /// # Returns
76 /// A detector borrowing `repository`.
77 pub fn with_repository(repository: &'a MimeRepository) -> Self {
78 Self::with_repository_and_config(repository, MimeConfig::default())
79 }
80
81 /// Creates a detector using an explicit repository and config.
82 ///
83 /// # Parameters
84 /// - `repository`: Repository used for all detections.
85 /// - `config`: MIME detector configuration.
86 ///
87 /// # Returns
88 /// A detector borrowing `repository`.
89 pub fn with_repository_and_config(repository: &'a MimeRepository, config: MimeConfig) -> Self {
90 Self {
91 core: MimeDetectorCore::from_mime_config(config),
92 repository,
93 }
94 }
95
96 /// Gets the shared detector core.
97 ///
98 /// # Returns
99 /// Shared detector core.
100 pub fn core(&self) -> &MimeDetectorCore {
101 &self.core
102 }
103
104 /// Gets mutable shared detector core.
105 ///
106 /// # Returns
107 /// Mutable shared detector core.
108 pub fn core_mut(&mut self) -> &mut MimeDetectorCore {
109 &mut self.core
110 }
111
112 /// Gets the underlying repository.
113 ///
114 /// # Returns
115 /// Repository used by this detector.
116 pub fn repository(&self) -> &'a MimeRepository {
117 self.repository
118 }
119
120 /// Detects a MIME type from a filename.
121 ///
122 /// # Parameters
123 /// - `filename`: Path or basename to inspect.
124 ///
125 /// # Returns
126 /// First MIME type matched by filename, or `None`.
127 pub fn detect_by_filename(&self, filename: &str) -> Option<String> {
128 <Self as MimeDetector>::detect_by_filename(self, filename)
129 }
130
131 /// Detects a MIME type from content bytes.
132 ///
133 /// # Parameters
134 /// - `bytes`: Content prefix to inspect.
135 ///
136 /// # Returns
137 /// First MIME type matched by magic, or `None`.
138 pub fn detect_by_content(&self, bytes: &[u8]) -> Option<String> {
139 <Self as MimeDetector>::detect_by_content(self, bytes)
140 }
141
142 /// Detects a MIME type from content bytes and an optional filename.
143 ///
144 /// # Parameters
145 /// - `bytes`: Content prefix to inspect.
146 /// - `filename`: Optional path or basename used for glob detection.
147 /// - `policy`: Strategy for resolving filename and content results.
148 ///
149 /// # Returns
150 /// Selected MIME type name, or `None`.
151 pub fn detect_bytes(
152 &self,
153 bytes: &[u8],
154 filename: Option<&str>,
155 policy: MimeDetectionPolicy,
156 ) -> Option<String> {
157 self.detect(bytes, filename, policy)
158 }
159
160 /// Detects a MIME type from a seekable reader without consuming its position.
161 ///
162 /// # Parameters
163 /// - `reader`: Reader to inspect. The original stream position is restored.
164 /// - `filename`: Optional path or basename used for glob detection.
165 /// - `policy`: Strategy for resolving filename and content results.
166 ///
167 /// # Returns
168 /// Selected MIME type name, or `None`.
169 ///
170 /// # Errors
171 /// Returns [`MimeError::Io`](crate::MimeError::Io) when reading or seeking fails.
172 pub fn detect_reader(
173 &self,
174 reader: &mut dyn qubit_io::ReadSeek,
175 filename: Option<&str>,
176 policy: MimeDetectionPolicy,
177 ) -> MimeResult<Option<String>> {
178 <Self as MimeDetector>::detect_reader(self, reader, filename, policy)
179 }
180
181 /// Detects a MIME type from a local file.
182 ///
183 /// # Parameters
184 /// - `file`: Local file path to open.
185 /// - `policy`: Strategy for resolving filename and content results.
186 ///
187 /// # Returns
188 /// Selected MIME type name, or `None`.
189 ///
190 /// # Errors
191 /// Returns [`MimeError::Io`](crate::MimeError::Io) when the file cannot be opened or read.
192 pub fn detect_file(
193 &self,
194 file: &Path,
195 policy: MimeDetectionPolicy,
196 ) -> MimeResult<Option<String>> {
197 <Self as MimeDetector>::detect_file(self, file, policy)
198 }
199
200 /// Guesses MIME type names from filename rules.
201 ///
202 /// # Parameters
203 /// - `filename`: Filename or path.
204 ///
205 /// # Returns
206 /// Candidate MIME type names.
207 pub fn guess_from_filename(&self, filename: &str) -> Vec<String> {
208 self.repository
209 .detect_by_filename(filename)
210 .into_iter()
211 .map(|mime_type| mime_type.name().to_owned())
212 .collect()
213 }
214
215 /// Guesses MIME type names from content magic rules.
216 ///
217 /// # Parameters
218 /// - `bytes`: Content bytes to inspect.
219 ///
220 /// # Returns
221 /// Candidate MIME type names.
222 pub fn guess_from_content(&self, bytes: &[u8]) -> Vec<String> {
223 self.repository
224 .detect_by_content(bytes)
225 .into_iter()
226 .map(|mime_type| mime_type.name().to_owned())
227 .collect()
228 }
229}
230
231/// Gets the embedded default repository.
232///
233/// # Returns
234/// Shared parsed repository.
235///
236pub(crate) fn default_repository() -> &'static MimeRepository {
237 DEFAULT_REPOSITORY.get_or_init(|| {
238 MimeRepository::from_xml(DEFAULT_DATABASE)
239 .expect("embedded freedesktop MIME database should parse")
240 })
241}
242
243impl<'a> StreamBasedMimeDetector for RepositoryMimeDetector<'a> {
244 /// Gets the shared detector core.
245 fn core(&self) -> &MimeDetectorCore {
246 &self.core
247 }
248
249 /// Gets the maximum content prefix length from the repository.
250 fn max_test_bytes(&self) -> usize {
251 self.repository.max_test_bytes()
252 }
253
254 /// Guesses MIME type names from filename rules.
255 fn guess_from_filename(&self, filename: &str) -> Vec<String> {
256 RepositoryMimeDetector::guess_from_filename(self, filename)
257 }
258
259 /// Guesses MIME type names from content magic rules.
260 fn guess_from_content_bytes(&self, content: &[u8]) -> MimeResult<Vec<String>> {
261 Ok(RepositoryMimeDetector::guess_from_content(self, content))
262 }
263}