qubit_mime/mime_config.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Configuration values for MIME detection.
11//!
12//! [`MimeConfig`] is the runtime configuration shared by detector wrappers,
13//! detector providers, and media-stream refinement. It can be loaded from a
14//! [`Config`] object, from process environment variables, or from built-in
15//! defaults.
16//!
17
18use std::collections::{
19 HashMap,
20 HashSet,
21};
22use std::sync::{
23 LazyLock,
24 RwLock,
25};
26
27use qubit_config::{
28 Config,
29 options::{
30 CollectionReadOptions,
31 ConfigReadOptions,
32 EmptyItemPolicy,
33 },
34};
35
36use crate::{
37 CONFIG_MEDIA_STREAM_CLASSIFIER_DEFAULT,
38 CONFIG_MIME_AMBIGUOUS_MIME_MAPPING,
39 CONFIG_MIME_DETECTOR_DEFAULT,
40 CONFIG_MIME_DETECTOR_FALLBACKS,
41 CONFIG_MIME_ENABLE_PRECISE_DETECTION,
42 CONFIG_MIME_PRECISE_DETECTION_PATTERNS,
43 DEFAULT_ENABLE_PRECISE_DETECTION,
44 DEFAULT_MEDIA_STREAM_CLASSIFIER,
45 DEFAULT_MIME_DETECTOR,
46 DEFAULT_MIME_DETECTOR_FALLBACKS,
47 ENV_MEDIA_STREAM_CLASSIFIER_DEFAULT,
48 ENV_MIME_DETECTOR_AMBIGUOUS_MIME_MAPPING,
49 ENV_MIME_DETECTOR_DEFAULT,
50 ENV_MIME_DETECTOR_ENABLE_PRECISE_DETECTION,
51 ENV_MIME_DETECTOR_FALLBACKS,
52 ENV_MIME_DETECTOR_PRECISE_DETECTION_PATTERNS,
53 MimeResult,
54};
55
/// Settings that drive MIME detection at runtime.
///
/// # Supported keys
///
/// [`MimeConfig::from_config`] looks every value up under both its logical
/// key and its environment-style key. Process environment variables use the
/// same names as the environment-style keys.
///
/// | Field | Logical key | Environment key | Default | Format |
/// | --- | --- | --- | --- | --- |
/// | Default MIME detector | `mime.detector.default` | `QUBIT_MIME_DETECTOR_DEFAULT` | `repository` | Provider id, alias, or `auto` |
/// | MIME detector fallbacks | `mime.detector.fallbacks` | `QUBIT_MIME_DETECTOR_FALLBACKS` | empty | List split on `,` or `;` |
/// | Media stream classifier | `mime.media.stream.classifier.default` | `QUBIT_MEDIA_STREAM_CLASSIFIER_DEFAULT` | `ffprobe` | Classifier selector |
/// | Precise detection switch | `mime.enable.precise.detection` | `QUBIT_MIME_ENABLE_PRECISE_DETECTION` | `true` | Boolean |
/// | Precise detection patterns | `mime.precise.detection.patterns` | `QUBIT_MIME_PRECISE_DETECTION_PATTERNS` | `webm,ogg` | Extension list |
/// | Ambiguous MIME mapping | `mime.ambiguous.mime.mapping` | `QUBIT_MIME_AMBIGUOUS_MIME_MAPPING` | `webm:video/webm,audio/webm;ogg:video/ogg,audio/ogg` | `ext:video,audio` entries split on `;` |
///
/// This type only records the default detector selector and the ordered
/// fallback names. Choosing between them is the responsibility of
/// [`MimeDetectorRegistry`](crate::MimeDetectorRegistry).
#[derive(Debug, Clone)]
pub struct MimeConfig {
    /// Selector of the MIME detector used by default.
    mime_detector_default: String,
    /// Selectors of the fallback MIME detectors, in configured order.
    mime_detector_fallbacks: Vec<String>,
    /// Selector of the media stream classifier used by default.
    media_stream_classifier_default: String,
    /// Switch for precise media-stream detection.
    enable_precise_detection: bool,
    /// File extensions for which precise detection is required.
    precise_detection_patterns: HashSet<String>,
    /// Extension to `[video_mime, audio_mime]` pairs for ambiguous containers.
    ambiguous_mime_mapping: HashMap<String, [String; 2]>,
}
92
93/// Default MIME configuration.
94static DEFAULT_MIME_CONFIG: LazyLock<RwLock<MimeConfig>> =
95 LazyLock::new(|| RwLock::new(MimeConfig::load()));
96
97/// Value read options.
98static VALUE_READ_OPTIONS: LazyLock<ConfigReadOptions> =
99 LazyLock::new(ConfigReadOptions::env_friendly);
100
101/// List value read options.
102static LIST_READ_OPTIONS: LazyLock<ConfigReadOptions> = LazyLock::new(|| {
103 ConfigReadOptions::env_friendly().with_collection_options(
104 CollectionReadOptions::default()
105 .with_split_scalar_strings(true)
106 .with_delimiters([',', ';'])
107 .with_trim_items(true)
108 .with_empty_item_policy(EmptyItemPolicy::Skip),
109 )
110});
111
112/// Mapping read options.
113static MAPPING_READ_OPTIONS: LazyLock<ConfigReadOptions> = LazyLock::new(|| {
114 ConfigReadOptions::env_friendly().with_collection_options(
115 CollectionReadOptions::default()
116 .with_split_scalar_strings(true)
117 .with_delimiters([';'])
118 .with_trim_items(true)
119 .with_empty_item_policy(EmptyItemPolicy::Skip),
120 )
121});
122
/// Extensions that require precise detection when nothing is configured.
static DEFAULT_PRECISE_DETECTION_PATTERNS: &[&str] = &["webm", "ogg"];

/// Ambiguous MIME mapping entries used when nothing is configured.
///
/// Each entry has the form `ext:video,audio`.
static DEFAULT_AMBIGUOUS_MIME_MAPPING_ENTRIES: &[&str] =
    &["webm:video/webm,audio/webm", "ogg:video/ogg,audio/ogg"];
129
130impl MimeConfig {
131 /// Loads configuration from environment variables and defaults.
132 ///
133 /// # Returns
134 /// Configuration used by default detector instances.
135 pub fn load() -> Self {
136 match Self::from_env() {
137 Ok(config) => config,
138 Err(_) => Self::builtin_default(),
139 }
140 }
141
142 /// Creates MIME configuration from a config object.
143 ///
144 /// Values are read with environment-friendly options, so both logical keys
145 /// such as `mime.detector.default` and environment-style keys such as
146 /// `QUBIT_MIME_DETECTOR_DEFAULT` are accepted. List values may be provided
147 /// as arrays or as scalar strings split on `,` and `;`; empty items are
148 /// ignored.
149 ///
150 /// # Examples
151 ///
152 /// Configure a preferred native detector and a repository fallback:
153 ///
154 /// ```rust
155 /// use qubit_config::Config;
156 /// use qubit_mime::{
157 /// CONFIG_MIME_DETECTOR_DEFAULT,
158 /// CONFIG_MIME_DETECTOR_FALLBACKS,
159 /// MimeConfig,
160 /// MimeResult,
161 /// };
162 ///
163 /// # fn main() -> MimeResult<()> {
164 /// let mut source = Config::new();
165 /// source.set(CONFIG_MIME_DETECTOR_DEFAULT, "file")?;
166 /// source.set(CONFIG_MIME_DETECTOR_FALLBACKS, "repository")?;
167 ///
168 /// let config = MimeConfig::from_config(&source)?;
169 /// assert_eq!("file", config.mime_detector_default());
170 /// assert_eq!(
171 /// ["repository".to_owned()].as_slice(),
172 /// config.mime_detector_fallbacks(),
173 /// );
174 /// # Ok(())
175 /// # }
176 /// ```
177 ///
178 /// # Parameters
179 /// - `config`: Configuration object containing logical keys or environment
180 /// variable style keys.
181 ///
182 /// # Returns
183 /// Parsed MIME configuration.
184 ///
185 /// # Errors
186 /// Returns [`MimeError::Config`](crate::MimeError::Config) when a present
187 /// configuration value cannot be converted to the expected type.
188 pub fn from_config(config: &Config) -> MimeResult<Self> {
189 let mime_detector_default = config.get_any_or_with(
190 [CONFIG_MIME_DETECTOR_DEFAULT, ENV_MIME_DETECTOR_DEFAULT],
191 DEFAULT_MIME_DETECTOR.to_owned(),
192 &VALUE_READ_OPTIONS,
193 )?;
194 let mime_detector_fallbacks = config.get_any_or_with(
195 [CONFIG_MIME_DETECTOR_FALLBACKS, ENV_MIME_DETECTOR_FALLBACKS],
196 fallback_defaults(),
197 &LIST_READ_OPTIONS,
198 )?;
199 let media_stream_classifier_default = config.get_any_or_with(
200 [
201 CONFIG_MEDIA_STREAM_CLASSIFIER_DEFAULT,
202 ENV_MEDIA_STREAM_CLASSIFIER_DEFAULT,
203 ],
204 DEFAULT_MEDIA_STREAM_CLASSIFIER.to_owned(),
205 &VALUE_READ_OPTIONS,
206 )?;
207 let enable_precise_detection = config.get_any_or_with(
208 [
209 CONFIG_MIME_ENABLE_PRECISE_DETECTION,
210 ENV_MIME_DETECTOR_ENABLE_PRECISE_DETECTION,
211 ],
212 DEFAULT_ENABLE_PRECISE_DETECTION,
213 &VALUE_READ_OPTIONS,
214 )?;
215 let precise_detection_patterns = config.get_any_or_with(
216 [
217 CONFIG_MIME_PRECISE_DETECTION_PATTERNS,
218 ENV_MIME_DETECTOR_PRECISE_DETECTION_PATTERNS,
219 ],
220 DEFAULT_PRECISE_DETECTION_PATTERNS,
221 &VALUE_READ_OPTIONS,
222 )?;
223 let ambiguous_mime_mapping = config.get_any_or_with(
224 [
225 CONFIG_MIME_AMBIGUOUS_MIME_MAPPING,
226 ENV_MIME_DETECTOR_AMBIGUOUS_MIME_MAPPING,
227 ],
228 DEFAULT_AMBIGUOUS_MIME_MAPPING_ENTRIES,
229 &MAPPING_READ_OPTIONS,
230 )?;
231 Ok(Self {
232 mime_detector_default,
233 mime_detector_fallbacks: normalize_detector_names(mime_detector_fallbacks),
234 media_stream_classifier_default,
235 enable_precise_detection,
236 precise_detection_patterns: normalize_patterns(precise_detection_patterns),
237 ambiguous_mime_mapping: build_ambiguous_mime_mapping(ambiguous_mime_mapping),
238 })
239 }
240
241 /// Creates MIME configuration from process environment variables.
242 ///
243 /// # Returns
244 /// Parsed MIME configuration.
245 ///
246 /// # Errors
247 /// Returns [`MimeError::Config`](crate::MimeError::Config) when the
248 /// environment cannot be represented by [`Config`].
249 pub fn from_env() -> MimeResult<Self> {
250 let config = Config::from_env()?;
251 Self::from_config(&config)
252 }
253
254 /// Replaces the global default MIME configuration.
255 ///
256 /// # Parameters
257 /// - `config`: Configuration to use for future default instances.
258 pub fn set_default(config: Self) {
259 let mut guard = DEFAULT_MIME_CONFIG
260 .write()
261 .expect("default MIME configuration lock should not be poisoned");
262 *guard = config;
263 }
264
265 /// Reloads the global default MIME configuration from a config object.
266 ///
267 /// # Parameters
268 /// - `config`: Configuration object used to build the new default.
269 ///
270 /// # Errors
271 /// Returns [`MimeError::Config`](crate::MimeError::Config) when a present
272 /// configuration value cannot be converted to the expected type.
273 pub fn reload_default(config: &Config) -> MimeResult<()> {
274 Self::set_default(Self::from_config(config)?);
275 Ok(())
276 }
277
278 /// Reloads the global default MIME configuration from process environment.
279 ///
280 /// # Errors
281 /// Returns [`MimeError::Config`](crate::MimeError::Config) when the
282 /// environment cannot be represented by [`Config`].
283 pub fn reload_default_from_env() -> MimeResult<()> {
284 Self::set_default(Self::from_env()?);
285 Ok(())
286 }
287
288 /// Gets the configured default MIME detector selector.
289 ///
290 /// # Returns
291 /// Backend selector used by default detector wrappers.
292 pub fn mime_detector_default(&self) -> &str {
293 &self.mime_detector_default
294 }
295
296 /// Gets fallback MIME detector selectors.
297 ///
298 /// # Returns
299 /// Ordered fallback backend selectors.
300 pub fn mime_detector_fallbacks(&self) -> &[String] {
301 &self.mime_detector_fallbacks
302 }
303
304 /// Gets the configured default media stream classifier selector.
305 ///
306 /// # Returns
307 /// Backend selector used by default classifier wrappers.
308 pub fn media_stream_classifier_default(&self) -> &str {
309 &self.media_stream_classifier_default
310 }
311
312 /// Tells whether precise media-stream detection is enabled.
313 ///
314 /// # Returns
315 /// `true` when ambiguous media MIME types may be refined.
316 pub fn enable_precise_detection(&self) -> bool {
317 self.enable_precise_detection
318 }
319
320 /// Gets extensions requiring precise detection.
321 ///
322 /// # Returns
323 /// Lowercase extension names without leading dots.
324 pub fn precise_detection_patterns(&self) -> &HashSet<String> {
325 &self.precise_detection_patterns
326 }
327
328 /// Gets ambiguous extension mappings.
329 ///
330 /// # Returns
331 /// Mapping from extension to `[video_mime, audio_mime]`.
332 pub fn ambiguous_mime_mapping(&self) -> &HashMap<String, [String; 2]> {
333 &self.ambiguous_mime_mapping
334 }
335
336 /// Creates the built-in MIME configuration.
337 ///
338 /// # Returns
339 /// Configuration populated entirely from crate constants.
340 fn builtin_default() -> Self {
341 Self {
342 mime_detector_default: DEFAULT_MIME_DETECTOR.to_owned(),
343 mime_detector_fallbacks: fallback_defaults(),
344 media_stream_classifier_default: DEFAULT_MEDIA_STREAM_CLASSIFIER.to_owned(),
345 enable_precise_detection: DEFAULT_ENABLE_PRECISE_DETECTION,
346 precise_detection_patterns: normalize_patterns(
347 DEFAULT_PRECISE_DETECTION_PATTERNS
348 .iter()
349 .map(|pattern| pattern.to_string())
350 .collect(),
351 ),
352 ambiguous_mime_mapping: build_ambiguous_mime_mapping(
353 DEFAULT_AMBIGUOUS_MIME_MAPPING_ENTRIES
354 .iter()
355 .map(|entry| entry.to_string())
356 .collect(),
357 ),
358 }
359 }
360}
361
362impl Default for MimeConfig {
363 /// Loads default configuration.
364 fn default() -> Self {
365 DEFAULT_MIME_CONFIG
366 .read()
367 .expect("default MIME configuration lock should not be poisoned")
368 .clone()
369 }
370}
371
372/// Gets built-in fallback detector defaults.
373///
374/// # Returns
375/// Default fallback detector names.
376fn fallback_defaults() -> Vec<String> {
377 DEFAULT_MIME_DETECTOR_FALLBACKS
378 .split(',')
379 .map(str::trim)
380 .filter(|name| !name.is_empty())
381 .map(str::to_owned)
382 .collect()
383}
384
/// Cleans up detector names read from configuration.
///
/// # Parameters
/// - `names`: Detector names exactly as they were read.
///
/// # Returns
/// The names with surrounding whitespace removed, in their original order;
/// names that are blank after trimming are left out.
fn normalize_detector_names(names: Vec<String>) -> Vec<String> {
    let mut normalized = Vec::with_capacity(names.len());
    for raw in names {
        let trimmed = raw.trim();
        if !trimmed.is_empty() {
            normalized.push(trimmed.to_owned());
        }
    }
    normalized
}
399
/// Normalizes extension patterns.
///
/// Each item is trimmed, stripped of leading dots, and lowercased (ASCII).
/// Items that are empty after that normalisation are dropped, so inputs such
/// as `""`, `"  "`, `"."`, or `"..."` never produce an empty extension.
///
/// # Parameters
/// - `patterns`: Raw extension items, usually read from configuration.
///
/// # Returns
/// Lowercase extension set without leading dots and without empty entries.
fn normalize_patterns(patterns: Vec<String>) -> HashSet<String> {
    patterns
        .iter()
        .map(|pattern| pattern.trim().trim_start_matches('.'))
        // Filter after stripping dots; a dot-only item is empty only at this point.
        .filter(|extension| !extension.is_empty())
        .map(str::to_ascii_lowercase)
        .collect()
}
415
/// Builds ambiguous MIME mappings from configured entries.
///
/// Malformed entries are skipped rather than reported. When the same
/// extension appears more than once, the last entry wins.
///
/// # Parameters
/// - `entries`: Mapping entries in `ext:video,audio` format.
///
/// # Returns
/// Lowercase extension to MIME pair mapping.
fn build_ambiguous_mime_mapping(entries: Vec<String>) -> HashMap<String, [String; 2]> {
    entries
        .iter()
        .map(String::as_str)
        .filter_map(parse_ambiguous_mime_entry)
        .collect()
}

/// Parses one `ext:video,audio` entry into an extension and its MIME pair.
///
/// Returns `None` when the `:` separator is missing, when there are not
/// exactly two MIME types, when either MIME type is blank, or when the
/// extension is empty after trimming whitespace and leading dots.
fn parse_ambiguous_mime_entry(entry: &str) -> Option<(String, [String; 2])> {
    let (extension, mime_types) = entry.split_once(':')?;
    // Normalise before validating so a dot-only extension such as "." is rejected
    // instead of being stored under an empty key.
    let extension = extension.trim().trim_start_matches('.');
    let mut mime_types = mime_types.split(',').map(str::trim);
    let video_type = mime_types.next()?;
    let audio_type = mime_types.next()?;
    let well_formed = !extension.is_empty()
        && !video_type.is_empty()
        && !audio_type.is_empty()
        && mime_types.next().is_none();
    well_formed.then(|| {
        (
            extension.to_ascii_lowercase(),
            [video_type.to_owned(), audio_type.to_owned()],
        )
    })
}
448}