Skip to main content

qubit_mime/
mime_config.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Configuration values for MIME detection.
11//!
12//! [`MimeConfig`] is the runtime configuration shared by detector wrappers,
13//! detector providers, and media-stream refinement. It can be loaded from a
14//! [`Config`] object, from process environment variables, or from built-in
15//! defaults.
16//!
17
18use std::collections::{
19    HashMap,
20    HashSet,
21};
22use std::sync::{
23    LazyLock,
24    RwLock,
25};
26
27use qubit_config::{
28    Config,
29    options::{
30        CollectionReadOptions,
31        ConfigReadOptions,
32        EmptyItemPolicy,
33    },
34};
35
36use crate::{
37    CONFIG_MEDIA_STREAM_CLASSIFIER_DEFAULT,
38    CONFIG_MIME_AMBIGUOUS_MIME_MAPPING,
39    CONFIG_MIME_DETECTOR_DEFAULT,
40    CONFIG_MIME_DETECTOR_FALLBACKS,
41    CONFIG_MIME_ENABLE_PRECISE_DETECTION,
42    CONFIG_MIME_PRECISE_DETECTION_PATTERNS,
43    DEFAULT_ENABLE_PRECISE_DETECTION,
44    DEFAULT_MEDIA_STREAM_CLASSIFIER,
45    DEFAULT_MIME_DETECTOR,
46    DEFAULT_MIME_DETECTOR_FALLBACKS,
47    ENV_MEDIA_STREAM_CLASSIFIER_DEFAULT,
48    ENV_MIME_DETECTOR_AMBIGUOUS_MIME_MAPPING,
49    ENV_MIME_DETECTOR_DEFAULT,
50    ENV_MIME_DETECTOR_ENABLE_PRECISE_DETECTION,
51    ENV_MIME_DETECTOR_FALLBACKS,
52    ENV_MIME_DETECTOR_PRECISE_DETECTION_PATTERNS,
53    MimeResult,
54};
55
/// Runtime configuration for MIME detectors.
///
/// # Supported keys
///
/// Logical keys and environment-style keys are both accepted by
/// [`MimeConfig::from_config`]. Environment variables use the same names as the
/// environment-style keys.
///
/// | Field | Logical key | Environment key | Default | Format |
/// | --- | --- | --- | --- | --- |
/// | Default MIME detector | `mime.detector.default` | `QUBIT_MIME_DETECTOR_DEFAULT` | `repository` | Provider id, alias, or `auto` |
/// | MIME detector fallbacks | `mime.detector.fallbacks` | `QUBIT_MIME_DETECTOR_FALLBACKS` | empty | List split on `,` or `;` |
/// | Media stream classifier | `mime.media.stream.classifier.default` | `QUBIT_MEDIA_STREAM_CLASSIFIER_DEFAULT` | `ffprobe` | Classifier selector |
/// | Precise detection switch | `mime.enable.precise.detection` | `QUBIT_MIME_ENABLE_PRECISE_DETECTION` | `true` | Boolean |
/// | Precise detection patterns | `mime.precise.detection.patterns` | `QUBIT_MIME_PRECISE_DETECTION_PATTERNS` | `webm,ogg` | Extension list |
/// | Ambiguous MIME mapping | `mime.ambiguous.mime.mapping` | `QUBIT_MIME_AMBIGUOUS_MIME_MAPPING` | `webm:video/webm,audio/webm;ogg:video/ogg,audio/ogg` | `ext:video,audio` entries split on `;` |
///
/// Detector fallback selection is performed by
/// [`MimeDetectorRegistry`](crate::MimeDetectorRegistry), not by this config
/// object. The config only stores the default selector and ordered fallback
/// names.
///
/// All fields are private; values are read through the accessor methods of the
/// same name.
#[derive(Debug, Clone)]
pub struct MimeConfig {
    /// Default MIME detector selector.
    mime_detector_default: String,
    /// Fallback MIME detector selectors, in the order they were configured.
    /// Names are trimmed and empty names are removed when read from a
    /// [`Config`].
    mime_detector_fallbacks: Vec<String>,
    /// Default media stream classifier selector.
    media_stream_classifier_default: String,
    /// Whether precise media-stream detection is enabled.
    enable_precise_detection: bool,
    /// Extensions requiring precise detection, stored lowercase and without
    /// leading dots.
    precise_detection_patterns: HashSet<String>,
    /// Ambiguous MIME mappings, keyed by lowercase extension; each value is
    /// `[video_mime, audio_mime]`.
    ambiguous_mime_mapping: HashMap<String, [String; 2]>,
}
92
/// Process-wide default MIME configuration.
///
/// Initialized lazily on first access with [`MimeConfig::load`]. The stored
/// value is replaced by [`MimeConfig::set_default`] and cloned by the
/// [`Default`] implementation of [`MimeConfig`].
static DEFAULT_MIME_CONFIG: LazyLock<RwLock<MimeConfig>> =
    LazyLock::new(|| RwLock::new(MimeConfig::load()));
96
/// Value read options.
///
/// Plain environment-friendly read options with no collection options
/// configured; built once on first use.
static VALUE_READ_OPTIONS: LazyLock<ConfigReadOptions> =
    LazyLock::new(ConfigReadOptions::env_friendly);
100
/// List value read options.
///
/// Environment-friendly options for list settings: scalar strings are split on
/// `,` and `;`, items are trimmed, and empty items are skipped.
static LIST_READ_OPTIONS: LazyLock<ConfigReadOptions> = LazyLock::new(|| {
    ConfigReadOptions::env_friendly().with_collection_options(
        CollectionReadOptions::default()
            .with_split_scalar_strings(true)
            .with_delimiters([',', ';'])
            .with_trim_items(true)
            .with_empty_item_policy(EmptyItemPolicy::Skip),
    )
});
111
/// Mapping read options.
///
/// Same as the list options except that `;` is the only delimiter: each
/// mapping entry has the form `ext:video,audio`, so `,` must stay inside the
/// entry instead of splitting it.
static MAPPING_READ_OPTIONS: LazyLock<ConfigReadOptions> = LazyLock::new(|| {
    ConfigReadOptions::env_friendly().with_collection_options(
        CollectionReadOptions::default()
            .with_split_scalar_strings(true)
            .with_delimiters([';'])
            .with_trim_items(true)
            .with_empty_item_policy(EmptyItemPolicy::Skip),
    )
});
122
/// Built-in precise detection patterns.
///
/// File extensions (lowercase, no leading dot) used when no pattern list is
/// configured.
static DEFAULT_PRECISE_DETECTION_PATTERNS: &[&str] = &["webm", "ogg"];
125
/// Built-in ambiguous MIME mapping entries.
///
/// Each entry uses the `ext:video,audio` format: the extension, a colon, then
/// the video MIME type and the audio MIME type separated by a comma.
static DEFAULT_AMBIGUOUS_MIME_MAPPING_ENTRIES: &[&str] =
    &["webm:video/webm,audio/webm", "ogg:video/ogg,audio/ogg"];
129
130impl MimeConfig {
131    /// Loads configuration from environment variables and defaults.
132    ///
133    /// # Returns
134    /// Configuration used by default detector instances.
135    pub fn load() -> Self {
136        match Self::from_env() {
137            Ok(config) => config,
138            Err(_) => Self::builtin_default(),
139        }
140    }
141
142    /// Creates MIME configuration from a config object.
143    ///
144    /// Values are read with environment-friendly options, so both logical keys
145    /// such as `mime.detector.default` and environment-style keys such as
146    /// `QUBIT_MIME_DETECTOR_DEFAULT` are accepted. List values may be provided
147    /// as arrays or as scalar strings split on `,` and `;`; empty items are
148    /// ignored.
149    ///
150    /// # Examples
151    ///
152    /// Configure a preferred native detector and a repository fallback:
153    ///
154    /// ```rust
155    /// use qubit_config::Config;
156    /// use qubit_mime::{
157    ///     CONFIG_MIME_DETECTOR_DEFAULT,
158    ///     CONFIG_MIME_DETECTOR_FALLBACKS,
159    ///     MimeConfig,
160    ///     MimeResult,
161    /// };
162    ///
163    /// # fn main() -> MimeResult<()> {
164    /// let mut source = Config::new();
165    /// source.set(CONFIG_MIME_DETECTOR_DEFAULT, "file")?;
166    /// source.set(CONFIG_MIME_DETECTOR_FALLBACKS, "repository")?;
167    ///
168    /// let config = MimeConfig::from_config(&source)?;
169    /// assert_eq!("file", config.mime_detector_default());
170    /// assert_eq!(
171    ///     ["repository".to_owned()].as_slice(),
172    ///     config.mime_detector_fallbacks(),
173    /// );
174    /// # Ok(())
175    /// # }
176    /// ```
177    ///
178    /// # Parameters
179    /// - `config`: Configuration object containing logical keys or environment
180    ///   variable style keys.
181    ///
182    /// # Returns
183    /// Parsed MIME configuration.
184    ///
185    /// # Errors
186    /// Returns [`MimeError::Config`](crate::MimeError::Config) when a present
187    /// configuration value cannot be converted to the expected type.
188    pub fn from_config(config: &Config) -> MimeResult<Self> {
189        let mime_detector_default = config.get_any_or_with(
190            [CONFIG_MIME_DETECTOR_DEFAULT, ENV_MIME_DETECTOR_DEFAULT],
191            DEFAULT_MIME_DETECTOR.to_owned(),
192            &VALUE_READ_OPTIONS,
193        )?;
194        let mime_detector_fallbacks = config.get_any_or_with(
195            [CONFIG_MIME_DETECTOR_FALLBACKS, ENV_MIME_DETECTOR_FALLBACKS],
196            fallback_defaults(),
197            &LIST_READ_OPTIONS,
198        )?;
199        let media_stream_classifier_default = config.get_any_or_with(
200            [
201                CONFIG_MEDIA_STREAM_CLASSIFIER_DEFAULT,
202                ENV_MEDIA_STREAM_CLASSIFIER_DEFAULT,
203            ],
204            DEFAULT_MEDIA_STREAM_CLASSIFIER.to_owned(),
205            &VALUE_READ_OPTIONS,
206        )?;
207        let enable_precise_detection = config.get_any_or_with(
208            [
209                CONFIG_MIME_ENABLE_PRECISE_DETECTION,
210                ENV_MIME_DETECTOR_ENABLE_PRECISE_DETECTION,
211            ],
212            DEFAULT_ENABLE_PRECISE_DETECTION,
213            &VALUE_READ_OPTIONS,
214        )?;
215        let precise_detection_patterns = config.get_any_or_with(
216            [
217                CONFIG_MIME_PRECISE_DETECTION_PATTERNS,
218                ENV_MIME_DETECTOR_PRECISE_DETECTION_PATTERNS,
219            ],
220            DEFAULT_PRECISE_DETECTION_PATTERNS,
221            &VALUE_READ_OPTIONS,
222        )?;
223        let ambiguous_mime_mapping = config.get_any_or_with(
224            [
225                CONFIG_MIME_AMBIGUOUS_MIME_MAPPING,
226                ENV_MIME_DETECTOR_AMBIGUOUS_MIME_MAPPING,
227            ],
228            DEFAULT_AMBIGUOUS_MIME_MAPPING_ENTRIES,
229            &MAPPING_READ_OPTIONS,
230        )?;
231        Ok(Self {
232            mime_detector_default,
233            mime_detector_fallbacks: normalize_detector_names(mime_detector_fallbacks),
234            media_stream_classifier_default,
235            enable_precise_detection,
236            precise_detection_patterns: normalize_patterns(precise_detection_patterns),
237            ambiguous_mime_mapping: build_ambiguous_mime_mapping(ambiguous_mime_mapping),
238        })
239    }
240
241    /// Creates MIME configuration from process environment variables.
242    ///
243    /// # Returns
244    /// Parsed MIME configuration.
245    ///
246    /// # Errors
247    /// Returns [`MimeError::Config`](crate::MimeError::Config) when the
248    /// environment cannot be represented by [`Config`].
249    pub fn from_env() -> MimeResult<Self> {
250        let config = Config::from_env()?;
251        Self::from_config(&config)
252    }
253
254    /// Replaces the global default MIME configuration.
255    ///
256    /// # Parameters
257    /// - `config`: Configuration to use for future default instances.
258    pub fn set_default(config: Self) {
259        let mut guard = DEFAULT_MIME_CONFIG
260            .write()
261            .expect("default MIME configuration lock should not be poisoned");
262        *guard = config;
263    }
264
265    /// Reloads the global default MIME configuration from a config object.
266    ///
267    /// # Parameters
268    /// - `config`: Configuration object used to build the new default.
269    ///
270    /// # Errors
271    /// Returns [`MimeError::Config`](crate::MimeError::Config) when a present
272    /// configuration value cannot be converted to the expected type.
273    pub fn reload_default(config: &Config) -> MimeResult<()> {
274        Self::set_default(Self::from_config(config)?);
275        Ok(())
276    }
277
278    /// Reloads the global default MIME configuration from process environment.
279    ///
280    /// # Errors
281    /// Returns [`MimeError::Config`](crate::MimeError::Config) when the
282    /// environment cannot be represented by [`Config`].
283    pub fn reload_default_from_env() -> MimeResult<()> {
284        Self::set_default(Self::from_env()?);
285        Ok(())
286    }
287
288    /// Gets the configured default MIME detector selector.
289    ///
290    /// # Returns
291    /// Backend selector used by default detector wrappers.
292    pub fn mime_detector_default(&self) -> &str {
293        &self.mime_detector_default
294    }
295
296    /// Gets fallback MIME detector selectors.
297    ///
298    /// # Returns
299    /// Ordered fallback backend selectors.
300    pub fn mime_detector_fallbacks(&self) -> &[String] {
301        &self.mime_detector_fallbacks
302    }
303
304    /// Gets the configured default media stream classifier selector.
305    ///
306    /// # Returns
307    /// Backend selector used by default classifier wrappers.
308    pub fn media_stream_classifier_default(&self) -> &str {
309        &self.media_stream_classifier_default
310    }
311
312    /// Tells whether precise media-stream detection is enabled.
313    ///
314    /// # Returns
315    /// `true` when ambiguous media MIME types may be refined.
316    pub fn enable_precise_detection(&self) -> bool {
317        self.enable_precise_detection
318    }
319
320    /// Gets extensions requiring precise detection.
321    ///
322    /// # Returns
323    /// Lowercase extension names without leading dots.
324    pub fn precise_detection_patterns(&self) -> &HashSet<String> {
325        &self.precise_detection_patterns
326    }
327
328    /// Gets ambiguous extension mappings.
329    ///
330    /// # Returns
331    /// Mapping from extension to `[video_mime, audio_mime]`.
332    pub fn ambiguous_mime_mapping(&self) -> &HashMap<String, [String; 2]> {
333        &self.ambiguous_mime_mapping
334    }
335
336    /// Creates the built-in MIME configuration.
337    ///
338    /// # Returns
339    /// Configuration populated entirely from crate constants.
340    fn builtin_default() -> Self {
341        Self {
342            mime_detector_default: DEFAULT_MIME_DETECTOR.to_owned(),
343            mime_detector_fallbacks: fallback_defaults(),
344            media_stream_classifier_default: DEFAULT_MEDIA_STREAM_CLASSIFIER.to_owned(),
345            enable_precise_detection: DEFAULT_ENABLE_PRECISE_DETECTION,
346            precise_detection_patterns: normalize_patterns(
347                DEFAULT_PRECISE_DETECTION_PATTERNS
348                    .iter()
349                    .map(|pattern| pattern.to_string())
350                    .collect(),
351            ),
352            ambiguous_mime_mapping: build_ambiguous_mime_mapping(
353                DEFAULT_AMBIGUOUS_MIME_MAPPING_ENTRIES
354                    .iter()
355                    .map(|entry| entry.to_string())
356                    .collect(),
357            ),
358        }
359    }
360}
361
362impl Default for MimeConfig {
363    /// Loads default configuration.
364    fn default() -> Self {
365        DEFAULT_MIME_CONFIG
366            .read()
367            .expect("default MIME configuration lock should not be poisoned")
368            .clone()
369    }
370}
371
372/// Gets built-in fallback detector defaults.
373///
374/// # Returns
375/// Default fallback detector names.
376fn fallback_defaults() -> Vec<String> {
377    DEFAULT_MIME_DETECTOR_FALLBACKS
378        .split(',')
379        .map(str::trim)
380        .filter(|name| !name.is_empty())
381        .map(str::to_owned)
382        .collect()
383}
384
/// Cleans up detector names read from configuration.
///
/// # Parameters
/// - `names`: Raw detector names.
///
/// # Returns
/// The names with surrounding whitespace removed, in their original order;
/// names that are empty after trimming are dropped.
fn normalize_detector_names(names: Vec<String>) -> Vec<String> {
    names
        .into_iter()
        .filter_map(|raw| {
            let name = raw.trim();
            if name.is_empty() {
                None
            } else {
                Some(name.to_owned())
            }
        })
        .collect()
}
399
/// Normalizes extension patterns.
///
/// Each item is trimmed, stripped of leading dots, and lowercased (ASCII).
/// Items that are empty after this normalization, such as `""`, `"."`, or
/// `".."`, are dropped so the set never contains an empty extension.
///
/// # Parameters
/// - `patterns`: Raw extension items, usually read from configuration.
///
/// # Returns
/// Lowercase extension set without leading dots.
fn normalize_patterns(patterns: Vec<String>) -> HashSet<String> {
    patterns
        .into_iter()
        .map(|pattern| {
            pattern
                .trim()
                .trim_start_matches('.')
                .to_ascii_lowercase()
        })
        // Filter only after the dots are stripped; filtering first would let a
        // dots-only item through and leave "" in the set.
        .filter(|pattern| !pattern.is_empty())
        .collect()
}
415
/// Builds ambiguous MIME mappings from configured entries.
///
/// An entry is accepted only when it has the exact shape `ext:video,audio`:
/// a non-empty extension, a colon, and exactly two non-empty MIME types
/// separated by a comma. Whitespace around each part is ignored. The
/// extension is stripped of leading dots and lowercased (ASCII) before it is
/// checked, so an extension made only of dots is rejected. Malformed entries
/// are skipped silently; when an extension occurs more than once, the last
/// entry wins.
///
/// # Parameters
/// - `entries`: Mapping entries in `ext:video,audio` format.
///
/// # Returns
/// Lowercase extension to MIME pair mapping.
fn build_ambiguous_mime_mapping(entries: Vec<String>) -> HashMap<String, [String; 2]> {
    entries
        .into_iter()
        .filter_map(|entry| {
            let (extension, mime_types) = entry.split_once(':')?;
            // Normalize first so the emptiness check sees the final key.
            let extension = extension
                .trim()
                .trim_start_matches('.')
                .to_ascii_lowercase();
            let mut mime_types = mime_types.split(',').map(str::trim);
            let video_type = mime_types.next()?;
            let audio_type = mime_types.next()?;
            let well_formed = !extension.is_empty()
                && !video_type.is_empty()
                && !audio_type.is_empty()
                // A third MIME type makes the entry ambiguous; reject it.
                && mime_types.next().is_none();
            well_formed.then(|| (extension, [video_type.to_owned(), audio_type.to_owned()]))
        })
        .collect()
}