Skip to main content

qubit_mime/detector/
file_command_mime_detector.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! MIME detector backed by the system `file` command.
11//!
12//! This detector uses the embedded repository for filename guesses and invokes
13//! `file --mime-type --brief <path>` for content guesses on local files or
14//! temporary files staged from seekable readers. It is registered under the
15//! built-in provider id `file` and aliases such as `file-command`.
16
17use qubit_command::{
18    Command,
19    CommandRunner,
20};
21use qubit_io::ReadSeek;
22use std::path::Path;
23
24use crate::{
25    FileBasedMimeDetector,
26    MimeConfig,
27    MimeDetectionPolicy,
28    MimeDetector,
29    MimeDetectorCore,
30    MimeRepository,
31    MimeResult,
32};
33
34use super::repository_mime_detector::default_repository;
35
36/// MIME detector backed by `file --mime-type --brief`.
37#[derive(Debug, Clone)]
38pub struct FileCommandMimeDetector<'a> {
39    /// The shared detector core.
40    core: MimeDetectorCore,
41    /// The repository used for filename detection.
42    repository: &'a MimeRepository,
43    /// The command runner used for command execution.
44    command_runner: CommandRunner,
45}
46
47impl FileCommandMimeDetector<'static> {
48    /// Creates a detector using the embedded repository for filename guesses.
49    ///
50    /// # Returns
51    /// File command detector.
52    pub fn new() -> Self {
53        Self::with_repository(default_repository())
54    }
55
56    /// Creates a detector using the embedded repository and explicit config.
57    ///
58    /// # Parameters
59    /// - `config`: MIME detector configuration.
60    ///
61    /// # Returns
62    /// File command detector.
63    pub fn from_mime_config(config: MimeConfig) -> Self {
64        Self::with_repository_runner_and_config(
65            default_repository(),
66            Self::default_command_runner(),
67            config,
68        )
69    }
70}
71
72impl<'a> FileCommandMimeDetector<'a> {
73    /// System command executable name.
74    pub const COMMAND: &'static str = "file";
75    /// Argument enabling MIME type output.
76    pub const MIME_TYPE_ARG: &'static str = "--mime-type";
77    /// Argument enabling concise output.
78    pub const BRIEF_ARG: &'static str = "--brief";
79
80    /// Creates a detector using an explicit repository for filename guesses.
81    ///
82    /// # Parameters
83    /// - `repository`: Repository used for filename detection.
84    ///
85    /// # Returns
86    /// File command detector borrowing `repository`.
87    pub fn with_repository(repository: &'a MimeRepository) -> Self {
88        Self::with_repository_and_runner(repository, Self::default_command_runner())
89    }
90
91    /// Creates a detector using an explicit repository and command runner.
92    ///
93    /// # Parameters
94    /// - `repository`: Repository used for filename detection.
95    /// - `command_runner`: Runner used for all `file` command executions.
96    ///
97    /// # Returns
98    /// File command detector borrowing `repository` and owning the supplied
99    /// runner.
100    pub fn with_repository_and_runner(
101        repository: &'a MimeRepository,
102        command_runner: CommandRunner,
103    ) -> Self {
104        Self::with_repository_runner_and_config(repository, command_runner, MimeConfig::default())
105    }
106
107    /// Creates a detector using an explicit repository, runner, and config.
108    ///
109    /// # Parameters
110    /// - `repository`: Repository used for filename detection.
111    /// - `command_runner`: Runner used for all `file` command executions.
112    /// - `config`: MIME detector configuration.
113    ///
114    /// # Returns
115    /// File command detector borrowing `repository` and owning the supplied
116    /// runner.
117    pub fn with_repository_runner_and_config(
118        repository: &'a MimeRepository,
119        command_runner: CommandRunner,
120        config: MimeConfig,
121    ) -> Self {
122        Self {
123            core: MimeDetectorCore::from_mime_config(config),
124            repository,
125            command_runner,
126        }
127    }
128
129    /// Gets the shared detector core.
130    ///
131    /// # Returns
132    /// Shared detector core.
133    pub fn core(&self) -> &MimeDetectorCore {
134        &self.core
135    }
136
137    /// Gets mutable shared detector core.
138    ///
139    /// # Returns
140    /// Mutable shared detector core.
141    pub fn core_mut(&mut self) -> &mut MimeDetectorCore {
142        &mut self.core
143    }
144
145    /// Gets the repository used for filename detection.
146    ///
147    /// # Returns
148    /// Repository reference.
149    pub fn repository(&self) -> &'a MimeRepository {
150        self.repository
151    }
152
153    /// Gets the command runner used by this detector.
154    ///
155    /// # Returns
156    /// Runner used for `file` command executions.
157    pub fn command_runner(&self) -> &CommandRunner {
158        &self.command_runner
159    }
160
161    /// Replaces the command runner used by this detector.
162    ///
163    /// # Parameters
164    /// - `command_runner`: New runner configuration.
165    pub fn set_command_runner(&mut self, command_runner: CommandRunner) {
166        self.command_runner = command_runner;
167    }
168
169    /// Replaces the command runner and returns the updated detector.
170    ///
171    /// # Parameters
172    /// - `command_runner`: New runner configuration.
173    ///
174    /// # Returns
175    /// The updated detector.
176    pub fn with_command_runner(mut self, command_runner: CommandRunner) -> Self {
177        self.command_runner = command_runner;
178        self
179    }
180
181    /// Detects content from a local file using the `file` command only.
182    ///
183    /// # Parameters
184    /// - `file`: Local file path to inspect.
185    ///
186    /// # Returns
187    /// MIME type name, or `None`.
188    ///
189    /// # Errors
190    /// Returns [`MimeError::Command`](crate::MimeError::Command) when the command cannot be executed.
191    pub fn detect_file_by_content(&self, file: &Path) -> MimeResult<Option<String>> {
192        Ok(self.guess_from_file_command(file)?.into_iter().next())
193    }
194
195    /// Detects a local file from filename and content.
196    ///
197    /// # Parameters
198    /// - `file`: Local file path.
199    /// - `policy`: Strategy for resolving filename and content results.
200    ///
201    /// # Returns
202    /// Selected MIME type name, or `None`.
203    ///
204    /// # Errors
205    /// Returns [`MimeError::Command`](crate::MimeError::Command) when command execution fails.
206    pub fn detect_file(
207        &self,
208        file: &Path,
209        policy: MimeDetectionPolicy,
210    ) -> MimeResult<Option<String>> {
211        <Self as MimeDetector>::detect_file(self, file, policy)
212    }
213
214    /// Detects a seekable reader by staging its prefix to a temporary file.
215    ///
216    /// # Parameters
217    /// - `reader`: Reader to inspect. The original position is restored.
218    /// - `filename`: Optional filename.
219    /// - `policy`: Strategy for resolving filename and content results.
220    ///
221    /// # Returns
222    /// Selected MIME type name, or `None`.
223    ///
224    /// # Errors
225    /// Returns [`MimeError::Io`](crate::MimeError::Io) when stream operations fail.
226    pub fn detect_reader(
227        &self,
228        reader: &mut dyn ReadSeek,
229        filename: Option<&str>,
230        policy: MimeDetectionPolicy,
231    ) -> MimeResult<Option<String>> {
232        <Self as MimeDetector>::detect_reader(self, reader, filename, policy)
233    }
234
235    /// Checks whether the `file` command is available.
236    ///
237    /// Availability is checked by executing `file --mime-type --brief .` with a
238    /// quiet command runner. This only validates that the command can be
239    /// started successfully in the current process environment; it does not
240    /// guarantee that every future file path can be inspected.
241    ///
242    /// # Returns
243    /// `true` when the command can be executed.
244    pub fn is_available() -> bool {
245        CommandRunner::new()
246            .disable_logging(true)
247            .run(Self::command_for_path(Path::new(".")))
248            .is_ok()
249    }
250
251    /// Gets filename candidates from the repository.
252    ///
253    /// # Parameters
254    /// - `filename`: Filename or path.
255    ///
256    /// # Returns
257    /// Candidate MIME type names.
258    fn guess_from_filename(&self, filename: &str) -> Vec<String> {
259        self.repository
260            .detect_by_filename(filename)
261            .into_iter()
262            .map(|mime_type| mime_type.name().to_owned())
263            .collect()
264    }
265
266    /// Gets content candidates from `file`.
267    ///
268    /// # Parameters
269    /// - `path`: Local path to inspect.
270    ///
271    /// # Returns
272    /// Zero or one MIME type names.
273    ///
274    /// # Errors
275    /// Returns [`MimeError::Command`](crate::MimeError::Command) when command execution fails.
276    fn guess_from_file_command(&self, path: &Path) -> MimeResult<Vec<String>> {
277        let output = self.command_runner.run(Self::command_for_path(path))?;
278        let text = output.stdout_lossy_text();
279        let result = text.trim();
280        if result.is_empty() {
281            Ok(Vec::new())
282        } else {
283            Ok(vec![result.to_owned()])
284        }
285    }
286
287    /// Creates the default command runner for file detection.
288    ///
289    /// # Returns
290    /// Runner used by the default detector.
291    fn default_command_runner() -> CommandRunner {
292        CommandRunner::new()
293    }
294
295    /// Builds the structured `file` command for one path.
296    ///
297    /// # Parameters
298    /// - `path`: Local file path passed as an argument without shell parsing.
299    ///
300    /// # Returns
301    /// Structured command description.
302    fn command_for_path(path: &Path) -> Command {
303        Command::new(Self::COMMAND)
304            .arg(Self::MIME_TYPE_ARG)
305            .arg(Self::BRIEF_ARG)
306            .arg_os(path)
307    }
308}
309
310impl Default for FileCommandMimeDetector<'static> {
311    /// Creates a detector using the embedded repository.
312    fn default() -> Self {
313        Self::new()
314    }
315}
316
317impl<'a> FileBasedMimeDetector for FileCommandMimeDetector<'a> {
318    /// Gets the shared detector core.
319    fn core(&self) -> &MimeDetectorCore {
320        &self.core
321    }
322
323    /// Gets the maximum content prefix length from the repository.
324    fn max_test_bytes(&self) -> usize {
325        self.repository.max_test_bytes()
326    }
327
328    /// Guesses MIME type names from filename rules.
329    fn guess_from_filename(&self, filename: &str) -> Vec<String> {
330        FileCommandMimeDetector::guess_from_filename(self, filename)
331    }
332
333    /// Guesses MIME type names from a local file using the file command.
334    fn guess_from_local_file(&self, file: &Path) -> MimeResult<Vec<String>> {
335        self.guess_from_file_command(file)
336    }
337}