qubit_mime/detector/file_command_mime_detector.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! MIME detector backed by the system `file` command.
11//!
12//! This detector uses the embedded repository for filename guesses and invokes
13//! `file --mime-type --brief <path>` for content guesses on local files or
14//! temporary files staged from seekable readers. It is registered under the
15//! built-in provider id `file` and aliases such as `file-command`.
16
17use qubit_command::{
18 Command,
19 CommandRunner,
20};
21use qubit_io::ReadSeek;
22use std::path::Path;
23
24use crate::{
25 FileBasedMimeDetector,
26 MimeConfig,
27 MimeDetectionPolicy,
28 MimeDetector,
29 MimeDetectorCore,
30 MimeRepository,
31 MimeResult,
32};
33
34use super::repository_mime_detector::default_repository;
35
36/// MIME detector backed by `file --mime-type --brief`.
37#[derive(Debug, Clone)]
38pub struct FileCommandMimeDetector<'a> {
39 /// The shared detector core.
40 core: MimeDetectorCore,
41 /// The repository used for filename detection.
42 repository: &'a MimeRepository,
43 /// The command runner used for command execution.
44 command_runner: CommandRunner,
45}
46
47impl FileCommandMimeDetector<'static> {
48 /// Creates a detector using the embedded repository for filename guesses.
49 ///
50 /// # Returns
51 /// File command detector.
52 pub fn new() -> Self {
53 Self::with_repository(default_repository())
54 }
55
56 /// Creates a detector using the embedded repository and explicit config.
57 ///
58 /// # Parameters
59 /// - `config`: MIME detector configuration.
60 ///
61 /// # Returns
62 /// File command detector.
63 pub fn from_mime_config(config: MimeConfig) -> Self {
64 Self::with_repository_runner_and_config(
65 default_repository(),
66 Self::default_command_runner(),
67 config,
68 )
69 }
70}
71
72impl<'a> FileCommandMimeDetector<'a> {
73 /// System command executable name.
74 pub const COMMAND: &'static str = "file";
75 /// Argument enabling MIME type output.
76 pub const MIME_TYPE_ARG: &'static str = "--mime-type";
77 /// Argument enabling concise output.
78 pub const BRIEF_ARG: &'static str = "--brief";
79
80 /// Creates a detector using an explicit repository for filename guesses.
81 ///
82 /// # Parameters
83 /// - `repository`: Repository used for filename detection.
84 ///
85 /// # Returns
86 /// File command detector borrowing `repository`.
87 pub fn with_repository(repository: &'a MimeRepository) -> Self {
88 Self::with_repository_and_runner(repository, Self::default_command_runner())
89 }
90
91 /// Creates a detector using an explicit repository and command runner.
92 ///
93 /// # Parameters
94 /// - `repository`: Repository used for filename detection.
95 /// - `command_runner`: Runner used for all `file` command executions.
96 ///
97 /// # Returns
98 /// File command detector borrowing `repository` and owning the supplied
99 /// runner.
100 pub fn with_repository_and_runner(
101 repository: &'a MimeRepository,
102 command_runner: CommandRunner,
103 ) -> Self {
104 Self::with_repository_runner_and_config(repository, command_runner, MimeConfig::default())
105 }
106
107 /// Creates a detector using an explicit repository, runner, and config.
108 ///
109 /// # Parameters
110 /// - `repository`: Repository used for filename detection.
111 /// - `command_runner`: Runner used for all `file` command executions.
112 /// - `config`: MIME detector configuration.
113 ///
114 /// # Returns
115 /// File command detector borrowing `repository` and owning the supplied
116 /// runner.
117 pub fn with_repository_runner_and_config(
118 repository: &'a MimeRepository,
119 command_runner: CommandRunner,
120 config: MimeConfig,
121 ) -> Self {
122 Self {
123 core: MimeDetectorCore::from_mime_config(config),
124 repository,
125 command_runner,
126 }
127 }
128
129 /// Gets the shared detector core.
130 ///
131 /// # Returns
132 /// Shared detector core.
133 pub fn core(&self) -> &MimeDetectorCore {
134 &self.core
135 }
136
137 /// Gets mutable shared detector core.
138 ///
139 /// # Returns
140 /// Mutable shared detector core.
141 pub fn core_mut(&mut self) -> &mut MimeDetectorCore {
142 &mut self.core
143 }
144
145 /// Gets the repository used for filename detection.
146 ///
147 /// # Returns
148 /// Repository reference.
149 pub fn repository(&self) -> &'a MimeRepository {
150 self.repository
151 }
152
153 /// Gets the command runner used by this detector.
154 ///
155 /// # Returns
156 /// Runner used for `file` command executions.
157 pub fn command_runner(&self) -> &CommandRunner {
158 &self.command_runner
159 }
160
161 /// Replaces the command runner used by this detector.
162 ///
163 /// # Parameters
164 /// - `command_runner`: New runner configuration.
165 pub fn set_command_runner(&mut self, command_runner: CommandRunner) {
166 self.command_runner = command_runner;
167 }
168
169 /// Replaces the command runner and returns the updated detector.
170 ///
171 /// # Parameters
172 /// - `command_runner`: New runner configuration.
173 ///
174 /// # Returns
175 /// The updated detector.
176 pub fn with_command_runner(mut self, command_runner: CommandRunner) -> Self {
177 self.command_runner = command_runner;
178 self
179 }
180
181 /// Detects content from a local file using the `file` command only.
182 ///
183 /// # Parameters
184 /// - `file`: Local file path to inspect.
185 ///
186 /// # Returns
187 /// MIME type name, or `None`.
188 ///
189 /// # Errors
190 /// Returns [`MimeError::Command`](crate::MimeError::Command) when the command cannot be executed.
191 pub fn detect_file_by_content(&self, file: &Path) -> MimeResult<Option<String>> {
192 Ok(self.guess_from_file_command(file)?.into_iter().next())
193 }
194
195 /// Detects a local file from filename and content.
196 ///
197 /// # Parameters
198 /// - `file`: Local file path.
199 /// - `policy`: Strategy for resolving filename and content results.
200 ///
201 /// # Returns
202 /// Selected MIME type name, or `None`.
203 ///
204 /// # Errors
205 /// Returns [`MimeError::Command`](crate::MimeError::Command) when command execution fails.
206 pub fn detect_file(
207 &self,
208 file: &Path,
209 policy: MimeDetectionPolicy,
210 ) -> MimeResult<Option<String>> {
211 <Self as MimeDetector>::detect_file(self, file, policy)
212 }
213
214 /// Detects a seekable reader by staging its prefix to a temporary file.
215 ///
216 /// # Parameters
217 /// - `reader`: Reader to inspect. The original position is restored.
218 /// - `filename`: Optional filename.
219 /// - `policy`: Strategy for resolving filename and content results.
220 ///
221 /// # Returns
222 /// Selected MIME type name, or `None`.
223 ///
224 /// # Errors
225 /// Returns [`MimeError::Io`](crate::MimeError::Io) when stream operations fail.
226 pub fn detect_reader(
227 &self,
228 reader: &mut dyn ReadSeek,
229 filename: Option<&str>,
230 policy: MimeDetectionPolicy,
231 ) -> MimeResult<Option<String>> {
232 <Self as MimeDetector>::detect_reader(self, reader, filename, policy)
233 }
234
235 /// Checks whether the `file` command is available.
236 ///
237 /// Availability is checked by executing `file --mime-type --brief .` with a
238 /// quiet command runner. This only validates that the command can be
239 /// started successfully in the current process environment; it does not
240 /// guarantee that every future file path can be inspected.
241 ///
242 /// # Returns
243 /// `true` when the command can be executed.
244 pub fn is_available() -> bool {
245 CommandRunner::new()
246 .disable_logging(true)
247 .run(Self::command_for_path(Path::new(".")))
248 .is_ok()
249 }
250
251 /// Gets filename candidates from the repository.
252 ///
253 /// # Parameters
254 /// - `filename`: Filename or path.
255 ///
256 /// # Returns
257 /// Candidate MIME type names.
258 fn guess_from_filename(&self, filename: &str) -> Vec<String> {
259 self.repository
260 .detect_by_filename(filename)
261 .into_iter()
262 .map(|mime_type| mime_type.name().to_owned())
263 .collect()
264 }
265
266 /// Gets content candidates from `file`.
267 ///
268 /// # Parameters
269 /// - `path`: Local path to inspect.
270 ///
271 /// # Returns
272 /// Zero or one MIME type names.
273 ///
274 /// # Errors
275 /// Returns [`MimeError::Command`](crate::MimeError::Command) when command execution fails.
276 fn guess_from_file_command(&self, path: &Path) -> MimeResult<Vec<String>> {
277 let output = self.command_runner.run(Self::command_for_path(path))?;
278 let text = output.stdout_lossy_text();
279 let result = text.trim();
280 if result.is_empty() {
281 Ok(Vec::new())
282 } else {
283 Ok(vec![result.to_owned()])
284 }
285 }
286
287 /// Creates the default command runner for file detection.
288 ///
289 /// # Returns
290 /// Runner used by the default detector.
291 fn default_command_runner() -> CommandRunner {
292 CommandRunner::new()
293 }
294
295 /// Builds the structured `file` command for one path.
296 ///
297 /// # Parameters
298 /// - `path`: Local file path passed as an argument without shell parsing.
299 ///
300 /// # Returns
301 /// Structured command description.
302 fn command_for_path(path: &Path) -> Command {
303 Command::new(Self::COMMAND)
304 .arg(Self::MIME_TYPE_ARG)
305 .arg(Self::BRIEF_ARG)
306 .arg_os(path)
307 }
308}
309
310impl Default for FileCommandMimeDetector<'static> {
311 /// Creates a detector using the embedded repository.
312 fn default() -> Self {
313 Self::new()
314 }
315}
316
317impl<'a> FileBasedMimeDetector for FileCommandMimeDetector<'a> {
318 /// Gets the shared detector core.
319 fn core(&self) -> &MimeDetectorCore {
320 &self.core
321 }
322
323 /// Gets the maximum content prefix length from the repository.
324 fn max_test_bytes(&self) -> usize {
325 self.repository.max_test_bytes()
326 }
327
328 /// Guesses MIME type names from filename rules.
329 fn guess_from_filename(&self, filename: &str) -> Vec<String> {
330 FileCommandMimeDetector::guess_from_filename(self, filename)
331 }
332
333 /// Guesses MIME type names from a local file using the file command.
334 fn guess_from_local_file(&self, file: &Path) -> MimeResult<Vec<String>> {
335 self.guess_from_file_command(file)
336 }
337}