qubit_mime/detector/stream_based_mime_detector.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Helpers for stream-backed MIME detectors.
11
12use std::fmt::Debug;
13use std::fs::File;
14use std::io::SeekFrom;
15use std::path::Path;
16
17use qubit_io::ReadSeek;
18
19use crate::{
20 MimeDetectorCore,
21 MimeResult,
22};
23
24/// Core implementation contract for detectors that can inspect content bytes.
25pub trait StreamBasedMimeDetector: Debug + Send + Sync {
26 /// Gets the shared detector core.
27 ///
28 /// # Returns
29 /// Shared detector configuration and merge/refinement behavior.
30 fn core(&self) -> &MimeDetectorCore;
31
32 /// Gets the maximum number of bytes needed for content inspection.
33 ///
34 /// # Returns
35 /// Content prefix length to read from files and readers.
36 fn max_test_bytes(&self) -> usize;
37
38 /// Guesses MIME type names from a filename.
39 ///
40 /// # Parameters
41 /// - `filename`: File path or basename.
42 ///
43 /// # Returns
44 /// Candidate MIME type names ordered by backend confidence.
45 fn guess_from_filename(&self, filename: &str) -> Vec<String>;
46
47 /// Guesses MIME type names from content bytes.
48 ///
49 /// # Parameters
50 /// - `content`: Content bytes.
51 ///
52 /// # Returns
53 /// Candidate MIME type names ordered by backend confidence.
54 ///
55 /// # Errors
56 /// Returns an error when a backend cannot inspect the supplied content.
57 fn guess_from_content_bytes(&self, content: &[u8]) -> MimeResult<Vec<String>>;
58
59 /// Guesses MIME type names from a seekable reader.
60 ///
61 /// # Parameters
62 /// - `reader`: Reader to inspect. The original position is restored.
63 ///
64 /// # Returns
65 /// Candidate MIME type names and the content prefix used for refinement.
66 ///
67 /// # Errors
68 /// Returns an error when reading, seeking, or backend inspection fails.
69 fn guess_from_reader_stream(
70 &self,
71 reader: &mut dyn ReadSeek,
72 ) -> MimeResult<(Vec<String>, Vec<u8>)> {
73 let content = read_prefix(reader, self.max_test_bytes())?;
74 let candidates = self.guess_from_content_bytes(&content)?;
75 Ok((candidates, content))
76 }
77
78 /// Guesses MIME type names from a local file.
79 ///
80 /// # Parameters
81 /// - `file`: Local file path.
82 ///
83 /// # Returns
84 /// Candidate MIME type names and the content prefix used for refinement.
85 ///
86 /// # Errors
87 /// Returns an error when opening, reading, seeking, or backend inspection fails.
88 fn guess_from_file_stream(&self, file: &Path) -> MimeResult<(Vec<String>, Vec<u8>)> {
89 let mut reader = File::open(file)?;
90 self.guess_from_reader_stream(&mut reader)
91 }
92}
93
94/// Reads a prefix from a stream and restores the original position.
95///
96/// # Parameters
97/// - `reader`: Stream to inspect.
98/// - `max_bytes`: Maximum number of bytes to read.
99///
100/// # Returns
101/// Bytes read from the stream.
102///
103/// # Errors
104/// Returns [`MimeError::Io`](crate::MimeError::Io) when reading or seeking fails.
105pub(crate) fn read_prefix(reader: &mut dyn ReadSeek, max_bytes: usize) -> MimeResult<Vec<u8>> {
106 let position = reader.stream_position()?;
107 let mut buffer = vec![0; max_bytes];
108 let bytes_read = reader.read(&mut buffer)?;
109 buffer.truncate(bytes_read);
110 reader.seek(SeekFrom::Start(position))?;
111 Ok(buffer)
112}