Skip to main content

systemprompt_files/services/upload/
validator.rs

//! Upload type and size policy.
//!
//! [`FileValidator`] enforces the configured size limit, rejects a blocklist of
//! executable/script MIME types, and maps allowed types to a [`FileCategory`]
//! used for storage layout and extension resolution. [`FileValidationError`]
//! reports each rejection reason.
7
8use crate::config::FileUploadConfig;
9use thiserror::Error;
10
11#[derive(Debug, Error)]
12pub enum FileValidationError {
13    #[error("File uploads are disabled")]
14    UploadsDisabled,
15
16    #[error("File size {size} bytes exceeds maximum allowed {max} bytes")]
17    FileTooLarge { size: u64, max: u64 },
18
19    #[error("File type '{mime_type}' is not allowed")]
20    TypeNotAllowed { mime_type: String },
21
22    #[error("File type '{mime_type}' is blocked for security reasons")]
23    TypeBlocked { mime_type: String },
24
25    #[error("File category '{category}' is disabled in configuration")]
26    CategoryDisabled { category: String },
27}
28
/// Broad media class assigned to an upload.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileCategory {
    /// Still images.
    Image,
    /// Office documents and text-like files.
    Document,
    /// Audio recordings.
    Audio,
    /// Video recordings.
    Video,
}

impl FileCategory {
    /// Returns the subdirectory name associated with this category
    /// (plural for images and documents, e.g. `"images"`).
    pub const fn storage_subdir(&self) -> &'static str {
        match *self {
            Self::Video => "video",
            Self::Audio => "audio",
            Self::Document => "documents",
            Self::Image => "images",
        }
    }

    /// Returns the lowercase, singular name of this category, e.g. `"image"`.
    pub const fn display_name(&self) -> &'static str {
        match *self {
            Self::Video => "video",
            Self::Audio => "audio",
            Self::Document => "document",
            Self::Image => "image",
        }
    }
}
56
57#[derive(Debug, Clone, Copy)]
58pub struct FileValidator {
59    config: FileUploadConfig,
60}
61
62impl FileValidator {
63    const IMAGE_TYPES: &'static [&'static str] = &[
64        "image/jpeg",
65        "image/png",
66        "image/gif",
67        "image/webp",
68        "image/svg+xml",
69        "image/bmp",
70        "image/tiff",
71        "image/x-icon",
72        "image/vnd.microsoft.icon",
73    ];
74
75    const DOCUMENT_TYPES: &'static [&'static str] = &[
76        "application/pdf",
77        "application/msword",
78        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
79        "application/vnd.ms-excel",
80        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
81        "application/vnd.ms-powerpoint",
82        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
83        "text/plain",
84        "text/csv",
85        "text/markdown",
86        "text/html",
87        "application/json",
88        "application/xml",
89        "text/xml",
90        "application/rtf",
91    ];
92
93    const AUDIO_TYPES: &'static [&'static str] = &[
94        "audio/mpeg",
95        "audio/mp3",
96        "audio/wav",
97        "audio/wave",
98        "audio/x-wav",
99        "audio/ogg",
100        "audio/webm",
101        "audio/aac",
102        "audio/flac",
103        "audio/mp4",
104        "audio/x-m4a",
105    ];
106
107    const VIDEO_TYPES: &'static [&'static str] = &[
108        "video/mp4",
109        "video/webm",
110        "video/ogg",
111        "video/quicktime",
112        "video/x-msvideo",
113        "video/x-matroska",
114    ];
115
116    const BLOCKED_TYPES: &'static [&'static str] = &[
117        "application/x-executable",
118        "application/x-msdos-program",
119        "application/x-msdownload",
120        "application/x-sh",
121        "application/x-shellscript",
122        "application/x-csh",
123        "application/x-bash",
124        "application/bat",
125        "application/x-bat",
126        "application/x-msi",
127        "application/vnd.microsoft.portable-executable",
128        "application/x-dosexec",
129        "application/x-python-code",
130        "application/javascript",
131        "text/javascript",
132        "application/x-httpd-php",
133        "application/x-php",
134        "text/x-php",
135        "application/x-perl",
136        "text/x-perl",
137        "application/x-ruby",
138        "text/x-ruby",
139        "application/java-archive",
140        "application/x-java-class",
141    ];
142
143    pub const fn new(config: FileUploadConfig) -> Self {
144        Self { config }
145    }
146
147    pub fn validate(
148        &self,
149        mime_type: &str,
150        size_bytes: u64,
151    ) -> Result<FileCategory, FileValidationError> {
152        if !self.config.enabled {
153            return Err(FileValidationError::UploadsDisabled);
154        }
155
156        if size_bytes > self.config.max_file_size_bytes {
157            return Err(FileValidationError::FileTooLarge {
158                size: size_bytes,
159                max: self.config.max_file_size_bytes,
160            });
161        }
162
163        let normalized_mime = mime_type.to_lowercase();
164
165        if Self::BLOCKED_TYPES.contains(&normalized_mime.as_str()) {
166            return Err(FileValidationError::TypeBlocked {
167                mime_type: mime_type.to_owned(),
168            });
169        }
170
171        let category = Self::categorize_mime_type(&normalized_mime)?;
172
173        if !self.is_category_allowed(&category) {
174            return Err(FileValidationError::CategoryDisabled {
175                category: category.display_name().to_owned(),
176            });
177        }
178
179        Ok(category)
180    }
181
182    fn categorize_mime_type(mime_type: &str) -> Result<FileCategory, FileValidationError> {
183        if Self::IMAGE_TYPES.contains(&mime_type) {
184            return Ok(FileCategory::Image);
185        }
186
187        if Self::DOCUMENT_TYPES.contains(&mime_type) {
188            return Ok(FileCategory::Document);
189        }
190
191        if Self::AUDIO_TYPES.contains(&mime_type) {
192            return Ok(FileCategory::Audio);
193        }
194
195        if Self::VIDEO_TYPES.contains(&mime_type) {
196            return Ok(FileCategory::Video);
197        }
198
199        Err(FileValidationError::TypeNotAllowed {
200            mime_type: mime_type.to_owned(),
201        })
202    }
203
204    const fn is_category_allowed(&self, category: &FileCategory) -> bool {
205        match category {
206            FileCategory::Image => self.config.allowed_types.images,
207            FileCategory::Document => self.config.allowed_types.documents,
208            FileCategory::Audio => self.config.allowed_types.audio,
209            FileCategory::Video => self.config.allowed_types.video,
210        }
211    }
212
213    pub fn get_extension(mime_type: &str, filename: Option<&str>) -> String {
214        if let Some(name) = filename {
215            if let Some(ext) = name.rsplit('.').next() {
216                if !ext.is_empty()
217                    && ext.len() <= 10
218                    && ext != name
219                    && ext.chars().all(|c| c.is_ascii_alphanumeric())
220                {
221                    return ext.to_lowercase();
222                }
223            }
224        }
225
226        let lower = mime_type.to_lowercase();
227        MIME_EXTENSION_TABLE
228            .iter()
229            .find(|(mimes, _)| mimes.contains(&lower.as_str()))
230            .map_or("bin", |(_, ext)| *ext)
231            .to_owned()
232    }
233}
234
/// Lookup table from MIME types to a file extension (without the dot).
///
/// Each row pairs one or more equivalent MIME types with the extension they
/// resolve to. Lookups scan the rows in order and use the first row whose
/// MIME list contains the type.
const MIME_EXTENSION_TABLE: &[(&[&str], &str)] = &[
    // Images
    (&["image/jpeg"], "jpg"),
    (&["image/png"], "png"),
    (&["image/gif"], "gif"),
    (&["image/webp"], "webp"),
    (&["image/svg+xml"], "svg"),
    (&["image/bmp"], "bmp"),
    (&["image/tiff"], "tiff"),
    (&["image/x-icon", "image/vnd.microsoft.icon"], "ico"),
    // Documents and text
    (&["application/pdf"], "pdf"),
    (&["text/plain"], "txt"),
    (&["text/csv"], "csv"),
    (&["text/markdown"], "md"),
    (&["text/html"], "html"),
    (&["application/json"], "json"),
    (&["application/xml", "text/xml"], "xml"),
    (&["application/rtf"], "rtf"),
    (&["application/msword"], "doc"),
    (
        &["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
        "docx",
    ),
    (&["application/vnd.ms-excel"], "xls"),
    (
        &["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
        "xlsx",
    ),
    (&["application/vnd.ms-powerpoint"], "ppt"),
    (
        &["application/vnd.openxmlformats-officedocument.presentationml.presentation"],
        "pptx",
    ),
    // Audio
    (&["audio/mpeg", "audio/mp3"], "mp3"),
    (&["audio/wav", "audio/wave", "audio/x-wav"], "wav"),
    (&["audio/ogg"], "ogg"),
    (&["audio/webm"], "weba"),
    (&["audio/aac"], "aac"),
    (&["audio/flac"], "flac"),
    (&["audio/mp4", "audio/x-m4a"], "m4a"),
    // Video
    (&["video/mp4"], "mp4"),
    (&["video/webm"], "webm"),
    (&["video/ogg"], "ogv"),
    (&["video/quicktime"], "mov"),
    (&["video/x-msvideo"], "avi"),
    (&["video/x-matroska"], "mkv"),
];