docbox_core/utils/
file.rs

1use mime::Mime;
2use std::path::Path;
3
/// Characters that are kept unchanged in S3 file names: ASCII a-z, A-Z and 0-9
static ALLOWED_S3_CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

/// Sanitizes `name` so it only contains characters allowed in S3 file names.
///
/// Each character of the input is handled as follows:
/// - whitespace and `-` are replaced with `_`
/// - characters listed in `ALLOWED_S3_CHARS` are kept as-is
/// - anything else (including `_` itself) is dropped
///
/// At most 50 characters of the sanitized output are returned.
pub fn make_s3_safe(name: &str) -> String {
    // Cap on the number of characters kept once the name has been sanitized
    const MAX_LEN: usize = 50;

    let sanitized = name.chars().filter_map(|ch| match ch {
        // Dashes and any kind of whitespace collapse to an underscore
        '-' => Some('_'),
        _ if ch.is_whitespace() => Some('_'),
        // Plain alphanumerics pass straight through
        _ if ALLOWED_S3_CHARS.contains(ch) => Some(ch),
        // Everything else is silently removed
        _ => None,
    });

    sanitized.take(MAX_LEN).collect()
}
25
/// Returns the extension of the file name `name` as an owned string.
///
/// The extension is determined by [`Path::extension`], so `None` is returned
/// when the name has no extension (e.g. `file` or `.hidden`) and only the
/// final segment is returned for names such as `archive.tar.gz`.
pub fn get_file_name_ext(name: &str) -> Option<String> {
    Path::new(name)
        .extension()
        // Convert from `OsStr`; yields `None` if the extension is not valid UTF-8
        .and_then(|os_ext| os_ext.to_str())
        .map(str::to_owned)
}
33
34/// Finds the file extension to use for a file based on its mime type
35pub fn get_mime_ext(mime: &Mime) -> Option<&'static str> {
36    if let Some(known_match) = mime2ext::mime2ext(mime) {
37        return Some(known_match);
38    }
39
40    // Search the fallback extension types
41    OTHER_EXT_MAP.iter().find_map(|(other_mime, ext)| {
42        if *other_mime == *mime {
43            Some(*ext)
44        } else {
45            None
46        }
47    })
48}
49
/// Fallback table of `(mime type, file extension)` pairs for more obscure
/// mime types.
///
/// Most of these are legacy types but supported by LibreOffice so
/// we support them here as well
#[rustfmt::skip]
pub static OTHER_EXT_MAP: &[(&str, &str)] = &[
    // Excel workbook with macros enabled (Microsoft)
    ("application/vnd.ms-excel.sheet.macroEnabled.12", "xlsm"),
    // OpenDocument Text, flat XML variant
    ("application/vnd.oasis.opendocument.text-flat-xml", "fodt"),
    // ClarisWorks document (Legacy)
    ("application/clarisworks", "cwk"),
    // MacWrite II document (Legacy)
    ("application/macwriteii", "mw"),
    // T602 word processor file (Legacy)
    ("application/x-t602", "602"),
    // Hangul Word Processor document
    ("application/x-hwp", "hwp"),
    // FictionBook (FB2) e-book
    ("application/x-fictionbook+xml", "fb2"),
    // AportisDoc eBook (Legacy)
    ("application/x-aportisdoc", "pdb"),
    // Plucker eBook/Web content (Legacy)
    ("application/prs.plucker", "pdb"),
    // Microsoft Pocket Word document (Legacy)
    ("application/x-pocket-word", "psw"),
    // OpenDocument Spreadsheet, flat XML variant
    ("application/vnd.oasis.opendocument.spreadsheet-flat-xml", "fods"),
    // OpenOffice Base database file
    ("application/vnd.sun.xml.base", "odb"),
];
82
#[cfg(test)]
mod test {
    use super::{get_file_name_ext, get_mime_ext, make_s3_safe};
    use mime::Mime;

    /// Whitespace and dashes are both replaced with underscores
    #[test]
    fn test_make_s3_safe_basic() {
        assert_eq!(make_s3_safe("my file-name 123"), "my_file_name_123");
    }

    /// Input made up solely of allowed characters is returned untouched
    #[test]
    fn test_make_s3_safe_only_allowed_chars() {
        let name = "abcXYZ0123";
        assert_eq!(make_s3_safe(name), name);
    }

    /// Characters outside the allowed set are stripped from the output
    #[test]
    fn test_make_s3_safe_removes_disallowed_chars() {
        assert_eq!(make_s3_safe("file*name$with%chars!"), "filenamewithchars");
    }

    /// Output is truncated to 50 characters
    #[test]
    fn test_make_s3_safe_max_length() {
        let long_name = "a".repeat(60);
        let sanitized = make_s3_safe(&long_name);
        assert_eq!(sanitized, "a".repeat(50));
    }

    /// A simple `name.ext` yields the extension
    #[test]
    fn test_get_file_name_ext_basic() {
        assert_eq!(get_file_name_ext("file.txt").as_deref(), Some("txt"));
    }

    /// Names without a dot have no extension
    #[test]
    fn test_get_file_name_ext_no_ext() {
        assert_eq!(get_file_name_ext("file"), None);
    }

    /// A leading dot alone does not count as an extension separator
    #[test]
    fn test_get_file_name_ext_hidden_file() {
        assert_eq!(get_file_name_ext(".hidden"), None);
    }

    /// Only the portion after the final dot is treated as the extension
    #[test]
    fn test_get_file_name_ext_multiple_dots() {
        assert_eq!(get_file_name_ext("archive.tar.gz").as_deref(), Some("gz"));
    }

    /// Well-known mime types resolve to their usual extension
    #[test]
    fn test_get_mime_ext_known_mime() {
        let png = "image/png".parse::<Mime>().unwrap();
        assert_eq!(get_mime_ext(&png), Some("png"));
    }

    /// Mime types absent from every lookup source resolve to nothing
    #[test]
    fn test_get_mime_ext_unknown_mime() {
        let unknown = "unknown/mime".parse::<Mime>().unwrap();
        assert_eq!(get_mime_ext(&unknown), None);
    }
}