fontdrasil/
paths.rs

/// Character placed between the escaped name and its case-disambiguation code.
const SEPARATOR_CHAR: char = '^';

/// Returns `true` if `c` has to be percent-escaped when building a filename.
///
/// Reserved characters are the ASCII control characters (U+0000..=U+001F and
/// U+007F), [`SEPARATOR_CHAR`] itself, and a fixed set of ASCII punctuation.
fn is_reserved_char(c: char) -> bool {
    // The punctuation that is reserved in addition to controls and the separator.
    const RESERVED_PUNCTUATION: &str = "\"%*+/:<>?[\\]|";

    c.is_ascii_control() || c == SEPARATOR_CHAR || RESERVED_PUNCTUATION.contains(c)
}
24
/// Returns `true` if the whole of `name` is, ignoring ASCII case, one of a
/// fixed list of reserved device-style names.
///
/// Only an exact match counts; a name such as `CON.txt` is not matched here.
fn is_reserved_filename(name: &str) -> bool {
    const RESERVED_NAMES: [&str; 12] = [
        "CON", "PRN", "AUX", "CLOCK$", "NUL", "COM1", "COM2", "COM3", "COM4", "LPT1", "LPT2",
        "LPT3",
    ];

    RESERVED_NAMES
        .iter()
        .any(|reserved| name.eq_ignore_ascii_case(reserved))
}
42
/// Digit alphabet for the case-disambiguation code: `0`-`9` followed by `A`-`V`.
const BASE_32_CHARS: [char; 32] = {
    let alphabet = b"0123456789ABCDEFGHIJKLMNOPQRSTUV";
    let mut table = ['0'; 32];
    let mut i = 0;
    // `for` loops are not available in const context, hence the manual counter.
    while i < table.len() {
        table[i] = alphabet[i] as char;
        i += 1;
    }
    table
};
47
/// Returns `true` if `c` can be copied into a filename unchanged.
///
/// Rejected are the ASCII control characters (below U+0020, plus U+007F) and
/// the punctuation `" * + / : < > ? [ \ ] |`. Everything else is accepted.
#[inline]
fn ok_for_filenames(c: char) -> bool {
    let forbidden_punctuation = matches!(
        c,
        '"' | '*' | '+' | '/' | ':' | '<' | '>' | '?' | '[' | '\\' | ']' | '|'
    );
    !(c.is_ascii_control() || forbidden_punctuation)
}
69
/// Is this name part a poor choice on Windows?
///
/// Note that this applies to the portion of the name preceding the first `.` or, as the
/// documentation puts it, "NUL.txt and NUL.tar.gz are both equivalent to NUL".
/// The comparison ignores ASCII case.
///
/// "Do not use the following reserved names for the name of a file" from
/// <https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file#naming-conventions>
fn scary_for_windows(name: &str) -> bool {
    // Everything from the first '.' onwards is irrelevant to the device-name check.
    let stem = name.split_once('.').map_or(name, |(stem, _)| stem);
    matches!(
        stem.to_ascii_uppercase().as_str(),
        "CON"
            | "PRN"
            | "AUX"
            | "NUL"
            | "COM0"
            | "COM1"
            | "COM2"
            | "COM3"
            | "COM4"
            | "COM5"
            | "COM6"
            | "COM7"
            | "COM8"
            | "COM9"
            | "LPT0"
            | "LPT1"
            | "LPT2"
            | "LPT3"
            | "LPT4"
            | "LPT5"
            | "LPT6"
            | "LPT7"
            | "LPT8"
            | "LPT9"
    )
}
111
112/// Matches <https://github.com/googlefonts/fontra/blob/15bc0b8401054390484cfb86d509d633d29657a1/src/fontra/backends/filenames.py#L40-L64>
113pub fn string_to_filename(string: &str, suffix: &str) -> String {
114    let string_bytes = string.as_bytes();
115    let mut code_digits: Vec<_> = string_bytes
116        .chunks(5)
117        .map(|chunk| {
118            let mut digit = 0;
119            let mut bit = 1;
120            for byte in chunk {
121                if byte.is_ascii_uppercase() {
122                    digit |= bit
123                }
124                bit <<= 1;
125            }
126            digit
127        })
128        .collect();
129    while let Some(0) = code_digits.last() {
130        code_digits.pop();
131    }
132
133    let mut filename = String::new();
134    for (i, c) in string.chars().enumerate() {
135        if i == 0 && c == '.' {
136            filename.push_str("%2E");
137        } else if !is_reserved_char(c) {
138            filename.push(c);
139        } else {
140            filename.push_str(format!("%{:02X}", c as u32).as_str());
141        }
142    }
143
144    if code_digits.is_empty() && is_reserved_filename(string) {
145        code_digits.push(0);
146    }
147
148    if !code_digits.is_empty() {
149        filename.push(SEPARATOR_CHAR);
150        for d in code_digits {
151            assert!(d < 32, "We've made a terrible mistake");
152            filename.push(BASE_32_CHARS[d]);
153        }
154    }
155
156    for c in suffix.chars() {
157        filename.push(c);
158    }
159    filename
160}
161
162/// Makes a cursory attempt to not produce bad filenames.
163///
164/// Intended for things like turning a glyph name into a filename. Not meant to
165/// be reversible. Use of illegal filename chars may result in duplicate names.
166///
167/// See
168/// * <https://unifiedfontobject.org/versions/ufo3/conventions/#example-implementation>
169/// * <https://github.com/unified-font-object/ufo-spec/issues/164>
170pub fn safe_filename(name: &str, suffix: &str) -> String {
171    let mut filename = Vec::new();
172    for ch in name.chars() {
173        if ok_for_filenames(ch) {
174            filename.push(ch);
175        } else {
176            filename.push('_');
177        }
178        if ch == '_' || ch.is_uppercase() {
179            filename.push('_');
180        }
181    }
182    filename.extend(suffix.chars());
183
184    if let Some(ch) = filename.first() {
185        if *ch == '.' {
186            filename[0] = '_';
187        }
188    }
189
190    let filename: String = filename.into_iter().collect();
191
192    // Windows fears no _
193    if scary_for_windows(&filename) {
194        "_".to_string() + &filename
195    } else {
196        filename
197    }
198}
199
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::safe_filename;

    /// Asserts that the filenames produced for `names` remain distinct once lowercased.
    ///
    /// <https://github.com/googlefonts/fontc/issues/41>
    fn assert_unique_for_caseinsensitive_fs(names: &[&str]) {
        let mut filenames = HashSet::new();
        for name in names {
            filenames.insert(safe_filename(name, "").to_lowercase());
        }
        assert_eq!(
            names.len(),
            filenames.len(),
            "{names:?} became {filenames:?}"
        );
    }

    #[test]
    fn lower_and_upper_a() {
        assert_unique_for_caseinsensitive_fs(&["a", "A"]);
    }

    #[test]
    fn adding_underscore_avoids_collisions() {
        // if we don't add _ to _ the resulting names are identical
        assert_unique_for_caseinsensitive_fs(&["Aa", "a_a"]);
    }

    #[test]
    fn starts_with_dot() {
        // The dot is replaced whether the suffix is empty or not.
        for (name, suffix) in [(".notdef", ""), (".not", "def")] {
            assert_eq!("_notdef", safe_filename(name, suffix));
        }
    }

    #[test]
    fn dont_scare_windows() {
        // (name, suffix, expected filename)
        let cases = [
            ("NUL", "", "N_U_L_"),
            ("nul", ".tar.gz", "_nul.tar.gz"),
            ("\u{0}", "", "_"),
            ("\u{1f}", ".31", "_.31"),
            ("\u{7f}", ".127", "_.127"),
        ];
        let expected: Vec<_> = cases.iter().map(|(_, _, want)| want.to_string()).collect();
        let actual: Vec<_> = cases
            .iter()
            .map(|(name, suffix, _)| safe_filename(name, suffix))
            .collect();
        assert_eq!(expected, actual);
    }
}