embedded_sdmmc_dev/filesystem/
filename.rs

1//! Filename related types
2
3use crate::fat::VolumeName;
4use crate::trace;
5
/// Reasons a string can be rejected when converting it to an 8.3 short file name.
///
/// The type is comparable (`PartialEq`/`Eq`) so callers and tests can match on
/// an exact failure, e.g. `assert_eq!(result, Err(FilenameError::NameTooLong))`.
#[cfg_attr(feature = "defmt-log", derive(defmt::Format))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilenameError {
    /// Tried to create a file with an invalid character.
    InvalidCharacter,
    /// Tried to create a file with no file name.
    FilenameEmpty,
    /// Given name was too long (we are limited to 8.3).
    NameTooLong,
    /// A period appeared where none is allowed, such as at the very start of
    /// the name or after extension characters have already been given.
    MisplacedPeriod,
    /// Can't extract utf8 from file name
    Utf8Error,
}
21
22/// Describes things we can convert to short 8.3 filenames
23pub trait ToShortFileName {
24    /// Try and convert this value into a [`ShortFileName`].
25    fn to_short_filename(self) -> Result<ShortFileName, FilenameError>;
26}
27
28impl ToShortFileName for ShortFileName {
29    fn to_short_filename(self) -> Result<ShortFileName, FilenameError> {
30        Ok(self)
31    }
32}
33
34impl ToShortFileName for &ShortFileName {
35    fn to_short_filename(self) -> Result<ShortFileName, FilenameError> {
36        Ok(self.clone())
37    }
38}
39
40impl ToShortFileName for &str {
41    fn to_short_filename(self) -> Result<ShortFileName, FilenameError> {
42        ShortFileName::create_from_str(self)
43    }
44}
45
46/// An MS-DOS 8.3 filename.
47///
48/// ISO-8859-1 encoding is assumed. All lower-case is converted to upper-case by
49/// default.
50#[cfg_attr(feature = "defmt-log", derive(defmt::Format))]
51#[derive(PartialEq, Eq, Clone)]
52pub struct ShortFileName {
53    pub(crate) contents: [u8; Self::TOTAL_LEN],
54}
55
56impl ShortFileName {
57    const BASE_LEN: usize = 8;
58    const TOTAL_LEN: usize = 11;
59
60    /// Get a short file name containing "..", which means "parent directory".
61    pub const fn parent_dir() -> Self {
62        Self {
63            contents: *b"..         ",
64        }
65    }
66
67    /// Get a short file name containing ".", which means "this directory".
68    pub const fn this_dir() -> Self {
69        Self {
70            contents: *b".          ",
71        }
72    }
73
74    /// Get base name (without extension) of the file.
75    pub fn base_name(&self) -> &[u8] {
76        Self::bytes_before_space(&self.contents[..Self::BASE_LEN])
77    }
78
79    /// Get extension of the file (without base name).
80    pub fn extension(&self) -> &[u8] {
81        Self::bytes_before_space(&self.contents[Self::BASE_LEN..])
82    }
83
84    fn bytes_before_space(bytes: &[u8]) -> &[u8] {
85        bytes.split(|b| *b == b' ').next().unwrap_or(&[])
86    }
87
88    /// Create a new MS-DOS 8.3 space-padded file name as stored in the directory entry.
89    ///
90    /// The output uses ISO-8859-1 encoding.
91    pub fn create_from_str(name: &str) -> Result<ShortFileName, FilenameError> {
92        let mut sfn = ShortFileName {
93            contents: [b' '; Self::TOTAL_LEN],
94        };
95
96        // Special case `..`, which means "parent directory".
97        if name == ".." {
98            return Ok(ShortFileName::parent_dir());
99        }
100
101        // Special case `.` (or blank), which means "this directory".
102        if name.is_empty() || name == "." {
103            return Ok(ShortFileName::this_dir());
104        }
105
106        let mut idx = 0;
107        let mut seen_dot = false;
108        for ch in name.chars() {
109            match ch {
110                // Microsoft say these are the invalid characters
111                '\u{0000}'..='\u{001F}'
112                | '"'
113                | '*'
114                | '+'
115                | ','
116                | '/'
117                | ':'
118                | ';'
119                | '<'
120                | '='
121                | '>'
122                | '?'
123                | '['
124                | '\\'
125                | ']'
126                | ' '
127                | '|' => {
128                    return Err(FilenameError::InvalidCharacter);
129                }
130                x if x > '\u{00FF}' => {
131                    // We only handle ISO-8859-1 which is Unicode Code Points
132                    // \U+0000 to \U+00FF. This is above that.
133                    return Err(FilenameError::InvalidCharacter);
134                }
135                '.' => {
136                    // Denotes the start of the file extension
137                    if (1..=Self::BASE_LEN).contains(&idx) {
138                        idx = Self::BASE_LEN;
139                        seen_dot = true;
140                    } else {
141                        return Err(FilenameError::MisplacedPeriod);
142                    }
143                }
144                _ => {
145                    let b = ch.to_ascii_uppercase() as u8;
146                    if seen_dot {
147                        if (Self::BASE_LEN..Self::TOTAL_LEN).contains(&idx) {
148                            sfn.contents[idx] = b;
149                        } else {
150                            return Err(FilenameError::NameTooLong);
151                        }
152                    } else if idx < Self::BASE_LEN {
153                        sfn.contents[idx] = b;
154                    } else {
155                        return Err(FilenameError::NameTooLong);
156                    }
157                    idx += 1;
158                }
159            }
160        }
161        if idx == 0 {
162            return Err(FilenameError::FilenameEmpty);
163        }
164        Ok(sfn)
165    }
166
167    /// Convert a Short File Name to a Volume Label.
168    ///
169    /// # Safety
170    ///
171    /// Volume Labels can contain things that Short File Names cannot, so only
172    /// do this conversion if you have the name of a directory entry with the
173    /// 'Volume Label' attribute.
174    pub unsafe fn to_volume_label(self) -> VolumeName {
175        VolumeName {
176            contents: self.contents,
177        }
178    }
179
180    /// Get the LFN checksum for this short filename
181    pub fn csum(&self) -> u8 {
182        let mut result = 0u8;
183        for b in self.contents.iter() {
184            result = result.rotate_right(1).wrapping_add(*b);
185        }
186        result
187    }
188}
189
190impl core::fmt::Display for ShortFileName {
191    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
192        let mut printed = 0;
193        for (i, &c) in self.contents.iter().enumerate() {
194            if c != b' ' {
195                if i == Self::BASE_LEN {
196                    write!(f, ".")?;
197                    printed += 1;
198                }
199                // converting a byte to a codepoint means you are assuming
200                // ISO-8859-1 encoding, because that's how Unicode was designed.
201                write!(f, "{}", c as char)?;
202                printed += 1;
203            }
204        }
205        if let Some(mut width) = f.width() {
206            if width > printed {
207                width -= printed;
208                for _ in 0..width {
209                    write!(f, "{}", f.fill())?;
210                }
211            }
212        }
213        Ok(())
214    }
215}
216
217impl core::fmt::Debug for ShortFileName {
218    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
219        write!(f, "ShortFileName(\"{}\")", self)
220    }
221}
222
/// Caller-backed storage in which a Long File Name is assembled.
#[derive(Debug)]
pub struct LfnBuffer<'a> {
    /// Backing storage; it is filled starting from the end and working
    /// towards the front.
    inner: &'a mut [u8],
    /// Count of bytes still unused at the front of `inner`.
    ///
    /// Equivalently, the index at which the stored string begins.
    free: usize,
    /// Set once a chunk did not fit into the remaining space.
    overflow: bool,
    /// Half of a surrogate pair that was split across two directory entries,
    /// kept until the next chunk arrives.
    unpaired_surrogate: Option<u16>,
}
237
238impl<'a> LfnBuffer<'a> {
239    /// Create a new, empty, LFN Buffer using the given mutable slice as its storage.
240    pub fn new(storage: &'a mut [u8]) -> LfnBuffer<'a> {
241        let len = storage.len();
242        LfnBuffer {
243            inner: storage,
244            free: len,
245            overflow: false,
246            unpaired_surrogate: None,
247        }
248    }
249
250    /// Empty out this buffer
251    pub fn clear(&mut self) {
252        self.free = self.inner.len();
253        self.overflow = false;
254        self.unpaired_surrogate = None;
255    }
256
257    /// Push the 13 UTF-16 codepoints into this string.
258    ///
259    /// We assume they are pushed last-chunk-first, as you would find
260    /// them on disk.
261    ///
262    /// Any chunk starting with a half of a surrogate pair has that saved for the next call.
263    ///
264    /// ```text
265    /// [de00, 002e, 0074, 0078, 0074, 0000, ffff, ffff, ffff, ffff, ffff, ffff, ffff]
266    /// [0041, 0042, 0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037, 0038, 0039, d83d]
267    ///
268    /// Would map to
269    ///
270    /// 0041 0042 0030 0031 0032 0033 0034 0035 0036 0037 0038 0039 1f600 002e 0074 0078 0074, or
271    ///
272    /// "AB0123456789😀.txt"
273    /// ```
274    pub fn push(&mut self, buffer: &[u16; 13]) {
275        // find the first null, if any
276        let null_idx = buffer
277            .iter()
278            .position(|&b| b == 0x0000)
279            .unwrap_or(buffer.len());
280        // take all the wide chars, up to the null (or go to the end)
281        let buffer = &buffer[0..null_idx];
282
283        // This next part will convert the 16-bit values into chars, noting that
284        // chars outside the Basic Multilingual Plane will require two 16-bit
285        // values to encode (see UTF-16 Surrogate Pairs).
286        //
287        // We cache the decoded chars into this array so we can iterate them
288        // backwards. It's 60 bytes, but it'll have to do.
289        let mut char_vec: heapless::Vec<char, 13> = heapless::Vec::new();
290        // Now do the decode, including the unpaired surrogate (if any) from
291        // last time (maybe it has a pair now!)
292        let mut is_first = true;
293        for ch in char::decode_utf16(
294            buffer
295                .iter()
296                .cloned()
297                .chain(self.unpaired_surrogate.take().iter().cloned()),
298        ) {
299            match ch {
300                Ok(ch) => {
301                    char_vec.push(ch).expect("Vec was full!?");
302                }
303                Err(e) => {
304                    // OK, so we found half a surrogate pair and nothing to go
305                    // with it. Was this the first codepoint in the chunk?
306                    if is_first {
307                        // it was - the other half is probably in the next chunk
308                        // so save this for next time
309                        trace!("LFN saved {:?}", e.unpaired_surrogate());
310                        self.unpaired_surrogate = Some(e.unpaired_surrogate());
311                    } else {
312                        // it wasn't - can't deal with it these mid-sequence, so
313                        // replace it
314                        trace!("LFN replaced {:?}", e.unpaired_surrogate());
315                        char_vec.push('\u{fffd}').expect("Vec was full?!");
316                    }
317                }
318            }
319            is_first = false;
320        }
321
322        for ch in char_vec.iter().rev() {
323            trace!("LFN push {:?}", ch);
324            // a buffer of length 4 is enough to encode any char
325            let mut encoded_ch = [0u8; 4];
326            let encoded_ch = ch.encode_utf8(&mut encoded_ch);
327            if self.free < encoded_ch.len() {
328                // the LFN buffer they gave us was not long enough. Note for
329                // later, so we don't show them garbage.
330                self.overflow = true;
331                return;
332            }
333            // Store the encoded char in the buffer, working backwards. We
334            // already checked there was enough space.
335            for b in encoded_ch.bytes().rev() {
336                self.free -= 1;
337                self.inner[self.free] = b;
338            }
339        }
340    }
341
342    /// View this LFN buffer as a string-slice
343    ///
344    /// If the buffer overflowed while parsing the LFN, or if this buffer is
345    /// empty, you get an empty string.
346    pub fn as_str(&self) -> &str {
347        if self.overflow {
348            ""
349        } else {
350            // we always only put UTF-8 encoded data in here
351            unsafe { core::str::from_utf8_unchecked(&self.inner[self.free..]) }
352        }
353    }
354}
355
356// ****************************************************************************
357//
358// Unit Tests
359//
360// ****************************************************************************
361
#[cfg(test)]
mod test {
    use super::*;

    /// Build a `ShortFileName` directly from its raw on-disk bytes.
    fn raw(contents: &[u8; 11]) -> ShortFileName {
        ShortFileName {
            contents: *contents,
        }
    }

    /// Parse `name`, panicking if it is rejected.
    fn parse(name: &str) -> ShortFileName {
        ShortFileName::create_from_str(name).unwrap()
    }

    /// Terminated final LFN chunk holding "0123∂".
    const TAIL_CHUNK: [u16; 13] = [
        0x0030, 0x0031, 0x0032, 0x0033, 0x2202, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
        0xFFFF, 0xFFFF,
    ];

    #[test]
    fn filename_no_extension() {
        let expected = raw(b"HELLO      ");
        assert_eq!(format!("{}", &expected), "HELLO");
        for spelling in ["HELLO", "hello", "HeLlO", "HELLO."] {
            assert_eq!(expected, parse(spelling));
        }
    }

    #[test]
    fn filename_extension() {
        let expected = raw(b"HELLO   TXT");
        assert_eq!(format!("{}", &expected), "HELLO.TXT");
        assert_eq!(expected, parse("HELLO.TXT"));
    }

    #[test]
    fn filename_get_extension() {
        assert_eq!(parse("hello.txt").extension(), "TXT".as_bytes());
        assert_eq!(parse("hello").extension(), "".as_bytes());
        assert_eq!(parse("hello.a").extension(), "A".as_bytes());
    }

    #[test]
    fn filename_get_base_name() {
        assert_eq!(parse("hello.txt").base_name(), "HELLO".as_bytes());
        assert_eq!(parse("12345678").base_name(), "12345678".as_bytes());
        assert_eq!(parse("1").base_name(), "1".as_bytes());
    }

    #[test]
    fn filename_fulllength() {
        let expected = raw(b"12345678TXT");
        assert_eq!(format!("{}", &expected), "12345678.TXT");
        assert_eq!(expected, parse("12345678.TXT"));
    }

    #[test]
    fn filename_short_extension() {
        let expected = raw(b"12345678C  ");
        assert_eq!(format!("{}", &expected), "12345678.C");
        assert_eq!(expected, parse("12345678.C"));
    }

    #[test]
    fn filename_short() {
        let expected = raw(b"1       C  ");
        assert_eq!(format!("{}", &expected), "1.C");
        assert_eq!(expected, parse("1.C"));
    }

    #[test]
    fn filename_empty() {
        assert_eq!(parse(""), ShortFileName::this_dir());
    }

    #[test]
    fn filename_bad() {
        for rejected in [" ", "123456789", "12345678.ABCD"] {
            assert!(ShortFileName::create_from_str(rejected).is_err());
        }
    }

    #[test]
    fn checksum() {
        assert_eq!(0xB3, parse("UNARCH~1.DAT").csum());
    }

    #[test]
    fn one_piece() {
        let mut storage = [0u8; 64];
        let mut buf: LfnBuffer = LfnBuffer::new(&mut storage);
        buf.push(&TAIL_CHUNK);
        assert_eq!(buf.as_str(), "0123∂");
    }

    #[test]
    fn two_piece() {
        let mut storage = [0u8; 64];
        let mut buf: LfnBuffer = LfnBuffer::new(&mut storage);
        // Chunks arrive last-first, as they would be found on disk.
        buf.push(&TAIL_CHUNK);
        buf.push(&[
            0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b,
            0x004c, 0x004d,
        ]);
        assert_eq!(buf.as_str(), "ABCDEFGHIJKLM0123∂");
    }

    #[test]
    fn two_piece_split_surrogate() {
        let mut storage = [0u8; 64];
        let mut buf: LfnBuffer = LfnBuffer::new(&mut storage);

        // The final chunk begins with the low half of a surrogate pair ...
        let last_chunk: [u16; 13] = [
            0xde00, 0x002e, 0x0074, 0x0078, 0x0074, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
            0xffff, 0xffff,
        ];
        // ... and the preceding chunk ends with the matching high half.
        let first_chunk: [u16; 13] = [
            0xd83d, 0xde00, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038,
            0x0039, 0xd83d,
        ];
        buf.push(&last_chunk);
        buf.push(&first_chunk);
        assert_eq!(buf.as_str(), "😀0123456789😀.txt");
    }
}
501
502// ****************************************************************************
503//
504// End Of File
505//
506// ****************************************************************************
embedded_sdmmc_dev/filesystem/filename.rs

embedded_sdmmc_dev/filesystem/
filename.rs