tor_persist/
slug.rs

1//! "Slugs" used as part of on-disk filenames and other similar purposes
2//!
3//! Arti uses "slugs" as parts of filenames in many places.
4//! Slugs are fixed or variable strings which either
5//! designate the kind of a thing, or which of various things this is.
6//!
7//! Slugs have a restricted character set:
8//! Lowercase ASCII alphanumerics, underscore, hyphen.
9//! We may extend this to allow additional characters in the future,
10//! but /, +, and . (the slug separators) will never be valid slug characters.
11//! Additionally, : will never be a valid slug character,
12//! because Windows does not allow colons in filenames[^1].
13//!
14//! Slugs may not be empty, and they may not start with a hyphen.
15//!
16//! Slugs can be concatenated to build file names.
17//! When concatenating slugs to make filenames,
18//! they should be separated using `/`, `+`, or `.`
19//! ([`SLUG_SEPARATOR_CHARS`]).
20//! Slugs should not be concatenated without separators (for security reasons).
21//!
22//! On Windows only, the following slugs are forbidden,
23//! because of [absurd Windows filename behaviours](https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file):
24//! `con` `prn` `aux` `nul`
25//! `com1` `com2` `com3` `com4` `com5` `com6` `com7` `com8` `com9` `com0`
26//! `lpt1` `lpt2` `lpt3` `lpt4` `lpt5` `lpt6` `lpt7` `lpt8` `lpt9` `lpt0`.
27//!
28//! [^1]: <https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions>
29
30pub mod timestamp;
31
32use std::borrow::Borrow;
33use std::ffi::OsStr;
34use std::fmt::{self, Display};
35use std::mem;
36use std::ops::Deref;
37use std::path::Path;
38
39use paste::paste;
40use serde::{Deserialize, Serialize};
41use thiserror::Error;
42
43#[cfg(target_family = "windows")]
44pub use os::ForbiddenOnWindows;
45
/// An owned slug, checked for syntax
///
/// Displays as the slug text itself.
/// Serialized as a plain string; deserialization goes through `TryFrom<String>`,
/// so a deserialized `Slug` has passed the syntax check too.
///
/// The syntax check can be relied on for safety/soundness.
// We adopt this rule so that eventually we could have AsRef<[std::ascii::Char]>, etc.
#[derive(Debug, Clone, Serialize, Deserialize)] //
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
#[derive(derive_more::Display)]
#[serde(try_from = "String", into = "String")]
// Box<str> since we don't expect to change the size; that makes it 2 words rather than 3
// (But our public APIs are in terms of String.)
pub struct Slug(Box<str>);
57
/// A borrowed slug, checked for syntax
///
/// This is to [`Slug`] what `str` is to `String`:
/// a `Slug` dereferences to a `SlugRef`, and `SlugRef::to_owned` yields a `Slug`.
///
/// The syntax check can be relied on for safety/soundness.
#[derive(Debug, Serialize)] //
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash)] //
#[derive(derive_more::Display)]
#[serde(transparent)]
#[repr(transparent)] // SAFETY: this attribute is needed for unsafe in new_unchecked
pub struct SlugRef(str);
67
/// Characters which are good to use to separate slugs
///
/// Each `char` of this string is one separator character.
///
/// Guaranteed to never overlap with the valid slug character set.
///
/// We might expand this set, but not ever reduce it.
pub const SLUG_SEPARATOR_CHARS: &str = "/+.";
74
/// Error for an invalid slug
///
/// Describes the first problem found by the syntax check.
#[derive(Error, Debug, Clone, Eq, PartialEq, Hash)]
#[non_exhaustive]
pub enum BadSlug {
    /// Slug contains a forbidden character
    ///
    /// The payload is the offending character.
    BadCharacter(char),
    /// Slug starts with a disallowed character
    ///
    /// The payload is the offending first character.
    BadFirstCharacter(char),
    /// An empty slug was supplied where a nonempty one is required
    EmptySlugNotAllowed,
    /// We are on Windows and the slug is one of the forbidden ones
    ///
    /// On platforms other than Windows, this variant is absent.
    #[cfg(target_family = "windows")]
    ForbiddenOnWindows(ForbiddenOnWindows),
}
91
/// Types which can perhaps be used as a slug
///
/// This is a trait implemented by `str`, `std::fmt::Arguments`,
/// and other implementors of `ToString`, for the convenience of call sites:
/// APIs can have functions taking an `&(impl TryIntoSlug + ?Sized)` or `&dyn TryIntoSlug`
/// and callers then don't need error-handling boilerplate.
///
/// Functions that take a `TryIntoSlug` will need to do a runtime syntax check.
pub trait TryIntoSlug {
    /// Convert `self` into a `Slug`, if it has the right syntax
    ///
    /// On failure, the returned [`BadSlug`] says what was wrong with the text.
    fn try_into_slug(&self) -> Result<Slug, BadSlug>;
}
104
105impl<T: ToString + ?Sized> TryIntoSlug for T {
106    fn try_into_slug(&self) -> Result<Slug, BadSlug> {
107        self.to_string().try_into()
108    }
109}
110
111impl Slug {
112    /// Make a Slug out of an owned `String`, if it has the correct syntax
113    pub fn new(s: String) -> Result<Slug, BadSlug> {
114        Ok(unsafe {
115            // SAFETY: we check, and then call new_unchecked
116            check_syntax(&s)?;
117            Slug::new_unchecked(s)
118        })
119    }
120
121    /// Make a Slug out of an owned `String`, without checking the syntax
122    ///
123    /// # Safety
124    ///
125    /// It's the caller's responsibility to check the syntax of the input string.
126    pub unsafe fn new_unchecked(s: String) -> Slug {
127        Slug(s.into())
128    }
129}
130
131impl SlugRef {
132    /// Make a SlugRef out of a `str`, if it has the correct syntax
133    pub fn new(s: &str) -> Result<&SlugRef, BadSlug> {
134        Ok(unsafe {
135            // SAFETY: we check, and then call new_unchecked
136            check_syntax(s)?;
137            SlugRef::new_unchecked(s)
138        })
139    }
140
141    /// Make a SlugRef out of a `str`, without checking the syntax
142    ///
143    /// # Safety
144    ///
145    /// It's the caller's responsibility to check the syntax of the input string.
146    pub unsafe fn new_unchecked<'s>(s: &'s str) -> &'s SlugRef {
147        unsafe {
148            // SAFETY
149            // SlugRef is repr(transparent).  So the alignment and memory layout
150            // are the same, and the pointer metadata is the same too.
151            // The lifetimes is correct by construction.
152            //
153            // We do this, rather than `struct SlugRef<'r>(&'r str)`,
154            // because that way we couldn't impl Deref.
155            mem::transmute::<&'s str, &'s SlugRef>(s)
156        }
157    }
158
159    /// Make an owned `Slug`
160    fn to_slug(&self) -> Slug {
161        unsafe {
162            // SAFETY: self is a SlugRef so our syntax is right
163            Slug::new_unchecked(self.0.into())
164        }
165    }
166}
167
168impl TryFrom<String> for Slug {
169    type Error = BadSlug;
170    fn try_from(s: String) -> Result<Slug, BadSlug> {
171        Slug::new(s)
172    }
173}
174
175impl From<Slug> for String {
176    fn from(s: Slug) -> String {
177        s.0.into()
178    }
179}
180
181impl<'s> TryFrom<&'s str> for &'s SlugRef {
182    type Error = BadSlug;
183    fn try_from(s: &'s str) -> Result<&'s SlugRef, BadSlug> {
184        SlugRef::new(s)
185    }
186}
187
188impl Deref for Slug {
189    type Target = SlugRef;
190    fn deref(&self) -> &SlugRef {
191        unsafe {
192            // SAFETY: self is a Slug so our syntax is right
193            SlugRef::new_unchecked(&self.0)
194        }
195    }
196}
197
198impl Borrow<SlugRef> for Slug {
199    fn borrow(&self) -> &SlugRef {
200        self
201    }
202}
203impl Borrow<str> for Slug {
204    fn borrow(&self) -> &str {
205        self.as_ref()
206    }
207}
208
209impl ToOwned for SlugRef {
210    type Owned = Slug;
211    fn to_owned(&self) -> Slug {
212        self.to_slug()
213    }
214}
215
/// Implement `fn as_...(&self) -> ...` and `AsRef`
///
/// The inherent method is named `as_` followed by the snake-case form of `$ty`;
/// the `AsRef` impls come from `impl_as_ref!`.
macro_rules! impl_as_with_inherent {
    ($ty:ident) => {
        paste! {
            impl SlugRef {
                #[doc = concat!("Obtain this slug as a `", stringify!($ty), "`")]
                pub fn [<as_ $ty:snake>](&self) -> &$ty {
                    self.as_ref()
                }
            }
            impl_as_ref!($ty);
        }
    };
}
/// Implement `AsRef`
///
/// Provides `AsRef<$ty>` for `SlugRef` (via the inner `str`)
/// and for `Slug` (via its `Deref` to `SlugRef`).
macro_rules! impl_as_ref {
    ($ty:ty) => {
        impl AsRef<$ty> for SlugRef {
            fn as_ref(&self) -> &$ty {
                self.0.as_ref()
            }
        }
        impl AsRef<$ty> for Slug {
            fn as_ref(&self) -> &$ty {
                self.deref().as_ref()
            }
        }
    };
}
239
// `str` and `Path` get an inherent `as_str` / `as_path` accessor as well as `AsRef`.
impl_as_with_inherent!(str);
impl_as_with_inherent!(Path);
// `OsStr` and `[u8]` get `AsRef` only.
impl_as_ref!(OsStr);
impl_as_ref!([u8]);
244
245/// Check the string `s` to see if it would be valid as a slug
246///
247/// This is a low-level method for special cases.
248/// Usually, use [`Slug::new`] etc.
249//
250// SAFETY
251// This function checks the syntax, and is relied on by unsafe code
252#[allow(clippy::if_same_then_else)] // clippy objects to the repeated Ok(())
253pub fn check_syntax(s: &str) -> Result<(), BadSlug> {
254    if s.is_empty() {
255        return Err(BadSlug::EmptySlugNotAllowed);
256    }
257
258    // Slugs are not allowed to start with a hyphen.
259    if s.starts_with('-') {
260        return Err(BadSlug::BadFirstCharacter('-'));
261    }
262
263    // check legal character set
264    for c in s.chars() {
265        if c.is_ascii_lowercase() {
266            Ok(())
267        } else if c.is_ascii_digit() {
268            Ok(())
269        } else if c == '_' || c == '-' {
270            Ok(())
271        } else {
272            Err(BadSlug::BadCharacter(c))
273        }?;
274    }
275
276    os::check_forbidden(s)?;
277
278    Ok(())
279}
280
281impl Display for BadSlug {
282    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
283        match self {
284            BadSlug::BadCharacter(c) => {
285                let num = u32::from(*c);
286                write!(f, "character {c:?} (U+{num:04X}) is not allowed")
287            }
288            BadSlug::BadFirstCharacter(c) => {
289                let num = u32::from(*c);
290                write!(
291                    f,
292                    "character {c:?} (U+{num:04X}) is not allowed as the first character"
293                )
294            }
295            BadSlug::EmptySlugNotAllowed => {
296                write!(f, "empty identifier (empty slug) not allowed")
297            }
298            #[cfg(target_family = "windows")]
299            BadSlug::ForbiddenOnWindows(e) => os::fmt_error(e, f),
300        }
301    }
302}
303
/// Forbidden slug support for Windows
#[cfg(target_family = "windows")]
mod os {
    use super::*;

    /// A slug which is forbidden because we are on Windows (as found in an invalid slug error)
    ///
    /// This type is available only on Windows platforms.
    //
    // Double reference so that BadSlug has to contain only one word, not two
    pub type ForbiddenOnWindows = &'static &'static str;

    /// The forbidden slugs - Windows thinks `C:\Program Files\lpt0.json` is a printer.
    const FORBIDDEN: &[&str] = &[
        "con", "prn", "aux", "nul", //
        "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", "com0", //
        "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "lpt0",
    ];

    /// Check whether this slug is forbidden here
    ///
    /// On a match, the error refers to the matching entry of the forbidden list.
    pub(super) fn check_forbidden(s: &str) -> Result<(), BadSlug> {
        match FORBIDDEN.iter().find(|bad| **bad == s) {
            Some(bad) => Err(BadSlug::ForbiddenOnWindows(bad)),
            None => Ok(()),
        }
    }

    /// Display a forbidden slug error
    pub(super) fn fmt_error(s: &ForbiddenOnWindows, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "slug (name) {s:?} is not allowed on Windows")
    }
}
338/// Forbidden slug support for non-Windows
339#[cfg(not(target_family = "windows"))]
340mod os {
341    use super::*;
342
343    /// Check whether this slug is forbidden here
344    #[allow(clippy::unnecessary_wraps)]
345    pub(super) fn check_forbidden(_s: &str) -> Result<(), BadSlug> {
346        Ok(())
347    }
348}
349
#[cfg(test)]
mod test {
    // @@ begin test lint list maintained by maint/add_warning @@
    #![allow(clippy::bool_assert_comparison)]
    #![allow(clippy::clone_on_copy)]
    #![allow(clippy::dbg_macro)]
    #![allow(clippy::mixed_attributes_style)]
    #![allow(clippy::print_stderr)]
    #![allow(clippy::print_stdout)]
    #![allow(clippy::single_char_pattern)]
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::unchecked_time_subtraction)]
    #![allow(clippy::useless_vec)]
    #![allow(clippy::needless_pass_by_value)]
    //! <!-- @@ end test lint list maintained by maint/add_warning @@ -->

    use super::*;
    use itertools::chain;

    /// Separators and assorted other junk are rejected, by both constructors alike
    #[test]
    fn bad() {
        let rejected = chain!(
            SLUG_SEPARATOR_CHARS.chars(), //
            ['\\', ' ', '\n', '\0']
        );
        for c in rejected {
            let s = format!("x{c}y");
            let expected = BadSlug::BadCharacter(c);
            let from_borrowed = SlugRef::new(&s).unwrap_err();
            assert_eq!(from_borrowed, expected);
            let from_owned = Slug::new(s).unwrap_err();
            assert_eq!(from_borrowed, from_owned);
        }
    }

    /// Every allowed character is accepted, alone and all together
    #[test]
    fn good() {
        let all = chain!(
            b'a'..=b'z', //
            b'0'..=b'9',
            [b'_'],
        )
        .map(char::from);

        // Both constructors must accept `s`, and both must display as `s`.
        let chk = |s: String| {
            let borrowed = SlugRef::new(&s).unwrap();
            let owned = Slug::new(s.clone()).unwrap();
            assert_eq!(borrowed.to_string(), s);
            assert_eq!(owned.to_string(), s);
        };

        chk(all.clone().collect());

        for c in all {
            chk(format!("{c}"));
        }

        // Hyphens are allowed, but not as the first character
        for hyphenated in ["a-", "a-b"] {
            chk(hyphenated.into());
        }
    }

    /// Error messages for rejected characters
    #[test]
    fn badchar_msg() {
        let cases = [
            (".", "character '.' (U+002E) is not allowed"),
            ("\0", "character '\\0' (U+0000) is not allowed"),
            (
                "\u{12345}",
                "character '\u{12345}' (U+12345) is not allowed",
            ),
            (
                "-",
                "character '-' (U+002D) is not allowed as the first character",
            ),
            ("A", "character 'A' (U+0041) is not allowed"),
        ];

        for (input, message) in cases {
            assert_eq!(
                SlugRef::new(input).unwrap_err().to_string(),
                message, //
            );
        }
    }

    /// Reserved device names are rejected on Windows, and fine elsewhere
    #[test]
    fn windows_forbidden() {
        for s in ["con", "prn", "lpt0"] {
            let r = SlugRef::new(s);
            if cfg!(not(target_family = "windows")) {
                assert_eq!(r.unwrap().as_str(), s);
            } else {
                assert_eq!(
                    r.unwrap_err().to_string(),
                    format!("slug (name) \"{s}\" is not allowed on Windows"),
                );
            }
        }
    }

    /// The empty string is not a slug
    #[test]
    fn empty_slug() {
        let err = SlugRef::new("").unwrap_err();
        assert_eq!(
            err.to_string(),
            "empty identifier (empty slug) not allowed"
        );
    }
}