Skip to main content

fakecloud_persistence/
key_escape.rs

1use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
2use sha2::{Digest, Sha256};
3
4const MAX_SEGMENT_BYTES: usize = 200;
5const HASH_SUFFIX_HEX: usize = 12;
6
7// Encode everything that isn't in A-Za-z0-9._-
8const SAFE: &AsciiSet = &CONTROLS
9    .add(b' ')
10    .add(b'!')
11    .add(b'"')
12    .add(b'#')
13    .add(b'$')
14    .add(b'%')
15    .add(b'&')
16    .add(b'\'')
17    .add(b'(')
18    .add(b')')
19    .add(b'*')
20    .add(b'+')
21    .add(b',')
22    .add(b'/')
23    .add(b':')
24    .add(b';')
25    .add(b'<')
26    .add(b'=')
27    .add(b'>')
28    .add(b'?')
29    .add(b'@')
30    .add(b'[')
31    .add(b'\\')
32    .add(b']')
33    .add(b'^')
34    .add(b'`')
35    .add(b'{')
36    .add(b'|')
37    .add(b'}')
38    .add(b'~');
39
40pub fn escape_key_segment(segment: &str) -> String {
41    let encoded: String = utf8_percent_encode(segment, SAFE).collect();
42    if encoded.is_empty() {
43        // '@' is not in the percent-encoding output alphabet (A-Za-z0-9._-%),
44        // so this sentinel cannot collide with any legitimately encoded segment.
45        return "@empty".to_string();
46    }
47
48    // Reject `.` and `..` as directory names — they would escape the parent
49    // objects/ directory on disk. Substitute with @dot/@dotdot sentinels
50    // (same '@' prefix guarantee as @empty).
51    if encoded == "." {
52        return "@dot".to_string();
53    }
54    if encoded == ".." {
55        return "@dotdot".to_string();
56    }
57
58    if encoded.len() <= MAX_SEGMENT_BYTES {
59        return encoded;
60    }
61
62    let mut hasher = Sha256::new();
63    hasher.update(segment.as_bytes());
64    let digest = hasher.finalize();
65    let hex: String = digest
66        .iter()
67        .take(HASH_SUFFIX_HEX.div_ceil(2))
68        .map(|b| format!("{:02x}", b))
69        .collect();
70    let hex = &hex[..HASH_SUFFIX_HEX];
71
72    // Sentinel prefix + hash + truncated head of the encoded segment.
73    // The '@trunc-' prefix cannot appear in a normal percent-encoded segment.
74    let prefix = format!("@trunc-{hex}-");
75    let keep = MAX_SEGMENT_BYTES.saturating_sub(prefix.len());
76    let mut end = keep.min(encoded.len());
77    while end > 0 && !encoded.is_char_boundary(end) {
78        end -= 1;
79    }
80    format!("{prefix}{}", &encoded[..end])
81}
82
#[cfg(test)]
mod tests {
    //! Unit tests for `escape_key_segment`: sentinel handling, the
    //! percent-encoding alphabet, and the hashed overflow form.

    use super::*;

    /// Sentinel prefix carried by every overflow (truncated) name.
    const TRUNC_PREFIX: &str = "@trunc-";

    #[test]
    fn empty_string() {
        assert_eq!(escape_key_segment(""), "@empty");
    }

    #[test]
    fn literal_underscore_empty_does_not_collide_with_empty_sentinel() {
        // A user key that literally equals the old sentinel must not escape to
        // the new one. Percent-encoding leaves `_empty_` untouched (safe
        // chars), so it stays distinct from `@empty`.
        assert_eq!(escape_key_segment("_empty_"), "_empty_");
        assert_ne!(escape_key_segment("_empty_"), escape_key_segment(""));
    }

    #[test]
    fn literal_sentinel_lookalikes_are_escaped() {
        // `@` is always encoded, so user keys spelled like a sentinel can
        // never be mistaken for one.
        assert_eq!(escape_key_segment("@empty"), "%40empty");
        assert_eq!(
            escape_key_segment("@trunc-0123456789ab-x"),
            "%40trunc-0123456789ab-x"
        );
    }

    #[test]
    fn long_keys_with_shared_prefix_get_distinct_hashes() {
        let a = format!("{}X", "a".repeat(500));
        let b = format!("{}Y", "a".repeat(500));
        let ea = escape_key_segment(&a);
        let eb = escape_key_segment(&b);
        assert_ne!(ea, eb);
        assert!(ea.starts_with(TRUNC_PREFIX));
        assert!(eb.starts_with(TRUNC_PREFIX));
        // A short literal that happens to equal the truncated head cannot
        // collide with either, because the overflow form is prefixed with `@trunc-`.
        let short = "a".repeat(100);
        let es = escape_key_segment(&short);
        assert!(!es.starts_with('@'));
        assert_ne!(es, ea);
        assert_ne!(es, eb);
    }

    #[test]
    fn slash_is_encoded() {
        assert_eq!(escape_key_segment("a/b"), "a%2Fb");
    }

    #[test]
    fn control_bytes_and_percent_are_encoded() {
        assert_eq!(escape_key_segment("a\u{0}b"), "a%00b");
        assert_eq!(escape_key_segment("\u{7f}"), "%7F");
        // A literal `%` must itself be escaped, otherwise the raw key `%2F`
        // would collide with the encoding of `/`.
        assert_eq!(escape_key_segment("100%"), "100%25");
        assert_ne!(escape_key_segment("%2F"), escape_key_segment("/"));
    }

    #[test]
    fn unicode() {
        let out = escape_key_segment("日本語");
        assert!(out.is_ascii());
        assert!(out.contains('%'));
        // Each character is three UTF-8 bytes, each encoded as `%XX`.
        assert_eq!(out, "%E6%97%A5%E6%9C%AC%E8%AA%9E");
    }

    #[test]
    fn dotfile() {
        assert_eq!(escape_key_segment(".hidden"), ".hidden");
    }

    #[test]
    fn long_key_is_truncated_with_hash() {
        let raw = "a".repeat(500);
        let out = escape_key_segment(&raw);
        assert!(out.len() <= MAX_SEGMENT_BYTES);
        assert!(out.contains('-'));

        // Pin the full layout: `@trunc-<hash>-<head>`.
        let rest = out
            .strip_prefix(TRUNC_PREFIX)
            .expect("overflow form must start with the sentinel prefix");
        let (hash, tail) = rest.split_at(HASH_SUFFIX_HEX);
        assert!(hash.bytes().all(|b| b.is_ascii_hexdigit()));
        let head = tail
            .strip_prefix('-')
            .expect("hash must be followed by a '-' separator");
        let head_len = MAX_SEGMENT_BYTES - TRUNC_PREFIX.len() - HASH_SUFFIX_HEX - 1;
        assert_eq!(head, "a".repeat(head_len));
        assert_eq!(out.len(), MAX_SEGMENT_BYTES);
    }

    #[test]
    fn truncation_starts_just_past_the_limit() {
        // Exactly at the limit: returned verbatim.
        let at_limit = "a".repeat(MAX_SEGMENT_BYTES);
        assert_eq!(escape_key_segment(&at_limit), at_limit);

        // One byte over: switches to the overflow form.
        let over = "a".repeat(MAX_SEGMENT_BYTES + 1);
        let out = escape_key_segment(&over);
        assert!(out.starts_with(TRUNC_PREFIX));
        assert_eq!(out.len(), MAX_SEGMENT_BYTES);
    }

    #[test]
    fn long_multibyte_key_stays_ascii_and_bounded() {
        // 100 three-byte characters encode to 900 bytes, well past the limit.
        let out = escape_key_segment(&"日".repeat(100));
        assert!(out.starts_with(TRUNC_PREFIX));
        assert!(out.is_ascii());
        assert!(out.len() <= MAX_SEGMENT_BYTES);
    }

    #[test]
    fn escaping_is_deterministic() {
        let raw = format!("{}/tail", "k".repeat(400));
        assert_eq!(escape_key_segment(&raw), escape_key_segment(&raw));
    }

    #[test]
    fn differs_after_truncation_point_round_trip_unique() {
        let a = format!("{}{}", "a".repeat(500), "X");
        let b = format!("{}{}", "a".repeat(500), "Y");
        let ea = escape_key_segment(&a);
        let eb = escape_key_segment(&b);
        assert_ne!(ea, eb);

        // The inputs only differ past the cut point, so the kept heads are
        // identical and the hash alone tells the two names apart.
        let hash_end = TRUNC_PREFIX.len() + HASH_SUFFIX_HEX;
        assert_eq!(&ea[hash_end..], &eb[hash_end..]);
        assert_ne!(&ea[..hash_end], &eb[..hash_end]);
    }

    #[test]
    fn dot_and_dotdot_get_sentinels() {
        assert_eq!(escape_key_segment("."), "@dot");
        assert_eq!(escape_key_segment(".."), "@dotdot");
        // A user key that literally equals the sentinels cannot collide:
        // `@` is escaped as `%40`.
        assert_eq!(escape_key_segment("@dot"), "%40dot");
        assert_eq!(escape_key_segment("@dotdot"), "%40dotdot");
        // Only the exact names `.` and `..` are special.
        assert_eq!(escape_key_segment("..."), "...");
    }

    #[test]
    fn preserves_safe_chars() {
        assert_eq!(escape_key_segment("Foo.Bar-baz_1"), "Foo.Bar-baz_1");
    }
}