// nextest_runner/record/cache_dir.rs
1// Copyright (c) The nextest Contributors
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Platform-specific cache directory discovery for nextest records.
5
6use crate::errors::CacheDirError;
7use camino::{Utf8Path, Utf8PathBuf};
8use etcetera::{BaseStrategy, choose_base_strategy};
9use xxhash_rust::xxh3::xxh3_64;
10
/// Maximum length of the encoded workspace path in bytes.
///
/// Kept well under common filesystem name limits (e.g. 255 bytes) so the
/// encoded directory name plus any platform decoration always fits.
const MAX_ENCODED_LEN: usize = 96;

/// Length of the hash suffix appended to truncated paths.
///
/// Between the first many bytes and this, we should ideally have more than
/// enough entropy to disambiguate repos.
const HASH_SUFFIX_LEN: usize = 8;

/// Environment variable to override the nextest cache directory.
///
/// When set, this overrides the platform-specific cache directory. The records
/// directory will be `$NEXTEST_CACHE_DIR/projects/<encoded-workspace>/records/`.
pub const NEXTEST_CACHE_DIR_ENV: &str = "NEXTEST_CACHE_DIR";
25
26/// Returns the platform-specific cache directory for nextest records for a workspace.
27///
28/// If the `NEXTEST_CACHE_DIR` environment variable is set, uses that as the base
29/// cache directory. Otherwise, uses the platform-specific default:
30///
31/// - Linux, macOS, and other Unix: `$XDG_CACHE_HOME/nextest/projects/<encoded-workspace>/records/`
32///   or `~/.cache/nextest/projects/<encoded-workspace>/records/`
33/// - Windows: `%LOCALAPPDATA%\nextest\cache\projects\<encoded-workspace>\records\`
34///
35/// The workspace root is canonicalized (symlinks resolved) before being encoded
36/// using `encode_workspace_path` to produce a directory-safe, bijective
37/// representation. This ensures that accessing a workspace via a symlink
38/// produces the same cache directory as accessing it via the real path.
39///
40/// Returns an error if:
41/// - The platform cache directory cannot be determined
42/// - The workspace path cannot be canonicalized (e.g., doesn't exist)
43/// - Any path is not valid UTF-8
44pub fn records_cache_dir(workspace_root: &Utf8Path) -> Result<Utf8PathBuf, CacheDirError> {
45    let base_cache_dir = if let Ok(cache_dir) = std::env::var(NEXTEST_CACHE_DIR_ENV) {
46        Utf8PathBuf::from(cache_dir)
47    } else {
48        let strategy = choose_base_strategy().map_err(CacheDirError::BaseDirStrategy)?;
49        let cache_dir = strategy.cache_dir();
50        let nextest_cache = cache_dir.join("nextest");
51        Utf8PathBuf::from_path_buf(nextest_cache.clone()).map_err(|_| {
52            CacheDirError::CacheDirNotUtf8 {
53                path: nextest_cache,
54            }
55        })?
56    };
57
58    // Canonicalize the workspace root to resolve symlinks. This ensures that
59    // accessing a workspace via a symlink produces the same cache directory.
60    let canonical_workspace =
61        workspace_root
62            .canonicalize_utf8()
63            .map_err(|error| CacheDirError::Canonicalize {
64                workspace_root: workspace_root.to_owned(),
65                error,
66            })?;
67
68    let encoded_workspace = encode_workspace_path(&canonical_workspace);
69    Ok(base_cache_dir
70        .join("projects")
71        .join(&encoded_workspace)
72        .join("records"))
73}
74
75/// Encodes a workspace path into a directory-safe string.
76///
77/// The encoding is bijective (reversible) and produces valid directory names on all
78/// platforms. The encoding scheme uses underscore as an escape character:
79///
80/// - `_` → `__` (escape underscore first)
81/// - `/` → `_s` (Unix path separator)
82/// - `\` → `_b` (Windows path separator)
83/// - `:` → `_c` (Windows drive letter separator)
84/// - `*` → `_a` (asterisk, invalid on Windows)
85/// - `"` → `_q` (double quote, invalid on Windows)
86/// - `<` → `_l` (less than, invalid on Windows)
87/// - `>` → `_g` (greater than, invalid on Windows)
88/// - `|` → `_p` (pipe, invalid on Windows)
89/// - `?` → `_m` (question mark, invalid on Windows)
90///
91/// If the encoded path exceeds 96 bytes, it is truncated at a valid UTF-8 boundary
92/// and an 8-character hash suffix is appended to maintain uniqueness.
93///
94/// # Examples
95///
96/// - `/home/rain/dev/nextest` → `_shome_srain_sdev_snextest`
97/// - `C:\Users\rain\dev` → `C_c_bUsers_brain_bdev`
98/// - `/path_with_underscore` → `_spath__with__underscore`
99/// - `/weird*path?` → `_sweird_apath_m`
100pub fn encode_workspace_path(path: &Utf8Path) -> String {
101    let mut encoded = String::with_capacity(path.as_str().len() * 2);
102
103    for ch in path.as_str().chars() {
104        match ch {
105            '_' => encoded.push_str("__"),
106            '/' => encoded.push_str("_s"),
107            '\\' => encoded.push_str("_b"),
108            ':' => encoded.push_str("_c"),
109            '*' => encoded.push_str("_a"),
110            '"' => encoded.push_str("_q"),
111            '<' => encoded.push_str("_l"),
112            '>' => encoded.push_str("_g"),
113            '|' => encoded.push_str("_p"),
114            '?' => encoded.push_str("_m"),
115            _ => encoded.push(ch),
116        }
117    }
118
119    truncate_with_hash(encoded)
120}
121
122/// Truncates an encoded string to fit within [`MAX_ENCODED_LEN`] bytes.
123///
124/// If the string is already short enough, returns it unchanged. Otherwise,
125/// truncates at a valid UTF-8 boundary and appends an 8-character hash suffix
126/// derived from the full string.
127fn truncate_with_hash(encoded: String) -> String {
128    if encoded.len() <= MAX_ENCODED_LEN {
129        return encoded;
130    }
131
132    // Compute hash of full string before truncation.
133    let hash = xxh3_64(encoded.as_bytes());
134    let hash_suffix = format!("{:08x}", hash & 0xFFFFFFFF);
135
136    // Find the longest valid UTF-8 prefix that fits.
137    let max_prefix_len = MAX_ENCODED_LEN - HASH_SUFFIX_LEN;
138    let bytes = encoded.as_bytes();
139    let truncated_bytes = &bytes[..max_prefix_len.min(bytes.len())];
140
141    // Use utf8_chunks to find the valid UTF-8 portion.
142    let mut valid_len = 0;
143    for chunk in truncated_bytes.utf8_chunks() {
144        valid_len += chunk.valid().len();
145        // Stop at first invalid sequence (which would be an incomplete multi-byte char).
146        if !chunk.invalid().is_empty() {
147            break;
148        }
149    }
150
151    let mut result = encoded[..valid_len].to_string();
152    result.push_str(&hash_suffix);
153    result
154}
155
156/// Decodes a workspace path that was encoded with [`encode_workspace_path`].
157///
158/// Returns `None` if the encoded string is malformed (contains an invalid escape
159/// sequence like `_x` where `x` is not a recognized escape character).
160#[cfg_attr(not(test), expect(dead_code))] // Will be used in replay phase.
161pub fn decode_workspace_path(encoded: &str) -> Option<Utf8PathBuf> {
162    let mut decoded = String::with_capacity(encoded.len());
163    let mut chars = encoded.chars().peekable();
164
165    while let Some(ch) = chars.next() {
166        if ch == '_' {
167            match chars.next() {
168                Some('_') => decoded.push('_'),
169                Some('s') => decoded.push('/'),
170                Some('b') => decoded.push('\\'),
171                Some('c') => decoded.push(':'),
172                Some('a') => decoded.push('*'),
173                Some('q') => decoded.push('"'),
174                Some('l') => decoded.push('<'),
175                Some('g') => decoded.push('>'),
176                Some('p') => decoded.push('|'),
177                Some('m') => decoded.push('?'),
178                // Malformed: `_` at end of string or followed by unknown char.
179                _ => return None,
180            }
181        } else {
182            decoded.push(ch);
183        }
184    }
185
186    Some(Utf8PathBuf::from(decoded))
187}
188
#[cfg(test)]
mod tests {
    use super::*;

    /// Byte offset where the hash suffix begins in a truncated encoding.
    ///
    /// The original tests sliced at `90`, which only worked because 'a' is a
    /// hex digit and hash tails rarely collide; the suffix really starts at
    /// `MAX_ENCODED_LEN - HASH_SUFFIX_LEN` (96 - 8 = 88).
    const SUFFIX_START: usize = MAX_ENCODED_LEN - HASH_SUFFIX_LEN;

    #[test]
    fn test_records_cache_dir() {
        // Use a real existing path (the temp dir always exists).
        let temp_dir =
            Utf8PathBuf::try_from(std::env::temp_dir()).expect("temp dir should be valid UTF-8");
        let cache_dir = records_cache_dir(&temp_dir).expect("cache directory should be available");

        assert!(
            cache_dir.as_str().contains("nextest"),
            "cache dir should contain 'nextest': {cache_dir}"
        );
        assert!(
            cache_dir.as_str().contains("projects"),
            "cache dir should contain 'projects': {cache_dir}"
        );
        assert!(
            cache_dir.as_str().contains("records"),
            "cache dir should contain 'records': {cache_dir}"
        );
    }

    #[test]
    fn test_records_cache_dir_canonicalizes_symlinks() {
        // Create a temp directory and a symlink pointing to it.
        let temp_dir = camino_tempfile::tempdir().expect("tempdir should be created");
        let real_path = temp_dir.path().to_path_buf();

        // Create a subdirectory to serve as the "workspace".
        let workspace = real_path.join("workspace");
        std::fs::create_dir(&workspace).expect("workspace dir should be created");

        // Create a symlink pointing to the workspace.
        let symlink_path = real_path.join("symlink-to-workspace");

        #[cfg(unix)]
        std::os::unix::fs::symlink(&workspace, &symlink_path)
            .expect("symlink should be created on Unix");

        #[cfg(windows)]
        std::os::windows::fs::symlink_dir(&workspace, &symlink_path)
            .expect("symlink should be created on Windows");

        // Get cache dir via the real path.
        let cache_via_real =
            records_cache_dir(&workspace).expect("cache dir via real path should be available");

        // Get cache dir via the symlink.
        let cache_via_symlink =
            records_cache_dir(&symlink_path).expect("cache dir via symlink should be available");

        // They should be the same because canonicalization resolves the symlink.
        assert_eq!(
            cache_via_real, cache_via_symlink,
            "cache dir should be the same whether accessed via real path or symlink"
        );
    }

    // Basic encoding tests.
    #[test]
    fn test_encode_workspace_path() {
        let cases = [
            ("", ""),
            ("simple", "simple"),
            ("/home/user", "_shome_suser"),
            ("/home/user/project", "_shome_suser_sproject"),
            ("C:\\Users\\name", "C_c_bUsers_bname"),
            ("D:\\dev\\project", "D_c_bdev_bproject"),
            ("/path_with_underscore", "_spath__with__underscore"),
            ("C:\\path_name", "C_c_bpath__name"),
            ("/a/b/c", "_sa_sb_sc"),
            // Windows-invalid characters.
            ("/weird*path", "_sweird_apath"),
            ("/path?query", "_spath_mquery"),
            ("/file<name>", "_sfile_lname_g"),
            ("/path|pipe", "_spath_ppipe"),
            ("/\"quoted\"", "_s_qquoted_q"),
            // All Windows-invalid characters combined.
            ("*\"<>|?", "_a_q_l_g_p_m"),
        ];

        for (input, expected) in cases {
            let encoded = encode_workspace_path(Utf8Path::new(input));
            assert_eq!(
                encoded, expected,
                "encoding failed for {input:?}: expected {expected:?}, got {encoded:?}"
            );
        }
    }

    // Roundtrip tests: encode then decode should return original.
    #[test]
    fn test_encode_decode_roundtrip() {
        let cases = [
            "/home/user/project",
            "C:\\Users\\name\\dev",
            "/path_with_underscore",
            "/_",
            "_/",
            "__",
            "/a_b/c_d",
            "",
            "no_special_chars",
            "/mixed\\path:style",
            // Windows-invalid characters (valid on Unix).
            "/path*with*asterisks",
            "/file?query",
            "/path<with>angles",
            "/pipe|char",
            "/\"quoted\"",
            // All special chars in one path.
            "/all*special?chars<in>one|path\"here\"_end",
        ];

        for original in cases {
            let encoded = encode_workspace_path(Utf8Path::new(original));
            let decoded = decode_workspace_path(&encoded);
            assert_eq!(
                decoded.as_deref(),
                Some(Utf8Path::new(original)),
                "roundtrip failed for {original:?}: encoded={encoded:?}, decoded={decoded:?}"
            );
        }
    }

    // Bijectivity tests: different inputs must produce different outputs.
    #[test]
    fn test_encoding_is_bijective() {
        // These pairs were problematic with the simple dash-based encoding.
        let pairs = [
            ("/-", "-/"),
            ("/a", "_a"),
            ("_s", "/"),
            ("a_", "a/"),
            ("__", "_"),
            ("/", "\\"),
            // New escape sequences for Windows-invalid characters.
            ("_a", "*"),
            ("_q", "\""),
            ("_l", "<"),
            ("_g", ">"),
            ("_p", "|"),
            ("_m", "?"),
            // Ensure Windows-invalid chars don't collide with each other.
            ("*", "?"),
            ("<", ">"),
            ("|", "\""),
        ];

        for (a, b) in pairs {
            let encoded_a = encode_workspace_path(Utf8Path::new(a));
            let encoded_b = encode_workspace_path(Utf8Path::new(b));
            assert_ne!(
                encoded_a, encoded_b,
                "bijectivity violated: {a:?} and {b:?} both encode to {encoded_a:?}"
            );
        }
    }

    // Decode should reject malformed inputs.
    #[test]
    fn test_decode_rejects_malformed() {
        let malformed_inputs = [
            "_",     // underscore at end
            "_x",    // unknown escape sequence
            "foo_",  // underscore at end after content
            "foo_x", // unknown escape in middle
            "_S",    // uppercase S not valid
        ];

        for input in malformed_inputs {
            assert!(
                decode_workspace_path(input).is_none(),
                "should reject malformed input: {input:?}"
            );
        }
    }

    // Valid escape sequences should decode.
    #[test]
    fn test_decode_valid_escapes() {
        let cases = [
            ("__", "_"),
            ("_s", "/"),
            ("_b", "\\"),
            ("_c", ":"),
            ("a__b", "a_b"),
            ("_shome", "/home"),
            // Windows-invalid character escapes.
            ("_a", "*"),
            ("_q", "\""),
            ("_l", "<"),
            ("_g", ">"),
            ("_p", "|"),
            ("_m", "?"),
            // Combined.
            ("_spath_astar_mquery", "/path*star?query"),
        ];

        for (input, expected) in cases {
            let decoded = decode_workspace_path(input);
            assert_eq!(
                decoded.as_deref(),
                Some(Utf8Path::new(expected)),
                "decode failed for {input:?}: expected {expected:?}, got {decoded:?}"
            );
        }
    }

    // Truncation tests.
    #[test]
    fn test_short_paths_not_truncated() {
        // A short path (well under the 96-byte limit when encoded) is
        // returned unchanged, with no hash suffix.
        let short_path = "/a/b/c/d";
        let encoded = encode_workspace_path(Utf8Path::new(short_path));
        assert!(
            encoded.len() <= MAX_ENCODED_LEN,
            "short path should not be truncated: {encoded:?} (len={})",
            encoded.len()
        );
        // Should not contain a hash suffix (no truncation occurred).
        assert_eq!(encoded, "_sa_sb_sc_sd");
    }

    #[test]
    fn test_long_paths_truncated_with_hash() {
        // Create a path that will definitely exceed 96 bytes when encoded.
        // Each `/x` becomes `_sx` (3 bytes), so we need > 32 components.
        let long_path = "/a".repeat(50); // 100 bytes raw, 150 bytes encoded
        let encoded = encode_workspace_path(Utf8Path::new(&long_path));

        assert_eq!(
            encoded.len(),
            MAX_ENCODED_LEN,
            "truncated path should be exactly {MAX_ENCODED_LEN} bytes: {encoded:?} (len={})",
            encoded.len()
        );

        // Should end with an 8-character hex hash.
        let hash_suffix = &encoded[encoded.len() - HASH_SUFFIX_LEN..];
        assert!(
            hash_suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "hash suffix should be hex digits: {hash_suffix:?}"
        );
    }

    #[test]
    fn test_truncation_preserves_uniqueness() {
        // Two different long paths should produce different truncated results.
        let path_a = "/a".repeat(50);
        let path_b = "/b".repeat(50);

        let encoded_a = encode_workspace_path(Utf8Path::new(&path_a));
        let encoded_b = encode_workspace_path(Utf8Path::new(&path_b));

        assert_ne!(
            encoded_a, encoded_b,
            "different paths should produce different encodings even when truncated"
        );
    }

    #[test]
    fn test_truncation_with_unicode() {
        // Create a path with multi-byte UTF-8 characters that would be split.
        // '日' is 3 bytes in UTF-8.
        let unicode_path = "/日本語".repeat(20); // Each repeat is 10 bytes raw.
        let encoded = encode_workspace_path(Utf8Path::new(&unicode_path));

        assert!(
            encoded.len() <= MAX_ENCODED_LEN,
            "encoded path should not exceed {MAX_ENCODED_LEN} bytes: len={}",
            encoded.len()
        );

        // Verify the result is valid UTF-8 (this would panic if not).
        let _ = encoded.as_str();

        // Verify the hash suffix is present and valid hex.
        let hash_suffix = &encoded[encoded.len() - HASH_SUFFIX_LEN..];
        assert!(
            hash_suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "hash suffix should be hex digits: {hash_suffix:?}"
        );
    }

    #[test]
    fn test_truncation_boundary_at_96_bytes() {
        // Create paths of varying lengths around the 96-byte boundary.
        // The encoding doubles some characters, so we need to be careful.

        // A path that encodes to exactly 96 bytes should not be truncated.
        // 'a' stays as 'a', so we can use a string of 96 'a's.
        let exactly_96 = "a".repeat(96);
        let encoded = encode_workspace_path(Utf8Path::new(&exactly_96));
        assert_eq!(encoded.len(), 96);
        assert_eq!(encoded, exactly_96); // No hash suffix.

        // A path that encodes to 97 bytes should be truncated.
        let just_over = "a".repeat(97);
        let encoded = encode_workspace_path(Utf8Path::new(&just_over));
        assert_eq!(encoded.len(), 96);
        // Should have hash suffix (starting at byte 88, not 90 as the old
        // slice had it — the old version passed only because 'a' is hex).
        let hash_suffix = &encoded[SUFFIX_START..];
        assert!(hash_suffix.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn test_truncation_different_suffixes_same_prefix() {
        // Two paths with the same prefix but different endings should get different hashes.
        let base = "a".repeat(90);
        let path_a = format!("{base}XXXXXXX");
        let path_b = format!("{base}YYYYYYY");

        let encoded_a = encode_workspace_path(Utf8Path::new(&path_a));
        let encoded_b = encode_workspace_path(Utf8Path::new(&path_b));

        // Both should be truncated (97 chars each).
        assert_eq!(encoded_a.len(), 96);
        assert_eq!(encoded_b.len(), 96);

        // The hash suffixes should be different. Compare the *full* suffix
        // (bytes 88..96); slicing at 90 compared only the last 6 hex digits
        // and could spuriously fail on a partial tail collision.
        assert_ne!(
            &encoded_a[SUFFIX_START..],
            &encoded_b[SUFFIX_START..],
            "different paths should have different hash suffixes"
        );
    }
}
519}