use crate::errors::StateDirError;
use camino::{Utf8Path, Utf8PathBuf};
use etcetera::{BaseStrategy, choose_base_strategy};
use xxhash_rust::xxh3::xxh3_64;
/// Maximum length in bytes of an encoded workspace-path directory component.
/// Longer encodings are truncated and given a hash suffix (see
/// `truncate_with_hash`) so the component stays filesystem-friendly.
const MAX_ENCODED_LEN: usize = 96;
/// Length in hex characters of the xxh3-derived suffix appended to truncated
/// encodings to keep distinct long paths from colliding.
const HASH_SUFFIX_LEN: usize = 8;
/// Environment variable that, when set, overrides the platform base state
/// directory used by `records_state_dir`.
pub const NEXTEST_STATE_DIR_ENV: &str = "NEXTEST_STATE_DIR";
/// Returns the per-workspace records state directory:
/// `<base>/projects/<encoded-workspace>/records`.
///
/// The base directory is taken from the `NEXTEST_STATE_DIR` environment
/// variable if set; otherwise it is `<platform state dir>/nextest`, falling
/// back to `<platform cache dir>/nextest` when the base strategy provides no
/// state directory.
///
/// # Errors
///
/// * [`StateDirError::Canonicalize`] if `workspace_root` cannot be
///   canonicalized (e.g. it does not exist).
/// * [`StateDirError::BaseDirStrategy`] if the platform base-directory
///   strategy cannot be determined.
/// * [`StateDirError::StateDirNotUtf8`] if the platform directory is not
///   valid UTF-8.
pub fn records_state_dir(workspace_root: &Utf8Path) -> Result<Utf8PathBuf, StateDirError> {
    // Canonicalize first so a symlinked workspace and its target map to the
    // same state directory, then encode into one filesystem-safe component.
    // (Previously this logic was duplicated in both branches below.)
    let canonical_workspace =
        workspace_root
            .canonicalize_utf8()
            .map_err(|error| StateDirError::Canonicalize {
                workspace_root: workspace_root.to_owned(),
                error,
            })?;
    let encoded_workspace = encode_workspace_path(&canonical_workspace);

    // An explicit override takes precedence over the platform strategy. Note
    // that the override is used as-is (no "nextest" segment is appended).
    let base_dir = if let Ok(state_dir) = std::env::var(NEXTEST_STATE_DIR_ENV) {
        Utf8PathBuf::from(state_dir)
    } else {
        let strategy = choose_base_strategy().map_err(StateDirError::BaseDirStrategy)?;
        // Prefer the platform state dir; fall back to the cache dir when the
        // strategy does not define one.
        let nextest_dir = match strategy.state_dir() {
            Some(base_state_dir) => base_state_dir.join("nextest"),
            None => strategy.cache_dir().join("nextest"),
        };
        Utf8PathBuf::from_path_buf(nextest_dir.clone())
            .map_err(|_| StateDirError::StateDirNotUtf8 { path: nextest_dir })?
    };

    Ok(base_dir
        .join("projects")
        .join(&encoded_workspace)
        .join("records"))
}
/// Encodes a workspace path into a single filesystem-safe directory
/// component, escaping separators and characters that are reserved on
/// common filesystems.
///
/// The scheme uses `_` as the escape character (`_` itself becomes `__`),
/// so the encoding is reversible via `decode_workspace_path`. Results
/// longer than `MAX_ENCODED_LEN` bytes are truncated with a hash suffix.
pub fn encode_workspace_path(path: &Utf8Path) -> String {
    // Worst case every char escapes to two bytes; reserve up front.
    let mut encoded = String::with_capacity(path.as_str().len() * 2);
    for ch in path.as_str().chars() {
        // Map each special character to its two-byte escape; everything
        // else passes through untouched.
        let escape = match ch {
            '_' => Some("__"),
            '/' => Some("_s"),
            '\\' => Some("_b"),
            ':' => Some("_c"),
            '*' => Some("_a"),
            '"' => Some("_q"),
            '<' => Some("_l"),
            '>' => Some("_g"),
            '|' => Some("_p"),
            '?' => Some("_m"),
            _ => None,
        };
        match escape {
            Some(esc) => encoded.push_str(esc),
            None => encoded.push(ch),
        }
    }
    truncate_with_hash(encoded)
}
/// Caps `encoded` at `MAX_ENCODED_LEN` bytes. Short strings pass through
/// unchanged; longer ones are cut at a char boundary and suffixed with an
/// 8-hex-digit xxh3 hash of the *full* encoding so distinct long paths stay
/// distinct.
fn truncate_with_hash(encoded: String) -> String {
    if encoded.len() <= MAX_ENCODED_LEN {
        return encoded;
    }
    // Hash the complete encoding before truncation so the suffix reflects
    // the whole path, including the part being cut off.
    let hash = xxh3_64(encoded.as_bytes());
    let hash_suffix = format!("{:08x}", hash & 0xFFFFFFFF);
    // Walk backwards from the byte cap to the nearest char boundary so we
    // never slice a multi-byte character in half.
    let mut cut = MAX_ENCODED_LEN - HASH_SUFFIX_LEN;
    while !encoded.is_char_boundary(cut) {
        cut -= 1;
    }
    let mut result = String::with_capacity(cut + HASH_SUFFIX_LEN);
    result.push_str(&encoded[..cut]);
    result.push_str(&hash_suffix);
    result
}
/// Decodes a component produced by `encode_workspace_path` back into a path.
///
/// Returns `None` for malformed input: a trailing `_` with no escape code,
/// or `_` followed by an unrecognized character. Truncated-with-hash
/// encodings are not reversible and will generally decode to a different
/// (prefix) path or fail.
#[cfg_attr(not(test), expect(dead_code))]
pub fn decode_workspace_path(encoded: &str) -> Option<Utf8PathBuf> {
    let mut decoded = String::with_capacity(encoded.len());
    let mut chars = encoded.chars();
    while let Some(ch) = chars.next() {
        if ch != '_' {
            decoded.push(ch);
            continue;
        }
        // `_` starts an escape; the next char selects the original
        // character. A missing or unknown escape code is malformed.
        let original = match chars.next()? {
            '_' => '_',
            's' => '/',
            'b' => '\\',
            'c' => ':',
            'a' => '*',
            'q' => '"',
            'l' => '<',
            'g' => '>',
            'p' => '|',
            'm' => '?',
            _ => return None,
        };
        decoded.push(original);
    }
    Some(Utf8PathBuf::from(decoded))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    // Smoke test: the computed state dir contains the expected
    // "nextest"/"projects"/"records" segments.
    // NOTE(review): assumes NEXTEST_STATE_DIR is not set in the test
    // environment — if it is, the "nextest" segment may be absent. Confirm
    // the test runner clears this variable.
    #[test]
    fn test_records_state_dir() {
        let temp_dir =
            Utf8PathBuf::try_from(std::env::temp_dir()).expect("temp dir should be valid UTF-8");
        let state_dir = records_state_dir(&temp_dir).expect("state directory should be available");
        assert!(
            state_dir.as_str().contains("nextest"),
            "state dir should contain 'nextest': {state_dir}"
        );
        assert!(
            state_dir.as_str().contains("projects"),
            "state dir should contain 'projects': {state_dir}"
        );
        assert!(
            state_dir.as_str().contains("records"),
            "state dir should contain 'records': {state_dir}"
        );
    }

    // Canonicalization should collapse a symlink and its target to the same
    // state directory.
    #[test]
    fn test_records_state_dir_canonicalizes_symlinks() {
        let temp_dir = camino_tempfile::tempdir().expect("tempdir should be created");
        let real_path = temp_dir.path().to_path_buf();
        let workspace = real_path.join("workspace");
        fs::create_dir(&workspace).expect("workspace dir should be created");
        let symlink_path = real_path.join("symlink-to-workspace");
        // Symlink creation is platform-specific; only one cfg arm compiles.
        #[cfg(unix)]
        std::os::unix::fs::symlink(&workspace, &symlink_path)
            .expect("symlink should be created on Unix");
        #[cfg(windows)]
        std::os::windows::fs::symlink_dir(&workspace, &symlink_path)
            .expect("symlink should be created on Windows");
        let state_via_real =
            records_state_dir(&workspace).expect("state dir via real path should be available");
        let state_via_symlink =
            records_state_dir(&symlink_path).expect("state dir via symlink should be available");
        assert_eq!(
            state_via_real, state_via_symlink,
            "state dir should be the same whether accessed via real path or symlink"
        );
    }

    // Table-driven check of the escape scheme: every special character and
    // the escape character itself map to their documented two-byte escapes.
    #[test]
    fn test_encode_workspace_path() {
        let cases = [
            ("", ""),
            ("simple", "simple"),
            ("/home/user", "_shome_suser"),
            ("/home/user/project", "_shome_suser_sproject"),
            ("C:\\Users\\name", "C_c_bUsers_bname"),
            ("D:\\dev\\project", "D_c_bdev_bproject"),
            ("/path_with_underscore", "_spath__with__underscore"),
            ("C:\\path_name", "C_c_bpath__name"),
            ("/a/b/c", "_sa_sb_sc"),
            ("/weird*path", "_sweird_apath"),
            ("/path?query", "_spath_mquery"),
            ("/file<name>", "_sfile_lname_g"),
            ("/path|pipe", "_spath_ppipe"),
            ("/\"quoted\"", "_s_qquoted_q"),
            ("*\"<>|?", "_a_q_l_g_p_m"),
        ];
        for (input, expected) in cases {
            let encoded = encode_workspace_path(Utf8Path::new(input));
            assert_eq!(
                encoded, expected,
                "encoding failed for {input:?}: expected {expected:?}, got {encoded:?}"
            );
        }
    }

    // decode(encode(p)) == p for short (non-truncated) paths, including
    // tricky underscore placements.
    #[test]
    fn test_encode_decode_roundtrip() {
        let cases = [
            "/home/user/project",
            "C:\\Users\\name\\dev",
            "/path_with_underscore",
            "/_",
            "_/",
            "__",
            "/a_b/c_d",
            "",
            "no_special_chars",
            "/mixed\\path:style",
            "/path*with*asterisks",
            "/file?query",
            "/path<with>angles",
            "/pipe|char",
            "/\"quoted\"",
            "/all*special?chars<in>one|path\"here\"_end",
        ];
        for original in cases {
            let encoded = encode_workspace_path(Utf8Path::new(original));
            let decoded = decode_workspace_path(&encoded);
            assert_eq!(
                decoded.as_deref(),
                Some(Utf8Path::new(original)),
                "roundtrip failed for {original:?}: encoded={encoded:?}, decoded={decoded:?}"
            );
        }
    }

    // Adversarially-chosen distinct inputs must never collide after encoding
    // (the escape scheme is injective).
    #[test]
    fn test_encoding_is_bijective() {
        let pairs = [
            ("/-", "-/"),
            ("/a", "_a"),
            ("_s", "/"),
            ("a_", "a/"),
            ("__", "_"),
            ("/", "\\"),
            ("_a", "*"),
            ("_q", "\""),
            ("_l", "<"),
            ("_g", ">"),
            ("_p", "|"),
            ("_m", "?"),
            ("*", "?"),
            ("<", ">"),
            ("|", "\""),
        ];
        for (a, b) in pairs {
            let encoded_a = encode_workspace_path(Utf8Path::new(a));
            let encoded_b = encode_workspace_path(Utf8Path::new(b));
            assert_ne!(
                encoded_a, encoded_b,
                "bijectivity violated: {a:?} and {b:?} both encode to {encoded_a:?}"
            );
        }
    }

    // Dangling or unknown escape sequences must be rejected, and decoding is
    // case-sensitive ("_S" is not "_s").
    #[test]
    fn test_decode_rejects_malformed() {
        let malformed_inputs = [
            "_", "_x", "foo_", "foo_x", "_S",
        ];
        for input in malformed_inputs {
            assert!(
                decode_workspace_path(input).is_none(),
                "should reject malformed input: {input:?}"
            );
        }
    }

    // Each escape code decodes to its original character, alone and inside
    // larger strings.
    #[test]
    fn test_decode_valid_escapes() {
        let cases = [
            ("__", "_"),
            ("_s", "/"),
            ("_b", "\\"),
            ("_c", ":"),
            ("a__b", "a_b"),
            ("_shome", "/home"),
            ("_a", "*"),
            ("_q", "\""),
            ("_l", "<"),
            ("_g", ">"),
            ("_p", "|"),
            ("_m", "?"),
            ("_spath_astar_mquery", "/path*star?query"),
        ];
        for (input, expected) in cases {
            let decoded = decode_workspace_path(input);
            assert_eq!(
                decoded.as_deref(),
                Some(Utf8Path::new(expected)),
                "decode failed for {input:?}: expected {expected:?}, got {decoded:?}"
            );
        }
    }

    // Encodings at or under the cap are returned verbatim (no hash suffix).
    #[test]
    fn test_short_paths_not_truncated() {
        let short_path = "/a/b/c/d";
        let encoded = encode_workspace_path(Utf8Path::new(short_path));
        assert!(
            encoded.len() <= MAX_ENCODED_LEN,
            "short path should not be truncated: {encoded:?} (len={})",
            encoded.len()
        );
        assert_eq!(encoded, "_sa_sb_sc_sd");
    }

    // Over-long encodings are cut to exactly MAX_ENCODED_LEN bytes and end
    // with an 8-hex-digit hash suffix.
    #[test]
    fn test_long_paths_truncated_with_hash() {
        let long_path = "/a".repeat(50);
        let encoded = encode_workspace_path(Utf8Path::new(&long_path));
        assert_eq!(
            encoded.len(),
            MAX_ENCODED_LEN,
            "truncated path should be exactly {MAX_ENCODED_LEN} bytes: {encoded:?} (len={})",
            encoded.len()
        );
        let hash_suffix = &encoded[encoded.len() - HASH_SUFFIX_LEN..];
        assert!(
            hash_suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "hash suffix should be hex digits: {hash_suffix:?}"
        );
    }

    // The hash suffix is computed over the full (pre-truncation) encoding,
    // so paths sharing a truncated prefix still differ.
    #[test]
    fn test_truncation_preserves_uniqueness() {
        let path_a = "/a".repeat(50);
        let path_b = "/b".repeat(50);
        let encoded_a = encode_workspace_path(Utf8Path::new(&path_a));
        let encoded_b = encode_workspace_path(Utf8Path::new(&path_b));
        assert_ne!(
            encoded_a, encoded_b,
            "different paths should produce different encodings even when truncated"
        );
    }

    // Truncation must cut at a char boundary: the result stays valid UTF-8
    // and within the byte cap even for multi-byte input.
    #[test]
    fn test_truncation_with_unicode() {
        let unicode_path = "/日本語".repeat(20);
        let encoded = encode_workspace_path(Utf8Path::new(&unicode_path));
        assert!(
            encoded.len() <= MAX_ENCODED_LEN,
            "encoded path should not exceed {MAX_ENCODED_LEN} bytes: len={}",
            encoded.len()
        );
        // `encoded` being a String already guarantees valid UTF-8.
        let _ = encoded.as_str();
        let hash_suffix = &encoded[encoded.len() - HASH_SUFFIX_LEN..];
        assert!(
            hash_suffix.chars().all(|c| c.is_ascii_hexdigit()),
            "hash suffix should be hex digits: {hash_suffix:?}"
        );
    }

    // Exactly 96 bytes passes through untouched; 97 bytes triggers the
    // truncate-plus-hash path.
    #[test]
    fn test_truncation_boundary_at_96_bytes() {
        let exactly_96 = "a".repeat(96);
        let encoded = encode_workspace_path(Utf8Path::new(&exactly_96));
        assert_eq!(encoded.len(), 96);
        assert_eq!(encoded, exactly_96);
        let just_over = "a".repeat(97);
        let encoded = encode_workspace_path(Utf8Path::new(&just_over));
        assert_eq!(encoded.len(), 96);
        let hash_suffix = &encoded[90..];
        assert!(hash_suffix.chars().all(|c| c.is_ascii_hexdigit()));
    }

    // Two paths identical in their kept prefix but differing only in the
    // truncated tail must get different hash suffixes.
    #[test]
    fn test_truncation_different_suffixes_same_prefix() {
        let base = "a".repeat(90);
        let path_a = format!("{base}XXXXXXX");
        let path_b = format!("{base}YYYYYYY");
        let encoded_a = encode_workspace_path(Utf8Path::new(&path_a));
        let encoded_b = encode_workspace_path(Utf8Path::new(&path_b));
        assert_eq!(encoded_a.len(), 96);
        assert_eq!(encoded_b.len(), 96);
        assert_ne!(
            &encoded_a[90..],
            &encoded_b[90..],
            "different paths should have different hash suffixes"
        );
    }
}