use crate::hash::Blake3Hash;
use std::fmt;
use std::path::{Path, PathBuf};
/// Identity of a cached entry: a file path, a language name, and a content hash.
///
/// Equality and hashing are derived over all four fields, so two keys with the
/// same path, language, and hash still compare unequal when their
/// `canonicalization_succeeded` flags differ.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CacheKey {
/// Path stored by the constructor. `CacheKey::new` stores the canonicalized
/// path when canonicalization succeeds and the path as given otherwise
/// (lowercased on Windows/macOS targets); `CacheKey::from_raw_path` stores the
/// caller's path verbatim.
canonical_path: PathBuf,
/// Language name exactly as supplied by the caller.
language: String,
/// Content hash exactly as supplied by the caller.
content_hash: Blake3Hash,
/// `true` when `Path::canonicalize` succeeded in `CacheKey::new`; always `true`
/// for keys built with `CacheKey::from_raw_path`.
canonicalization_succeeded: bool,
}
impl CacheKey {
    /// Builds a cache key for `path`, canonicalizing the path when possible.
    ///
    /// The path is resolved with [`Path::canonicalize`]. When that fails (for
    /// example because the file does not exist) the path is kept as given and
    /// [`CacheKey::is_canonical`] reports `false`. In both cases the resulting
    /// path is then lowercased on Windows and macOS targets.
    ///
    /// # Parameters
    /// - `path`: file the key refers to.
    /// - `language`: language name stored verbatim in the key.
    /// - `content_hash`: hash of the file contents stored verbatim in the key.
    pub fn new<P: AsRef<Path>>(
        path: P,
        language: impl Into<String>,
        content_hash: Blake3Hash,
    ) -> Self {
        let path = path.as_ref();
        let language = language.into();
        let (resolved, canonicalization_succeeded) = match path.canonicalize() {
            Ok(canonical) => {
                log::trace!(
                    "Canonicalized cache key path: {} -> {}",
                    path.display(),
                    canonical.display()
                );
                (canonical, true)
            }
            Err(e) => {
                // Best effort: a key is still produced, but it is flagged as
                // non-canonical so callers can tell the two cases apart.
                log::debug!(
                    "Cache key canonicalization failed for {}: {}. Using original path.",
                    path.display(),
                    e
                );
                (path.to_path_buf(), false)
            }
        };
        let canonical_path = Self::normalize_case_if_needed(resolved);
        Self {
            canonical_path,
            language,
            content_hash,
            canonicalization_succeeded,
        }
    }

    /// Lowercases `path` on Windows and macOS targets; returns it unchanged elsewhere.
    ///
    /// On Windows/macOS a path that is not valid UTF-8 cannot be lowercased as a
    /// string; it is logged at debug level and returned unchanged.
    fn normalize_case_if_needed(path: PathBuf) -> PathBuf {
        #[cfg(any(target_os = "windows", target_os = "macos"))]
        {
            if let Some(path_str) = path.to_str() {
                PathBuf::from(path_str.to_lowercase())
            } else {
                log::debug!("Cannot normalize non-UTF8 path: {:?}", path);
                path
            }
        }
        #[cfg(not(any(target_os = "windows", target_os = "macos")))]
        {
            path
        }
    }

    /// Builds a cache key from `path` exactly as given.
    ///
    /// No canonicalization and no case normalization is applied, and the key
    /// always reports [`CacheKey::is_canonical`] as `true`.
    pub fn from_raw_path<P: Into<PathBuf>>(
        path: P,
        language: impl Into<String>,
        content_hash: Blake3Hash,
    ) -> Self {
        Self {
            canonical_path: path.into(),
            language: language.into(),
            content_hash,
            canonicalization_succeeded: true,
        }
    }

    /// Returns the path stored in this key.
    #[must_use]
    pub fn path(&self) -> &Path {
        &self.canonical_path
    }

    /// Returns the language name stored in this key.
    #[must_use]
    pub fn language(&self) -> &str {
        &self.language
    }

    /// Returns the content hash stored in this key.
    #[must_use]
    pub fn content_hash(&self) -> &Blake3Hash {
        &self.content_hash
    }

    /// Returns `true` when the stored path came from a successful
    /// canonicalization in [`CacheKey::new`], or when the key was built with
    /// [`CacheKey::from_raw_path`].
    #[must_use]
    pub fn is_canonical(&self) -> bool {
        self.canonicalization_succeeded
    }

    /// Returns a slash-separated storage key of the form
    /// `language/content_hash_hex/path_hash/filename`.
    ///
    /// - `path_hash` is the hex encoding of the first 8 bytes of the BLAKE3
    ///   hash of the stored path (16 hex characters).
    /// - `filename` is the final path component, or `"unknown"` when the path
    ///   has no final component or that component is not valid UTF-8.
    #[must_use]
    pub fn storage_key(&self) -> String {
        let filename = self
            .canonical_path
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("unknown");
        let path_hash = {
            // Hash the raw OS-string bytes rather than a lossy UTF-8 rendering:
            // the lossy form maps every invalid sequence to U+FFFD, so distinct
            // non-UTF-8 paths would share a hash (and the "unknown" file name).
            // For valid UTF-8 paths the bytes, and therefore the hash, are the
            // same as hashing the string form.
            let path_bytes = self.canonical_path.as_os_str().as_encoded_bytes();
            let hash = blake3::hash(path_bytes);
            hex::encode(&hash.as_bytes()[..8])
        };
        format!(
            "{}/{}/{}/{}",
            self.language,
            self.content_hash.to_hex(),
            path_hash,
            filename
        )
    }
}
/// Renders the key as `path:language:content_hash_hex`.
impl fmt::Display for CacheKey {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The canonicalization flag is intentionally not part of the rendering.
        let Self {
            canonical_path,
            language,
            content_hash,
            ..
        } = self;
        let shown_path = canonical_path.display();
        let hex_digest = content_hash.to_hex();
        write!(f, "{shown_path}:{language}:{hex_digest}")
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cache::GraphNodeSummary;
    use std::fs;
    use std::sync::Arc;
    use tempfile::TempDir;

    /// Builds a deterministic hash whose 32 bytes are all `byte`.
    fn make_test_hash(byte: u8) -> Blake3Hash {
        Blake3Hash::from_bytes([byte; 32])
    }

    /// `new` stores the language and content hash it was given.
    #[test]
    fn test_cache_key_new() {
        let hash = make_test_hash(0x42);
        let key = CacheKey::new(PathBuf::from("test.rs"), "rust", hash);
        assert_eq!(key.language(), "rust");
        assert_eq!(key.content_hash(), &hash);
    }

    /// `from_raw_path` stores the path verbatim and reports the key as canonical.
    #[test]
    fn test_cache_key_from_raw_path() {
        let hash = make_test_hash(0x42);
        let path = PathBuf::from("/absolute/path/test.rs");
        let key = CacheKey::from_raw_path(path.clone(), "rust", hash);
        assert_eq!(key.path(), path.as_path());
        assert_eq!(key.language(), "rust");
        assert_eq!(key.content_hash(), &hash);
        assert!(key.is_canonical());
    }

    /// Keys are equal only when path, language, and content hash all match.
    #[test]
    fn test_cache_key_equality() {
        let hash1 = make_test_hash(0x42);
        let hash2 = make_test_hash(0x43);
        let key1 = CacheKey::from_raw_path("/path/file.rs", "rust", hash1);
        let key2 = CacheKey::from_raw_path("/path/file.rs", "rust", hash1);
        let key3 = CacheKey::from_raw_path("/path/file.rs", "python", hash1);
        let key4 = CacheKey::from_raw_path("/path/file.rs", "rust", hash2);
        let key5 = CacheKey::from_raw_path("/other/file.rs", "rust", hash1);
        assert_eq!(key1, key2);
        assert_ne!(key1, key3);
        assert_ne!(key1, key4);
        assert_ne!(key1, key5);
    }

    /// Equal keys hash identically, so the second insert replaces the first.
    #[test]
    fn test_cache_key_hash_consistency() {
        use std::collections::HashMap;
        let hash = make_test_hash(0x42);
        let key1 = CacheKey::from_raw_path("/path/file.rs", "rust", hash);
        let key2 = CacheKey::from_raw_path("/path/file.rs", "rust", hash);
        let mut map = HashMap::new();
        map.insert(key1.clone(), "value1");
        map.insert(key2, "value2");
        assert_eq!(map.len(), 1);
        assert_eq!(map.get(&key1), Some(&"value2"));
    }

    /// The storage key has the shape `language/content_hash/path_hash/filename`.
    #[test]
    fn test_cache_key_storage_key() {
        let hash = make_test_hash(0x42);
        let key = CacheKey::from_raw_path("/path/file.rs", "rust", hash);
        let storage_key = key.storage_key();
        assert!(storage_key.starts_with("rust/"));
        assert!(storage_key.ends_with("/file.rs"));
        let parts: Vec<&str> = storage_key.split('/').collect();
        assert_eq!(
            parts.len(),
            4,
            "Should have 4 parts: language/content_hash/path_hash/filename"
        );
        assert_eq!(parts[0], "rust");
        assert_eq!(parts[1].len(), 64, "Content hash should be 64 hex chars");
        assert_eq!(parts[2].len(), 16, "Path hash should be 16 hex chars");
        assert_eq!(parts[3], "file.rs");
    }

    /// Same file name and content under different directories must not collide.
    #[test]
    fn test_cache_key_storage_no_collision() {
        let hash = make_test_hash(0x42);
        let key1 = CacheKey::from_raw_path("/project1/main.rs", "rust", hash);
        let key2 = CacheKey::from_raw_path("/project2/main.rs", "rust", hash);
        let storage1 = key1.storage_key();
        let storage2 = key2.storage_key();
        assert_ne!(
            storage1, storage2,
            "Different paths should produce different storage keys"
        );
        assert!(storage1.starts_with("rust/"));
        assert!(storage2.starts_with("rust/"));
        let parts1: Vec<&str> = storage1.split('/').collect();
        let parts2: Vec<&str> = storage2.split('/').collect();
        assert_eq!(parts1[1], parts2[1], "Same content hash");
        assert_ne!(parts1[2], parts2[2], "Different path hashes");
        assert_eq!(parts1[3], parts2[3], "Same filename");
    }

    /// `Display` output contains the path, the language, and the hex hash.
    #[test]
    fn test_cache_key_display() {
        let hash = make_test_hash(0x42);
        let key = CacheKey::from_raw_path("/path/file.rs", "rust", hash);
        let display = format!("{key}");
        assert!(display.contains("/path/file.rs"));
        assert!(display.contains("rust"));
        assert!(display.contains(&hash.to_hex()));
    }

    /// An existing file canonicalizes to an absolute path.
    #[test]
    fn test_cache_key_canonicalization_success() {
        // A private temp directory avoids name clashes with concurrent test
        // runs and is removed on drop even when an assertion below panics.
        let tmp_cache_dir = TempDir::new().unwrap();
        let temp_file = tmp_cache_dir.path().join("sqry_test_cache_key.rs");
        fs::write(&temp_file, "fn main() {}").unwrap();
        let hash = make_test_hash(0x42);
        let key = CacheKey::new(&temp_file, "rust", hash);
        assert!(key.is_canonical());
        assert!(key.path().is_absolute());
    }

    /// A path that cannot be canonicalized is kept as given and flagged.
    #[test]
    fn test_cache_key_canonicalization_fallback() {
        let nonexistent = PathBuf::from("/nonexistent/path/file.rs");
        let hash = make_test_hash(0x42);
        let key = CacheKey::new(&nonexistent, "rust", hash);
        assert!(!key.is_canonical());
        assert_eq!(key.path(), nonexistent.as_path());
    }

    /// The language participates in both equality and the storage key.
    #[test]
    fn test_cache_key_different_languages() {
        let hash = make_test_hash(0x42);
        let key_rust = CacheKey::from_raw_path("/path/file.txt", "rust", hash);
        let key_python = CacheKey::from_raw_path("/path/file.txt", "python", hash);
        assert_ne!(key_rust, key_python);
        assert_ne!(key_rust.storage_key(), key_python.storage_key());
    }

    /// A path and its pre-canonicalized form yield equal keys.
    ///
    /// Note: despite the name, both inputs derive from the temp-directory path;
    /// no relative path is exercised here.
    #[test]
    fn test_cache_key_relative_vs_absolute() {
        // Private temp directory: no shared fixed file name, cleaned up on drop.
        let tmp_cache_dir = TempDir::new().unwrap();
        let temp_file = tmp_cache_dir.path().join("sqry_test_relative.rs");
        fs::write(&temp_file, "// test").unwrap();
        let hash = make_test_hash(0x42);
        let key1 = CacheKey::new(&temp_file, "rust", hash);
        let key2 = CacheKey::new(temp_file.canonicalize().unwrap(), "rust", hash);
        assert!(key1.is_canonical());
        assert!(key2.is_canonical());
        assert_eq!(key1, key2);
    }

    /// On Windows/macOS targets, case variants normalize to the same lowercase path.
    #[test]
    #[cfg(any(target_os = "windows", target_os = "macos"))]
    fn test_cache_key_case_normalization() {
        let normalized_lower =
            CacheKey::normalize_case_if_needed(PathBuf::from("/path/to/file.rs"));
        let normalized_upper =
            CacheKey::normalize_case_if_needed(PathBuf::from("/PATH/TO/FILE.RS"));
        let normalized_mixed =
            CacheKey::normalize_case_if_needed(PathBuf::from("/Path/To/File.rs"));
        assert_eq!(normalized_lower, normalized_upper);
        assert_eq!(normalized_lower, normalized_mixed);
        assert_eq!(normalized_lower.to_str().unwrap(), "/path/to/file.rs");
    }

    /// On all other targets, the path's case is left untouched.
    #[test]
    #[cfg(not(any(target_os = "windows", target_os = "macos")))]
    fn test_cache_key_case_preservation() {
        let lowercase_path = PathBuf::from("/path/to/file.rs");
        let uppercase_path = PathBuf::from("/PATH/TO/FILE.RS");
        let normalized_lower = CacheKey::normalize_case_if_needed(lowercase_path.clone());
        let normalized_upper = CacheKey::normalize_case_if_needed(uppercase_path.clone());
        assert_eq!(normalized_lower, lowercase_path);
        assert_eq!(normalized_upper, uppercase_path);
        assert_ne!(normalized_lower, normalized_upper);
    }

    /// On Unix, a symlink and its target produce keys with the same path.
    #[test]
    fn test_cache_key_symlink_resolution() {
        let tmp_cache_dir = TempDir::new().unwrap();
        let real_file = tmp_cache_dir.path().join("real_file.rs");
        let symlink = tmp_cache_dir.path().join("symlink.rs");
        fs::write(&real_file, "fn test() {}").unwrap();
        #[cfg(unix)]
        {
            std::os::unix::fs::symlink(&real_file, &symlink).unwrap();
            let hash = make_test_hash(0x42);
            let key_real = CacheKey::new(&real_file, "rust", hash);
            let key_symlink = CacheKey::new(&symlink, "rust", hash);
            assert_eq!(
                key_real.path(),
                key_symlink.path(),
                "Symlinks should resolve to the same canonical path"
            );
        }
        #[cfg(not(unix))]
        {
            // Nothing to check without Unix symlinks; consume the bindings so
            // they are not reported as unused.
            let _ = (real_file, symlink);
        }
    }

    /// On Windows/macOS targets, differently-cased spellings of one file agree
    /// once lowercased.
    #[test]
    fn test_cache_key_mixed_case_paths_same_file() {
        let tmp_cache_dir = TempDir::new().unwrap();
        let file_path = tmp_cache_dir.path().join("TestFile.rs");
        fs::write(&file_path, "fn test() {}").unwrap();
        let hash = make_test_hash(0x42);
        let key1 = CacheKey::new(&file_path, "rust", hash);
        #[cfg(any(target_os = "windows", target_os = "macos"))]
        {
            let lowercase_path = tmp_cache_dir.path().join("testfile.rs");
            let key2 = CacheKey::new(&lowercase_path, "rust", hash);
            assert_eq!(
                key1.path().to_str().unwrap().to_lowercase(),
                key2.path().to_str().unwrap().to_lowercase(),
                "Case variations should normalize on case-insensitive filesystems"
            );
        }
        #[cfg(not(any(target_os = "windows", target_os = "macos")))]
        {
            // No case-insensitivity to verify on this target.
            let _ = key1;
        }
    }

    /// On Unix, `from_raw_path` preserves a path that is not valid UTF-8.
    #[test]
    fn test_cache_key_non_utf8_path() {
        #[cfg(unix)]
        {
            use std::ffi::OsStr;
            use std::os::unix::ffi::OsStrExt;
            let invalid_bytes = b"/tmp/\xFF\xFE.rs";
            let invalid_path = PathBuf::from(OsStr::from_bytes(invalid_bytes));
            let hash = make_test_hash(0x42);
            let key = CacheKey::from_raw_path(invalid_path.clone(), "rust", hash);
            assert_eq!(key.path(), invalid_path.as_path());
        }
    }

    /// `GraphNodeSummary::serialized_size` is positive and at most 512 for a
    /// small summary.
    #[test]
    fn test_serialized_size_fallback() {
        use crate::graph::unified::node::NodeKind;
        let summary = GraphNodeSummary::new(
            Arc::from("test_function"),
            NodeKind::Function,
            Arc::from(Path::new("test.rs")),
            10,
            0,
            20,
            1,
        );
        let size = summary.serialized_size();
        assert!(size > 0, "Serialized size should be positive");
        assert!(size <= 512, "Serialized size should be reasonable");
    }
}