use super::facts::FileFacts;
use super::seam_classification::ClassifiedSeam;
#[cfg(test)]
use super::seam_classification::SeamGripClassCounts;
use super::seam_inventory::{SeamLimitSource, repo_exposure_seam_limit};
use std::path::{Path, PathBuf};
/// Seam-limit details persisted next to cached classified seams.
///
/// Stored as the optional `seam_limit_info` of a cache entry and handed back
/// unchanged when that entry is loaded.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub(crate) struct CachedSeamLimitInfo {
// NOTE(review): presumably the number of seams analyzed under the limit — confirm with the producer.
pub(crate) analyzed: usize,
// NOTE(review): presumably the number of seams discovered before the limit was applied — confirm.
pub(crate) total: usize,
/// Origin of the seam limit that was in effect.
pub(crate) source: SeamLimitSource,
}
/// Schema version recorded in `RepoSeamCacheKey` and used as the directory
/// segment of the `repo-seam-facts` cache.
pub(crate) const CACHE_SCHEMA_VERSION: &str = "0.2";
/// Version of the sharded layout; recorded in every manifest and shard and
/// used as the last directory segment of the sharded cache.
const SHARDED_CLASSIFIED_SEAM_CACHE_SCHEMA_VERSION: &str = "0.1";
/// Directory segment of the `repo-compact-classified-seams` cache.
pub(crate) const COMPACT_CLASSIFIED_SEAM_CACHE_SCHEMA_VERSION: &str = "0.1";
/// Version of the test-only seam count cache.
#[cfg(test)]
const COUNT_CACHE_SCHEMA_VERSION: &str = "0.1";
/// Version of the per-file fact cache; part of every `RepoFileFactCacheKey`.
const FILE_FACT_CACHE_SCHEMA_VERSION: &str = "0.1";
/// Default classified-seam store limit when the environment override is unset.
pub(crate) const CLASSIFIED_SEAM_CACHE_STORE_LIMIT: usize = 20_000;
/// Environment variable that overrides `CLASSIFIED_SEAM_CACHE_STORE_LIMIT`.
pub(crate) const CLASSIFIED_SEAM_CACHE_STORE_LIMIT_ENV: &str = "RIPR_REPO_SEAM_CACHE_LIMIT";
/// Default compact classified-seam store limit when the override is unset.
pub(crate) const COMPACT_CLASSIFIED_SEAM_CACHE_STORE_LIMIT: usize = 100_000;
/// Environment variable that overrides `COMPACT_CLASSIFIED_SEAM_CACHE_STORE_LIMIT`.
pub(crate) const COMPACT_CLASSIFIED_SEAM_CACHE_STORE_LIMIT_ENV: &str =
"RIPR_COMPACT_REPO_SEAM_CACHE_MAX_SEAMS";
/// Environment variable that relocates the cache base directory.
pub(crate) const CACHE_DIR_ENV: &str = "RIPR_CACHE_DIR";
/// Picks the cache base directory from an already-read environment value.
///
/// A value that is non-empty after trimming is used verbatim (trimmed) as the
/// directory. Any other outcome — unset, not Unicode, or blank — falls back to
/// `<workspace_root>/target/ripr/cache`.
pub(crate) fn cache_base_dir_from_env(
    workspace_root: &std::path::Path,
    env_value: Result<String, std::env::VarError>,
) -> PathBuf {
    let configured = env_value
        .ok()
        .map(|raw| raw.trim().to_owned())
        .filter(|dir| !dir.is_empty());
    match configured {
        Some(dir) => PathBuf::from(dir),
        None => workspace_root.join("target").join("ripr").join("cache"),
    }
}
/// Resolves the cache base directory for `workspace_root`, honouring the
/// `CACHE_DIR_ENV` environment variable when it is set to a non-blank value.
pub(crate) fn cache_base_dir(workspace_root: &std::path::Path) -> PathBuf {
    let configured = std::env::var(CACHE_DIR_ENV);
    cache_base_dir_from_env(workspace_root, configured)
}
/// Reads the classified-seam store limit from the process environment.
///
/// # Errors
/// Returns a diagnostic naming the variable when its value is not valid UTF-8
/// or not a positive integer.
pub(crate) fn classified_seam_cache_store_limit() -> Result<usize, String> {
    let raw = std::env::var(CLASSIFIED_SEAM_CACHE_STORE_LIMIT_ENV);
    classified_seam_cache_store_limit_from_env(raw)
}
/// Resolves the classified-seam store limit from an already-read environment
/// value, defaulting to `CLASSIFIED_SEAM_CACHE_STORE_LIMIT` when it is unset.
fn classified_seam_cache_store_limit_from_env(
    value: Result<String, std::env::VarError>,
) -> Result<usize, String> {
    let env_name = CLASSIFIED_SEAM_CACHE_STORE_LIMIT_ENV;
    let fallback = CLASSIFIED_SEAM_CACHE_STORE_LIMIT;
    seam_cache_store_limit_from_env(value, env_name, fallback)
}
/// Reads the compact classified-seam store limit from the process environment.
///
/// # Errors
/// Returns a diagnostic naming the variable when its value is not valid UTF-8
/// or not a positive integer.
pub(crate) fn compact_classified_seam_cache_store_limit() -> Result<usize, String> {
    let raw = std::env::var(COMPACT_CLASSIFIED_SEAM_CACHE_STORE_LIMIT_ENV);
    compact_classified_seam_cache_store_limit_from_env(raw)
}
/// Resolves the compact store limit from an already-read environment value,
/// defaulting to `COMPACT_CLASSIFIED_SEAM_CACHE_STORE_LIMIT` when it is unset.
fn compact_classified_seam_cache_store_limit_from_env(
    value: Result<String, std::env::VarError>,
) -> Result<usize, String> {
    let env_name = COMPACT_CLASSIFIED_SEAM_CACHE_STORE_LIMIT_ENV;
    let fallback = COMPACT_CLASSIFIED_SEAM_CACHE_STORE_LIMIT;
    seam_cache_store_limit_from_env(value, env_name, fallback)
}
/// Shared resolution logic for the seam cache store limits.
///
/// * unset variable -> `default_limit`
/// * non-Unicode value -> error mentioning `env_name`
/// * any other value -> must parse as a positive integer
fn seam_cache_store_limit_from_env(
    value: Result<String, std::env::VarError>,
    env_name: &str,
    default_limit: usize,
) -> Result<usize, String> {
    let raw = match value {
        Ok(raw) => raw,
        Err(std::env::VarError::NotPresent) => return Ok(default_limit),
        Err(std::env::VarError::NotUnicode(_)) => {
            return Err(format!("{env_name} must be valid UTF-8"));
        }
    };
    parse_positive_seam_cache_store_limit(&raw, env_name)
}
/// Parses `value` (surrounding whitespace ignored) as a strictly positive
/// `usize`.
///
/// # Errors
/// Returns a message prefixed with `env_name` when the text is not an
/// unsigned integer or when it is zero.
fn parse_positive_seam_cache_store_limit(value: &str, env_name: &str) -> Result<usize, String> {
    match value.trim().parse::<usize>() {
        Ok(0) => Err(format!("{env_name} must be a positive integer")),
        Ok(limit) => Ok(limit),
        Err(err) => Err(format!("{env_name} must be a positive integer: {err}")),
    }
}
/// Identity of a repo-wide seam cache entry.
///
/// Every field takes part in the entry's file name (see `filename`). The field
/// notes below describe what `WorkspaceState::cache_key` puts in each one.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct RepoSeamCacheKey {
/// Cache schema version (`CACHE_SCHEMA_VERSION`).
pub(crate) schema_version: String,
/// Package version of the analyzer, captured at compile time.
pub(crate) analyzer_version: String,
/// Hash of the workspace root path.
pub(crate) workspace_root_hash: String,
/// Hash over the path-sorted list of file paths and per-file content hashes.
pub(crate) files_content_hash: String,
/// Hash of the cfg-features text (empty text when absent).
pub(crate) cfg_features_hash: String,
/// Hash of the configuration text (empty text when absent).
pub(crate) config_hash: String,
/// Hash of the test-intent text (empty text when absent).
pub(crate) test_intent_hash: String,
/// Hash of the suppressions text (empty text when absent).
pub(crate) suppressions_hash: String,
/// `"unlimited"` or `"limit_<n>"`, reflecting the active seam limit.
pub(crate) seam_limit_key: String,
}
/// Identity of a per-file fact cache entry.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct RepoFileFactCacheKey {
/// File fact cache schema version (`FILE_FACT_CACHE_SCHEMA_VERSION`).
schema_version: String,
/// Package version of the analyzer, captured at compile time.
analyzer_version: String,
/// Path of the file, exactly as passed to `new`.
file_path: PathBuf,
/// Hash of the file's content bytes.
content_hash: String,
}
impl RepoFileFactCacheKey {
    /// Builds the key for `file_path` with the given `content`.
    ///
    /// The content is reduced to its hash; the schema and analyzer versions
    /// are filled in from compile-time constants.
    pub(crate) fn new(file_path: &Path, content: &[u8]) -> Self {
        let content_hash = hash_bytes(content);
        Self {
            schema_version: FILE_FACT_CACHE_SCHEMA_VERSION.to_string(),
            analyzer_version: env!("CARGO_PKG_VERSION").to_string(),
            file_path: file_path.to_path_buf(),
            content_hash,
        }
    }

    /// File name of this entry: the FNV-1a hash of all key fields joined by
    /// NUL, rendered as 16 hex digits plus `.json`.
    fn filename(&self) -> String {
        let file_path = self.file_path.to_string_lossy();
        let joined = [
            self.schema_version.as_str(),
            self.analyzer_version.as_str(),
            file_path.as_ref(),
            self.content_hash.as_str(),
        ]
        .join("\0");
        format!("{:016x}.json", fnv1a_64(joined.as_bytes()))
    }
}
impl RepoSeamCacheKey {
    /// File name of this entry: the FNV-1a hash of all nine key fields joined
    /// by NUL, rendered as 16 hex digits plus `.json`.
    pub(crate) fn filename(&self) -> String {
        let joined = [
            self.schema_version.as_str(),
            self.analyzer_version.as_str(),
            self.workspace_root_hash.as_str(),
            self.files_content_hash.as_str(),
            self.cfg_features_hash.as_str(),
            self.config_hash.as_str(),
            self.test_intent_hash.as_str(),
            self.suppressions_hash.as_str(),
            self.seam_limit_key.as_str(),
        ]
        .join("\0");
        format!("{:016x}.json", fnv1a_64(joined.as_bytes()))
    }
}
/// Outcome of looking up an entry in one of the on-disk caches.
#[derive(Debug)]
pub(crate) enum CacheLoad<T> {
/// An entry was found, decoded, and matched the requested key.
Hit(T),
/// No entry exists, or the stored entry belongs to a different key.
Miss,
/// An entry exists but could not be read, decoded, or validated; `reason`
/// explains why it was ignored.
CorruptIgnored { reason: String },
}
/// Borrowed snapshot of the inputs that determine a `RepoSeamCacheKey`.
pub(crate) struct WorkspaceState<'a> {
/// Root of the workspace; its path string is hashed into the key.
pub(crate) workspace_root: &'a Path,
/// `(path, content bytes)` pairs; order does not matter, they are sorted by
/// path before hashing.
pub(crate) files: &'a [(PathBuf, Vec<u8>)],
/// Optional cfg-features text; `None` hashes like the empty string.
pub(crate) cfg_features: Option<&'a str>,
/// Optional configuration text; `None` hashes like the empty string.
pub(crate) config_text: Option<&'a str>,
/// Optional test-intent text; `None` hashes like the empty string.
pub(crate) test_intent_text: Option<&'a str>,
/// Optional suppressions text; `None` hashes like the empty string.
pub(crate) suppressions_text: Option<&'a str>,
}
impl<'a> WorkspaceState<'a> {
    /// Derives the cache key for this workspace snapshot.
    ///
    /// Files are sorted by path so the key does not depend on the order in
    /// which they were collected. Backslashes in paths are normalised to `/`
    /// before hashing. Absent optional texts hash like the empty string.
    pub(crate) fn cache_key(&self) -> RepoSeamCacheKey {
        let digest_or_empty = |text: Option<&str>| hash_str(text.unwrap_or(""));

        let mut ordered: Vec<&(PathBuf, Vec<u8>)> = self.files.iter().collect();
        ordered.sort_by(|left, right| left.0.cmp(&right.0));

        // One `<path>\0<content hash>\n` record per file.
        let files_buf = ordered
            .into_iter()
            .fold(String::new(), |mut acc, (path, content)| {
                acc.push_str(&path.to_string_lossy().replace('\\', "/"));
                acc.push('\0');
                acc.push_str(&hash_bytes(content));
                acc.push('\n');
                acc
            });

        let seam_limit_key = if let Some((n, _)) = repo_exposure_seam_limit() {
            format!("limit_{n}")
        } else {
            "unlimited".to_string()
        };

        RepoSeamCacheKey {
            schema_version: CACHE_SCHEMA_VERSION.to_string(),
            analyzer_version: env!("CARGO_PKG_VERSION").to_string(),
            workspace_root_hash: hash_str(&self.workspace_root.to_string_lossy()),
            files_content_hash: hash_str(&files_buf),
            cfg_features_hash: digest_or_empty(self.cfg_features),
            config_hash: digest_or_empty(self.config_text),
            test_intent_hash: digest_or_empty(self.test_intent_text),
            suppressions_hash: digest_or_empty(self.suppressions_text),
            seam_limit_key,
        }
    }
}
/// On-disk cache of classified seams, stored either as one file per key or as
/// a manifest plus shard files per key.
pub(crate) struct RepoSeamFactCache {
/// Directory holding the single-file entries.
dir: PathBuf,
/// Directory holding one sub-directory per key for sharded entries.
sharded_dir: PathBuf,
}
/// Result summary of a successful classified-seam store.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct CacheStoreStatus {
/// `"ok"` for a single-file store, or
/// `"sharded_ok_seams_<n>_shards_<k>_limit_<l>"` for a sharded store.
pub(crate) label: String,
}
impl RepoSeamFactCache {
pub(crate) fn at(workspace_root: &Path) -> Self {
Self::at_named(workspace_root, "repo-seam-facts", CACHE_SCHEMA_VERSION)
}
pub(crate) fn at_compact_classified(workspace_root: &Path) -> Self {
Self::at_named(
workspace_root,
"repo-compact-classified-seams",
COMPACT_CLASSIFIED_SEAM_CACHE_SCHEMA_VERSION,
)
}
fn at_named(workspace_root: &Path, cache_name: &str, schema_version: &str) -> Self {
let cache_root = cache_base_dir(workspace_root);
Self {
dir: cache_root.join(cache_name).join(schema_version),
sharded_dir: cache_root
.join(format!("{cache_name}-sharded"))
.join(schema_version)
.join(SHARDED_CLASSIFIED_SEAM_CACHE_SCHEMA_VERSION),
}
}
#[cfg(test)]
pub(crate) fn at_dir(dir: PathBuf) -> Self {
Self {
sharded_dir: dir.join("sharded"),
dir,
}
}
pub(crate) fn load_classified_seams(
&self,
key: &RepoSeamCacheKey,
) -> CacheLoad<(Vec<ClassifiedSeam>, Option<CachedSeamLimitInfo>)> {
match self.load_single_classified_seams(key) {
CacheLoad::Miss => self.load_sharded_classified_seams(key),
other => other,
}
}
fn load_single_classified_seams(
&self,
key: &RepoSeamCacheKey,
) -> CacheLoad<(Vec<ClassifiedSeam>, Option<CachedSeamLimitInfo>)> {
let path = self.entry_path(key);
let bytes = match std::fs::read(&path) {
Ok(b) => b,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return CacheLoad::Miss,
Err(err) => {
return CacheLoad::CorruptIgnored {
reason: format!("read failed: {err}"),
};
}
};
match codec::decode(&bytes) {
Ok(envelope) => {
if envelope.matches_key(key) {
CacheLoad::Hit((envelope.classified_seams, envelope.seam_limit_info))
} else {
CacheLoad::Miss
}
}
Err(reason) => CacheLoad::CorruptIgnored { reason },
}
}
pub(crate) fn store_compact_classified_seams_with_limit(
&self,
key: &RepoSeamCacheKey,
seams: &[ClassifiedSeam],
store_limit: usize,
) -> Result<CacheStoreStatus, String> {
self.store_classified_seams_with_limit(key, seams, None, store_limit)
}
pub(crate) fn store_classified_seams_with_limit(
&self,
key: &RepoSeamCacheKey,
seams: &[ClassifiedSeam],
limit_info: Option<&CachedSeamLimitInfo>,
store_limit: usize,
) -> Result<CacheStoreStatus, String> {
if store_limit == 0 {
return Err("classified seam cache store limit must be positive".to_string());
}
if seams.len() > store_limit {
return self.store_sharded_classified_seams_with_limit(
key,
seams,
limit_info,
store_limit,
);
}
std::fs::create_dir_all(&self.dir)
.map_err(|err| format!("create cache dir failed: {err}"))?;
let envelope = CacheEnvelope::new(key.clone(), seams.to_vec(), limit_info.cloned());
let bytes = codec::encode(&envelope)?;
let path = self.entry_path(key);
std::fs::write(&path, &bytes).map_err(|err| format!("write cache failed: {err}"))?;
Ok(CacheStoreStatus {
label: "ok".to_string(),
})
}
fn entry_path(&self, key: &RepoSeamCacheKey) -> PathBuf {
self.dir.join(key.filename())
}
fn load_sharded_classified_seams(
&self,
key: &RepoSeamCacheKey,
) -> CacheLoad<(Vec<ClassifiedSeam>, Option<CachedSeamLimitInfo>)> {
let manifest_path = self.sharded_manifest_path(key);
let bytes = match std::fs::read(&manifest_path) {
Ok(bytes) => bytes,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return CacheLoad::Miss,
Err(err) => {
return CacheLoad::CorruptIgnored {
reason: format!("read sharded manifest failed: {err}"),
};
}
};
let manifest = match codec::decode_sharded_manifest(&bytes) {
Ok(manifest) => manifest,
Err(reason) => return CacheLoad::CorruptIgnored { reason },
};
if !manifest.matches_key(key) {
return CacheLoad::Miss;
}
if manifest.shards.is_empty() && manifest.total_seams != 0 {
return CacheLoad::CorruptIgnored {
reason: "sharded manifest has no shards for non-empty seam payload".to_string(),
};
}
if manifest.shard_count != manifest.shards.len() {
return CacheLoad::CorruptIgnored {
reason: format!(
"sharded manifest expected {} shards but listed {}",
manifest.shard_count,
manifest.shards.len()
),
};
}
let mut seams = Vec::with_capacity(manifest.total_seams);
for (index, shard) in manifest.shards.iter().enumerate() {
if shard.index != index {
return CacheLoad::CorruptIgnored {
reason: format!(
"sharded manifest index mismatch at position {index}: {}",
shard.index
),
};
}
let shard_path = self.sharded_entry_dir(key).join(&shard.file);
let bytes = match std::fs::read(&shard_path) {
Ok(bytes) => bytes,
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
return CacheLoad::CorruptIgnored {
reason: format!("missing sharded cache file {}", shard_path.display()),
};
}
Err(err) => {
return CacheLoad::CorruptIgnored {
reason: format!("read sharded cache file failed: {err}"),
};
}
};
let envelope = match codec::decode_shard(&bytes) {
Ok(envelope) => envelope,
Err(reason) => return CacheLoad::CorruptIgnored { reason },
};
if !envelope.matches_key(key) {
return CacheLoad::CorruptIgnored {
reason: format!("sharded cache key mismatch in {}", shard.file),
};
}
if envelope.sharded_cache_schema_version != manifest.sharded_cache_schema_version
|| envelope.shard_index != shard.index
|| envelope.shard_count != manifest.shard_count
{
return CacheLoad::CorruptIgnored {
reason: format!("sharded cache metadata mismatch in {}", shard.file),
};
}
if envelope.classified_seams.len() != shard.seams {
return CacheLoad::CorruptIgnored {
reason: format!(
"sharded cache file {} carried {} seams but manifest expected {}",
shard.file,
envelope.classified_seams.len(),
shard.seams
),
};
}
seams.extend(envelope.classified_seams);
}
if seams.len() != manifest.total_seams {
return CacheLoad::CorruptIgnored {
reason: format!(
"sharded cache loaded {} seams but manifest expected {}",
seams.len(),
manifest.total_seams
),
};
}
CacheLoad::Hit((seams, manifest.seam_limit_info))
}
fn store_sharded_classified_seams_with_limit(
&self,
key: &RepoSeamCacheKey,
seams: &[ClassifiedSeam],
limit_info: Option<&CachedSeamLimitInfo>,
store_limit: usize,
) -> Result<CacheStoreStatus, String> {
std::fs::create_dir_all(self.sharded_entry_dir(key))
.map_err(|err| format!("create sharded cache dir failed: {err}"))?;
let shard_count = seams.len().div_ceil(store_limit);
let mut shard_refs = Vec::with_capacity(shard_count);
for (index, chunk) in seams.chunks(store_limit).enumerate() {
let file = format!("shard-{index:05}.json");
let envelope =
ShardedCacheEnvelope::new(key.clone(), index, shard_count, chunk.to_vec());
let bytes = codec::encode_shard(&envelope)?;
let path = self.sharded_entry_dir(key).join(&file);
std::fs::write(&path, &bytes)
.map_err(|err| format!("write sharded cache file failed: {err}"))?;
shard_refs.push(ShardedCacheShardRef {
index,
file,
seams: chunk.len(),
});
}
let manifest = ShardedCacheManifest::new(
key.clone(),
seams.len(),
shard_count,
shard_refs,
limit_info.cloned(),
);
let bytes = codec::encode_sharded_manifest(&manifest)?;
let manifest_path = self.sharded_manifest_path(key);
std::fs::write(&manifest_path, bytes)
.map_err(|err| format!("write sharded cache manifest failed: {err}"))?;
Ok(CacheStoreStatus {
label: format!(
"sharded_ok_seams_{}_shards_{}_limit_{}",
seams.len(),
shard_count,
store_limit
),
})
}
fn sharded_entry_dir(&self, key: &RepoSeamCacheKey) -> PathBuf {
self.sharded_dir
.join(key.filename().trim_end_matches(".json"))
}
fn sharded_manifest_path(&self, key: &RepoSeamCacheKey) -> PathBuf {
self.sharded_entry_dir(key).join("manifest.json")
}
}
/// Counters describing file fact cache activity; all start at zero via
/// `Default`.
// NOTE(review): the counters are updated outside this section — the per-field
// meanings below are inferred from the names and from `CacheLoad`; confirm.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct FileFactCacheStats {
// presumably lookups that returned `CacheLoad::Hit`
pub(crate) hits: usize,
// presumably lookups that returned `CacheLoad::Miss`
pub(crate) misses: usize,
// presumably lookups that returned `CacheLoad::CorruptIgnored`
pub(crate) corrupt_ignored: usize,
// presumably successful stores; note that `status_label` does not report this counter
pub(crate) stores: usize,
// presumably failed stores
pub(crate) store_errors: usize,
}
impl FileFactCacheStats {
    /// Renders the counters as
    /// `hits_<h>_misses_<m>_corrupt_<c>_store_errors_<e>`.
    ///
    /// The `stores` counter is not part of the label.
    pub(crate) fn status_label(&self) -> String {
        let Self {
            hits,
            misses,
            corrupt_ignored,
            store_errors,
            ..
        } = self;
        format!(
            "hits_{}_misses_{}_corrupt_{}_store_errors_{}",
            hits, misses, corrupt_ignored, store_errors
        )
    }
}
/// On-disk cache of per-file facts, one JSON file per `RepoFileFactCacheKey`.
pub(crate) struct RepoFileFactCache {
/// Directory holding the entry files.
dir: PathBuf,
}
impl RepoFileFactCache {
    /// Opens the cache at
    /// `<cache base>/repo-file-facts/<FILE_FACT_CACHE_SCHEMA_VERSION>`.
    pub(crate) fn at(workspace_root: &Path) -> Self {
        let base = cache_base_dir(workspace_root);
        let dir = base
            .join("repo-file-facts")
            .join(FILE_FACT_CACHE_SCHEMA_VERSION);
        Self { dir }
    }

    /// Test helper: roots the cache directly at `dir`.
    #[cfg(test)]
    pub(crate) fn at_dir(dir: PathBuf) -> Self {
        Self { dir }
    }

    /// Looks up the facts stored for `key`.
    ///
    /// A missing file, or an entry recorded for a different key, is a `Miss`;
    /// read and decode failures are reported as `CorruptIgnored`.
    pub(crate) fn load_file_facts(&self, key: &RepoFileFactCacheKey) -> CacheLoad<FileFacts> {
        let bytes = match std::fs::read(self.entry_path(key)) {
            Ok(bytes) => bytes,
            Err(err) => {
                return if err.kind() == std::io::ErrorKind::NotFound {
                    CacheLoad::Miss
                } else {
                    CacheLoad::CorruptIgnored {
                        reason: format!("read failed: {err}"),
                    }
                };
            }
        };
        let envelope = match codec::decode_file_facts(&bytes) {
            Ok(envelope) => envelope,
            Err(reason) => return CacheLoad::CorruptIgnored { reason },
        };
        if envelope.matches_key(key) {
            CacheLoad::Hit(envelope.file_facts)
        } else {
            CacheLoad::Miss
        }
    }

    /// Writes `facts` as the entry for `key`, creating the cache directory if
    /// needed.
    ///
    /// # Errors
    /// Fails when the directory cannot be created or the entry cannot be
    /// encoded or written.
    pub(crate) fn store_file_facts(
        &self,
        key: &RepoFileFactCacheKey,
        facts: &FileFacts,
    ) -> Result<(), String> {
        if let Err(err) = std::fs::create_dir_all(&self.dir) {
            return Err(format!("create file fact cache dir failed: {err}"));
        }
        let envelope = FileFactCacheEnvelope::new(key.clone(), facts.clone());
        let bytes = codec::encode_file_facts(&envelope)?;
        std::fs::write(self.entry_path(key), bytes)
            .map_err(|err| format!("write file fact cache failed: {err}"))
    }

    /// Path of the entry file for `key`.
    fn entry_path(&self, key: &RepoFileFactCacheKey) -> PathBuf {
        let file_name = key.filename();
        self.dir.join(file_name)
    }
}
/// Test-only on-disk cache of seam grip class counts, one JSON file per
/// `RepoSeamCacheKey`.
#[cfg(test)]
pub(crate) struct RepoSeamCountCache {
/// Directory holding the entry files.
dir: PathBuf,
}
#[cfg(test)]
impl RepoSeamCountCache {
    /// Opens the cache at
    /// `<cache base>/repo-seam-counts/<COUNT_CACHE_SCHEMA_VERSION>`.
    pub(crate) fn at(workspace_root: &Path) -> Self {
        let base = cache_base_dir(workspace_root);
        let dir = base
            .join("repo-seam-counts")
            .join(COUNT_CACHE_SCHEMA_VERSION);
        Self { dir }
    }

    /// Looks up the counts stored for `key`.
    ///
    /// A missing file, or an entry recorded for a different key, is a `Miss`;
    /// read and decode failures are reported as `CorruptIgnored`.
    pub(crate) fn load_counts(&self, key: &RepoSeamCacheKey) -> CacheLoad<SeamGripClassCounts> {
        let bytes = match std::fs::read(self.entry_path(key)) {
            Ok(bytes) => bytes,
            Err(err) => {
                return if err.kind() == std::io::ErrorKind::NotFound {
                    CacheLoad::Miss
                } else {
                    CacheLoad::CorruptIgnored {
                        reason: format!("read failed: {err}"),
                    }
                };
            }
        };
        let envelope = match codec::decode_counts(&bytes) {
            Ok(envelope) => envelope,
            Err(reason) => return CacheLoad::CorruptIgnored { reason },
        };
        if envelope.matches_key(key) {
            CacheLoad::Hit(envelope.counts)
        } else {
            CacheLoad::Miss
        }
    }

    /// Writes `counts` as the entry for `key`, creating the cache directory if
    /// needed.
    ///
    /// # Errors
    /// Fails when the directory cannot be created or the entry cannot be
    /// encoded or written.
    pub(crate) fn store_counts(
        &self,
        key: &RepoSeamCacheKey,
        counts: &SeamGripClassCounts,
    ) -> Result<(), String> {
        if let Err(err) = std::fs::create_dir_all(&self.dir) {
            return Err(format!("create count cache dir failed: {err}"));
        }
        let envelope = CountCacheEnvelope::new(key.clone(), counts.clone());
        let bytes = codec::encode_counts(&envelope)?;
        std::fs::write(self.entry_path(key), &bytes)
            .map_err(|err| format!("write count cache failed: {err}"))
    }

    /// Path of the entry file for `key`.
    fn entry_path(&self, key: &RepoSeamCacheKey) -> PathBuf {
        let file_name = key.filename();
        self.dir.join(file_name)
    }
}
/// Serialized form of a single-file classified-seam cache entry.
///
/// The first eight fields repeat the matching `RepoSeamCacheKey` fields so a
/// loaded entry can be checked against the requested key. The key's
/// `seam_limit_key` is not stored here.
#[derive(serde::Serialize, serde::Deserialize)]
struct CacheEnvelope {
schema_version: String,
analyzer_version: String,
workspace_root_hash: String,
files_content_hash: String,
cfg_features_hash: String,
config_hash: String,
test_intent_hash: String,
suppressions_hash: String,
/// The cached payload.
classified_seams: Vec<ClassifiedSeam>,
/// Optional seam-limit details; entries lacking the field decode as `None`.
#[serde(default)]
seam_limit_info: Option<CachedSeamLimitInfo>,
}
/// Serialized form of a test-only seam count cache entry.
///
/// Carries its own count cache schema version plus the `RepoSeamCacheKey`
/// fields used to validate a loaded entry.
#[cfg(test)]
#[derive(serde::Serialize, serde::Deserialize)]
struct CountCacheEnvelope {
/// Always written as `COUNT_CACHE_SCHEMA_VERSION`.
count_cache_schema_version: String,
schema_version: String,
analyzer_version: String,
workspace_root_hash: String,
files_content_hash: String,
cfg_features_hash: String,
config_hash: String,
test_intent_hash: String,
suppressions_hash: String,
/// The cached payload.
counts: SeamGripClassCounts,
}
/// Serialized form of a per-file fact cache entry: the fields of the
/// `RepoFileFactCacheKey` it was stored under, plus the facts themselves.
#[derive(serde::Serialize, serde::Deserialize)]
struct FileFactCacheEnvelope {
/// The key's `schema_version`.
file_fact_cache_schema_version: String,
analyzer_version: String,
file_path: PathBuf,
content_hash: String,
/// The cached payload.
file_facts: FileFacts,
}
impl FileFactCacheEnvelope {
fn new(key: RepoFileFactCacheKey, file_facts: FileFacts) -> Self {
Self {
file_fact_cache_schema_version: key.schema_version,
analyzer_version: key.analyzer_version,
file_path: key.file_path,
content_hash: key.content_hash,
file_facts,
}
}
fn matches_key(&self, key: &RepoFileFactCacheKey) -> bool {
self.file_fact_cache_schema_version == key.schema_version
&& self.analyzer_version == key.analyzer_version
&& self.file_path == key.file_path
&& self.content_hash == key.content_hash
}
}
#[cfg(test)]
impl CountCacheEnvelope {
    /// Wraps `counts` together with the identity fields taken from `key` and
    /// the current count cache schema version.
    fn new(key: RepoSeamCacheKey, counts: SeamGripClassCounts) -> Self {
        let RepoSeamCacheKey {
            schema_version,
            analyzer_version,
            workspace_root_hash,
            files_content_hash,
            cfg_features_hash,
            config_hash,
            test_intent_hash,
            suppressions_hash,
            ..
        } = key;
        Self {
            count_cache_schema_version: COUNT_CACHE_SCHEMA_VERSION.to_string(),
            schema_version,
            analyzer_version,
            workspace_root_hash,
            files_content_hash,
            cfg_features_hash,
            config_hash,
            test_intent_hash,
            suppressions_hash,
            counts,
        }
    }

    /// Returns `true` when the entry uses the current count cache schema
    /// version and every stored identity field equals the one in `key`.
    fn matches_key(&self, key: &RepoSeamCacheKey) -> bool {
        let stored = [
            self.count_cache_schema_version.as_str(),
            self.schema_version.as_str(),
            self.analyzer_version.as_str(),
            self.workspace_root_hash.as_str(),
            self.files_content_hash.as_str(),
            self.cfg_features_hash.as_str(),
            self.config_hash.as_str(),
            self.test_intent_hash.as_str(),
            self.suppressions_hash.as_str(),
        ];
        let wanted = [
            COUNT_CACHE_SCHEMA_VERSION,
            key.schema_version.as_str(),
            key.analyzer_version.as_str(),
            key.workspace_root_hash.as_str(),
            key.files_content_hash.as_str(),
            key.cfg_features_hash.as_str(),
            key.config_hash.as_str(),
            key.test_intent_hash.as_str(),
            key.suppressions_hash.as_str(),
        ];
        stored == wanted
    }
}
impl CacheEnvelope {
fn new(
key: RepoSeamCacheKey,
classified_seams: Vec<ClassifiedSeam>,
seam_limit_info: Option<CachedSeamLimitInfo>,
) -> Self {
Self {
schema_version: key.schema_version,
analyzer_version: key.analyzer_version,
workspace_root_hash: key.workspace_root_hash,
files_content_hash: key.files_content_hash,
cfg_features_hash: key.cfg_features_hash,
config_hash: key.config_hash,
test_intent_hash: key.test_intent_hash,
suppressions_hash: key.suppressions_hash,
classified_seams,
seam_limit_info,
}
}
fn matches_key(&self, key: &RepoSeamCacheKey) -> bool {
self.schema_version == key.schema_version
&& self.analyzer_version == key.analyzer_version
&& self.workspace_root_hash == key.workspace_root_hash
&& self.files_content_hash == key.files_content_hash
&& self.cfg_features_hash == key.cfg_features_hash
&& self.config_hash == key.config_hash
&& self.test_intent_hash == key.test_intent_hash
&& self.suppressions_hash == key.suppressions_hash
}
}
/// Serialized manifest of a sharded classified-seam cache entry.
///
/// Repeats the identity fields of the `RepoSeamCacheKey` it was stored under
/// and lists the shard files that make up the payload.
#[derive(serde::Serialize, serde::Deserialize)]
struct ShardedCacheManifest {
/// Always written as `SHARDED_CLASSIFIED_SEAM_CACHE_SCHEMA_VERSION`.
sharded_cache_schema_version: String,
schema_version: String,
analyzer_version: String,
workspace_root_hash: String,
files_content_hash: String,
cfg_features_hash: String,
config_hash: String,
test_intent_hash: String,
suppressions_hash: String,
/// Number of seams across all shards.
total_seams: usize,
/// Number of shards; checked against `shards.len()` on load.
shard_count: usize,
/// One reference per shard file, in shard order.
shards: Vec<ShardedCacheShardRef>,
/// Optional seam-limit details; manifests lacking the field decode as `None`.
#[serde(default)]
seam_limit_info: Option<CachedSeamLimitInfo>,
}
/// Manifest entry describing one shard file.
#[derive(Clone, serde::Serialize, serde::Deserialize)]
struct ShardedCacheShardRef {
/// Zero-based position of the shard; must equal its position in the manifest.
index: usize,
/// File name of the shard, resolved against the key's shard directory.
file: String,
/// Number of seams the shard file is expected to contain.
seams: usize,
}
/// Serialized form of one shard of a sharded classified-seam cache entry.
///
/// Repeats the identity fields of the `RepoSeamCacheKey` so that each shard
/// can be validated on its own.
#[derive(serde::Serialize, serde::Deserialize)]
struct ShardedCacheEnvelope {
/// Always written as `SHARDED_CLASSIFIED_SEAM_CACHE_SCHEMA_VERSION`.
sharded_cache_schema_version: String,
schema_version: String,
analyzer_version: String,
workspace_root_hash: String,
files_content_hash: String,
cfg_features_hash: String,
config_hash: String,
test_intent_hash: String,
suppressions_hash: String,
/// Zero-based position of this shard.
shard_index: usize,
/// Total number of shards in the entry this shard belongs to.
shard_count: usize,
/// The seams carried by this shard.
classified_seams: Vec<ClassifiedSeam>,
}
impl ShardedCacheManifest {
fn new(
key: RepoSeamCacheKey,
total_seams: usize,
shard_count: usize,
shards: Vec<ShardedCacheShardRef>,
seam_limit_info: Option<CachedSeamLimitInfo>,
) -> Self {
Self {
sharded_cache_schema_version: SHARDED_CLASSIFIED_SEAM_CACHE_SCHEMA_VERSION.to_string(),
schema_version: key.schema_version,
analyzer_version: key.analyzer_version,
workspace_root_hash: key.workspace_root_hash,
files_content_hash: key.files_content_hash,
cfg_features_hash: key.cfg_features_hash,
config_hash: key.config_hash,
test_intent_hash: key.test_intent_hash,
suppressions_hash: key.suppressions_hash,
total_seams,
shard_count,
shards,
seam_limit_info,
}
}
fn matches_key(&self, key: &RepoSeamCacheKey) -> bool {
self.sharded_cache_schema_version == SHARDED_CLASSIFIED_SEAM_CACHE_SCHEMA_VERSION
&& self.schema_version == key.schema_version
&& self.analyzer_version == key.analyzer_version
&& self.workspace_root_hash == key.workspace_root_hash
&& self.files_content_hash == key.files_content_hash
&& self.cfg_features_hash == key.cfg_features_hash
&& self.config_hash == key.config_hash
&& self.test_intent_hash == key.test_intent_hash
&& self.suppressions_hash == key.suppressions_hash
}
}
impl ShardedCacheEnvelope {
fn new(
key: RepoSeamCacheKey,
shard_index: usize,
shard_count: usize,
classified_seams: Vec<ClassifiedSeam>,
) -> Self {
Self {
sharded_cache_schema_version: SHARDED_CLASSIFIED_SEAM_CACHE_SCHEMA_VERSION.to_string(),
schema_version: key.schema_version,
analyzer_version: key.analyzer_version,
workspace_root_hash: key.workspace_root_hash,
files_content_hash: key.files_content_hash,
cfg_features_hash: key.cfg_features_hash,
config_hash: key.config_hash,
test_intent_hash: key.test_intent_hash,
suppressions_hash: key.suppressions_hash,
shard_index,
shard_count,
classified_seams,
}
}
fn matches_key(&self, key: &RepoSeamCacheKey) -> bool {
self.sharded_cache_schema_version == SHARDED_CLASSIFIED_SEAM_CACHE_SCHEMA_VERSION
&& self.schema_version == key.schema_version
&& self.analyzer_version == key.analyzer_version
&& self.workspace_root_hash == key.workspace_root_hash
&& self.files_content_hash == key.files_content_hash
&& self.cfg_features_hash == key.cfg_features_hash
&& self.config_hash == key.config_hash
&& self.test_intent_hash == key.test_intent_hash
&& self.suppressions_hash == key.suppressions_hash
}
}
/// JSON (de)serialization of the cache envelopes.
///
/// Encoders emit pretty-printed JSON. Every failure is turned into a `String`
/// that names the operation that failed.
mod codec {
    #[cfg(test)]
    use super::CountCacheEnvelope;
    use super::{CacheEnvelope, FileFactCacheEnvelope, ShardedCacheEnvelope, ShardedCacheManifest};

    /// Encodes a single-file classified-seam entry.
    pub(super) fn encode(envelope: &CacheEnvelope) -> Result<Vec<u8>, String> {
        match serde_json::to_vec_pretty(envelope) {
            Ok(bytes) => Ok(bytes),
            Err(err) => Err(format!("encode failed: {err}")),
        }
    }

    /// Decodes a single-file classified-seam entry.
    pub(super) fn decode(bytes: &[u8]) -> Result<CacheEnvelope, String> {
        match serde_json::from_slice(bytes) {
            Ok(envelope) => Ok(envelope),
            Err(err) => Err(format!("decode failed: {err}")),
        }
    }

    /// Encodes a sharded cache manifest.
    pub(super) fn encode_sharded_manifest(
        manifest: &ShardedCacheManifest,
    ) -> Result<Vec<u8>, String> {
        match serde_json::to_vec_pretty(manifest) {
            Ok(bytes) => Ok(bytes),
            Err(err) => Err(format!("encode sharded manifest failed: {err}")),
        }
    }

    /// Decodes a sharded cache manifest.
    pub(super) fn decode_sharded_manifest(bytes: &[u8]) -> Result<ShardedCacheManifest, String> {
        match serde_json::from_slice(bytes) {
            Ok(manifest) => Ok(manifest),
            Err(err) => Err(format!("decode sharded manifest failed: {err}")),
        }
    }

    /// Encodes one shard of a sharded entry.
    pub(super) fn encode_shard(envelope: &ShardedCacheEnvelope) -> Result<Vec<u8>, String> {
        match serde_json::to_vec_pretty(envelope) {
            Ok(bytes) => Ok(bytes),
            Err(err) => Err(format!("encode sharded cache file failed: {err}")),
        }
    }

    /// Decodes one shard of a sharded entry.
    pub(super) fn decode_shard(bytes: &[u8]) -> Result<ShardedCacheEnvelope, String> {
        match serde_json::from_slice(bytes) {
            Ok(envelope) => Ok(envelope),
            Err(err) => Err(format!("decode sharded cache file failed: {err}")),
        }
    }

    /// Encodes a test-only seam count entry.
    #[cfg(test)]
    pub(super) fn encode_counts(envelope: &CountCacheEnvelope) -> Result<Vec<u8>, String> {
        match serde_json::to_vec_pretty(envelope) {
            Ok(bytes) => Ok(bytes),
            Err(err) => Err(format!("encode counts failed: {err}")),
        }
    }

    /// Decodes a test-only seam count entry.
    #[cfg(test)]
    pub(super) fn decode_counts(bytes: &[u8]) -> Result<CountCacheEnvelope, String> {
        match serde_json::from_slice(bytes) {
            Ok(envelope) => Ok(envelope),
            Err(err) => Err(format!("decode counts failed: {err}")),
        }
    }

    /// Encodes a per-file fact entry.
    pub(super) fn encode_file_facts(envelope: &FileFactCacheEnvelope) -> Result<Vec<u8>, String> {
        match serde_json::to_vec_pretty(envelope) {
            Ok(bytes) => Ok(bytes),
            Err(err) => Err(format!("encode file facts failed: {err}")),
        }
    }

    /// Decodes a per-file fact entry.
    pub(super) fn decode_file_facts(bytes: &[u8]) -> Result<FileFactCacheEnvelope, String> {
        match serde_json::from_slice(bytes) {
            Ok(envelope) => Ok(envelope),
            Err(err) => Err(format!("decode file facts failed: {err}")),
        }
    }
}
/// Hashes the UTF-8 bytes of `s`; see `hash_bytes` for the output format.
fn hash_str(s: &str) -> String {
    let raw = s.as_bytes();
    hash_bytes(raw)
}
/// Hashes `bytes` with 64-bit FNV-1a and renders the result as 16 lowercase,
/// zero-padded hex digits.
fn hash_bytes(bytes: &[u8]) -> String {
    let digest = fnv1a_64(bytes);
    format!("{:016x}", digest)
}
/// 64-bit FNV-1a: starting from the offset basis, each byte is XORed into the
/// state, which is then multiplied by the FNV prime with wrapping arithmetic.
fn fnv1a_64(bytes: &[u8]) -> u64 {
    const FNV_OFFSET: u64 = 0xcbf29ce484222325;
    const FNV_PRIME: u64 = 0x100000001b3;
    bytes.iter().fold(FNV_OFFSET, |state, &byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
#[cfg(test)]
mod tests {
use super::*;
use crate::analysis::seam_classification::ClassifiedSeam;
use crate::analysis::seams::{
ExpectedSink, RepoSeam, RequiredDiscriminator, SeamGripClass, SeamKind,
};
use crate::analysis::test_grip_evidence::TestGripEvidence;
use crate::domain::{Confidence, StageEvidence, StageState};
use std::path::PathBuf;
fn sample_classified() -> ClassifiedSeam {
let seam = RepoSeam::new(
PathBuf::from("src/foo.rs"),
"src/foo.rs::foo",
SeamKind::PredicateBoundary,
42,
10,
"x > 5".to_string(),
RequiredDiscriminator::BoundaryValue {
description: "x > 5".to_string(),
},
ExpectedSink::ReturnValue,
);
let evidence = TestGripEvidence {
seam_id: seam.id().clone(),
related_tests: Vec::new(),
reach: StageEvidence::new(StageState::Yes, Confidence::High, "reach"),
activate: StageEvidence::new(StageState::Unknown, Confidence::Medium, "activate"),
propagate: StageEvidence::new(StageState::Unknown, Confidence::Medium, "propagate"),
observe: StageEvidence::new(StageState::Weak, Confidence::Low, "observe"),
discriminate: StageEvidence::new(StageState::No, Confidence::Low, "discriminate"),
observed_values: Vec::new(),
missing_discriminators: Vec::new(),
};
ClassifiedSeam {
seam,
evidence,
class: SeamGripClass::Ungripped,
}
}
fn empty_state() -> WorkspaceState<'static> {
WorkspaceState {
workspace_root: Path::new("/repo"),
files: &[],
cfg_features: None,
config_text: None,
test_intent_text: None,
suppressions_text: None,
}
}
fn isolated_dir(label: &str) -> PathBuf {
std::env::temp_dir().join(format!("ripr-cache-{label}-{}", uuid_like()))
}
#[test]
fn given_no_cache_when_load_runs_then_miss_is_returned() -> Result<(), String> {
let dir = isolated_dir("cold");
let _ = std::fs::remove_dir_all(&dir);
let cache = RepoSeamFactCache::at_dir(dir);
let key = empty_state().cache_key();
match cache.load_classified_seams(&key) {
CacheLoad::Miss => Ok(()),
other => Err(format!("expected Miss on missing cache dir, got {other:?}")),
}
}
#[test]
fn given_unchanged_inputs_when_cache_is_warm_then_classified_seams_are_reused()
-> Result<(), String> {
let dir = isolated_dir("warm");
let _ = std::fs::remove_dir_all(&dir);
let cache = RepoSeamFactCache::at_dir(dir.clone());
let key = empty_state().cache_key();
let seams = vec![sample_classified()];
cache
.store_classified_seams_with_limit(
&key,
&seams,
None,
CLASSIFIED_SEAM_CACHE_STORE_LIMIT,
)
.map_err(|err| format!("store should succeed: {err}"))?;
let result = match cache.load_classified_seams(&key) {
CacheLoad::Hit((loaded, limit_info)) => {
if loaded.len() != seams.len() {
Err(format!(
"warm path should return stored seams, got {} vs {}",
loaded.len(),
seams.len()
))
} else if loaded[0].seam.id().as_str() != seams[0].seam.id().as_str() {
Err(format!(
"round-trip should preserve seam id, got {} vs {}",
loaded[0].seam.id().as_str(),
seams[0].seam.id().as_str()
))
} else if loaded[0].class != seams[0].class {
Err(format!(
"round-trip should preserve class, got {:?} vs {:?}",
loaded[0].class, seams[0].class
))
} else if limit_info.is_some() {
Err(format!(
"complete run should store None limit_info, got {limit_info:?}"
))
} else {
Ok(())
}
}
other => Err(format!("expected Hit on warm cache, got {other:?}")),
};
let _ = std::fs::remove_dir_all(&dir);
result
}
#[test]
fn given_large_classified_entry_when_cache_store_runs_then_shards_are_written()
-> Result<(), String> {
let dir = isolated_dir("large-shard");
let _ = std::fs::remove_dir_all(&dir);
let cache = RepoSeamFactCache::at_dir(dir.clone());
let key = empty_state().cache_key();
let seams = vec![sample_classified(); 2];
let status = cache
.store_classified_seams_with_limit(&key, &seams, None, 1)
.map_err(|err| format!("large classified seam cache should shard: {err}"))?;
assert_eq!(status.label, "sharded_ok_seams_2_shards_2_limit_1");
assert!(
!cache.entry_path(&key).exists(),
"sharded cache store should not write a monolithic classified seam entry"
);
assert!(
cache.sharded_manifest_path(&key).exists(),
"sharded cache store should write a manifest"
);
assert!(
cache
.sharded_entry_dir(&key)
.join("shard-00000.json")
.exists(),
"sharded cache store should write the first shard"
);
assert!(
cache
.sharded_entry_dir(&key)
.join("shard-00001.json")
.exists(),
"sharded cache store should write the second shard"
);
match cache.load_classified_seams(&key) {
CacheLoad::Hit((loaded, _)) if loaded.len() == 2 => {}
other => return Err(format!("expected sharded cache hit, got {other:?}")),
}
let _ = std::fs::remove_dir_all(&dir);
Ok(())
}
#[test]
fn classified_cache_store_limit_rejects_zero_direct_limit() -> Result<(), String> {
let dir = isolated_dir("zero-limit");
let _ = std::fs::remove_dir_all(&dir);
let cache = RepoSeamFactCache::at_dir(dir.clone());
let key = empty_state().cache_key();
let err =
match cache.store_classified_seams_with_limit(&key, &[sample_classified()], None, 0) {
Ok(status) => {
return Err(format!(
"zero direct cache limit should fail, got {}",
status.label
));
}
Err(err) => err,
};
assert!(
err.contains("positive"),
"zero direct cache limit should produce positive-limit diagnostic: {err}"
);
let _ = std::fs::remove_dir_all(&dir);
Ok(())
}
#[test]
fn classified_cache_store_limit_defaults_to_20k_when_env_missing() -> Result<(), String> {
let limit =
classified_seam_cache_store_limit_from_env(Err(std::env::VarError::NotPresent))?;
assert_eq!(limit, CLASSIFIED_SEAM_CACHE_STORE_LIMIT);
Ok(())
}
#[test]
fn classified_cache_store_limit_accepts_positive_env_override() -> Result<(), String> {
let limit = classified_seam_cache_store_limit_from_env(Ok("25000".to_string()))?;
assert_eq!(limit, 25_000);
Ok(())
}
/// Empty, zero, and non-numeric overrides must each be refused with a
/// diagnostic that names the environment variable and says "positive integer".
#[test]
fn classified_cache_store_limit_rejects_invalid_env_override() -> Result<(), String> {
    let invalid_values = ["", "0", "not-a-number"];
    for value in invalid_values {
        match classified_seam_cache_store_limit_from_env(Ok(value.to_string())) {
            Err(err) => {
                assert!(
                    err.contains(CLASSIFIED_SEAM_CACHE_STORE_LIMIT_ENV),
                    "diagnostic should name env var for {value:?}: {err}"
                );
                assert!(
                    err.contains("positive integer"),
                    "diagnostic should describe expected value for {value:?}: {err}"
                );
            }
            // Any accepted value aborts the test immediately.
            Ok(limit) => {
                return Err(format!(
                    "invalid classified cache env value {value:?} should fail, got limit {limit}"
                ));
            }
        }
    }
    Ok(())
}
/// Two seams stored with a limit of two must load back as a two-seam hit.
#[test]
fn classified_cache_store_limit_can_be_raised_for_large_entries() -> Result<(), String> {
    let scratch = isolated_dir("classified-raised-limit");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&scratch);
    let cache = RepoSeamFactCache::at_dir(scratch.clone());
    let key = empty_state().cache_key();
    let seams = vec![sample_classified(); 2];
    if let Err(err) = cache.store_classified_seams_with_limit(&key, &seams, None, 2) {
        return Err(format!(
            "raised classified cache limit should allow store: {err}"
        ));
    }
    match cache.load_classified_seams(&key) {
        CacheLoad::Hit((loaded, _)) if loaded.len() == 2 => {
            let _ = std::fs::remove_dir_all(&scratch);
            Ok(())
        }
        other => Err(format!(
            "expected classified cache hit after raised limit: {other:?}"
        )),
    }
}
/// With the environment variable absent, the compact store limit resolves to
/// `COMPACT_CLASSIFIED_SEAM_CACHE_STORE_LIMIT`.
#[test]
fn compact_cache_store_limit_defaults_to_100k_when_env_missing() -> Result<(), String> {
    let missing_env = Err(std::env::VarError::NotPresent);
    let limit = compact_classified_seam_cache_store_limit_from_env(missing_env)?;
    assert_eq!(limit, COMPACT_CLASSIFIED_SEAM_CACHE_STORE_LIMIT);
    Ok(())
}
/// A positive integer supplied through the environment value is parsed and
/// returned as the compact store limit.
#[test]
fn compact_cache_store_limit_accepts_positive_env_override() -> Result<(), String> {
    let override_value = Ok("200000".to_string());
    let limit = compact_classified_seam_cache_store_limit_from_env(override_value)?;
    assert_eq!(limit, 200_000);
    Ok(())
}
/// Empty, zero, and non-numeric overrides must each be refused with a
/// diagnostic that names the compact environment variable and says
/// "positive integer".
#[test]
fn compact_cache_store_limit_rejects_invalid_env_override() -> Result<(), String> {
    let invalid_values = ["", "0", "not-a-number"];
    for value in invalid_values {
        match compact_classified_seam_cache_store_limit_from_env(Ok(value.to_string())) {
            Err(err) => {
                assert!(
                    err.contains(COMPACT_CLASSIFIED_SEAM_CACHE_STORE_LIMIT_ENV),
                    "diagnostic should name env var for {value:?}: {err}"
                );
                assert!(
                    err.contains("positive integer"),
                    "diagnostic should describe expected value for {value:?}: {err}"
                );
            }
            // Any accepted value aborts the test immediately.
            Ok(limit) => {
                return Err(format!(
                    "invalid compact cache env value {value:?} should fail, got limit {limit}"
                ));
            }
        }
    }
    Ok(())
}
/// Storing two seams through the compact path with a limit of one must report
/// the two-shard status label, leave no file at the monolithic entry path, and
/// still load back as a two-seam hit.
#[test]
fn compact_cache_store_limit_controls_shard_size() -> Result<(), String> {
    let scratch = isolated_dir("compact-large-shard");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&scratch);
    let cache = RepoSeamFactCache::at_dir(scratch.clone());
    let key = empty_state().cache_key();
    let seams = vec![sample_classified(); 2];
    let status = match cache.store_compact_classified_seams_with_limit(&key, &seams, 1) {
        Ok(status) => status,
        Err(err) => {
            return Err(format!(
                "configured compact cache limit should shard: {err}"
            ));
        }
    };
    assert_eq!(status.label, "sharded_ok_seams_2_shards_2_limit_1");
    let monolithic_entry = cache.entry_path(&key);
    assert!(
        !monolithic_entry.exists(),
        "sharded compact cache store should not write a monolithic entry"
    );
    match cache.load_classified_seams(&key) {
        CacheLoad::Hit((loaded, _)) if loaded.len() == 2 => {
            let _ = std::fs::remove_dir_all(&scratch);
            Ok(())
        }
        other => Err(format!("expected compact sharded cache hit: {other:?}")),
    }
}
/// After a sharded store, deleting `shard-00001.json` must make the load
/// report `CorruptIgnored` with a reason mentioning the missing shard file.
#[test]
fn given_missing_sharded_cache_file_when_loading_then_corrupt_ignored_is_reported()
-> Result<(), String> {
    let scratch = isolated_dir("missing-shard");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&scratch);
    let cache = RepoSeamFactCache::at_dir(scratch.clone());
    let key = empty_state().cache_key();
    let seams = vec![sample_classified(); 2];
    if let Err(err) = cache.store_classified_seams_with_limit(&key, &seams, None, 1) {
        return Err(format!(
            "large classified seam cache should shard: {err}"
        ));
    }
    // Damage the stored entry by removing its second shard.
    let second_shard = cache.sharded_entry_dir(&key).join("shard-00001.json");
    if let Err(err) = std::fs::remove_file(second_shard) {
        return Err(format!("remove shard fixture: {err}"));
    }
    match cache.load_classified_seams(&key) {
        CacheLoad::CorruptIgnored { reason } => {
            assert!(
                reason.contains("missing sharded cache file"),
                "missing shard should be named in corrupt reason: {reason}"
            );
            let _ = std::fs::remove_dir_all(&scratch);
            Ok(())
        }
        other => Err(format!(
            "expected CorruptIgnored for missing sharded cache file, got {other:?}"
        )),
    }
}
/// Two seams stored through the compact path with a limit of two must load
/// back as a two-seam hit.
#[test]
fn compact_cache_store_limit_can_be_raised_for_large_entries() -> Result<(), String> {
    let scratch = isolated_dir("compact-raised-limit");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&scratch);
    let cache = RepoSeamFactCache::at_dir(scratch.clone());
    let key = empty_state().cache_key();
    let seams = vec![sample_classified(); 2];
    if let Err(err) = cache.store_compact_classified_seams_with_limit(&key, &seams, 2) {
        return Err(format!(
            "raised compact cache limit should allow store: {err}"
        ));
    }
    match cache.load_classified_seams(&key) {
        CacheLoad::Hit((loaded, _)) if loaded.len() == 2 => {
            let _ = std::fs::remove_dir_all(&scratch);
            Ok(())
        }
        other => Err(format!(
            "expected compact cache hit after raised limit: {other:?}"
        )),
    }
}
/// An entry stored for one version of a file must not be served once the
/// file's bytes change: the two keys must differ in `files_content_hash`, and
/// loading with the new key must be a `Miss`.
///
/// Every fallible step feeds a single `Result`, so the scratch directory is
/// removed on failure paths as well as on success.
#[test]
fn given_changed_file_content_hash_when_cache_is_loaded_then_old_entry_is_treated_as_miss()
-> Result<(), String> {
    let dir = isolated_dir("changed");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&dir);
    let cache = RepoSeamFactCache::at_dir(dir.clone());
    let path = PathBuf::from("src/foo.rs");
    let original_files = [(path.clone(), b"fn foo() {}\n".to_vec())];
    let original_key = WorkspaceState {
        workspace_root: Path::new("/repo"),
        files: &original_files,
        cfg_features: None,
        config_text: None,
        test_intent_text: None,
        suppressions_text: None,
    }
    .cache_key();
    // Same path, different bytes: only the file content differs between keys.
    let new_files = [(path, b"fn foo() { let x = 1; }\n".to_vec())];
    let new_key = WorkspaceState {
        workspace_root: Path::new("/repo"),
        files: &new_files,
        cfg_features: None,
        config_text: None,
        test_intent_text: None,
        suppressions_text: None,
    }
    .cache_key();
    let result = cache
        .store_classified_seams_with_limit(
            &original_key,
            &[sample_classified()],
            None,
            CLASSIFIED_SEAM_CACHE_STORE_LIMIT,
        )
        .map_err(|err| format!("store original: {err}"))
        .and_then(|_| {
            if original_key.files_content_hash == new_key.files_content_hash {
                Err(String::from(
                    "different file content must produce different files_content_hash",
                ))
            } else {
                match cache.load_classified_seams(&new_key) {
                    CacheLoad::Miss => Ok(()),
                    other => Err(format!(
                        "expected Miss after file content change, got {other:?}"
                    )),
                }
            }
        });
    // Runs regardless of outcome so a failing run does not leave files behind.
    let _ = std::fs::remove_dir_all(&dir);
    result
}
/// Changing only the bytes of a test file (production file untouched) must
/// change both `files_content_hash` and the cache filename derived from the key.
#[test]
fn given_test_file_content_changes_when_cache_key_is_built_then_classified_seam_cache_is_invalidated()
-> Result<(), String> {
    let repo_root = Path::new("/repo");
    let prod = PathBuf::from("src/foo.rs");
    let test_path = PathBuf::from("tests/foo_test.rs");
    let prod_bytes = b"pub fn foo() -> i32 { 1 }\n".to_vec();
    // The two file sets differ only in the test file's contents.
    let baseline_files = [
        (prod.clone(), prod_bytes.clone()),
        (
            test_path.clone(),
            b"#[test] fn smoke() { assert_eq!(1, 1); }\n".to_vec(),
        ),
    ];
    let updated_files = [
        (prod, prod_bytes),
        (
            test_path,
            b"#[test] fn smoke() { assert_eq!(super::foo(), 1); }\n".to_vec(),
        ),
    ];
    let baseline = WorkspaceState {
        workspace_root: repo_root,
        files: &baseline_files,
        cfg_features: None,
        config_text: None,
        test_intent_text: None,
        suppressions_text: None,
    }
    .cache_key();
    let updated = WorkspaceState {
        workspace_root: repo_root,
        files: &updated_files,
        cfg_features: None,
        config_text: None,
        test_intent_text: None,
        suppressions_text: None,
    }
    .cache_key();
    if baseline.files_content_hash == updated.files_content_hash {
        Err(
            "test-only file content change must change files_content_hash so stale \
TestGripEvidence cannot survive in the cache"
                .into(),
        )
    } else if baseline.filename() == updated.filename() {
        Err("test-only file content change must produce a different cache filename".into())
    } else {
        Ok(())
    }
}
/// Two states that differ only in `test_intent_text` must produce different
/// `test_intent_hash` values and different cache filenames.
#[test]
fn given_test_intent_hash_change_when_cache_is_loaded_then_classified_seam_cache_is_invalidated()
-> Result<(), String> {
    let updated = WorkspaceState {
        test_intent_text: Some("[[test]] name = \"smoke\""),
        ..empty_state()
    }
    .cache_key();
    let baseline = WorkspaceState {
        test_intent_text: Some(""),
        ..empty_state()
    }
    .cache_key();
    if baseline.test_intent_hash == updated.test_intent_hash {
        Err("different test intent must produce different test_intent_hash".into())
    } else if baseline.filename() == updated.filename() {
        Err("different test_intent_hash must produce a different cache filename".into())
    } else {
        Ok(())
    }
}
/// Two states that differ only in `suppressions_text` must produce different
/// `suppressions_hash` values and different cache filenames.
#[test]
fn given_suppression_hash_change_when_cache_is_loaded_then_classified_seam_cache_is_invalidated()
-> Result<(), String> {
    let updated = WorkspaceState {
        suppressions_text: Some("[[suppression]] kind = \"exposure_gap\""),
        ..empty_state()
    }
    .cache_key();
    let baseline = WorkspaceState {
        suppressions_text: Some(""),
        ..empty_state()
    }
    .cache_key();
    if baseline.suppressions_hash == updated.suppressions_hash {
        Err("different suppressions text must produce different suppressions_hash".into())
    } else if baseline.filename() == updated.filename() {
        Err("different suppressions_hash must produce a different cache filename".into())
    } else {
        Ok(())
    }
}
/// Invalid JSON written at the entry path must load as `CorruptIgnored` with a
/// reason that mentions "decode failed".
#[test]
fn given_corrupt_cache_entry_when_loading_then_corrupt_ignored_is_reported_without_failing()
-> Result<(), String> {
    let scratch = isolated_dir("corrupt");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&scratch);
    if let Err(err) = std::fs::create_dir_all(&scratch) {
        return Err(format!("mkdir: {err}"));
    }
    let cache = RepoSeamFactCache::at_dir(scratch.clone());
    let key = empty_state().cache_key();
    // Plant bytes that cannot be decoded where the entry for `key` would live.
    if let Err(err) = std::fs::write(cache.entry_path(&key), b"{not valid json") {
        return Err(format!("write corrupt entry: {err}"));
    }
    let result = match cache.load_classified_seams(&key) {
        CacheLoad::CorruptIgnored { reason } if reason.contains("decode failed") => Ok(()),
        CacheLoad::CorruptIgnored { reason } => Err(format!(
            "corrupt reason should explain decode failure, got {reason}"
        )),
        other => Err(format!(
            "expected CorruptIgnored on bad json, got {other:?}"
        )),
    };
    let _ = std::fs::remove_dir_all(&scratch);
    result
}
/// An envelope built for `key_a` but written at `key_b`'s entry path must load
/// as a `Miss` when requested with `key_b`.
#[test]
fn given_envelope_key_mismatch_when_loading_then_miss_is_returned_without_failing()
-> Result<(), String> {
    let scratch = isolated_dir("keymismatch");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&scratch);
    let cache = RepoSeamFactCache::at_dir(scratch.clone());
    let key_a = WorkspaceState {
        cfg_features: Some("a"),
        ..empty_state()
    }
    .cache_key();
    let key_b = WorkspaceState {
        cfg_features: Some("b"),
        ..empty_state()
    }
    .cache_key();
    if let Err(err) = cache.store_classified_seams_with_limit(
        &key_a,
        &[sample_classified()],
        None,
        CLASSIFIED_SEAM_CACHE_STORE_LIMIT,
    ) {
        return Err(format!("store under key_a: {err}"));
    }
    // Forge the mismatch: key_a's envelope saved under key_b's filename.
    let envelope = CacheEnvelope::new(key_a.clone(), vec![sample_classified()], None);
    if let Err(err) = std::fs::create_dir_all(&scratch) {
        return Err(format!("mkdir: {err}"));
    }
    let bytes = codec::encode(&envelope)?;
    let misplaced_entry = cache.entry_path(&key_b);
    if let Err(err) = std::fs::write(misplaced_entry, bytes) {
        return Err(format!("write under wrong filename: {err}"));
    }
    let result = match cache.load_classified_seams(&key_b) {
        CacheLoad::Miss => Ok(()),
        other => Err(format!(
            "expected Miss when envelope key mismatches request, got {other:?}"
        )),
    };
    let _ = std::fs::remove_dir_all(&scratch);
    result
}
/// File facts stored under a key must load back as a `Hit` equal to the stored
/// value when the same key is used.
#[test]
fn given_file_facts_cached_when_loading_same_file_bytes_then_hit_is_returned()
-> Result<(), String> {
    let scratch = isolated_dir("file-facts-warm");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&scratch);
    let cache = RepoFileFactCache::at_dir(scratch.clone());
    let path = PathBuf::from("src/lib.rs");
    let key = RepoFileFactCacheKey::new(&path, b"pub fn cached() {}\n");
    let facts = FileFacts {
        path: path.clone(),
        source: "pub fn cached() {}\n".to_string(),
        ..FileFacts::default()
    };
    if let Err(err) = cache.store_file_facts(&key, &facts) {
        return Err(format!("store file facts should succeed: {err}"));
    }
    let result = match cache.load_file_facts(&key) {
        CacheLoad::Hit(loaded) if loaded == facts => Ok(()),
        CacheLoad::Hit(_) => Err("loaded file facts should match stored facts".to_string()),
        other => Err(format!("expected file fact cache hit, got {other:?}")),
    };
    let _ = std::fs::remove_dir_all(&scratch);
    result
}
/// File facts stored under a key built from one set of bytes must load as a
/// `Miss` under a key built from the same path with different bytes.
#[test]
fn given_file_content_changes_when_file_facts_load_then_miss_is_returned() -> Result<(), String>
{
    let scratch = isolated_dir("file-facts-invalidates");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&scratch);
    let cache = RepoFileFactCache::at_dir(scratch.clone());
    let path = PathBuf::from("src/lib.rs");
    let changed_key = RepoFileFactCacheKey::new(&path, b"pub fn cached() -> i32 { 2 }\n");
    let original_key = RepoFileFactCacheKey::new(&path, b"pub fn cached() -> i32 { 1 }\n");
    let facts = FileFacts {
        path: path.clone(),
        source: "pub fn cached() -> i32 { 1 }\n".to_string(),
        ..FileFacts::default()
    };
    if let Err(err) = cache.store_file_facts(&original_key, &facts) {
        return Err(format!("store original file facts: {err}"));
    }
    let result = match cache.load_file_facts(&changed_key) {
        CacheLoad::Miss => Ok(()),
        other => Err(format!(
            "expected Miss after file content change, got {other:?}"
        )),
    };
    let _ = std::fs::remove_dir_all(&scratch);
    result
}
/// Pins the exact text of `FileFactCacheStats::status_label` for a fixed set
/// of counters.
#[test]
fn file_fact_cache_stats_status_label_is_trace_safe() {
    let label = FileFactCacheStats {
        hits: 2,
        misses: 3,
        corrupt_ignored: 1,
        stores: 3,
        store_errors: 0,
    }
    .status_label();
    assert_eq!(label, "hits_2_misses_3_corrupt_1_store_errors_0");
}
/// With the environment value absent, the cache base is
/// `<workspace>/target/ripr/cache`.
#[test]
fn cache_base_dir_returns_default_when_env_is_unset() {
    let workspace = PathBuf::from("/some/workspace");
    let expected = workspace.join("target").join("ripr").join("cache");
    let actual = cache_base_dir_from_env(&workspace, Err(std::env::VarError::NotPresent));
    assert_eq!(actual, expected, "unset env must return default cache base");
}
/// An empty environment value falls back to `<workspace>/target/ripr/cache`.
#[test]
fn cache_base_dir_returns_default_when_env_is_empty() {
    let workspace = PathBuf::from("/some/workspace");
    let expected = workspace.join("target").join("ripr").join("cache");
    let actual = cache_base_dir_from_env(&workspace, Ok(String::new()));
    assert_eq!(
        actual, expected,
        "empty RIPR_CACHE_DIR must return default cache base"
    );
}
/// A non-empty environment value is used verbatim as the cache base.
#[test]
fn cache_base_dir_returns_env_value_when_set() {
    let override_dir = "/tmp/my-ripr-cache";
    let workspace = PathBuf::from("/some/workspace");
    let expected = PathBuf::from(override_dir);
    let actual = cache_base_dir_from_env(&workspace, Ok(override_dir.to_string()));
    assert_eq!(
        actual, expected,
        "non-empty RIPR_CACHE_DIR must override the default cache base"
    );
}
/// Leading and trailing spaces around the environment value are stripped
/// before it is turned into a path.
#[test]
fn cache_base_dir_trims_whitespace_from_env_value() {
    let workspace = PathBuf::from("/some/workspace");
    let padded_value = " /tmp/trimmed-cache ".to_string();
    let expected = PathBuf::from("/tmp/trimmed-cache");
    let actual = cache_base_dir_from_env(&workspace, Ok(padded_value));
    assert_eq!(
        actual, expected,
        "RIPR_CACHE_DIR value must be trimmed before use"
    );
}
/// Builds a `<pid>-<hex nanos>` token from the current process id and the
/// nanoseconds elapsed since the Unix epoch.
///
/// If the system clock reads earlier than the epoch, the nanosecond part is `0`.
fn uuid_like() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};
    let pid = std::process::id();
    let since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    format!("{}-{:x}", pid, since_epoch)
}
/// A store that carries `Some(limit_info)` must load back as a hit with the
/// same seam count and matching `analyzed`, `total`, and `source` values.
#[test]
fn cache_envelope_with_limit_info_round_trips() -> Result<(), String> {
    let scratch = isolated_dir("envelope-limit-info");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&scratch);
    let cache = RepoSeamFactCache::at_dir(scratch.clone());
    let key = empty_state().cache_key();
    let seams = vec![sample_classified()];
    let limit_info = CachedSeamLimitInfo {
        analyzed: 1,
        total: 5,
        source: SeamLimitSource::Configured,
    };
    if let Err(err) = cache.store_classified_seams_with_limit(
        &key,
        &seams,
        Some(&limit_info),
        CLASSIFIED_SEAM_CACHE_STORE_LIMIT,
    ) {
        return Err(format!("store with limit_info should succeed: {err}"));
    }
    // Arms are ordered so the first mismatch found is the one reported:
    // seam count, then presence of limit info, then its individual fields.
    let result = match cache.load_classified_seams(&key) {
        CacheLoad::Hit((loaded, _)) if loaded.len() != seams.len() => Err(format!(
            "seam count mismatch: {} vs {}",
            loaded.len(),
            seams.len()
        )),
        CacheLoad::Hit((_, None)) => {
            Err("expected Some(limit_info) on hit, got None".to_string())
        }
        CacheLoad::Hit((_, Some(li))) => {
            if li.analyzed != limit_info.analyzed {
                Err(format!(
                    "analyzed mismatch: {} vs {}",
                    li.analyzed, limit_info.analyzed
                ))
            } else if li.total != limit_info.total {
                Err(format!(
                    "total mismatch: {} vs {}",
                    li.total, limit_info.total
                ))
            } else if li.source.as_str() != limit_info.source.as_str() {
                Err(format!(
                    "source mismatch: {} vs {}",
                    li.source.as_str(),
                    limit_info.source.as_str()
                ))
            } else {
                Ok(())
            }
        }
        other => Err(format!("expected Hit with limit_info, got {other:?}")),
    };
    let _ = std::fs::remove_dir_all(&scratch);
    result
}
/// An on-disk entry whose JSON has no `seam_limit_info` key must still load as
/// a hit, with the limit info reported as `None`.
#[test]
fn cache_envelope_missing_limit_info_field_deserializes_as_none() -> Result<(), String> {
    let scratch = isolated_dir("envelope-compat");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&scratch);
    let cache = RepoSeamFactCache::at_dir(scratch.clone());
    let key = empty_state().cache_key();
    let seams = vec![sample_classified()];
    if let Err(err) = cache.store_classified_seams_with_limit(
        &key,
        &seams,
        None,
        CLASSIFIED_SEAM_CACHE_STORE_LIMIT,
    ) {
        return Err(format!("store: {err}"));
    }
    // Rewrite the stored entry with the `seam_limit_info` key stripped out.
    let entry_path = cache.entry_path(&key);
    let raw = std::fs::read(&entry_path).map_err(|err| format!("read entry: {err}"))?;
    let mut json_val: serde_json::Value =
        serde_json::from_slice(&raw).map_err(|err| format!("parse entry: {err}"))?;
    if let Some(object) = json_val.as_object_mut() {
        object.remove("seam_limit_info");
    }
    let rewritten = serde_json::to_vec(&json_val).map_err(|err| format!("re-encode: {err}"))?;
    std::fs::write(&entry_path, rewritten).map_err(|err| format!("rewrite: {err}"))?;
    let result = match cache.load_classified_seams(&key) {
        CacheLoad::Hit((_, limit_info)) if limit_info.is_some() => Err(format!(
            "old cache entry without seam_limit_info should deserialize as None, got {limit_info:?}"
        )),
        CacheLoad::Hit(_) => Ok(()),
        other => Err(format!("expected Hit on compat cache entry, got {other:?}")),
    };
    let _ = std::fs::remove_dir_all(&scratch);
    result
}
/// A store made with `None` limit info must load back as a hit whose limit
/// info is also `None`.
#[test]
fn complete_run_stores_none_limit_info_in_envelope() -> Result<(), String> {
    let scratch = isolated_dir("envelope-complete");
    // Best-effort: clear anything an earlier run left at this location.
    let _ = std::fs::remove_dir_all(&scratch);
    let cache = RepoSeamFactCache::at_dir(scratch.clone());
    let key = empty_state().cache_key();
    let seams = vec![sample_classified()];
    if let Err(err) = cache.store_classified_seams_with_limit(
        &key,
        &seams,
        None,
        CLASSIFIED_SEAM_CACHE_STORE_LIMIT,
    ) {
        return Err(format!("store complete: {err}"));
    }
    let result = match cache.load_classified_seams(&key) {
        CacheLoad::Hit((_, stored_limit)) => match stored_limit {
            Some(info) => Err(format!(
                "complete run should return None limit_info, got analyzed={} total={}",
                info.analyzed, info.total
            )),
            None => Ok(()),
        },
        other => Err(format!("expected Hit on complete cache, got {other:?}")),
    };
    let _ = std::fs::remove_dir_all(&scratch);
    result
}
}