use std::collections::HashMap;
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use crate::confidence::ConfidenceMetadata;
/// Schema version that `Manifest::new` stores in `Manifest::schema_version`.
pub const MANIFEST_SCHEMA_VERSION: u32 = 1;
/// Snapshot format version that `Manifest::new` stores in
/// `Manifest::snapshot_format_version`.
pub const SNAPSHOT_FORMAT_VERSION: u32 = 2;
/// Record of which configuration file was used and which overrides were
/// applied on top of it.
///
/// The type is serde-serializable and compared field by field through the
/// derived `PartialEq`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ConfigProvenance {
/// Path of the configuration file this record refers to.
pub config_file: PathBuf,
/// Checksum string of the configuration file; `config_matches` compares it
/// verbatim against a caller-supplied checksum.
pub config_checksum: String,
/// Schema version supplied by whoever created the record
/// (presumably the configuration schema version — confirm with callers).
pub schema_version: u32,
/// Recorded overrides, keyed by `"<source>:<key>"` as built in `add_override`.
pub overrides: HashMap<String, OverrideEntry>,
/// Whole seconds since the Unix epoch at the moment the record was created
/// (0 when the system clock reads earlier than the epoch).
pub build_timestamp: u64,
/// Result of `hostname::get()` at creation time; `None` when the lookup
/// failed or the name is not valid Unicode.
pub build_host: Option<String>,
}
/// A single configuration override together with where it came from.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct OverrideEntry {
/// Origin of the override.
pub source: OverrideSource,
/// Configuration key the override applies to.
pub key: String,
/// Value recorded for the key.
pub value: String,
/// Earlier value of the key, when the caller recorded one.
pub original_value: Option<String>,
}
/// Origin of a configuration override.
///
/// Serialized in lowercase (`"cli"`, `"env"`, `"api"`) because of
/// `rename_all = "lowercase"`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum OverrideSource {
/// Override tagged `"cli"` (presumably a command-line argument).
Cli,
/// Override tagged `"env"` (presumably an environment variable).
Env,
/// Override tagged `"api"` (presumably set programmatically).
Api,
}
impl ConfigProvenance {
    /// Builds a provenance record for `config_file` with an empty override set.
    ///
    /// `build_timestamp` is the current wall-clock time in whole seconds since
    /// the Unix epoch (0 when the clock reads earlier than the epoch).
    /// `build_host` is the value of `hostname::get()` when that call succeeds
    /// and yields valid Unicode, otherwise `None`.
    #[must_use]
    pub fn new(
        config_file: impl Into<PathBuf>,
        config_checksum: String,
        schema_version: u32,
    ) -> Self {
        let config_file = config_file.into();
        let build_timestamp = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map_or(0, |since_epoch| since_epoch.as_secs());
        let build_host = match hostname::get() {
            Ok(raw_name) => raw_name.into_string().ok(),
            Err(_) => None,
        };

        Self {
            config_file,
            config_checksum,
            schema_version,
            overrides: HashMap::new(),
            build_timestamp,
            build_host,
        }
    }

    /// Records `entry` under the map key `"<source>:<key>"`.
    ///
    /// An entry already stored under the same source and key is replaced.
    pub fn add_override(&mut self, entry: OverrideEntry) {
        let map_key = [entry.source.as_str(), entry.key.as_str()].join(":");
        self.overrides.insert(map_key, entry);
    }

    /// Returns `true` when `current_checksum` is exactly the stored checksum.
    #[must_use]
    pub fn config_matches(&self, current_checksum: &str) -> bool {
        self.config_checksum.as_str() == current_checksum
    }

    /// Number of overrides currently recorded.
    #[must_use]
    pub fn override_count(&self) -> usize {
        self.overrides.len()
    }

    /// Returns `true` when at least one override has been recorded.
    #[must_use]
    pub fn has_overrides(&self) -> bool {
        self.override_count() > 0
    }
}
impl OverrideSource {
    /// Lowercase name of the variant: `"cli"`, `"env"` or `"api"`.
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match *self {
            Self::Api => "api",
            Self::Env => "env",
            Self::Cli => "cli",
        }
    }
}
impl std::fmt::Display for OverrideSource {
    /// Writes the same lowercase name that `as_str` returns.
    ///
    /// Width and padding flags on the formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = self.as_str();
        f.write_str(name)
    }
}
/// Builder that collects overrides and then produces a `ConfigProvenance`
/// through `build`.
#[derive(Debug)]
pub struct ConfigProvenanceBuilder {
// Passed unchanged to `ConfigProvenance::new` by `build`.
config_file: PathBuf,
// Passed unchanged to `ConfigProvenance::new` by `build`.
config_checksum: String,
// Passed unchanged to `ConfigProvenance::new` by `build`.
schema_version: u32,
// Overrides in the order they were added; `build` applies them in this order.
overrides: Vec<OverrideEntry>,
}
impl ConfigProvenanceBuilder {
#[must_use]
pub fn new(
config_file: impl Into<PathBuf>,
config_checksum: String,
schema_version: u32,
) -> Self {
Self {
config_file: config_file.into(),
config_checksum,
schema_version,
overrides: Vec::new(),
}
}
#[must_use]
pub fn with_cli_override(
mut self,
key: impl Into<String>,
value: impl Into<String>,
original: Option<String>,
) -> Self {
self.overrides.push(OverrideEntry {
source: OverrideSource::Cli,
key: key.into(),
value: value.into(),
original_value: original,
});
self
}
#[must_use]
pub fn with_env_override(
mut self,
key: impl Into<String>,
value: impl Into<String>,
original: Option<String>,
) -> Self {
self.overrides.push(OverrideEntry {
source: OverrideSource::Env,
key: key.into(),
value: value.into(),
original_value: original,
});
self
}
#[must_use]
pub fn build(self) -> ConfigProvenance {
let mut provenance =
ConfigProvenance::new(self.config_file, self.config_checksum, self.schema_version);
for entry in self.overrides {
provenance.add_override(entry);
}
provenance
}
}
/// Provenance used when no real configuration file is involved.
///
/// Points at `.sqry/graph/config/config.json`, uses the placeholder checksum
/// `"none"` and schema version 0, and carries no overrides. The timestamp and
/// host are captured at call time in the same way as `ConfigProvenance::new`.
#[must_use]
pub fn default_provenance() -> ConfigProvenance {
    let seconds_since_epoch =
        match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_secs(),
            // Clock earlier than the epoch: fall back to 0.
            Err(_) => 0,
        };
    let host = match hostname::get() {
        Ok(raw_name) => raw_name.into_string().ok(),
        Err(_) => None,
    };

    ConfigProvenance {
        config_file: PathBuf::from(".sqry/graph/config/config.json"),
        config_checksum: "none".to_owned(),
        schema_version: 0,
        overrides: HashMap::new(),
        build_timestamp: seconds_since_epoch,
        build_host: host,
    }
}
/// Reads the whole file at `path` and returns its BLAKE3 digest as a hex string.
///
/// # Errors
///
/// Returns the I/O error from `std::fs::read` when the file cannot be read.
pub fn compute_config_checksum(path: impl AsRef<Path>) -> std::io::Result<String> {
    std::fs::read(path).map(|bytes| {
        let digest = blake3::hash(&bytes);
        digest.to_hex().to_string()
    })
}
/// Information about the build that produced a manifest.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct BuildProvenance {
/// Version string supplied by the caller of `BuildProvenance::new`.
pub sqry_version: String,
/// Creation time as an RFC 3339 string; `BuildProvenance::new` takes it from
/// the current UTC time.
pub build_timestamp: String,
/// Command string supplied by the caller of `BuildProvenance::new`.
pub build_command: String,
/// Plugin name to hash string, filled through `add_plugin_hash`. Left out of
/// the serialized form when empty and defaulted to empty when missing.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub plugin_hashes: HashMap<String, String>,
}
impl BuildProvenance {
    /// Creates a record for the given version and command, stamped with the
    /// current UTC time in RFC 3339 form and with no plugin hashes.
    #[must_use]
    pub fn new(sqry_version: impl Into<String>, build_command: impl Into<String>) -> Self {
        let sqry_version = sqry_version.into();
        let build_timestamp = chrono::Utc::now().to_rfc3339();
        let build_command = build_command.into();
        Self {
            sqry_version,
            build_timestamp,
            build_command,
            plugin_hashes: HashMap::new(),
        }
    }

    /// Stores `hash` for `plugin_name`, replacing any hash already stored for
    /// that name.
    pub fn add_plugin_hash(&mut self, plugin_name: impl Into<String>, hash: impl Into<String>) {
        let (name, digest) = (plugin_name.into(), hash.into());
        self.plugin_hashes.insert(name, digest);
    }
}
/// Metadata describing a built snapshot, persisted as JSON through
/// `Manifest::save` and read back with `Manifest::load`.
///
/// Fields marked `#[serde(default, skip_serializing_if = ...)]` are left out
/// of the JSON when empty or `None`, and take their default value when the
/// key is missing on load.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Manifest {
/// Manifest schema version; `Manifest::new` sets it to `MANIFEST_SCHEMA_VERSION`.
pub schema_version: u32,
/// Snapshot format version; `Manifest::new` sets it to `SNAPSHOT_FORMAT_VERSION`.
pub snapshot_format_version: u32,
/// Creation time as an RFC 3339 string; `Manifest::new` takes it from the
/// current UTC time.
pub built_at: String,
/// Root path supplied by the caller of `Manifest::new`.
pub root_path: String,
/// Node count supplied by the caller of `Manifest::new`.
pub node_count: usize,
/// Edge count supplied by the caller of `Manifest::new`.
pub edge_count: usize,
/// Optional raw edge count; `None` for manifests that do not carry the key.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub raw_edge_count: Option<usize>,
/// Snapshot digest string supplied by the caller
/// (presumably a SHA-256 hex digest, going by the field name — confirm).
pub snapshot_sha256: String,
/// Details of the build that produced the snapshot.
pub build_provenance: BuildProvenance,
/// File counts per key (keys are presumably language names — confirm with
/// the producer).
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub file_count: HashMap<String, usize>,
/// List of language names, set through `with_languages`.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub languages: Vec<String>,
/// Free-form string key/value configuration, set through `with_config`.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub config: HashMap<String, String>,
/// Confidence metadata per key; the tests in this file key it by language
/// name (for example `"rust"`).
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub confidence: HashMap<String, ConfidenceMetadata>,
/// Optional commit identifier, set through `with_last_indexed_commit`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub last_indexed_commit: Option<String>,
}
impl Manifest {
    /// Creates a manifest stamped with the current schema and snapshot format
    /// versions and the current UTC time.
    ///
    /// All optional fields start out empty or `None`; use the `with_*` methods
    /// to fill them in.
    #[must_use]
    pub fn new(
        root_path: impl Into<String>,
        node_count: usize,
        edge_count: usize,
        snapshot_sha256: impl Into<String>,
        build_provenance: BuildProvenance,
    ) -> Self {
        let built_at = chrono::Utc::now().to_rfc3339();
        let root_path = root_path.into();
        let snapshot_sha256 = snapshot_sha256.into();
        Self {
            schema_version: MANIFEST_SCHEMA_VERSION,
            snapshot_format_version: SNAPSHOT_FORMAT_VERSION,
            built_at,
            root_path,
            node_count,
            edge_count,
            raw_edge_count: None,
            snapshot_sha256,
            build_provenance,
            file_count: HashMap::new(),
            languages: Vec::new(),
            config: HashMap::new(),
            confidence: HashMap::new(),
            last_indexed_commit: None,
        }
    }

    /// Returns the manifest with `last_indexed_commit` replaced by `commit`.
    #[must_use]
    pub fn with_last_indexed_commit(self, commit: Option<String>) -> Self {
        Self {
            last_indexed_commit: commit,
            ..self
        }
    }

    /// Writes the manifest to `path` through `write_manifest_atomic`.
    ///
    /// # Errors
    ///
    /// Returns whatever error `write_manifest_atomic` reports.
    pub fn save(&self, path: impl AsRef<Path>) -> std::io::Result<()> {
        let target = path.as_ref();
        write_manifest_atomic(target, self)
    }

    /// Reads `path` as UTF-8 text and parses it as a JSON manifest.
    ///
    /// # Errors
    ///
    /// Returns the I/O error from reading the file, or an
    /// `ErrorKind::InvalidData` error wrapping the JSON parse failure.
    pub fn load(path: impl AsRef<Path>) -> std::io::Result<Self> {
        let raw = std::fs::read_to_string(path)?;
        match serde_json::from_str(&raw) {
            Ok(manifest) => Ok(manifest),
            Err(parse_err) => Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                parse_err,
            )),
        }
    }

    /// Returns the manifest with `file_count` replaced.
    #[must_use]
    pub fn with_file_count(self, file_count: HashMap<String, usize>) -> Self {
        Self { file_count, ..self }
    }

    /// Returns the manifest with `languages` replaced.
    #[must_use]
    pub fn with_languages(self, languages: Vec<String>) -> Self {
        Self { languages, ..self }
    }

    /// Returns the manifest with `config` replaced.
    #[must_use]
    pub fn with_config(self, config: HashMap<String, String>) -> Self {
        Self { config, ..self }
    }

    /// Returns the manifest with `confidence` replaced.
    #[must_use]
    pub fn with_confidence(self, confidence: HashMap<String, ConfidenceMetadata>) -> Self {
        Self { confidence, ..self }
    }
}
/// Serializes `manifest` as pretty-printed JSON and atomically replaces the
/// file at `path` with it.
///
/// The JSON is written to a temporary file created in the same directory as
/// `path`, synced to disk, and then moved over `path`, so readers never see a
/// partially written manifest.
///
/// # Errors
///
/// * `ErrorKind::InvalidInput` when `path` has no parent component.
/// * `ErrorKind::InvalidData` when the manifest cannot be serialized.
/// * Any I/O error from creating, writing, syncing or moving the temporary file.
fn write_manifest_atomic(path: &Path, manifest: &Manifest) -> std::io::Result<()> {
    use std::io::Write;

    let parent = path.parent().ok_or_else(|| {
        std::io::Error::new(std::io::ErrorKind::InvalidInput, "Path has no parent")
    })?;
    let mut temp = tempfile::NamedTempFile::new_in(parent)?;

    // Serialize into memory and hand the file one buffer. Streaming the
    // serializer straight into the unbuffered file handle would issue a
    // separate small write for each JSON fragment.
    let json = serde_json::to_vec_pretty(manifest)
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
    temp.write_all(&json)?;
    temp.as_file().sync_all()?;

    #[cfg(unix)]
    {
        temp.persist(path)?;
    }
    #[cfg(windows)]
    {
        use std::ffi::OsStr;
        use std::os::windows::ffi::OsStrExt;
        use windows_sys::Win32::Storage::FileSystem::{
            MOVEFILE_REPLACE_EXISTING, MOVEFILE_WRITE_THROUGH, MoveFileExW,
        };
        // Give up the open handle and keep only the path for the move below.
        let temp_path = temp.into_temp_path();
        let source: Vec<u16> = OsStr::new(&temp_path)
            .encode_wide()
            .chain(Some(0))
            .collect();
        let dest: Vec<u16> = OsStr::new(path).encode_wide().chain(Some(0)).collect();
        // SAFETY: `source` and `dest` are NUL-terminated UTF-16 buffers (each
        // has a trailing 0 appended above) and both outlive the call.
        let result = unsafe {
            MoveFileExW(
                source.as_ptr(),
                dest.as_ptr(),
                MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH,
            )
        };
        if result == 0 {
            return Err(std::io::Error::last_os_error());
        }
        // The file has already been moved, so the outcome of the temp-path
        // cleanup is deliberately discarded.
        drop(temp_path.close());
    }
    #[cfg(not(any(unix, windows)))]
    {
        compile_error!("Atomic manifest writes require Unix or Windows platform");
    }
    Ok(())
}
/// Atomically replaces the file at `path` with `bytes`.
///
/// The bytes are written to a temporary file created in the same directory as
/// `path`, synced to disk, and then moved over `path`.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidInput` when `path` has no parent component, or
/// any I/O error from creating, writing, syncing or moving the temporary file.
pub(crate) fn write_manifest_bytes_atomic(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
use std::io::Write;
let parent = path.parent().ok_or_else(|| {
std::io::Error::new(std::io::ErrorKind::InvalidInput, "Path has no parent")
})?;
// Create the temp file next to the destination so the final move stays
// within one directory.
let mut temp = tempfile::NamedTempFile::new_in(parent)?;
temp.write_all(bytes)?;
// Flush file contents to disk before the file becomes visible under `path`.
temp.as_file().sync_all()?;
#[cfg(unix)]
{
temp.persist(path)?;
}
#[cfg(windows)]
{
use std::ffi::OsStr;
use std::os::windows::ffi::OsStrExt;
use windows_sys::Win32::Storage::FileSystem::{
MOVEFILE_REPLACE_EXISTING, MOVEFILE_WRITE_THROUGH, MoveFileExW,
};
// Give up the open handle and keep only the path for the move below.
let temp_path = temp.into_temp_path();
let source: Vec<u16> = OsStr::new(&temp_path)
.encode_wide()
.chain(Some(0))
.collect();
let dest: Vec<u16> = OsStr::new(path).encode_wide().chain(Some(0)).collect();
// SAFETY: `source` and `dest` are NUL-terminated UTF-16 buffers (each has a
// trailing 0 appended above) and both outlive the call.
let result = unsafe {
MoveFileExW(
source.as_ptr(),
dest.as_ptr(),
MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH,
)
};
if result == 0 {
return Err(std::io::Error::last_os_error());
}
// The file has already been moved, so the outcome of the temp-path cleanup
// is deliberately discarded.
drop(temp_path.close());
}
#[cfg(not(any(unix, windows)))]
{
compile_error!("Atomic manifest writes require Unix or Windows platform");
}
Ok(())
}
// Unit tests for the provenance types, the manifest serialization rules and
// the atomic manifest writer.
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::NamedTempFile;
// `ConfigProvenance::new` stores its arguments and starts without overrides.
#[test]
fn test_config_provenance_new() {
let provenance =
ConfigProvenance::new(".sqry/graph/config/config.json", "abc123".to_string(), 1);
assert_eq!(
provenance.config_file,
PathBuf::from(".sqry/graph/config/config.json")
);
assert_eq!(provenance.config_checksum, "abc123");
assert_eq!(provenance.schema_version, 1);
assert!(!provenance.has_overrides());
assert!(provenance.build_timestamp > 0);
}
// Overrides are stored under "<source>:<key>" map keys.
#[test]
fn test_config_provenance_with_overrides() {
let mut provenance =
ConfigProvenance::new(".sqry/graph/config/config.json", "abc123".to_string(), 1);
provenance.add_override(OverrideEntry {
source: OverrideSource::Cli,
key: "parallelism.max_workers".to_string(),
value: "8".to_string(),
original_value: Some("4".to_string()),
});
provenance.add_override(OverrideEntry {
source: OverrideSource::Env,
key: "cache.max_size_mb".to_string(),
value: "512".to_string(),
original_value: None,
});
assert!(provenance.has_overrides());
assert_eq!(provenance.override_count(), 2);
assert!(
provenance
.overrides
.contains_key("cli:parallelism.max_workers")
);
assert!(provenance.overrides.contains_key("env:cache.max_size_mb"));
}
// `config_matches` is an exact string comparison.
#[test]
fn test_config_matches() {
let provenance =
ConfigProvenance::new(".sqry/graph/config/config.json", "abc123".to_string(), 1);
assert!(provenance.config_matches("abc123"));
assert!(!provenance.config_matches("def456"));
}
// The builder forwards the schema version and every queued override.
#[test]
fn test_provenance_builder() {
let provenance = ConfigProvenanceBuilder::new(
".sqry/graph/config/config.json",
"checksum123".to_string(),
2,
)
.with_cli_override(
"limits.max_file_size",
"10485760",
Some("5242880".to_string()),
)
.with_env_override("output.format", "json", None)
.build();
assert_eq!(provenance.schema_version, 2);
assert_eq!(provenance.override_count(), 2);
}
// A provenance record survives a JSON serialize/deserialize round trip.
#[test]
fn test_serialization_roundtrip() {
let mut provenance =
ConfigProvenance::new(".sqry/graph/config/config.json", "abc123".to_string(), 1);
provenance.add_override(OverrideEntry {
source: OverrideSource::Cli,
key: "test.key".to_string(),
value: "test_value".to_string(),
original_value: Some("original".to_string()),
});
let json = serde_json::to_string(&provenance).unwrap();
let deserialized: ConfigProvenance = serde_json::from_str(&json).unwrap();
assert_eq!(provenance.config_file, deserialized.config_file);
assert_eq!(provenance.config_checksum, deserialized.config_checksum);
assert_eq!(provenance.schema_version, deserialized.schema_version);
assert_eq!(provenance.overrides, deserialized.overrides);
}
// The checksum is a 64-character string and is stable for unchanged content.
#[test]
fn test_compute_config_checksum() {
let mut temp_file = NamedTempFile::new().unwrap();
writeln!(temp_file, "{{\"test\": \"config\"}}").unwrap();
let checksum = compute_config_checksum(temp_file.path()).unwrap();
assert_eq!(checksum.len(), 64);
let checksum2 = compute_config_checksum(temp_file.path()).unwrap();
assert_eq!(checksum, checksum2);
}
// `default_provenance` uses the placeholder path, checksum and schema version.
#[test]
fn test_default_provenance() {
let provenance = default_provenance();
assert_eq!(
provenance.config_file,
PathBuf::from(".sqry/graph/config/config.json")
);
assert_eq!(provenance.config_checksum, "none");
assert_eq!(provenance.schema_version, 0);
assert!(!provenance.has_overrides());
}
// `Display` prints the lowercase variant name.
#[test]
fn test_override_source_display() {
assert_eq!(OverrideSource::Cli.to_string(), "cli");
assert_eq!(OverrideSource::Env.to_string(), "env");
assert_eq!(OverrideSource::Api.to_string(), "api");
}
// `with_confidence` stores the supplied confidence map unchanged.
#[test]
fn test_manifest_with_confidence() {
use crate::confidence::{ConfidenceLevel, ConfidenceMetadata};
use std::collections::HashMap;
let build_prov = BuildProvenance::new("2.8.0", "sqry index");
let mut confidence_map = HashMap::new();
confidence_map.insert(
"rust".to_string(),
ConfidenceMetadata {
level: ConfidenceLevel::AstOnly,
limitations: vec!["No rust-analyzer available".to_string()],
unavailable_features: vec!["Type inference".to_string()],
},
);
let manifest = Manifest::new("/test/path", 100, 200, "abc123", build_prov)
.with_confidence(confidence_map.clone());
assert_eq!(manifest.confidence.len(), 1);
assert!(manifest.confidence.contains_key("rust"));
let rust_confidence = &manifest.confidence["rust"];
assert_eq!(rust_confidence.level, ConfidenceLevel::AstOnly);
assert_eq!(rust_confidence.limitations.len(), 1);
assert_eq!(rust_confidence.unavailable_features.len(), 1);
}
// A non-empty confidence map appears in the serialized JSON.
#[test]
fn test_manifest_confidence_serialization() {
use crate::confidence::{ConfidenceLevel, ConfidenceMetadata};
use std::collections::HashMap;
let build_prov = BuildProvenance::new("2.8.0", "sqry index");
let mut confidence_map = HashMap::new();
confidence_map.insert(
"rust".to_string(),
ConfidenceMetadata {
level: ConfidenceLevel::Partial,
limitations: vec![],
unavailable_features: vec![],
},
);
let manifest = Manifest::new("/test/path", 100, 200, "abc123", build_prov)
.with_confidence(confidence_map);
let json = serde_json::to_string(&manifest).unwrap();
assert!(json.contains("\"confidence\""));
assert!(json.contains("\"rust\""));
assert!(json.contains("\"partial\""));
}
// An empty confidence map is left out of the serialized JSON.
#[test]
fn test_manifest_empty_confidence_omitted() {
let build_prov = BuildProvenance::new("2.8.0", "sqry index");
let manifest = Manifest::new("/test/path", 100, 200, "abc123", build_prov);
let json = serde_json::to_string(&manifest).unwrap();
assert!(!json.contains("\"confidence\""));
}
// A `Some` raw edge count is serialized and read back unchanged.
#[test]
fn test_manifest_raw_edge_count_field_present() {
let build_prov = BuildProvenance::new("3.6.0", "cli:index");
let mut manifest = Manifest::new("/test/path", 500, 300, "sha256", build_prov);
manifest.raw_edge_count = Some(450);
let json = serde_json::to_string_pretty(&manifest).unwrap();
assert!(
json.contains("\"raw_edge_count\""),
"New manifests should include raw_edge_count field"
);
assert!(
json.contains("450"),
"raw_edge_count value should be serialized"
);
let loaded: Manifest = serde_json::from_str(&json).unwrap();
assert_eq!(loaded.raw_edge_count, Some(450));
assert_eq!(loaded.edge_count, 300);
}
// JSON without a `raw_edge_count` key still deserializes, yielding `None`.
#[test]
fn test_legacy_manifest_without_raw_edge_count() {
let json = r#"{
"schema_version": 1,
"snapshot_format_version": 2,
"built_at": "2026-01-15T10:00:00Z",
"root_path": "/legacy/path",
"node_count": 1000,
"edge_count": 2000,
"snapshot_sha256": "legacy_sha256",
"build_provenance": {
"sqry_version": "3.4.0",
"build_timestamp": "2026-01-15T10:00:00Z",
"build_command": "sqry index"
}
}"#;
let manifest: Manifest = serde_json::from_str(json).unwrap();
assert_eq!(manifest.node_count, 1000);
assert_eq!(manifest.edge_count, 2000);
assert_eq!(
manifest.raw_edge_count, None,
"Legacy manifests should deserialize with raw_edge_count = None"
);
}
// A `None` raw edge count is left out of the serialized JSON.
#[test]
fn test_manifest_raw_edge_count_none_omitted() {
let build_prov = BuildProvenance::new("3.6.0", "cli:index");
let manifest = Manifest::new("/test/path", 500, 300, "sha256", build_prov);
let json = serde_json::to_string(&manifest).unwrap();
assert!(
!json.contains("raw_edge_count"),
"None raw_edge_count should be omitted (skip_serializing_if)"
);
}
// Writing then loading a manifest returns the same counts and digest.
#[test]
fn test_atomic_manifest_write_basic() {
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let manifest_path = temp_dir.path().join("manifest.json");
let build_prov = BuildProvenance::new("3.1.1", "sqry index");
let manifest = Manifest::new("/test/workspace", 100, 200, "test_sha256", build_prov);
write_manifest_atomic(&manifest_path, &manifest).unwrap();
assert!(manifest_path.exists());
let loaded = Manifest::load(&manifest_path).unwrap();
assert_eq!(loaded.node_count, 100);
assert_eq!(loaded.edge_count, 200);
assert_eq!(loaded.snapshot_sha256, "test_sha256");
}
// A second atomic write replaces the file with a new one. On Unix this is
// observed through a changed inode number; on other platforms
// `extract_file_id` returns `None` and the identity comparison is skipped.
#[test]
fn test_file_id_changes_after_atomic_write() {
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let manifest_path = temp_dir.path().join("manifest.json");
let build_prov = BuildProvenance::new("3.1.1", "sqry index");
let manifest1 = Manifest::new("/test/workspace", 100, 200, "sha1", build_prov.clone());
write_manifest_atomic(&manifest_path, &manifest1).unwrap();
let metadata1 = std::fs::metadata(&manifest_path).unwrap();
let file_id1 = extract_file_id(&metadata1);
std::thread::sleep(std::time::Duration::from_millis(10));
let manifest2 = Manifest::new("/test/workspace", 150, 250, "sha2", build_prov);
write_manifest_atomic(&manifest_path, &manifest2).unwrap();
let metadata2 = std::fs::metadata(&manifest_path).unwrap();
let file_id2 = extract_file_id(&metadata2);
if file_id1.is_some() && file_id2.is_some() {
assert_ne!(
file_id1, file_id2,
"File ID should change after atomic write"
);
}
let loaded = Manifest::load(&manifest_path).unwrap();
assert_eq!(loaded.node_count, 150);
assert_eq!(loaded.snapshot_sha256, "sha2");
}
// Unix: the inode number serves as the file identity.
#[cfg(unix)]
#[allow(clippy::unnecessary_wraps)]
fn extract_file_id(metadata: &std::fs::Metadata) -> Option<u64> {
use std::os::unix::fs::MetadataExt;
Some(metadata.ino())
}
// Windows: no file identity is extracted, so callers skip the comparison.
#[cfg(windows)]
#[allow(clippy::unnecessary_wraps)]
fn extract_file_id(_metadata: &std::fs::Metadata) -> Option<u64> {
None
}
// Other platforms: no file identity is extracted.
#[cfg(not(any(unix, windows)))]
fn extract_file_id(_metadata: &std::fs::Metadata) -> Option<u64> {
None
}
// An existing file at the destination is fully replaced by the new manifest.
#[test]
fn test_atomic_write_replaces_existing() {
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let manifest_path = temp_dir.path().join("manifest.json");
std::fs::write(&manifest_path, "old content").unwrap();
let build_prov = BuildProvenance::new("3.1.1", "sqry index");
let manifest = Manifest::new("/test/workspace", 100, 200, "new_sha", build_prov);
write_manifest_atomic(&manifest_path, &manifest).unwrap();
let content = std::fs::read_to_string(&manifest_path).unwrap();
assert!(content.contains("new_sha"));
assert!(!content.contains("old content"));
}
// Unix: after the write only `manifest.json` remains in the directory, i.e.
// no temporary file is left behind.
#[test]
#[cfg(unix)]
fn test_atomic_manifest_write_unix_persist() {
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let manifest_path = temp_dir.path().join("manifest.json");
let build_prov = BuildProvenance::new("3.1.1", "sqry index");
let manifest = Manifest::new("/test/workspace", 100, 200, "unix_test", build_prov);
write_manifest_atomic(&manifest_path, &manifest).unwrap();
let entries: Vec<_> = std::fs::read_dir(temp_dir.path())
.unwrap()
.filter_map(Result::ok)
.collect();
assert_eq!(entries.len(), 1, "Should only have manifest.json");
assert_eq!(entries[0].file_name(), "manifest.json");
}
// Windows: after the write only `manifest.json` remains in the directory.
#[test]
#[cfg(windows)]
fn test_atomic_manifest_write_windows_movefile() {
use tempfile::TempDir;
let temp_dir = TempDir::new().unwrap();
let manifest_path = temp_dir.path().join("manifest.json");
let build_prov = BuildProvenance::new("3.1.1", "sqry index");
let manifest = Manifest::new("/test/workspace", 100, 200, "windows_test", build_prov);
write_manifest_atomic(&manifest_path, &manifest).unwrap();
let entries: Vec<_> = std::fs::read_dir(temp_dir.path())
.unwrap()
.filter_map(Result::ok)
.collect();
assert_eq!(entries.len(), 1, "Should only have manifest.json");
assert_eq!(entries[0].file_name(), "manifest.json");
}
}