use std::fs::File;
use std::io::{BufReader, BufWriter, Cursor, Read, Write};
use std::path::Path;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use super::format::{FormatVersion, GraphHeader, MAGIC_BYTES_V9, MAGIC_BYTES_V10, VERSION};
use super::manifest::ConfigProvenance;
use crate::config::buffers::max_snapshot_bytes;
use crate::graph::unified::BidirectionalEdgeStore;
use crate::graph::unified::bind::alias::AliasTable;
use crate::graph::unified::bind::scope::arena::ScopeArena;
use crate::graph::unified::bind::scope::provenance::ScopeProvenanceStore;
use crate::graph::unified::bind::shadow::ShadowTable;
use crate::graph::unified::build::phase4e_binding::derive_binding_plane;
use crate::graph::unified::concurrent::CodeGraph;
use crate::graph::unified::resolution::is_canonical_graph_qualified_name;
use crate::graph::unified::storage::{
AuxiliaryIndices, EdgeProvenanceStore, FileRegistry, FileSegmentTable, NodeArena,
NodeMetadataStore, NodeProvenanceStore, StringInterner,
};
use crate::plugin::PluginManager;
/// Hard cap on the serialized header size (1 MiB); a larger declared length
/// is treated as corruption instead of being allocated.
const MAX_HEADER_BYTES: usize = 1_048_576;
/// Sanity ceilings for the counts declared in a snapshot header; exceeding
/// any of them is assumed to indicate a corrupted snapshot rather than a
/// legitimately huge index (see `validate_header_sanity`).
const MAX_REASONABLE_NODES: usize = 100_000_000;
const MAX_REASONABLE_EDGES: usize = 1_000_000_000;
const MAX_REASONABLE_STRINGS: usize = 50_000_000;
const MAX_REASONABLE_FILES: usize = 1_000_000;
/// Errors produced while saving or loading graph snapshots.
#[derive(Debug)]
pub enum PersistenceError {
    /// Underlying I/O failure (file open/read/write).
    Io(std::io::Error),
    /// Encode/decode failure (e.g. postcard), flattened to its message text.
    Serialization(String),
    /// The stream does not start with a recognized magic byte sequence.
    InvalidMagic {
        expected: Vec<u8>,
        found: Vec<u8>,
    },
    /// The header's format version is not one this build can read.
    IncompatibleVersion {
        expected: u32,
        found: u32,
    },
    /// A plugin version recorded in the snapshot header differs from the
    /// currently loaded plugin (or the plugin is no longer installed).
    PluginVersionMismatch {
        plugin_id: String,
        expected: String,
        found: String,
    },
    /// Structural or semantic validation of the snapshot failed.
    ValidationFailed(String),
}
/// Human-readable rendering of persistence failures; messages for stale or
/// corrupt indexes point the user at `sqry index` to rebuild.
impl std::fmt::Display for PersistenceError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Io(err) => write!(f, "I/O error: {err}"),
            Self::Serialization(err) => write!(f, "Serialization error: {err}"),
            Self::InvalidMagic { expected, found } => write!(
                f,
                "Invalid magic bytes: expected {expected:?}, found {found:?}. \
                Index was created with an older version. Run `sqry index` to rebuild."
            ),
            Self::IncompatibleVersion { expected, found } => write!(
                f,
                "Incompatible format version: expected {expected}, found {found}. \
                Index was created with an older version. Run `sqry index` to rebuild."
            ),
            Self::PluginVersionMismatch { plugin_id, expected, found } => write!(
                f,
                "Plugin version mismatch for {plugin_id}: expected {expected}, found {found} (index needs rebuild)"
            ),
            Self::ValidationFailed(message) => write!(f, "Validation failed: {message}"),
        }
    }
}
// Marker impl: `PersistenceError` carries all context via `Display`/`Debug`.
impl std::error::Error for PersistenceError {}
/// Allows `?` on raw I/O results.
impl From<std::io::Error> for PersistenceError {
    fn from(e: std::io::Error) -> Self {
        Self::Io(e)
    }
}
/// Allows `?` on postcard results, keeping only the message text.
impl From<postcard::Error> for PersistenceError {
    fn from(e: postcard::Error) -> Self {
        Self::Serialization(e.to_string())
    }
}
/// V8 payload layout: core graph components plus node/edge provenance.
#[derive(Debug, Serialize, Deserialize)]
struct GraphSnapshotData {
    nodes: NodeArena,
    edges: BidirectionalEdgeStore,
    strings: StringInterner,
    files: FileRegistry,
    indices: AuxiliaryIndices,
    macro_metadata: NodeMetadataStore,
    node_provenance: NodeProvenanceStore,
    edge_provenance: EdgeProvenanceStore,
}
/// V9 payload layout: V8 plus the binding plane (scopes, aliases, shadows,
/// scope provenance).
#[derive(Debug, Serialize, Deserialize)]
struct GraphSnapshotDataV9 {
    nodes: NodeArena,
    edges: BidirectionalEdgeStore,
    strings: StringInterner,
    files: FileRegistry,
    indices: AuxiliaryIndices,
    macro_metadata: NodeMetadataStore,
    node_provenance: NodeProvenanceStore,
    edge_provenance: EdgeProvenanceStore,
    scope_arena: ScopeArena,
    alias_table: AliasTable,
    shadow_table: ShadowTable,
    scope_provenance: ScopeProvenanceStore,
}
/// V10 payload layout (current): V9 plus the per-file node-slot segment
/// table. This is the only layout written by the save paths.
#[derive(Debug, Serialize, Deserialize)]
struct GraphSnapshotDataV10 {
    nodes: NodeArena,
    edges: BidirectionalEdgeStore,
    strings: StringInterner,
    files: FileRegistry,
    indices: AuxiliaryIndices,
    macro_metadata: NodeMetadataStore,
    node_provenance: NodeProvenanceStore,
    edge_provenance: EdgeProvenanceStore,
    scope_arena: ScopeArena,
    alias_table: AliasTable,
    shadow_table: ShadowTable,
    scope_provenance: ScopeProvenanceStore,
    file_segments: FileSegmentTable,
}
/// V7 payload layout: read-only (Deserialize only) — no provenance stores.
/// Loaded snapshots in this layout are upconverted on the fly.
#[derive(Debug, Deserialize)]
struct GraphSnapshotDataV7 {
    nodes: NodeArena,
    edges: BidirectionalEdgeStore,
    strings: StringInterner,
    files: FileRegistry,
    indices: AuxiliaryIndices,
    macro_metadata: NodeMetadataStore,
}
/// Cheap sanity bounds on the header's declared counts, run before any large
/// allocation so a corrupted length field cannot trigger a huge reserve.
///
/// # Errors
/// Returns [`PersistenceError::ValidationFailed`] when any count exceeds its
/// `MAX_REASONABLE_*` ceiling.
fn validate_header_sanity(header: &GraphHeader) -> Result<(), PersistenceError> {
    // Local helper deduplicating the four identical bound checks; the message
    // text matches the previous per-field messages exactly.
    fn check(label: &str, value: usize, max: usize) -> Result<(), PersistenceError> {
        if value > max {
            return Err(PersistenceError::ValidationFailed(format!(
                "Unreasonable {label}: {value} exceeds maximum of {max}. \
                This likely indicates a corrupted snapshot file."
            )));
        }
        Ok(())
    }
    check("node_count", header.node_count, MAX_REASONABLE_NODES)?;
    check("edge_count", header.edge_count, MAX_REASONABLE_EDGES)?;
    check("string_count", header.string_count, MAX_REASONABLE_STRINGS)?;
    check("file_count", header.file_count, MAX_REASONABLE_FILES)?;
    Ok(())
}
/// Cross-checks the header's declared counts against a deserialized V8
/// payload, then runs the deeper semantic validation pass.
#[allow(dead_code)]
fn validate_loaded_snapshot(
    header: &GraphHeader,
    snapshot_data: &GraphSnapshotData,
) -> Result<(), PersistenceError> {
    // Shared shape for the four count-mismatch errors below.
    let mismatch = |field: &str, in_header: usize, in_data: usize| {
        PersistenceError::ValidationFailed(format!(
            "{field} mismatch: header={in_header}, data={in_data}"
        ))
    };
    let forward = snapshot_data.edges.stats().forward;
    let total_edges = forward.csr_edge_count + forward.delta_edge_count;
    if header.node_count != snapshot_data.nodes.len() {
        return Err(mismatch("node_count", header.node_count, snapshot_data.nodes.len()));
    }
    if header.edge_count != total_edges {
        return Err(mismatch("edge_count", header.edge_count, total_edges));
    }
    if header.string_count != snapshot_data.strings.len() {
        return Err(mismatch("string_count", header.string_count, snapshot_data.strings.len()));
    }
    if header.file_count != snapshot_data.files.len() {
        return Err(mismatch("file_count", header.file_count, snapshot_data.files.len()));
    }
    validate_snapshot_semantics(snapshot_data)?;
    Ok(())
}
/// Cross-checks the header's declared counts against a deserialized V9
/// payload, then runs the deeper semantic validation pass.
#[allow(dead_code)]
fn validate_loaded_snapshot_v9(
    header: &GraphHeader,
    snapshot_data: &GraphSnapshotDataV9,
) -> Result<(), PersistenceError> {
    // Shared shape for the four count-mismatch errors below.
    let mismatch = |field: &str, in_header: usize, in_data: usize| {
        PersistenceError::ValidationFailed(format!(
            "{field} mismatch: header={in_header}, data={in_data}"
        ))
    };
    let forward = snapshot_data.edges.stats().forward;
    let total_edges = forward.csr_edge_count + forward.delta_edge_count;
    if header.node_count != snapshot_data.nodes.len() {
        return Err(mismatch("node_count", header.node_count, snapshot_data.nodes.len()));
    }
    if header.edge_count != total_edges {
        return Err(mismatch("edge_count", header.edge_count, total_edges));
    }
    if header.string_count != snapshot_data.strings.len() {
        return Err(mismatch("string_count", header.string_count, snapshot_data.strings.len()));
    }
    if header.file_count != snapshot_data.files.len() {
        return Err(mismatch("file_count", header.file_count, snapshot_data.files.len()));
    }
    validate_snapshot_semantics_v9(snapshot_data)?;
    Ok(())
}
/// Semantic validation of a V8 payload: every resolver-eligible node (one
/// whose name id is valid) must resolve its file path and name string, and —
/// when a qualified name is present — that name must resolve and be canonical
/// for the file's language.
#[allow(dead_code)]
fn validate_snapshot_semantics(snapshot_data: &GraphSnapshotData) -> Result<(), PersistenceError> {
    use crate::graph::unified::string::StringId;
    for (node_id, node) in snapshot_data.nodes.iter() {
        // Unnamed nodes are not resolver-eligible; nothing to check.
        if node.name == StringId::INVALID {
            continue;
        }
        let file_path = match snapshot_data.files.resolve(node.file) {
            Some(path) => path,
            None => {
                return Err(PersistenceError::ValidationFailed(format!(
                    "resolver-eligible node {node_id:?} has unresolved file id {:?}; run `sqry index` to rebuild",
                    node.file
                )));
            }
        };
        if snapshot_data.strings.resolve(node.name).is_none() {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} has unresolved name string id {:?}; run `sqry index` to rebuild",
                node.name
            )));
        }
        // Nodes without a qualified name are done after the basic checks.
        let Some(qn_id) = node.qualified_name else {
            continue;
        };
        let Some(qualified_name) = snapshot_data.strings.resolve(qn_id) else {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} has unresolved qualified-name string id {qn_id:?}; run `sqry index` to rebuild"
            )));
        };
        let Some(language) = snapshot_data.files.language_for_file(node.file) else {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} in '{}' is missing file language metadata; run `sqry index` to rebuild",
                file_path.display()
            )));
        };
        if !is_canonical_graph_qualified_name(language, qualified_name.as_ref()) {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} in '{}' stores non-canonical qualified name '{}'; run `sqry index` to rebuild",
                file_path.display(),
                qualified_name
            )));
        }
    }
    Ok(())
}
/// Semantic validation of a V9 payload; same per-node checks as the V8
/// validator over the V9 layout.
#[allow(dead_code)]
fn validate_snapshot_semantics_v9(
    snapshot_data: &GraphSnapshotDataV9,
) -> Result<(), PersistenceError> {
    use crate::graph::unified::string::StringId;
    for (node_id, node) in snapshot_data.nodes.iter() {
        // Unnamed nodes are not resolver-eligible; nothing to check.
        if node.name == StringId::INVALID {
            continue;
        }
        let file_path = match snapshot_data.files.resolve(node.file) {
            Some(path) => path,
            None => {
                return Err(PersistenceError::ValidationFailed(format!(
                    "resolver-eligible node {node_id:?} has unresolved file id {:?}; run `sqry index` to rebuild",
                    node.file
                )));
            }
        };
        if snapshot_data.strings.resolve(node.name).is_none() {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} has unresolved name string id {:?}; run `sqry index` to rebuild",
                node.name
            )));
        }
        // Nodes without a qualified name are done after the basic checks.
        let Some(qn_id) = node.qualified_name else {
            continue;
        };
        let Some(qualified_name) = snapshot_data.strings.resolve(qn_id) else {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} has unresolved qualified-name string id {qn_id:?}; run `sqry index` to rebuild"
            )));
        };
        let Some(language) = snapshot_data.files.language_for_file(node.file) else {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} in '{}' is missing file language metadata; run `sqry index` to rebuild",
                file_path.display()
            )));
        };
        if !is_canonical_graph_qualified_name(language, qualified_name.as_ref()) {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} in '{}' stores non-canonical qualified name '{}'; run `sqry index` to rebuild",
                file_path.display(),
                qualified_name
            )));
        }
    }
    Ok(())
}
/// Semantic validation of a V10 payload (the layout the save paths write);
/// same per-node checks as the older validators over the V10 layout.
fn validate_snapshot_semantics_v10(
    snapshot_data: &GraphSnapshotDataV10,
) -> Result<(), PersistenceError> {
    use crate::graph::unified::string::StringId;
    for (node_id, node) in snapshot_data.nodes.iter() {
        // Unnamed nodes are not resolver-eligible; nothing to check.
        if node.name == StringId::INVALID {
            continue;
        }
        let file_path = match snapshot_data.files.resolve(node.file) {
            Some(path) => path,
            None => {
                return Err(PersistenceError::ValidationFailed(format!(
                    "resolver-eligible node {node_id:?} has unresolved file id {:?}; run `sqry index` to rebuild",
                    node.file
                )));
            }
        };
        if snapshot_data.strings.resolve(node.name).is_none() {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} has unresolved name string id {:?}; run `sqry index` to rebuild",
                node.name
            )));
        }
        // Nodes without a qualified name are done after the basic checks.
        let Some(qn_id) = node.qualified_name else {
            continue;
        };
        let Some(qualified_name) = snapshot_data.strings.resolve(qn_id) else {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} has unresolved qualified-name string id {qn_id:?}; run `sqry index` to rebuild"
            )));
        };
        let Some(language) = snapshot_data.files.language_for_file(node.file) else {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} in '{}' is missing file language metadata; run `sqry index` to rebuild",
                file_path.display()
            )));
        };
        if !is_canonical_graph_qualified_name(language, qualified_name.as_ref()) {
            return Err(PersistenceError::ValidationFailed(format!(
                "resolver-eligible node {node_id:?} in '{}' stores non-canonical qualified name '{}'; run `sqry index` to rebuild",
                file_path.display(),
                qualified_name
            )));
        }
    }
    Ok(())
}
/// Computes the fact epoch for a snapshot about to be written: strictly
/// greater than the previous snapshot's epoch and at least the current
/// wall-clock time in seconds since the Unix epoch.
fn next_fact_epoch(snapshot_path: &Path) -> u64 {
    let wall_clock = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_secs())
        .unwrap_or(0);
    // A missing or unreadable previous snapshot counts as epoch 0.
    let previous = read_prev_fact_epoch(snapshot_path).unwrap_or(0);
    (previous + 1).max(wall_clock)
}
/// Best-effort read of the fact epoch stored in an existing snapshot's
/// header; returns `None` on any I/O, framing, or decode failure.
fn read_prev_fact_epoch(path: &Path) -> Option<u64> {
    let mut reader = BufReader::new(File::open(path).ok()?);
    let (_version, header_len, _consumed) = read_magic_and_header_len(&mut reader).ok()?;
    // Refuse absurd header lengths instead of allocating them.
    if header_len > MAX_HEADER_BYTES {
        return None;
    }
    let mut buf = vec![0u8; header_len];
    reader.read_exact(&mut buf).ok()?;
    postcard::from_bytes::<GraphHeader>(&buf)
        .ok()
        .map(|header| header.fact_epoch())
}
fn upconvert_v7_to_v8(v7: GraphSnapshotDataV7) -> GraphSnapshotData {
let node_slot_count = v7.nodes.slot_count();
let edge_count = {
let stats = v7.edges.stats().forward;
stats.csr_edge_count + stats.delta_edge_count
};
let mut node_provenance = NodeProvenanceStore::new();
node_provenance.resize_to(node_slot_count);
let mut edge_provenance = EdgeProvenanceStore::new();
edge_provenance.resize_to(edge_count);
GraphSnapshotData {
nodes: v7.nodes,
edges: v7.edges,
strings: v7.strings,
files: v7.files,
indices: v7.indices,
macro_metadata: v7.macro_metadata,
node_provenance,
edge_provenance,
}
}
/// Lifts a V8 payload to V9 by deriving the binding plane (scopes, aliases,
/// shadows, scope provenance) that V9 snapshots persist.
///
/// The V8 components are reassembled into a full `CodeGraph` so that
/// `derive_binding_plane` can run over them, after which everything is
/// re-extracted from a fresh snapshot of that graph.
fn upconvert_v8_to_v9(v8: GraphSnapshotData) -> GraphSnapshotDataV9 {
    let node_provenance = v8.node_provenance;
    let edge_provenance = v8.edge_provenance;
    // Epoch 0 marks "no prior history"; the real epoch is stamped on save.
    let fact_epoch = 0;
    let mut graph = CodeGraph::from_components(
        v8.nodes,
        v8.edges,
        v8.strings,
        v8.files,
        v8.indices,
        v8.macro_metadata,
    );
    graph.set_provenance(node_provenance, edge_provenance, fact_epoch);
    // Populate the binding plane; runs before `snapshot()` so the snapshot
    // below already contains the derived tables.
    derive_binding_plane(&mut graph);
    let snapshot = graph.snapshot();
    let scope_arena = snapshot.scope_arena().clone();
    let alias_table = snapshot.alias_table().clone();
    let shadow_table = snapshot.shadow_table().clone();
    let scope_provenance = snapshot.scope_provenance_store().clone();
    // Re-collect node provenance from the snapshot into a store sized to the
    // arena's slot count.
    let node_prov = snapshot.nodes().iter().fold(
        {
            let mut s = NodeProvenanceStore::new();
            s.resize_to(snapshot.nodes().slot_count());
            s
        },
        |mut acc, (nid, _)| {
            if let Some(p) = snapshot.node_provenance(nid) {
                acc.insert(nid, *p);
            }
            acc
        },
    );
    use crate::graph::unified::edge::id::EdgeId;
    use crate::graph::unified::storage::edge_provenance::EdgeProvenance;
    let edge_stats = snapshot.edges().stats().forward;
    let total_edges = edge_stats.csr_edge_count + edge_stats.delta_edge_count;
    let mut edge_prov = EdgeProvenanceStore::new();
    edge_prov.resize_to(total_edges);
    // Re-collect edge provenance; edges with none recorded are stamped fresh
    // at epoch 0.
    for edge_idx in 0..total_edges {
        if let Ok(idx) = u32::try_from(edge_idx) {
            let eid = EdgeId::new(idx);
            if eid.is_valid() {
                let p = snapshot
                    .edge_provenance(eid)
                    .cloned()
                    .unwrap_or_else(|| EdgeProvenance::fresh(0));
                edge_prov.insert(eid, p);
            }
        }
    }
    GraphSnapshotDataV9 {
        nodes: snapshot.nodes().clone(),
        edges: snapshot.edges().clone(),
        strings: snapshot.strings().clone(),
        files: snapshot.files().clone(),
        indices: snapshot.indices().clone(),
        macro_metadata: snapshot.macro_metadata().clone(),
        node_provenance: node_prov,
        edge_provenance: edge_prov,
        scope_arena,
        alias_table,
        shadow_table,
        scope_provenance,
    }
}
fn upconvert_v9_to_v10(v9: GraphSnapshotDataV9) -> GraphSnapshotDataV10 {
let file_segments = rebuild_file_segments_from_arena(&v9.nodes);
GraphSnapshotDataV10 {
nodes: v9.nodes,
edges: v9.edges,
strings: v9.strings,
files: v9.files,
indices: v9.indices,
macro_metadata: v9.macro_metadata,
node_provenance: v9.node_provenance,
edge_provenance: v9.edge_provenance,
scope_arena: v9.scope_arena,
alias_table: v9.alias_table,
shadow_table: v9.shadow_table,
scope_provenance: v9.scope_provenance,
file_segments,
}
}
/// Reconstructs the per-file slot-range table by scanning the node arena:
/// for each valid file id, the minimum and maximum occupied slot indices are
/// tracked and recorded as a (start, length) range.
pub fn rebuild_file_segments_from_arena(arena: &NodeArena) -> FileSegmentTable {
    use crate::graph::unified::file::id::FileId;
    use std::collections::HashMap;
    let mut ranges: HashMap<FileId, (u32, u32)> = HashMap::new();
    for (idx, slot) in arena.slots().iter().enumerate() {
        let Some(entry) = slot.get() else { continue };
        if entry.file == FileId::INVALID {
            continue;
        }
        let pos = idx as u32;
        let (lo, hi) = ranges.entry(entry.file).or_insert((pos, pos));
        *lo = (*lo).min(pos);
        *hi = (*hi).max(pos);
    }
    let mut table = FileSegmentTable::with_capacity(ranges.len());
    for (fid, (lo, hi)) in ranges {
        // Inclusive-range width; hi >= lo by construction above.
        table.record_range(fid, lo, hi - lo + 1);
    }
    table
}
/// Reads the leading magic bytes and the header-length prefix from `reader`.
///
/// Returns the detected format version, the header length in bytes, and the
/// number of stream bytes consumed (18 for V10, 17 for older formats).
///
/// Framing: 14 bytes are read up front. For V10 the magic occupies all 14
/// bytes and a 4-byte little-endian length follows. For older formats the
/// magic is shorter, so `magic[13]` already holds the low byte of the length
/// and only 3 more bytes are read to complete it.
/// NOTE(review): this assumes `FormatVersion::from_magic` tolerates the
/// extra trailing byte for pre-V10 magics — confirm against the format
/// module.
fn read_magic_and_header_len(
    reader: &mut impl Read,
) -> Result<(FormatVersion, usize, u64), PersistenceError> {
    let mut magic = [0u8; 14];
    reader.read_exact(&mut magic)?;
    let format_version =
        FormatVersion::from_magic(&magic).ok_or_else(|| PersistenceError::InvalidMagic {
            expected: MAGIC_BYTES_V10.to_vec(),
            found: magic.to_vec(),
        })?;
    if format_version == FormatVersion::V10 {
        // 14 magic bytes + 4 length bytes consumed.
        let hl = read_u32_le(reader)? as usize;
        Ok((format_version, hl, 18))
    } else {
        // Legacy framing: length = [magic[13], next 3 bytes], little-endian.
        let mut rest = [0u8; 3];
        reader.read_exact(&mut rest)?;
        let hl = u32::from_le_bytes([magic[13], rest[0], rest[1], rest[2]]) as usize;
        // 13 magic bytes + 4 length bytes consumed.
        Ok((format_version, hl, 17))
    }
}
/// Reads exactly four bytes from `reader` and decodes them as a
/// little-endian `u32`.
fn read_u32_le(reader: &mut impl Read) -> Result<u32, std::io::Error> {
    let mut le = [0u8; 4];
    reader.read_exact(&mut le).map(|()| u32::from_le_bytes(le))
}
/// Reads exactly eight bytes from `reader` and decodes them as a
/// little-endian `u64`.
fn read_u64_le(reader: &mut impl Read) -> Result<u64, std::io::Error> {
    let mut le = [0u8; 8];
    reader.read_exact(&mut le).map(|()| u64::from_le_bytes(le))
}
/// Builds brand-new provenance stores for every node and edge in `snapshot`,
/// stamping all entries as first seen at `epoch`.
fn build_provenance_from_snapshot(
    snapshot: &crate::graph::unified::concurrent::GraphSnapshot,
    epoch: u64,
) -> (NodeProvenanceStore, EdgeProvenanceStore) {
    use crate::graph::unified::edge::id::EdgeId;
    use crate::graph::unified::storage::edge_provenance::EdgeProvenance;
    use crate::graph::unified::storage::node_provenance::NodeProvenance;
    let arena = snapshot.nodes();
    let mut node_prov = NodeProvenanceStore::new();
    node_prov.resize_to(arena.slot_count());
    for (node_id, entry) in arena.iter() {
        // Content hash derives from the node's body hash (zeroes when absent).
        node_prov.insert(node_id, NodeProvenance::fresh(epoch, node_content_hash(entry)));
    }
    let forward = snapshot.edges().stats().forward;
    let total = forward.csr_edge_count + forward.delta_edge_count;
    let mut edge_prov = EdgeProvenanceStore::new();
    edge_prov.resize_to(total);
    for eid in (0..total).filter_map(|i| u32::try_from(i).ok()).map(EdgeId::new) {
        if eid.is_valid() {
            edge_prov.insert(eid, EdgeProvenance::fresh(epoch));
        }
    }
    (node_prov, edge_prov)
}
/// Derives a 32-byte content hash for a node: the 128-bit body hash in the
/// low 16 bytes (little-endian) with the rest zeroed, or all zeroes when the
/// node has no body hash.
fn node_content_hash(entry: &crate::graph::unified::storage::NodeEntry) -> [u8; 32] {
    let mut hash = [0u8; 32];
    if let Some(bh) = entry.body_hash {
        hash[..16].copy_from_slice(&bh.as_u128().to_le_bytes());
    }
    hash
}
/// Carries existing provenance forward for every node and edge in `snapshot`:
/// entries with recorded history keep their `first_seen_epoch` and have
/// `last_seen_epoch` bumped to `epoch`; unknown entries are stamped fresh.
fn merge_provenance_from_snapshot(
    snapshot: &crate::graph::unified::concurrent::GraphSnapshot,
    epoch: u64,
) -> (NodeProvenanceStore, EdgeProvenanceStore) {
    use crate::graph::unified::edge::id::EdgeId;
    use crate::graph::unified::storage::edge_provenance::EdgeProvenance;
    use crate::graph::unified::storage::node_provenance::NodeProvenance;
    let arena = snapshot.nodes();
    let mut node_prov = NodeProvenanceStore::new();
    node_prov.resize_to(arena.slot_count());
    for (node_id, entry) in arena.iter() {
        let content_hash = node_content_hash(entry);
        let merged = snapshot.node_provenance(node_id).map_or_else(
            || NodeProvenance::fresh(epoch, content_hash),
            |prev| NodeProvenance {
                first_seen_epoch: prev.first_seen_epoch,
                last_seen_epoch: epoch,
                content_hash,
            },
        );
        node_prov.insert(node_id, merged);
    }
    let forward = snapshot.edges().stats().forward;
    let total = forward.csr_edge_count + forward.delta_edge_count;
    let mut edge_prov = EdgeProvenanceStore::new();
    edge_prov.resize_to(total);
    for idx in 0..total {
        let Ok(raw) = u32::try_from(idx) else { continue };
        let eid = EdgeId::new(raw);
        if !eid.is_valid() {
            continue;
        }
        let merged = snapshot.edge_provenance(eid).map_or_else(
            || EdgeProvenance::fresh(epoch),
            |prev| EdgeProvenance {
                first_seen_epoch: prev.first_seen_epoch,
                last_seen_epoch: epoch,
            },
        );
        edge_prov.insert(eid, merged);
    }
    (node_prov, edge_prov)
}
/// Chooses the provenance strategy for a save: merge with prior history when
/// the snapshot already carries a nonzero fact epoch, otherwise build fresh.
fn resolve_provenance(
    snapshot: &crate::graph::unified::concurrent::GraphSnapshot,
    epoch: u64,
) -> (NodeProvenanceStore, EdgeProvenanceStore) {
    match snapshot.fact_epoch() {
        0 => build_provenance_from_snapshot(snapshot, epoch),
        _ => merge_provenance_from_snapshot(snapshot, epoch),
    }
}
/// Stamps every file slot with the epoch it was indexed at. Slot 0 is
/// skipped — presumably the reserved/invalid file id; confirm against
/// `FileRegistry`.
fn stamp_file_indexed_at(files: &mut FileRegistry, epoch: u64) {
    use crate::graph::unified::file::id::FileId;
    for idx in 1..files.slot_count() {
        if let Ok(raw) = u32::try_from(idx) {
            files.set_indexed_at(FileId::new(raw), epoch);
        }
    }
}
/// Writes a V9-framed snapshot: magic | u32 LE header length | postcard
/// header | u64 LE data length | postcard data, flushing at the end.
///
/// Marked `dead_code`: current saves use `write_framed_v10`.
///
/// # Errors
/// I/O failures, serialization failures, or size-limit violations.
#[allow(clippy::cast_possible_truncation)]
#[allow(dead_code)]
fn write_framed_v9(
    writer: &mut BufWriter<File>,
    header: &GraphHeader,
    snapshot_data: &GraphSnapshotDataV9,
) -> Result<(), PersistenceError> {
    // A stale dedup lookup means the interner's state is not safe to persist.
    debug_assert!(
        !snapshot_data.strings.is_lookup_stale(),
        "Cannot serialize StringInterner with stale lookup — \
        call build_dedup_table() before saving"
    );
    let header_bytes = postcard::to_allocvec(header)?;
    let data_bytes = postcard::to_allocvec(snapshot_data)?;
    // Enforce the same size ceilings the load path checks.
    if header_bytes.len() > MAX_HEADER_BYTES {
        return Err(PersistenceError::ValidationFailed(format!(
            "header too large to save: {} bytes exceeds MAX_HEADER_BYTES ({} bytes)",
            header_bytes.len(),
            MAX_HEADER_BYTES,
        )));
    }
    let max_data_bytes = max_snapshot_bytes();
    if data_bytes.len() as u64 > max_data_bytes {
        return Err(PersistenceError::ValidationFailed(format!(
            "data section too large to save: {} bytes exceeds limit ({} bytes); \
            increase SQRY_MAX_SNAPSHOT_BYTES if the codebase legitimately requires a larger snapshot",
            data_bytes.len(),
            max_data_bytes,
        )));
    }
    writer.write_all(MAGIC_BYTES_V9)?;
    // u32 length prefix for the header section.
    writer.write_all(
        &u32::try_from(header_bytes.len())
            .map_err(|_| {
                PersistenceError::ValidationFailed(
                    "header too large for u32 length prefix".to_string(),
                )
            })?
            .to_le_bytes(),
    )?;
    writer.write_all(&header_bytes)?;
    // u64 length prefix for the data section.
    writer.write_all(&(data_bytes.len() as u64).to_le_bytes())?;
    writer.write_all(&data_bytes)?;
    // Flush explicitly so write errors surface here rather than on drop.
    writer.flush()?;
    Ok(())
}
/// Writes a V10-framed snapshot: magic | u32 LE header length | postcard
/// header | u64 LE data length | postcard data, flushing at the end.
///
/// # Errors
/// I/O failures, serialization failures, or size-limit violations.
fn write_framed_v10(
    writer: &mut BufWriter<File>,
    header: &GraphHeader,
    snapshot_data: &GraphSnapshotDataV10,
) -> Result<(), PersistenceError> {
    // A stale dedup lookup means the interner's state is not safe to persist.
    debug_assert!(
        !snapshot_data.strings.is_lookup_stale(),
        "Cannot serialize StringInterner with stale lookup — \
        call build_dedup_table() before saving"
    );
    let header_bytes = postcard::to_allocvec(header)?;
    let data_bytes = postcard::to_allocvec(snapshot_data)?;
    // Enforce the same size ceilings the load path checks.
    if header_bytes.len() > MAX_HEADER_BYTES {
        return Err(PersistenceError::ValidationFailed(format!(
            "header too large to save: {} bytes exceeds MAX_HEADER_BYTES ({} bytes)",
            header_bytes.len(),
            MAX_HEADER_BYTES,
        )));
    }
    let max_data_bytes = max_snapshot_bytes();
    if data_bytes.len() as u64 > max_data_bytes {
        return Err(PersistenceError::ValidationFailed(format!(
            "data section too large to save: {} bytes exceeds limit ({} bytes); \
            increase SQRY_MAX_SNAPSHOT_BYTES if the codebase legitimately requires a larger snapshot",
            data_bytes.len(),
            max_data_bytes,
        )));
    }
    writer.write_all(MAGIC_BYTES_V10)?;
    // u32 length prefix for the header section.
    writer.write_all(
        &u32::try_from(header_bytes.len())
            .map_err(|_| {
                PersistenceError::ValidationFailed(
                    "header too large for u32 length prefix".to_string(),
                )
            })?
            .to_le_bytes(),
    )?;
    writer.write_all(&header_bytes)?;
    // u64 length prefix for the data section.
    writer.write_all(&(data_bytes.len() as u64).to_le_bytes())?;
    writer.write_all(&data_bytes)?;
    // Flush explicitly so write errors surface here rather than on drop.
    writer.flush()?;
    Ok(())
}
/// Serializes `graph` to `path` in the V10 framed format.
///
/// The target file is only created (truncating any previous snapshot) after
/// the in-memory payload has passed semantic validation, so a failed save
/// cannot destroy an existing good snapshot. (Previously the file was
/// truncated up front, before validation.)
///
/// # Errors
/// I/O failures, serialization failures, or semantic-validation failures.
pub fn save_to_path(graph: &CodeGraph, path: impl AsRef<Path>) -> Result<(), PersistenceError> {
    let path = path.as_ref();
    // Read the previous epoch before the file is truncated below.
    let fact_epoch = next_fact_epoch(path);
    let snapshot = graph.snapshot();
    let (node_provenance, edge_provenance) = resolve_provenance(&snapshot, fact_epoch);
    let mut files = snapshot.files().clone();
    // Every file in this snapshot is (re)indexed as of this epoch.
    stamp_file_indexed_at(&mut files, fact_epoch);
    let snapshot_data = GraphSnapshotDataV10 {
        nodes: snapshot.nodes().clone(),
        edges: snapshot.edges().clone(),
        strings: snapshot.strings().clone(),
        files,
        indices: snapshot.indices().clone(),
        macro_metadata: snapshot.macro_metadata().clone(),
        node_provenance,
        edge_provenance,
        scope_arena: snapshot.scope_arena().clone(),
        alias_table: snapshot.alias_table().clone(),
        shadow_table: snapshot.shadow_table().clone(),
        scope_provenance: snapshot.scope_provenance_store().clone(),
        file_segments: snapshot.file_segments().clone(),
    };
    validate_snapshot_semantics_v10(&snapshot_data)?;
    let forward = snapshot_data.edges.stats().forward;
    let total_edges = forward.csr_edge_count + forward.delta_edge_count;
    let mut header = GraphHeader::new(
        snapshot_data.nodes.len(),
        total_edges,
        snapshot_data.strings.len(),
        snapshot_data.files.len(),
    );
    header.version = FormatVersion::V10.as_u32();
    header.set_fact_epoch(fact_epoch);
    // Create (truncate) the target only now that the payload is known-good.
    let mut writer = BufWriter::new(File::create(path)?);
    write_framed_v10(&mut writer, &header, &snapshot_data)
}
pub fn save_to_path_with_provenance(
graph: &CodeGraph,
path: impl AsRef<Path>,
provenance: ConfigProvenance,
plugins: &PluginManager,
) -> Result<(), PersistenceError> {
let path = path.as_ref();
let fact_epoch = next_fact_epoch(path);
let file = File::create(path)?;
let mut writer = BufWriter::new(file);
let snapshot = graph.snapshot();
let (node_provenance, edge_provenance) = resolve_provenance(&snapshot, fact_epoch);
let nodes = snapshot.nodes().clone();
let edges = snapshot.edges().clone();
let strings = snapshot.strings().clone();
let mut files = snapshot.files().clone();
let indices = snapshot.indices().clone();
let macro_metadata = snapshot.macro_metadata().clone();
stamp_file_indexed_at(&mut files, fact_epoch);
let plugin_versions: HashMap<String, String> = plugins
.plugins()
.iter()
.map(|p| {
let meta = p.metadata();
(meta.id.to_string(), meta.version.to_string())
})
.collect();
let scope_arena = snapshot.scope_arena().clone();
let alias_table = snapshot.alias_table().clone();
let shadow_table = snapshot.shadow_table().clone();
let scope_provenance = snapshot.scope_provenance_store().clone();
let file_segments = snapshot.file_segments().clone();
let snapshot_data = GraphSnapshotDataV10 {
nodes,
edges,
strings,
files,
indices,
macro_metadata,
node_provenance,
edge_provenance,
scope_arena,
alias_table,
shadow_table,
scope_provenance,
file_segments,
};
let forward_stats = snapshot_data.edges.stats().forward;
let total_edges = forward_stats.csr_edge_count + forward_stats.delta_edge_count;
let mut header = GraphHeader::with_provenance_and_plugins(
snapshot_data.nodes.len(),
total_edges,
snapshot_data.strings.len(),
snapshot_data.files.len(),
provenance,
plugin_versions,
);
header.version = FormatVersion::V10.as_u32();
header.set_fact_epoch(fact_epoch);
write_framed_v10(&mut writer, &header, &snapshot_data)
}
/// Rejects a snapshot whose recorded plugin versions disagree with the
/// currently loaded plugins, or that references plugins no longer installed.
/// Plugins loaded now but absent from the header are accepted.
fn validate_plugin_versions(
    header: &GraphHeader,
    plugins: &PluginManager,
) -> Result<(), PersistenceError> {
    let current: HashMap<String, String> = plugins
        .plugins()
        .iter()
        .map(|p| {
            let meta = p.metadata();
            (meta.id.to_string(), meta.version.to_string())
        })
        .collect();
    for (plugin_id, stored) in header.plugin_versions() {
        let expected = match current.get(plugin_id) {
            // Exact version match: nothing to report for this plugin.
            Some(version) if version == stored => continue,
            Some(version) => version.clone(),
            None => "not installed".to_string(),
        };
        return Err(PersistenceError::PluginVersionMismatch {
            plugin_id: plugin_id.clone(),
            expected,
            found: stored.clone(),
        });
    }
    Ok(())
}
/// Verifies `data` against an expected SHA-256 hex digest; an empty digest
/// string disables the check.
///
/// # Errors
/// Fails when the computed digest differs from `expected_sha256`.
pub fn verify_snapshot_bytes(data: &[u8], expected_sha256: &str) -> anyhow::Result<()> {
    use sha2::{Digest, Sha256};
    if expected_sha256.is_empty() {
        return Ok(());
    }
    let actual_hash = format!("{:x}", Sha256::digest(data));
    anyhow::ensure!(
        actual_hash == expected_sha256,
        "Snapshot integrity check failed: expected SHA256 {expected_sha256}, got {actual_hash}. \
        The index may be corrupt or tampered with. Run `sqry index` to rebuild.",
    );
    Ok(())
}
/// Deserializes a full `CodeGraph` from an in-memory snapshot buffer,
/// upconverting V7/V8/V9 payloads to the current V10 layout.
///
/// Frame layout: magic | u32 header length | postcard header | u64 data
/// length | postcard data. Each declared length is bounds-checked against the
/// remaining buffer before allocation, and trailing bytes after the data
/// section are rejected.
///
/// # Errors
/// Framing/decode failures, header or plugin-version mismatches, size-limit
/// violations, and semantic-validation failures.
#[allow(clippy::cast_possible_truncation)]
pub fn load_from_bytes(
    bytes: &[u8],
    plugins: Option<&PluginManager>,
) -> Result<CodeGraph, PersistenceError> {
    let total_len = bytes.len() as u64;
    let mut reader = Cursor::new(bytes);
    let mut bytes_consumed: u64 = 0;
    let (format_version, header_len, magic_bytes) = read_magic_and_header_len(&mut reader)?;
    bytes_consumed += magic_bytes;
    // Bound the header length before allocating for it.
    if header_len > MAX_HEADER_BYTES {
        return Err(PersistenceError::ValidationFailed(
            "header too large".to_string(),
        ));
    }
    let remaining = total_len.saturating_sub(bytes_consumed);
    if (header_len as u64) > remaining {
        return Err(PersistenceError::ValidationFailed(
            "header length exceeds remaining file bytes".to_string(),
        ));
    }
    let mut header_buf = vec![0u8; header_len];
    reader.read_exact(&mut header_buf)?;
    bytes_consumed += header_len as u64;
    let header: GraphHeader = postcard::from_bytes(&header_buf)?;
    // Accept only header versions we can read or upconvert from.
    // NOTE(review): the match below has a V7 arm, but a header that actually
    // records version 7 would be rejected by this allow-list unless VERSION
    // covers it — confirm what V7 headers store in `version`.
    if header.version != VERSION
        && header.version != FormatVersion::V8.as_u32()
        && header.version != FormatVersion::V9.as_u32()
        && header.version != FormatVersion::V10.as_u32()
    {
        return Err(PersistenceError::IncompatibleVersion {
            expected: FormatVersion::V10.as_u32(),
            found: header.version,
        });
    }
    if let Some(plugin_manager) = plugins {
        validate_plugin_versions(&header, plugin_manager)?;
    }
    validate_header_sanity(&header)?;
    let data_len = read_u64_le(&mut reader)?;
    bytes_consumed += 8;
    // Bound the data length before allocating for it.
    let max_data_bytes = max_snapshot_bytes();
    if data_len > max_data_bytes {
        return Err(PersistenceError::ValidationFailed(format!(
            "data section too large: {data_len} bytes exceeds limit ({max_data_bytes} bytes); \
            increase SQRY_MAX_SNAPSHOT_BYTES to load this snapshot",
        )));
    }
    let remaining = total_len.saturating_sub(bytes_consumed);
    if data_len > remaining {
        return Err(PersistenceError::ValidationFailed(
            "data length exceeds remaining file bytes".to_string(),
        ));
    }
    let mut data_buf = vec![0u8; data_len as usize];
    reader.read_exact(&mut data_buf)?;
    // Decode at the on-disk layout, then upconvert step-by-step to V10.
    let mut snapshot_data: GraphSnapshotDataV10 = match format_version {
        FormatVersion::V7 => {
            let v7: GraphSnapshotDataV7 = postcard::from_bytes(&data_buf)?;
            let v8 = upconvert_v7_to_v8(v7);
            let v9 = upconvert_v8_to_v9(v8);
            upconvert_v9_to_v10(v9)
        }
        FormatVersion::V8 => {
            let v8: GraphSnapshotData = postcard::from_bytes(&data_buf)?;
            let v9 = upconvert_v8_to_v9(v8);
            upconvert_v9_to_v10(v9)
        }
        FormatVersion::V9 => {
            let v9: GraphSnapshotDataV9 = postcard::from_bytes(&data_buf)?;
            upconvert_v9_to_v10(v9)
        }
        FormatVersion::V10 => postcard::from_bytes(&data_buf)?,
    };
    // Rebuild the scope-provenance reverse index from the deserialized arena.
    snapshot_data
        .scope_provenance
        .rebuild_reverse_index(&snapshot_data.scope_arena);
    // Any byte past the declared data section indicates a corrupt write.
    let mut trailing = [0u8; 1];
    if reader.read(&mut trailing)? > 0 {
        return Err(PersistenceError::ValidationFailed(
            "unexpected trailing bytes after data section".to_string(),
        ));
    }
    validate_snapshot_semantics_v10(&snapshot_data)?;
    let mut graph = CodeGraph::from_components(
        snapshot_data.nodes,
        snapshot_data.edges,
        snapshot_data.strings,
        snapshot_data.files,
        snapshot_data.indices,
        snapshot_data.macro_metadata,
    );
    graph.set_provenance(
        snapshot_data.node_provenance,
        snapshot_data.edge_provenance,
        header.fact_epoch(),
    );
    graph.set_scope_arena(snapshot_data.scope_arena);
    graph.set_alias_table(snapshot_data.alias_table);
    graph.set_shadow_table(snapshot_data.shadow_table);
    graph.set_scope_provenance_store(snapshot_data.scope_provenance);
    graph.set_file_segments(snapshot_data.file_segments);
    Ok(graph)
}
/// Loads a full [`CodeGraph`] snapshot from `path`.
///
/// On-disk layout (integers little-endian): magic bytes, `u32` header
/// length, postcard-encoded [`GraphHeader`], `u64` data length, the
/// postcard-encoded snapshot payload, then end of file. Legacy V7/V8/V9
/// payloads are decoded with their historical schemas and up-converted
/// stepwise to the V10 in-memory representation.
///
/// When `plugins` is supplied, plugin versions recorded in the header
/// are checked against the live plugin manager.
///
/// # Errors
///
/// Fails on I/O errors, bad magic, an unsupported header version, a
/// plugin version mismatch, length fields exceeding configured limits or
/// the physical file size, deserialization failures, trailing bytes
/// after the data section, or failed semantic validation of the snapshot.
#[allow(clippy::cast_possible_truncation)]
pub fn load_from_path(
    path: impl AsRef<Path>,
    plugins: Option<&PluginManager>,
) -> Result<CodeGraph, PersistenceError> {
    let path = path.as_ref();
    let file = File::open(path)?;
    // The physical file size bounds every declared length below, so a
    // corrupt length field is rejected before any large allocation.
    let file_len = file.metadata()?.len();
    let mut reader = BufReader::new(file);
    let mut bytes_consumed: u64 = 0;
    let (format_version, header_len, magic_bytes) = read_magic_and_header_len(&mut reader)?;
    bytes_consumed += magic_bytes;
    if header_len > MAX_HEADER_BYTES {
        return Err(PersistenceError::ValidationFailed(
            "header too large".to_string(),
        ));
    }
    let remaining = file_len.saturating_sub(bytes_consumed);
    if (header_len as u64) > remaining {
        return Err(PersistenceError::ValidationFailed(
            "header length exceeds remaining file bytes".to_string(),
        ));
    }
    let mut header_buf = vec![0u8; header_len];
    reader.read_exact(&mut header_buf)?;
    bytes_consumed += header_len as u64;
    let header: GraphHeader = postcard::from_bytes(&header_buf)?;
    // Reject header versions outside the loadable set.
    if header.version != VERSION
        && header.version != FormatVersion::V8.as_u32()
        && header.version != FormatVersion::V9.as_u32()
        && header.version != FormatVersion::V10.as_u32()
    {
        return Err(PersistenceError::IncompatibleVersion {
            expected: FormatVersion::V10.as_u32(),
            found: header.version,
        });
    }
    if let Some(plugin_manager) = plugins {
        validate_plugin_versions(&header, plugin_manager)?;
    }
    validate_header_sanity(&header)?;
    let data_len = read_u64_le(&mut reader)?;
    bytes_consumed += 8;
    // Configurable cap (SQRY_MAX_SNAPSHOT_BYTES) guards against
    // allocating an absurd buffer from a hostile/corrupt length field.
    let max_data_bytes = max_snapshot_bytes();
    if data_len > max_data_bytes {
        return Err(PersistenceError::ValidationFailed(format!(
            "data section too large: {data_len} bytes exceeds limit ({max_data_bytes} bytes); \
             increase SQRY_MAX_SNAPSHOT_BYTES to load this snapshot",
        )));
    }
    let remaining = file_len.saturating_sub(bytes_consumed);
    if data_len > remaining {
        return Err(PersistenceError::ValidationFailed(
            "data length exceeds remaining file bytes".to_string(),
        ));
    }
    let mut data_buf = vec![0u8; data_len as usize];
    reader.read_exact(&mut data_buf)?;
    // Decode at the snapshot's native schema, then up-convert stepwise
    // (V7 -> V8 -> V9 -> V10) as needed.
    let mut snapshot_data: GraphSnapshotDataV10 = match format_version {
        FormatVersion::V7 => {
            let v7: GraphSnapshotDataV7 = postcard::from_bytes(&data_buf)?;
            let v8 = upconvert_v7_to_v8(v7);
            let v9 = upconvert_v8_to_v9(v8);
            upconvert_v9_to_v10(v9)
        }
        FormatVersion::V8 => {
            let v8: GraphSnapshotData = postcard::from_bytes(&data_buf)?;
            let v9 = upconvert_v8_to_v9(v8);
            upconvert_v9_to_v10(v9)
        }
        FormatVersion::V9 => {
            let v9: GraphSnapshotDataV9 = postcard::from_bytes(&data_buf)?;
            upconvert_v9_to_v10(v9)
        }
        FormatVersion::V10 => postcard::from_bytes(&data_buf)?,
    };
    // Recompute the scope-provenance reverse index from the
    // deserialized arena rather than trusting any on-disk copy.
    snapshot_data
        .scope_provenance
        .rebuild_reverse_index(&snapshot_data.scope_arena);
    // Any byte left after the data section means the file is malformed.
    let mut trailing = [0u8; 1];
    if reader.read(&mut trailing)? > 0 {
        return Err(PersistenceError::ValidationFailed(
            "unexpected trailing bytes after data section".to_string(),
        ));
    }
    validate_snapshot_semantics_v10(&snapshot_data)?;
    // Reassemble the graph from its planes, then attach the auxiliary
    // planes (provenance, scopes, aliases, shadows, file segments).
    let mut graph = CodeGraph::from_components(
        snapshot_data.nodes,
        snapshot_data.edges,
        snapshot_data.strings,
        snapshot_data.files,
        snapshot_data.indices,
        snapshot_data.macro_metadata,
    );
    graph.set_provenance(
        snapshot_data.node_provenance,
        snapshot_data.edge_provenance,
        header.fact_epoch(),
    );
    graph.set_scope_arena(snapshot_data.scope_arena);
    graph.set_alias_table(snapshot_data.alias_table);
    graph.set_shadow_table(snapshot_data.shadow_table);
    graph.set_scope_provenance_store(snapshot_data.scope_provenance);
    graph.set_file_segments(snapshot_data.file_segments);
    Ok(graph)
}
pub fn validate_snapshot(path: impl AsRef<Path>) -> Result<bool, PersistenceError> {
let path = path.as_ref();
let file = File::open(path)?;
let file_len = file.metadata()?.len();
let mut reader = BufReader::new(file);
let mut bytes_consumed: u64 = 0;
let (_format_version, header_len, magic_bytes) = read_magic_and_header_len(&mut reader)?;
bytes_consumed += magic_bytes;
if header_len > MAX_HEADER_BYTES {
return Err(PersistenceError::ValidationFailed(
"header too large".to_string(),
));
}
let remaining = file_len.saturating_sub(bytes_consumed);
if (header_len as u64) > remaining {
return Err(PersistenceError::ValidationFailed(
"header length exceeds remaining file bytes".to_string(),
));
}
let mut header_buf = vec![0u8; header_len];
reader.read_exact(&mut header_buf)?;
let header: GraphHeader = postcard::from_bytes(&header_buf)?;
if header.version != VERSION
&& header.version != FormatVersion::V8.as_u32()
&& header.version != FormatVersion::V9.as_u32()
&& header.version != FormatVersion::V10.as_u32()
{
return Err(PersistenceError::IncompatibleVersion {
expected: FormatVersion::V10.as_u32(),
found: header.version,
});
}
Ok(true)
}
/// Reads and decodes only the [`GraphHeader`] of the snapshot at `path`.
///
/// Useful for inspecting provenance or epochs without paying the cost of
/// deserializing the full data section.
///
/// # Errors
///
/// Fails on I/O errors, bad magic, an over-long or truncated header, a
/// header that does not decode, or an unsupported format version.
pub fn load_header_from_path(path: impl AsRef<Path>) -> Result<GraphHeader, PersistenceError> {
    let path = path.as_ref();
    let file = File::open(path)?;
    let file_len = file.metadata()?.len();
    let mut reader = BufReader::new(file);
    let (_format_version, header_len, magic_bytes) = read_magic_and_header_len(&mut reader)?;
    if header_len > MAX_HEADER_BYTES {
        return Err(PersistenceError::ValidationFailed(
            "header too large".to_string(),
        ));
    }
    // The declared header must physically fit in what follows the magic.
    let remaining = file_len.saturating_sub(magic_bytes);
    if (header_len as u64) > remaining {
        return Err(PersistenceError::ValidationFailed(
            "header length exceeds remaining file bytes".to_string(),
        ));
    }
    let mut header_buf = vec![0u8; header_len];
    reader.read_exact(&mut header_buf)?;
    let header: GraphHeader = postcard::from_bytes(&header_buf)?;
    let accepted = [
        VERSION,
        FormatVersion::V8.as_u32(),
        FormatVersion::V9.as_u32(),
        FormatVersion::V10.as_u32(),
    ];
    if !accepted.contains(&header.version) {
        return Err(PersistenceError::IncompatibleVersion {
            expected: FormatVersion::V10.as_u32(),
            found: header.version,
        });
    }
    Ok(header)
}
/// Reports whether the config checksum recorded in the snapshot at
/// `graph_path` matches `current_checksum` (i.e. `true` means no drift).
///
/// # Errors
///
/// Fails when the header cannot be loaded or carries no config
/// provenance.
pub fn check_config_drift(
    graph_path: impl AsRef<Path>,
    current_checksum: &str,
) -> Result<bool, PersistenceError> {
    let header = load_header_from_path(graph_path)?;
    header
        .config_provenance
        .map(|provenance| provenance.config_matches(current_checksum))
        .ok_or_else(|| {
            PersistenceError::ValidationFailed("Graph has no config provenance".to_string())
        })
}
#[cfg(test)]
mod tests {
use super::super::format::{MAGIC_BYTES, MAGIC_BYTES_V8};
use super::super::manifest::{OverrideEntry, OverrideSource};
use super::*;
use crate::graph::node::Language;
use crate::graph::unified::file::FileId;
use crate::graph::unified::node::NodeKind;
use crate::graph::unified::storage::NodeEntry;
use tempfile::NamedTempFile;
/// Builds an empty `PluginManager`; snapshots saved with it record no
/// plugin versions.
fn create_test_plugin_manager() -> PluginManager {
    PluginManager::new()
}
/// Writes a V8-layout snapshot fixture: V8 magic, `u32` LE header
/// length, postcard-encoded header, `u64` LE data length, postcard data.
fn write_snapshot_fixture(
    path: &Path,
    snapshot_data: &GraphSnapshotData,
) -> Result<(), PersistenceError> {
    // Header counts must reflect the payload or sanity checks reject it.
    let forward_stats = snapshot_data.edges.stats().forward;
    let total_edges = forward_stats.csr_edge_count + forward_stats.delta_edge_count;
    let header = GraphHeader::new(
        snapshot_data.nodes.len(),
        total_edges,
        snapshot_data.strings.len(),
        snapshot_data.files.len(),
    );
    let header_bytes = postcard::to_allocvec(&header)?;
    let data_bytes = postcard::to_allocvec(snapshot_data)?;
    let mut file = File::create(path)?;
    file.write_all(MAGIC_BYTES_V8)?;
    file.write_all(
        &u32::try_from(header_bytes.len())
            .expect("header fits in u32")
            .to_le_bytes(),
    )?;
    file.write_all(&header_bytes)?;
    file.write_all(&(data_bytes.len() as u64).to_le_bytes())?;
    file.write_all(&data_bytes)?;
    file.flush()?;
    Ok(())
}
/// Builds a graph containing a single `Function` node named "target"
/// with the given qualified name, registered under `file_path`.
fn graph_with_one_node(
    qualified_name: &str,
    language: Language,
    file_path: &Path,
) -> CodeGraph {
    let mut graph = CodeGraph::new();
    let file_id = graph
        .files_mut()
        .register_with_language(file_path, Some(language))
        .unwrap();
    let name_id = graph.strings_mut().intern("target").unwrap();
    let qname_id = graph.strings_mut().intern(qualified_name).unwrap();
    let entry = NodeEntry::new(NodeKind::Function, name_id, file_id)
        .with_location(1, 0, 1, 6)
        .with_qualified_name(qname_id);
    let node_id = graph.nodes_mut().alloc(entry.clone()).unwrap();
    // Keep the auxiliary indices consistent with the allocated node.
    graph.indices_mut().add(
        node_id,
        entry.kind,
        entry.name,
        entry.qualified_name,
        entry.file,
    );
    graph
}
#[test]
fn test_save_load_empty_graph() {
    // Round-trip an empty graph and confirm every component stays empty.
    let plugins = create_test_plugin_manager();
    let tmp = NamedTempFile::new().unwrap();
    save_to_path(&CodeGraph::new(), tmp.path()).unwrap();
    assert!(validate_snapshot(tmp.path()).unwrap());
    let loaded = load_from_path(tmp.path(), Some(&plugins)).unwrap();
    let snapshot = loaded.snapshot();
    assert_eq!(snapshot.nodes().len(), 0);
    assert_eq!(snapshot.strings().len(), 0);
    assert_eq!(snapshot.files().len(), 0);
}
#[test]
fn test_save_load_with_provenance() {
    // Provenance written at save time must be readable from the header.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let provenance = ConfigProvenance::new(
        ".sqry/graph/config/config.json",
        "abc123checksum".to_string(),
        1,
    );
    save_to_path_with_provenance(&CodeGraph::new(), tmp.path(), provenance, &plugins).unwrap();
    let header = load_header_from_path(tmp.path()).unwrap();
    assert!(header.has_provenance());
    let loaded = header.provenance().unwrap();
    assert_eq!(loaded.config_checksum, "abc123checksum");
    assert_eq!(loaded.schema_version, 1);
}
#[test]
fn test_config_drift_detection() {
    // Matching checksum reports no drift; a different one reports drift.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let provenance = ConfigProvenance::new(
        ".sqry/graph/config/config.json",
        "original_checksum".to_string(),
        1,
    );
    save_to_path_with_provenance(&CodeGraph::new(), tmp.path(), provenance, &plugins).unwrap();
    assert!(check_config_drift(tmp.path(), "original_checksum").unwrap());
    assert!(!check_config_drift(tmp.path(), "different_checksum").unwrap());
}
#[test]
fn test_config_drift_no_provenance() {
    // Plain saves carry no provenance, so drift checking must error out.
    let tmp = NamedTempFile::new().unwrap();
    save_to_path(&CodeGraph::new(), tmp.path()).unwrap();
    assert!(check_config_drift(tmp.path(), "any_checksum").is_err());
}
#[test]
fn test_provenance_with_overrides() {
    // CLI overrides recorded in provenance must round-trip via the header.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let mut provenance =
        ConfigProvenance::new(".sqry/graph/config/config.json", "checksum".to_string(), 1);
    provenance.add_override(OverrideEntry {
        source: OverrideSource::Cli,
        key: "parallelism.max_workers".to_string(),
        value: "16".to_string(),
        original_value: Some("8".to_string()),
    });
    save_to_path_with_provenance(&CodeGraph::new(), tmp.path(), provenance, &plugins).unwrap();
    let header = load_header_from_path(tmp.path()).unwrap();
    let loaded = header.provenance().unwrap();
    assert!(loaded.has_overrides());
    assert_eq!(loaded.override_count(), 1);
}
#[test]
fn test_load_rejects_invalid_magic() {
    // A file that does not start with the magic bytes must be rejected.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    {
        let mut file = File::create(tmp.path()).unwrap();
        file.write_all(b"NOT_SQRY_MAGIC").unwrap();
        file.flush().unwrap();
    }
    let err = load_from_path(tmp.path(), Some(&plugins)).unwrap_err();
    assert!(
        matches!(err, PersistenceError::InvalidMagic { .. }),
        "Expected InvalidMagic, got: {err:?}"
    );
}
#[test]
fn test_load_rejects_v3_snapshot() {
    // The ancient V3 magic is unsupported and must read as invalid magic.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    {
        let mut file = File::create(tmp.path()).unwrap();
        file.write_all(b"SQRY_GRAPH_V3\x00").unwrap();
        file.flush().unwrap();
    }
    let err = load_from_path(tmp.path(), Some(&plugins)).unwrap_err();
    assert!(
        matches!(err, PersistenceError::InvalidMagic { .. }),
        "Expected InvalidMagic for V3 snapshot, got: {err:?}"
    );
}
#[test]
fn test_load_rejects_corrupted_header_counts() {
    // node_count above the reasonable cap must fail header sanity checks.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let corrupt_header = GraphHeader::new(100_000_001, 0, 0, 0);
    let header_bytes = postcard::to_allocvec(&corrupt_header).unwrap();
    let header_len = u32::try_from(header_bytes.len()).expect("header fits in u32");
    let mut file = File::create(tmp.path()).unwrap();
    file.write_all(MAGIC_BYTES).unwrap();
    file.write_all(&header_len.to_le_bytes()).unwrap();
    file.write_all(&header_bytes).unwrap();
    file.write_all(&0u64.to_le_bytes()).unwrap();
    file.flush().unwrap();
    let msg = match load_from_path(tmp.path(), Some(&plugins)).unwrap_err() {
        PersistenceError::ValidationFailed(msg) => msg,
        other => panic!("Expected ValidationFailed, got: {other:?}"),
    };
    assert!(msg.contains("Unreasonable node_count"));
    assert!(msg.contains("corrupted"));
}
#[test]
fn test_load_rejects_header_length_exceeding_file() {
    // A declared header length larger than the file body must be rejected.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let mut file = File::create(tmp.path()).unwrap();
    file.write_all(MAGIC_BYTES).unwrap();
    file.write_all(&999_999u32.to_le_bytes()).unwrap();
    file.flush().unwrap();
    let msg = match load_from_path(tmp.path(), Some(&plugins)).unwrap_err() {
        PersistenceError::ValidationFailed(msg) => msg,
        other => panic!("Expected ValidationFailed, got: {other:?}"),
    };
    assert!(msg.contains("header length exceeds remaining file bytes"));
}
#[test]
fn test_load_rejects_data_length_exceeding_file() {
    // A declared data length larger than the remaining bytes must fail.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let header_bytes = postcard::to_allocvec(&GraphHeader::new(0, 0, 0, 0)).unwrap();
    let header_len = u32::try_from(header_bytes.len()).expect("header fits in u32");
    let mut file = File::create(tmp.path()).unwrap();
    file.write_all(MAGIC_BYTES).unwrap();
    file.write_all(&header_len.to_le_bytes()).unwrap();
    file.write_all(&header_bytes).unwrap();
    file.write_all(&999_999u64.to_le_bytes()).unwrap();
    file.flush().unwrap();
    let msg = match load_from_path(tmp.path(), Some(&plugins)).unwrap_err() {
        PersistenceError::ValidationFailed(msg) => msg,
        other => panic!("Expected ValidationFailed, got: {other:?}"),
    };
    assert!(msg.contains("data length exceeds remaining file bytes"));
}
#[test]
fn test_load_rejects_trailing_bytes() {
    // Extra bytes appended after the data section must fail validation.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    save_to_path(&CodeGraph::new(), tmp.path()).unwrap();
    {
        let mut file = std::fs::OpenOptions::new()
            .append(true)
            .open(tmp.path())
            .unwrap();
        file.write_all(b"junk").unwrap();
        file.flush().unwrap();
    }
    let msg = match load_from_path(tmp.path(), Some(&plugins)).unwrap_err() {
        PersistenceError::ValidationFailed(msg) => msg,
        other => panic!("Expected ValidationFailed for trailing bytes, got: {other:?}"),
    };
    assert!(msg.contains("trailing bytes"));
}
#[test]
fn test_save_rejects_non_canonical_qualified_name() {
    // A dotted qualified name is non-canonical and must block saving.
    let graph = graph_with_one_node(
        "pkg.module.target",
        Language::Python,
        Path::new("/tmp/test.py"),
    );
    let tmp = NamedTempFile::new().unwrap();
    let message = match save_to_path(&graph, tmp.path()).unwrap_err() {
        PersistenceError::ValidationFailed(message) => message,
        other => panic!("Expected ValidationFailed, got: {other:?}"),
    };
    assert!(message.contains("non-canonical qualified name"));
    assert!(message.contains("sqry index"));
}
#[test]
fn test_load_rejects_non_canonical_qualified_name() {
    // Start from a canonical graph, then swap the node's qualified name
    // for a dotted (non-canonical) one directly in the snapshot data so
    // the on-disk fixture is internally consistent but semantically bad.
    let graph = graph_with_one_node(
        "pkg::module::target",
        Language::Python,
        Path::new("/tmp/test.py"),
    );
    let snapshot = graph.snapshot();
    let mut snapshot_data = GraphSnapshotData {
        nodes: snapshot.nodes().clone(),
        edges: snapshot.edges().clone(),
        strings: snapshot.strings().clone(),
        files: snapshot.files().clone(),
        indices: snapshot.indices().clone(),
        macro_metadata: snapshot.macro_metadata().clone(),
        node_provenance: NodeProvenanceStore::new(),
        edge_provenance: EdgeProvenanceStore::new(),
    };
    let temp_file = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let invalid_qname_id = snapshot_data.strings.intern("pkg.module.target").unwrap();
    let (node_id, entry) = snapshot_data.nodes.iter().next().unwrap();
    let entry_kind = entry.kind;
    let entry_name = entry.name;
    let entry_file = entry.file;
    snapshot_data.nodes.get_mut(node_id).unwrap().qualified_name = Some(invalid_qname_id);
    // Rebuild the indices so they agree with the mutated node entry.
    snapshot_data.indices.clear();
    snapshot_data.indices.add(
        node_id,
        entry_kind,
        entry_name,
        Some(invalid_qname_id),
        entry_file,
    );
    write_snapshot_fixture(temp_file.path(), &snapshot_data).unwrap();
    // Loading must fail semantic validation with an actionable message.
    let result = load_from_path(temp_file.path(), Some(&plugins));
    assert!(result.is_err());
    match result.unwrap_err() {
        PersistenceError::ValidationFailed(message) => {
            assert!(message.contains("non-canonical qualified name"));
            assert!(message.contains("sqry index"));
        }
        other => panic!("Expected ValidationFailed, got: {other:?}"),
    }
}
#[test]
fn test_load_rejects_node_with_unresolved_file_id() {
    // Point a node at a file id that was never registered, then confirm
    // semantic validation rejects the snapshot at load time.
    let mut graph = CodeGraph::new();
    let registered_file = graph
        .files_mut()
        .register_with_language(Path::new("/tmp/test.rs"), Some(Language::Rust))
        .unwrap();
    let name_id = graph.strings_mut().intern("target").unwrap();
    let qname_id = graph.strings_mut().intern("pkg::target").unwrap();
    // Offset well past any registered slot so the id cannot resolve.
    let invalid_file_id = FileId::new(registered_file.index() + 100);
    let entry = NodeEntry::new(NodeKind::Function, name_id, invalid_file_id)
        .with_location(1, 0, 1, 6)
        .with_qualified_name(qname_id);
    let node_id = graph.nodes_mut().alloc(entry.clone()).unwrap();
    graph.indices_mut().add(
        node_id,
        entry.kind,
        entry.name,
        entry.qualified_name,
        entry.file,
    );
    let snapshot = graph.snapshot();
    let snapshot_data = GraphSnapshotData {
        nodes: snapshot.nodes().clone(),
        edges: snapshot.edges().clone(),
        strings: snapshot.strings().clone(),
        files: snapshot.files().clone(),
        indices: snapshot.indices().clone(),
        macro_metadata: snapshot.macro_metadata().clone(),
        node_provenance: NodeProvenanceStore::new(),
        edge_provenance: EdgeProvenanceStore::new(),
    };
    let temp_file = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    write_snapshot_fixture(temp_file.path(), &snapshot_data).unwrap();
    let result = load_from_path(temp_file.path(), Some(&plugins));
    assert!(result.is_err());
    match result.unwrap_err() {
        PersistenceError::ValidationFailed(message) => {
            assert!(message.contains("unresolved file id"));
            assert!(message.contains("sqry index"));
        }
        other => panic!("Expected ValidationFailed, got: {other:?}"),
    }
}
#[test]
fn test_load_rejects_large_edge_count() {
    // edge_count above the reasonable cap must fail header sanity checks.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let corrupt_header = GraphHeader::new(100, 1_000_001_000, 10, 1);
    let header_bytes = postcard::to_allocvec(&corrupt_header).unwrap();
    let header_len = u32::try_from(header_bytes.len()).expect("header fits in u32");
    let mut file = File::create(tmp.path()).unwrap();
    file.write_all(MAGIC_BYTES).unwrap();
    file.write_all(&header_len.to_le_bytes()).unwrap();
    file.write_all(&header_bytes).unwrap();
    file.write_all(&0u64.to_le_bytes()).unwrap();
    file.flush().unwrap();
    let msg = match load_from_path(tmp.path(), Some(&plugins)).unwrap_err() {
        PersistenceError::ValidationFailed(msg) => msg,
        other => panic!("Expected ValidationFailed, got: {other:?}"),
    };
    assert!(msg.contains("Unreasonable edge_count"));
}
#[test]
fn test_load_rejects_large_string_count() {
    // string_count above the reasonable cap must fail sanity checks.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let corrupt_header = GraphHeader::new(100, 1000, 50_001_000, 1);
    let header_bytes = postcard::to_allocvec(&corrupt_header).unwrap();
    let header_len = u32::try_from(header_bytes.len()).expect("header fits in u32");
    let mut file = File::create(tmp.path()).unwrap();
    file.write_all(MAGIC_BYTES).unwrap();
    file.write_all(&header_len.to_le_bytes()).unwrap();
    file.write_all(&header_bytes).unwrap();
    file.write_all(&0u64.to_le_bytes()).unwrap();
    file.flush().unwrap();
    let msg = match load_from_path(tmp.path(), Some(&plugins)).unwrap_err() {
        PersistenceError::ValidationFailed(msg) => msg,
        other => panic!("Expected ValidationFailed, got: {other:?}"),
    };
    assert!(msg.contains("Unreasonable string_count"));
}
#[test]
fn test_load_rejects_large_file_count() {
    // file_count above the reasonable cap must fail sanity checks.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let corrupt_header = GraphHeader::new(100, 1000, 1000, 1_001_000);
    let header_bytes = postcard::to_allocvec(&corrupt_header).unwrap();
    let header_len = u32::try_from(header_bytes.len()).expect("header fits in u32");
    let mut file = File::create(tmp.path()).unwrap();
    file.write_all(MAGIC_BYTES).unwrap();
    file.write_all(&header_len.to_le_bytes()).unwrap();
    file.write_all(&header_bytes).unwrap();
    file.write_all(&0u64.to_le_bytes()).unwrap();
    file.flush().unwrap();
    let msg = match load_from_path(tmp.path(), Some(&plugins)).unwrap_err() {
        PersistenceError::ValidationFailed(msg) => msg,
        other => panic!("Expected ValidationFailed, got: {other:?}"),
    };
    assert!(msg.contains("Unreasonable file_count"));
}
#[test]
fn test_plugin_version_tracking() {
    // An empty plugin manager records no plugin versions in the header,
    // and loading with the same manager succeeds.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let provenance = ConfigProvenance::new(
        ".sqry/graph/config/config.json",
        "test_checksum".to_string(),
        1,
    );
    save_to_path_with_provenance(&CodeGraph::new(), tmp.path(), provenance, &plugins).unwrap();
    let header = load_header_from_path(tmp.path()).unwrap();
    assert_eq!(header.plugin_versions().len(), 0);
    let loaded = load_from_path(tmp.path(), Some(&plugins)).unwrap();
    assert_eq!(loaded.snapshot().nodes().len(), 0);
}
#[test]
fn test_load_rejects_header_exceeding_max_header_bytes() {
    // A declared header length above MAX_HEADER_BYTES must be rejected
    // even when the file actually contains that many bytes.
    let tmp = NamedTempFile::new().unwrap();
    #[allow(clippy::cast_possible_truncation)]
    let declared_header_len: u32 = (MAX_HEADER_BYTES as u32) + 1;
    let mut file = File::create(tmp.path()).unwrap();
    file.write_all(MAGIC_BYTES).unwrap();
    file.write_all(&declared_header_len.to_le_bytes()).unwrap();
    file.write_all(&vec![0u8; declared_header_len as usize + 16])
        .unwrap();
    file.flush().unwrap();
    let msg = match load_from_path(tmp.path(), None).unwrap_err() {
        PersistenceError::ValidationFailed(msg) => msg,
        other => panic!("Expected ValidationFailed, got: {other:?}"),
    };
    assert!(
        msg.contains("header too large"),
        "Expected 'header too large', got: {msg}"
    );
}
#[test]
fn test_load_rejects_data_exceeding_max_snapshot_bytes() {
    // A declared data length above the configured cap must be rejected
    // before any allocation is attempted.
    let tmp = NamedTempFile::new().unwrap();
    let plugins = create_test_plugin_manager();
    let header_bytes = postcard::to_allocvec(&GraphHeader::new(0, 0, 0, 0)).unwrap();
    let header_len = u32::try_from(header_bytes.len()).expect("header fits in u32");
    let declared_data_len: u64 = max_snapshot_bytes() + 1;
    let mut file = File::create(tmp.path()).unwrap();
    file.write_all(MAGIC_BYTES).unwrap();
    file.write_all(&header_len.to_le_bytes()).unwrap();
    file.write_all(&header_bytes).unwrap();
    file.write_all(&declared_data_len.to_le_bytes()).unwrap();
    file.flush().unwrap();
    let msg = match load_from_path(tmp.path(), Some(&plugins)).unwrap_err() {
        PersistenceError::ValidationFailed(msg) => msg,
        other => panic!("Expected ValidationFailed, got: {other:?}"),
    };
    assert!(
        msg.contains("data section too large"),
        "Expected 'data section too large', got: {msg}"
    );
}
#[test]
#[serial_test::serial]
fn test_default_max_snapshot_bytes_supports_linux_kernel() {
    // Serialized because mutating the process environment would race
    // with other tests that read SQRY_MAX_SNAPSHOT_BYTES.
    // SAFETY: env mutation is process-global; #[serial] prevents
    // concurrent readers within this test binary.
    unsafe {
        std::env::remove_var("SQRY_MAX_SNAPSHOT_BYTES");
    }
    assert!(
        max_snapshot_bytes() >= 8 * 1024 * 1024 * 1024,
        "default snapshot limit must be >= 8 GB to support Linux-kernel-class repos; \
         got {} bytes",
        max_snapshot_bytes()
    );
}
#[test]
fn test_verify_snapshot_bytes_correct_hash() {
    use sha2::{Digest, Sha256};
    // A digest computed over the same bytes must pass the check.
    let data = b"some graph snapshot data";
    let expected = format!("{:x}", Sha256::digest(data));
    assert!(verify_snapshot_bytes(data, &expected).is_ok());
}
#[test]
fn test_verify_snapshot_bytes_wrong_hash() {
    // A mismatched digest must surface an integrity failure.
    let err = verify_snapshot_bytes(b"some graph snapshot data", "deadbeef").unwrap_err();
    assert!(format!("{err}").contains("integrity check failed"));
}
#[test]
fn test_verify_snapshot_bytes_empty_hash_skips() {
    // An empty expected hash disables verification entirely.
    assert!(verify_snapshot_bytes(b"anything", "").is_ok());
}
#[test]
fn test_load_from_bytes_matches_load_from_path() {
    // Loading from an in-memory buffer must agree with loading from disk.
    let plugins = crate::plugin::PluginManager::new();
    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("test.sqry");
    save_to_path(&CodeGraph::new(), &path).unwrap();
    let from_path = load_from_path(&path, Some(&plugins)).unwrap();
    let bytes = std::fs::read(&path).unwrap();
    let from_bytes = load_from_bytes(&bytes, Some(&plugins)).unwrap();
    assert_eq!(from_path.node_count(), from_bytes.node_count());
    assert_eq!(from_path.edge_count(), from_bytes.edge_count());
}
/// Writes a legacy V7 fixture: `MAGIC_BYTES` (the pre-V8 magic — TODO
/// confirm against format.rs) followed by a payload without provenance
/// planes, exercising the V7 up-convert path in the loaders.
fn write_v7_fixture(path: &Path, graph: &CodeGraph) -> Result<(), PersistenceError> {
    let snapshot = graph.snapshot();
    let forward_stats = snapshot.edges().stats().forward;
    let total_edges = forward_stats.csr_edge_count + forward_stats.delta_edge_count;
    let header = GraphHeader::new(
        snapshot.nodes().len(),
        total_edges,
        snapshot.strings().len(),
        snapshot.files().len(),
    );
    // Local mirror of the historical V7 payload: core planes only, no
    // provenance stores.
    #[derive(Serialize)]
    struct V7SnapshotData {
        nodes: NodeArena,
        edges: BidirectionalEdgeStore,
        strings: StringInterner,
        files: FileRegistry,
        indices: AuxiliaryIndices,
        macro_metadata: NodeMetadataStore,
    }
    let v7_data = V7SnapshotData {
        nodes: snapshot.nodes().clone(),
        edges: snapshot.edges().clone(),
        strings: snapshot.strings().clone(),
        files: snapshot.files().clone(),
        indices: snapshot.indices().clone(),
        macro_metadata: snapshot.macro_metadata().clone(),
    };
    let header_bytes = postcard::to_allocvec(&header)?;
    let data_bytes = postcard::to_allocvec(&v7_data)?;
    let mut file = File::create(path)?;
    file.write_all(MAGIC_BYTES)?;
    file.write_all(
        &u32::try_from(header_bytes.len())
            .expect("header fits in u32")
            .to_le_bytes(),
    )?;
    file.write_all(&header_bytes)?;
    file.write_all(&(data_bytes.len() as u64).to_le_bytes())?;
    file.write_all(&data_bytes)?;
    file.flush()?;
    Ok(())
}
#[test]
fn phase1_v7_legacy_loads_with_defaulted_provenance() {
    // A legacy V7 fixture must still load via the up-convert path.
    let graph = CodeGraph::new();
    let tmp = NamedTempFile::new().unwrap();
    write_v7_fixture(tmp.path(), &graph).unwrap();
    let loaded = load_from_path(tmp.path(), None).unwrap();
    assert_eq!(loaded.node_count(), graph.node_count());
    assert_eq!(loaded.edge_count(), graph.edge_count());
}
#[test]
fn phase1_v7_legacy_loads_via_bytes() {
    // The byte-slice loader must accept the same legacy V7 layout.
    let graph = CodeGraph::new();
    let tmp = NamedTempFile::new().unwrap();
    write_v7_fixture(tmp.path(), &graph).unwrap();
    let bytes = std::fs::read(tmp.path()).unwrap();
    let loaded = load_from_bytes(&bytes, None).unwrap();
    assert_eq!(loaded.node_count(), graph.node_count());
    assert_eq!(loaded.edge_count(), graph.edge_count());
}
#[test]
fn phase1_v7_validate_snapshot_accepts_legacy() {
    // Structural validation must accept the legacy V7 layout.
    let tmp = NamedTempFile::new().unwrap();
    write_v7_fixture(tmp.path(), &CodeGraph::new()).unwrap();
    assert!(validate_snapshot(tmp.path()).unwrap());
}
#[test]
fn phase1_v8_round_trip_preserves_fact_epoch() {
    // Saving must stamp a non-zero fact epoch into the header.
    let tmp = NamedTempFile::new().unwrap();
    save_to_path(&CodeGraph::new(), tmp.path()).unwrap();
    let header = load_header_from_path(tmp.path()).unwrap();
    assert!(
        header.fact_epoch() > 0,
        "V8 save should stamp a non-zero fact_epoch"
    );
}
#[test]
fn phase1_repeated_saves_produce_increasing_epochs() {
    // Each save must advance the fact epoch monotonically.
    let graph = CodeGraph::new();
    let tmp = NamedTempFile::new().unwrap();
    let mut epochs = Vec::with_capacity(2);
    for _ in 0..2 {
        save_to_path(&graph, tmp.path()).unwrap();
        epochs.push(load_header_from_path(tmp.path()).unwrap().fact_epoch());
    }
    let (epoch1, epoch2) = (epochs[0], epochs[1]);
    assert!(
        epoch2 > epoch1,
        "second save epoch ({epoch2}) must exceed first ({epoch1})"
    );
}
#[test]
fn stamp_file_indexed_at_covers_sparse_registry() {
    // Stamping must reach live slots while skipping unregistered holes.
    let mut reg = FileRegistry::new();
    let ids: Vec<_> = ["/a.rs", "/b.rs", "/c.rs", "/d.rs", "/e.rs"]
        .iter()
        .map(|p| reg.register(Path::new(p)).unwrap())
        .collect();
    reg.unregister(ids[1]);
    reg.unregister(ids[2]);
    assert_eq!(reg.len(), 3);
    assert_eq!(reg.slot_count(), 6);
    stamp_file_indexed_at(&mut reg, 42_000);
    // Surviving registrations all carry the new timestamp.
    for &live in &[ids[0], ids[3], ids[4]] {
        assert_eq!(reg.file_provenance(live).unwrap().indexed_at, 42_000);
    }
    // Unregistered slots expose no provenance at all.
    assert!(reg.file_provenance(ids[1]).is_none());
    assert!(reg.file_provenance(ids[2]).is_none());
}
#[test]
fn stamp_file_indexed_at_covers_reused_slots() {
    // A slot freed and re-registered must receive a fresh stamp too.
    let mut reg = FileRegistry::new();
    let id1 = reg.register(Path::new("/first.rs")).unwrap();
    let id2 = reg.register(Path::new("/second.rs")).unwrap();
    let id3 = reg.register(Path::new("/third.rs")).unwrap();
    reg.unregister(id2);
    let id_reused = reg.register(Path::new("/reused.rs")).unwrap();
    assert_eq!(id_reused.index(), id2.index());
    stamp_file_indexed_at(&mut reg, 99_000);
    for id in [id1, id_reused, id3] {
        assert_eq!(reg.file_provenance(id).unwrap().indexed_at, 99_000);
    }
}
#[test]
fn provenance_first_seen_survives_save_load_save_round_trip() {
    // first_seen_epoch must stay pinned to the original save epoch while
    // last_seen_epoch advances with each subsequent save.
    let graph = graph_with_one_node("my_module::my_fn", Language::Rust, Path::new("/test.rs"));
    let temp_file = NamedTempFile::new().unwrap();
    let path = temp_file.path();
    let plugins = create_test_plugin_manager();
    save_to_path(&graph, path).unwrap();
    let header1 = load_header_from_path(path).unwrap();
    let epoch1 = header1.fact_epoch();
    assert!(epoch1 > 0, "first save must stamp a non-zero epoch");
    let loaded = load_from_path(path, Some(&plugins)).unwrap();
    let snap1 = loaded.snapshot();
    let node_id = snap1.nodes().iter().next().unwrap().0;
    let prov1 = snap1.node_provenance(node_id).unwrap();
    // Freshly saved node: both epochs equal the first save's epoch.
    assert_eq!(prov1.first_seen_epoch, epoch1);
    assert_eq!(prov1.last_seen_epoch, epoch1);
    save_to_path(&loaded, path).unwrap();
    let header2 = load_header_from_path(path).unwrap();
    let epoch2 = header2.fact_epoch();
    assert!(epoch2 > epoch1, "second epoch must exceed first");
    let reloaded = load_from_path(path, Some(&plugins)).unwrap();
    let snap2 = reloaded.snapshot();
    let node_id2 = snap2.nodes().iter().next().unwrap().0;
    let prov2 = snap2.node_provenance(node_id2).unwrap();
    assert_eq!(
        prov2.first_seen_epoch, epoch1,
        "first_seen_epoch must survive save/load/save round-trip"
    );
    assert_eq!(
        prov2.last_seen_epoch, epoch2,
        "last_seen_epoch must advance to the second save epoch"
    );
}
#[test]
fn provenance_content_hash_refreshed_on_resave() {
    // Resaving an unchanged node must reproduce the same content hash.
    let graph = graph_with_one_node(
        "my_module::hash_fn",
        Language::Rust,
        Path::new("/hash_test.rs"),
    );
    let temp_file = NamedTempFile::new().unwrap();
    let path = temp_file.path();
    let plugins = create_test_plugin_manager();
    save_to_path(&graph, path).unwrap();
    let loaded = load_from_path(path, Some(&plugins)).unwrap();
    let snap1 = loaded.snapshot();
    let node_id = snap1.nodes().iter().next().unwrap().0;
    let hash1 = snap1.node_provenance(node_id).unwrap().content_hash;
    // Save again from the loaded graph and compare the recomputed hash.
    save_to_path(&loaded, path).unwrap();
    let reloaded = load_from_path(path, Some(&plugins)).unwrap();
    let snap2 = reloaded.snapshot();
    let node_id2 = snap2.nodes().iter().next().unwrap().0;
    let hash2 = snap2.node_provenance(node_id2).unwrap().content_hash;
    assert_eq!(
        hash1, hash2,
        "content_hash must be refreshed from current node body on resave"
    );
}
#[test]
fn edge_provenance_first_seen_survives_round_trip() {
    use crate::graph::unified::edge::EdgeKind;
    // Build a two-node graph with one Calls edge, then save/load/save and
    // verify edge provenance epochs behave like node provenance: first
    // seen pinned, last seen advancing.
    let mut graph = graph_with_one_node(
        "my_module::caller",
        Language::Rust,
        Path::new("/edge_test.rs"),
    );
    let file_id = graph.files().get(Path::new("/edge_test.rs")).unwrap();
    let name2 = graph.strings_mut().intern("callee").unwrap();
    let qname2 = graph.strings_mut().intern("my_module::callee").unwrap();
    let entry2 = NodeEntry::new(NodeKind::Function, name2, file_id)
        .with_location(5, 0, 5, 10)
        .with_qualified_name(qname2);
    let node2 = graph.nodes_mut().alloc(entry2).unwrap();
    let node1 = graph.nodes().iter().next().unwrap().0;
    let _edge = graph.edges().add_edge(
        node1,
        node2,
        EdgeKind::Calls {
            argument_count: 0,
            is_async: false,
        },
        file_id,
    );
    let temp_file = NamedTempFile::new().unwrap();
    let path = temp_file.path();
    let plugins = create_test_plugin_manager();
    save_to_path(&graph, path).unwrap();
    let epoch1 = load_header_from_path(path).unwrap().fact_epoch();
    let loaded = load_from_path(path, Some(&plugins)).unwrap();
    save_to_path(&loaded, path).unwrap();
    let epoch2 = load_header_from_path(path).unwrap().fact_epoch();
    assert!(epoch2 > epoch1);
    let reloaded = load_from_path(path, Some(&plugins)).unwrap();
    let snap = reloaded.snapshot();
    let n1 = snap.nodes().iter().next().unwrap().0;
    let edges = snap.edges().edges_from(n1);
    assert!(!edges.is_empty(), "graph must have at least one edge");
    drop(edges);
    // Edge ids are slot-based; scan every possible slot because the
    // surviving edge's id after the round-trip is not known a priori.
    let forward_stats = snap.edges().stats().forward;
    let total_edges = forward_stats.csr_edge_count + forward_stats.delta_edge_count;
    let mut found_preserved = false;
    for idx in 0..total_edges {
        if let Ok(i) = u32::try_from(idx) {
            let eid = crate::graph::unified::edge::id::EdgeId::new(i);
            if let Some(eprov) = snap.edge_provenance(eid) {
                assert_eq!(
                    eprov.first_seen_epoch, epoch1,
                    "edge slot {idx}: first_seen_epoch must survive round-trip"
                );
                assert_eq!(
                    eprov.last_seen_epoch, epoch2,
                    "edge slot {idx}: last_seen_epoch must advance to second epoch"
                );
                found_preserved = true;
            }
        }
    }
    assert!(
        found_preserved,
        "must find at least one edge with preserved provenance"
    );
}
#[test]
fn provenance_reused_node_slot_gets_fresh_first_seen() {
    // Seed the index with one node and record the epoch of that first save.
    let graph = graph_with_one_node(
        "my_module::original",
        Language::Rust,
        Path::new("/reuse_test.rs"),
    );
    let temp_file = NamedTempFile::new().unwrap();
    let path = temp_file.path();
    let plugins = create_test_plugin_manager();
    save_to_path(&graph, path).unwrap();
    let epoch1 = load_header_from_path(path).unwrap().fact_epoch();

    // Remove the original node and allocate a replacement so the freed
    // arena slot is reused under a bumped generation.
    let mut loaded = load_from_path(path, Some(&plugins)).unwrap();
    let (old_node_id, _) = loaded.nodes().iter().next().unwrap();
    let old_index = old_node_id.index();
    let old_generation = old_node_id.generation();
    assert!(
        loaded.node_provenance(old_node_id).is_some(),
        "loaded graph must carry provenance for the original node"
    );
    let file_id = loaded.files().get(Path::new("/reuse_test.rs")).unwrap();
    loaded.nodes_mut().remove(old_node_id);
    let replacement_name = loaded.strings_mut().intern("replacement").unwrap();
    let replacement_qname = loaded
        .strings_mut()
        .intern("my_module::replacement")
        .unwrap();
    let replacement_entry = NodeEntry::new(NodeKind::Function, replacement_name, file_id)
        .with_location(10, 0, 10, 20)
        .with_qualified_name(replacement_qname);
    let new_node_id = loaded.nodes_mut().alloc(replacement_entry).unwrap();
    assert_eq!(
        new_node_id.index(),
        old_index,
        "new node must reuse the freed slot"
    );
    assert!(
        new_node_id.generation() > old_generation,
        "reused slot must have a bumped generation"
    );

    // Resave at a later epoch; the new occupant's provenance must start
    // fresh instead of inheriting the old tenant's history.
    save_to_path(&loaded, path).unwrap();
    let epoch2 = load_header_from_path(path).unwrap().fact_epoch();
    assert!(epoch2 > epoch1);
    let reloaded = load_from_path(path, Some(&plugins)).unwrap();
    let snap = reloaded.snapshot();
    let (reloaded_id, _) = snap.nodes().iter().next().unwrap();
    let prov = snap
        .node_provenance(reloaded_id)
        .expect("new occupant must have provenance");
    assert_eq!(
        prov.first_seen_epoch, epoch2,
        "reused slot with bumped generation must get fresh first_seen_epoch, \
         not carry over from the old tenant"
    );
    assert_eq!(prov.last_seen_epoch, epoch2);
}
}