Skip to main content

sqry_db/
persistence.rs

1//! Derived facts persistence to `.sqry/graph/derived.sqry`.
2//!
3//! Hot derived facts (cached query results) are persisted to a companion
4//! file alongside the main snapshot. On reload, the cache is warmed from
5//! the persisted facts if the graph identity (snapshot SHA-256) matches.
6//!
7//! # Format (v02)
8//!
9//! The derived file is a postcard stream with the layout:
10//!
11//! ```text
12//! [DerivedHeader][PersistedEntry][PersistedEntry]...[PersistedEntry]
13//! ```
14//!
15//! The header is always first and carries the magic bytes, format version,
16//! snapshot identity, all three revision tiers, and the entry count.
17//! Each subsequent record is a [`PersistedEntry`] carrying the serialized
18//! key, value, and dependency metadata for one cached query result.
19//!
20//! Streaming decode (postcard `take_from_bytes`) lets fatal framing
21//! corruption at entry N be caught before any entry is committed, while
22//! still supporting large entry counts without peak-RAM serialization of
23//! the whole file.
24//!
25//! # Magic + version
26//!
27//! Magic: [`DERIVED_MAGIC`] — exactly 16 ASCII bytes `b"SQRY_DERIVED_V02"`.
28//! Format version: [`DERIVED_FORMAT_VERSION`] = `2`. Version `1` is
29//! reserved and intentionally skipped to avoid schema collision with the
30//! prior warm-only `DerivedManifest` (DB03, three-field struct).
31//!
32//! # Stale detection
33//!
34//! If the snapshot's SHA-256 doesn't match the header's `snapshot_sha256`,
35//! the entire derived file is discarded and queries recompute on demand.
36
37use std::path::{Path, PathBuf};
38use std::sync::atomic::Ordering;
39
40use serde::{Deserialize, Serialize};
41use sha2::{Digest, Sha256};
42use sqry_core::graph::unified::file::id::FileId;
43use sqry_core::persistence::{PathSafetyError, atomic_write_bytes, validate_path_in_workspace};
44
45// ============================================================================
46// Constants
47// ============================================================================
48
/// Magic bytes stored in the `magic` field of every v02 [`DerivedHeader`].
///
/// Exactly 16 ASCII bytes. Serves as a fixed 16-byte header guard:
/// [`load_derived`] rejects a file as corrupt when the decoded header's
/// `magic` differs from this sequence.
/// 16 bytes (not 15) was chosen to fix the iter1-flagged inconsistency in the
/// prior `"SQRY_DERIVED_V1"` string (15 bytes).
pub const DERIVED_MAGIC: [u8; 16] = *b"SQRY_DERIVED_V02";
56
/// Format revision for the current derived-cache wire format.
///
/// Value `2` skips `1` to avoid schema collision with the prior warm-only
/// `DerivedManifest` (DB03) which used a 3-field postcard struct.
/// [`load_derived`] rejects any file whose decoded `format_version` differs
/// from this value, reporting it as a version mismatch.
pub const DERIVED_FORMAT_VERSION: u16 = 2;
63
64// ============================================================================
65// QueryDeps — serializable three-tier dependency snapshot
66// ============================================================================
67
/// Serializable snapshot of the three-tier dependency metadata recorded
/// during query execution.
///
/// Stored inside each [`PersistedEntry`] and copied unchanged into the
/// corresponding [`StagedEntry`] on load, so the dependency fields of a
/// cached result can be reconstructed after cold-start rehydration.
///
/// Field names intentionally mirror the `CachedResult` fields:
/// - Tier 1: `file_deps` — `(FileId, revision_at_read_time)` pairs.
/// - Tier 2: `edge_revision` — global edge revision at cache time.
/// - Tier 3: `metadata_revision` — global metadata revision at cache time.
///
/// The derived `Default` is "no dependencies recorded": an empty `file_deps`
/// and `None` for both global revisions.
///
/// Field order is part of the wire format (postcard is order-sensitive); do
/// not reorder fields.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct QueryDeps {
    /// Tier 1: file-level dependencies.
    ///
    /// Each entry is `(FileId, revision_at_read_time)`.  An empty `Vec`
    /// means the query did not touch any file-specific data (rare but valid
    /// for pure global queries).
    pub file_deps: Vec<(FileId, u64)>,
    /// Tier 2: global edge revision at cache time.
    ///
    /// `None` if the query does not track `TRACKS_EDGE_REVISION`.
    pub edge_revision: Option<u64>,
    /// Tier 3: global metadata revision at cache time.
    ///
    /// `None` if the query does not track `TRACKS_METADATA_REVISION`.
    pub metadata_revision: Option<u64>,
}
96
97// ============================================================================
98// DerivedHeader — file-level header (v02)
99// ============================================================================
100
/// File-level header. Always first in the derived file.
///
/// Carries the magic guard, format version, snapshot identity, all three
/// revision tiers, the entry count, and the save timestamp.
///
/// Field order MUST NOT be changed: postcard serialization is order-sensitive,
/// so reordering (or inserting) fields silently changes the on-disk layout.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DerivedHeader {
    /// Magic bytes. EXACTLY 16 ASCII bytes: `b"SQRY_DERIVED_V02"`
    /// (see [`DERIVED_MAGIC`]).
    pub magic: [u8; 16],
    /// Format revision. Current: 2 (see [`DERIVED_FORMAT_VERSION`]).
    pub format_version: u16,
    /// SHA-256 of the main `snapshot.sqry` file. Compared against the
    /// caller-supplied hash on load to detect a stale derived file.
    pub snapshot_sha256: [u8; 32],
    /// Saved global edge revision.
    pub edge_revision: u64,
    /// Saved global metadata revision.
    pub metadata_revision: u64,
    /// Saved per-file revisions as `(FileId, revision)` pairs.
    pub file_revisions: Vec<(FileId, u64)>,
    /// Number of `PersistedEntry` records following the header.
    pub entry_count: u64,
    /// Unix seconds when saved. [`DerivedHeader::new`] fills this from the
    /// system clock, using `0` if the clock reads earlier than the Unix epoch.
    pub saved_at: u64,
}
126
127impl DerivedHeader {
128    /// Creates a new v02 header for the given snapshot hash and revision
129    /// state.  `saved_at` is populated from `SystemTime::now`.
130    #[must_use]
131    pub fn new(
132        snapshot_sha256: [u8; 32],
133        edge_revision: u64,
134        metadata_revision: u64,
135        file_revisions: Vec<(FileId, u64)>,
136        entry_count: u64,
137    ) -> Self {
138        let saved_at = std::time::SystemTime::now()
139            .duration_since(std::time::UNIX_EPOCH)
140            .unwrap_or_default()
141            .as_secs();
142        Self {
143            magic: DERIVED_MAGIC,
144            format_version: DERIVED_FORMAT_VERSION,
145            snapshot_sha256,
146            edge_revision,
147            metadata_revision,
148            file_revisions,
149            entry_count,
150            saved_at,
151        }
152    }
153
154    /// Returns `true` if the magic bytes and format version identify a valid
155    /// v02 derived file.
156    ///
157    /// Used by LOAD_PATH to reject legacy v01 files and corrupted files
158    /// before attempting entry decode.
159    #[must_use]
160    pub fn is_valid_v02(&self) -> bool {
161        self.magic == DERIVED_MAGIC && self.format_version == DERIVED_FORMAT_VERSION
162    }
163
164    /// Checks if this header matches the given snapshot hash.
165    #[must_use]
166    pub fn matches_snapshot(&self, snapshot_sha256: &[u8; 32]) -> bool {
167        self.snapshot_sha256 == *snapshot_sha256
168    }
169}
170
171// ============================================================================
172// Legacy DB03 alias
173// ============================================================================
174
/// Legacy DB03 alias for [`DerivedHeader`].
///
/// The warm-only `DerivedManifest` (DB03, three-field struct) has been
/// superseded by `DerivedHeader` v02.  This alias is retained so that any
/// code referencing `DerivedManifest` compiles without changes during the
/// PN3 transition.  New code should use [`DerivedHeader`] directly.
pub type DerivedManifest = DerivedHeader;
183
184// ============================================================================
185// PersistedEntry — per-entry wire record
186// ============================================================================
187
/// One persisted cache entry in the derived file.
///
/// Follows the [`DerivedHeader`] in the stream, repeated `entry_count` times.
/// [`load_derived`] decodes entries one-by-one with
/// [`deserialize_next_entry`] so framing corruption at entry N is caught
/// before any entries are committed.
///
/// Field order is part of the wire format (postcard is order-sensitive); do
/// not reorder fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersistedEntry {
    /// Stable on-disk query type discriminator.  Must match a registered
    /// query's `DerivedQuery::QUERY_TYPE_ID`; unknown IDs are silently
    /// skipped by [`load_derived`].
    pub query_type_id: u32,
    /// Postcard-serialized query key bytes.
    pub raw_key_bytes: Vec<u8>,
    /// Postcard-serialized query result bytes.
    pub raw_result_bytes: Vec<u8>,
    /// Three-tier dependency snapshot at cache time.
    pub deps: QueryDeps,
}
207
208// ============================================================================
209// Stream helpers
210// ============================================================================
211
212/// Serialize the header + iterator of entries into a single `Vec<u8>`.
213///
214/// Wire layout: `[header postcard bytes][entry postcard bytes]*`.
215///
216/// Each record is independently postcard-encoded and concatenated. The LOAD_PATH
217/// layer uses [`deserialize_derived_header`] + repeated [`deserialize_next_entry`]
218/// to decode the stream incrementally without peak-RAM buffering of all entries.
219///
220/// # Errors
221///
222/// Returns `postcard::Error` if serialization of the header or any entry
223/// fails.  All `postcard::to_allocvec` calls are infallible for well-formed
224/// structs in practice; the `?` propagation is for forward-compatibility.
225pub fn serialize_derived_stream<I>(
226    header: &DerivedHeader,
227    entries: I,
228) -> Result<Vec<u8>, postcard::Error>
229where
230    I: IntoIterator<Item = PersistedEntry>,
231{
232    let mut buf = postcard::to_allocvec(header)?;
233    for entry in entries {
234        let entry_bytes = postcard::to_allocvec(&entry)?;
235        buf.extend_from_slice(&entry_bytes);
236    }
237    Ok(buf)
238}
239
240/// Deserialize the header from the beginning of `bytes`, returning the
241/// header and the remaining byte slice (the entry stream tail).
242///
243/// Does NOT decode entries — that is the caller's responsibility.  LOAD_PATH
244/// calls [`deserialize_next_entry`] repeatedly on the returned tail to decode
245/// entries one at a time inside a staged-validation loop.
246///
247/// # Errors
248///
249/// Returns `postcard::Error` on header deserialization failure (truncated
250/// data, schema mismatch, etc.).
251pub fn deserialize_derived_header(bytes: &[u8]) -> Result<(DerivedHeader, &[u8]), postcard::Error> {
252    postcard::take_from_bytes(bytes)
253}
254
255/// Decode a single [`PersistedEntry`] from the head of `bytes`, returning
256/// the entry and the remaining tail.
257///
258/// Callers iterate this function inside a staged-validation loop until
259/// `tail.is_empty()`, accumulating entries for atomic commit.
260///
261/// # Errors
262///
263/// Returns `postcard::Error` on entry deserialization failure.  A single
264/// failing entry aborts the whole load in the staged-validation loop (fatal
265/// framing rejection).
266pub fn deserialize_next_entry(bytes: &[u8]) -> Result<(PersistedEntry, &[u8]), postcard::Error> {
267    postcard::take_from_bytes(bytes)
268}
269
270// ============================================================================
271// SHA-256 + path helpers (unchanged from DB03)
272// ============================================================================
273
274/// Computes the SHA-256 hash of a file at the given path.
275///
276/// # Errors
277///
278/// Returns an IO error if the file cannot be read.
279pub fn compute_file_sha256(path: &Path) -> std::io::Result<[u8; 32]> {
280    let data = std::fs::read(path)?;
281    let mut hasher = Sha256::new();
282    hasher.update(&data);
283    let result = hasher.finalize();
284    let mut hash = [0u8; 32];
285    hash.copy_from_slice(&result);
286    Ok(hash)
287}
288
/// Builds the path of a companion file named `filename` that sits in the
/// same directory as the snapshot at `snapshot_path`.
///
/// For example, with a snapshot at `.sqry/graph/snapshot.sqry` and
/// `filename = "derived.sqry"` the result is `.sqry/graph/derived.sqry`.
///
/// When `snapshot_path` has no parent (it is empty or is a filesystem root),
/// the current directory `.` is used as the directory.
#[must_use]
pub fn derived_path_for_snapshot(snapshot_path: &Path, filename: &str) -> PathBuf {
    let directory = match snapshot_path.parent() {
        Some(parent) => parent,
        None => Path::new("."),
    };
    directory.join(filename)
}
301
302/// Saves a derived header to disk (warm-path compatibility shim).
303///
304/// This function is retained as a thin compatibility shim for the existing
305/// warm-path tests and callers that previously called `save_manifest`.
306/// New code should use the full `save_derived` function (SAVE_PATH unit).
307///
308/// # Errors
309///
310/// Returns an error if serialization or file writing fails.
311pub fn save_manifest(path: &Path, manifest: &DerivedHeader) -> anyhow::Result<()> {
312    let bytes = postcard::to_allocvec(manifest)?;
313    std::fs::write(path, bytes)?;
314    Ok(())
315}
316
317/// Loads a derived header from disk (warm-path compatibility shim).
318///
319/// Returns `None` if the file doesn't exist, can't be read, or can't be
320/// deserialized.  Note: this decodes the whole file as a `DerivedHeader` and
321/// does NOT validate magic / format_version — that responsibility lives in the
322/// LOAD_PATH unit's staged-validation loop.
323#[must_use]
324pub fn load_manifest(path: &Path) -> Option<DerivedHeader> {
325    let bytes = std::fs::read(path).ok()?;
326    postcard::from_bytes(&bytes).ok()
327}
328
329// ============================================================================
330// save_derived — SAVE_PATH unit
331// ============================================================================
332
333/// Writes the QueryDb's persistent cache entries to `path` using an atomic
334/// write.
335///
336/// # Algorithm
337///
338/// 1. [`validate_path_in_workspace`] before any IO — rejects symlink targets,
339///    symlinked ancestor directories, and paths outside the workspace.
340/// 2. Collect all persistent cache entries via
341///    [`QueryDb::iter_persistent_cache_entries`] into a `Vec` so shard locks
342///    are released before any allocation-intensive encoding begins.
343/// 3. Build a [`DerivedHeader`] from the current DB state with
344///    `entry_count = entries.len()`.
345/// 4. [`serialize_derived_stream`] → byte vector.
346/// 5. [`atomic_write_bytes`] — tempfile-in-same-dir + fsync + rename so the
347///    target is never left partially written.
348///
349/// # Non-mutating
350///
351/// Takes `&QueryDb` (not `&mut`). Save is a read-only operation on the DB;
352/// it does not mutate revisions, the cache, or any other internal state.
353///
354/// # Errors
355///
356/// - [`sqry_core::persistence::PathSafetyError`] wrapped as `anyhow::Error`
357///   when the target path fails workspace validation.
358/// - [`postcard::Error`] wrapped as `anyhow::Error` on serialisation failure.
359/// - [`std::io::Error`] wrapped as `anyhow::Error` on atomic write failure.
360pub fn save_derived(
361    db: &crate::QueryDb,
362    snapshot_sha256: [u8; 32],
363    path: &Path,
364    workspace_root: &Path,
365) -> anyhow::Result<()> {
366    // Step 1: Path safety validation — must happen before any IO.
367    //
368    // The returned `canonical_path` is what every subsequent IO operation
369    // must use. The raw `path` parameter may be relative, contain `..`
370    // components, or otherwise differ from the validated target; writing
371    // via the raw path would defeat the validation entirely (Codex review
372    // finding).
373    let canonical_path = validate_path_in_workspace(path, workspace_root)?;
374
375    // Step 2: Collect persistent entries (releases all shard locks before IO).
376    let persistent: Vec<PersistedEntry> = db
377        .iter_persistent_cache_entries()
378        .map(|e| PersistedEntry {
379            query_type_id: e.query_type_id,
380            raw_key_bytes: e.raw_key_bytes.to_vec(),
381            raw_result_bytes: e.raw_result_bytes.to_vec(),
382            deps: e.deps,
383        })
384        .collect();
385
386    // Step 3: Build header — entry_count is now known.
387    let header = DerivedHeader::new(
388        snapshot_sha256,
389        db.edge_revision(),
390        db.metadata_revision(),
391        db.inputs().all_revisions(),
392        persistent.len() as u64,
393    );
394
395    // Step 4: Serialize header + entry stream into a single buffer.
396    let bytes = serialize_derived_stream(&header, persistent)?;
397
398    // Step 5: Atomic write — tempfile + fsync + rename. MUST target the
399    // validated canonical path, never the raw caller input.
400    atomic_write_bytes(&canonical_path, &bytes)?;
401
402    Ok(())
403}
404
405// ============================================================================
406// load_derived — LOAD_PATH unit
407// ============================================================================
408
/// Failure modes for [`load_derived`].
///
/// The caller should treat [`LoadError::NotFound`] as a soft miss (the derived
/// file simply doesn't exist yet — normal on first run) and
/// [`LoadError::AlreadyLoaded`] as a sign that the DB was already warmed.
/// [`LoadError::StaleSnapshot`] and [`LoadError::Corrupt`] mean the on-disk
/// file is unusable and warrants deleting or ignoring.
#[derive(Debug, thiserror::Error)]
pub enum LoadError {
    /// The derived-cache file does not exist at `path`.
    #[error("derived-cache file not found: {path}")]
    NotFound {
        /// The validated (canonical) path that was checked.
        path: PathBuf,
    },
    /// The file's `snapshot_sha256` header field does not match the hash the
    /// caller supplied.
    ///
    /// The derived file was produced from a different graph snapshot (stale
    /// or corrupted). The file should be deleted and queries recomputed.
    #[error("derived-cache snapshot SHA mismatch — file discarded")]
    StaleSnapshot,
    /// The file is structurally corrupt (bad magic, wrong version, truncated
    /// entry stream, etc.).
    #[error("derived-cache file is corrupt: {detail}")]
    Corrupt {
        /// Human-readable description of the corruption detected.
        detail: String,
    },
    /// The path failed workspace safety validation before the file was opened.
    ///
    /// Wraps [`sqry_core::persistence::PathSafetyError`].
    #[error("derived-cache path validation failed: {0}")]
    PathSafety(#[from] PathSafetyError),
    /// An IO error other than "not found" occurred while reading the file.
    #[error("derived-cache IO error: {0}")]
    Io(#[from] std::io::Error),
    /// A successful `load_derived` call has already been applied to this DB.
    ///
    /// Subsequent calls are no-ops: the cold-load window is closed after the
    /// first successful load, preventing accidental double-apply of stale
    /// or different on-disk state.
    #[error("derived-cache load already applied to this DB; subsequent calls are no-ops")]
    AlreadyLoaded,
}
451
/// Outcome of a successful [`load_derived`] call.
///
/// [`load_derived`] currently only ever returns [`LoadOutcome::Applied`];
/// [`LoadOutcome::Skipped`] cannot be constructed while [`SkipReason`] has no
/// variants.
#[derive(Debug, Clone)]
pub enum LoadOutcome {
    /// The derived file was loaded and `entries` cache entries were applied.
    Applied {
        /// Number of entries committed to the cache.
        ///
        /// Unknown-`query_type_id` entries (forward-compat skip) are NOT
        /// counted here; only entries that were actually staged and committed
        /// are included.
        entries: usize,
    },
    /// The load was skipped for the given reason.
    Skipped(SkipReason),
}
467
/// Reason for skipping a load attempt.
///
/// Currently no variants are defined, so a value of this type cannot be
/// constructed; the enum exists for forward compatibility with future skip
/// conditions (e.g., `Disabled`, `FileTooLarge`, `RateLimited`).
#[derive(Debug, Clone)]
pub enum SkipReason {
    // No current variants — placeholder for forward compatibility.
}
476
477/// Staged entry carrying only raw bytes + type id + deps — no typed value.
478///
479/// Produced by the validation loop in [`load_derived`] and consumed by
480/// [`QueryDb::commit_staged_load`]. The staged form is intentionally
481/// type-erased: deserialising each query's typed key/value is unnecessary
482/// for cold-load warming.
483pub struct StagedEntry {
484    /// Stable on-disk query type discriminator from the stream.
485    pub query_type_id: u32,
486    /// Raw postcard-serialised key bytes from the persisted entry.
487    pub raw_key_bytes: Vec<u8>,
488    /// Raw postcard-serialised result bytes from the persisted entry.
489    pub raw_result_bytes: Vec<u8>,
490    /// Three-tier dependency snapshot at cache time.
491    pub deps: QueryDeps,
492}
493
494/// Returns `true` if `id` is one of the 15 built-in query type IDs.
495///
496/// Used by the validation loop to decide whether to stage or silently skip
497/// an entry. Unknown IDs (forward-compat additions, downstream IDs, 0x0000)
498/// are skipped without error to allow rolling upgrades and file sharing
499/// across sqry versions.
500#[inline]
501fn is_known_builtin(id: u32) -> bool {
502    use crate::queries::type_ids;
503    matches!(
504        id,
505        type_ids::CALLERS
506            | type_ids::CALLEES
507            | type_ids::IMPORTS
508            | type_ids::EXPORTS
509            | type_ids::REFERENCES
510            | type_ids::IMPLEMENTS
511            | type_ids::CYCLES
512            | type_ids::IS_IN_CYCLE
513            | type_ids::UNUSED
514            | type_ids::IS_NODE_UNUSED
515            | type_ids::REACHABILITY
516            | type_ids::ENTRY_POINTS
517            | type_ids::REACHABLE_FROM_ENTRY_POINTS
518            | type_ids::SCC
519            | type_ids::CONDENSATION
520    )
521}
522
523/// Load a derived file at `path` into a pristine [`QueryDb`].
524///
525/// # Staged-validation + infallible-commit contract (spec §5.7)
526///
527/// 1. Path validation happens **before** any file IO.
528/// 2. All fallible work (file open, header decode, magic/version check, SHA
529///    match, entry stream decode) runs in the validation phase and returns
530///    `Err(...)` without touching the DB.
531/// 3. Once all entries are staged successfully, [`QueryDb::commit_staged_load`]
532///    is called.  That function is **infallible by construction** — it contains
533///    no `?`, no `Result`-bearing call, and no `map_err`.
534/// 4. After commit, `cold_load_allowed` is flipped to `false` to prevent a
535///    second load from overwriting the committed state.
536///
537/// # Errors
538///
539/// - [`LoadError::PathSafety`] — path fails workspace validation.
540/// - [`LoadError::NotFound`] — file does not exist (`ENOENT`).
541/// - [`LoadError::Io`] — other IO errors.
542/// - [`LoadError::Corrupt`] — magic mismatch, version mismatch, or truncated
543///   entry stream.
544/// - [`LoadError::StaleSnapshot`] — SHA-256 in the header doesn't match
545///   `snapshot_sha256`.
546/// - [`LoadError::AlreadyLoaded`] — a successful load has already been applied
547///   to `db`.
548pub fn load_derived(
549    db: &mut crate::QueryDb,
550    snapshot_sha256: [u8; 32],
551    path: &Path,
552    workspace_root: &Path,
553) -> Result<LoadOutcome, LoadError> {
554    // Step 1: Path safety validation — must happen before any IO.
555    //
556    // The returned `canonical_path` is what every subsequent IO operation
557    // must use. Reading via the raw `path` would defeat the validation
558    // (Codex review finding).
559    let canonical_path = validate_path_in_workspace(path, workspace_root)?;
560
561    // Step 5 (early): Check cold-load window before any file IO.
562    //
563    // Spec §5.7 lists this as step 5, but elevating the check to before the
564    // file open satisfies both the atomicity contract (DB is never double-loaded)
565    // and the test requirement that AlreadyLoaded is returned without reading
566    // the file. This is strictly safer: no point paying for disk reads when
567    // the load cannot proceed regardless.
568    if !db.cold_load_allowed.load(Ordering::Acquire) {
569        return Err(LoadError::AlreadyLoaded);
570    }
571
572    // Step 2: Open the file and read all bytes — via the validated
573    // canonical path, never the raw caller input.
574    let bytes = match std::fs::read(&canonical_path) {
575        Ok(b) => b,
576        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
577            return Err(LoadError::NotFound {
578                path: canonical_path.clone(),
579            });
580        }
581        Err(e) => return Err(LoadError::Io(e)),
582    };
583
584    // Step 3 (inline with step 2): bytes are now in memory.
585
586    // Step 4: Decode and validate the header.
587    let (header, mut tail) =
588        deserialize_derived_header(&bytes).map_err(|e| LoadError::Corrupt {
589            detail: format!("header decode: {e}"),
590        })?;
591
592    if header.magic != DERIVED_MAGIC {
593        return Err(LoadError::Corrupt {
594            detail: "magic mismatch".to_owned(),
595        });
596    }
597    if header.format_version != DERIVED_FORMAT_VERSION {
598        return Err(LoadError::Corrupt {
599            detail: format!(
600                "version mismatch: expected {DERIVED_FORMAT_VERSION}, got {}",
601                header.format_version
602            ),
603        });
604    }
605    if header.snapshot_sha256 != snapshot_sha256 {
606        return Err(LoadError::StaleSnapshot);
607    }
608
609    // Step 6: Streaming entry validation — accumulate into `staged`.
610    // DB is NOT touched if any entry decode fails.
611    let mut staged: Vec<StagedEntry> = Vec::new();
612    while !tail.is_empty() {
613        let (entry, rest) = deserialize_next_entry(tail).map_err(|e| LoadError::Corrupt {
614            detail: format!("entry decode: {e}"),
615        })?;
616        tail = rest;
617
618        if !is_known_builtin(entry.query_type_id) {
619            // Unknown or reserved ID — skip silently for forward/backward compat.
620            continue;
621        }
622
623        staged.push(StagedEntry {
624            query_type_id: entry.query_type_id,
625            raw_key_bytes: entry.raw_key_bytes,
626            raw_result_bytes: entry.raw_result_bytes,
627            deps: entry.deps,
628        });
629    }
630
631    // --- COMMIT BOUNDARY ---
632    // All validation above passed. From here on: no `?`, no `Result`.
633    // Steps 7–9 are the infallible commit phase.
634
635    // Step 7: Commit staged entries — INFALLIBLE.
636    let entries_applied = staged.len();
637    db.commit_staged_load(header, staged);
638
639    // Step 8: Close the cold-load window.
640    db.cold_load_allowed.store(false, Ordering::Release);
641
642    // Step 9: Return success.
643    Ok(LoadOutcome::Applied {
644        entries: entries_applied,
645    })
646}
647
648// ============================================================================
649// Tests
650// ============================================================================
651
652// ============================================================================
653// save_path_tests — SAVE_PATH acceptance tests
654// ============================================================================
655
656// ============================================================================
657// load_path_tests — LOAD_PATH acceptance tests
658// ============================================================================
659
660#[cfg(test)]
661mod load_path_tests {
662    use std::sync::Arc;
663
664    use sqry_core::graph::unified::concurrent::CodeGraph;
665    use tempfile::TempDir;
666
667    use super::*;
668    use crate::queries::type_ids;
669    use crate::{QueryDb, QueryDbConfig};
670
671    // -------------------------------------------------------------------------
672    // Test helpers
673    // -------------------------------------------------------------------------
674
675    /// Build a minimal empty `QueryDb` backed by an empty `CodeGraph`.
676    fn empty_db() -> QueryDb {
677        let snapshot = Arc::new(CodeGraph::new().snapshot());
678        QueryDb::new(snapshot, QueryDbConfig::default())
679    }
680
681    /// Build a v02 stream with `n_entries` valid entries of type CALLERS
682    /// and return the serialised bytes.
683    fn make_valid_stream(sha: [u8; 32], n_entries: usize) -> Vec<u8> {
684        let entries: Vec<PersistedEntry> = (0..n_entries)
685            .map(|i| PersistedEntry {
686                query_type_id: type_ids::CALLERS,
687                raw_key_bytes: vec![i as u8],
688                raw_result_bytes: vec![0xAA, i as u8],
689                deps: QueryDeps::default(),
690            })
691            .collect();
692        let header = DerivedHeader::new(sha, 5, 3, vec![], entries.len() as u64);
693        serialize_derived_stream(&header, entries).unwrap()
694    }
695
696    // -------------------------------------------------------------------------
697    // AC 15: happy_path_roundtrip
698    // -------------------------------------------------------------------------
699
700    /// Save a DB with entries via `save_derived`, then load via `load_derived`
701    /// and assert `Applied.entries` == what was saved.
702    #[test]
703    fn happy_path_roundtrip() {
704        let dir = TempDir::new().unwrap();
705        let path = dir.path().join("derived.sqry");
706        let workspace_root = dir.path();
707        let sha: [u8; 32] = [0x42; 32];
708
709        // Build a stream with 3 known-type entries.
710        let bytes = make_valid_stream(sha, 3);
711        std::fs::write(&path, &bytes).unwrap();
712
713        let mut db = empty_db();
714        let outcome = load_derived(&mut db, sha, &path, workspace_root).unwrap();
715
716        match outcome {
717            LoadOutcome::Applied { entries } => {
718                assert_eq!(entries, 3, "expected 3 entries applied");
719            }
720            LoadOutcome::Skipped(_) => panic!("unexpected Skipped outcome"),
721        }
722    }
723
724    // -------------------------------------------------------------------------
725    // AC 16: missing_file_returns_not_found
726    // -------------------------------------------------------------------------
727
728    #[test]
729    fn missing_file_returns_not_found() {
730        let dir = TempDir::new().unwrap();
731        let path = dir.path().join("nonexistent.sqry");
732        let workspace_root = dir.path();
733
734        let mut db = empty_db();
735        let err = load_derived(&mut db, [0u8; 32], &path, workspace_root)
736            .expect_err("missing file must return Err");
737
738        assert!(
739            matches!(err, LoadError::NotFound { .. }),
740            "expected NotFound, got: {err}"
741        );
742    }
743
744    // -------------------------------------------------------------------------
745    // AC 17: sha_mismatch_returns_stale_snapshot
746    // -------------------------------------------------------------------------
747
748    #[test]
749    fn sha_mismatch_returns_stale_snapshot() {
750        let dir = TempDir::new().unwrap();
751        let path = dir.path().join("derived.sqry");
752        let workspace_root = dir.path();
753
754        let saved_sha: [u8; 32] = [0x11; 32];
755        let caller_sha: [u8; 32] = [0x22; 32]; // different
756
757        let bytes = make_valid_stream(saved_sha, 0);
758        std::fs::write(&path, &bytes).unwrap();
759
760        let mut db = empty_db();
761        let err = load_derived(&mut db, caller_sha, &path, workspace_root)
762            .expect_err("SHA mismatch must return Err");
763
764        assert!(
765            matches!(err, LoadError::StaleSnapshot),
766            "expected StaleSnapshot, got: {err}"
767        );
768    }
769
770    // -------------------------------------------------------------------------
771    // AC 18: magic_mismatch_returns_corrupt
772    // -------------------------------------------------------------------------
773
774    #[test]
775    fn magic_mismatch_returns_corrupt() {
776        let dir = TempDir::new().unwrap();
777        let path = dir.path().join("derived.sqry");
778        let workspace_root = dir.path();
779
780        let sha: [u8; 32] = [0x33; 32];
781
782        // Build a valid stream then corrupt the first byte of the magic.
783        let mut bytes = make_valid_stream(sha, 0);
784        bytes[0] ^= 0xFF; // flip bits in magic[0]
785        std::fs::write(&path, &bytes).unwrap();
786
787        let mut db = empty_db();
788        let err = load_derived(&mut db, sha, &path, workspace_root)
789            .expect_err("magic mismatch must return Err");
790
791        // Either a decode error (postcard fails) or a Corrupt(magic mismatch).
792        assert!(
793            matches!(err, LoadError::Corrupt { .. }),
794            "expected Corrupt, got: {err}"
795        );
796    }
797
798    // -------------------------------------------------------------------------
799    // AC 19: truncated_file_returns_corrupt_and_db_unchanged
800    // -------------------------------------------------------------------------
801
802    /// Truncating the entry stream (after a valid header) triggers
803    /// `Err(Corrupt)` and the DB is NOT mutated (edge_revision stays 0).
804    #[test]
805    fn truncated_file_returns_corrupt_and_db_unchanged() {
806        let dir = TempDir::new().unwrap();
807        let path = dir.path().join("derived.sqry");
808        let workspace_root = dir.path();
809
810        let sha: [u8; 32] = [0x44; 32];
811
812        // Build a stream with 2 entries, then truncate to just after the header.
813        let full_bytes = make_valid_stream(sha, 2);
814
815        // Find where the header ends by deserialising it.
816        let (_header, tail) = deserialize_derived_header(&full_bytes).unwrap();
817        let header_len = full_bytes.len() - tail.len();
818
819        // Write header + partial first entry (cut off mid-entry).
820        let partial_entry_start = header_len;
821        // Write 3 bytes of the first entry (guaranteed truncation for any
822        // entry longer than 3 bytes — our entries have key + value + deps).
823        let truncated_len = partial_entry_start + 3;
824        let truncated_bytes = &full_bytes[..truncated_len];
825        std::fs::write(&path, truncated_bytes).unwrap();
826
827        let mut db = empty_db();
828        let initial_edge_rev = db.edge_revision();
829
830        let err = load_derived(&mut db, sha, &path, workspace_root)
831            .expect_err("truncated file must return Err");
832
833        assert!(
834            matches!(err, LoadError::Corrupt { .. }),
835            "expected Corrupt, got: {err}"
836        );
837
838        // DB must be untouched: edge_revision unchanged.
839        assert_eq!(
840            db.edge_revision(),
841            initial_edge_rev,
842            "DB edge_revision must be unchanged after failed load"
843        );
844        // cold_load_allowed must still be true so a retry on a repaired file
845        // is correct.
846        assert!(
847            db.cold_load_allowed(),
848            "cold_load_allowed must remain true after failed load"
849        );
850    }
851
852    // -------------------------------------------------------------------------
853    // AC 20: unknown_query_type_id_skipped_silently
854    // -------------------------------------------------------------------------
855
856    /// Stream: 2 CALLERS entries, 1 unknown-ID entry, 2 CALLEES entries.
857    /// Expected: 4 entries applied (unknown skipped silently).
858    #[test]
859    fn unknown_query_type_id_skipped_silently() {
860        let dir = TempDir::new().unwrap();
861        let path = dir.path().join("derived.sqry");
862        let workspace_root = dir.path();
863        let sha: [u8; 32] = [0x55; 32];
864
865        // An ID far outside the built-in range.
866        const UNKNOWN_ID: u32 = 0xBEEF;
867
868        let entries = vec![
869            PersistedEntry {
870                query_type_id: type_ids::CALLERS,
871                raw_key_bytes: vec![1],
872                raw_result_bytes: vec![0xA1],
873                deps: QueryDeps::default(),
874            },
875            PersistedEntry {
876                query_type_id: type_ids::CALLERS,
877                raw_key_bytes: vec![2],
878                raw_result_bytes: vec![0xA2],
879                deps: QueryDeps::default(),
880            },
881            PersistedEntry {
882                query_type_id: UNKNOWN_ID,
883                raw_key_bytes: vec![3],
884                raw_result_bytes: vec![0xA3],
885                deps: QueryDeps::default(),
886            },
887            PersistedEntry {
888                query_type_id: type_ids::CALLEES,
889                raw_key_bytes: vec![4],
890                raw_result_bytes: vec![0xA4],
891                deps: QueryDeps::default(),
892            },
893            PersistedEntry {
894                query_type_id: type_ids::CALLEES,
895                raw_key_bytes: vec![5],
896                raw_result_bytes: vec![0xA5],
897                deps: QueryDeps::default(),
898            },
899        ];
900
901        let header = DerivedHeader::new(sha, 0, 0, vec![], entries.len() as u64);
902        let bytes = serialize_derived_stream(&header, entries).unwrap();
903        std::fs::write(&path, &bytes).unwrap();
904
905        let mut db = empty_db();
906        let outcome = load_derived(&mut db, sha, &path, workspace_root).unwrap();
907
908        match outcome {
909            LoadOutcome::Applied { entries } => {
910                assert_eq!(
911                    entries, 4,
912                    "unknown entry must be silently skipped; expected 4 applied"
913                );
914            }
915            LoadOutcome::Skipped(_) => panic!("unexpected Skipped outcome"),
916        }
917    }
918
919    // -------------------------------------------------------------------------
920    // AC 21: second_load_returns_already_loaded
921    // -------------------------------------------------------------------------
922
923    /// Second call returns `AlreadyLoaded` without opening the file.
924    ///
925    /// We verify the error kind by pattern-matching; the "without file IO"
926    /// property is verified by checking that the error is returned even when
927    /// the file is deleted between calls.
928    #[test]
929    fn second_load_returns_already_loaded() {
930        let dir = TempDir::new().unwrap();
931        let path = dir.path().join("derived.sqry");
932        let workspace_root = dir.path();
933        let sha: [u8; 32] = [0x66; 32];
934
935        let bytes = make_valid_stream(sha, 1);
936        std::fs::write(&path, &bytes).unwrap();
937
938        let mut db = empty_db();
939
940        // First load — must succeed.
941        load_derived(&mut db, sha, &path, workspace_root).unwrap();
942
943        // Delete the file to confirm the second call doesn't do any IO.
944        std::fs::remove_file(&path).unwrap();
945
946        // Second load — must return AlreadyLoaded without reading the file.
947        let err = load_derived(&mut db, sha, &path, workspace_root)
948            .expect_err("second load must return Err");
949
950        assert!(
951            matches!(err, LoadError::AlreadyLoaded),
952            "expected AlreadyLoaded, got: {err}"
953        );
954    }
955
956    // -------------------------------------------------------------------------
957    // AC 22: header_restoration_restores_three_tiers
958    // -------------------------------------------------------------------------
959
960    /// After a successful load the DB's three revision tiers match the header.
961    #[test]
962    fn header_restoration_restores_three_tiers() {
963        use sqry_core::graph::unified::file::id::FileId;
964
965        let dir = TempDir::new().unwrap();
966        let path = dir.path().join("derived.sqry");
967        let workspace_root = dir.path();
968        let sha: [u8; 32] = [0x77; 32];
969
970        let file_revisions = vec![(FileId::new(1), 7u64), (FileId::new(2), 99u64)];
971        let header = DerivedHeader::new(
972            sha,
973            /*edge_revision=*/ 42,
974            /*metadata_revision=*/ 17,
975            file_revisions.clone(),
976            /*entry_count=*/ 0,
977        );
978        let bytes = serialize_derived_stream(&header, std::iter::empty()).unwrap();
979        std::fs::write(&path, &bytes).unwrap();
980
981        let mut db = empty_db();
982        let outcome = load_derived(&mut db, sha, &path, workspace_root).unwrap();
983        assert!(
984            matches!(outcome, LoadOutcome::Applied { entries: 0 }),
985            "expected Applied(0), got: {outcome:?}"
986        );
987
988        // Tier 2: global edge revision.
989        assert_eq!(db.edge_revision(), 42, "edge_revision must be restored");
990        // Tier 3: global metadata revision.
991        assert_eq!(
992            db.metadata_revision(),
993            17,
994            "metadata_revision must be restored"
995        );
996        // Tier 1: per-file revisions.
997        assert_eq!(
998            db.inputs().revision(FileId::new(1)),
999            Some(7),
1000            "file 1 revision must be restored"
1001        );
1002        assert_eq!(
1003            db.inputs().revision(FileId::new(2)),
1004            Some(99),
1005            "file 2 revision must be restored"
1006        );
1007    }
1008}
1009
#[cfg(test)]
mod save_path_tests {
    //! Tests for the save path: `save_derived` output is read back from disk
    //! and its header inspected via `deserialize_derived_header`.

    use std::sync::Arc;

    use sqry_core::graph::unified::concurrent::CodeGraph;
    use tempfile::TempDir;

    use super::*;
    use crate::{QueryDb, QueryDbConfig};

    /// Build a minimal, empty `QueryDb` backed by an empty `CodeGraph`.
    fn empty_db() -> QueryDb {
        let snapshot = Arc::new(CodeGraph::new().snapshot());
        QueryDb::new(snapshot, QueryDbConfig::default())
    }

    /// AC 6: save → read back bytes → deserialize header → assert fields match.
    ///
    /// Uses an empty `QueryDb` so `entry_count = 0`.  The snapshot SHA,
    /// edge_revision, and metadata_revision are all asserted to match exactly.
    #[test]
    fn save_then_read_back_header_fields_match() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("derived.sqry");
        let workspace_root = dir.path();

        let db = empty_db();
        let snapshot_sha: [u8; 32] = [0xAB; 32];

        save_derived(&db, snapshot_sha, &path, workspace_root).unwrap();

        let bytes = std::fs::read(&path).unwrap();
        let (header, tail) = deserialize_derived_header(&bytes).unwrap();

        assert_eq!(
            header.snapshot_sha256, snapshot_sha,
            "snapshot SHA mismatch"
        );
        assert_eq!(
            header.edge_revision,
            db.edge_revision(),
            "edge_revision mismatch"
        );
        assert_eq!(
            header.metadata_revision,
            db.metadata_revision(),
            "metadata_revision mismatch"
        );
        assert_eq!(header.entry_count, 0, "expected 0 entries for empty db");
        assert!(header.is_valid_v02(), "header must pass v02 validation");
        assert!(tail.is_empty(), "no entry bytes expected after header");
    }

    /// AC 7 (unix-only): save rejects a symlinked target path.
    ///
    /// The rejection is expected to originate from path validation
    /// (`PathSafetyError::SymlinkTarget`) and reach the caller as an error
    /// whose cause chain mentions the symlink.  Creating the symlink needs
    /// `std::os::unix::fs::symlink`, hence the `#[cfg(unix)]` gate.
    #[test]
    #[cfg(unix)]
    fn save_rejects_symlinked_target_path() {
        use std::os::unix::fs::symlink;

        let dir = TempDir::new().unwrap();
        let real_file = dir.path().join("real.sqry");
        std::fs::write(&real_file, b"placeholder").unwrap();

        // Create a symlink inside the workspace pointing at the real file.
        let symlink_path = dir.path().join("link.sqry");
        symlink(&real_file, &symlink_path).unwrap();

        let db = empty_db();
        let workspace_root = dir.path();
        let snapshot_sha: [u8; 32] = [0u8; 32];

        let err = save_derived(&db, snapshot_sha, &symlink_path, workspace_root)
            .expect_err("save must reject symlinked target");

        // Some error in the cause chain must identify the symlink problem.
        // Each cause is rendered once and checked for either spelling.
        let is_symlink_error = err.chain().any(|cause| {
            let text = cause.to_string();
            text.contains("symlink") || text.contains("SymlinkTarget")
        });
        assert!(
            is_symlink_error,
            "expected SymlinkTarget error; got: {err:#}"
        );
    }

    /// AC 8: save with an empty cache writes a valid v02 header followed by
    /// an empty entry stream (tail is empty after decoding the header).
    #[test]
    fn save_empty_cache_writes_header_only() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("derived.sqry");
        let workspace_root = dir.path();

        let db = empty_db();
        let snapshot_sha: [u8; 32] = [0xCC; 32];

        save_derived(&db, snapshot_sha, &path, workspace_root).unwrap();

        let bytes = std::fs::read(&path).unwrap();
        assert!(
            !bytes.is_empty(),
            "output must be non-empty even for 0 entries"
        );

        let (header, tail) = deserialize_derived_header(&bytes).unwrap();
        assert!(header.is_valid_v02());
        assert_eq!(header.entry_count, 0);
        assert!(
            tail.is_empty(),
            "empty cache must produce no entry bytes after the header"
        );
    }

    /// AC 9: save is repeatable — calling save twice (with a delete in
    /// between) yields headers whose critical fields are identical.
    ///
    /// Byte-for-byte equality of the two files is deliberately NOT asserted:
    /// the header's `saved_at` field records wall time and may tick between
    /// the two calls.  The correctness guarantee is field-level equality of
    /// the snapshot SHA, both global revisions, the per-file revisions and
    /// `entry_count`, plus an empty entry tail in both files.
    #[test]
    fn save_is_idempotent_header_fields_stable_across_repeat_calls() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("derived.sqry");
        let workspace_root = dir.path();

        let db = empty_db();
        let snapshot_sha: [u8; 32] = [0x55; 32];

        // First save.
        save_derived(&db, snapshot_sha, &path, workspace_root).unwrap();
        let first_bytes = std::fs::read(&path).unwrap();

        // Delete the file, then save again.
        std::fs::remove_file(&path).unwrap();
        save_derived(&db, snapshot_sha, &path, workspace_root).unwrap();
        let second_bytes = std::fs::read(&path).unwrap();

        // Both outputs must decode to headers with identical critical fields.
        let (h1, tail1) = deserialize_derived_header(&first_bytes).unwrap();
        let (h2, tail2) = deserialize_derived_header(&second_bytes).unwrap();

        assert_eq!(h1.snapshot_sha256, h2.snapshot_sha256);
        assert_eq!(h1.edge_revision, h2.edge_revision);
        assert_eq!(h1.metadata_revision, h2.metadata_revision);
        assert_eq!(h1.entry_count, h2.entry_count);
        assert_eq!(h1.file_revisions, h2.file_revisions);
        assert!(tail1.is_empty());
        assert!(tail2.is_empty());
    }
}
1172
#[cfg(test)]
mod tests {
    //! Unit tests for the format constants, header/entry serialization,
    //! legacy-format rejection, and the small path/hash helpers.

    use super::*;
    use tempfile::NamedTempFile;

    // ---- Constants ---------------------------------------------------------

    #[test]
    fn magic_is_16_bytes_exactly() {
        assert_eq!(DERIVED_MAGIC.len(), 16);
        assert_eq!(&DERIVED_MAGIC, b"SQRY_DERIVED_V02");
    }

    #[test]
    fn format_version_is_two() {
        assert_eq!(DERIVED_FORMAT_VERSION, 2);
    }

    // ---- DerivedHeader round-trip ------------------------------------------

    /// A fully populated header survives a postcard encode/decode unchanged.
    #[test]
    fn header_round_trip() {
        let h = DerivedHeader {
            magic: DERIVED_MAGIC,
            format_version: DERIVED_FORMAT_VERSION,
            snapshot_sha256: [0xAB; 32],
            edge_revision: 7,
            metadata_revision: 3,
            file_revisions: vec![(FileId::new(1), 42), (FileId::new(2), 99)],
            entry_count: 42,
            saved_at: 1_700_000_000,
        };
        let bytes = postcard::to_allocvec(&h).unwrap();
        let decoded: DerivedHeader = postcard::from_bytes(&bytes).unwrap();
        assert_eq!(decoded, h);
    }

    #[test]
    fn header_is_valid_v02() {
        let h = DerivedHeader::new([0u8; 32], 1, 2, vec![], 0);
        assert!(h.is_valid_v02());
    }

    #[test]
    fn header_with_wrong_magic_is_not_valid_v02() {
        let mut h = DerivedHeader::new([0u8; 32], 1, 2, vec![], 0);
        h.magic[0] = b'X'; // corrupt first byte
        assert!(!h.is_valid_v02());
    }

    #[test]
    fn header_with_wrong_format_version_is_not_valid_v02() {
        let mut h = DerivedHeader::new([0u8; 32], 1, 2, vec![], 0);
        h.format_version = 1;
        assert!(!h.is_valid_v02());
    }

    // ---- Stream round-trip -------------------------------------------------

    /// Header plus two entries: serialize as one stream, then decode the
    /// header and walk the tail entry by entry until it is exhausted.
    #[test]
    fn stream_round_trip() {
        let header = DerivedHeader {
            magic: DERIVED_MAGIC,
            format_version: DERIVED_FORMAT_VERSION,
            snapshot_sha256: [0x55; 32],
            edge_revision: 10,
            metadata_revision: 5,
            file_revisions: vec![(FileId::new(3), 7)],
            entry_count: 2,
            saved_at: 1_700_000_001,
        };
        let entries = vec![
            PersistedEntry {
                query_type_id: 0x0001,
                raw_key_bytes: vec![1, 2, 3],
                raw_result_bytes: vec![4, 5, 6],
                deps: QueryDeps {
                    file_deps: vec![(FileId::new(1), 1)],
                    edge_revision: Some(10),
                    metadata_revision: None,
                },
            },
            PersistedEntry {
                query_type_id: 0x0002,
                raw_key_bytes: vec![7],
                raw_result_bytes: vec![8],
                deps: QueryDeps {
                    file_deps: vec![],
                    edge_revision: None,
                    metadata_revision: Some(5),
                },
            },
        ];

        let bytes = serialize_derived_stream(&header, entries.clone()).unwrap();

        let (decoded_header, mut tail) = deserialize_derived_header(&bytes).unwrap();
        assert_eq!(decoded_header, header);

        // Each decode step returns the entry and the remaining bytes.
        let mut decoded_entries = Vec::new();
        while !tail.is_empty() {
            let (entry, rest) = deserialize_next_entry(tail).unwrap();
            decoded_entries.push(entry);
            tail = rest;
        }

        assert_eq!(decoded_entries.len(), 2);
        assert_eq!(decoded_entries[0].query_type_id, entries[0].query_type_id);
        assert_eq!(decoded_entries[0].raw_key_bytes, entries[0].raw_key_bytes);
        assert_eq!(
            decoded_entries[0].raw_result_bytes,
            entries[0].raw_result_bytes
        );
        assert_eq!(decoded_entries[0].deps, entries[0].deps);
        assert_eq!(decoded_entries[1].query_type_id, entries[1].query_type_id);
        assert_eq!(decoded_entries[1].deps, entries[1].deps);
    }

    #[test]
    fn stream_with_zero_entries() {
        let header = DerivedHeader::new([0xCC; 32], 0, 0, vec![], 0);
        let bytes = serialize_derived_stream(&header, std::iter::empty()).unwrap();
        let (decoded_header, tail) = deserialize_derived_header(&bytes).unwrap();
        assert_eq!(decoded_header, header);
        assert!(tail.is_empty(), "no entries means empty tail");
    }

    // ---- Legacy v01 magic mismatch guard -----------------------------------

    #[test]
    fn legacy_v01_magic_is_not_v02_magic() {
        // The prior warm-only DerivedManifest (DB03) carried only:
        //   snapshot_sha256: [u8; 32], entry_count: usize, saved_at: u64
        // Decoding those bytes into DerivedHeader may succeed (postcard is
        // schema-free) but the decoded `magic` field will be garbage bytes
        // from the first 16 bytes of a SHA-256 hash, NOT b"SQRY_DERIVED_V02".
        // LOAD_PATH rejects files where is_valid_v02() returns false.
        //
        // This test pins the invariant for two sample prefixes: neither an
        // all-zero prefix nor a hash-like byte pattern equals DERIVED_MAGIC.
        // Belt-and-suspenders.
        let hypothetical_v01_first_16 = [0u8; 16]; // worst case: all-zero hash prefix
        assert_ne!(
            &DERIVED_MAGIC[..],
            &hypothetical_v01_first_16[..],
            "DERIVED_MAGIC must not equal any plausible v01 SHA-256 prefix"
        );

        // Also verify a non-zero SHA prefix (e.g., a common hash byte pattern)
        // doesn't accidentally match.
        let sha_like_prefix: [u8; 16] = [
            0x6b, 0x86, 0xb2, 0x73, 0xff, 0x34, 0xfc, 0xe1, 0x9d, 0x6b, 0x80, 0x4e, 0xff, 0x5a,
            0x3f, 0x57,
        ];
        assert_ne!(&DERIVED_MAGIC[..], &sha_like_prefix[..]);

        // Confirm DERIVED_MAGIC is exactly b"SQRY_DERIVED_V02" — not some
        // hash lookalike — so this test cannot pass vacuously.
        let magic_as_ascii = std::str::from_utf8(&DERIVED_MAGIC).expect("DERIVED_MAGIC is ASCII");
        assert_eq!(magic_as_ascii, "SQRY_DERIVED_V02");
    }

    #[test]
    fn legacy_v01_bytes_decode_as_invalid_header() {
        // Build a realistic v01 DerivedManifest byte sequence.  The old
        // struct was:
        //   { snapshot_sha256: [u8; 32], entry_count: usize, saved_at: u64 }
        //
        // The test does not depend on the exact wire encoding of these
        // fields.  What matters is that the stream begins with hash bytes
        // (0xDE here) where a v02 file has its magic, so a header decoded
        // from it cannot carry DERIVED_MAGIC and is_valid_v02() must return
        // false → LOAD_PATH rejects cleanly.

        #[derive(Serialize)]
        struct OldManifest {
            snapshot_sha256: [u8; 32],
            entry_count: usize,
            saved_at: u64,
        }

        let old = OldManifest {
            snapshot_sha256: [0xDE; 32],
            entry_count: 5,
            saved_at: 1_700_000_000,
        };
        let v01_bytes = postcard::to_allocvec(&old).unwrap();

        // Attempt to decode as DerivedHeader — may succeed or fail depending
        // on how the old bytes line up with the new fields.  If it succeeds,
        // the decoded header MUST fail is_valid_v02().
        match postcard::from_bytes::<DerivedHeader>(&v01_bytes) {
            Ok(decoded) => {
                assert!(
                    !decoded.is_valid_v02(),
                    "v01 bytes accidentally decoded as valid v02 header — \
                     LOAD_PATH rejection would be bypassed"
                );
            }
            Err(_) => {
                // Decode failed outright — also fine.  LOAD_PATH handles
                // deserialization errors as a Corrupt rejection.
            }
        }
    }

    // ---- DB03 warm-path compat tests (rewritten in terms of DerivedHeader) -

    /// Warm-path round-trip — rewritten from DB03's `manifest_round_trip` to
    /// use `DerivedHeader` directly.  Coverage intent is preserved: verify
    /// that a header saved via `save_manifest` / `load_manifest` survives a
    /// disk round-trip with matching fields.
    #[test]
    fn manifest_round_trip() {
        let hash = [42u8; 32];
        let header = DerivedHeader::new(hash, 0, 0, vec![], 100);

        assert!(header.matches_snapshot(&hash));
        assert!(!header.matches_snapshot(&[0u8; 32]));
        assert!(header.is_valid_v02());

        let temp = NamedTempFile::new().unwrap();
        save_manifest(temp.path(), &header).unwrap();

        // load_manifest decodes raw postcard bytes; the full v02 header
        // survives the round-trip.
        let loaded = load_manifest(temp.path()).unwrap();
        assert_eq!(loaded.snapshot_sha256, hash);
        assert_eq!(loaded.entry_count, 100);
        assert!(loaded.matches_snapshot(&hash));
        assert!(loaded.is_valid_v02());
    }

    /// The derived file is placed next to the snapshot, in the same directory.
    #[test]
    fn derived_path_computation() {
        let snapshot = Path::new("/home/user/.sqry/graph/snapshot.sqry");
        let derived = derived_path_for_snapshot(snapshot, "derived.sqry");
        assert_eq!(
            derived,
            PathBuf::from("/home/user/.sqry/graph/derived.sqry")
        );
    }

    #[test]
    fn load_manifest_missing_file() {
        let result = load_manifest(Path::new("/nonexistent/path/derived.sqry"));
        assert!(result.is_none());
    }

    /// `compute_file_sha256` must produce the known SHA-256 digest of the
    /// file's contents, not merely some non-zero 32-byte value.
    ///
    /// NOTE(review): assumes the digest is taken over the raw file bytes with
    /// no extra framing — confirm against `compute_file_sha256`.
    #[test]
    fn file_sha256() {
        // SHA-256("hello world") =
        //   b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9
        const HELLO_WORLD_SHA256: [u8; 32] = [
            0xb9, 0x4d, 0x27, 0xb9, 0x93, 0x4d, 0x3e, 0x08, 0xa5, 0x2e, 0x52, 0xd7, 0xda, 0x7d,
            0xab, 0xfa, 0xc4, 0x84, 0xef, 0xe3, 0x7a, 0x53, 0x80, 0xee, 0x90, 0x88, 0xf7, 0xac,
            0xe2, 0xef, 0xcd, 0xe9,
        ];

        let temp = NamedTempFile::new().unwrap();
        std::fs::write(temp.path(), b"hello world").unwrap();
        let hash = compute_file_sha256(temp.path()).unwrap();

        assert_eq!(
            hash, HELLO_WORLD_SHA256,
            "digest must equal SHA-256 of the file contents"
        );
    }

    // ---- QueryDeps ---------------------------------------------------------

    #[test]
    fn query_deps_default_is_empty() {
        let deps = QueryDeps::default();
        assert!(deps.file_deps.is_empty());
        assert!(deps.edge_revision.is_none());
        assert!(deps.metadata_revision.is_none());
    }

    #[test]
    fn query_deps_round_trip() {
        let deps = QueryDeps {
            file_deps: vec![(FileId::new(1), 7), (FileId::new(2), 3)],
            edge_revision: Some(99),
            metadata_revision: Some(4),
        };
        let bytes = postcard::to_allocvec(&deps).unwrap();
        let decoded: QueryDeps = postcard::from_bytes(&bytes).unwrap();
        assert_eq!(decoded, deps);
    }
}