sqry_db/persistence.rs
1//! Derived facts persistence to `.sqry/graph/derived.sqry`.
2//!
3//! Hot derived facts (cached query results) are persisted to a companion
4//! file alongside the main snapshot. On reload, the cache is warmed from
5//! the persisted facts if the graph identity (snapshot SHA-256) matches.
6//!
7//! # Format (v02)
8//!
9//! The derived file is a postcard stream with the layout:
10//!
11//! ```text
12//! [DerivedHeader][PersistedEntry][PersistedEntry]...[PersistedEntry]
13//! ```
14//!
15//! The header is always first and carries the magic bytes, format version,
16//! snapshot identity, all three revision tiers, and the entry count.
17//! Each subsequent record is a [`PersistedEntry`] carrying the serialized
18//! key, value, and dependency metadata for one cached query result.
19//!
//! Record-by-record decode (postcard `take_from_bytes`) lets fatal framing
//! corruption at entry N be caught before any entry is committed. Note that
//! the current save and load paths still hold the whole encoded file in
//! memory: the stream is built into, and read back from, a single buffer.
24//!
25//! # Magic + version
26//!
27//! Magic: [`DERIVED_MAGIC`] — exactly 16 ASCII bytes `b"SQRY_DERIVED_V02"`.
28//! Format version: [`DERIVED_FORMAT_VERSION`] = `2`. Version `1` is
29//! reserved and intentionally skipped to avoid schema collision with the
30//! prior warm-only `DerivedManifest` (DB03, three-field struct).
31//!
32//! # Stale detection
33//!
34//! If the snapshot's SHA-256 doesn't match the header's `snapshot_sha256`,
35//! the entire derived file is discarded and queries recompute on demand.
36
37use std::path::{Path, PathBuf};
38use std::sync::atomic::Ordering;
39
40use serde::{Deserialize, Serialize};
41use sha2::{Digest, Sha256};
42use sqry_core::graph::unified::file::id::FileId;
43use sqry_core::persistence::{PathSafetyError, atomic_write_bytes, validate_path_in_workspace};
44
45// ============================================================================
46// Constants
47// ============================================================================
48
/// Magic bytes carried in the header of every v02 derived-cache file.
///
/// Exactly 16 ASCII bytes. The value acts as a fixed-size header guard:
/// [`load_derived`] rejects any file whose decoded header magic differs from
/// this exact sequence.
///
/// 16 bytes (not 15) was chosen to fix the iter1-flagged inconsistency in the
/// prior `"SQRY_DERIVED_V1"` string (15 bytes).
pub const DERIVED_MAGIC: [u8; 16] = *b"SQRY_DERIVED_V02";
56
/// Format revision of the current derived-cache wire format.
///
/// Value `2` skips `1` to avoid schema collision with the prior warm-only
/// `DerivedManifest` (DB03), which used a 3-field postcard struct. The
/// `LOAD_PATH` unit rejects any file whose decoded `format_version` differs
/// from this constant.
pub const DERIVED_FORMAT_VERSION: u16 = 2;
63
64// ============================================================================
65// QueryDeps — serializable three-tier dependency snapshot
66// ============================================================================
67
/// Serializable snapshot of the three-tier dependency metadata recorded
/// during query execution.
///
/// Stored inside each [`PersistedEntry`] so the LOAD_PATH layer can
/// reconstruct `CachedResult`'s dependency fields after cold-start
/// rehydration.
///
/// Field names intentionally mirror the `CachedResult` fields:
/// - Tier 1: `file_deps` — `(FileId, revision_at_read_time)` pairs.
/// - Tier 2: `edge_revision` — global edge revision at cache time.
/// - Tier 3: `metadata_revision` — global metadata revision at cache time.
///
/// The derived `Default` yields an empty `file_deps` list with both revision
/// tiers set to `None`.
///
/// NOTE(review): this struct is embedded in the postcard stream, so its field
/// order is presumably part of the wire format as well — confirm before
/// reordering fields.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct QueryDeps {
    /// Tier 1: file-level dependencies.
    ///
    /// Each entry is `(FileId, revision_at_read_time)`. An empty `Vec`
    /// means the query did not touch any file-specific data (rare but valid
    /// for pure global queries).
    pub file_deps: Vec<(FileId, u64)>,
    /// Tier 2: global edge revision at cache time.
    ///
    /// `None` if the query does not track `TRACKS_EDGE_REVISION`.
    pub edge_revision: Option<u64>,
    /// Tier 3: global metadata revision at cache time.
    ///
    /// `None` if the query does not track `TRACKS_METADATA_REVISION`.
    pub metadata_revision: Option<u64>,
}
96
97// ============================================================================
98// DerivedHeader — file-level header (v02)
99// ============================================================================
100
/// File-level header. Always the first record in the derived file.
///
/// Carries the magic guard, format version, snapshot identity, all three
/// revision tiers, the entry count, and the save timestamp.
///
/// Field order MUST NOT be changed: postcard serialization is order-sensitive,
/// so the declaration order below is the on-disk layout.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DerivedHeader {
    /// Magic bytes. EXACTLY 16 ASCII bytes: `b"SQRY_DERIVED_V02"`.
    pub magic: [u8; 16],
    /// Format revision. Current: 2.
    pub format_version: u16,
    /// SHA-256 of the main `snapshot.sqry` file.
    pub snapshot_sha256: [u8; 32],
    /// Saved global edge revision.
    pub edge_revision: u64,
    /// Saved global metadata revision.
    pub metadata_revision: u64,
    /// Saved per-file revisions as `(FileId, revision)` pairs.
    pub file_revisions: Vec<(FileId, u64)>,
    /// Number of `PersistedEntry` records following the header.
    pub entry_count: u64,
    /// Unix seconds when saved.
    ///
    /// Filled in by [`DerivedHeader::new`] from the system clock; a clock
    /// reading earlier than the Unix epoch is recorded as `0`.
    pub saved_at: u64,
}
126
127impl DerivedHeader {
128 /// Creates a new v02 header for the given snapshot hash and revision
129 /// state. `saved_at` is populated from `SystemTime::now`.
130 #[must_use]
131 pub fn new(
132 snapshot_sha256: [u8; 32],
133 edge_revision: u64,
134 metadata_revision: u64,
135 file_revisions: Vec<(FileId, u64)>,
136 entry_count: u64,
137 ) -> Self {
138 let saved_at = std::time::SystemTime::now()
139 .duration_since(std::time::UNIX_EPOCH)
140 .unwrap_or_default()
141 .as_secs();
142 Self {
143 magic: DERIVED_MAGIC,
144 format_version: DERIVED_FORMAT_VERSION,
145 snapshot_sha256,
146 edge_revision,
147 metadata_revision,
148 file_revisions,
149 entry_count,
150 saved_at,
151 }
152 }
153
154 /// Returns `true` if the magic bytes and format version identify a valid
155 /// v02 derived file.
156 ///
157 /// Used by LOAD_PATH to reject legacy v01 files and corrupted files
158 /// before attempting entry decode.
159 #[must_use]
160 pub fn is_valid_v02(&self) -> bool {
161 self.magic == DERIVED_MAGIC && self.format_version == DERIVED_FORMAT_VERSION
162 }
163
164 /// Checks if this header matches the given snapshot hash.
165 #[must_use]
166 pub fn matches_snapshot(&self, snapshot_sha256: &[u8; 32]) -> bool {
167 self.snapshot_sha256 == *snapshot_sha256
168 }
169}
170
171// ============================================================================
172// Legacy DB03 alias
173// ============================================================================
174
/// Legacy DB03 alias for [`DerivedHeader`].
///
/// The warm-only `DerivedManifest` (DB03, three-field struct) has been
/// superseded by `DerivedHeader` v02. This alias is retained so that any
/// code referencing `DerivedManifest` compiles without changes during the
/// PN3 transition. New code should use [`DerivedHeader`] directly.
pub type DerivedManifest = DerivedHeader;
183
184// ============================================================================
185// PersistedEntry — per-entry wire record
186// ============================================================================
187
/// One persisted cache entry in the derived file.
///
/// Follows the [`DerivedHeader`] in the stream, repeated `entry_count` times.
/// The LOAD_PATH unit decodes entries one-by-one with
/// [`deserialize_next_entry`] so framing corruption at entry N is caught
/// before any entries are committed.
///
/// The key and result payloads are kept as opaque byte vectors here; this
/// record does not interpret them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PersistedEntry {
    /// Stable on-disk query type discriminator. Must match a registered
    /// query's `DerivedQuery::QUERY_TYPE_ID`; unknown IDs are silently
    /// skipped by LOAD_PATH.
    pub query_type_id: u32,
    /// Postcard-serialized query key bytes.
    pub raw_key_bytes: Vec<u8>,
    /// Postcard-serialized query result bytes.
    pub raw_result_bytes: Vec<u8>,
    /// Three-tier dependency snapshot at cache time.
    pub deps: QueryDeps,
}
207
208// ============================================================================
209// Stream helpers
210// ============================================================================
211
212/// Serialize the header + iterator of entries into a single `Vec<u8>`.
213///
214/// Wire layout: `[header postcard bytes][entry postcard bytes]*`.
215///
216/// Each record is independently postcard-encoded and concatenated. The LOAD_PATH
217/// layer uses [`deserialize_derived_header`] + repeated [`deserialize_next_entry`]
218/// to decode the stream incrementally without peak-RAM buffering of all entries.
219///
220/// # Errors
221///
222/// Returns `postcard::Error` if serialization of the header or any entry
223/// fails. All `postcard::to_allocvec` calls are infallible for well-formed
224/// structs in practice; the `?` propagation is for forward-compatibility.
225pub fn serialize_derived_stream<I>(
226 header: &DerivedHeader,
227 entries: I,
228) -> Result<Vec<u8>, postcard::Error>
229where
230 I: IntoIterator<Item = PersistedEntry>,
231{
232 let mut buf = postcard::to_allocvec(header)?;
233 for entry in entries {
234 let entry_bytes = postcard::to_allocvec(&entry)?;
235 buf.extend_from_slice(&entry_bytes);
236 }
237 Ok(buf)
238}
239
240/// Deserialize the header from the beginning of `bytes`, returning the
241/// header and the remaining byte slice (the entry stream tail).
242///
243/// Does NOT decode entries — that is the caller's responsibility. LOAD_PATH
244/// calls [`deserialize_next_entry`] repeatedly on the returned tail to decode
245/// entries one at a time inside a staged-validation loop.
246///
247/// # Errors
248///
249/// Returns `postcard::Error` on header deserialization failure (truncated
250/// data, schema mismatch, etc.).
251pub fn deserialize_derived_header(bytes: &[u8]) -> Result<(DerivedHeader, &[u8]), postcard::Error> {
252 postcard::take_from_bytes(bytes)
253}
254
255/// Decode a single [`PersistedEntry`] from the head of `bytes`, returning
256/// the entry and the remaining tail.
257///
258/// Callers iterate this function inside a staged-validation loop until
259/// `tail.is_empty()`, accumulating entries for atomic commit.
260///
261/// # Errors
262///
263/// Returns `postcard::Error` on entry deserialization failure. A single
264/// failing entry aborts the whole load in the staged-validation loop (fatal
265/// framing rejection).
266pub fn deserialize_next_entry(bytes: &[u8]) -> Result<(PersistedEntry, &[u8]), postcard::Error> {
267 postcard::take_from_bytes(bytes)
268}
269
270// ============================================================================
271// SHA-256 + path helpers (unchanged from DB03)
272// ============================================================================
273
274/// Computes the SHA-256 hash of a file at the given path.
275///
276/// # Errors
277///
278/// Returns an IO error if the file cannot be read.
279pub fn compute_file_sha256(path: &Path) -> std::io::Result<[u8; 32]> {
280 let data = std::fs::read(path)?;
281 let mut hasher = Sha256::new();
282 hasher.update(&data);
283 let result = hasher.finalize();
284 let mut hash = [0u8; 32];
285 hash.copy_from_slice(&result);
286 Ok(hash)
287}
288
/// Builds the path of a companion file named `filename` that sits next to
/// the snapshot at `snapshot_path`.
///
/// The companion lives in the snapshot's parent directory: for a snapshot at
/// `.sqry/graph/snapshot.sqry` and a `filename` of `derived.sqry` the result
/// is `.sqry/graph/derived.sqry`. When `snapshot_path` has no parent
/// (for example an empty path or a filesystem root), the current directory
/// `.` is used as the base.
#[must_use]
pub fn derived_path_for_snapshot(snapshot_path: &Path, filename: &str) -> PathBuf {
    match snapshot_path.parent() {
        Some(snapshot_dir) => snapshot_dir.join(filename),
        None => Path::new(".").join(filename),
    }
}
301
302/// Saves a derived header to disk (warm-path compatibility shim).
303///
304/// This function is retained as a thin compatibility shim for the existing
305/// warm-path tests and callers that previously called `save_manifest`.
306/// New code should use the full `save_derived` function (SAVE_PATH unit).
307///
308/// # Errors
309///
310/// Returns an error if serialization or file writing fails.
311pub fn save_manifest(path: &Path, manifest: &DerivedHeader) -> anyhow::Result<()> {
312 let bytes = postcard::to_allocvec(manifest)?;
313 std::fs::write(path, bytes)?;
314 Ok(())
315}
316
317/// Loads a derived header from disk (warm-path compatibility shim).
318///
319/// Returns `None` if the file doesn't exist, can't be read, or can't be
320/// deserialized. Note: this decodes the whole file as a `DerivedHeader` and
321/// does NOT validate magic / format_version — that responsibility lives in the
322/// LOAD_PATH unit's staged-validation loop.
323#[must_use]
324pub fn load_manifest(path: &Path) -> Option<DerivedHeader> {
325 let bytes = std::fs::read(path).ok()?;
326 postcard::from_bytes(&bytes).ok()
327}
328
329// ============================================================================
330// save_derived — SAVE_PATH unit
331// ============================================================================
332
333/// Writes the QueryDb's persistent cache entries to `path` using an atomic
334/// write.
335///
336/// # Algorithm
337///
338/// 1. [`validate_path_in_workspace`] before any IO — rejects symlink targets,
339/// symlinked ancestor directories, and paths outside the workspace.
340/// 2. Collect all persistent cache entries via
341/// [`QueryDb::iter_persistent_cache_entries`] into a `Vec` so shard locks
342/// are released before any allocation-intensive encoding begins.
343/// 3. Build a [`DerivedHeader`] from the current DB state with
344/// `entry_count = entries.len()`.
345/// 4. [`serialize_derived_stream`] → byte vector.
346/// 5. [`atomic_write_bytes`] — tempfile-in-same-dir + fsync + rename so the
347/// target is never left partially written.
348///
349/// # Non-mutating
350///
351/// Takes `&QueryDb` (not `&mut`). Save is a read-only operation on the DB;
352/// it does not mutate revisions, the cache, or any other internal state.
353///
354/// # Errors
355///
356/// - [`sqry_core::persistence::PathSafetyError`] wrapped as `anyhow::Error`
357/// when the target path fails workspace validation.
358/// - [`postcard::Error`] wrapped as `anyhow::Error` on serialisation failure.
359/// - [`std::io::Error`] wrapped as `anyhow::Error` on atomic write failure.
360pub fn save_derived(
361 db: &crate::QueryDb,
362 snapshot_sha256: [u8; 32],
363 path: &Path,
364 workspace_root: &Path,
365) -> anyhow::Result<()> {
366 // Step 1: Path safety validation — must happen before any IO.
367 //
368 // The returned `canonical_path` is what every subsequent IO operation
369 // must use. The raw `path` parameter may be relative, contain `..`
370 // components, or otherwise differ from the validated target; writing
371 // via the raw path would defeat the validation entirely (Codex review
372 // finding).
373 let canonical_path = validate_path_in_workspace(path, workspace_root)?;
374
375 // Step 2: Collect persistent entries (releases all shard locks before IO).
376 let persistent: Vec<PersistedEntry> = db
377 .iter_persistent_cache_entries()
378 .map(|e| PersistedEntry {
379 query_type_id: e.query_type_id,
380 raw_key_bytes: e.raw_key_bytes.to_vec(),
381 raw_result_bytes: e.raw_result_bytes.to_vec(),
382 deps: e.deps,
383 })
384 .collect();
385
386 // Step 3: Build header — entry_count is now known.
387 let header = DerivedHeader::new(
388 snapshot_sha256,
389 db.edge_revision(),
390 db.metadata_revision(),
391 db.inputs().all_revisions(),
392 persistent.len() as u64,
393 );
394
395 // Step 4: Serialize header + entry stream into a single buffer.
396 let bytes = serialize_derived_stream(&header, persistent)?;
397
398 // Step 5: Atomic write — tempfile + fsync + rename. MUST target the
399 // validated canonical path, never the raw caller input.
400 atomic_write_bytes(&canonical_path, &bytes)?;
401
402 Ok(())
403}
404
405// ============================================================================
406// load_derived — LOAD_PATH unit
407// ============================================================================
408
/// Failure modes for [`load_derived`].
///
/// The caller should treat [`LoadError::NotFound`] as a soft miss (the derived
/// file simply doesn't exist yet — normal on first run) and all other variants
/// as hard errors that warrant deleting or ignoring the derived file.
///
/// `PathSafety` and `Io` carry `#[from]` conversions, so the corresponding
/// source errors can be propagated with `?` inside functions returning
/// `Result<_, LoadError>`.
#[derive(Debug, thiserror::Error)]
pub enum LoadError {
    /// The derived-cache file does not exist at `path`.
    #[error("derived-cache file not found: {path}")]
    NotFound {
        /// The path that was checked.
        path: PathBuf,
    },
    /// The file's `snapshot_sha256` header field does not match `snapshot_sha256`.
    ///
    /// The derived file was produced from a different graph snapshot (stale
    /// or corrupted). The file should be deleted and queries recomputed.
    #[error("derived-cache snapshot SHA mismatch — file discarded")]
    StaleSnapshot,
    /// The file is structurally corrupt (bad magic, wrong version, truncated
    /// entry stream, etc.).
    #[error("derived-cache file is corrupt: {detail}")]
    Corrupt {
        /// Human-readable description of the corruption detected.
        detail: String,
    },
    /// The path failed workspace safety validation before the file was opened.
    ///
    /// Wraps [`sqry_core::persistence::PathSafetyError`].
    #[error("derived-cache path validation failed: {0}")]
    PathSafety(#[from] PathSafetyError),
    /// An IO error occurred while opening or reading the file.
    #[error("derived-cache IO error: {0}")]
    Io(#[from] std::io::Error),
    /// A successful `load_derived` call has already been applied to this DB.
    ///
    /// Subsequent calls are no-ops: the cold-load window is closed after the
    /// first successful load, preventing accidental double-apply of stale
    /// or different on-disk state.
    #[error("derived-cache load already applied to this DB; subsequent calls are no-ops")]
    AlreadyLoaded,
}
451
/// Outcome of a successful [`load_derived`] call.
///
/// [`load_derived`] currently only ever returns `Applied`; `Skipped` cannot
/// be constructed while [`SkipReason`] has no variants.
#[derive(Debug, Clone)]
pub enum LoadOutcome {
    /// The derived file was loaded and `entries` cache entries were applied.
    Applied {
        /// Number of entries committed to the cache.
        ///
        /// Unknown-`query_type_id` entries (forward-compat skip) are NOT
        /// counted here; only entries that were actually staged and committed
        /// are included.
        entries: usize,
    },
    /// The load was skipped for `reason`.
    Skipped(SkipReason),
}
467
/// Reason for skipping a load attempt.
///
/// Currently no variants are defined, so no value of this type can exist yet;
/// the enum is a forward-compatible placeholder for future skip conditions
/// (e.g., `Disabled`, `FileTooLarge`, `RateLimited`).
#[derive(Debug, Clone)]
pub enum SkipReason {
    // No current variants — placeholder for forward compatibility.
}
476
477/// Staged entry carrying only raw bytes + type id + deps — no typed value.
478///
479/// Produced by the validation loop in [`load_derived`] and consumed by
480/// [`QueryDb::commit_staged_load`]. The staged form is intentionally
481/// type-erased: deserialising each query's typed key/value is unnecessary
482/// for cold-load warming.
483pub struct StagedEntry {
484 /// Stable on-disk query type discriminator from the stream.
485 pub query_type_id: u32,
486 /// Raw postcard-serialised key bytes from the persisted entry.
487 pub raw_key_bytes: Vec<u8>,
488 /// Raw postcard-serialised result bytes from the persisted entry.
489 pub raw_result_bytes: Vec<u8>,
490 /// Three-tier dependency snapshot at cache time.
491 pub deps: QueryDeps,
492}
493
494/// Returns `true` if `id` is one of the 15 built-in query type IDs.
495///
496/// Used by the validation loop to decide whether to stage or silently skip
497/// an entry. Unknown IDs (forward-compat additions, downstream IDs, 0x0000)
498/// are skipped without error to allow rolling upgrades and file sharing
499/// across sqry versions.
500#[inline]
501fn is_known_builtin(id: u32) -> bool {
502 use crate::queries::type_ids;
503 matches!(
504 id,
505 type_ids::CALLERS
506 | type_ids::CALLEES
507 | type_ids::IMPORTS
508 | type_ids::EXPORTS
509 | type_ids::REFERENCES
510 | type_ids::IMPLEMENTS
511 | type_ids::CYCLES
512 | type_ids::IS_IN_CYCLE
513 | type_ids::UNUSED
514 | type_ids::IS_NODE_UNUSED
515 | type_ids::REACHABILITY
516 | type_ids::ENTRY_POINTS
517 | type_ids::REACHABLE_FROM_ENTRY_POINTS
518 | type_ids::SCC
519 | type_ids::CONDENSATION
520 )
521}
522
523/// Load a derived file at `path` into a pristine [`QueryDb`].
524///
525/// # Staged-validation + infallible-commit contract (spec §5.7)
526///
527/// 1. Path validation happens **before** any file IO.
528/// 2. All fallible work (file open, header decode, magic/version check, SHA
529/// match, entry stream decode) runs in the validation phase and returns
530/// `Err(...)` without touching the DB.
531/// 3. Once all entries are staged successfully, [`QueryDb::commit_staged_load`]
532/// is called. That function is **infallible by construction** — it contains
533/// no `?`, no `Result`-bearing call, and no `map_err`.
534/// 4. After commit, `cold_load_allowed` is flipped to `false` to prevent a
535/// second load from overwriting the committed state.
536///
537/// # Errors
538///
539/// - [`LoadError::PathSafety`] — path fails workspace validation.
540/// - [`LoadError::NotFound`] — file does not exist (`ENOENT`).
541/// - [`LoadError::Io`] — other IO errors.
542/// - [`LoadError::Corrupt`] — magic mismatch, version mismatch, or truncated
543/// entry stream.
544/// - [`LoadError::StaleSnapshot`] — SHA-256 in the header doesn't match
545/// `snapshot_sha256`.
546/// - [`LoadError::AlreadyLoaded`] — a successful load has already been applied
547/// to `db`.
548pub fn load_derived(
549 db: &mut crate::QueryDb,
550 snapshot_sha256: [u8; 32],
551 path: &Path,
552 workspace_root: &Path,
553) -> Result<LoadOutcome, LoadError> {
554 // Step 1: Path safety validation — must happen before any IO.
555 //
556 // The returned `canonical_path` is what every subsequent IO operation
557 // must use. Reading via the raw `path` would defeat the validation
558 // (Codex review finding).
559 let canonical_path = validate_path_in_workspace(path, workspace_root)?;
560
561 // Step 5 (early): Check cold-load window before any file IO.
562 //
563 // Spec §5.7 lists this as step 5, but elevating the check to before the
564 // file open satisfies both the atomicity contract (DB is never double-loaded)
565 // and the test requirement that AlreadyLoaded is returned without reading
566 // the file. This is strictly safer: no point paying for disk reads when
567 // the load cannot proceed regardless.
568 if !db.cold_load_allowed.load(Ordering::Acquire) {
569 return Err(LoadError::AlreadyLoaded);
570 }
571
572 // Step 2: Open the file and read all bytes — via the validated
573 // canonical path, never the raw caller input.
574 let bytes = match std::fs::read(&canonical_path) {
575 Ok(b) => b,
576 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
577 return Err(LoadError::NotFound {
578 path: canonical_path.clone(),
579 });
580 }
581 Err(e) => return Err(LoadError::Io(e)),
582 };
583
584 // Step 3 (inline with step 2): bytes are now in memory.
585
586 // Step 4: Decode and validate the header.
587 let (header, mut tail) =
588 deserialize_derived_header(&bytes).map_err(|e| LoadError::Corrupt {
589 detail: format!("header decode: {e}"),
590 })?;
591
592 if header.magic != DERIVED_MAGIC {
593 return Err(LoadError::Corrupt {
594 detail: "magic mismatch".to_owned(),
595 });
596 }
597 if header.format_version != DERIVED_FORMAT_VERSION {
598 return Err(LoadError::Corrupt {
599 detail: format!(
600 "version mismatch: expected {DERIVED_FORMAT_VERSION}, got {}",
601 header.format_version
602 ),
603 });
604 }
605 if header.snapshot_sha256 != snapshot_sha256 {
606 return Err(LoadError::StaleSnapshot);
607 }
608
609 // Step 6: Streaming entry validation — accumulate into `staged`.
610 // DB is NOT touched if any entry decode fails.
611 let mut staged: Vec<StagedEntry> = Vec::new();
612 while !tail.is_empty() {
613 let (entry, rest) = deserialize_next_entry(tail).map_err(|e| LoadError::Corrupt {
614 detail: format!("entry decode: {e}"),
615 })?;
616 tail = rest;
617
618 if !is_known_builtin(entry.query_type_id) {
619 // Unknown or reserved ID — skip silently for forward/backward compat.
620 continue;
621 }
622
623 staged.push(StagedEntry {
624 query_type_id: entry.query_type_id,
625 raw_key_bytes: entry.raw_key_bytes,
626 raw_result_bytes: entry.raw_result_bytes,
627 deps: entry.deps,
628 });
629 }
630
631 // --- COMMIT BOUNDARY ---
632 // All validation above passed. From here on: no `?`, no `Result`.
633 // Steps 7–9 are the infallible commit phase.
634
635 // Step 7: Commit staged entries — INFALLIBLE.
636 let entries_applied = staged.len();
637 db.commit_staged_load(header, staged);
638
639 // Step 8: Close the cold-load window.
640 db.cold_load_allowed.store(false, Ordering::Release);
641
642 // Step 9: Return success.
643 Ok(LoadOutcome::Applied {
644 entries: entries_applied,
645 })
646}
647
648// ============================================================================
649// Tests
650// ============================================================================
651
652// ============================================================================
653// save_path_tests — SAVE_PATH acceptance tests
654// ============================================================================
655
656// ============================================================================
657// load_path_tests — LOAD_PATH acceptance tests
658// ============================================================================
659
660#[cfg(test)]
661mod load_path_tests {
662 use std::sync::Arc;
663
664 use sqry_core::graph::unified::concurrent::CodeGraph;
665 use tempfile::TempDir;
666
667 use super::*;
668 use crate::queries::type_ids;
669 use crate::{QueryDb, QueryDbConfig};
670
671 // -------------------------------------------------------------------------
672 // Test helpers
673 // -------------------------------------------------------------------------
674
675 /// Build a minimal empty `QueryDb` backed by an empty `CodeGraph`.
676 fn empty_db() -> QueryDb {
677 let snapshot = Arc::new(CodeGraph::new().snapshot());
678 QueryDb::new(snapshot, QueryDbConfig::default())
679 }
680
681 /// Build a v02 stream with `n_entries` valid entries of type CALLERS
682 /// and return the serialised bytes.
683 fn make_valid_stream(sha: [u8; 32], n_entries: usize) -> Vec<u8> {
684 let entries: Vec<PersistedEntry> = (0..n_entries)
685 .map(|i| PersistedEntry {
686 query_type_id: type_ids::CALLERS,
687 raw_key_bytes: vec![i as u8],
688 raw_result_bytes: vec![0xAA, i as u8],
689 deps: QueryDeps::default(),
690 })
691 .collect();
692 let header = DerivedHeader::new(sha, 5, 3, vec![], entries.len() as u64);
693 serialize_derived_stream(&header, entries).unwrap()
694 }
695
696 // -------------------------------------------------------------------------
697 // AC 15: happy_path_roundtrip
698 // -------------------------------------------------------------------------
699
700 /// Save a DB with entries via `save_derived`, then load via `load_derived`
701 /// and assert `Applied.entries` == what was saved.
702 #[test]
703 fn happy_path_roundtrip() {
704 let dir = TempDir::new().unwrap();
705 let path = dir.path().join("derived.sqry");
706 let workspace_root = dir.path();
707 let sha: [u8; 32] = [0x42; 32];
708
709 // Build a stream with 3 known-type entries.
710 let bytes = make_valid_stream(sha, 3);
711 std::fs::write(&path, &bytes).unwrap();
712
713 let mut db = empty_db();
714 let outcome = load_derived(&mut db, sha, &path, workspace_root).unwrap();
715
716 match outcome {
717 LoadOutcome::Applied { entries } => {
718 assert_eq!(entries, 3, "expected 3 entries applied");
719 }
720 LoadOutcome::Skipped(_) => panic!("unexpected Skipped outcome"),
721 }
722 }
723
724 // -------------------------------------------------------------------------
725 // AC 16: missing_file_returns_not_found
726 // -------------------------------------------------------------------------
727
728 #[test]
729 fn missing_file_returns_not_found() {
730 let dir = TempDir::new().unwrap();
731 let path = dir.path().join("nonexistent.sqry");
732 let workspace_root = dir.path();
733
734 let mut db = empty_db();
735 let err = load_derived(&mut db, [0u8; 32], &path, workspace_root)
736 .expect_err("missing file must return Err");
737
738 assert!(
739 matches!(err, LoadError::NotFound { .. }),
740 "expected NotFound, got: {err}"
741 );
742 }
743
744 // -------------------------------------------------------------------------
745 // AC 17: sha_mismatch_returns_stale_snapshot
746 // -------------------------------------------------------------------------
747
748 #[test]
749 fn sha_mismatch_returns_stale_snapshot() {
750 let dir = TempDir::new().unwrap();
751 let path = dir.path().join("derived.sqry");
752 let workspace_root = dir.path();
753
754 let saved_sha: [u8; 32] = [0x11; 32];
755 let caller_sha: [u8; 32] = [0x22; 32]; // different
756
757 let bytes = make_valid_stream(saved_sha, 0);
758 std::fs::write(&path, &bytes).unwrap();
759
760 let mut db = empty_db();
761 let err = load_derived(&mut db, caller_sha, &path, workspace_root)
762 .expect_err("SHA mismatch must return Err");
763
764 assert!(
765 matches!(err, LoadError::StaleSnapshot),
766 "expected StaleSnapshot, got: {err}"
767 );
768 }
769
770 // -------------------------------------------------------------------------
771 // AC 18: magic_mismatch_returns_corrupt
772 // -------------------------------------------------------------------------
773
774 #[test]
775 fn magic_mismatch_returns_corrupt() {
776 let dir = TempDir::new().unwrap();
777 let path = dir.path().join("derived.sqry");
778 let workspace_root = dir.path();
779
780 let sha: [u8; 32] = [0x33; 32];
781
782 // Build a valid stream then corrupt the first byte of the magic.
783 let mut bytes = make_valid_stream(sha, 0);
784 bytes[0] ^= 0xFF; // flip bits in magic[0]
785 std::fs::write(&path, &bytes).unwrap();
786
787 let mut db = empty_db();
788 let err = load_derived(&mut db, sha, &path, workspace_root)
789 .expect_err("magic mismatch must return Err");
790
791 // Either a decode error (postcard fails) or a Corrupt(magic mismatch).
792 assert!(
793 matches!(err, LoadError::Corrupt { .. }),
794 "expected Corrupt, got: {err}"
795 );
796 }
797
798 // -------------------------------------------------------------------------
799 // AC 19: truncated_file_returns_corrupt_and_db_unchanged
800 // -------------------------------------------------------------------------
801
/// Truncating the entry stream part-way through the first entry (after an
/// intact header) triggers `Err(Corrupt)` and the DB is NOT mutated:
/// `edge_revision` keeps its pre-load value and `cold_load_allowed` stays
/// `true`.
#[test]
fn truncated_file_returns_corrupt_and_db_unchanged() {
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("derived.sqry");
    let workspace_root = dir.path();

    let sha: [u8; 32] = [0x44; 32];

    // Build a stream with 2 entries; only a prefix of it is written to disk.
    let full_bytes = make_valid_stream(sha, 2);

    // Find where the header ends by deserialising it: the `tail` handed back
    // by the decoder is the entry stream, so the header length is the
    // difference between the two slice lengths.
    let (_header, tail) = deserialize_derived_header(&full_bytes).unwrap();
    let header_len = full_bytes.len() - tail.len();

    // Keep the header plus the first 3 bytes of the entry stream. The cut
    // lands inside the first entry as long as that entry is longer than
    // 3 bytes (ours carry key + value + deps).
    let truncated_len = header_len + 3;
    // Make the fixture assumption explicit so a too-short stream fails with a
    // clear message instead of an opaque slice-index panic or a file that is
    // not actually truncated.
    assert!(
        truncated_len < full_bytes.len(),
        "fixture must extend past the {}-byte cut point; stream is only {} bytes",
        truncated_len,
        full_bytes.len()
    );
    let truncated_bytes = &full_bytes[..truncated_len];
    std::fs::write(&path, truncated_bytes).unwrap();

    let mut db = empty_db();
    let initial_edge_rev = db.edge_revision();

    let err = load_derived(&mut db, sha, &path, workspace_root)
        .expect_err("truncated file must return Err");

    assert!(
        matches!(err, LoadError::Corrupt { .. }),
        "expected Corrupt, got: {err}"
    );

    // DB must be untouched: edge_revision unchanged.
    assert_eq!(
        db.edge_revision(),
        initial_edge_rev,
        "DB edge_revision must be unchanged after failed load"
    );
    // cold_load_allowed must still be true so a retry on a repaired file
    // is correct.
    assert!(
        db.cold_load_allowed(),
        "cold_load_allowed must remain true after failed load"
    );
}
851
852 // -------------------------------------------------------------------------
853 // AC 20: unknown_query_type_id_skipped_silently
854 // -------------------------------------------------------------------------
855
/// Feeds `load_derived` five entries in the order CALLERS, CALLERS,
/// unrecognised type ID, CALLEES, CALLEES and expects an `Applied` outcome
/// that reports 4 entries (the unrecognised one is dropped without error).
#[test]
fn unknown_query_type_id_skipped_silently() {
    // An ID far outside the built-in range.
    const UNKNOWN_ID: u32 = 0xBEEF;

    let tmp = TempDir::new().unwrap();
    let workspace_root = tmp.path();
    let derived_file = workspace_root.join("derived.sqry");
    let snapshot_sha = [0x55u8; 32];

    // Every fixture entry has a one-element key, a one-element result and
    // default (empty) dependency metadata; only the ID and payload differ.
    let entry = |query_type_id, key, result| PersistedEntry {
        query_type_id,
        raw_key_bytes: vec![key],
        raw_result_bytes: vec![result],
        deps: QueryDeps::default(),
    };
    let entries = vec![
        entry(type_ids::CALLERS, 1, 0xA1),
        entry(type_ids::CALLERS, 2, 0xA2),
        entry(UNKNOWN_ID, 3, 0xA3),
        entry(type_ids::CALLEES, 4, 0xA4),
        entry(type_ids::CALLEES, 5, 0xA5),
    ];

    // The header declares all five entries, including the unknown one.
    let declared_count = entries.len() as u64;
    let header = DerivedHeader::new(snapshot_sha, 0, 0, vec![], declared_count);
    let stream = serialize_derived_stream(&header, entries).unwrap();
    std::fs::write(&derived_file, &stream).unwrap();

    let mut db = empty_db();
    let outcome = load_derived(&mut db, snapshot_sha, &derived_file, workspace_root).unwrap();

    if let LoadOutcome::Applied { entries: applied } = outcome {
        assert_eq!(
            applied, 4,
            "unknown entry must be silently skipped; expected 4 applied"
        );
    } else {
        panic!("unexpected Skipped outcome");
    }
}
918
919 // -------------------------------------------------------------------------
920 // AC 21: second_load_returns_already_loaded
921 // -------------------------------------------------------------------------
922
/// A `QueryDb` that has already loaded derived facts once answers a second
/// `load_derived` call with `LoadError::AlreadyLoaded`.
///
/// The derived file is removed between the two calls, so the second call
/// can only produce `AlreadyLoaded` if it gets there without needing the
/// file on disk.
#[test]
fn second_load_returns_already_loaded() {
    let tmp = TempDir::new().unwrap();
    let workspace_root = tmp.path();
    let derived_file = workspace_root.join("derived.sqry");
    let snapshot_sha = [0x66u8; 32];

    std::fs::write(&derived_file, &make_valid_stream(snapshot_sha, 1)).unwrap();

    let mut db = empty_db();

    // First load — must succeed.
    load_derived(&mut db, snapshot_sha, &derived_file, workspace_root).unwrap();

    // Remove the file so any attempt to read it on the second call would
    // surface as a different error.
    std::fs::remove_file(&derived_file).unwrap();

    // Second load — must be refused as AlreadyLoaded.
    let second_attempt = load_derived(&mut db, snapshot_sha, &derived_file, workspace_root);
    let err = second_attempt.expect_err("second load must return Err");

    assert!(
        matches!(err, LoadError::AlreadyLoaded),
        "expected AlreadyLoaded, got: {err}"
    );
}
955
956 // -------------------------------------------------------------------------
957 // AC 22: header_restoration_restores_three_tiers
958 // -------------------------------------------------------------------------
959
/// After a successful load the DB's three revision tiers match the header.
///
/// The header carries `edge_revision = 42`, `metadata_revision = 17` and
/// per-file revisions for files 1 and 2, with zero entries; each tier is
/// read back from the DB after `load_derived` returns.
#[test]
fn header_restoration_restores_three_tiers() {
    use sqry_core::graph::unified::file::id::FileId;

    let dir = TempDir::new().unwrap();
    let path = dir.path().join("derived.sqry");
    let workspace_root = dir.path();
    let sha: [u8; 32] = [0x77; 32];

    let file_revisions = vec![(FileId::new(1), 7u64), (FileId::new(2), 99u64)];
    let header = DerivedHeader::new(
        sha,
        /*edge_revision=*/ 42,
        /*metadata_revision=*/ 17,
        // Moved rather than cloned: the vector is not needed again below.
        file_revisions,
        /*entry_count=*/ 0,
    );
    let bytes = serialize_derived_stream(&header, std::iter::empty()).unwrap();
    std::fs::write(&path, &bytes).unwrap();

    let mut db = empty_db();
    let outcome = load_derived(&mut db, sha, &path, workspace_root).unwrap();
    assert!(
        matches!(outcome, LoadOutcome::Applied { entries: 0 }),
        "expected Applied(0), got: {outcome:?}"
    );

    // Tier 2: global edge revision.
    assert_eq!(db.edge_revision(), 42, "edge_revision must be restored");
    // Tier 3: global metadata revision.
    assert_eq!(
        db.metadata_revision(),
        17,
        "metadata_revision must be restored"
    );
    // Tier 1: per-file revisions.
    assert_eq!(
        db.inputs().revision(FileId::new(1)),
        Some(7),
        "file 1 revision must be restored"
    );
    assert_eq!(
        db.inputs().revision(FileId::new(2)),
        Some(99),
        "file 2 revision must be restored"
    );
}
1008}
1009
/// Tests for the save path: `save_derived` output is read back from disk and
/// its header decoded, and unsafe target paths are rejected.
#[cfg(test)]
mod save_path_tests {
    use std::sync::Arc;

    use sqry_core::graph::unified::concurrent::CodeGraph;
    use tempfile::TempDir;

    use super::*;
    use crate::{QueryDb, QueryDbConfig};

    /// Build a minimal, empty `QueryDb` backed by an empty `CodeGraph`.
    fn empty_db() -> QueryDb {
        let snapshot = Arc::new(CodeGraph::new().snapshot());
        QueryDb::new(snapshot, QueryDbConfig::default())
    }

    /// AC 6: save → read back bytes → deserialize header → assert fields match.
    ///
    /// Uses an empty `QueryDb` so `entry_count = 0`. The snapshot SHA,
    /// edge_revision, and metadata_revision are all asserted to match exactly.
    #[test]
    fn save_then_read_back_header_fields_match() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("derived.sqry");
        let workspace_root = dir.path();

        let db = empty_db();
        let snapshot_sha: [u8; 32] = [0xAB; 32];

        save_derived(&db, snapshot_sha, &path, workspace_root).unwrap();

        let bytes = std::fs::read(&path).unwrap();
        let (header, tail) = deserialize_derived_header(&bytes).unwrap();

        assert_eq!(
            header.snapshot_sha256, snapshot_sha,
            "snapshot SHA mismatch"
        );
        assert_eq!(
            header.edge_revision,
            db.edge_revision(),
            "edge_revision mismatch"
        );
        assert_eq!(
            header.metadata_revision,
            db.metadata_revision(),
            "metadata_revision mismatch"
        );
        assert_eq!(header.entry_count, 0, "expected 0 entries for empty db");
        assert!(header.is_valid_v02(), "header must pass v02 validation");
        assert!(tail.is_empty(), "no entry bytes expected after header");
    }

    /// AC 7 (unix-only): save rejects a symlinked target path.
    ///
    /// The test accepts any error whose cause chain mentions "symlink" or
    /// "SymlinkTarget" in its rendered message. Creating the symlink requires
    /// Unix `std::os::unix::fs::symlink`, hence the `#[cfg(unix)]` gate.
    #[test]
    #[cfg(unix)]
    fn save_rejects_symlinked_target_path() {
        use std::os::unix::fs::symlink;

        let dir = TempDir::new().unwrap();
        let real_file = dir.path().join("real.sqry");
        std::fs::write(&real_file, b"placeholder").unwrap();

        // Create a symlink inside the workspace pointing at the real file.
        let symlink_path = dir.path().join("link.sqry");
        symlink(&real_file, &symlink_path).unwrap();

        let db = empty_db();
        let workspace_root = dir.path();
        let snapshot_sha: [u8; 32] = [0u8; 32];

        let err = save_derived(&db, snapshot_sha, &symlink_path, workspace_root)
            .expect_err("save must reject symlinked target");

        // Some cause in the chain must identify the symlink rejection. Each
        // cause is rendered once and the message checked for either spelling.
        let is_symlink_error = err.chain().any(|cause| {
            let message = cause.to_string();
            message.contains("symlink") || message.contains("SymlinkTarget")
        });
        assert!(
            is_symlink_error,
            "expected SymlinkTarget error; got: {err:#}"
        );
    }

    /// AC 8: save with an empty cache writes a valid v02 header followed by
    /// an empty entry stream (tail is empty after decoding the header).
    #[test]
    fn save_empty_cache_writes_header_only() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("derived.sqry");
        let workspace_root = dir.path();

        let db = empty_db();
        let snapshot_sha: [u8; 32] = [0xCC; 32];

        save_derived(&db, snapshot_sha, &path, workspace_root).unwrap();

        let bytes = std::fs::read(&path).unwrap();
        assert!(
            !bytes.is_empty(),
            "output must be non-empty even for 0 entries"
        );

        let (header, tail) = deserialize_derived_header(&bytes).unwrap();
        assert!(header.is_valid_v02());
        assert_eq!(header.entry_count, 0);
        assert!(
            tail.is_empty(),
            "empty cache must produce no entry bytes after the header"
        );
    }

    /// AC 9: save is repeatable — calling save twice (with a delete in
    /// between) yields headers whose critical fields are identical.
    ///
    /// Byte-for-byte equality of the two files is deliberately NOT asserted:
    /// the header's `saved_at` field may tick between the two calls, and a
    /// differing `saved_at` is not a correctness concern. The guarantee
    /// checked here is field-level equality of the snapshot SHA, both global
    /// revisions, the per-file revisions and the entry count, plus an empty
    /// entry stream in both outputs.
    #[test]
    fn save_is_idempotent_header_fields_stable_across_repeat_calls() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("derived.sqry");
        let workspace_root = dir.path();

        let db = empty_db();
        let snapshot_sha: [u8; 32] = [0x55; 32];

        // First save.
        save_derived(&db, snapshot_sha, &path, workspace_root).unwrap();
        let first_bytes = std::fs::read(&path).unwrap();

        // Delete the file, then save again.
        std::fs::remove_file(&path).unwrap();
        save_derived(&db, snapshot_sha, &path, workspace_root).unwrap();
        let second_bytes = std::fs::read(&path).unwrap();

        // Both outputs must decode to headers with identical critical fields.
        let (h1, tail1) = deserialize_derived_header(&first_bytes).unwrap();
        let (h2, tail2) = deserialize_derived_header(&second_bytes).unwrap();

        assert_eq!(h1.snapshot_sha256, h2.snapshot_sha256);
        assert_eq!(h1.edge_revision, h2.edge_revision);
        assert_eq!(h1.metadata_revision, h2.metadata_revision);
        assert_eq!(h1.entry_count, h2.entry_count);
        assert_eq!(h1.file_revisions, h2.file_revisions);
        assert!(tail1.is_empty());
        assert!(tail2.is_empty());
    }
}
1172
/// Unit tests for the format constants, `DerivedHeader` / `PersistedEntry` /
/// `QueryDeps` serialization, legacy-format rejection, and the manifest and
/// path helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;

    // ---- Constants ---------------------------------------------------------

    #[test]
    fn magic_is_16_bytes_exactly() {
        assert_eq!(DERIVED_MAGIC.len(), 16);
        assert_eq!(&DERIVED_MAGIC, b"SQRY_DERIVED_V02");
    }

    #[test]
    fn format_version_is_two() {
        assert_eq!(DERIVED_FORMAT_VERSION, 2);
    }

    // ---- DerivedHeader round-trip ------------------------------------------

    /// A fully populated header survives a postcard encode/decode unchanged.
    #[test]
    fn header_round_trip() {
        let h = DerivedHeader {
            magic: DERIVED_MAGIC,
            format_version: DERIVED_FORMAT_VERSION,
            snapshot_sha256: [0xAB; 32],
            edge_revision: 7,
            metadata_revision: 3,
            file_revisions: vec![(FileId::new(1), 42), (FileId::new(2), 99)],
            entry_count: 42,
            saved_at: 1_700_000_000,
        };
        let bytes = postcard::to_allocvec(&h).unwrap();
        let decoded: DerivedHeader = postcard::from_bytes(&bytes).unwrap();
        assert_eq!(decoded, h);
    }

    #[test]
    fn header_is_valid_v02() {
        let h = DerivedHeader::new([0u8; 32], 1, 2, vec![], 0);
        assert!(h.is_valid_v02());
    }

    #[test]
    fn header_with_wrong_magic_is_not_valid_v02() {
        let mut h = DerivedHeader::new([0u8; 32], 1, 2, vec![], 0);
        h.magic[0] = b'X'; // corrupt first byte
        assert!(!h.is_valid_v02());
    }

    #[test]
    fn header_with_wrong_format_version_is_not_valid_v02() {
        let mut h = DerivedHeader::new([0u8; 32], 1, 2, vec![], 0);
        h.format_version = 1;
        assert!(!h.is_valid_v02());
    }

    // ---- Stream round-trip -------------------------------------------------

    /// A header plus two entries written by `serialize_derived_stream` decode
    /// back to the same header and to entries that match field for field.
    #[test]
    fn stream_round_trip() {
        let header = DerivedHeader {
            magic: DERIVED_MAGIC,
            format_version: DERIVED_FORMAT_VERSION,
            snapshot_sha256: [0x55; 32],
            edge_revision: 10,
            metadata_revision: 5,
            file_revisions: vec![(FileId::new(3), 7)],
            entry_count: 2,
            saved_at: 1_700_000_001,
        };
        let entries = vec![
            PersistedEntry {
                query_type_id: 0x0001,
                raw_key_bytes: vec![1, 2, 3],
                raw_result_bytes: vec![4, 5, 6],
                deps: QueryDeps {
                    file_deps: vec![(FileId::new(1), 1)],
                    edge_revision: Some(10),
                    metadata_revision: None,
                },
            },
            PersistedEntry {
                query_type_id: 0x0002,
                raw_key_bytes: vec![7],
                raw_result_bytes: vec![8],
                deps: QueryDeps {
                    file_deps: vec![],
                    edge_revision: None,
                    metadata_revision: Some(5),
                },
            },
        ];

        // `entries` is cloned because the originals are compared against the
        // decoded values below.
        let bytes = serialize_derived_stream(&header, entries.clone()).unwrap();

        let (decoded_header, mut tail) = deserialize_derived_header(&bytes).unwrap();
        assert_eq!(decoded_header, header);

        // Drain the entry stream one record at a time until nothing is left.
        let mut decoded_entries = Vec::new();
        while !tail.is_empty() {
            let (entry, rest) = deserialize_next_entry(tail).unwrap();
            decoded_entries.push(entry);
            tail = rest;
        }

        assert_eq!(decoded_entries.len(), entries.len());
        // Compare every field of every entry so a regression confined to a
        // later entry's key or result bytes cannot slip through.
        for (idx, (got, want)) in decoded_entries.iter().zip(&entries).enumerate() {
            assert_eq!(
                got.query_type_id, want.query_type_id,
                "entry {idx}: query_type_id"
            );
            assert_eq!(
                got.raw_key_bytes, want.raw_key_bytes,
                "entry {idx}: raw_key_bytes"
            );
            assert_eq!(
                got.raw_result_bytes, want.raw_result_bytes,
                "entry {idx}: raw_result_bytes"
            );
            assert_eq!(got.deps, want.deps, "entry {idx}: deps");
        }
    }

    #[test]
    fn stream_with_zero_entries() {
        let header = DerivedHeader::new([0xCC; 32], 0, 0, vec![], 0);
        let bytes = serialize_derived_stream(&header, std::iter::empty()).unwrap();
        let (decoded_header, tail) = deserialize_derived_header(&bytes).unwrap();
        assert_eq!(decoded_header, header);
        assert!(tail.is_empty(), "no entries means empty tail");
    }

    // ---- Legacy v01 magic mismatch guard -----------------------------------

    #[test]
    fn legacy_v01_magic_is_not_v02_magic() {
        // The prior warm-only DerivedManifest (DB03) carried only:
        //   snapshot_sha256: [u8; 32], entry_count: usize, saved_at: u64
        // Decoding those bytes into DerivedHeader may succeed (postcard is
        // schema-free) but the decoded `magic` field will be garbage bytes
        // from the first 16 bytes of a SHA-256 hash, NOT b"SQRY_DERIVED_V02".
        // LOAD_PATH rejects files where is_valid_v02() returns false.
        //
        // This test pins the invariant for two sample prefixes: an all-zero
        // hash prefix and one arbitrary non-zero prefix. Belt-and-suspenders.
        let hypothetical_v01_first_16 = [0u8; 16]; // worst case: all-zero hash prefix
        assert_ne!(
            &DERIVED_MAGIC[..],
            &hypothetical_v01_first_16[..],
            "DERIVED_MAGIC must not equal any plausible v01 SHA-256 prefix"
        );

        // Also verify a non-zero SHA prefix (e.g., a common hash byte pattern)
        // doesn't accidentally match.
        let sha_like_prefix: [u8; 16] = [
            0x6b, 0x86, 0xb2, 0x73, 0xff, 0x34, 0xfc, 0xe1, 0x9d, 0x6b, 0x80, 0x4e, 0xff, 0x5a,
            0x3f, 0x57,
        ];
        assert_ne!(&DERIVED_MAGIC[..], &sha_like_prefix[..]);

        // Confirm DERIVED_MAGIC is exactly b"SQRY_DERIVED_V02" — not some
        // hash lookalike — so this test cannot pass vacuously.
        let magic_as_ascii = std::str::from_utf8(&DERIVED_MAGIC).expect("DERIVED_MAGIC is ASCII");
        assert_eq!(magic_as_ascii, "SQRY_DERIVED_V02");
    }

    #[test]
    fn legacy_v01_bytes_decode_as_invalid_header() {
        // Build a realistic v01 DerivedManifest byte sequence. The old struct
        // was { snapshot_sha256: [u8; 32], entry_count: usize, saved_at: u64 };
        // postcard writes the array as 32 raw bytes followed by the two
        // integers as varints.
        //
        // When those bytes are decoded as DerivedHeader, the leading hash
        // bytes are read as `magic` and whatever follows is read as
        // `format_version`. A 0xDE-filled prefix cannot equal DERIVED_MAGIC,
        // so is_valid_v02() returns false → LOAD_PATH rejects cleanly.

        #[derive(Serialize)]
        struct OldManifest {
            snapshot_sha256: [u8; 32],
            entry_count: usize,
            saved_at: u64,
        }

        let old = OldManifest {
            snapshot_sha256: [0xDE; 32],
            entry_count: 5,
            saved_at: 1_700_000_000,
        };
        let v01_bytes = postcard::to_allocvec(&old).unwrap();

        // Decoding as DerivedHeader may succeed or fail depending on how the
        // old bytes line up with the new fields. An outright decode failure
        // is fine (LOAD_PATH handles deserialization errors as a Corrupt
        // rejection); a successful decode MUST fail is_valid_v02().
        if let Ok(decoded) = postcard::from_bytes::<DerivedHeader>(&v01_bytes) {
            assert!(
                !decoded.is_valid_v02(),
                "v01 bytes accidentally decoded as valid v02 header — \
                 LOAD_PATH rejection would be bypassed"
            );
        }
    }

    // ---- DB03 warm-path compat tests (rewritten in terms of DerivedHeader) -

    /// Warm-path round-trip — rewritten from DB03's `manifest_round_trip` to
    /// use `DerivedHeader` directly. Coverage intent is preserved: verify
    /// that a header saved via `save_manifest` / `load_manifest` survives a
    /// disk round-trip with matching fields.
    #[test]
    fn manifest_round_trip() {
        let hash = [42u8; 32];
        let header = DerivedHeader::new(hash, 0, 0, vec![], 100);

        assert!(header.matches_snapshot(&hash));
        assert!(!header.matches_snapshot(&[0u8; 32]));
        assert!(header.is_valid_v02());

        let temp = NamedTempFile::new().unwrap();
        save_manifest(temp.path(), &header).unwrap();

        // load_manifest decodes raw postcard bytes; the full v02 header
        // survives the round-trip.
        let loaded = load_manifest(temp.path()).unwrap();
        assert_eq!(loaded.snapshot_sha256, hash);
        assert_eq!(loaded.entry_count, 100);
        assert!(loaded.matches_snapshot(&hash));
        assert!(loaded.is_valid_v02());
    }

    /// The derived file is placed next to the snapshot, in the same directory.
    #[test]
    fn derived_path_computation() {
        let snapshot = Path::new("/home/user/.sqry/graph/snapshot.sqry");
        let derived = derived_path_for_snapshot(snapshot, "derived.sqry");
        assert_eq!(
            derived,
            PathBuf::from("/home/user/.sqry/graph/derived.sqry")
        );
    }

    /// A path that does not exist yields `None` rather than a panic.
    #[test]
    fn load_manifest_missing_file() {
        let result = load_manifest(Path::new("/nonexistent/path/derived.sqry"));
        assert!(result.is_none());
    }

    /// `compute_file_sha256` over a file containing `hello world` must return
    /// the well-known SHA-256 digest of those 11 bytes.
    #[test]
    fn file_sha256() {
        // SHA-256 of "hello world":
        // b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9
        const HELLO_WORLD_SHA256: [u8; 32] = [
            0xb9, 0x4d, 0x27, 0xb9, 0x93, 0x4d, 0x3e, 0x08, 0xa5, 0x2e, 0x52, 0xd7, 0xda, 0x7d,
            0xab, 0xfa, 0xc4, 0x84, 0xef, 0xe3, 0x7a, 0x53, 0x80, 0xee, 0x90, 0x88, 0xf7, 0xac,
            0xe2, 0xef, 0xcd, 0xe9,
        ];

        let temp = NamedTempFile::new().unwrap();
        std::fs::write(temp.path(), b"hello world").unwrap();
        let hash = compute_file_sha256(temp.path()).unwrap();

        assert_eq!(hash.len(), 32);
        // Pin the exact digest: a length/non-zero check alone would accept
        // any 32-byte output, including one from the wrong hash function.
        assert_eq!(hash, HELLO_WORLD_SHA256);
    }

    // ---- QueryDeps ---------------------------------------------------------

    #[test]
    fn query_deps_default_is_empty() {
        let deps = QueryDeps::default();
        assert!(deps.file_deps.is_empty());
        assert!(deps.edge_revision.is_none());
        assert!(deps.metadata_revision.is_none());
    }

    #[test]
    fn query_deps_round_trip() {
        let deps = QueryDeps {
            file_deps: vec![(FileId::new(1), 7), (FileId::new(2), 3)],
            edge_revision: Some(99),
            metadata_revision: Some(4),
        };
        let bytes = postcard::to_allocvec(&deps).unwrap();
        let decoded: QueryDeps = postcard::from_bytes(&bytes).unwrap();
        assert_eq!(decoded, deps);
    }
}