Skip to main content

sqry_core/project/
persistence.rs

1//! Project state persistence
2//!
3//! Implements persistence for Project metadata (`repo_index`, `file_table`) as specified in:
4//! - `docs/development/project-persistence-019af0c9-428d-7000-b1d6-08961d6930b0/02_DESIGN.md`
5//!
6//! # Overview
7//!
8//! When `cache.persistent = true`, Project state is saved to disk during teardown/shutdown:
9//! - Repo index and file table → versioned JSON in `<state_root>/<project_id>.json`
10//! - Node index → via `IndexStorage` (existing mechanism)
11//!
12//! On initialization, persisted state is loaded if present and valid (version + checksum match).
13
14use crate::config::{CacheConfig, IndexingConfig};
15use crate::project::types::{FileEntry, ProjectId, RepoId, StringId};
16use serde::{Deserialize, Serialize};
17use std::collections::HashMap;
18use std::fs::{self, File};
19use std::io::{BufReader, BufWriter, Write};
20use std::path::{Path, PathBuf};
21use std::sync::Arc;
22use std::time::SystemTime;
23
/// Drop guard that deletes a temporary file unless it has been disarmed.
///
/// Set `should_cleanup` to `false` once the file at `path` must be kept.
struct TempFileGuard<'a> {
    /// Location of the temporary file to remove on drop.
    path: &'a Path,
    /// When `true`, dropping the guard attempts to delete `path`.
    should_cleanup: bool,
}

impl Drop for TempFileGuard<'_> {
    fn drop(&mut self) {
        if !self.should_cleanup {
            return;
        }
        // Best-effort removal: a failure to delete is deliberately ignored.
        fs::remove_file(self.path).ok();
    }
}
36
/// Schema version stamped into every persisted project state.
///
/// # Version Evolution (from `02_DESIGN.md` L-3)
///
/// - Bump it when a required field is added, a field's type changes, or a field is renamed.
/// - Leave it alone when only optional fields carrying `#[serde(default)]` are added.
const SCHEMA_VERSION: u32 = 1;
44
45/// Persisted project state including repo/file metadata.
46///
47/// Serialized to `<state_root>/<project_id>.json` with atomic temp + rename.
48#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct PersistedProjectState {
50    /// Schema version for compatibility checking.
51    pub version: u32,
52
53    /// Project identifier (must match current project).
54    pub project_id: u64,
55
56    /// Canonical index root path.
57    pub index_root: PathBuf,
58
59    /// Config fingerprint for invalidation detection.
60    ///
61    /// # Fields included (from `02_DESIGN.md` M-2):
62    /// - `cache.persistent` (bool)
63    /// - `cache.directory` (String)
64    /// - `indexing.max_file_size` (u64)
65    /// - `indexing.additional_ignored_dirs` (`Vec<String>`)
66    ///
67    /// Computed as: `blake3(sorted_json(fields)).truncate_to_u64()`
68    pub config_fingerprint: u64,
69
70    /// Repository index: maps git root paths to `RepoId`s.
71    pub repo_index: Vec<(PathBuf, u64)>,
72
73    /// File table entries.
74    pub files: Vec<PersistedFileEntry>,
75
76    /// When this state was generated.
77    #[serde(with = "system_time_serde")]
78    pub generated_at: SystemTime,
79
80    /// Blake3 checksum of serialized payload (hex, excluding this field).
81    ///
82    /// Set after serialization; validated on load.
83    #[serde(default)]
84    pub checksum: String,
85}
86
87/// Persisted file entry metadata.
88#[derive(Debug, Clone, Serialize, Deserialize)]
89pub struct PersistedFileEntry {
90    /// Relative path (normalized).
91    pub path: String,
92
93    /// Repository ID (as u64).
94    pub repo_id: u64,
95
96    /// Git root path for `RepoId` reconstruction.
97    ///
98    /// Stored to enable proper `RepoId` restoration (fixes HIGH finding from review).
99    /// `None` if file has no git root (`RepoId::NONE`).
100    #[serde(default)]
101    pub git_root: Option<String>,
102
103    /// Last modification time.
104    #[serde(with = "option_system_time_serde")]
105    pub last_modified: Option<SystemTime>,
106
107    /// File size in bytes.
108    pub size: u64,
109
110    /// Content hash for change detection.
111    #[serde(default)]
112    pub content_hash: Option<u64>,
113
114    /// Detected language ID (e.g., "rust", "python").
115    #[serde(default)]
116    pub language_id: Option<String>,
117}
118
/// Reads and writes persisted project state files.
pub struct ProjectPersistence {
    /// Directory holding the state files: `<resolved cache directory>/project-state/`.
    state_root: PathBuf,
    /// Index root the cache directory was resolved against.
    #[allow(dead_code)] // Used in ::new() for path validation, not stored for runtime use
    index_root: PathBuf,
}
127
128impl ProjectPersistence {
129    /// Create a new persistence helper.
130    ///
131    /// # Arguments
132    ///
133    /// * `index_root` - The project's index root path
134    /// * `cache_directory` - The cache directory from config (relative paths only for security)
135    ///
136    /// # Security
137    ///
138    /// Only relative paths are allowed for `cache_directory`. Absolute paths or paths with
139    /// `..` traversal are rejected to prevent writes outside the project root.
140    /// (Fixes HIGH path traversal finding from review)
141    #[must_use]
142    pub fn new(index_root: &Path, cache_directory: &str) -> Self {
143        let resolved_cache = Self::resolve_cache_directory(index_root, cache_directory);
144
145        Self {
146            state_root: resolved_cache.join("project-state"),
147            index_root: index_root.to_path_buf(),
148        }
149    }
150
151    fn resolve_cache_directory(index_root: &Path, cache_directory: &str) -> PathBuf {
152        let cache_path = Path::new(cache_directory);
153
154        if cache_path.is_absolute() {
155            log::warn!(
156                "Absolute cache directory '{cache_directory}' rejected for security; using default '.sqry-cache'"
157            );
158            return Self::default_cache_root(index_root);
159        }
160
161        Self::resolve_relative_cache_directory(index_root, cache_directory)
162    }
163
164    fn resolve_relative_cache_directory(index_root: &Path, cache_directory: &str) -> PathBuf {
165        let joined = index_root.join(cache_directory);
166        if let Ok(canonical) = joined.canonicalize() {
167            return Self::validate_canonical_cache_path(
168                index_root,
169                cache_directory,
170                canonical.as_path(),
171                &joined,
172            );
173        }
174
175        if cache_directory.contains("..") {
176            log::warn!(
177                "Cache directory '{cache_directory}' contains traversal; using default '.sqry-cache'"
178            );
179            return Self::default_cache_root(index_root);
180        }
181
182        joined
183    }
184
185    fn validate_canonical_cache_path(
186        index_root: &Path,
187        cache_directory: &str,
188        canonical: &Path,
189        joined: &Path,
190    ) -> PathBuf {
191        if let Ok(canonical_root) = index_root.canonicalize() {
192            if canonical.starts_with(&canonical_root) {
193                return joined.to_path_buf();
194            }
195
196            log::warn!(
197                "Cache directory '{cache_directory}' escapes project root; using default '.sqry-cache'"
198            );
199            return Self::default_cache_root(index_root);
200        }
201
202        joined.to_path_buf()
203    }
204
205    fn default_cache_root(index_root: &Path) -> PathBuf {
206        index_root.join(".sqry-cache")
207    }
208
209    /// Ensure the state root directory exists.
210    ///
211    /// # Errors
212    ///
213    /// Returns an error if the directory cannot be created.
214    pub fn ensure_state_root(&self) -> std::io::Result<()> {
215        fs::create_dir_all(&self.state_root)
216    }
217
218    /// Get the path for a project's state file.
219    #[must_use]
220    pub fn state_file_path(&self, project_id: ProjectId) -> PathBuf {
221        self.state_root.join(format!("{project_id}.json"))
222    }
223
224    /// Write metadata to disk with atomic temp + rename.
225    ///
226    /// # Errors
227    ///
228    /// Returns an error if:
229    /// - The state root cannot be created
230    /// - Serialization fails
231    /// - File I/O fails
232    ///
233    /// # Safety
234    ///
235    /// Uses atomic temp + rename pattern with directory fsync for durability.
236    /// Temp files are cleaned up on all error paths (fixes LOW finding from review).
237    pub fn write_metadata(&self, state: &PersistedProjectState) -> std::io::Result<()> {
238        self.ensure_state_root()?;
239
240        let target_path = self
241            .state_root
242            .join(format!("proj_{:016x}.json", state.project_id));
243        let temp_path = self
244            .state_root
245            .join(format!("proj_{:016x}.json.tmp", state.project_id));
246
247        let mut guard = TempFileGuard {
248            path: &temp_path,
249            should_cleanup: true,
250        };
251
252        // Serialize to temp file (compact JSON for performance - fixes LOW finding)
253        let file = File::create(&temp_path)?;
254        let mut writer = BufWriter::new(file);
255        serde_json::to_writer(&mut writer, state)
256            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
257        writer.flush()?;
258
259        // Sync file to disk
260        writer.into_inner()?.sync_all()?;
261
262        // Atomic rename
263        fs::rename(&temp_path, &target_path)?;
264
265        // Sync directory to ensure rename is durable (fixes LOW finding from review)
266        if let Ok(dir) = File::open(&self.state_root) {
267            let _ = dir.sync_all();
268        }
269
270        // Success - don't cleanup temp file (it's now the target)
271        guard.should_cleanup = false;
272
273        log::info!(
274            "Persisted project state to '{}' ({} repos, {} files)",
275            target_path.display(),
276            state.repo_index.len(),
277            state.files.len()
278        );
279
280        Ok(())
281    }
282
283    /// Read metadata from disk if present and valid.
284    ///
285    /// # Returns
286    ///
287    /// - `Ok(Some(state))` if valid state was loaded
288    /// - `Ok(None)` if state file doesn't exist
289    /// - `Err(_)` if file exists but cannot be read/parsed
290    ///
291    /// # Errors
292    ///
293    /// Returns an error if the file exists but:
294    /// - Cannot be opened
295    /// - Cannot be parsed as JSON
296    /// - Checksum validation fails
297    pub fn read_metadata(
298        &self,
299        project_id: ProjectId,
300    ) -> std::io::Result<Option<PersistedProjectState>> {
301        let path = self
302            .state_root
303            .join(format!("proj_{:016x}.json", project_id.as_u64()));
304
305        if !path.exists() {
306            return Ok(None);
307        }
308
309        let file = File::open(&path)?;
310        let reader = BufReader::new(file);
311        let state: PersistedProjectState = serde_json::from_reader(reader)
312            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
313
314        // Validate checksum
315        let computed_checksum = compute_state_checksum(&state);
316        if state.checksum != computed_checksum {
317            log::warn!(
318                "Checksum mismatch for '{}': expected {}, got {}",
319                path.display(),
320                state.checksum,
321                computed_checksum
322            );
323            return Err(std::io::Error::new(
324                std::io::ErrorKind::InvalidData,
325                "checksum mismatch",
326            ));
327        }
328
329        log::debug!(
330            "Loaded persisted state from '{}' (version {}, {} repos, {} files)",
331            path.display(),
332            state.version,
333            state.repo_index.len(),
334            state.files.len()
335        );
336
337        Ok(Some(state))
338    }
339}
340
341/// Compute config fingerprint for invalidation detection.
342///
343/// # Fields included (from `02_DESIGN.md` M-2, expanded per review findings):
344/// - `cache.persistent` (bool)
345/// - `cache.directory` (String)
346/// - `indexing.max_file_size` (u64)
347/// - `indexing.max_depth` (u32)
348/// - `indexing.enable_scope_extraction` (bool)
349/// - `indexing.enable_relation_extraction` (bool)
350/// - `indexing.additional_ignored_dirs` (`Vec<String>`)
351///
352/// All indexing toggles that affect detection/indexing behavior are included
353/// to ensure stale state is invalidated when config changes.
354/// (Fixes MEDIUM config fingerprint finding from review)
355#[must_use]
356pub fn compute_config_fingerprint(cache: &CacheConfig, indexing: &IndexingConfig) -> u64 {
357    use blake3::Hasher;
358
359    let mut hasher = Hasher::new();
360
361    // Hash cache fields
362    hasher.update(&[u8::from(cache.persistent)]);
363    hasher.update(cache.directory.as_bytes());
364
365    // Hash indexing fields (all fields that affect detection/indexing behavior)
366    hasher.update(&indexing.max_file_size.to_le_bytes());
367    hasher.update(&indexing.max_depth.to_le_bytes());
368    hasher.update(&[u8::from(indexing.enable_scope_extraction)]);
369    hasher.update(&[u8::from(indexing.enable_relation_extraction)]);
370
371    // Sort additional_ignored_dirs for determinism
372    let mut dirs = indexing.additional_ignored_dirs.clone();
373    dirs.sort();
374    for dir in &dirs {
375        hasher.update(dir.as_bytes());
376    }
377
378    // Truncate to u64
379    let hash = hasher.finalize();
380    let bytes = hash.as_bytes();
381    u64::from_le_bytes([
382        bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
383    ])
384}
385
386/// Compute checksum of persisted state (excluding the checksum field itself).
387///
388/// Includes all persisted fields for integrity verification:
389/// - version, `project_id`, `index_root`, `config_fingerprint`
390/// - `repo_index` entries
391/// - file entries (path, `repo_id`, `git_root`, size, `content_hash`, `language_id`, `last_modified`)
392/// - `generated_at` timestamp
393#[must_use]
394pub fn compute_state_checksum(state: &PersistedProjectState) -> String {
395    use blake3::Hasher;
396
397    let mut hasher = Hasher::new();
398
399    // Hash all fields except checksum
400    hasher.update(&state.version.to_le_bytes());
401    hasher.update(&state.project_id.to_le_bytes());
402    hasher.update(state.index_root.as_os_str().as_encoded_bytes());
403    hasher.update(&state.config_fingerprint.to_le_bytes());
404
405    // Hash repo_index
406    hash_repo_index(&mut hasher, &state.repo_index);
407
408    // Hash files (including new fields: git_root, content_hash, language_id)
409    hash_file_entries(&mut hasher, &state.files);
410
411    // Hash generated_at
412    hash_system_time(&mut hasher, state.generated_at);
413
414    // Return hex string
415    let hash = hasher.finalize();
416    hex::encode(&hash.as_bytes()[..16]) // First 16 bytes = 32 hex chars
417}
418
419fn hash_repo_index(hasher: &mut blake3::Hasher, repo_index: &[(PathBuf, u64)]) {
420    for (path, repo_id) in repo_index {
421        hasher.update(path.as_os_str().as_encoded_bytes());
422        hasher.update(&repo_id.to_le_bytes());
423    }
424}
425
426fn hash_file_entries(hasher: &mut blake3::Hasher, files: &[PersistedFileEntry]) {
427    for file in files {
428        hash_file_entry(hasher, file);
429    }
430}
431
432fn hash_file_entry(hasher: &mut blake3::Hasher, file: &PersistedFileEntry) {
433    hasher.update(file.path.as_bytes());
434    hasher.update(&file.repo_id.to_le_bytes());
435    hash_optional_str(hasher, file.git_root.as_deref());
436    hasher.update(&file.size.to_le_bytes());
437    if let Some(content_hash) = file.content_hash {
438        hasher.update(&content_hash.to_le_bytes());
439    }
440    hash_optional_str(hasher, file.language_id.as_deref());
441    hash_optional_time(hasher, file.last_modified);
442}
443
444fn hash_optional_str(hasher: &mut blake3::Hasher, value: Option<&str>) {
445    if let Some(value) = value {
446        hasher.update(value.as_bytes());
447    }
448}
449
450fn hash_optional_time(hasher: &mut blake3::Hasher, time: Option<SystemTime>) {
451    if let Some(time) = time {
452        hash_system_time(hasher, time);
453    }
454}
455
456fn hash_system_time(hasher: &mut blake3::Hasher, time: SystemTime) {
457    if let Ok(duration) = time.duration_since(std::time::UNIX_EPOCH) {
458        hasher.update(&duration.as_secs().to_le_bytes());
459    }
460}
461
462/// Build a `PersistedProjectState` from in-memory project data.
463///
464/// Stores complete file metadata including `git_root` path for proper `RepoId`
465/// reconstruction on restore. (Fixes HIGH `RepoId` restoration finding)
466#[must_use]
467#[allow(clippy::implicit_hasher)] // Standard HashMap is intentional
468pub fn build_persisted_state(
469    project_id: ProjectId,
470    index_root: &Path,
471    config_fingerprint: u64,
472    repo_index: &HashMap<PathBuf, RepoId>,
473    file_table: &HashMap<StringId, FileEntry>,
474) -> PersistedProjectState {
475    // Build reverse lookup: RepoId -> git_root path
476    let repo_id_to_path: HashMap<u64, &Path> = repo_index
477        .iter()
478        .map(|(path, repo_id)| (repo_id.as_u64(), path.as_path()))
479        .collect();
480
481    let repo_entries: Vec<(PathBuf, u64)> = repo_index
482        .iter()
483        .map(|(path, repo_id)| (path.clone(), repo_id.as_u64()))
484        .collect();
485
486    let file_entries: Vec<PersistedFileEntry> = file_table
487        .values()
488        .map(|entry| {
489            // Look up git_root from repo_id for proper reconstruction
490            let git_root = if entry.repo_id.is_none() {
491                None
492            } else {
493                repo_id_to_path
494                    .get(&entry.repo_id.as_u64())
495                    .map(|p| p.to_string_lossy().to_string())
496            };
497
498            PersistedFileEntry {
499                path: entry.path.to_string(),
500                repo_id: entry.repo_id.as_u64(),
501                git_root,
502                last_modified: entry.modified_at,
503                size: 0, // FileEntry doesn't track size; could stat the file but adds latency
504                content_hash: entry.content_hash,
505                language_id: entry
506                    .language_id
507                    .as_ref()
508                    .map(std::string::ToString::to_string),
509            }
510        })
511        .collect();
512
513    let mut state = PersistedProjectState {
514        version: SCHEMA_VERSION,
515        project_id: project_id.as_u64(),
516        index_root: index_root.to_path_buf(),
517        config_fingerprint,
518        repo_index: repo_entries,
519        files: file_entries,
520        generated_at: SystemTime::now(),
521        checksum: String::new(),
522    };
523
524    // Compute and set checksum
525    state.checksum = compute_state_checksum(&state);
526
527    state
528}
529
530/// Restore `repo_index` from persisted state.
531#[must_use]
532pub fn restore_repo_index(state: &PersistedProjectState) -> HashMap<PathBuf, RepoId> {
533    state
534        .repo_index
535        .iter()
536        .map(|(path, repo_id)| {
537            let repo = if *repo_id == 0 {
538                RepoId::NONE
539            } else {
540                // RepoId stores raw u64, reconstruct directly
541                // Note: This is safe because we persisted the u64 from as_u64()
542                RepoId::from_git_root(path) // Re-compute to ensure consistency
543            };
544            (path.clone(), repo)
545        })
546        .collect()
547}
548
549/// Restore `file_table` from persisted state.
550///
551/// Properly reconstructs `RepoId` from the stored `git_root` path, ensuring
552/// file-repo associations survive persist/restore cycles.
553/// (Fixes HIGH `RepoId` restoration finding from review)
554#[must_use]
555pub fn restore_file_table(state: &PersistedProjectState) -> HashMap<StringId, FileEntry> {
556    state
557        .files
558        .iter()
559        .map(|entry| {
560            let path: StringId = Arc::from(entry.path.as_str());
561
562            // Reconstruct RepoId from git_root path (fixes HIGH finding)
563            let repo_id = restore_repo_id(entry);
564
565            // Restore all file metadata (fixes MEDIUM finding)
566            let language_id = restore_language_id(entry);
567
568            let file_entry = FileEntry::with_metadata(
569                Arc::clone(&path),
570                repo_id,
571                entry.content_hash,
572                entry.last_modified,
573                language_id,
574            );
575            (path, file_entry)
576        })
577        .collect()
578}
579
580fn restore_repo_id(entry: &PersistedFileEntry) -> RepoId {
581    if entry.repo_id == 0 {
582        return RepoId::NONE;
583    }
584
585    if let Some(ref git_root) = entry.git_root {
586        return RepoId::from_git_root(Path::new(git_root));
587    }
588
589    log::warn!(
590        "File '{}' has repo_id {} but no git_root; using RepoId::NONE",
591        entry.path,
592        entry.repo_id
593    );
594    RepoId::NONE
595}
596
597fn restore_language_id(entry: &PersistedFileEntry) -> Option<StringId> {
598    entry
599        .language_id
600        .as_ref()
601        .map(|value| Arc::from(value.as_str()))
602}
603
604/// Serde support for `SystemTime`.
605mod system_time_serde {
606    use serde::{Deserialize, Deserializer, Serialize, Serializer};
607    use std::time::{Duration, SystemTime, UNIX_EPOCH};
608
609    pub fn serialize<S>(time: &SystemTime, serializer: S) -> Result<S::Ok, S::Error>
610    where
611        S: Serializer,
612    {
613        let duration = time.duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO);
614        (duration.as_secs(), duration.subsec_nanos()).serialize(serializer)
615    }
616
617    pub fn deserialize<'de, D>(deserializer: D) -> Result<SystemTime, D::Error>
618    where
619        D: Deserializer<'de>,
620    {
621        let (secs, nanos): (u64, u32) = Deserialize::deserialize(deserializer)?;
622        Ok(UNIX_EPOCH + Duration::new(secs, nanos))
623    }
624}
625
626/// Serde support for `Option<SystemTime>`.
627mod option_system_time_serde {
628    use serde::{Deserialize, Deserializer, Serialize, Serializer};
629    use std::time::{Duration, SystemTime, UNIX_EPOCH};
630
631    #[allow(clippy::ref_option)] // serde `with` attribute requires &Option<T> signature
632    pub fn serialize<S>(time: &Option<SystemTime>, serializer: S) -> Result<S::Ok, S::Error>
633    where
634        S: Serializer,
635    {
636        match time {
637            Some(t) => {
638                let duration = t.duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO);
639                Some((duration.as_secs(), duration.subsec_nanos())).serialize(serializer)
640            }
641            None => None::<(u64, u32)>.serialize(serializer),
642        }
643    }
644
645    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<SystemTime>, D::Error>
646    where
647        D: Deserializer<'de>,
648    {
649        let opt: Option<(u64, u32)> = Option::deserialize(deserializer)?;
650        Ok(opt.map(|(secs, nanos)| UNIX_EPOCH + Duration::new(secs, nanos)))
651    }
652}
653
654#[cfg(test)]
655mod tests {
656    use super::*;
657    use tempfile::TempDir;
658
659    #[test]
660    fn test_config_fingerprint_stable() {
661        let cache = CacheConfig {
662            directory: ".sqry-cache".to_string(),
663            persistent: true,
664        };
665        let indexing = IndexingConfig::default();
666
667        let fp1 = compute_config_fingerprint(&cache, &indexing);
668        let fp2 = compute_config_fingerprint(&cache, &indexing);
669
670        assert_eq!(fp1, fp2, "Fingerprint should be stable for same input");
671    }
672
673    #[test]
674    fn test_config_fingerprint_changes_on_persistent() {
675        let mut cache = CacheConfig::default();
676        let indexing = IndexingConfig::default();
677
678        let fp1 = compute_config_fingerprint(&cache, &indexing);
679        cache.persistent = false;
680        let fp2 = compute_config_fingerprint(&cache, &indexing);
681
682        assert_ne!(
683            fp1, fp2,
684            "Fingerprint should change when persistent changes"
685        );
686    }
687
688    #[test]
689    fn test_config_fingerprint_changes_on_directory() {
690        let mut cache = CacheConfig::default();
691        let indexing = IndexingConfig::default();
692
693        let fp1 = compute_config_fingerprint(&cache, &indexing);
694        cache.directory = ".other-cache".to_string();
695        let fp2 = compute_config_fingerprint(&cache, &indexing);
696
697        assert_ne!(fp1, fp2, "Fingerprint should change when directory changes");
698    }
699
700    #[test]
701    fn test_config_fingerprint_changes_on_max_file_size() {
702        let cache = CacheConfig::default();
703        let mut indexing = IndexingConfig::default();
704
705        let fp1 = compute_config_fingerprint(&cache, &indexing);
706        indexing.max_file_size = 1024;
707        let fp2 = compute_config_fingerprint(&cache, &indexing);
708
709        assert_ne!(
710            fp1, fp2,
711            "Fingerprint should change when max_file_size changes"
712        );
713    }
714
715    #[test]
716    fn test_state_checksum_stable() {
717        let state = PersistedProjectState {
718            version: 1,
719            project_id: 12345,
720            index_root: PathBuf::from("/test/project"),
721            config_fingerprint: 67890,
722            repo_index: vec![(PathBuf::from("/test/repo"), 11111)],
723            files: vec![PersistedFileEntry {
724                path: "src/main.rs".to_string(),
725                repo_id: 11111,
726                git_root: Some("/test/repo".to_string()),
727                last_modified: None,
728                size: 1024,
729                content_hash: Some(0xdead_beef),
730                language_id: Some("rust".to_string()),
731            }],
732            generated_at: SystemTime::UNIX_EPOCH,
733            checksum: String::new(),
734        };
735
736        let cs1 = compute_state_checksum(&state);
737        let cs2 = compute_state_checksum(&state);
738
739        assert_eq!(cs1, cs2, "Checksum should be stable for same state");
740    }
741
742    #[test]
743    fn test_config_fingerprint_changes_on_max_depth() {
744        let cache = CacheConfig::default();
745        let mut indexing = IndexingConfig::default();
746
747        let fp1 = compute_config_fingerprint(&cache, &indexing);
748        indexing.max_depth = 50;
749        let fp2 = compute_config_fingerprint(&cache, &indexing);
750
751        assert_ne!(fp1, fp2, "Fingerprint should change when max_depth changes");
752    }
753
754    #[test]
755    fn test_config_fingerprint_changes_on_scope_extraction() {
756        let cache = CacheConfig::default();
757        let mut indexing = IndexingConfig::default();
758
759        let fp1 = compute_config_fingerprint(&cache, &indexing);
760        indexing.enable_scope_extraction = !indexing.enable_scope_extraction;
761        let fp2 = compute_config_fingerprint(&cache, &indexing);
762
763        assert_ne!(
764            fp1, fp2,
765            "Fingerprint should change when enable_scope_extraction changes"
766        );
767    }
768
769    #[test]
770    fn test_config_fingerprint_changes_on_relation_extraction() {
771        let cache = CacheConfig::default();
772        let mut indexing = IndexingConfig::default();
773
774        let fp1 = compute_config_fingerprint(&cache, &indexing);
775        indexing.enable_relation_extraction = !indexing.enable_relation_extraction;
776        let fp2 = compute_config_fingerprint(&cache, &indexing);
777
778        assert_ne!(
779            fp1, fp2,
780            "Fingerprint should change when enable_relation_extraction changes"
781        );
782    }
783
784    #[test]
785    fn test_persistence_round_trip() {
786        let tmp = TempDir::new().unwrap();
787        let index_root = tmp.path();
788        let persistence = ProjectPersistence::new(index_root, ".sqry-cache");
789
790        let project_id = ProjectId::from_index_root(index_root);
791        let mut repo_index = HashMap::new();
792        repo_index.insert(index_root.to_path_buf(), RepoId::from_git_root(index_root));
793
794        let mut file_table = HashMap::new();
795        let path: StringId = Arc::from("src/main.rs");
796        file_table.insert(
797            Arc::clone(&path),
798            FileEntry::new(path, RepoId::from_git_root(index_root)),
799        );
800
801        let fingerprint =
802            compute_config_fingerprint(&CacheConfig::default(), &IndexingConfig::default());
803
804        let state = build_persisted_state(
805            project_id,
806            index_root,
807            fingerprint,
808            &repo_index,
809            &file_table,
810        );
811
812        // Write
813        persistence.write_metadata(&state).unwrap();
814
815        // Read back
816        let loaded = persistence.read_metadata(project_id).unwrap();
817        assert!(loaded.is_some());
818
819        let loaded_state = loaded.unwrap();
820        assert_eq!(loaded_state.version, state.version);
821        assert_eq!(loaded_state.project_id, state.project_id);
822        assert_eq!(loaded_state.config_fingerprint, state.config_fingerprint);
823        assert_eq!(loaded_state.repo_index.len(), state.repo_index.len());
824        assert_eq!(loaded_state.files.len(), state.files.len());
825    }
826
827    #[test]
828    fn test_persistence_missing_file_returns_none() {
829        let tmp = TempDir::new().unwrap();
830        let persistence = ProjectPersistence::new(tmp.path(), ".sqry-cache");
831        let project_id = ProjectId::from_index_root(tmp.path());
832
833        let result = persistence.read_metadata(project_id).unwrap();
834        assert!(result.is_none());
835    }
836
837    #[test]
838    fn test_opt_out_no_write() {
839        // This test verifies the design: when persistent=false, persist is skipped.
840        // The actual check happens in Project::persist_if_configured, not here.
841        // This test just ensures the helper works correctly.
842        let cache = CacheConfig {
843            directory: ".sqry-cache".to_string(),
844            persistent: false,
845        };
846
847        assert!(!cache.persistent, "persistent should be false");
848    }
849
850    #[test]
851    fn test_path_traversal_rejected() {
852        let tmp = TempDir::new().unwrap();
853        let index_root = tmp.path();
854
855        // Test path traversal with ".." is rejected
856        let persistence = ProjectPersistence::new(index_root, "../escape");
857        assert!(
858            persistence.state_root.starts_with(index_root),
859            "Path traversal should be rejected; state_root should be under index_root"
860        );
861
862        // Test absolute path is rejected
863        #[cfg(unix)]
864        let abs_path = "/tmp/absolute";
865        #[cfg(windows)]
866        let abs_path = "C:\\tmp\\absolute";
867        let persistence = ProjectPersistence::new(index_root, abs_path);
868        assert!(
869            persistence.state_root.starts_with(index_root),
870            "Absolute path should be rejected; state_root should be under index_root"
871        );
872    }
873
874    #[test]
875    fn test_repo_id_restoration_with_git_root() {
876        // Build a state with git_root stored
877        let state = PersistedProjectState {
878            version: 1,
879            project_id: 12345,
880            index_root: PathBuf::from("/test/project"),
881            config_fingerprint: 67890,
882            repo_index: vec![(PathBuf::from("/test/repo"), 11111)],
883            files: vec![PersistedFileEntry {
884                path: "src/main.rs".to_string(),
885                repo_id: 11111,
886                git_root: Some("/test/repo".to_string()),
887                last_modified: None,
888                size: 1024,
889                content_hash: None,
890                language_id: None,
891            }],
892            generated_at: SystemTime::UNIX_EPOCH,
893            checksum: String::new(),
894        };
895
896        // Restore file_table
897        let file_table = restore_file_table(&state);
898
899        // Verify RepoId is properly restored
900        let entry = file_table.get("src/main.rs").expect("file should exist");
901        let expected_repo_id = RepoId::from_git_root(Path::new("/test/repo"));
902        assert_eq!(
903            entry.repo_id, expected_repo_id,
904            "RepoId should be reconstructed from git_root"
905        );
906        assert!(entry.repo_id.is_some(), "RepoId should not be NONE");
907    }
908
909    #[test]
910    fn test_repo_id_none_preserved() {
911        // Build a state with RepoId::NONE (repo_id = 0, no git_root)
912        let state = PersistedProjectState {
913            version: 1,
914            project_id: 12345,
915            index_root: PathBuf::from("/test/project"),
916            config_fingerprint: 67890,
917            repo_index: vec![],
918            files: vec![PersistedFileEntry {
919                path: "outside/file.txt".to_string(),
920                repo_id: 0,
921                git_root: None,
922                last_modified: None,
923                size: 0,
924                content_hash: None,
925                language_id: None,
926            }],
927            generated_at: SystemTime::UNIX_EPOCH,
928            checksum: String::new(),
929        };
930
931        // Restore file_table
932        let file_table = restore_file_table(&state);
933
934        // Verify RepoId::NONE is preserved
935        let entry = file_table
936            .get("outside/file.txt")
937            .expect("file should exist");
938        assert!(entry.repo_id.is_none(), "RepoId::NONE should be preserved");
939    }
940
941    #[test]
942    fn test_file_metadata_round_trip() {
943        let tmp = TempDir::new().unwrap();
944        let index_root = tmp.path();
945
946        let mut repo_index = HashMap::new();
947        let repo_id = RepoId::from_git_root(index_root);
948        repo_index.insert(index_root.to_path_buf(), repo_id);
949
950        let mut file_table = HashMap::new();
951        let path: StringId = Arc::from("src/lib.rs");
952        let lang: StringId = Arc::from("rust");
953        let now = SystemTime::now();
954
955        let original_entry = FileEntry::with_metadata(
956            Arc::clone(&path),
957            repo_id,
958            Some(0x1234_5678_9abc_def0),
959            Some(now),
960            Some(Arc::clone(&lang)),
961        );
962        file_table.insert(Arc::clone(&path), original_entry.clone());
963
964        let fingerprint =
965            compute_config_fingerprint(&CacheConfig::default(), &IndexingConfig::default());
966
967        // Build persisted state
968        let state = build_persisted_state(
969            ProjectId::from_index_root(index_root),
970            index_root,
971            fingerprint,
972            &repo_index,
973            &file_table,
974        );
975
976        // Verify git_root is stored
977        assert!(
978            state.files[0].git_root.is_some(),
979            "git_root should be stored"
980        );
981
982        // Restore file_table
983        let restored = restore_file_table(&state);
984        let restored_entry = restored.get("src/lib.rs").expect("file should exist");
985
986        // Verify all metadata is preserved
987        assert_eq!(restored_entry.repo_id, repo_id, "RepoId should match");
988        assert_eq!(
989            restored_entry.content_hash, original_entry.content_hash,
990            "content_hash should be preserved"
991        );
992        assert_eq!(
993            restored_entry.language_id.as_deref(),
994            Some("rust"),
995            "language_id should be preserved"
996        );
997    }
998}