Skip to main content

sqry_core/config/
snapshot.rs

1//! Configuration snapshot for `CodeGraph` embedding.
2//!
3//! This module captures the effective sqry configuration used during graph build,
4//! enabling provenance tracking, reproducibility, and security audits.
5//!
6//! # Overview
7//!
8//! The [`ConfigSnapshot`] captures all runtime limits/knobs from:
9//! - Environment variables (`SQRY_*`)
10//! - CLI arguments (future)
11//! - Project config (`.sqry-config.toml`)
12//! - Defaults
13//!
//! Each entry includes provenance metadata (source, default, min/max range)
//! so that the provenance-tracking and audit requirements above can be met.
16//!
17//! # Schema Version
18//!
19//! The `CONFIG_SCHEMA_VERSION` constant tracks the schema version for
20//! forward/backward compatibility in graph exports.
21
22use serde::{Deserialize, Serialize};
23use std::time::SystemTime;
24
25use super::buffers::{
26    self, DEFAULT_INDEX_BUFFER, DEFAULT_MAX_PREDICATES, DEFAULT_MAX_QUERY_LENGTH,
27    DEFAULT_MAX_REPOSITORIES, DEFAULT_MAX_SOURCE_FILE_SIZE, DEFAULT_MMAP_THRESHOLD,
28    DEFAULT_PARSE_BUFFER, DEFAULT_READ_BUFFER, DEFAULT_WATCH_EVENT_QUEUE, DEFAULT_WRITE_BUFFER,
29};
30
/// Current schema version for config snapshots.
///
/// [`ConfigSnapshot::new`] stamps this value into every freshly created
/// snapshot, and [`ConfigSnapshot::compute_hash`] includes it in the hash.
///
/// Increment when:
/// - Adding new config entries
/// - Changing entry field semantics
/// - Modifying serialization format
pub const CONFIG_SCHEMA_VERSION: u32 = 1;
38
/// Source of a configuration value.
///
/// Ordered by precedence (highest to lowest):
/// CLI > Env > `ProjectConfig` > Default
///
/// Serialized by serde in `snake_case` (for example `project_config`), which
/// matches the labels written by the `Display` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConfigSource {
    /// Value from CLI argument (highest precedence)
    Cli,
    /// Value from environment variable
    Env,
    /// Value from `.sqry-config.toml`
    ProjectConfig,
    /// Default value (lowest precedence)
    Default,
}
55
56impl std::fmt::Display for ConfigSource {
57    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58        match self {
59            ConfigSource::Cli => write!(f, "cli"),
60            ConfigSource::Env => write!(f, "env"),
61            ConfigSource::ProjectConfig => write!(f, "project_config"),
62            ConfigSource::Default => write!(f, "default"),
63        }
64    }
65}
66
/// Scope of a configuration entry.
///
/// Serialized by serde in `snake_case`.
///
/// NOTE: `ConfigSnapshot::compute_hash` writes the `Debug` name of the variant
/// into the hashed text, so renaming a variant changes every computed hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConfigScope {
    /// Applies globally across all projects
    Global,
    /// Applies to a specific project only
    Project,
}
76
/// Risk category for a configuration entry.
///
/// Used for security audits to identify which settings
/// affect system safety.
///
/// Serialized by serde in `snake_case`.
///
/// NOTE: `ConfigSnapshot::compute_hash` writes the `Debug` name of the variant
/// into the hashed text, so renaming a variant changes every computed hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConfigRisk {
    /// Denial of Service prevention (memory/CPU exhaustion)
    Dos,
    /// Performance tuning (throughput, latency)
    Perf,
    /// Security-sensitive (access control, limits)
    Security,
    /// System reliability (timeouts, retries)
    Reliability,
}
93
/// A single configuration entry captured in the graph.
///
/// Contains the effective value plus provenance metadata for auditing.
///
/// All values are stored as strings. Fields marked `#[serde(default)]` may be
/// absent from serialized input, in which case they deserialize to `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConfigEntry {
    /// Configuration key name (e.g., "`SQRY_MAX_SOURCE_FILE_SIZE`")
    pub name: String,

    /// Effective value after precedence resolution (serialized as string)
    pub effective_value: String,

    /// Default value for this entry
    pub default_value: String,

    /// Minimum allowed value (if bounded); `None` when no range was recorded
    #[serde(default)]
    pub min_value: Option<String>,

    /// Maximum allowed value (if bounded); `None` when no range was recorded
    #[serde(default)]
    pub max_value: Option<String>,

    /// Source that provided the effective value
    pub source: ConfigSource,

    /// Scope of this configuration
    pub scope: ConfigScope,

    /// Risk category for security audits
    #[serde(default)]
    pub risk: Option<ConfigRisk>,

    /// Human-readable description
    #[serde(default)]
    pub description: Option<String>,
}
130
131impl ConfigEntry {
132    /// Create a new config entry with required fields.
133    #[must_use]
134    pub fn new(
135        name: impl Into<String>,
136        effective_value: impl Into<String>,
137        default_value: impl Into<String>,
138        source: ConfigSource,
139        scope: ConfigScope,
140    ) -> Self {
141        Self {
142            name: name.into(),
143            effective_value: effective_value.into(),
144            default_value: default_value.into(),
145            min_value: None,
146            max_value: None,
147            source,
148            scope,
149            risk: None,
150            description: None,
151        }
152    }
153
154    /// Set the min/max range for this entry.
155    #[must_use]
156    pub fn with_range(mut self, min: impl Into<String>, max: impl Into<String>) -> Self {
157        self.min_value = Some(min.into());
158        self.max_value = Some(max.into());
159        self
160    }
161
162    /// Set the risk category for this entry.
163    #[must_use]
164    pub fn with_risk(mut self, risk: ConfigRisk) -> Self {
165        self.risk = Some(risk);
166        self
167    }
168
169    /// Set the description for this entry.
170    #[must_use]
171    pub fn with_description(mut self, desc: impl Into<String>) -> Self {
172        self.description = Some(desc.into());
173        self
174    }
175}
176
/// Snapshot of effective configuration used for graph build.
///
/// Captures all runtime limits and knobs with provenance metadata.
/// This is embedded into the `CodeGraph` for auditing and reproducibility.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConfigSnapshot {
    /// Schema version for forward/backward compatibility
    pub schema_version: u32,

    /// Timestamp when the snapshot was collected.
    ///
    /// Serialized as whole milliseconds since the Unix epoch, and deliberately
    /// left out of `compute_hash`.
    #[serde(with = "system_time_serde")]
    pub collected_at: SystemTime,

    /// All configuration entries, in insertion order
    pub entries: Vec<ConfigEntry>,
}
193
194impl ConfigSnapshot {
195    /// Create a new empty snapshot with current timestamp.
196    #[must_use]
197    pub fn new() -> Self {
198        Self {
199            schema_version: CONFIG_SCHEMA_VERSION,
200            collected_at: SystemTime::now(),
201            entries: Vec::new(),
202        }
203    }
204
205    /// Add an entry to the snapshot.
206    pub fn add_entry(&mut self, entry: ConfigEntry) {
207        self.entries.push(entry);
208    }
209
210    /// Get the number of entries.
211    #[must_use]
212    pub fn len(&self) -> usize {
213        self.entries.len()
214    }
215
216    /// Check if the snapshot is empty.
217    #[must_use]
218    pub fn is_empty(&self) -> bool {
219        self.entries.is_empty()
220    }
221
222    /// Find an entry by name.
223    #[must_use]
224    pub fn get(&self, name: &str) -> Option<&ConfigEntry> {
225        self.entries.iter().find(|e| e.name == name)
226    }
227
228    /// Compute a content hash of the snapshot for sidecar verification.
229    ///
230    /// Uses BLAKE3 for deterministic, cross-process-stable hashing.
231    /// The hash covers:
232    /// - Schema version
233    /// - All `ConfigEntry` fields (complete coverage for integrity)
234    ///
235    /// `collected_at` is intentionally excluded: it is collection-time metadata,
236    /// not config content. Two snapshots with identical config values must produce
237    /// identical hashes regardless of when they were collected.
238    #[must_use]
239    pub fn compute_hash(&self) -> String {
240        use crate::hash::hash_bytes;
241        use std::fmt::Write;
242
243        // Build canonical representation for hashing
244        // Use a deterministic format that includes all fields.
245        // NOTE: collected_at is intentionally excluded — it is collection-time metadata
246        // and must not affect the content hash (two snapshots with identical config
247        // values but different timestamps would otherwise produce different hashes,
248        // breaking change-detection and causing flaky tests).
249        let mut canonical = String::new();
250
251        // Schema version
252        let _ = writeln!(canonical, "schema_version:{}", self.schema_version);
253
254        // Sort entries by name for deterministic ordering
255        let mut sorted_entries: Vec<_> = self.entries.iter().collect();
256        sorted_entries.sort_by_key(|e| &e.name);
257
258        // Include ALL fields from each entry
259        for entry in sorted_entries {
260            let _ = writeln!(canonical, "entry:{}", entry.name);
261            let _ = writeln!(canonical, "  effective_value:{}", entry.effective_value);
262            let _ = writeln!(canonical, "  default_value:{}", entry.default_value);
263            let _ = writeln!(
264                canonical,
265                "  min_value:{}",
266                entry.min_value.as_deref().unwrap_or("")
267            );
268            let _ = writeln!(
269                canonical,
270                "  max_value:{}",
271                entry.max_value.as_deref().unwrap_or("")
272            );
273            let _ = writeln!(canonical, "  source:{}", entry.source);
274            let _ = writeln!(canonical, "  scope:{:?}", entry.scope);
275            if let Some(risk) = entry.risk {
276                let _ = writeln!(canonical, "  risk:{risk:?}");
277            } else {
278                let _ = writeln!(canonical, "  risk:");
279            }
280            let _ = writeln!(
281                canonical,
282                "  description:{}",
283                entry.description.as_deref().unwrap_or("")
284            );
285        }
286
287        // Hash with BLAKE3 and return hex string
288        hash_bytes(canonical.as_bytes()).to_hex()
289    }
290}
291
292impl Default for ConfigSnapshot {
293    fn default() -> Self {
294        Self::new()
295    }
296}
297
298/// Serde module for `SystemTime` serialization.
299mod system_time_serde {
300    use serde::{Deserialize, Deserializer, Serialize, Serializer};
301    use std::time::{Duration, SystemTime, UNIX_EPOCH};
302
303    pub fn serialize<S>(time: &SystemTime, serializer: S) -> Result<S::Ok, S::Error>
304    where
305        S: Serializer,
306    {
307        let duration = time.duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO);
308        let millis = u64::try_from(duration.as_millis()).map_err(serde::ser::Error::custom)?;
309        millis.serialize(serializer)
310    }
311
312    pub fn deserialize<'de, D>(deserializer: D) -> Result<SystemTime, D::Error>
313    where
314        D: Deserializer<'de>,
315    {
316        let millis = u64::deserialize(deserializer)?;
317        Ok(UNIX_EPOCH + Duration::from_millis(millis))
318    }
319}
320
321/// Builder for constructing a [`ConfigSnapshot`] from various sources.
322///
323/// Implements precedence: CLI > Env > `ProjectConfig` > Default
324pub struct ConfigSnapshotBuilder {
325    snapshot: ConfigSnapshot,
326}
327
328impl ConfigSnapshotBuilder {
329    /// Create a new builder.
330    #[must_use]
331    pub fn new() -> Self {
332        Self {
333            snapshot: ConfigSnapshot::new(),
334        }
335    }
336
337    /// Build the complete snapshot by resolving all known config entries.
338    ///
339    /// This collects all documented configuration items from:
340    /// - `DoS` prevention limits
341    /// - Git limits
342    /// - Buffer sizes
343    /// - Memory management
344    /// - Cache settings (including regex cache and lexer pool)
345    #[must_use]
346    pub fn build(mut self) -> ConfigSnapshot {
347        // DoS Prevention Limits
348        self.add_dos_limits();
349
350        // Git Limits (P1-17)
351        self.add_git_limits();
352
353        // Buffer Sizes
354        self.add_buffer_sizes();
355
356        // Memory Management
357        self.add_memory_settings();
358
359        // Cache Configuration (includes regex cache, lexer pool)
360        self.add_cache_settings();
361
362        self.snapshot
363    }
364
365    /// Add `DoS` prevention limit entries.
366    fn add_dos_limits(&mut self) {
367        // SQRY_MAX_SOURCE_FILE_SIZE
368        let effective = buffers::max_source_file_size();
369        let source = Self::detect_source(
370            "SQRY_MAX_SOURCE_FILE_SIZE",
371            &effective,
372            &DEFAULT_MAX_SOURCE_FILE_SIZE,
373        );
374        self.snapshot.add_entry(
375            ConfigEntry::new(
376                "SQRY_MAX_SOURCE_FILE_SIZE",
377                effective.to_string(),
378                DEFAULT_MAX_SOURCE_FILE_SIZE.to_string(),
379                source,
380                ConfigScope::Global,
381            )
382            .with_range("1048576", "524288000") // 1 MB - 500 MB
383            .with_risk(ConfigRisk::Dos)
384            .with_description("Maximum source file size for indexing"),
385        );
386
387        // SQRY_MAX_REPOSITORIES
388        let effective = buffers::max_repositories();
389        let source = Self::detect_source(
390            "SQRY_MAX_REPOSITORIES",
391            &effective,
392            &DEFAULT_MAX_REPOSITORIES,
393        );
394        self.snapshot.add_entry(
395            ConfigEntry::new(
396                "SQRY_MAX_REPOSITORIES",
397                effective.to_string(),
398                DEFAULT_MAX_REPOSITORIES.to_string(),
399                source,
400                ConfigScope::Global,
401            )
402            .with_range("10", "10000")
403            .with_risk(ConfigRisk::Dos)
404            .with_description("Maximum repositories per workspace"),
405        );
406
407        // SQRY_WATCH_EVENT_QUEUE
408        let effective = buffers::watch_event_queue_capacity();
409        let source = Self::detect_source(
410            "SQRY_WATCH_EVENT_QUEUE",
411            &effective,
412            &DEFAULT_WATCH_EVENT_QUEUE,
413        );
414        self.snapshot.add_entry(
415            ConfigEntry::new(
416                "SQRY_WATCH_EVENT_QUEUE",
417                effective.to_string(),
418                DEFAULT_WATCH_EVENT_QUEUE.to_string(),
419                source,
420                ConfigScope::Global,
421            )
422            .with_range("100", "100000")
423            .with_risk(ConfigRisk::Dos)
424            .with_description("Maximum queued filesystem events"),
425        );
426
427        // SQRY_MAX_QUERY_LENGTH
428        let effective = buffers::max_query_length();
429        let source = Self::detect_source(
430            "SQRY_MAX_QUERY_LENGTH",
431            &effective,
432            &DEFAULT_MAX_QUERY_LENGTH,
433        );
434        self.snapshot.add_entry(
435            ConfigEntry::new(
436                "SQRY_MAX_QUERY_LENGTH",
437                effective.to_string(),
438                DEFAULT_MAX_QUERY_LENGTH.to_string(),
439                source,
440                ConfigScope::Global,
441            )
442            .with_range("1024", "102400") // 1 KB - 100 KB
443            .with_risk(ConfigRisk::Dos)
444            .with_description("Maximum query string length in bytes"),
445        );
446
447        // SQRY_MAX_PREDICATES
448        let effective = buffers::max_predicates();
449        let source =
450            Self::detect_source("SQRY_MAX_PREDICATES", &effective, &DEFAULT_MAX_PREDICATES);
451        self.snapshot.add_entry(
452            ConfigEntry::new(
453                "SQRY_MAX_PREDICATES",
454                effective.to_string(),
455                DEFAULT_MAX_PREDICATES.to_string(),
456                source,
457                ConfigScope::Global,
458            )
459            .with_range("10", "1000")
460            .with_risk(ConfigRisk::Dos)
461            .with_description("Maximum predicates per query"),
462        );
463    }
464
465    /// Add buffer size entries.
466    fn add_buffer_sizes(&mut self) {
467        // SQRY_READ_BUFFER
468        let effective = buffers::read_buffer_size();
469        let source = Self::detect_source("SQRY_READ_BUFFER", &effective, &DEFAULT_READ_BUFFER);
470        self.snapshot.add_entry(
471            ConfigEntry::new(
472                "SQRY_READ_BUFFER",
473                effective.to_string(),
474                DEFAULT_READ_BUFFER.to_string(),
475                source,
476                ConfigScope::Global,
477            )
478            .with_range("1024", "1048576") // 1 KB - 1 MB
479            .with_risk(ConfigRisk::Perf)
480            .with_description("Read buffer size for file I/O"),
481        );
482
483        // SQRY_WRITE_BUFFER
484        let effective = buffers::write_buffer_size();
485        let source = Self::detect_source("SQRY_WRITE_BUFFER", &effective, &DEFAULT_WRITE_BUFFER);
486        self.snapshot.add_entry(
487            ConfigEntry::new(
488                "SQRY_WRITE_BUFFER",
489                effective.to_string(),
490                DEFAULT_WRITE_BUFFER.to_string(),
491                source,
492                ConfigScope::Global,
493            )
494            .with_range("1024", "1048576")
495            .with_risk(ConfigRisk::Perf)
496            .with_description("Write buffer size for file I/O"),
497        );
498
499        // SQRY_PARSE_BUFFER
500        let effective = buffers::parse_buffer_size();
501        let source = Self::detect_source("SQRY_PARSE_BUFFER", &effective, &DEFAULT_PARSE_BUFFER);
502        self.snapshot.add_entry(
503            ConfigEntry::new(
504                "SQRY_PARSE_BUFFER",
505                effective.to_string(),
506                DEFAULT_PARSE_BUFFER.to_string(),
507                source,
508                ConfigScope::Global,
509            )
510            .with_range("4096", "10485760") // 4 KB - 10 MB
511            .with_risk(ConfigRisk::Perf)
512            .with_description("Parse buffer size for tree-sitter"),
513        );
514
515        // SQRY_INDEX_BUFFER
516        let effective = buffers::index_buffer_size();
517        let source = Self::detect_source("SQRY_INDEX_BUFFER", &effective, &DEFAULT_INDEX_BUFFER);
518        self.snapshot.add_entry(
519            ConfigEntry::new(
520                "SQRY_INDEX_BUFFER",
521                effective.to_string(),
522                DEFAULT_INDEX_BUFFER.to_string(),
523                source,
524                ConfigScope::Global,
525            )
526            .with_range("65536", "104857600") // 64 KB - 100 MB
527            .with_risk(ConfigRisk::Perf)
528            .with_description("Index buffer size for serialization"),
529        );
530    }
531
532    /// Add memory management entries.
533    fn add_memory_settings(&mut self) {
534        // SQRY_MMAP_THRESHOLD
535        let effective = buffers::mmap_threshold();
536        let source =
537            Self::detect_source_u64("SQRY_MMAP_THRESHOLD", effective, DEFAULT_MMAP_THRESHOLD);
538        self.snapshot.add_entry(
539            ConfigEntry::new(
540                "SQRY_MMAP_THRESHOLD",
541                effective.to_string(),
542                DEFAULT_MMAP_THRESHOLD.to_string(),
543                source,
544                ConfigScope::Global,
545            )
546            .with_range("1048576", "1073741824") // 1 MB - 1 GB
547            .with_risk(ConfigRisk::Perf)
548            .with_description("File size threshold for memory-mapped I/O"),
549        );
550    }
551
552    /// Add cache configuration entries.
553    fn add_cache_settings(&mut self) {
554        // SQRY_CACHE_BUDGET_ENTRIES
555        let default_entries: usize = 10_000;
556        let effective = std::env::var("SQRY_CACHE_BUDGET_ENTRIES")
557            .ok()
558            .and_then(|s| s.parse().ok())
559            .unwrap_or(default_entries);
560        let source = Self::detect_source("SQRY_CACHE_BUDGET_ENTRIES", &effective, &default_entries);
561        self.snapshot.add_entry(
562            ConfigEntry::new(
563                "SQRY_CACHE_BUDGET_ENTRIES",
564                effective.to_string(),
565                default_entries.to_string(),
566                source,
567                ConfigScope::Global,
568            )
569            .with_risk(ConfigRisk::Perf)
570            .with_description("Maximum cache entries"),
571        );
572
573        // SQRY_CACHE_BUDGET_BYTES
574        let default_bytes: u64 = 100 * 1024 * 1024; // 100 MB
575        let effective = std::env::var("SQRY_CACHE_BUDGET_BYTES")
576            .ok()
577            .and_then(|s| s.parse().ok())
578            .unwrap_or(default_bytes);
579        let source = Self::detect_source_u64("SQRY_CACHE_BUDGET_BYTES", effective, default_bytes);
580        self.snapshot.add_entry(
581            ConfigEntry::new(
582                "SQRY_CACHE_BUDGET_BYTES",
583                effective.to_string(),
584                default_bytes.to_string(),
585                source,
586                ConfigScope::Global,
587            )
588            .with_risk(ConfigRisk::Perf)
589            .with_description("Maximum cache size in bytes"),
590        );
591
592        // SQRY_CACHE_MAX_BYTES (CacheConfig size cap)
593        // Default: 50 MB per CacheConfig::DEFAULT_MAX_BYTES
594        let default_cache_max: u64 = 50 * 1024 * 1024; // 50 MB
595        let effective = std::env::var("SQRY_CACHE_MAX_BYTES")
596            .ok()
597            .and_then(|s| s.parse().ok())
598            .unwrap_or(default_cache_max);
599        let source = Self::detect_source_u64("SQRY_CACHE_MAX_BYTES", effective, default_cache_max);
600        self.snapshot.add_entry(
601            ConfigEntry::new(
602                "SQRY_CACHE_MAX_BYTES",
603                effective.to_string(),
604                default_cache_max.to_string(),
605                source,
606                ConfigScope::Global,
607            )
608            .with_range("1048576", "10737418240") // 1 MB - 10 GB
609            .with_risk(ConfigRisk::Dos)
610            .with_description("Cache size cap (CacheConfig max_bytes limit)"),
611        );
612
613        // SQRY_REGEX_CACHE_SIZE
614        let default_regex_cache: usize = 100;
615        let effective = std::env::var("SQRY_REGEX_CACHE_SIZE")
616            .ok()
617            .and_then(|s| s.parse().ok())
618            .filter(|&s| (1..=10_000).contains(&s))
619            .unwrap_or(default_regex_cache);
620        let source = Self::detect_source("SQRY_REGEX_CACHE_SIZE", &effective, &default_regex_cache);
621        self.snapshot.add_entry(
622            ConfigEntry::new(
623                "SQRY_REGEX_CACHE_SIZE",
624                effective.to_string(),
625                default_regex_cache.to_string(),
626                source,
627                ConfigScope::Global,
628            )
629            .with_range("1", "10000")
630            .with_risk(ConfigRisk::Perf)
631            .with_description("LRU cache size for compiled regexes"),
632        );
633
634        // SQRY_LEXER_POOL_MAX
635        let default_lexer_pool: usize = 4;
636        let effective = std::env::var("SQRY_LEXER_POOL_MAX")
637            .ok()
638            .and_then(|s| s.parse().ok())
639            .unwrap_or(default_lexer_pool);
640        let source = Self::detect_source("SQRY_LEXER_POOL_MAX", &effective, &default_lexer_pool);
641        self.snapshot.add_entry(
642            ConfigEntry::new(
643                "SQRY_LEXER_POOL_MAX",
644                effective.to_string(),
645                default_lexer_pool.to_string(),
646                source,
647                ConfigScope::Global,
648            )
649            .with_risk(ConfigRisk::Perf)
650            .with_description("Maximum lexer pool size"),
651        );
652    }
653
654    /// Add Git-related limit entries.
655    fn add_git_limits(&mut self) {
656        // SQRY_GIT_MAX_OUTPUT_SIZE
657        // Uses crate::git::max_git_output_size() but we need to access the value directly
658        // to avoid circular dependencies. The function clamps to 1MB-100MB range.
659        let default_git_output: usize = 10 * 1024 * 1024; // 10 MB
660        let min_git_output: usize = 1024 * 1024; // 1 MB
661        let max_git_output: usize = 100 * 1024 * 1024; // 100 MB
662
663        let effective = std::env::var("SQRY_GIT_MAX_OUTPUT_SIZE")
664            .ok()
665            .and_then(|s| s.parse().ok())
666            .map_or(default_git_output, |size: usize| {
667                size.clamp(min_git_output, max_git_output)
668            });
669        let source =
670            Self::detect_source("SQRY_GIT_MAX_OUTPUT_SIZE", &effective, &default_git_output);
671        self.snapshot.add_entry(
672            ConfigEntry::new(
673                "SQRY_GIT_MAX_OUTPUT_SIZE",
674                effective.to_string(),
675                default_git_output.to_string(),
676                source,
677                ConfigScope::Global,
678            )
679            .with_range(min_git_output.to_string(), max_git_output.to_string())
680            .with_risk(ConfigRisk::Dos)
681            .with_description("Maximum git command output size to prevent memory exhaustion"),
682        );
683    }
684
685    /// Detect the source of a usize config value.
686    fn detect_source<T: PartialEq>(env_var: &str, effective: &T, default: &T) -> ConfigSource {
687        if std::env::var(env_var).is_ok() {
688            ConfigSource::Env
689        } else if effective != default {
690            // Value differs from default but not from env - must be from project config or CLI
691            // For now, we assume project config since CLI integration is future work
692            ConfigSource::ProjectConfig
693        } else {
694            ConfigSource::Default
695        }
696    }
697
698    /// Detect the source of a u64 config value.
699    fn detect_source_u64(env_var: &str, effective: u64, default: u64) -> ConfigSource {
700        if std::env::var(env_var).is_ok() {
701            ConfigSource::Env
702        } else if effective != default {
703            ConfigSource::ProjectConfig
704        } else {
705            ConfigSource::Default
706        }
707    }
708}
709
710impl Default for ConfigSnapshotBuilder {
711    fn default() -> Self {
712        Self::new()
713    }
714}
715
716/// Collect a complete configuration snapshot.
717///
718/// This is the primary entry point for capturing the effective configuration
719/// for embedding into the `CodeGraph`.
720///
721/// # Example
722///
723/// ```
724/// use sqry_core::config::snapshot::collect_snapshot;
725///
726/// let snapshot = collect_snapshot();
727/// assert!(snapshot.len() > 0);
728/// assert_eq!(snapshot.schema_version, 1);
729/// ```
730#[must_use]
731pub fn collect_snapshot() -> ConfigSnapshot {
732    ConfigSnapshotBuilder::new().build()
733}
734
/// Inventory of all known config entry names.
///
/// Used by [`validate_completeness`] to check that a snapshot contains an
/// entry for every name listed here.
/// Must include ALL entries from `HARD_LIMIT_INVENTORY.md` that affect indexing safety/perf.
pub const CONFIG_INVENTORY: &[&str] = &[
    // DoS Prevention
    "SQRY_MAX_SOURCE_FILE_SIZE",
    "SQRY_MAX_REPOSITORIES",
    "SQRY_WATCH_EVENT_QUEUE",
    "SQRY_MAX_QUERY_LENGTH",
    "SQRY_MAX_PREDICATES",
    "SQRY_GIT_MAX_OUTPUT_SIZE", // P1-17: Git output limit
    // Buffers
    "SQRY_READ_BUFFER",
    "SQRY_WRITE_BUFFER",
    "SQRY_PARSE_BUFFER",
    "SQRY_INDEX_BUFFER",
    // Memory
    "SQRY_MMAP_THRESHOLD",
    // Cache
    "SQRY_CACHE_BUDGET_ENTRIES",
    "SQRY_CACHE_BUDGET_BYTES",
    "SQRY_CACHE_MAX_BYTES",  // Cache size cap (DoS prevention)
    "SQRY_REGEX_CACHE_SIZE", // Regex compilation cache
    "SQRY_LEXER_POOL_MAX",   // Lexer pool size
];
761
762/// Validate that a snapshot contains all required entries.
763///
764/// Returns an error with the list of missing entries if validation fails.
765///
766/// # Errors
767///
768/// Returns `Err` with a list of missing entry names if the snapshot
769/// is incomplete.
770pub fn validate_completeness(snapshot: &ConfigSnapshot) -> Result<(), Vec<&'static str>> {
771    let present: std::collections::HashSet<_> =
772        snapshot.entries.iter().map(|e| e.name.as_str()).collect();
773    let missing: Vec<_> = CONFIG_INVENTORY
774        .iter()
775        .filter(|name| !present.contains(*name))
776        .copied()
777        .collect();
778
779    if missing.is_empty() {
780        Ok(())
781    } else {
782        Err(missing)
783    }
784}
785
786/// Sidecar provenance artifact for resilient config recovery.
787///
788/// This is written alongside the main index for recovery if the
789/// graph is corrupted or unavailable.
790#[derive(Debug, Clone, Serialize, Deserialize)]
791pub struct ConfigProvenance {
792    /// Schema version matching the embedded snapshot
793    pub schema_version: u32,
794
795    /// Hash of the config snapshot for integrity verification
796    pub config_hash: String,
797
798    /// Timestamp when sidecar was generated
799    #[serde(with = "system_time_serde")]
800    pub generated_at: SystemTime,
801
802    /// All configuration entries (same as in-graph data)
803    pub entries: Vec<ConfigEntry>,
804}
805
806impl ConfigProvenance {
807    /// Create a new provenance sidecar from a config snapshot.
808    #[must_use]
809    pub fn from_snapshot(snapshot: &ConfigSnapshot) -> Self {
810        Self {
811            schema_version: snapshot.schema_version,
812            config_hash: snapshot.compute_hash(),
813            generated_at: SystemTime::now(),
814            entries: snapshot.entries.clone(),
815        }
816    }
817
818    /// Write the sidecar to a JSON file.
819    ///
820    /// # Errors
821    ///
822    /// Returns an error if the file cannot be created or written.
823    pub fn save(&self, path: &std::path::Path) -> std::io::Result<()> {
824        let file = std::fs::File::create(path)?;
825        serde_json::to_writer_pretty(file, self)?;
826        Ok(())
827    }
828
829    /// Load a sidecar from a JSON file.
830    ///
831    /// # Errors
832    ///
833    /// Returns an error if the file cannot be read or parsed.
834    pub fn load(path: &std::path::Path) -> std::io::Result<Self> {
835        let file = std::fs::File::open(path)?;
836        let provenance: Self = serde_json::from_reader(file)?;
837        Ok(provenance)
838    }
839
840    /// Verify that the sidecar matches a config snapshot.
841    #[must_use]
842    pub fn verify(&self, snapshot: &ConfigSnapshot) -> bool {
843        self.config_hash == snapshot.compute_hash()
844            && self.schema_version == snapshot.schema_version
845    }
846}
847
/// Standard filename for the config provenance sidecar.
///
/// NOTE(review): presumably joined onto the index directory and passed to
/// `ConfigProvenance::save` / `ConfigProvenance::load` — confirm against callers.
pub const CONFIG_PROVENANCE_FILENAME: &str = "config-provenance.json";
850
851#[cfg(test)]
852mod tests {
853    use super::*;
854    use serial_test::serial;
855
856    #[test]
857    fn test_config_entry_creation() {
858        let entry = ConfigEntry::new(
859            "TEST_VAR",
860            "100",
861            "50",
862            ConfigSource::Env,
863            ConfigScope::Global,
864        )
865        .with_range("10", "1000")
866        .with_risk(ConfigRisk::Dos)
867        .with_description("Test variable");
868
869        assert_eq!(entry.name, "TEST_VAR");
870        assert_eq!(entry.effective_value, "100");
871        assert_eq!(entry.default_value, "50");
872        assert_eq!(entry.min_value, Some("10".to_string()));
873        assert_eq!(entry.max_value, Some("1000".to_string()));
874        assert_eq!(entry.source, ConfigSource::Env);
875        assert_eq!(entry.scope, ConfigScope::Global);
876        assert_eq!(entry.risk, Some(ConfigRisk::Dos));
877        assert_eq!(entry.description, Some("Test variable".to_string()));
878    }
879
880    #[test]
881    fn test_config_snapshot_new() {
882        let snapshot = ConfigSnapshot::new();
883        assert_eq!(snapshot.schema_version, CONFIG_SCHEMA_VERSION);
884        assert!(snapshot.is_empty());
885    }
886
887    #[test]
888    fn test_config_snapshot_add_entry() {
889        let mut snapshot = ConfigSnapshot::new();
890        snapshot.add_entry(ConfigEntry::new(
891            "TEST",
892            "value",
893            "default",
894            ConfigSource::Default,
895            ConfigScope::Global,
896        ));
897
898        assert_eq!(snapshot.len(), 1);
899        assert!(!snapshot.is_empty());
900        assert!(snapshot.get("TEST").is_some());
901        assert!(snapshot.get("NONEXISTENT").is_none());
902    }
903
904    #[test]
905    fn test_config_snapshot_hash() {
906        let mut snapshot1 = ConfigSnapshot::new();
907        snapshot1.add_entry(ConfigEntry::new(
908            "TEST",
909            "value",
910            "default",
911            ConfigSource::Default,
912            ConfigScope::Global,
913        ));
914
915        let mut snapshot2 = ConfigSnapshot::new();
916        snapshot2.add_entry(ConfigEntry::new(
917            "TEST",
918            "value",
919            "default",
920            ConfigSource::Default,
921            ConfigScope::Global,
922        ));
923
924        // Same content should produce same hash
925        assert_eq!(snapshot1.compute_hash(), snapshot2.compute_hash());
926
927        // Different content should produce different hash
928        snapshot2.add_entry(ConfigEntry::new(
929            "TEST2",
930            "value2",
931            "default2",
932            ConfigSource::Env,
933            ConfigScope::Global,
934        ));
935        assert_ne!(snapshot1.compute_hash(), snapshot2.compute_hash());
936    }
937
938    #[test]
939    #[serial]
940    fn test_collect_snapshot_defaults() {
941        // Clear all env vars to get defaults
942        for var in CONFIG_INVENTORY {
943            unsafe { std::env::remove_var(var) };
944        }
945
946        let snapshot = collect_snapshot();
947
948        // Should have all inventory entries
949        assert_eq!(snapshot.len(), CONFIG_INVENTORY.len());
950
951        // Validate completeness
952        assert!(validate_completeness(&snapshot).is_ok());
953
954        // Check a specific entry
955        let entry = snapshot.get("SQRY_MAX_SOURCE_FILE_SIZE").unwrap();
956        assert_eq!(entry.source, ConfigSource::Default);
957        assert_eq!(
958            entry.effective_value,
959            DEFAULT_MAX_SOURCE_FILE_SIZE.to_string()
960        );
961    }
962
963    #[test]
964    #[serial]
965    fn test_collect_snapshot_env_override() {
966        unsafe {
967            std::env::set_var("SQRY_MAX_SOURCE_FILE_SIZE", "104857600"); // 100 MB
968        }
969
970        let snapshot = collect_snapshot();
971        let entry = snapshot.get("SQRY_MAX_SOURCE_FILE_SIZE").unwrap();
972
973        assert_eq!(entry.source, ConfigSource::Env);
974        assert_eq!(entry.effective_value, "104857600");
975
976        unsafe {
977            std::env::remove_var("SQRY_MAX_SOURCE_FILE_SIZE");
978        }
979    }
980
981    #[test]
982    fn test_validate_completeness_missing() {
983        let snapshot = ConfigSnapshot::new(); // Empty snapshot
984        let result = validate_completeness(&snapshot);
985
986        assert!(result.is_err());
987        let missing = result.unwrap_err();
988        assert_eq!(missing.len(), CONFIG_INVENTORY.len());
989    }
990
991    #[test]
992    fn test_config_source_display() {
993        assert_eq!(ConfigSource::Cli.to_string(), "cli");
994        assert_eq!(ConfigSource::Env.to_string(), "env");
995        assert_eq!(ConfigSource::ProjectConfig.to_string(), "project_config");
996        assert_eq!(ConfigSource::Default.to_string(), "default");
997    }
998
999    #[test]
1000    fn test_config_entry_serialization() {
1001        let entry = ConfigEntry::new("TEST", "100", "50", ConfigSource::Env, ConfigScope::Global)
1002            .with_range("10", "1000")
1003            .with_risk(ConfigRisk::Dos);
1004
1005        let json = serde_json::to_string(&entry).unwrap();
1006        let deserialized: ConfigEntry = serde_json::from_str(&json).unwrap();
1007
1008        assert_eq!(entry, deserialized);
1009    }
1010
1011    #[test]
1012    fn test_config_snapshot_serialization() {
1013        let snapshot = collect_snapshot();
1014        let json = serde_json::to_string(&snapshot).unwrap();
1015        let deserialized: ConfigSnapshot = serde_json::from_str(&json).unwrap();
1016
1017        assert_eq!(snapshot.schema_version, deserialized.schema_version);
1018        assert_eq!(snapshot.entries.len(), deserialized.entries.len());
1019    }
1020
1021    #[test]
1022    fn test_config_provenance_from_snapshot() {
1023        let snapshot = collect_snapshot();
1024        let provenance = ConfigProvenance::from_snapshot(&snapshot);
1025
1026        assert_eq!(provenance.schema_version, snapshot.schema_version);
1027        assert_eq!(provenance.config_hash, snapshot.compute_hash());
1028        assert_eq!(provenance.entries.len(), snapshot.entries.len());
1029    }
1030
1031    #[test]
1032    fn test_config_provenance_verify() {
1033        let snapshot = collect_snapshot();
1034        let provenance = ConfigProvenance::from_snapshot(&snapshot);
1035
1036        // Same snapshot should verify
1037        assert!(provenance.verify(&snapshot));
1038
1039        // Modified snapshot should not verify
1040        let mut modified = snapshot.clone();
1041        modified.add_entry(ConfigEntry::new(
1042            "NEW_ENTRY",
1043            "value",
1044            "default",
1045            ConfigSource::Default,
1046            ConfigScope::Global,
1047        ));
1048        assert!(!provenance.verify(&modified));
1049    }
1050
1051    #[test]
1052    fn test_config_provenance_save_load() {
1053        let snapshot = collect_snapshot();
1054        let provenance = ConfigProvenance::from_snapshot(&snapshot);
1055
1056        let temp_dir = tempfile::tempdir().unwrap();
1057        let path = temp_dir.path().join("test-provenance.json");
1058
1059        // Save
1060        provenance.save(&path).unwrap();
1061        assert!(path.exists());
1062
1063        // Load
1064        let loaded = ConfigProvenance::load(&path).unwrap();
1065        assert_eq!(loaded.schema_version, provenance.schema_version);
1066        assert_eq!(loaded.config_hash, provenance.config_hash);
1067        assert_eq!(loaded.entries.len(), provenance.entries.len());
1068
1069        // Verify loaded matches original snapshot
1070        assert!(loaded.verify(&snapshot));
1071    }
1072
1073    /// Regression test for `postcard` serialization of `ConfigEntry` with optional fields.
1074    ///
1075    /// This test prevents reintroduction of `#[serde(skip_serializing_if)]` on
1076    /// `ConfigEntry` fields. Binary formats like `postcard` require fixed field order;
1077    /// conditionally omitting fields corrupts the stream and breaks deserialization.
1078    ///
1079    /// Bug: 2025-12-12 - Used `skip_serializing_if` on `min_value`, `max_value`, `risk`,
1080    /// `description`
1081    /// Fix: Changed to `#[serde(default)]` which doesn't affect serialization
1082    #[test]
1083    fn test_config_entry_postcard_roundtrip_with_none() {
1084        // Test case 1: All optional fields are None
1085        let entry_all_none = ConfigEntry {
1086            name: "test.setting".to_string(),
1087            effective_value: "123".to_string(),
1088            default_value: "123".to_string(),
1089            min_value: None,
1090            max_value: None,
1091            source: ConfigSource::Default,
1092            scope: ConfigScope::Global,
1093            risk: None,
1094            description: None,
1095        };
1096
1097        // Test case 2: All optional fields are Some
1098        let entry_all_some = ConfigEntry {
1099            name: "test.setting".to_string(),
1100            effective_value: "456".to_string(),
1101            default_value: "100".to_string(),
1102            min_value: Some("1".to_string()),
1103            max_value: Some("1000".to_string()),
1104            source: ConfigSource::Env,
1105            scope: ConfigScope::Project,
1106            risk: Some(ConfigRisk::Dos),
1107            description: Some("Test description".to_string()),
1108        };
1109
1110        // Test case 3: Mixed None/Some (the pattern that triggered the bug)
1111        let entry_mixed = ConfigEntry {
1112            name: "mixed.setting".to_string(),
1113            effective_value: "789".to_string(),
1114            default_value: "500".to_string(),
1115            min_value: Some("100".to_string()),
1116            max_value: None, // This None after a Some triggered the deserialization bug
1117            source: ConfigSource::ProjectConfig,
1118            scope: ConfigScope::Global,
1119            risk: None,
1120            description: Some("Mixed case".to_string()),
1121        };
1122
1123        // Test all cases
1124        for (name, entry) in [
1125            ("all_none", entry_all_none),
1126            ("all_some", entry_all_some),
1127            ("mixed", entry_mixed),
1128        ] {
1129            let serialized = postcard::to_allocvec(&entry)
1130                .unwrap_or_else(|e| panic!("Failed to serialize {name}: {e}"));
1131            let deserialized: ConfigEntry = postcard::from_bytes(&serialized)
1132                .unwrap_or_else(|e| panic!("Failed to deserialize {name}: {e}"));
1133
1134            assert_eq!(
1135                entry,
1136                deserialized,
1137                "Roundtrip failed for {name}: serialized {len} bytes",
1138                len = serialized.len()
1139            );
1140        }
1141    }
1142
1143    /// Test `ConfigSnapshot` postcard roundtrip (embedded in legacy index metadata).
1144    #[test]
1145    fn test_config_snapshot_postcard_roundtrip() {
1146        let mut snapshot = ConfigSnapshot::new();
1147
1148        // Add entries with various optional field combinations
1149        snapshot.add_entry(
1150            ConfigEntry::new(
1151                "SETTING_1",
1152                "value1",
1153                "default1",
1154                ConfigSource::Default,
1155                ConfigScope::Global,
1156            )
1157            .with_range("0", "100")
1158            .with_risk(ConfigRisk::Dos)
1159            .with_description("First setting"),
1160        );
1161
1162        snapshot.add_entry(ConfigEntry::new(
1163            "SETTING_2",
1164            "value2",
1165            "default2",
1166            ConfigSource::Env,
1167            ConfigScope::Project,
1168        )); // No optional fields set
1169
1170        snapshot.add_entry(
1171            ConfigEntry::new(
1172                "SETTING_3",
1173                "value3",
1174                "default3",
1175                ConfigSource::ProjectConfig,
1176                ConfigScope::Global,
1177            )
1178            .with_risk(ConfigRisk::Perf), // Only risk set
1179        );
1180
1181        let serialized = postcard::to_allocvec(&snapshot).expect("Failed to serialize snapshot");
1182        let deserialized: ConfigSnapshot =
1183            postcard::from_bytes(&serialized).expect("Failed to deserialize snapshot");
1184
1185        assert_eq!(snapshot.schema_version, deserialized.schema_version);
1186        assert_eq!(snapshot.entries.len(), deserialized.entries.len());
1187
1188        for (original, restored) in snapshot.entries.iter().zip(deserialized.entries.iter()) {
1189            assert_eq!(original, restored, "Entry mismatch for {}", original.name);
1190        }
1191    }
1192}