Skip to main content

sqry_core/config/
snapshot.rs

1//! Configuration snapshot for `CodeGraph` embedding.
2//!
3//! This module captures the effective sqry configuration used during graph build,
4//! enabling provenance tracking, reproducibility, and security audits.
5//!
6//! # Overview
7//!
8//! The [`ConfigSnapshot`] captures all runtime limits/knobs from:
9//! - Environment variables (`SQRY_*`)
10//! - CLI arguments (future)
11//! - Project config (`.sqry-config.toml`)
12//! - Defaults
13//!
//! Each entry includes provenance metadata (source, default, min/max range)
//! so that the origin of every effective value can be audited.
16//!
17//! # Schema Version
18//!
19//! The `CONFIG_SCHEMA_VERSION` constant tracks the schema version for
20//! forward/backward compatibility in graph exports.
21
22use serde::{Deserialize, Serialize};
23use std::time::SystemTime;
24
25use super::buffers::{
26    self, DEFAULT_INDEX_BUFFER, DEFAULT_MAX_PREDICATES, DEFAULT_MAX_QUERY_LENGTH,
27    DEFAULT_MAX_REPOSITORIES, DEFAULT_MAX_SOURCE_FILE_SIZE, DEFAULT_MMAP_THRESHOLD,
28    DEFAULT_PARSE_BUFFER, DEFAULT_READ_BUFFER, DEFAULT_WATCH_EVENT_QUEUE, DEFAULT_WRITE_BUFFER,
29};
30
/// Current schema version for config snapshots.
///
/// [`ConfigSnapshot::new`] stamps this value into every new snapshot, and
/// [`ConfigProvenance::from_snapshot`] copies it from the snapshot into the
/// sidecar.
///
/// Increment when:
/// - Adding new config entries
/// - Changing entry field semantics
/// - Modifying serialization format
pub const CONFIG_SCHEMA_VERSION: u32 = 1;
38
/// Source of a configuration value.
///
/// Variants are declared in precedence order (highest to lowest):
/// CLI > Env > `ProjectConfig` > Default
///
/// The type does not derive `Ord`, so this precedence is a documentation
/// convention rather than something that can be compared with `<`.
/// Serialized by serde in `snake_case` (for example `project_config`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConfigSource {
    /// Value from CLI argument (highest precedence)
    Cli,
    /// Value from environment variable
    Env,
    /// Value from `.sqry-config.toml`
    ProjectConfig,
    /// Default value (lowest precedence)
    Default,
}
55
56impl std::fmt::Display for ConfigSource {
57    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
58        match self {
59            ConfigSource::Cli => write!(f, "cli"),
60            ConfigSource::Env => write!(f, "env"),
61            ConfigSource::ProjectConfig => write!(f, "project_config"),
62            ConfigSource::Default => write!(f, "default"),
63        }
64    }
65}
66
/// Scope of a configuration entry.
///
/// Serialized by serde in `snake_case` (`global`, `project`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConfigScope {
    /// Applies globally across all projects
    Global,
    /// Applies to a specific project only
    Project,
}
76
/// Risk category for a configuration entry.
///
/// Used for security audits to identify which settings
/// affect system safety.
///
/// Serialized by serde in `snake_case` (`dos`, `perf`, `security`,
/// `reliability`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConfigRisk {
    /// Denial of Service prevention (memory/CPU exhaustion)
    Dos,
    /// Performance tuning (throughput, latency)
    Perf,
    /// Security-sensitive (access control, limits)
    Security,
    /// System reliability (timeouts, retries)
    Reliability,
}
93
/// A single configuration entry captured in the graph.
///
/// Contains the effective value plus provenance metadata for auditing.
/// All values are stored as strings. Fields marked `#[serde(default)]` may be
/// absent from serialized input, in which case they deserialize as `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConfigEntry {
    /// Configuration key name (e.g., `"SQRY_MAX_SOURCE_FILE_SIZE"`)
    pub name: String,

    /// Effective value after precedence resolution (serialized as string)
    pub effective_value: String,

    /// Default value for this entry
    pub default_value: String,

    /// Minimum allowed value (if bounded)
    #[serde(default)]
    pub min_value: Option<String>,

    /// Maximum allowed value (if bounded)
    #[serde(default)]
    pub max_value: Option<String>,

    /// Source that provided the effective value
    pub source: ConfigSource,

    /// Scope of this configuration
    pub scope: ConfigScope,

    /// Risk category for security audits
    #[serde(default)]
    pub risk: Option<ConfigRisk>,

    /// Human-readable description
    #[serde(default)]
    pub description: Option<String>,
}
130
131impl ConfigEntry {
132    /// Create a new config entry with required fields.
133    #[must_use]
134    pub fn new(
135        name: impl Into<String>,
136        effective_value: impl Into<String>,
137        default_value: impl Into<String>,
138        source: ConfigSource,
139        scope: ConfigScope,
140    ) -> Self {
141        Self {
142            name: name.into(),
143            effective_value: effective_value.into(),
144            default_value: default_value.into(),
145            min_value: None,
146            max_value: None,
147            source,
148            scope,
149            risk: None,
150            description: None,
151        }
152    }
153
154    /// Set the min/max range for this entry.
155    #[must_use]
156    pub fn with_range(mut self, min: impl Into<String>, max: impl Into<String>) -> Self {
157        self.min_value = Some(min.into());
158        self.max_value = Some(max.into());
159        self
160    }
161
162    /// Set the risk category for this entry.
163    #[must_use]
164    pub fn with_risk(mut self, risk: ConfigRisk) -> Self {
165        self.risk = Some(risk);
166        self
167    }
168
169    /// Set the description for this entry.
170    #[must_use]
171    pub fn with_description(mut self, desc: impl Into<String>) -> Self {
172        self.description = Some(desc.into());
173        self
174    }
175}
176
/// Snapshot of effective configuration used for graph build.
///
/// Captures all runtime limits and knobs with provenance metadata.
/// This is embedded into the `CodeGraph` for auditing and reproducibility.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConfigSnapshot {
    /// Schema version for forward/backward compatibility
    pub schema_version: u32,

    /// Timestamp when the snapshot was collected.
    ///
    /// Serialized as whole milliseconds since the Unix epoch, so any
    /// sub-millisecond precision is not preserved by a serde round trip.
    #[serde(with = "system_time_serde")]
    pub collected_at: SystemTime,

    /// All configuration entries, in insertion order
    pub entries: Vec<ConfigEntry>,
}
193
194impl ConfigSnapshot {
195    /// Create a new empty snapshot with current timestamp.
196    #[must_use]
197    pub fn new() -> Self {
198        Self {
199            schema_version: CONFIG_SCHEMA_VERSION,
200            collected_at: SystemTime::now(),
201            entries: Vec::new(),
202        }
203    }
204
205    /// Add an entry to the snapshot.
206    pub fn add_entry(&mut self, entry: ConfigEntry) {
207        self.entries.push(entry);
208    }
209
210    /// Get the number of entries.
211    #[must_use]
212    pub fn len(&self) -> usize {
213        self.entries.len()
214    }
215
216    /// Check if the snapshot is empty.
217    #[must_use]
218    pub fn is_empty(&self) -> bool {
219        self.entries.is_empty()
220    }
221
222    /// Find an entry by name.
223    #[must_use]
224    pub fn get(&self, name: &str) -> Option<&ConfigEntry> {
225        self.entries.iter().find(|e| e.name == name)
226    }
227
228    /// Compute a hash of the snapshot for sidecar verification.
229    ///
230    /// Uses BLAKE3 for deterministic, cross-process-stable hashing.
231    /// The hash covers:
232    /// - Schema version
233    /// - Collected timestamp (for provenance)
234    /// - All `ConfigEntry` fields (complete coverage for integrity)
235    #[must_use]
236    pub fn compute_hash(&self) -> String {
237        use crate::hash::hash_bytes;
238        use std::fmt::Write;
239        use std::time::UNIX_EPOCH;
240
241        // Build canonical representation for hashing
242        // Use a deterministic format that includes all fields
243        let mut canonical = String::new();
244
245        // Schema version
246        let _ = writeln!(canonical, "schema_version:{}", self.schema_version);
247
248        // Timestamp (milliseconds since epoch for consistency)
249        let millis = self
250            .collected_at
251            .duration_since(UNIX_EPOCH)
252            .unwrap_or_default()
253            .as_millis();
254        let _ = writeln!(canonical, "collected_at:{millis}");
255
256        // Sort entries by name for deterministic ordering
257        let mut sorted_entries: Vec<_> = self.entries.iter().collect();
258        sorted_entries.sort_by_key(|e| &e.name);
259
260        // Include ALL fields from each entry
261        for entry in sorted_entries {
262            let _ = writeln!(canonical, "entry:{}", entry.name);
263            let _ = writeln!(canonical, "  effective_value:{}", entry.effective_value);
264            let _ = writeln!(canonical, "  default_value:{}", entry.default_value);
265            let _ = writeln!(
266                canonical,
267                "  min_value:{}",
268                entry.min_value.as_deref().unwrap_or("")
269            );
270            let _ = writeln!(
271                canonical,
272                "  max_value:{}",
273                entry.max_value.as_deref().unwrap_or("")
274            );
275            let _ = writeln!(canonical, "  source:{}", entry.source);
276            let _ = writeln!(canonical, "  scope:{:?}", entry.scope);
277            if let Some(risk) = entry.risk {
278                let _ = writeln!(canonical, "  risk:{risk:?}");
279            } else {
280                let _ = writeln!(canonical, "  risk:");
281            }
282            let _ = writeln!(
283                canonical,
284                "  description:{}",
285                entry.description.as_deref().unwrap_or("")
286            );
287        }
288
289        // Hash with BLAKE3 and return hex string
290        hash_bytes(canonical.as_bytes()).to_hex()
291    }
292}
293
294impl Default for ConfigSnapshot {
295    fn default() -> Self {
296        Self::new()
297    }
298}
299
300/// Serde module for `SystemTime` serialization.
301mod system_time_serde {
302    use serde::{Deserialize, Deserializer, Serialize, Serializer};
303    use std::time::{Duration, SystemTime, UNIX_EPOCH};
304
305    pub fn serialize<S>(time: &SystemTime, serializer: S) -> Result<S::Ok, S::Error>
306    where
307        S: Serializer,
308    {
309        let duration = time.duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO);
310        let millis = u64::try_from(duration.as_millis()).map_err(serde::ser::Error::custom)?;
311        millis.serialize(serializer)
312    }
313
314    pub fn deserialize<'de, D>(deserializer: D) -> Result<SystemTime, D::Error>
315    where
316        D: Deserializer<'de>,
317    {
318        let millis = u64::deserialize(deserializer)?;
319        Ok(UNIX_EPOCH + Duration::from_millis(millis))
320    }
321}
322
323/// Builder for constructing a [`ConfigSnapshot`] from various sources.
324///
325/// Implements precedence: CLI > Env > `ProjectConfig` > Default
326pub struct ConfigSnapshotBuilder {
327    snapshot: ConfigSnapshot,
328}
329
330impl ConfigSnapshotBuilder {
331    /// Create a new builder.
332    #[must_use]
333    pub fn new() -> Self {
334        Self {
335            snapshot: ConfigSnapshot::new(),
336        }
337    }
338
339    /// Build the complete snapshot by resolving all known config entries.
340    ///
341    /// This collects all documented configuration items from:
342    /// - `DoS` prevention limits
343    /// - Git limits
344    /// - Buffer sizes
345    /// - Memory management
346    /// - Cache settings (including regex cache and lexer pool)
347    #[must_use]
348    pub fn build(mut self) -> ConfigSnapshot {
349        // DoS Prevention Limits
350        self.add_dos_limits();
351
352        // Git Limits (P1-17)
353        self.add_git_limits();
354
355        // Buffer Sizes
356        self.add_buffer_sizes();
357
358        // Memory Management
359        self.add_memory_settings();
360
361        // Cache Configuration (includes regex cache, lexer pool)
362        self.add_cache_settings();
363
364        self.snapshot
365    }
366
367    /// Add `DoS` prevention limit entries.
368    fn add_dos_limits(&mut self) {
369        // SQRY_MAX_SOURCE_FILE_SIZE
370        let effective = buffers::max_source_file_size();
371        let source = Self::detect_source(
372            "SQRY_MAX_SOURCE_FILE_SIZE",
373            &effective,
374            &DEFAULT_MAX_SOURCE_FILE_SIZE,
375        );
376        self.snapshot.add_entry(
377            ConfigEntry::new(
378                "SQRY_MAX_SOURCE_FILE_SIZE",
379                effective.to_string(),
380                DEFAULT_MAX_SOURCE_FILE_SIZE.to_string(),
381                source,
382                ConfigScope::Global,
383            )
384            .with_range("1048576", "524288000") // 1 MB - 500 MB
385            .with_risk(ConfigRisk::Dos)
386            .with_description("Maximum source file size for indexing"),
387        );
388
389        // SQRY_MAX_REPOSITORIES
390        let effective = buffers::max_repositories();
391        let source = Self::detect_source(
392            "SQRY_MAX_REPOSITORIES",
393            &effective,
394            &DEFAULT_MAX_REPOSITORIES,
395        );
396        self.snapshot.add_entry(
397            ConfigEntry::new(
398                "SQRY_MAX_REPOSITORIES",
399                effective.to_string(),
400                DEFAULT_MAX_REPOSITORIES.to_string(),
401                source,
402                ConfigScope::Global,
403            )
404            .with_range("10", "10000")
405            .with_risk(ConfigRisk::Dos)
406            .with_description("Maximum repositories per workspace"),
407        );
408
409        // SQRY_WATCH_EVENT_QUEUE
410        let effective = buffers::watch_event_queue_capacity();
411        let source = Self::detect_source(
412            "SQRY_WATCH_EVENT_QUEUE",
413            &effective,
414            &DEFAULT_WATCH_EVENT_QUEUE,
415        );
416        self.snapshot.add_entry(
417            ConfigEntry::new(
418                "SQRY_WATCH_EVENT_QUEUE",
419                effective.to_string(),
420                DEFAULT_WATCH_EVENT_QUEUE.to_string(),
421                source,
422                ConfigScope::Global,
423            )
424            .with_range("100", "100000")
425            .with_risk(ConfigRisk::Dos)
426            .with_description("Maximum queued filesystem events"),
427        );
428
429        // SQRY_MAX_QUERY_LENGTH
430        let effective = buffers::max_query_length();
431        let source = Self::detect_source(
432            "SQRY_MAX_QUERY_LENGTH",
433            &effective,
434            &DEFAULT_MAX_QUERY_LENGTH,
435        );
436        self.snapshot.add_entry(
437            ConfigEntry::new(
438                "SQRY_MAX_QUERY_LENGTH",
439                effective.to_string(),
440                DEFAULT_MAX_QUERY_LENGTH.to_string(),
441                source,
442                ConfigScope::Global,
443            )
444            .with_range("1024", "102400") // 1 KB - 100 KB
445            .with_risk(ConfigRisk::Dos)
446            .with_description("Maximum query string length in bytes"),
447        );
448
449        // SQRY_MAX_PREDICATES
450        let effective = buffers::max_predicates();
451        let source =
452            Self::detect_source("SQRY_MAX_PREDICATES", &effective, &DEFAULT_MAX_PREDICATES);
453        self.snapshot.add_entry(
454            ConfigEntry::new(
455                "SQRY_MAX_PREDICATES",
456                effective.to_string(),
457                DEFAULT_MAX_PREDICATES.to_string(),
458                source,
459                ConfigScope::Global,
460            )
461            .with_range("10", "1000")
462            .with_risk(ConfigRisk::Dos)
463            .with_description("Maximum predicates per query"),
464        );
465    }
466
467    /// Add buffer size entries.
468    fn add_buffer_sizes(&mut self) {
469        // SQRY_READ_BUFFER
470        let effective = buffers::read_buffer_size();
471        let source = Self::detect_source("SQRY_READ_BUFFER", &effective, &DEFAULT_READ_BUFFER);
472        self.snapshot.add_entry(
473            ConfigEntry::new(
474                "SQRY_READ_BUFFER",
475                effective.to_string(),
476                DEFAULT_READ_BUFFER.to_string(),
477                source,
478                ConfigScope::Global,
479            )
480            .with_range("1024", "1048576") // 1 KB - 1 MB
481            .with_risk(ConfigRisk::Perf)
482            .with_description("Read buffer size for file I/O"),
483        );
484
485        // SQRY_WRITE_BUFFER
486        let effective = buffers::write_buffer_size();
487        let source = Self::detect_source("SQRY_WRITE_BUFFER", &effective, &DEFAULT_WRITE_BUFFER);
488        self.snapshot.add_entry(
489            ConfigEntry::new(
490                "SQRY_WRITE_BUFFER",
491                effective.to_string(),
492                DEFAULT_WRITE_BUFFER.to_string(),
493                source,
494                ConfigScope::Global,
495            )
496            .with_range("1024", "1048576")
497            .with_risk(ConfigRisk::Perf)
498            .with_description("Write buffer size for file I/O"),
499        );
500
501        // SQRY_PARSE_BUFFER
502        let effective = buffers::parse_buffer_size();
503        let source = Self::detect_source("SQRY_PARSE_BUFFER", &effective, &DEFAULT_PARSE_BUFFER);
504        self.snapshot.add_entry(
505            ConfigEntry::new(
506                "SQRY_PARSE_BUFFER",
507                effective.to_string(),
508                DEFAULT_PARSE_BUFFER.to_string(),
509                source,
510                ConfigScope::Global,
511            )
512            .with_range("4096", "10485760") // 4 KB - 10 MB
513            .with_risk(ConfigRisk::Perf)
514            .with_description("Parse buffer size for tree-sitter"),
515        );
516
517        // SQRY_INDEX_BUFFER
518        let effective = buffers::index_buffer_size();
519        let source = Self::detect_source("SQRY_INDEX_BUFFER", &effective, &DEFAULT_INDEX_BUFFER);
520        self.snapshot.add_entry(
521            ConfigEntry::new(
522                "SQRY_INDEX_BUFFER",
523                effective.to_string(),
524                DEFAULT_INDEX_BUFFER.to_string(),
525                source,
526                ConfigScope::Global,
527            )
528            .with_range("65536", "104857600") // 64 KB - 100 MB
529            .with_risk(ConfigRisk::Perf)
530            .with_description("Index buffer size for serialization"),
531        );
532    }
533
534    /// Add memory management entries.
535    fn add_memory_settings(&mut self) {
536        // SQRY_MMAP_THRESHOLD
537        let effective = buffers::mmap_threshold();
538        let source =
539            Self::detect_source_u64("SQRY_MMAP_THRESHOLD", effective, DEFAULT_MMAP_THRESHOLD);
540        self.snapshot.add_entry(
541            ConfigEntry::new(
542                "SQRY_MMAP_THRESHOLD",
543                effective.to_string(),
544                DEFAULT_MMAP_THRESHOLD.to_string(),
545                source,
546                ConfigScope::Global,
547            )
548            .with_range("1048576", "1073741824") // 1 MB - 1 GB
549            .with_risk(ConfigRisk::Perf)
550            .with_description("File size threshold for memory-mapped I/O"),
551        );
552    }
553
554    /// Add cache configuration entries.
555    fn add_cache_settings(&mut self) {
556        // SQRY_CACHE_BUDGET_ENTRIES
557        let default_entries: usize = 10_000;
558        let effective = std::env::var("SQRY_CACHE_BUDGET_ENTRIES")
559            .ok()
560            .and_then(|s| s.parse().ok())
561            .unwrap_or(default_entries);
562        let source = Self::detect_source("SQRY_CACHE_BUDGET_ENTRIES", &effective, &default_entries);
563        self.snapshot.add_entry(
564            ConfigEntry::new(
565                "SQRY_CACHE_BUDGET_ENTRIES",
566                effective.to_string(),
567                default_entries.to_string(),
568                source,
569                ConfigScope::Global,
570            )
571            .with_risk(ConfigRisk::Perf)
572            .with_description("Maximum cache entries"),
573        );
574
575        // SQRY_CACHE_BUDGET_BYTES
576        let default_bytes: u64 = 100 * 1024 * 1024; // 100 MB
577        let effective = std::env::var("SQRY_CACHE_BUDGET_BYTES")
578            .ok()
579            .and_then(|s| s.parse().ok())
580            .unwrap_or(default_bytes);
581        let source = Self::detect_source_u64("SQRY_CACHE_BUDGET_BYTES", effective, default_bytes);
582        self.snapshot.add_entry(
583            ConfigEntry::new(
584                "SQRY_CACHE_BUDGET_BYTES",
585                effective.to_string(),
586                default_bytes.to_string(),
587                source,
588                ConfigScope::Global,
589            )
590            .with_risk(ConfigRisk::Perf)
591            .with_description("Maximum cache size in bytes"),
592        );
593
594        // SQRY_CACHE_MAX_BYTES (CacheConfig size cap)
595        // Default: 50 MB per CacheConfig::DEFAULT_MAX_BYTES
596        let default_cache_max: u64 = 50 * 1024 * 1024; // 50 MB
597        let effective = std::env::var("SQRY_CACHE_MAX_BYTES")
598            .ok()
599            .and_then(|s| s.parse().ok())
600            .unwrap_or(default_cache_max);
601        let source = Self::detect_source_u64("SQRY_CACHE_MAX_BYTES", effective, default_cache_max);
602        self.snapshot.add_entry(
603            ConfigEntry::new(
604                "SQRY_CACHE_MAX_BYTES",
605                effective.to_string(),
606                default_cache_max.to_string(),
607                source,
608                ConfigScope::Global,
609            )
610            .with_range("1048576", "10737418240") // 1 MB - 10 GB
611            .with_risk(ConfigRisk::Dos)
612            .with_description("Cache size cap (CacheConfig max_bytes limit)"),
613        );
614
615        // SQRY_REGEX_CACHE_SIZE
616        let default_regex_cache: usize = 100;
617        let effective = std::env::var("SQRY_REGEX_CACHE_SIZE")
618            .ok()
619            .and_then(|s| s.parse().ok())
620            .filter(|&s| (1..=10_000).contains(&s))
621            .unwrap_or(default_regex_cache);
622        let source = Self::detect_source("SQRY_REGEX_CACHE_SIZE", &effective, &default_regex_cache);
623        self.snapshot.add_entry(
624            ConfigEntry::new(
625                "SQRY_REGEX_CACHE_SIZE",
626                effective.to_string(),
627                default_regex_cache.to_string(),
628                source,
629                ConfigScope::Global,
630            )
631            .with_range("1", "10000")
632            .with_risk(ConfigRisk::Perf)
633            .with_description("LRU cache size for compiled regexes"),
634        );
635
636        // SQRY_LEXER_POOL_MAX
637        let default_lexer_pool: usize = 4;
638        let effective = std::env::var("SQRY_LEXER_POOL_MAX")
639            .ok()
640            .and_then(|s| s.parse().ok())
641            .unwrap_or(default_lexer_pool);
642        let source = Self::detect_source("SQRY_LEXER_POOL_MAX", &effective, &default_lexer_pool);
643        self.snapshot.add_entry(
644            ConfigEntry::new(
645                "SQRY_LEXER_POOL_MAX",
646                effective.to_string(),
647                default_lexer_pool.to_string(),
648                source,
649                ConfigScope::Global,
650            )
651            .with_risk(ConfigRisk::Perf)
652            .with_description("Maximum lexer pool size"),
653        );
654    }
655
656    /// Add Git-related limit entries.
657    fn add_git_limits(&mut self) {
658        // SQRY_GIT_MAX_OUTPUT_SIZE
659        // Uses crate::git::max_git_output_size() but we need to access the value directly
660        // to avoid circular dependencies. The function clamps to 1MB-100MB range.
661        let default_git_output: usize = 10 * 1024 * 1024; // 10 MB
662        let min_git_output: usize = 1024 * 1024; // 1 MB
663        let max_git_output: usize = 100 * 1024 * 1024; // 100 MB
664
665        let effective = std::env::var("SQRY_GIT_MAX_OUTPUT_SIZE")
666            .ok()
667            .and_then(|s| s.parse().ok())
668            .map_or(default_git_output, |size: usize| {
669                size.clamp(min_git_output, max_git_output)
670            });
671        let source =
672            Self::detect_source("SQRY_GIT_MAX_OUTPUT_SIZE", &effective, &default_git_output);
673        self.snapshot.add_entry(
674            ConfigEntry::new(
675                "SQRY_GIT_MAX_OUTPUT_SIZE",
676                effective.to_string(),
677                default_git_output.to_string(),
678                source,
679                ConfigScope::Global,
680            )
681            .with_range(min_git_output.to_string(), max_git_output.to_string())
682            .with_risk(ConfigRisk::Dos)
683            .with_description("Maximum git command output size to prevent memory exhaustion"),
684        );
685    }
686
687    /// Detect the source of a usize config value.
688    fn detect_source<T: PartialEq>(env_var: &str, effective: &T, default: &T) -> ConfigSource {
689        if std::env::var(env_var).is_ok() {
690            ConfigSource::Env
691        } else if effective != default {
692            // Value differs from default but not from env - must be from project config or CLI
693            // For now, we assume project config since CLI integration is future work
694            ConfigSource::ProjectConfig
695        } else {
696            ConfigSource::Default
697        }
698    }
699
700    /// Detect the source of a u64 config value.
701    fn detect_source_u64(env_var: &str, effective: u64, default: u64) -> ConfigSource {
702        if std::env::var(env_var).is_ok() {
703            ConfigSource::Env
704        } else if effective != default {
705            ConfigSource::ProjectConfig
706        } else {
707            ConfigSource::Default
708        }
709    }
710}
711
712impl Default for ConfigSnapshotBuilder {
713    fn default() -> Self {
714        Self::new()
715    }
716}
717
718/// Collect a complete configuration snapshot.
719///
720/// This is the primary entry point for capturing the effective configuration
721/// for embedding into the `CodeGraph`.
722///
723/// # Example
724///
725/// ```
726/// use sqry_core::config::snapshot::collect_snapshot;
727///
728/// let snapshot = collect_snapshot();
729/// assert!(snapshot.len() > 0);
730/// assert_eq!(snapshot.schema_version, 1);
731/// ```
732#[must_use]
733pub fn collect_snapshot() -> ConfigSnapshot {
734    ConfigSnapshotBuilder::new().build()
735}
736
/// Inventory of all known config entry names.
///
/// Used for completeness validation: [`validate_completeness`] reports every
/// name listed here that is missing from a snapshot.
/// Must include ALL entries from `HARD_LIMIT_INVENTORY.md` that affect indexing safety/perf.
pub const CONFIG_INVENTORY: &[&str] = &[
    // DoS Prevention
    "SQRY_MAX_SOURCE_FILE_SIZE",
    "SQRY_MAX_REPOSITORIES",
    "SQRY_WATCH_EVENT_QUEUE",
    "SQRY_MAX_QUERY_LENGTH",
    "SQRY_MAX_PREDICATES",
    "SQRY_GIT_MAX_OUTPUT_SIZE", // P1-17: Git output limit
    // Buffers
    "SQRY_READ_BUFFER",
    "SQRY_WRITE_BUFFER",
    "SQRY_PARSE_BUFFER",
    "SQRY_INDEX_BUFFER",
    // Memory
    "SQRY_MMAP_THRESHOLD",
    // Cache
    "SQRY_CACHE_BUDGET_ENTRIES",
    "SQRY_CACHE_BUDGET_BYTES",
    "SQRY_CACHE_MAX_BYTES",  // Cache size cap (DoS prevention)
    "SQRY_REGEX_CACHE_SIZE", // Regex compilation cache
    "SQRY_LEXER_POOL_MAX",   // Lexer pool size
];
763
764/// Validate that a snapshot contains all required entries.
765///
766/// Returns an error with the list of missing entries if validation fails.
767///
768/// # Errors
769///
770/// Returns `Err` with a list of missing entry names if the snapshot
771/// is incomplete.
772pub fn validate_completeness(snapshot: &ConfigSnapshot) -> Result<(), Vec<&'static str>> {
773    let present: std::collections::HashSet<_> =
774        snapshot.entries.iter().map(|e| e.name.as_str()).collect();
775    let missing: Vec<_> = CONFIG_INVENTORY
776        .iter()
777        .filter(|name| !present.contains(*name))
778        .copied()
779        .collect();
780
781    if missing.is_empty() {
782        Ok(())
783    } else {
784        Err(missing)
785    }
786}
787
/// Sidecar provenance artifact for resilient config recovery.
///
/// This is written alongside the main index for recovery if the
/// graph is corrupted or unavailable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfigProvenance {
    /// Schema version matching the embedded snapshot
    pub schema_version: u32,

    /// Hash of the config snapshot for integrity verification
    /// (the value of `ConfigSnapshot::compute_hash` when the sidecar was created)
    pub config_hash: String,

    /// Timestamp when sidecar was generated.
    ///
    /// Serialized as whole milliseconds since the Unix epoch. This is
    /// distinct from the snapshot's own `collected_at`.
    #[serde(with = "system_time_serde")]
    pub generated_at: SystemTime,

    /// All configuration entries (same as in-graph data)
    pub entries: Vec<ConfigEntry>,
}
807
808impl ConfigProvenance {
809    /// Create a new provenance sidecar from a config snapshot.
810    #[must_use]
811    pub fn from_snapshot(snapshot: &ConfigSnapshot) -> Self {
812        Self {
813            schema_version: snapshot.schema_version,
814            config_hash: snapshot.compute_hash(),
815            generated_at: SystemTime::now(),
816            entries: snapshot.entries.clone(),
817        }
818    }
819
820    /// Write the sidecar to a JSON file.
821    ///
822    /// # Errors
823    ///
824    /// Returns an error if the file cannot be created or written.
825    pub fn save(&self, path: &std::path::Path) -> std::io::Result<()> {
826        let file = std::fs::File::create(path)?;
827        serde_json::to_writer_pretty(file, self)?;
828        Ok(())
829    }
830
831    /// Load a sidecar from a JSON file.
832    ///
833    /// # Errors
834    ///
835    /// Returns an error if the file cannot be read or parsed.
836    pub fn load(path: &std::path::Path) -> std::io::Result<Self> {
837        let file = std::fs::File::open(path)?;
838        let provenance: Self = serde_json::from_reader(file)?;
839        Ok(provenance)
840    }
841
842    /// Verify that the sidecar matches a config snapshot.
843    #[must_use]
844    pub fn verify(&self, snapshot: &ConfigSnapshot) -> bool {
845        self.config_hash == snapshot.compute_hash()
846            && self.schema_version == snapshot.schema_version
847    }
848}
849
/// Standard filename for the config provenance sidecar.
///
/// NOTE(review): `ConfigProvenance::save` and `ConfigProvenance::load` take a
/// full path, so callers presumably join this name onto the index directory —
/// confirm against the call sites.
pub const CONFIG_PROVENANCE_FILENAME: &str = "config-provenance.json";
852
853#[cfg(test)]
854mod tests {
855    use super::*;
856    use serial_test::serial;
857
858    #[test]
859    fn test_config_entry_creation() {
860        let entry = ConfigEntry::new(
861            "TEST_VAR",
862            "100",
863            "50",
864            ConfigSource::Env,
865            ConfigScope::Global,
866        )
867        .with_range("10", "1000")
868        .with_risk(ConfigRisk::Dos)
869        .with_description("Test variable");
870
871        assert_eq!(entry.name, "TEST_VAR");
872        assert_eq!(entry.effective_value, "100");
873        assert_eq!(entry.default_value, "50");
874        assert_eq!(entry.min_value, Some("10".to_string()));
875        assert_eq!(entry.max_value, Some("1000".to_string()));
876        assert_eq!(entry.source, ConfigSource::Env);
877        assert_eq!(entry.scope, ConfigScope::Global);
878        assert_eq!(entry.risk, Some(ConfigRisk::Dos));
879        assert_eq!(entry.description, Some("Test variable".to_string()));
880    }
881
882    #[test]
883    fn test_config_snapshot_new() {
884        let snapshot = ConfigSnapshot::new();
885        assert_eq!(snapshot.schema_version, CONFIG_SCHEMA_VERSION);
886        assert!(snapshot.is_empty());
887    }
888
889    #[test]
890    fn test_config_snapshot_add_entry() {
891        let mut snapshot = ConfigSnapshot::new();
892        snapshot.add_entry(ConfigEntry::new(
893            "TEST",
894            "value",
895            "default",
896            ConfigSource::Default,
897            ConfigScope::Global,
898        ));
899
900        assert_eq!(snapshot.len(), 1);
901        assert!(!snapshot.is_empty());
902        assert!(snapshot.get("TEST").is_some());
903        assert!(snapshot.get("NONEXISTENT").is_none());
904    }
905
906    #[test]
907    fn test_config_snapshot_hash() {
908        let mut snapshot1 = ConfigSnapshot::new();
909        snapshot1.add_entry(ConfigEntry::new(
910            "TEST",
911            "value",
912            "default",
913            ConfigSource::Default,
914            ConfigScope::Global,
915        ));
916
917        let mut snapshot2 = ConfigSnapshot::new();
918        snapshot2.add_entry(ConfigEntry::new(
919            "TEST",
920            "value",
921            "default",
922            ConfigSource::Default,
923            ConfigScope::Global,
924        ));
925
926        // Same content should produce same hash
927        assert_eq!(snapshot1.compute_hash(), snapshot2.compute_hash());
928
929        // Different content should produce different hash
930        snapshot2.add_entry(ConfigEntry::new(
931            "TEST2",
932            "value2",
933            "default2",
934            ConfigSource::Env,
935            ConfigScope::Global,
936        ));
937        assert_ne!(snapshot1.compute_hash(), snapshot2.compute_hash());
938    }
939
940    #[test]
941    #[serial]
942    fn test_collect_snapshot_defaults() {
943        // Clear all env vars to get defaults
944        for var in CONFIG_INVENTORY {
945            unsafe { std::env::remove_var(var) };
946        }
947
948        let snapshot = collect_snapshot();
949
950        // Should have all inventory entries
951        assert_eq!(snapshot.len(), CONFIG_INVENTORY.len());
952
953        // Validate completeness
954        assert!(validate_completeness(&snapshot).is_ok());
955
956        // Check a specific entry
957        let entry = snapshot.get("SQRY_MAX_SOURCE_FILE_SIZE").unwrap();
958        assert_eq!(entry.source, ConfigSource::Default);
959        assert_eq!(
960            entry.effective_value,
961            DEFAULT_MAX_SOURCE_FILE_SIZE.to_string()
962        );
963    }
964
965    #[test]
966    #[serial]
967    fn test_collect_snapshot_env_override() {
968        unsafe {
969            std::env::set_var("SQRY_MAX_SOURCE_FILE_SIZE", "104857600"); // 100 MB
970        }
971
972        let snapshot = collect_snapshot();
973        let entry = snapshot.get("SQRY_MAX_SOURCE_FILE_SIZE").unwrap();
974
975        assert_eq!(entry.source, ConfigSource::Env);
976        assert_eq!(entry.effective_value, "104857600");
977
978        unsafe {
979            std::env::remove_var("SQRY_MAX_SOURCE_FILE_SIZE");
980        }
981    }
982
983    #[test]
984    fn test_validate_completeness_missing() {
985        let snapshot = ConfigSnapshot::new(); // Empty snapshot
986        let result = validate_completeness(&snapshot);
987
988        assert!(result.is_err());
989        let missing = result.unwrap_err();
990        assert_eq!(missing.len(), CONFIG_INVENTORY.len());
991    }
992
993    #[test]
994    fn test_config_source_display() {
995        assert_eq!(ConfigSource::Cli.to_string(), "cli");
996        assert_eq!(ConfigSource::Env.to_string(), "env");
997        assert_eq!(ConfigSource::ProjectConfig.to_string(), "project_config");
998        assert_eq!(ConfigSource::Default.to_string(), "default");
999    }
1000
1001    #[test]
1002    fn test_config_entry_serialization() {
1003        let entry = ConfigEntry::new("TEST", "100", "50", ConfigSource::Env, ConfigScope::Global)
1004            .with_range("10", "1000")
1005            .with_risk(ConfigRisk::Dos);
1006
1007        let json = serde_json::to_string(&entry).unwrap();
1008        let deserialized: ConfigEntry = serde_json::from_str(&json).unwrap();
1009
1010        assert_eq!(entry, deserialized);
1011    }
1012
1013    #[test]
1014    fn test_config_snapshot_serialization() {
1015        let snapshot = collect_snapshot();
1016        let json = serde_json::to_string(&snapshot).unwrap();
1017        let deserialized: ConfigSnapshot = serde_json::from_str(&json).unwrap();
1018
1019        assert_eq!(snapshot.schema_version, deserialized.schema_version);
1020        assert_eq!(snapshot.entries.len(), deserialized.entries.len());
1021    }
1022
1023    #[test]
1024    fn test_config_provenance_from_snapshot() {
1025        let snapshot = collect_snapshot();
1026        let provenance = ConfigProvenance::from_snapshot(&snapshot);
1027
1028        assert_eq!(provenance.schema_version, snapshot.schema_version);
1029        assert_eq!(provenance.config_hash, snapshot.compute_hash());
1030        assert_eq!(provenance.entries.len(), snapshot.entries.len());
1031    }
1032
1033    #[test]
1034    fn test_config_provenance_verify() {
1035        let snapshot = collect_snapshot();
1036        let provenance = ConfigProvenance::from_snapshot(&snapshot);
1037
1038        // Same snapshot should verify
1039        assert!(provenance.verify(&snapshot));
1040
1041        // Modified snapshot should not verify
1042        let mut modified = snapshot.clone();
1043        modified.add_entry(ConfigEntry::new(
1044            "NEW_ENTRY",
1045            "value",
1046            "default",
1047            ConfigSource::Default,
1048            ConfigScope::Global,
1049        ));
1050        assert!(!provenance.verify(&modified));
1051    }
1052
1053    #[test]
1054    fn test_config_provenance_save_load() {
1055        let snapshot = collect_snapshot();
1056        let provenance = ConfigProvenance::from_snapshot(&snapshot);
1057
1058        let temp_dir = tempfile::tempdir().unwrap();
1059        let path = temp_dir.path().join("test-provenance.json");
1060
1061        // Save
1062        provenance.save(&path).unwrap();
1063        assert!(path.exists());
1064
1065        // Load
1066        let loaded = ConfigProvenance::load(&path).unwrap();
1067        assert_eq!(loaded.schema_version, provenance.schema_version);
1068        assert_eq!(loaded.config_hash, provenance.config_hash);
1069        assert_eq!(loaded.entries.len(), provenance.entries.len());
1070
1071        // Verify loaded matches original snapshot
1072        assert!(loaded.verify(&snapshot));
1073    }
1074
1075    /// Regression test for `postcard` serialization of `ConfigEntry` with optional fields.
1076    ///
1077    /// This test prevents reintroduction of `#[serde(skip_serializing_if)]` on
1078    /// `ConfigEntry` fields. Binary formats like `postcard` require fixed field order;
1079    /// conditionally omitting fields corrupts the stream and breaks deserialization.
1080    ///
1081    /// Bug: 2025-12-12 - Used `skip_serializing_if` on `min_value`, `max_value`, `risk`,
1082    /// `description`
1083    /// Fix: Changed to `#[serde(default)]` which doesn't affect serialization
1084    #[test]
1085    fn test_config_entry_postcard_roundtrip_with_none() {
1086        // Test case 1: All optional fields are None
1087        let entry_all_none = ConfigEntry {
1088            name: "test.setting".to_string(),
1089            effective_value: "123".to_string(),
1090            default_value: "123".to_string(),
1091            min_value: None,
1092            max_value: None,
1093            source: ConfigSource::Default,
1094            scope: ConfigScope::Global,
1095            risk: None,
1096            description: None,
1097        };
1098
1099        // Test case 2: All optional fields are Some
1100        let entry_all_some = ConfigEntry {
1101            name: "test.setting".to_string(),
1102            effective_value: "456".to_string(),
1103            default_value: "100".to_string(),
1104            min_value: Some("1".to_string()),
1105            max_value: Some("1000".to_string()),
1106            source: ConfigSource::Env,
1107            scope: ConfigScope::Project,
1108            risk: Some(ConfigRisk::Dos),
1109            description: Some("Test description".to_string()),
1110        };
1111
1112        // Test case 3: Mixed None/Some (the pattern that triggered the bug)
1113        let entry_mixed = ConfigEntry {
1114            name: "mixed.setting".to_string(),
1115            effective_value: "789".to_string(),
1116            default_value: "500".to_string(),
1117            min_value: Some("100".to_string()),
1118            max_value: None, // This None after a Some triggered the deserialization bug
1119            source: ConfigSource::ProjectConfig,
1120            scope: ConfigScope::Global,
1121            risk: None,
1122            description: Some("Mixed case".to_string()),
1123        };
1124
1125        // Test all cases
1126        for (name, entry) in [
1127            ("all_none", entry_all_none),
1128            ("all_some", entry_all_some),
1129            ("mixed", entry_mixed),
1130        ] {
1131            let serialized = postcard::to_allocvec(&entry)
1132                .unwrap_or_else(|e| panic!("Failed to serialize {name}: {e}"));
1133            let deserialized: ConfigEntry = postcard::from_bytes(&serialized)
1134                .unwrap_or_else(|e| panic!("Failed to deserialize {name}: {e}"));
1135
1136            assert_eq!(
1137                entry,
1138                deserialized,
1139                "Roundtrip failed for {name}: serialized {len} bytes",
1140                len = serialized.len()
1141            );
1142        }
1143    }
1144
1145    /// Test `ConfigSnapshot` postcard roundtrip (embedded in legacy index metadata).
1146    #[test]
1147    fn test_config_snapshot_postcard_roundtrip() {
1148        let mut snapshot = ConfigSnapshot::new();
1149
1150        // Add entries with various optional field combinations
1151        snapshot.add_entry(
1152            ConfigEntry::new(
1153                "SETTING_1",
1154                "value1",
1155                "default1",
1156                ConfigSource::Default,
1157                ConfigScope::Global,
1158            )
1159            .with_range("0", "100")
1160            .with_risk(ConfigRisk::Dos)
1161            .with_description("First setting"),
1162        );
1163
1164        snapshot.add_entry(ConfigEntry::new(
1165            "SETTING_2",
1166            "value2",
1167            "default2",
1168            ConfigSource::Env,
1169            ConfigScope::Project,
1170        )); // No optional fields set
1171
1172        snapshot.add_entry(
1173            ConfigEntry::new(
1174                "SETTING_3",
1175                "value3",
1176                "default3",
1177                ConfigSource::ProjectConfig,
1178                ConfigScope::Global,
1179            )
1180            .with_risk(ConfigRisk::Perf), // Only risk set
1181        );
1182
1183        let serialized = postcard::to_allocvec(&snapshot).expect("Failed to serialize snapshot");
1184        let deserialized: ConfigSnapshot =
1185            postcard::from_bytes(&serialized).expect("Failed to deserialize snapshot");
1186
1187        assert_eq!(snapshot.schema_version, deserialized.schema_version);
1188        assert_eq!(snapshot.entries.len(), deserialized.entries.len());
1189
1190        for (original, restored) in snapshot.entries.iter().zip(deserialized.entries.iter()) {
1191            assert_eq!(original, restored, "Entry mismatch for {}", original.name);
1192        }
1193    }
1194}