// uni_common/config.rs
// SPDX-License-Identifier: Apache-2.0
// Copyright 2024-2026 Dragonscale Team
use std::path::{Path, PathBuf};
use std::thread;
use std::time::Duration;

/// Configuration for background compaction of L1 runs.
#[derive(Clone, Debug)]
pub struct CompactionConfig {
    /// Enable background compaction (default: true)
    pub enabled: bool,

    /// Max L1 runs before triggering compaction (default: 4)
    pub max_l1_runs: usize,

    /// Max L1 size in bytes before compaction (default: 256MB)
    pub max_l1_size_bytes: u64,

    /// Max age of oldest L1 run before compaction (default: 1 hour)
    pub max_l1_age: Duration,

    /// Background check interval (default: 30s)
    pub check_interval: Duration,

    /// Number of compaction worker threads (default: 1)
    pub worker_threads: usize,
}

impl Default for CompactionConfig {
    fn default() -> Self {
        Self {
            enabled: true,
            max_l1_runs: 4,
            max_l1_size_bytes: 256 * 1024 * 1024, // 256MB
            max_l1_age: Duration::from_secs(3600), // 1 hour
            check_interval: Duration::from_secs(30),
            worker_threads: 1,
        }
    }
}
/// Configuration for background index rebuilding.
#[derive(Clone, Debug)]
pub struct IndexRebuildConfig {
    /// Maximum number of retry attempts for failed index builds (default: 3).
    pub max_retries: u32,

    /// Delay between retry attempts (default: 60s).
    pub retry_delay: Duration,

    /// How often to check for pending index rebuild tasks (default: 5s).
    pub worker_check_interval: Duration,
}

impl Default for IndexRebuildConfig {
    fn default() -> Self {
        Self {
            max_retries: 3,
            retry_delay: Duration::from_secs(60),
            worker_check_interval: Duration::from_secs(5),
        }
    }
}
/// Write throttling thresholds based on the number of L1 runs.
#[derive(Clone, Copy, Debug)]
pub struct WriteThrottleConfig {
    /// L1 run count to start throttling (default: 8)
    pub soft_limit: usize,

    /// L1 run count to stop writes entirely (default: 16)
    pub hard_limit: usize,

    /// Base delay when throttling (default: 10ms)
    pub base_delay: Duration,
}

impl Default for WriteThrottleConfig {
    fn default() -> Self {
        Self {
            soft_limit: 8,
            hard_limit: 16,
            base_delay: Duration::from_millis(10),
        }
    }
}
/// Resilience settings (timeouts and retry backoff) for object store I/O.
#[derive(Clone, Debug)]
pub struct ObjectStoreConfig {
    /// Connection establishment timeout (default: 10s)
    pub connect_timeout: Duration,
    /// Read operation timeout (default: 30s)
    pub read_timeout: Duration,
    /// Write operation timeout (default: 60s)
    pub write_timeout: Duration,
    /// Maximum retry attempts (default: 3)
    pub max_retries: u32,
    /// Initial backoff delay between retries (default: 100ms)
    pub retry_backoff_base: Duration,
    /// Upper bound on the retry backoff delay (default: 10s)
    pub retry_backoff_max: Duration,
}

impl Default for ObjectStoreConfig {
    fn default() -> Self {
        Self {
            connect_timeout: Duration::from_secs(10),
            read_timeout: Duration::from_secs(30),
            write_timeout: Duration::from_secs(60),
            max_retries: 3,
            retry_backoff_base: Duration::from_millis(100),
            retry_backoff_max: Duration::from_secs(10),
        }
    }
}
/// Security configuration for file system operations.
/// Controls which paths can be accessed by BACKUP, COPY, and EXPORT commands.
///
/// Disabled by default for backward compatibility in embedded mode.
/// MUST be enabled for server mode with untrusted clients.
#[derive(Clone, Debug, Default)]
pub struct FileSandboxConfig {
    /// If true, file operations are restricted to allowed_paths.
    /// If false, all paths are allowed (NOT RECOMMENDED for server mode).
    pub enabled: bool,

    /// List of allowed base directories for file operations.
    /// Paths must be absolute and canonical.
    /// File operations are only allowed within these directories.
    pub allowed_paths: Vec<PathBuf>,
}
/// Deployment mode for the database.
///
/// Used to determine appropriate security defaults.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum DeploymentMode {
    /// Embedded/library mode where the host application controls access.
    /// File sandbox is disabled by default for backward compatibility.
    #[default]
    Embedded,
    /// Server mode with untrusted clients.
    /// File sandbox is enabled by default with restricted paths.
    Server,
}
/// HTTP server configuration.
///
/// Controls CORS, authentication, and other HTTP-related security settings.
///
/// # Security
///
/// **CWE-942 (Overly Permissive CORS)**, **CWE-306 (Missing Authentication)**:
/// Production deployments should configure explicit `allowed_origins` and
/// enable API key authentication.
#[derive(Clone, Debug)]
pub struct ServerConfig {
    /// Allowed CORS origins.
    ///
    /// - Empty vector: No CORS headers (most restrictive)
    /// - `["*"]`: Allow all origins (NOT RECOMMENDED for production)
    /// - Explicit list: Only allow specified origins (RECOMMENDED)
    ///
    /// # Security
    ///
    /// **CWE-942**: Using `["*"]` allows any website to make requests to
    /// your server, potentially exposing sensitive data.
    pub allowed_origins: Vec<String>,

    /// Optional API key for request authentication.
    ///
    /// When set, all API requests must include the header:
    /// `X-API-Key: <key>`
    ///
    /// # Security
    ///
    /// **CWE-306**: Without authentication, any client can execute queries.
    /// Enable this for any deployment accessible beyond localhost.
    pub api_key: Option<String>,

    /// Whether to require API key for metrics endpoint.
    ///
    /// Default: false (metrics are public for observability tooling)
    pub require_auth_for_metrics: bool,
}

impl Default for ServerConfig {
    fn default() -> Self {
        Self {
            // Default to localhost-only origin for development safety
            allowed_origins: vec!["http://localhost:3000".to_string()],
            api_key: None,
            require_auth_for_metrics: false,
        }
    }
}

impl ServerConfig {
    /// Create a permissive config for local development only.
    ///
    /// # Security
    ///
    /// **WARNING**: Do not use in production. This config allows all CORS origins
    /// and has no authentication.
    #[must_use]
    pub fn development() -> Self {
        Self {
            allowed_origins: vec!["*".to_string()],
            api_key: None,
            require_auth_for_metrics: false,
        }
    }

    /// Create a production config with explicit origins and required API key.
    ///
    /// # Panics
    ///
    /// Panics if `api_key` is empty.
    #[must_use]
    pub fn production(allowed_origins: Vec<String>, api_key: String) -> Self {
        assert!(
            !api_key.is_empty(),
            "API key must not be empty for production"
        );
        Self {
            allowed_origins,
            api_key: Some(api_key),
            require_auth_for_metrics: true,
        }
    }

    /// Returns a security warning if the config is insecure.
    ///
    /// Warns about permissive CORS (`"*"` origin), a missing API key, or both.
    /// Returns `None` when neither issue is present.
    pub fn security_warning(&self) -> Option<&'static str> {
        // Single scan, comparing &str directly; avoids allocating a String
        // per comparison and re-scanning the origins list (clippy::cmp_owned).
        let wildcard_cors = self.allowed_origins.iter().any(|origin| origin == "*");
        let no_api_key = self.api_key.is_none();

        if wildcard_cors && no_api_key {
            Some(
                "Server config has permissive CORS (allow all origins) and no API key. \
                 This is insecure for production deployments.",
            )
        } else if wildcard_cors {
            Some(
                "Server config has permissive CORS (allow all origins). \
                 Consider restricting to specific origins for production.",
            )
        } else if no_api_key {
            Some(
                "Server config has no API key authentication. \
                 Enable api_key for production deployments.",
            )
        } else {
            None
        }
    }
}
249impl FileSandboxConfig {
250    /// Creates a sandboxed config that only allows operations in the specified directories.
251    pub fn sandboxed(paths: Vec<PathBuf>) -> Self {
252        Self {
253            enabled: true,
254            allowed_paths: paths,
255        }
256    }
257
258    /// Creates a config with appropriate defaults for the deployment mode.
259    ///
260    /// # Security
261    ///
262    /// - **Embedded mode**: Sandbox disabled (host application controls access)
263    /// - **Server mode**: Sandbox enabled with default paths `/var/lib/uni/data` and
264    ///   `/var/lib/uni/backups`
265    ///
266    /// **CWE-22 (Path Traversal)**: Server deployments MUST enable the sandbox to
267    /// prevent arbitrary file read/write via BACKUP, COPY, and EXPORT commands.
268    pub fn default_for_mode(mode: DeploymentMode) -> Self {
269        match mode {
270            DeploymentMode::Embedded => Self {
271                enabled: false,
272                allowed_paths: vec![],
273            },
274            DeploymentMode::Server => Self {
275                enabled: true,
276                allowed_paths: vec![
277                    PathBuf::from("/var/lib/uni/data"),
278                    PathBuf::from("/var/lib/uni/backups"),
279                ],
280            },
281        }
282    }
283
284    /// Returns a security warning message if the sandbox is disabled.
285    ///
286    /// Call this at startup to alert administrators about potential security risks.
287    /// Returns `Some(message)` if a warning should be displayed, `None` otherwise.
288    ///
289    /// # Security
290    ///
291    /// **CWE-22 (Path Traversal)**, **CWE-73 (External Control of File Name)**:
292    /// Disabled sandbox allows unrestricted filesystem access for BACKUP, COPY,
293    /// and EXPORT commands, which can lead to:
294    /// - Arbitrary file read/write in server deployments
295    /// - Data exfiltration to attacker-controlled paths
296    /// - Potential privilege escalation via file overwrites
297    ///
298    /// # Example
299    ///
300    /// ```ignore
301    /// if let Some(warning) = config.file_sandbox.security_warning() {
302    ///     tracing::warn!(target: "uni_db::security", "{}", warning);
303    /// }
304    /// ```
305    pub fn security_warning(&self) -> Option<&'static str> {
306        if !self.enabled {
307            Some(
308                "File sandbox is DISABLED. This allows unrestricted filesystem access \
309                 for BACKUP, COPY, and EXPORT commands. Enable sandbox for server \
310                 deployments: file_sandbox.enabled = true",
311            )
312        } else {
313            None
314        }
315    }
316
317    /// Returns whether the sandbox is in a potentially insecure state.
318    ///
319    /// Returns `true` if the sandbox is disabled or enabled with no allowed paths.
320    pub fn is_potentially_insecure(&self) -> bool {
321        !self.enabled || self.allowed_paths.is_empty()
322    }
323
324    /// Validate that a path is within the allowed sandbox.
325    /// Returns Ok(canonical_path) if allowed, Err if not.
326    pub fn validate_path(&self, path: &str) -> Result<PathBuf, String> {
327        if !self.enabled {
328            // Sandbox disabled - allow all paths
329            return Ok(PathBuf::from(path));
330        }
331
332        if self.allowed_paths.is_empty() {
333            return Err("File sandbox is enabled but no allowed paths configured".to_string());
334        }
335
336        // Resolve the path to canonical form to prevent traversal attacks
337        let input_path = Path::new(path);
338
339        // For paths that don't exist yet (e.g., export destinations), we need to
340        // check their parent directory exists and is within allowed paths
341        let canonical = if input_path.exists() {
342            input_path
343                .canonicalize()
344                .map_err(|e| format!("Failed to canonicalize path: {}", e))?
345        } else {
346            // Path doesn't exist - check parent
347            let parent = input_path
348                .parent()
349                .ok_or_else(|| "Invalid path: no parent directory".to_string())?;
350            if !parent.exists() {
351                return Err(format!(
352                    "Parent directory does not exist: {}",
353                    parent.display()
354                ));
355            }
356            let canonical_parent = parent
357                .canonicalize()
358                .map_err(|e| format!("Failed to canonicalize parent: {}", e))?;
359            // Reconstruct with canonical parent + original filename
360            let filename = input_path
361                .file_name()
362                .ok_or_else(|| "Invalid path: no filename".to_string())?;
363            canonical_parent.join(filename)
364        };
365
366        // Check if the canonical path is within any allowed directory
367        for allowed in &self.allowed_paths {
368            // Ensure allowed path is canonical too
369            let canonical_allowed = if allowed.exists() {
370                allowed.canonicalize().unwrap_or_else(|_| allowed.clone())
371            } else {
372                allowed.clone()
373            };
374
375            if canonical.starts_with(&canonical_allowed) {
376                return Ok(canonical);
377            }
378        }
379
380        Err(format!(
381            "Path '{}' is outside allowed sandbox directories. Allowed: {:?}",
382            path, self.allowed_paths
383        ))
384    }
385}
386
387#[derive(Clone, Debug)]
388pub struct UniConfig {
389    /// Maximum adjacency cache size in bytes (default: 1GB)
390    pub cache_size: usize,
391
392    /// Number of worker threads for parallel execution
393    pub parallelism: usize,
394
395    /// Size of each data morsel/batch (number of rows)
396    pub batch_size: usize,
397
398    /// Maximum size of traversal frontier before pruning
399    pub max_frontier_size: usize,
400
401    /// Auto-flush threshold for L0 buffer (default: 10_000 mutations)
402    pub auto_flush_threshold: usize,
403
404    /// Auto-flush interval for L0 buffer (default: 5 seconds).
405    /// Flush triggers if time elapsed AND mutation count >= auto_flush_min_mutations.
406    /// Set to None to disable time-based flush.
407    pub auto_flush_interval: Option<Duration>,
408
409    /// Minimum mutations required before time-based flush triggers (default: 1).
410    /// Prevents unnecessary flushes when there's minimal activity.
411    pub auto_flush_min_mutations: usize,
412
413    /// Enable write-ahead logging (default: true)
414    pub wal_enabled: bool,
415
416    /// Compaction configuration
417    pub compaction: CompactionConfig,
418
419    /// Write throttling configuration
420    pub throttle: WriteThrottleConfig,
421
422    /// File sandbox configuration for BACKUP/COPY/EXPORT commands.
423    /// MUST be enabled with allowed paths in server mode to prevent arbitrary file access.
424    pub file_sandbox: FileSandboxConfig,
425
426    /// Default query execution timeout (default: 30s)
427    pub query_timeout: Duration,
428
429    /// Default maximum memory per query (default: 1GB)
430    pub max_query_memory: usize,
431
432    /// Maximum transaction buffer memory in bytes (default: 1GB).
433    /// Limits memory usage during transactions to prevent OOM.
434    pub max_transaction_memory: usize,
435
436    /// Maximum rows for in-memory compaction (default: 5M, ~725MB at 145 bytes/row).
437    /// Configurable OOM guard to prevent memory exhaustion during compaction.
438    pub max_compaction_rows: usize,
439
440    /// Enable in-memory VID-to-labels index for O(1) lookups (default: true).
441    /// Memory cost: ~42 bytes per vertex (1M vertices ≈ 42MB).
442    pub enable_vid_labels_index: bool,
443
444    /// Object store resilience configuration
445    pub object_store: ObjectStoreConfig,
446
447    /// Background index rebuild configuration
448    pub index_rebuild: IndexRebuildConfig,
449}
450
451impl Default for UniConfig {
452    fn default() -> Self {
453        let parallelism = thread::available_parallelism()
454            .map(|n| n.get())
455            .unwrap_or(4);
456
457        Self {
458            cache_size: 1024 * 1024 * 1024, // 1GB
459            parallelism,
460            batch_size: 1024, // Default morsel size
461            max_frontier_size: 1_000_000,
462            auto_flush_threshold: 10_000,
463            auto_flush_interval: Some(Duration::from_secs(5)),
464            auto_flush_min_mutations: 1,
465            wal_enabled: true,
466            compaction: CompactionConfig::default(),
467            throttle: WriteThrottleConfig::default(),
468            file_sandbox: FileSandboxConfig::default(),
469            query_timeout: Duration::from_secs(30),
470            max_query_memory: 1024 * 1024 * 1024,       // 1GB
471            max_transaction_memory: 1024 * 1024 * 1024, // 1GB
472            max_compaction_rows: 5_000_000,             // 5M rows
473            enable_vid_labels_index: true,              // Enable by default
474            object_store: ObjectStoreConfig::default(),
475            index_rebuild: IndexRebuildConfig::default(),
476        }
477    }
478}
479
/// Cloud storage backend configuration.
///
/// Supports Amazon S3, Google Cloud Storage, and Azure Blob Storage.
/// Each variant contains the credentials and connection parameters for
/// its respective cloud provider.
///
/// # Examples
///
/// ```ignore
/// // Create S3 configuration from environment variables
/// let config = CloudStorageConfig::s3_from_env("my-bucket");
///
/// // Create explicit S3 configuration for LocalStack testing
/// let config = CloudStorageConfig::S3 {
///     bucket: "test-bucket".to_string(),
///     region: Some("us-east-1".to_string()),
///     endpoint: Some("http://localhost:4566".to_string()),
///     access_key_id: Some("test".to_string()),
///     secret_access_key: Some("test".to_string()),
///     session_token: None,
///     virtual_hosted_style: false,
/// };
/// ```
#[derive(Clone, Debug)]
pub enum CloudStorageConfig {
    /// Amazon S3 storage configuration.
    S3 {
        /// S3 bucket name.
        bucket: String,
        /// AWS region (e.g., "us-east-1"). Uses AWS_REGION env var if None.
        region: Option<String>,
        /// Custom endpoint URL for S3-compatible services (MinIO, LocalStack).
        endpoint: Option<String>,
        /// AWS access key ID. Uses AWS_ACCESS_KEY_ID env var if None.
        access_key_id: Option<String>,
        /// AWS secret access key. Uses AWS_SECRET_ACCESS_KEY env var if None.
        secret_access_key: Option<String>,
        /// AWS session token for temporary credentials.
        session_token: Option<String>,
        /// Use virtual-hosted-style requests (bucket.s3.region.amazonaws.com).
        virtual_hosted_style: bool,
    },
    /// Google Cloud Storage configuration.
    Gcs {
        /// GCS bucket name.
        bucket: String,
        /// Path to service account JSON key file.
        service_account_path: Option<String>,
        /// Service account JSON key content (alternative to path).
        service_account_key: Option<String>,
    },
    /// Azure Blob Storage configuration.
    Azure {
        /// Azure container name.
        container: String,
        /// Azure storage account name.
        account: String,
        /// Azure storage account access key.
        access_key: Option<String>,
        /// Azure SAS token for limited access.
        sas_token: Option<String>,
    },
}

impl CloudStorageConfig {
    /// Creates an S3 configuration using environment variables.
    ///
    /// Reads credentials from standard AWS environment variables:
    /// - `AWS_ACCESS_KEY_ID`
    /// - `AWS_SECRET_ACCESS_KEY`
    /// - `AWS_SESSION_TOKEN` (optional)
    /// - `AWS_REGION` or `AWS_DEFAULT_REGION`
    /// - `AWS_ENDPOINT_URL` (optional, for S3-compatible services)
    #[must_use]
    pub fn s3_from_env(bucket: &str) -> Self {
        Self::S3 {
            bucket: bucket.to_string(),
            // AWS_REGION takes precedence over AWS_DEFAULT_REGION
            region: std::env::var("AWS_REGION")
                .or_else(|_| std::env::var("AWS_DEFAULT_REGION"))
                .ok(),
            endpoint: std::env::var("AWS_ENDPOINT_URL").ok(),
            access_key_id: std::env::var("AWS_ACCESS_KEY_ID").ok(),
            secret_access_key: std::env::var("AWS_SECRET_ACCESS_KEY").ok(),
            session_token: std::env::var("AWS_SESSION_TOKEN").ok(),
            virtual_hosted_style: false,
        }
    }

    /// Creates a GCS configuration using environment variables.
    ///
    /// Reads service account path from `GOOGLE_APPLICATION_CREDENTIALS`.
    #[must_use]
    pub fn gcs_from_env(bucket: &str) -> Self {
        Self::Gcs {
            bucket: bucket.to_string(),
            service_account_path: std::env::var("GOOGLE_APPLICATION_CREDENTIALS").ok(),
            service_account_key: None,
        }
    }

    /// Creates an Azure configuration using environment variables.
    ///
    /// Reads credentials from Azure environment variables:
    /// - `AZURE_STORAGE_ACCOUNT`
    /// - `AZURE_STORAGE_ACCESS_KEY` (optional)
    /// - `AZURE_STORAGE_SAS_TOKEN` (optional)
    ///
    /// # Panics
    ///
    /// Panics if `AZURE_STORAGE_ACCOUNT` is not set.
    #[must_use]
    pub fn azure_from_env(container: &str) -> Self {
        Self::Azure {
            container: container.to_string(),
            account: std::env::var("AZURE_STORAGE_ACCOUNT")
                .expect("AZURE_STORAGE_ACCOUNT environment variable required"),
            access_key: std::env::var("AZURE_STORAGE_ACCESS_KEY").ok(),
            sas_token: std::env::var("AZURE_STORAGE_SAS_TOKEN").ok(),
        }
    }

    /// Returns the bucket/container name for this configuration.
    #[must_use]
    pub fn bucket_name(&self) -> &str {
        match self {
            Self::S3 { bucket, .. } => bucket,
            Self::Gcs { bucket, .. } => bucket,
            Self::Azure { container, .. } => container,
        }
    }

    /// Returns a URL-style identifier for this storage location.
    #[must_use]
    pub fn to_url(&self) -> String {
        match self {
            Self::S3 { bucket, .. } => format!("s3://{bucket}"),
            Self::Gcs { bucket, .. } => format!("gs://{bucket}"),
            Self::Azure {
                container, account, ..
            } => format!("az://{account}/{container}"),
        }
    }
}
624#[cfg(test)]
625mod security_tests {
626    use super::*;
627
628    /// Tests for CWE-22 (Path Traversal) prevention in file sandbox.
629    mod file_sandbox {
630        use super::*;
631
632        #[test]
633        fn test_sandbox_disabled_allows_all_paths() {
634            let config = FileSandboxConfig::default();
635            assert!(!config.enabled);
636            // When disabled, all paths are allowed
637            assert!(config.validate_path("/tmp/test").is_ok());
638        }
639
640        #[test]
641        fn test_sandbox_enabled_with_no_paths_rejects() {
642            let config = FileSandboxConfig {
643                enabled: true,
644                allowed_paths: vec![],
645            };
646            let result = config.validate_path("/tmp/test");
647            assert!(result.is_err());
648            assert!(result.unwrap_err().contains("no allowed paths configured"));
649        }
650
651        #[test]
652        fn test_sandbox_rejects_outside_path() {
653            let config = FileSandboxConfig {
654                enabled: true,
655                allowed_paths: vec![PathBuf::from("/var/lib/uni")],
656            };
657            let result = config.validate_path("/etc/passwd");
658            assert!(result.is_err());
659            assert!(result.unwrap_err().contains("outside allowed sandbox"));
660        }
661
662        #[test]
663        fn test_is_potentially_insecure() {
664            // Disabled is insecure
665            let disabled = FileSandboxConfig::default();
666            assert!(disabled.is_potentially_insecure());
667
668            // Enabled with no paths is insecure
669            let no_paths = FileSandboxConfig {
670                enabled: true,
671                allowed_paths: vec![],
672            };
673            assert!(no_paths.is_potentially_insecure());
674
675            // Enabled with paths is secure
676            let secure = FileSandboxConfig::sandboxed(vec![PathBuf::from("/data")]);
677            assert!(!secure.is_potentially_insecure());
678        }
679
680        #[test]
681        fn test_security_warning_when_disabled() {
682            let disabled = FileSandboxConfig::default();
683            assert!(disabled.security_warning().is_some());
684
685            let enabled = FileSandboxConfig::sandboxed(vec![PathBuf::from("/data")]);
686            assert!(enabled.security_warning().is_none());
687        }
688
689        #[test]
690        fn test_deployment_mode_defaults() {
691            let embedded = FileSandboxConfig::default_for_mode(DeploymentMode::Embedded);
692            assert!(!embedded.enabled);
693
694            let server = FileSandboxConfig::default_for_mode(DeploymentMode::Server);
695            assert!(server.enabled);
696            assert!(!server.allowed_paths.is_empty());
697        }
698    }
699}