uni_common/config.rs
1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2024-2026 Dragonscale Team
3
4use std::path::{Path, PathBuf};
5use std::thread;
6use std::time::Duration;
7
/// Settings that decide when and how background compaction of L1 runs happens.
///
/// The `(default: ...)` notes on each field are the values produced by
/// `CompactionConfig::default()`.
#[derive(Debug, Clone)]
pub struct CompactionConfig {
    /// Whether background compaction is turned on (default: true).
    pub enabled: bool,

    /// Number of L1 runs allowed before compaction is triggered (default: 4).
    pub max_l1_runs: usize,

    /// L1 size in bytes allowed before compaction (default: 256 MiB).
    pub max_l1_size_bytes: u64,

    /// Age the oldest L1 run may reach before compaction (default: 1 hour).
    pub max_l1_age: Duration,

    /// Time between background checks (default: 30s).
    pub check_interval: Duration,

    /// How many compaction worker threads to use (default: 1).
    pub worker_threads: usize,
}
28
29impl Default for CompactionConfig {
30 fn default() -> Self {
31 Self {
32 enabled: true,
33 max_l1_runs: 4,
34 max_l1_size_bytes: 256 * 1024 * 1024,
35 max_l1_age: Duration::from_secs(3600),
36 check_interval: Duration::from_secs(30),
37 worker_threads: 1,
38 }
39 }
40}
41
/// Settings for rebuilding indexes in the background.
///
/// The `(default: ...)` notes are the values produced by
/// `IndexRebuildConfig::default()`.
#[derive(Debug, Clone)]
pub struct IndexRebuildConfig {
    /// Upper bound on retry attempts for a failed index build (default: 3).
    pub max_retries: u32,

    /// Pause between two retry attempts (default: 60s).
    pub retry_delay: Duration,

    /// Interval at which pending index rebuild tasks are looked for (default: 5s).
    pub worker_check_interval: Duration,
}
54
55impl Default for IndexRebuildConfig {
56 fn default() -> Self {
57 Self {
58 max_retries: 3,
59 retry_delay: Duration::from_secs(60),
60 worker_check_interval: Duration::from_secs(5),
61 }
62 }
63}
64
/// Thresholds, expressed in L1 run counts, that control write throttling.
///
/// The `(default: ...)` notes are the values produced by
/// `WriteThrottleConfig::default()`.
#[derive(Debug, Clone, Copy)]
pub struct WriteThrottleConfig {
    /// L1 run count at which throttling starts (default: 8).
    pub soft_limit: usize,

    /// L1 run count at which writes stop entirely (default: 16).
    pub hard_limit: usize,

    /// Base delay applied while throttling (default: 10ms).
    pub base_delay: Duration,
}
76
77impl Default for WriteThrottleConfig {
78 fn default() -> Self {
79 Self {
80 soft_limit: 8,
81 hard_limit: 16,
82 base_delay: Duration::from_millis(10),
83 }
84 }
85}
86
/// Timeout and retry settings for object store access.
///
/// The `(default: ...)` notes are the values produced by
/// `ObjectStoreConfig::default()`.
#[derive(Debug, Clone)]
pub struct ObjectStoreConfig {
    /// Connect timeout (default: 10s).
    pub connect_timeout: Duration,
    /// Read timeout (default: 30s).
    pub read_timeout: Duration,
    /// Write timeout (default: 60s).
    pub write_timeout: Duration,
    /// Maximum number of retries (default: 3).
    pub max_retries: u32,
    /// Base value for the retry backoff (default: 100ms).
    pub retry_backoff_base: Duration,
    /// Upper bound for the retry backoff (default: 10s).
    pub retry_backoff_max: Duration,
}
96
97impl Default for ObjectStoreConfig {
98 fn default() -> Self {
99 Self {
100 connect_timeout: Duration::from_secs(10),
101 read_timeout: Duration::from_secs(30),
102 write_timeout: Duration::from_secs(60),
103 max_retries: 3,
104 retry_backoff_base: Duration::from_millis(100),
105 retry_backoff_max: Duration::from_secs(10),
106 }
107 }
108}
109
/// Security settings for file system operations.
///
/// Determines which paths the BACKUP, COPY, and EXPORT commands may touch.
///
/// The derived default is a disabled sandbox with no allowed paths, kept for
/// backward compatibility in embedded mode. The sandbox MUST be enabled for
/// server mode with untrusted clients.
#[derive(Debug, Clone, Default)]
pub struct FileSandboxConfig {
    /// When `true`, file operations are confined to `allowed_paths`.
    /// When `false`, every path is allowed (NOT RECOMMENDED for server mode).
    pub enabled: bool,

    /// Base directories inside which file operations are permitted.
    /// Paths must be absolute and canonical.
    pub allowed_paths: Vec<PathBuf>,
}
126
/// How the database is deployed.
///
/// The mode selects the security defaults that are appropriate for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum DeploymentMode {
    /// Embedded/library mode: the host application is in charge of access.
    /// The file sandbox is off by default for backward compatibility.
    #[default]
    Embedded,
    /// Server mode with untrusted clients.
    /// The file sandbox is on by default with restricted paths.
    Server,
}
140
/// HTTP server configuration.
///
/// Controls CORS, authentication, and other HTTP-related security settings.
///
/// # Security
///
/// **CWE-942 (Overly Permissive CORS)**, **CWE-306 (Missing Authentication)**:
/// Production deployments should configure explicit `allowed_origins` and
/// enable API key authentication.
///
/// **CWE-532 (Sensitive Information in Log File)**: the `Debug` implementation
/// is written by hand so that `{:?}` never prints the API key; it only shows
/// whether one is configured.
#[derive(Clone)]
pub struct ServerConfig {
    /// Allowed CORS origins.
    ///
    /// - Empty vector: No CORS headers (most restrictive)
    /// - `["*"]`: Allow all origins (NOT RECOMMENDED for production)
    /// - Explicit list: Only allow specified origins (RECOMMENDED)
    ///
    /// # Security
    ///
    /// **CWE-942**: Using `["*"]` allows any website to make requests to
    /// your server, potentially exposing sensitive data.
    pub allowed_origins: Vec<String>,

    /// Optional API key for request authentication.
    ///
    /// When set, all API requests must include the header:
    /// `X-API-Key: <key>`
    ///
    /// # Security
    ///
    /// **CWE-306**: Without authentication, any client can execute queries.
    /// Enable this for any deployment accessible beyond localhost.
    pub api_key: Option<String>,

    /// Whether to require API key for metrics endpoint.
    ///
    /// Default: false (metrics are public for observability tooling)
    pub require_auth_for_metrics: bool,
}

impl std::fmt::Debug for ServerConfig {
    /// Formats the config like a derived `Debug` would, except that a configured
    /// API key is rendered as `Some("<redacted>")` instead of its value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let api_key = self.api_key.as_ref().map(|_| "<redacted>");
        f.debug_struct("ServerConfig")
            .field("allowed_origins", &self.allowed_origins)
            .field("api_key", &api_key)
            .field("require_auth_for_metrics", &self.require_auth_for_metrics)
            .finish()
    }
}
180
181impl Default for ServerConfig {
182 fn default() -> Self {
183 Self {
184 // Default to localhost-only origin for development safety
185 allowed_origins: vec!["http://localhost:3000".to_string()],
186 api_key: None,
187 require_auth_for_metrics: false,
188 }
189 }
190}
191
192impl ServerConfig {
193 /// Create a permissive config for local development only.
194 ///
195 /// # Security
196 ///
197 /// **WARNING**: Do not use in production. This config allows all CORS origins
198 /// and has no authentication.
199 #[must_use]
200 pub fn development() -> Self {
201 Self {
202 allowed_origins: vec!["*".to_string()],
203 api_key: None,
204 require_auth_for_metrics: false,
205 }
206 }
207
208 /// Create a production config with explicit origins and required API key.
209 ///
210 /// # Panics
211 ///
212 /// Panics if `api_key` is empty.
213 #[must_use]
214 pub fn production(allowed_origins: Vec<String>, api_key: String) -> Self {
215 assert!(
216 !api_key.is_empty(),
217 "API key must not be empty for production"
218 );
219 Self {
220 allowed_origins,
221 api_key: Some(api_key),
222 require_auth_for_metrics: true,
223 }
224 }
225
226 /// Returns a security warning if the config is insecure.
227 pub fn security_warning(&self) -> Option<&'static str> {
228 if self.allowed_origins.contains(&"*".to_string()) && self.api_key.is_none() {
229 Some(
230 "Server config has permissive CORS (allow all origins) and no API key. \
231 This is insecure for production deployments.",
232 )
233 } else if self.allowed_origins.contains(&"*".to_string()) {
234 Some(
235 "Server config has permissive CORS (allow all origins). \
236 Consider restricting to specific origins for production.",
237 )
238 } else if self.api_key.is_none() {
239 Some(
240 "Server config has no API key authentication. \
241 Enable api_key for production deployments.",
242 )
243 } else {
244 None
245 }
246 }
247}
248
249impl FileSandboxConfig {
250 /// Creates a sandboxed config that only allows operations in the specified directories.
251 pub fn sandboxed(paths: Vec<PathBuf>) -> Self {
252 Self {
253 enabled: true,
254 allowed_paths: paths,
255 }
256 }
257
258 /// Creates a config with appropriate defaults for the deployment mode.
259 ///
260 /// # Security
261 ///
262 /// - **Embedded mode**: Sandbox disabled (host application controls access)
263 /// - **Server mode**: Sandbox enabled with default paths `/var/lib/uni/data` and
264 /// `/var/lib/uni/backups`
265 ///
266 /// **CWE-22 (Path Traversal)**: Server deployments MUST enable the sandbox to
267 /// prevent arbitrary file read/write via BACKUP, COPY, and EXPORT commands.
268 pub fn default_for_mode(mode: DeploymentMode) -> Self {
269 match mode {
270 DeploymentMode::Embedded => Self {
271 enabled: false,
272 allowed_paths: vec![],
273 },
274 DeploymentMode::Server => Self {
275 enabled: true,
276 allowed_paths: vec![
277 PathBuf::from("/var/lib/uni/data"),
278 PathBuf::from("/var/lib/uni/backups"),
279 ],
280 },
281 }
282 }
283
284 /// Returns a security warning message if the sandbox is disabled.
285 ///
286 /// Call this at startup to alert administrators about potential security risks.
287 /// Returns `Some(message)` if a warning should be displayed, `None` otherwise.
288 ///
289 /// # Security
290 ///
291 /// **CWE-22 (Path Traversal)**, **CWE-73 (External Control of File Name)**:
292 /// Disabled sandbox allows unrestricted filesystem access for BACKUP, COPY,
293 /// and EXPORT commands, which can lead to:
294 /// - Arbitrary file read/write in server deployments
295 /// - Data exfiltration to attacker-controlled paths
296 /// - Potential privilege escalation via file overwrites
297 ///
298 /// # Example
299 ///
300 /// ```ignore
301 /// if let Some(warning) = config.file_sandbox.security_warning() {
302 /// tracing::warn!(target: "uni_db::security", "{}", warning);
303 /// }
304 /// ```
305 pub fn security_warning(&self) -> Option<&'static str> {
306 if !self.enabled {
307 Some(
308 "File sandbox is DISABLED. This allows unrestricted filesystem access \
309 for BACKUP, COPY, and EXPORT commands. Enable sandbox for server \
310 deployments: file_sandbox.enabled = true",
311 )
312 } else {
313 None
314 }
315 }
316
317 /// Returns whether the sandbox is in a potentially insecure state.
318 ///
319 /// Returns `true` if the sandbox is disabled or enabled with no allowed paths.
320 pub fn is_potentially_insecure(&self) -> bool {
321 !self.enabled || self.allowed_paths.is_empty()
322 }
323
324 /// Validate that a path is within the allowed sandbox.
325 /// Returns Ok(canonical_path) if allowed, Err if not.
326 pub fn validate_path(&self, path: &str) -> Result<PathBuf, String> {
327 if !self.enabled {
328 // Sandbox disabled - allow all paths
329 return Ok(PathBuf::from(path));
330 }
331
332 if self.allowed_paths.is_empty() {
333 return Err("File sandbox is enabled but no allowed paths configured".to_string());
334 }
335
336 // Resolve the path to canonical form to prevent traversal attacks
337 let input_path = Path::new(path);
338
339 // For paths that don't exist yet (e.g., export destinations), we need to
340 // check their parent directory exists and is within allowed paths
341 let canonical = if input_path.exists() {
342 input_path
343 .canonicalize()
344 .map_err(|e| format!("Failed to canonicalize path: {}", e))?
345 } else {
346 // Path doesn't exist - check parent
347 let parent = input_path
348 .parent()
349 .ok_or_else(|| "Invalid path: no parent directory".to_string())?;
350 if !parent.exists() {
351 return Err(format!(
352 "Parent directory does not exist: {}",
353 parent.display()
354 ));
355 }
356 let canonical_parent = parent
357 .canonicalize()
358 .map_err(|e| format!("Failed to canonicalize parent: {}", e))?;
359 // Reconstruct with canonical parent + original filename
360 let filename = input_path
361 .file_name()
362 .ok_or_else(|| "Invalid path: no filename".to_string())?;
363 canonical_parent.join(filename)
364 };
365
366 // Check if the canonical path is within any allowed directory
367 for allowed in &self.allowed_paths {
368 // Ensure allowed path is canonical too
369 let canonical_allowed = if allowed.exists() {
370 allowed.canonicalize().unwrap_or_else(|_| allowed.clone())
371 } else {
372 allowed.clone()
373 };
374
375 if canonical.starts_with(&canonical_allowed) {
376 return Ok(canonical);
377 }
378 }
379
380 Err(format!(
381 "Path '{}' is outside allowed sandbox directories. Allowed: {:?}",
382 path, self.allowed_paths
383 ))
384 }
385}
386
387#[derive(Clone, Debug)]
388pub struct UniConfig {
389 /// Maximum adjacency cache size in bytes (default: 1GB)
390 pub cache_size: usize,
391
392 /// Number of worker threads for parallel execution
393 pub parallelism: usize,
394
395 /// Size of each data morsel/batch (number of rows)
396 pub batch_size: usize,
397
398 /// Maximum size of traversal frontier before pruning
399 pub max_frontier_size: usize,
400
401 /// Auto-flush threshold for L0 buffer (default: 10_000 mutations)
402 pub auto_flush_threshold: usize,
403
404 /// Auto-flush interval for L0 buffer (default: 5 seconds).
405 /// Flush triggers if time elapsed AND mutation count >= auto_flush_min_mutations.
406 /// Set to None to disable time-based flush.
407 pub auto_flush_interval: Option<Duration>,
408
409 /// Minimum mutations required before time-based flush triggers (default: 1).
410 /// Prevents unnecessary flushes when there's minimal activity.
411 pub auto_flush_min_mutations: usize,
412
413 /// Enable write-ahead logging (default: true)
414 pub wal_enabled: bool,
415
416 /// Compaction configuration
417 pub compaction: CompactionConfig,
418
419 /// Write throttling configuration
420 pub throttle: WriteThrottleConfig,
421
422 /// File sandbox configuration for BACKUP/COPY/EXPORT commands.
423 /// MUST be enabled with allowed paths in server mode to prevent arbitrary file access.
424 pub file_sandbox: FileSandboxConfig,
425
426 /// Default query execution timeout (default: 30s)
427 pub query_timeout: Duration,
428
429 /// Default maximum memory per query (default: 1GB)
430 pub max_query_memory: usize,
431
432 /// Maximum transaction buffer memory in bytes (default: 1GB).
433 /// Limits memory usage during transactions to prevent OOM.
434 pub max_transaction_memory: usize,
435
436 /// Maximum rows for in-memory compaction (default: 5M, ~725MB at 145 bytes/row).
437 /// Configurable OOM guard to prevent memory exhaustion during compaction.
438 pub max_compaction_rows: usize,
439
440 /// Enable in-memory VID-to-labels index for O(1) lookups (default: true).
441 /// Memory cost: ~42 bytes per vertex (1M vertices ≈ 42MB).
442 pub enable_vid_labels_index: bool,
443
444 /// Object store resilience configuration
445 pub object_store: ObjectStoreConfig,
446
447 /// Background index rebuild configuration
448 pub index_rebuild: IndexRebuildConfig,
449}
450
451impl Default for UniConfig {
452 fn default() -> Self {
453 let parallelism = thread::available_parallelism()
454 .map(|n| n.get())
455 .unwrap_or(4);
456
457 Self {
458 cache_size: 1024 * 1024 * 1024, // 1GB
459 parallelism,
460 batch_size: 1024, // Default morsel size
461 max_frontier_size: 1_000_000,
462 auto_flush_threshold: 10_000,
463 auto_flush_interval: Some(Duration::from_secs(5)),
464 auto_flush_min_mutations: 1,
465 wal_enabled: true,
466 compaction: CompactionConfig::default(),
467 throttle: WriteThrottleConfig::default(),
468 file_sandbox: FileSandboxConfig::default(),
469 query_timeout: Duration::from_secs(30),
470 max_query_memory: 1024 * 1024 * 1024, // 1GB
471 max_transaction_memory: 1024 * 1024 * 1024, // 1GB
472 max_compaction_rows: 5_000_000, // 5M rows
473 enable_vid_labels_index: true, // Enable by default
474 object_store: ObjectStoreConfig::default(),
475 index_rebuild: IndexRebuildConfig::default(),
476 }
477 }
478}
479
/// Cloud storage backend configuration.
///
/// Supports Amazon S3, Google Cloud Storage, and Azure Blob Storage.
/// Each variant contains the credentials and connection parameters for
/// its respective cloud provider.
///
/// # Security
///
/// The `Debug` implementation is written by hand so that `{:?}` never prints
/// secret values (`secret_access_key`, `session_token`, `service_account_key`,
/// `access_key`, `sas_token`); it only shows whether each one is set.
///
/// # Examples
///
/// ```ignore
/// // Create S3 configuration from environment variables
/// let config = CloudStorageConfig::s3_from_env("my-bucket");
///
/// // Create explicit S3 configuration for LocalStack testing
/// let config = CloudStorageConfig::S3 {
///     bucket: "test-bucket".to_string(),
///     region: Some("us-east-1".to_string()),
///     endpoint: Some("http://localhost:4566".to_string()),
///     access_key_id: Some("test".to_string()),
///     secret_access_key: Some("test".to_string()),
///     session_token: None,
///     virtual_hosted_style: false,
/// };
/// ```
#[derive(Clone)]
pub enum CloudStorageConfig {
    /// Amazon S3 storage configuration.
    S3 {
        /// S3 bucket name.
        bucket: String,
        /// AWS region (e.g., "us-east-1"). Uses AWS_REGION env var if None.
        region: Option<String>,
        /// Custom endpoint URL for S3-compatible services (MinIO, LocalStack).
        endpoint: Option<String>,
        /// AWS access key ID. Uses AWS_ACCESS_KEY_ID env var if None.
        access_key_id: Option<String>,
        /// AWS secret access key. Uses AWS_SECRET_ACCESS_KEY env var if None.
        secret_access_key: Option<String>,
        /// AWS session token for temporary credentials.
        session_token: Option<String>,
        /// Use virtual-hosted-style requests (bucket.s3.region.amazonaws.com).
        virtual_hosted_style: bool,
    },
    /// Google Cloud Storage configuration.
    Gcs {
        /// GCS bucket name.
        bucket: String,
        /// Path to service account JSON key file.
        service_account_path: Option<String>,
        /// Service account JSON key content (alternative to path).
        service_account_key: Option<String>,
    },
    /// Azure Blob Storage configuration.
    Azure {
        /// Azure container name.
        container: String,
        /// Azure storage account name.
        account: String,
        /// Azure storage account access key.
        access_key: Option<String>,
        /// Azure SAS token for limited access.
        sas_token: Option<String>,
    },
}

impl std::fmt::Debug for CloudStorageConfig {
    /// Formats the config like a derived `Debug` would, except that every
    /// secret that is set is rendered as `Some("<redacted>")`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keeps the Some/None shape so logs still show whether a secret is configured.
        fn masked(secret: &Option<String>) -> Option<&'static str> {
            secret.as_ref().map(|_| "<redacted>")
        }

        match self {
            Self::S3 {
                bucket,
                region,
                endpoint,
                access_key_id,
                secret_access_key,
                session_token,
                virtual_hosted_style,
            } => f
                .debug_struct("S3")
                .field("bucket", bucket)
                .field("region", region)
                .field("endpoint", endpoint)
                .field("access_key_id", access_key_id)
                .field("secret_access_key", &masked(secret_access_key))
                .field("session_token", &masked(session_token))
                .field("virtual_hosted_style", virtual_hosted_style)
                .finish(),
            Self::Gcs {
                bucket,
                service_account_path,
                service_account_key,
            } => f
                .debug_struct("Gcs")
                .field("bucket", bucket)
                .field("service_account_path", service_account_path)
                .field("service_account_key", &masked(service_account_key))
                .finish(),
            Self::Azure {
                container,
                account,
                access_key,
                sas_token,
            } => f
                .debug_struct("Azure")
                .field("container", container)
                .field("account", account)
                .field("access_key", &masked(access_key))
                .field("sas_token", &masked(sas_token))
                .finish(),
        }
    }
}
543
544impl CloudStorageConfig {
545 /// Creates an S3 configuration using environment variables.
546 ///
547 /// Reads credentials from standard AWS environment variables:
548 /// - `AWS_ACCESS_KEY_ID`
549 /// - `AWS_SECRET_ACCESS_KEY`
550 /// - `AWS_SESSION_TOKEN` (optional)
551 /// - `AWS_REGION` or `AWS_DEFAULT_REGION`
552 /// - `AWS_ENDPOINT_URL` (optional, for S3-compatible services)
553 #[must_use]
554 pub fn s3_from_env(bucket: &str) -> Self {
555 Self::S3 {
556 bucket: bucket.to_string(),
557 region: std::env::var("AWS_REGION")
558 .or_else(|_| std::env::var("AWS_DEFAULT_REGION"))
559 .ok(),
560 endpoint: std::env::var("AWS_ENDPOINT_URL").ok(),
561 access_key_id: std::env::var("AWS_ACCESS_KEY_ID").ok(),
562 secret_access_key: std::env::var("AWS_SECRET_ACCESS_KEY").ok(),
563 session_token: std::env::var("AWS_SESSION_TOKEN").ok(),
564 virtual_hosted_style: false,
565 }
566 }
567
568 /// Creates a GCS configuration using environment variables.
569 ///
570 /// Reads service account path from `GOOGLE_APPLICATION_CREDENTIALS`.
571 #[must_use]
572 pub fn gcs_from_env(bucket: &str) -> Self {
573 Self::Gcs {
574 bucket: bucket.to_string(),
575 service_account_path: std::env::var("GOOGLE_APPLICATION_CREDENTIALS").ok(),
576 service_account_key: None,
577 }
578 }
579
580 /// Creates an Azure configuration using environment variables.
581 ///
582 /// Reads credentials from Azure environment variables:
583 /// - `AZURE_STORAGE_ACCOUNT`
584 /// - `AZURE_STORAGE_ACCESS_KEY` (optional)
585 /// - `AZURE_STORAGE_SAS_TOKEN` (optional)
586 ///
587 /// # Panics
588 ///
589 /// Panics if `AZURE_STORAGE_ACCOUNT` is not set.
590 #[must_use]
591 pub fn azure_from_env(container: &str) -> Self {
592 Self::Azure {
593 container: container.to_string(),
594 account: std::env::var("AZURE_STORAGE_ACCOUNT")
595 .expect("AZURE_STORAGE_ACCOUNT environment variable required"),
596 access_key: std::env::var("AZURE_STORAGE_ACCESS_KEY").ok(),
597 sas_token: std::env::var("AZURE_STORAGE_SAS_TOKEN").ok(),
598 }
599 }
600
601 /// Returns the bucket/container name for this configuration.
602 #[must_use]
603 pub fn bucket_name(&self) -> &str {
604 match self {
605 Self::S3 { bucket, .. } => bucket,
606 Self::Gcs { bucket, .. } => bucket,
607 Self::Azure { container, .. } => container,
608 }
609 }
610
611 /// Returns a URL-style identifier for this storage location.
612 #[must_use]
613 pub fn to_url(&self) -> String {
614 match self {
615 Self::S3 { bucket, .. } => format!("s3://{bucket}"),
616 Self::Gcs { bucket, .. } => format!("gs://{bucket}"),
617 Self::Azure {
618 container, account, ..
619 } => format!("az://{account}/{container}"),
620 }
621 }
622}
623
#[cfg(test)]
mod security_tests {
    use super::*;

    /// Checks that the file sandbox prevents CWE-22 (Path Traversal).
    mod file_sandbox {
        use super::*;

        #[test]
        fn test_sandbox_disabled_allows_all_paths() {
            let sandbox = FileSandboxConfig::default();
            assert!(!sandbox.enabled);
            // A disabled sandbox lets every path through.
            let outcome = sandbox.validate_path("/tmp/test");
            assert!(outcome.is_ok());
        }

        #[test]
        fn test_sandbox_enabled_with_no_paths_rejects() {
            let sandbox = FileSandboxConfig {
                enabled: true,
                allowed_paths: Vec::new(),
            };
            match sandbox.validate_path("/tmp/test") {
                Err(message) => assert!(message.contains("no allowed paths configured")),
                Ok(accepted) => panic!("expected a rejection, got {:?}", accepted),
            }
        }

        #[test]
        fn test_sandbox_rejects_outside_path() {
            let sandbox = FileSandboxConfig {
                enabled: true,
                allowed_paths: vec![PathBuf::from("/var/lib/uni")],
            };
            match sandbox.validate_path("/etc/passwd") {
                Err(message) => assert!(message.contains("outside allowed sandbox")),
                Ok(accepted) => panic!("expected a rejection, got {:?}", accepted),
            }
        }

        #[test]
        fn test_is_potentially_insecure() {
            // A disabled sandbox counts as insecure.
            assert!(FileSandboxConfig::default().is_potentially_insecure());

            // So does an enabled sandbox without any allowed path.
            let without_paths = FileSandboxConfig {
                enabled: true,
                allowed_paths: Vec::new(),
            };
            assert!(without_paths.is_potentially_insecure());

            // An enabled sandbox with at least one path is considered secure.
            let with_paths = FileSandboxConfig::sandboxed(vec![PathBuf::from("/data")]);
            assert!(!with_paths.is_potentially_insecure());
        }

        #[test]
        fn test_security_warning_when_disabled() {
            let off = FileSandboxConfig::default();
            assert!(off.security_warning().is_some());

            let on = FileSandboxConfig::sandboxed(vec![PathBuf::from("/data")]);
            assert!(on.security_warning().is_none());
        }

        #[test]
        fn test_deployment_mode_defaults() {
            let for_embedded = FileSandboxConfig::default_for_mode(DeploymentMode::Embedded);
            assert!(!for_embedded.enabled);

            let for_server = FileSandboxConfig::default_for_mode(DeploymentMode::Server);
            assert!(for_server.enabled);
            assert!(!for_server.allowed_paths.is_empty());
        }
    }
}