Skip to main content

magic_bird/
config.rs

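//! Configuration for BIRD.
//!
//! BIRD_ROOT resolution order:
//! 1. Explicit path passed to `Config::with_root()` or `Config::load_from()`
//! 2. `BIRD_ROOT` environment variable
//! 3. Default: the platform data directory reported by the `directories`
//!    crate, falling back to `~/.local/share/bird`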
7
8use std::path::{Path, PathBuf};
9use std::str::FromStr;
10
11use directories::ProjectDirs;
12use serde::{Deserialize, Serialize};
13
14use crate::{Error, Result};
15
16/// Storage mode for BIRD data.
17///
18/// - **Parquet**: Multi-writer safe using atomic file creation. Suitable for
19///   concurrent shell hooks (shq). Requires periodic compaction.
20/// - **DuckDB**: Single-writer using direct table inserts. Simpler but requires
21///   serialized writes. Suitable for sequential CLI tools (blq).
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
23#[serde(rename_all = "lowercase")]
24pub enum StorageMode {
25    /// Write to Parquet files (multi-writer safe, requires compaction)
26    #[default]
27    Parquet,
28    /// Write directly to DuckDB tables (single-writer, no compaction needed)
29    DuckDB,
30}
31
32impl std::fmt::Display for StorageMode {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        match self {
35            StorageMode::Parquet => write!(f, "parquet"),
36            StorageMode::DuckDB => write!(f, "duckdb"),
37        }
38    }
39}
40
41impl FromStr for StorageMode {
42    type Err = Error;
43
44    fn from_str(s: &str) -> Result<Self> {
45        match s.to_lowercase().as_str() {
46            "parquet" => Ok(StorageMode::Parquet),
47            "duckdb" => Ok(StorageMode::DuckDB),
48            _ => Err(Error::Config(format!(
49                "Invalid storage mode '{}': expected 'parquet' or 'duckdb'",
50                s
51            ))),
52        }
53    }
54}
55
56/// Type of remote storage.
57#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
58#[serde(rename_all = "lowercase")]
59pub enum RemoteType {
60    /// S3-compatible object storage (s3://, gs://)
61    S3,
62    /// MotherDuck cloud database (md:)
63    MotherDuck,
64    /// PostgreSQL database
65    Postgres,
66    /// Local or network file path
67    File,
68}
69
70impl std::fmt::Display for RemoteType {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        match self {
73            RemoteType::S3 => write!(f, "s3"),
74            RemoteType::MotherDuck => write!(f, "motherduck"),
75            RemoteType::Postgres => write!(f, "postgres"),
76            RemoteType::File => write!(f, "file"),
77        }
78    }
79}
80
81impl FromStr for RemoteType {
82    type Err = Error;
83
84    fn from_str(s: &str) -> Result<Self> {
85        match s.to_lowercase().as_str() {
86            "s3" | "gcs" => Ok(RemoteType::S3),
87            "motherduck" | "md" => Ok(RemoteType::MotherDuck),
88            "postgres" | "postgresql" | "pg" => Ok(RemoteType::Postgres),
89            "file" | "local" => Ok(RemoteType::File),
90            _ => Err(Error::Config(format!(
91                "Invalid remote type '{}': expected 's3', 'motherduck', 'postgres', or 'file'",
92                s
93            ))),
94        }
95    }
96}
97
98/// Access mode for remote storage.
99#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
100#[serde(rename_all = "snake_case")]
101pub enum RemoteMode {
102    /// Read and write access
103    #[default]
104    ReadWrite,
105    /// Read-only access
106    ReadOnly,
107}
108
109impl std::fmt::Display for RemoteMode {
110    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111        match self {
112            RemoteMode::ReadWrite => write!(f, "read_write"),
113            RemoteMode::ReadOnly => write!(f, "read_only"),
114        }
115    }
116}
117
118/// Configuration for a remote storage location.
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct RemoteConfig {
121    /// Remote name (used as schema name: remote_{name})
122    pub name: String,
123
124    /// Type of remote storage
125    #[serde(rename = "type")]
126    pub remote_type: RemoteType,
127
128    /// URI for the remote (e.g., s3://bucket/path/bird.duckdb, md:database_name)
129    pub uri: String,
130
131    /// Access mode (read_write or read_only)
132    #[serde(default)]
133    pub mode: RemoteMode,
134
135    /// Credential provider for S3 (e.g., "credential_chain", "config")
136    #[serde(default)]
137    pub credential_provider: Option<String>,
138
139    /// Whether to auto-attach on connection open
140    #[serde(default = "default_true")]
141    pub auto_attach: bool,
142}
143
/// Serde default for boolean fields that must be `true` when absent.
fn default_true() -> bool {
    true
}
147
148impl RemoteConfig {
149    /// Create a new remote config.
150    pub fn new(name: impl Into<String>, remote_type: RemoteType, uri: impl Into<String>) -> Self {
151        Self {
152            name: name.into(),
153            remote_type,
154            uri: uri.into(),
155            mode: RemoteMode::default(),
156            credential_provider: None,
157            auto_attach: true,
158        }
159    }
160
161    /// Set read-only mode.
162    pub fn read_only(mut self) -> Self {
163        self.mode = RemoteMode::ReadOnly;
164        self
165    }
166
167    /// Get the DuckDB schema name for this remote.
168    pub fn schema_name(&self) -> String {
169        format!("remote_{}", self.name)
170    }
171
172    /// Get the quoted DuckDB schema name for this remote (for use in SQL).
173    pub fn quoted_schema_name(&self) -> String {
174        format!("\"remote_{}\"", self.name)
175    }
176
177    /// Generate the ATTACH SQL statement for this remote.
178    pub fn attach_sql(&self) -> String {
179        let mode_clause = match self.mode {
180            RemoteMode::ReadOnly => " (READ_ONLY)",
181            RemoteMode::ReadWrite => "",
182        };
183
184        let type_clause = match self.remote_type {
185            RemoteType::Postgres => " (TYPE postgres)",
186            _ => "",
187        };
188
189        format!(
190            "ATTACH '{}' AS {}{}{}",
191            self.uri,
192            self.quoted_schema_name(),
193            type_clause,
194            mode_clause
195        )
196    }
197
198    /// Get the base URL for blob storage (for S3/GCS remotes).
199    pub fn blob_base_url(&self) -> Option<String> {
200        match self.remote_type {
201            RemoteType::S3 => {
202                // Extract bucket/prefix from URI, append /blobs
203                // e.g., s3://bucket/path/bird.duckdb -> s3://bucket/path/blobs
204                if let Some(stripped) = self.uri.strip_suffix(".duckdb") {
205                    Some(format!("{}/blobs", stripped))
206                } else {
207                    Some(format!("{}/blobs", self.uri.trim_end_matches('/')))
208                }
209            }
210            _ => None,
211        }
212    }
213}
214
215/// Sync configuration for push/pull operations.
216#[derive(Debug, Clone, Default, Serialize, Deserialize)]
217pub struct SyncConfig {
218    /// Default remote for push/pull operations.
219    #[serde(default)]
220    pub default_remote: Option<String>,
221
222    /// Push data after compact operations.
223    #[serde(default)]
224    pub push_on_compact: bool,
225
226    /// Push data before archive operations.
227    #[serde(default)]
228    pub push_on_archive: bool,
229
230    /// Sync invocations table.
231    #[serde(default = "default_true")]
232    pub sync_invocations: bool,
233
234    /// Sync outputs table.
235    #[serde(default = "default_true")]
236    pub sync_outputs: bool,
237
238    /// Sync events table.
239    #[serde(default = "default_true")]
240    pub sync_events: bool,
241
242    /// Sync blob content files.
243    #[serde(default)]
244    pub sync_blobs: bool,
245
246    /// Minimum blob size to sync (bytes). Smaller blobs stay inline.
247    #[serde(default = "default_blob_sync_min")]
248    pub blob_sync_min_bytes: usize,
249}
250
/// Serde default for `blob_sync_min_bytes`: 1 KB.
fn default_blob_sync_min() -> usize {
    1024
}
254
255/// Shell hook configuration.
256#[derive(Debug, Clone, Serialize, Deserialize, Default)]
257pub struct HooksConfig {
258    /// Command patterns to ignore (not record).
259    /// Uses glob-style matching. Defaults include shq/blq commands and job control.
260    #[serde(default = "default_ignore_patterns")]
261    pub ignore_patterns: Vec<String>,
262}
263
/// Built-in command patterns that the shell hooks do not record.
fn default_ignore_patterns() -> Vec<String> {
    const PATTERNS: &[&str] = &[
        // shq/blq commands (they handle their own recording or are queries)
        "shq *",
        "shqr *",
        "blq *",
        // % aliases (expand to shq commands)
        "%*",
        // Job control (noise, can cause issues)
        "fg",
        "fg *",
        "bg",
        "bg *",
        "jobs",
        "jobs *",
        // Shell session commands
        "exit",
        "logout",
        // Utility commands (noise)
        "clear",
        "history",
        "history *",
    ];

    PATTERNS.iter().map(|pattern| (*pattern).to_string()).collect()
}
288
289/// BIRD configuration.
290#[derive(Debug, Clone, Serialize, Deserialize)]
291pub struct Config {
292    /// Root directory for all BIRD data.
293    pub bird_root: PathBuf,
294
295    /// Client identifier for this machine.
296    #[serde(default = "default_client_id")]
297    pub client_id: String,
298
299    /// Days to keep data in hot tier before archiving.
300    #[serde(default = "default_hot_days")]
301    pub hot_days: u32,
302
303    /// Threshold in bytes for inline vs blob storage.
304    #[serde(default = "default_inline_threshold")]
305    pub inline_threshold: usize,
306
307    /// Automatically extract events after `shq run` commands.
308    #[serde(default)]
309    pub auto_extract: bool,
310
311    /// Storage mode for writing data.
312    /// - parquet: Multi-writer safe, requires compaction (default)
313    /// - duckdb: Single-writer, no compaction needed
314    #[serde(default)]
315    pub storage_mode: StorageMode,
316
317    /// Remote storage configurations.
318    #[serde(default)]
319    pub remotes: Vec<RemoteConfig>,
320
321    /// Sync configuration for push/pull operations.
322    #[serde(default)]
323    pub sync: SyncConfig,
324
325    /// Shell hook configuration.
326    #[serde(default)]
327    pub hooks: HooksConfig,
328}
329
330fn default_client_id() -> String {
331    // Deterministic: username@hostname
332    let username = std::env::var("USER")
333        .or_else(|_| std::env::var("USERNAME"))
334        .unwrap_or_else(|_| "unknown".to_string());
335    let hostname = gethostname::gethostname()
336        .to_string_lossy()
337        .to_string();
338    format!("{}@{}", username, hostname)
339}
340
/// Serde default for `hot_days`: 14 days.
fn default_hot_days() -> u32 {
    14
}
344
/// Serde default for `inline_threshold`: 4 KB, kept small so blob storage is
/// easy to exercise in tests.
fn default_inline_threshold() -> usize {
    4_096
}
348
349impl Config {
350    /// Create a new config with the given BIRD_ROOT.
351    pub fn with_root(bird_root: impl Into<PathBuf>) -> Self {
352        Self {
353            bird_root: bird_root.into(),
354            client_id: default_client_id(),
355            hot_days: default_hot_days(),
356            inline_threshold: default_inline_threshold(),
357            auto_extract: false,
358            storage_mode: StorageMode::default(),
359            remotes: Vec::new(),
360            sync: SyncConfig::default(),
361            hooks: HooksConfig::default(),
362        }
363    }
364
365    /// Create a new config with DuckDB storage mode.
366    pub fn with_duckdb_mode(bird_root: impl Into<PathBuf>) -> Self {
367        Self {
368            bird_root: bird_root.into(),
369            client_id: default_client_id(),
370            hot_days: default_hot_days(),
371            inline_threshold: default_inline_threshold(),
372            auto_extract: false,
373            storage_mode: StorageMode::DuckDB,
374            remotes: Vec::new(),
375            sync: SyncConfig::default(),
376            hooks: HooksConfig::default(),
377        }
378    }
379
380    /// Create a config using default BIRD_ROOT resolution.
381    pub fn default_location() -> Result<Self> {
382        let bird_root = resolve_bird_root()?;
383        Ok(Self::with_root(bird_root))
384    }
385
386    /// Load config from BIRD_ROOT/config.toml, or create default.
387    pub fn load() -> Result<Self> {
388        let bird_root = resolve_bird_root()?;
389        Self::load_from(&bird_root)
390    }
391
392    /// Load config from a specific BIRD_ROOT.
393    pub fn load_from(bird_root: &Path) -> Result<Self> {
394        let config_path = bird_root.join("config.toml");
395
396        if config_path.exists() {
397            let contents = std::fs::read_to_string(&config_path)?;
398            let mut config: Config = toml::from_str(&contents)
399                .map_err(|e| Error::Config(format!("Failed to parse config: {}", e)))?;
400            // Ensure bird_root matches the actual location
401            config.bird_root = bird_root.to_path_buf();
402            Ok(config)
403        } else {
404            Ok(Self::with_root(bird_root))
405        }
406    }
407
408    /// Save config to BIRD_ROOT/config.toml.
409    pub fn save(&self) -> Result<()> {
410        let config_path = self.bird_root.join("config.toml");
411        let contents = toml::to_string_pretty(self)
412            .map_err(|e| Error::Config(format!("Failed to serialize config: {}", e)))?;
413        std::fs::write(config_path, contents)?;
414        Ok(())
415    }
416
417    // Path helpers
418
419    /// Path to the DuckDB database file.
420    pub fn db_path(&self) -> PathBuf {
421        self.bird_root.join("db/bird.duckdb")
422    }
423
424    /// Path to the data directory.
425    pub fn data_dir(&self) -> PathBuf {
426        self.bird_root.join("db/data")
427    }
428
429    /// Path to the recent (hot) data directory.
430    pub fn recent_dir(&self) -> PathBuf {
431        self.data_dir().join("recent")
432    }
433
434    /// Path to the archive (cold) data directory.
435    pub fn archive_dir(&self) -> PathBuf {
436        self.data_dir().join("archive")
437    }
438
439    /// Path to invocations parquet files for a given date.
440    pub fn invocations_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
441        self.recent_dir()
442            .join("invocations")
443            .join(format!("date={}", date))
444    }
445
446    /// Path to outputs parquet files for a given date.
447    pub fn outputs_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
448        self.recent_dir()
449            .join("outputs")
450            .join(format!("date={}", date))
451    }
452
453    /// Path to sessions parquet files for a given date.
454    pub fn sessions_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
455        self.recent_dir()
456            .join("sessions")
457            .join(format!("date={}", date))
458    }
459
460    /// Path to the SQL files directory.
461    pub fn sql_dir(&self) -> PathBuf {
462        self.bird_root.join("db/sql")
463    }
464
465    /// Path to the DuckDB extensions directory.
466    pub fn extensions_dir(&self) -> PathBuf {
467        self.bird_root.join("db/extensions")
468    }
469
470    /// Path to the blobs content directory.
471    pub fn blobs_dir(&self) -> PathBuf {
472        self.recent_dir().join("blobs/content")
473    }
474
475    /// Path to a specific blob file by hash and command.
476    pub fn blob_path(&self, hash: &str, cmd_hint: &str) -> PathBuf {
477        let prefix = &hash[..2.min(hash.len())];
478        let sanitized_cmd = sanitize_for_filename(cmd_hint);
479        self.blobs_dir()
480            .join(prefix)
481            .join(format!("{}--{}.bin", hash, sanitized_cmd))
482    }
483
484    /// Path to the event-formats.toml config file (legacy).
485    pub fn event_formats_path(&self) -> PathBuf {
486        self.bird_root.join("event-formats.toml")
487    }
488
489    /// Path to the format-hints.toml config file.
490    pub fn format_hints_path(&self) -> PathBuf {
491        self.bird_root.join("format-hints.toml")
492    }
493
494    /// Path to events parquet files for a given date.
495    pub fn events_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
496        self.recent_dir()
497            .join("events")
498            .join(format!("date={}", date))
499    }
500
501    // Remote management helpers
502
503    /// Get a remote by name.
504    pub fn get_remote(&self, name: &str) -> Option<&RemoteConfig> {
505        self.remotes.iter().find(|r| r.name == name)
506    }
507
508    /// Add a remote configuration.
509    pub fn add_remote(&mut self, remote: RemoteConfig) {
510        // Remove existing remote with same name
511        self.remotes.retain(|r| r.name != remote.name);
512        self.remotes.push(remote);
513    }
514
515    /// Remove a remote by name. Returns true if removed.
516    pub fn remove_remote(&mut self, name: &str) -> bool {
517        let len_before = self.remotes.len();
518        self.remotes.retain(|r| r.name != name);
519        self.remotes.len() < len_before
520    }
521
522    /// Get all blob roots for multi-location resolution.
523    /// Returns local blobs dir first, then remote blob URLs.
524    pub fn blob_roots(&self) -> Vec<String> {
525        let mut roots = vec![self.blobs_dir().to_string_lossy().to_string()];
526
527        for remote in &self.remotes {
528            if let Some(blob_url) = remote.blob_base_url() {
529                roots.push(blob_url);
530            }
531        }
532
533        roots
534    }
535
536    /// Get remotes that should be auto-attached.
537    pub fn auto_attach_remotes(&self) -> Vec<&RemoteConfig> {
538        self.remotes.iter().filter(|r| r.auto_attach).collect()
539    }
540}
541
/// Sanitize a string for use in filenames (used for blob naming).
///
/// Only the first 32 characters are kept (short on purpose for blob
/// filenames). Spaces become `-`; alphanumerics, `-`, `_` and `.` pass
/// through; every other character becomes `_`.
fn sanitize_for_filename(s: &str) -> String {
    let mut cleaned = String::new();
    for ch in s.chars().take(32) {
        let mapped = if ch == ' ' {
            '-'
        } else if ch.is_alphanumeric() || matches!(ch, '-' | '_' | '.') {
            ch
        } else {
            // Covers path separators and shell/glob metacharacters alike.
            '_'
        };
        cleaned.push(mapped);
    }
    cleaned
}
554
555/// Resolve BIRD_ROOT using the standard resolution order.
556fn resolve_bird_root() -> Result<PathBuf> {
557    // 1. Environment variable
558    if let Ok(path) = std::env::var("BIRD_ROOT") {
559        return Ok(PathBuf::from(path));
560    }
561
562    // 2. XDG data directory (via directories crate)
563    if let Some(proj_dirs) = ProjectDirs::from("", "", "bird") {
564        return Ok(proj_dirs.data_dir().to_path_buf());
565    }
566
567    // 3. Fallback to ~/.local/share/bird
568    let home = std::env::var("HOME")
569        .map_err(|_| Error::Config("Could not determine home directory".to_string()))?;
570    Ok(PathBuf::from(home).join(".local/share/bird"))
571}
572
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Root used by the tests that only compute paths and never touch disk.
    const TEST_ROOT: &str = "/tmp/test-bird";

    /// `with_root` stores the root and applies the numeric defaults.
    #[test]
    fn test_config_with_root() {
        let cfg = Config::with_root(TEST_ROOT);
        assert_eq!(cfg.bird_root, PathBuf::from(TEST_ROOT));
        assert_eq!(cfg.hot_days, 14);
        assert_eq!(cfg.inline_threshold, 4_096);
    }

    /// Blob paths are sharded by the first two hash characters and carry the
    /// sanitized command hint.
    #[test]
    fn test_blob_path() {
        let cfg = Config::with_root(TEST_ROOT);
        let expected =
            PathBuf::from("/tmp/test-bird/db/data/recent/blobs/content/ab/abcdef123456--make-test.bin");
        assert_eq!(cfg.blob_path("abcdef123456", "make test"), expected);
    }

    /// Database and recent-tier paths hang off the configured root.
    #[test]
    fn test_config_paths() {
        let cfg = Config::with_root(TEST_ROOT);
        let expected_db = PathBuf::from("/tmp/test-bird/db/bird.duckdb");
        let expected_recent = PathBuf::from("/tmp/test-bird/db/data/recent");
        assert_eq!(cfg.db_path(), expected_db);
        assert_eq!(cfg.recent_dir(), expected_recent);
    }

    /// A saved config reloads with the same numeric settings.
    #[test]
    fn test_config_save_load() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path().to_path_buf();

        // `save` writes into the root, so the directory has to exist first.
        std::fs::create_dir_all(&root).unwrap();

        let written = Config::with_root(&root);
        written.save().unwrap();

        let reloaded = Config::load_from(&root).unwrap();
        assert_eq!(reloaded.hot_days, written.hot_days);
        assert_eq!(reloaded.inline_threshold, written.inline_threshold);
    }
}