Skip to main content

magic_bird/
config.rs

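//! Configuration for BIRD.
//!
//! BIRD_ROOT resolution order:
//! 1. Explicit path passed to `Config::with_root()` or `Config::load_from()`
//! 2. `BIRD_ROOT` environment variable
//! 3. Default: the per-user data directory reported by the `directories`
//!    crate (typically `~/.local/share/bird` on Linux)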
7
8use std::path::{Path, PathBuf};
9use std::str::FromStr;
10
11use directories::ProjectDirs;
12use serde::{Deserialize, Serialize};
13
14use crate::{Error, Result};
15
16/// Storage mode for BIRD data.
17///
18/// - **Parquet**: Multi-writer safe using atomic file creation. Suitable for
19///   concurrent shell hooks (shq). Requires periodic compaction.
20/// - **DuckDB**: Single-writer using direct table inserts. Simpler but requires
21///   serialized writes. Suitable for sequential CLI tools (blq).
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
23#[serde(rename_all = "lowercase")]
24pub enum StorageMode {
25    /// Write to Parquet files (multi-writer safe, requires compaction)
26    #[default]
27    Parquet,
28    /// Write directly to DuckDB tables (single-writer, no compaction needed)
29    DuckDB,
30}
31
32impl std::fmt::Display for StorageMode {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        match self {
35            StorageMode::Parquet => write!(f, "parquet"),
36            StorageMode::DuckDB => write!(f, "duckdb"),
37        }
38    }
39}
40
41impl FromStr for StorageMode {
42    type Err = Error;
43
44    fn from_str(s: &str) -> Result<Self> {
45        match s.to_lowercase().as_str() {
46            "parquet" => Ok(StorageMode::Parquet),
47            "duckdb" => Ok(StorageMode::DuckDB),
48            _ => Err(Error::Config(format!(
49                "Invalid storage mode '{}': expected 'parquet' or 'duckdb'",
50                s
51            ))),
52        }
53    }
54}
55
56/// Type of remote storage.
57#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
58#[serde(rename_all = "lowercase")]
59pub enum RemoteType {
60    /// S3-compatible object storage (s3://, gs://)
61    S3,
62    /// MotherDuck cloud database (md:)
63    MotherDuck,
64    /// PostgreSQL database
65    Postgres,
66    /// Local or network file path
67    File,
68}
69
70impl std::fmt::Display for RemoteType {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        match self {
73            RemoteType::S3 => write!(f, "s3"),
74            RemoteType::MotherDuck => write!(f, "motherduck"),
75            RemoteType::Postgres => write!(f, "postgres"),
76            RemoteType::File => write!(f, "file"),
77        }
78    }
79}
80
81impl FromStr for RemoteType {
82    type Err = Error;
83
84    fn from_str(s: &str) -> Result<Self> {
85        match s.to_lowercase().as_str() {
86            "s3" | "gcs" => Ok(RemoteType::S3),
87            "motherduck" | "md" => Ok(RemoteType::MotherDuck),
88            "postgres" | "postgresql" | "pg" => Ok(RemoteType::Postgres),
89            "file" | "local" => Ok(RemoteType::File),
90            _ => Err(Error::Config(format!(
91                "Invalid remote type '{}': expected 's3', 'motherduck', 'postgres', or 'file'",
92                s
93            ))),
94        }
95    }
96}
97
98/// Access mode for remote storage.
99#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
100#[serde(rename_all = "snake_case")]
101pub enum RemoteMode {
102    /// Read and write access
103    #[default]
104    ReadWrite,
105    /// Read-only access
106    ReadOnly,
107}
108
109impl std::fmt::Display for RemoteMode {
110    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111        match self {
112            RemoteMode::ReadWrite => write!(f, "read_write"),
113            RemoteMode::ReadOnly => write!(f, "read_only"),
114        }
115    }
116}
117
118/// Configuration for a remote storage location.
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct RemoteConfig {
121    /// Remote name (used as schema name: remote_{name})
122    pub name: String,
123
124    /// Type of remote storage
125    #[serde(rename = "type")]
126    pub remote_type: RemoteType,
127
128    /// URI for the remote (e.g., s3://bucket/path/bird.duckdb, md:database_name)
129    pub uri: String,
130
131    /// Access mode (read_write or read_only)
132    #[serde(default)]
133    pub mode: RemoteMode,
134
135    /// Credential provider for S3 (e.g., "credential_chain", "config")
136    #[serde(default)]
137    pub credential_provider: Option<String>,
138
139    /// Whether to auto-attach on connection open
140    #[serde(default = "default_true")]
141    pub auto_attach: bool,
142}
143
/// Serde `default = "..."` helper for boolean fields that are on unless the
/// config file says otherwise.
fn default_true() -> bool {
    true
}
147
148impl RemoteConfig {
149    /// Create a new remote config.
150    pub fn new(name: impl Into<String>, remote_type: RemoteType, uri: impl Into<String>) -> Self {
151        Self {
152            name: name.into(),
153            remote_type,
154            uri: uri.into(),
155            mode: RemoteMode::default(),
156            credential_provider: None,
157            auto_attach: true,
158        }
159    }
160
161    /// Set read-only mode.
162    pub fn read_only(mut self) -> Self {
163        self.mode = RemoteMode::ReadOnly;
164        self
165    }
166
167    /// Get the DuckDB schema name for this remote.
168    pub fn schema_name(&self) -> String {
169        format!("remote_{}", self.name)
170    }
171
172    /// Get the quoted DuckDB schema name for this remote (for use in SQL).
173    pub fn quoted_schema_name(&self) -> String {
174        format!("\"remote_{}\"", self.name)
175    }
176
177    /// Generate the ATTACH SQL statement for this remote.
178    pub fn attach_sql(&self) -> String {
179        let mode_clause = match self.mode {
180            RemoteMode::ReadOnly => " (READ_ONLY)",
181            RemoteMode::ReadWrite => "",
182        };
183
184        let type_clause = match self.remote_type {
185            RemoteType::Postgres => " (TYPE postgres)",
186            _ => "",
187        };
188
189        format!(
190            "ATTACH '{}' AS {}{}{}",
191            self.uri,
192            self.quoted_schema_name(),
193            type_clause,
194            mode_clause
195        )
196    }
197
198    /// Get the base URL for blob storage (for S3/GCS remotes).
199    pub fn blob_base_url(&self) -> Option<String> {
200        match self.remote_type {
201            RemoteType::S3 => {
202                // Extract bucket/prefix from URI, append /blobs
203                // e.g., s3://bucket/path/bird.duckdb -> s3://bucket/path/blobs
204                if let Some(stripped) = self.uri.strip_suffix(".duckdb") {
205                    Some(format!("{}/blobs", stripped))
206                } else {
207                    Some(format!("{}/blobs", self.uri.trim_end_matches('/')))
208                }
209            }
210            _ => None,
211        }
212    }
213}
214
215/// Sync configuration for push/pull operations.
216#[derive(Debug, Clone, Default, Serialize, Deserialize)]
217pub struct SyncConfig {
218    /// Default remote for push/pull operations.
219    #[serde(default)]
220    pub default_remote: Option<String>,
221
222    /// Push data after compact operations.
223    #[serde(default)]
224    pub push_on_compact: bool,
225
226    /// Push data before archive operations.
227    #[serde(default)]
228    pub push_on_archive: bool,
229
230    /// Sync invocations table.
231    #[serde(default = "default_true")]
232    pub sync_invocations: bool,
233
234    /// Sync outputs table.
235    #[serde(default = "default_true")]
236    pub sync_outputs: bool,
237
238    /// Sync events table.
239    #[serde(default = "default_true")]
240    pub sync_events: bool,
241
242    /// Sync blob content files.
243    #[serde(default)]
244    pub sync_blobs: bool,
245
246    /// Minimum blob size to sync (bytes). Smaller blobs stay inline.
247    #[serde(default = "default_blob_sync_min")]
248    pub blob_sync_min_bytes: usize,
249}
250
251fn default_blob_sync_min() -> usize {
252    1024 // 1KB
253}
254
255/// BIRD configuration.
256#[derive(Debug, Clone, Serialize, Deserialize)]
257pub struct Config {
258    /// Root directory for all BIRD data.
259    pub bird_root: PathBuf,
260
261    /// Client identifier for this machine.
262    #[serde(default = "default_client_id")]
263    pub client_id: String,
264
265    /// Days to keep data in hot tier before archiving.
266    #[serde(default = "default_hot_days")]
267    pub hot_days: u32,
268
269    /// Threshold in bytes for inline vs blob storage.
270    #[serde(default = "default_inline_threshold")]
271    pub inline_threshold: usize,
272
273    /// Automatically extract events after `shq run` commands.
274    #[serde(default)]
275    pub auto_extract: bool,
276
277    /// Storage mode for writing data.
278    /// - parquet: Multi-writer safe, requires compaction (default)
279    /// - duckdb: Single-writer, no compaction needed
280    #[serde(default)]
281    pub storage_mode: StorageMode,
282
283    /// Remote storage configurations.
284    #[serde(default)]
285    pub remotes: Vec<RemoteConfig>,
286
287    /// Sync configuration for push/pull operations.
288    #[serde(default)]
289    pub sync: SyncConfig,
290}
291
292fn default_client_id() -> String {
293    // Deterministic: username@hostname
294    let username = std::env::var("USER")
295        .or_else(|_| std::env::var("USERNAME"))
296        .unwrap_or_else(|_| "unknown".to_string());
297    let hostname = gethostname::gethostname()
298        .to_string_lossy()
299        .to_string();
300    format!("{}@{}", username, hostname)
301}
302
/// Serde default for `Config::hot_days`: two weeks in the hot tier.
fn default_hot_days() -> u32 {
    const TWO_WEEKS_IN_DAYS: u32 = 14;
    TWO_WEEKS_IN_DAYS
}
306
/// Serde default for `Config::inline_threshold`, in bytes.
///
/// Kept small (4 KB) so blob storage is easy to exercise in tests.
fn default_inline_threshold() -> usize {
    4 * 1024
}
310
311impl Config {
312    /// Create a new config with the given BIRD_ROOT.
313    pub fn with_root(bird_root: impl Into<PathBuf>) -> Self {
314        Self {
315            bird_root: bird_root.into(),
316            client_id: default_client_id(),
317            hot_days: default_hot_days(),
318            inline_threshold: default_inline_threshold(),
319            auto_extract: false,
320            storage_mode: StorageMode::default(),
321            remotes: Vec::new(),
322            sync: SyncConfig::default(),
323        }
324    }
325
326    /// Create a new config with DuckDB storage mode.
327    pub fn with_duckdb_mode(bird_root: impl Into<PathBuf>) -> Self {
328        Self {
329            bird_root: bird_root.into(),
330            client_id: default_client_id(),
331            hot_days: default_hot_days(),
332            inline_threshold: default_inline_threshold(),
333            auto_extract: false,
334            storage_mode: StorageMode::DuckDB,
335            remotes: Vec::new(),
336            sync: SyncConfig::default(),
337        }
338    }
339
340    /// Create a config using default BIRD_ROOT resolution.
341    pub fn default_location() -> Result<Self> {
342        let bird_root = resolve_bird_root()?;
343        Ok(Self::with_root(bird_root))
344    }
345
346    /// Load config from BIRD_ROOT/config.toml, or create default.
347    pub fn load() -> Result<Self> {
348        let bird_root = resolve_bird_root()?;
349        Self::load_from(&bird_root)
350    }
351
352    /// Load config from a specific BIRD_ROOT.
353    pub fn load_from(bird_root: &Path) -> Result<Self> {
354        let config_path = bird_root.join("config.toml");
355
356        if config_path.exists() {
357            let contents = std::fs::read_to_string(&config_path)?;
358            let mut config: Config = toml::from_str(&contents)
359                .map_err(|e| Error::Config(format!("Failed to parse config: {}", e)))?;
360            // Ensure bird_root matches the actual location
361            config.bird_root = bird_root.to_path_buf();
362            Ok(config)
363        } else {
364            Ok(Self::with_root(bird_root))
365        }
366    }
367
368    /// Save config to BIRD_ROOT/config.toml.
369    pub fn save(&self) -> Result<()> {
370        let config_path = self.bird_root.join("config.toml");
371        let contents = toml::to_string_pretty(self)
372            .map_err(|e| Error::Config(format!("Failed to serialize config: {}", e)))?;
373        std::fs::write(config_path, contents)?;
374        Ok(())
375    }
376
377    // Path helpers
378
379    /// Path to the DuckDB database file.
380    pub fn db_path(&self) -> PathBuf {
381        self.bird_root.join("db/bird.duckdb")
382    }
383
384    /// Path to the data directory.
385    pub fn data_dir(&self) -> PathBuf {
386        self.bird_root.join("db/data")
387    }
388
389    /// Path to the recent (hot) data directory.
390    pub fn recent_dir(&self) -> PathBuf {
391        self.data_dir().join("recent")
392    }
393
394    /// Path to the archive (cold) data directory.
395    pub fn archive_dir(&self) -> PathBuf {
396        self.data_dir().join("archive")
397    }
398
399    /// Path to invocations parquet files for a given date.
400    pub fn invocations_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
401        self.recent_dir()
402            .join("invocations")
403            .join(format!("date={}", date))
404    }
405
406    /// Path to outputs parquet files for a given date.
407    pub fn outputs_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
408        self.recent_dir()
409            .join("outputs")
410            .join(format!("date={}", date))
411    }
412
413    /// Path to sessions parquet files for a given date.
414    pub fn sessions_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
415        self.recent_dir()
416            .join("sessions")
417            .join(format!("date={}", date))
418    }
419
420    /// Path to the SQL files directory.
421    pub fn sql_dir(&self) -> PathBuf {
422        self.bird_root.join("db/sql")
423    }
424
425    /// Path to the DuckDB extensions directory.
426    pub fn extensions_dir(&self) -> PathBuf {
427        self.bird_root.join("db/extensions")
428    }
429
430    /// Path to the blobs content directory.
431    pub fn blobs_dir(&self) -> PathBuf {
432        self.recent_dir().join("blobs/content")
433    }
434
435    /// Path to a specific blob file by hash and command.
436    pub fn blob_path(&self, hash: &str, cmd_hint: &str) -> PathBuf {
437        let prefix = &hash[..2.min(hash.len())];
438        let sanitized_cmd = sanitize_for_filename(cmd_hint);
439        self.blobs_dir()
440            .join(prefix)
441            .join(format!("{}--{}.bin", hash, sanitized_cmd))
442    }
443
444    /// Path to the event-formats.toml config file (legacy).
445    pub fn event_formats_path(&self) -> PathBuf {
446        self.bird_root.join("event-formats.toml")
447    }
448
449    /// Path to the format-hints.toml config file.
450    pub fn format_hints_path(&self) -> PathBuf {
451        self.bird_root.join("format-hints.toml")
452    }
453
454    /// Path to events parquet files for a given date.
455    pub fn events_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
456        self.recent_dir()
457            .join("events")
458            .join(format!("date={}", date))
459    }
460
461    // Remote management helpers
462
463    /// Get a remote by name.
464    pub fn get_remote(&self, name: &str) -> Option<&RemoteConfig> {
465        self.remotes.iter().find(|r| r.name == name)
466    }
467
468    /// Add a remote configuration.
469    pub fn add_remote(&mut self, remote: RemoteConfig) {
470        // Remove existing remote with same name
471        self.remotes.retain(|r| r.name != remote.name);
472        self.remotes.push(remote);
473    }
474
475    /// Remove a remote by name. Returns true if removed.
476    pub fn remove_remote(&mut self, name: &str) -> bool {
477        let len_before = self.remotes.len();
478        self.remotes.retain(|r| r.name != name);
479        self.remotes.len() < len_before
480    }
481
482    /// Get all blob roots for multi-location resolution.
483    /// Returns local blobs dir first, then remote blob URLs.
484    pub fn blob_roots(&self) -> Vec<String> {
485        let mut roots = vec![self.blobs_dir().to_string_lossy().to_string()];
486
487        for remote in &self.remotes {
488            if let Some(blob_url) = remote.blob_base_url() {
489                roots.push(blob_url);
490            }
491        }
492
493        roots
494    }
495
496    /// Get remotes that should be auto-attached.
497    pub fn auto_attach_remotes(&self) -> Vec<&RemoteConfig> {
498        self.remotes.iter().filter(|r| r.auto_attach).collect()
499    }
500}
501
/// Turns an arbitrary string into a short, filename-safe fragment (used for
/// blob naming).
///
/// Only the first 32 characters are considered. Of those:
///
/// - a space becomes `-`
/// - alphanumerics, `-`, `_` and `.` are kept
/// - everything else (path separators, quotes, wildcards, ...) becomes `_`
fn sanitize_for_filename(s: &str) -> String {
    let mut cleaned = String::new();

    // Each input character maps to exactly one output character, so
    // truncating before mapping gives the same result as truncating after.
    for ch in s.chars().take(32) {
        let replacement = if ch == ' ' {
            '-'
        } else if ch.is_alphanumeric() || matches!(ch, '-' | '_' | '.') {
            ch
        } else {
            '_'
        };
        cleaned.push(replacement);
    }

    cleaned
}
514
515/// Resolve BIRD_ROOT using the standard resolution order.
516fn resolve_bird_root() -> Result<PathBuf> {
517    // 1. Environment variable
518    if let Ok(path) = std::env::var("BIRD_ROOT") {
519        return Ok(PathBuf::from(path));
520    }
521
522    // 2. XDG data directory (via directories crate)
523    if let Some(proj_dirs) = ProjectDirs::from("", "", "bird") {
524        return Ok(proj_dirs.data_dir().to_path_buf());
525    }
526
527    // 3. Fallback to ~/.local/share/bird
528    let home = std::env::var("HOME")
529        .map_err(|_| Error::Config("Could not determine home directory".to_string()))?;
530    Ok(PathBuf::from(home).join(".local/share/bird"))
531}
532
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Root used by the tests that only build paths and never touch disk.
    const TEST_ROOT: &str = "/tmp/test-bird";

    /// `with_root` keeps the given root and applies the documented defaults.
    #[test]
    fn test_config_with_root() {
        let cfg = Config::with_root(TEST_ROOT);

        assert_eq!(cfg.bird_root, PathBuf::from("/tmp/test-bird"));
        assert_eq!(cfg.hot_days, 14);
        assert_eq!(cfg.inline_threshold, 4_096);
    }

    /// Blob paths are sharded by the first two hash characters and carry the
    /// sanitized command hint.
    #[test]
    fn test_blob_path() {
        let expected = PathBuf::from(
            "/tmp/test-bird/db/data/recent/blobs/content/ab/abcdef123456--make-test.bin",
        );

        let actual = Config::with_root(TEST_ROOT).blob_path("abcdef123456", "make test");

        assert_eq!(actual, expected);
    }

    /// Database and recent-data paths hang off the root as expected.
    #[test]
    fn test_config_paths() {
        let cfg = Config::with_root(TEST_ROOT);

        assert_eq!(cfg.db_path(), PathBuf::from("/tmp/test-bird/db/bird.duckdb"));
        assert_eq!(cfg.recent_dir(), PathBuf::from("/tmp/test-bird/db/data/recent"));
    }

    /// A saved config can be loaded back with the same settings.
    #[test]
    fn test_config_save_load() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path().to_path_buf();

        // Create the directory structure
        std::fs::create_dir_all(&root).unwrap();

        let original = Config::with_root(&root);
        original.save().unwrap();

        let reloaded = Config::load_from(&root).unwrap();
        assert_eq!(reloaded.hot_days, original.hot_days);
        assert_eq!(reloaded.inline_threshold, original.inline_threshold);
    }
}
578}