Skip to main content

magic_bird/
config.rs

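//! Configuration for BIRD.
//!
//! BIRD_ROOT resolution order:
//! 1. Explicit path passed to `Config::with_root()` or `Config::load_from()`
//! 2. `BIRD_ROOT` environment variable
//! 3. Default: platform data directory from the `directories` crate, falling
//!    back to `~/.local/share/bird`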
7
8use std::path::{Path, PathBuf};
9use std::str::FromStr;
10
11use directories::ProjectDirs;
12use serde::{Deserialize, Serialize};
13
14use crate::{Error, Result};
15
16/// Storage mode for BIRD data.
17///
18/// - **Parquet**: Multi-writer safe using atomic file creation. Suitable for
19///   concurrent shell hooks (shq). Requires periodic compaction.
20/// - **DuckDB**: Single-writer using direct table inserts. Simpler but requires
21///   serialized writes. Suitable for sequential CLI tools (blq).
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
23#[serde(rename_all = "lowercase")]
24pub enum StorageMode {
25    /// Write to Parquet files (multi-writer safe, requires compaction)
26    #[default]
27    Parquet,
28    /// Write directly to DuckDB tables (single-writer, no compaction needed)
29    DuckDB,
30}
31
32impl std::fmt::Display for StorageMode {
33    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34        match self {
35            StorageMode::Parquet => write!(f, "parquet"),
36            StorageMode::DuckDB => write!(f, "duckdb"),
37        }
38    }
39}
40
41impl FromStr for StorageMode {
42    type Err = Error;
43
44    fn from_str(s: &str) -> Result<Self> {
45        match s.to_lowercase().as_str() {
46            "parquet" => Ok(StorageMode::Parquet),
47            "duckdb" => Ok(StorageMode::DuckDB),
48            _ => Err(Error::Config(format!(
49                "Invalid storage mode '{}': expected 'parquet' or 'duckdb'",
50                s
51            ))),
52        }
53    }
54}
55
56/// Type of remote storage.
57#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
58#[serde(rename_all = "lowercase")]
59pub enum RemoteType {
60    /// S3-compatible object storage (s3://, gs://)
61    S3,
62    /// MotherDuck cloud database (md:)
63    MotherDuck,
64    /// PostgreSQL database
65    Postgres,
66    /// Local or network file path
67    File,
68}
69
70impl std::fmt::Display for RemoteType {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        match self {
73            RemoteType::S3 => write!(f, "s3"),
74            RemoteType::MotherDuck => write!(f, "motherduck"),
75            RemoteType::Postgres => write!(f, "postgres"),
76            RemoteType::File => write!(f, "file"),
77        }
78    }
79}
80
81impl FromStr for RemoteType {
82    type Err = Error;
83
84    fn from_str(s: &str) -> Result<Self> {
85        match s.to_lowercase().as_str() {
86            "s3" | "gcs" => Ok(RemoteType::S3),
87            "motherduck" | "md" => Ok(RemoteType::MotherDuck),
88            "postgres" | "postgresql" | "pg" => Ok(RemoteType::Postgres),
89            "file" | "local" => Ok(RemoteType::File),
90            _ => Err(Error::Config(format!(
91                "Invalid remote type '{}': expected 's3', 'motherduck', 'postgres', or 'file'",
92                s
93            ))),
94        }
95    }
96}
97
98/// Access mode for remote storage.
99#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
100#[serde(rename_all = "snake_case")]
101pub enum RemoteMode {
102    /// Read and write access
103    #[default]
104    ReadWrite,
105    /// Read-only access
106    ReadOnly,
107}
108
109impl std::fmt::Display for RemoteMode {
110    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111        match self {
112            RemoteMode::ReadWrite => write!(f, "read_write"),
113            RemoteMode::ReadOnly => write!(f, "read_only"),
114        }
115    }
116}
117
118/// Configuration for a remote storage location.
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct RemoteConfig {
121    /// Remote name (used as schema name: remote_{name})
122    pub name: String,
123
124    /// Type of remote storage
125    #[serde(rename = "type")]
126    pub remote_type: RemoteType,
127
128    /// URI for the remote (e.g., s3://bucket/path/bird.duckdb, md:database_name)
129    pub uri: String,
130
131    /// Access mode (read_write or read_only)
132    #[serde(default)]
133    pub mode: RemoteMode,
134
135    /// Credential provider for S3 (e.g., "credential_chain", "config")
136    #[serde(default)]
137    pub credential_provider: Option<String>,
138
139    /// Whether to auto-attach on connection open
140    #[serde(default = "default_true")]
141    pub auto_attach: bool,
142}
143
/// Serde default for boolean fields that should be enabled when the key is
/// missing from the config file.
fn default_true() -> bool {
    true
}
147
148impl RemoteConfig {
149    /// Create a new remote config.
150    pub fn new(name: impl Into<String>, remote_type: RemoteType, uri: impl Into<String>) -> Self {
151        Self {
152            name: name.into(),
153            remote_type,
154            uri: uri.into(),
155            mode: RemoteMode::default(),
156            credential_provider: None,
157            auto_attach: true,
158        }
159    }
160
161    /// Set read-only mode.
162    pub fn read_only(mut self) -> Self {
163        self.mode = RemoteMode::ReadOnly;
164        self
165    }
166
167    /// Get the DuckDB schema name for this remote.
168    pub fn schema_name(&self) -> String {
169        format!("remote_{}", self.name)
170    }
171
172    /// Get the quoted DuckDB schema name for this remote (for use in SQL).
173    pub fn quoted_schema_name(&self) -> String {
174        format!("\"remote_{}\"", self.name)
175    }
176
177    /// Generate the ATTACH SQL statement for this remote.
178    pub fn attach_sql(&self) -> String {
179        let mode_clause = match self.mode {
180            RemoteMode::ReadOnly => " (READ_ONLY)",
181            RemoteMode::ReadWrite => "",
182        };
183
184        let type_clause = match self.remote_type {
185            RemoteType::Postgres => " (TYPE postgres)",
186            _ => "",
187        };
188
189        format!(
190            "ATTACH '{}' AS {}{}{}",
191            self.uri,
192            self.quoted_schema_name(),
193            type_clause,
194            mode_clause
195        )
196    }
197
198    /// Get the base URL for blob storage (for S3/GCS remotes).
199    pub fn blob_base_url(&self) -> Option<String> {
200        match self.remote_type {
201            RemoteType::S3 => {
202                // Extract bucket/prefix from URI, append /blobs
203                // e.g., s3://bucket/path/bird.duckdb -> s3://bucket/path/blobs
204                if let Some(stripped) = self.uri.strip_suffix(".duckdb") {
205                    Some(format!("{}/blobs", stripped))
206                } else {
207                    Some(format!("{}/blobs", self.uri.trim_end_matches('/')))
208                }
209            }
210            _ => None,
211        }
212    }
213
214    /// Get the data directory for file remotes.
215    ///
216    /// For a remote URI like `file:///path/to/db/bird.duckdb`, returns `/path/to/db/data`.
217    /// This is needed so parquet-mode remotes can resolve their relative file paths.
218    pub fn data_dir(&self) -> Option<std::path::PathBuf> {
219        if self.remote_type != RemoteType::File {
220            return None;
221        }
222
223        // Parse file:// URI to get the database path
224        let db_path = self.uri.strip_prefix("file://")?;
225        let db_path = std::path::Path::new(db_path);
226
227        // Data directory is sibling to the .duckdb file: /path/to/db/bird.duckdb -> /path/to/db/data
228        db_path.parent().map(|p| p.join("data"))
229    }
230}
231
232/// Sync configuration for push/pull operations.
233#[derive(Debug, Clone, Default, Serialize, Deserialize)]
234pub struct SyncConfig {
235    /// Default remote for push/pull operations.
236    #[serde(default)]
237    pub default_remote: Option<String>,
238
239    /// Push data after compact operations.
240    #[serde(default)]
241    pub push_on_compact: bool,
242
243    /// Push data before archive operations.
244    #[serde(default)]
245    pub push_on_archive: bool,
246
247    /// Sync invocations table.
248    #[serde(default = "default_true")]
249    pub sync_invocations: bool,
250
251    /// Sync outputs table.
252    #[serde(default = "default_true")]
253    pub sync_outputs: bool,
254
255    /// Sync events table.
256    #[serde(default = "default_true")]
257    pub sync_events: bool,
258
259    /// Sync blob content files.
260    #[serde(default)]
261    pub sync_blobs: bool,
262
263    /// Minimum blob size to sync (bytes). Smaller blobs stay inline.
264    #[serde(default = "default_blob_sync_min")]
265    pub blob_sync_min_bytes: usize,
266}
267
/// Serde default for `blob_sync_min_bytes`: 1 KB.
fn default_blob_sync_min() -> usize {
    const ONE_KB: usize = 1024;
    ONE_KB
}
271
272/// Shell hook configuration.
273#[derive(Debug, Clone, Serialize, Deserialize, Default)]
274pub struct HooksConfig {
275    /// Command patterns to ignore (not record).
276    /// Uses glob-style matching. Defaults include shq/blq commands and job control.
277    #[serde(default = "default_ignore_patterns")]
278    pub ignore_patterns: Vec<String>,
279}
280
/// Built-in glob patterns for commands the shell hooks should not record.
fn default_ignore_patterns() -> Vec<String> {
    const PATTERNS: &[&str] = &[
        // shq/blq commands (they handle their own recording or are queries)
        "shq *",
        "shqr *",
        "blq *",
        // % aliases (expand to shq commands)
        "%*",
        // Job control (noise, can cause issues)
        "fg",
        "fg *",
        "bg",
        "bg *",
        "jobs",
        "jobs *",
        // Shell session commands
        "exit",
        "logout",
        // Utility commands (noise)
        "clear",
        "history",
        "history *",
    ];

    PATTERNS.iter().map(|pattern| pattern.to_string()).collect()
}
305
306/// Retrospective buffer configuration.
307///
308/// The buffer captures output from all shell commands, allowing users to
309/// retroactively save commands they didn't explicitly capture with `shq run`.
310#[derive(Debug, Clone, Serialize, Deserialize)]
311pub struct BufferConfig {
312    /// Enable retrospective buffering. Disabled by default for security.
313    #[serde(default)]
314    pub enabled: bool,
315
316    /// Maximum number of commands to keep in buffer.
317    #[serde(default = "default_buffer_max_entries")]
318    pub max_entries: usize,
319
320    /// Maximum total size of buffer in megabytes.
321    #[serde(default = "default_buffer_max_size_mb")]
322    pub max_size_mb: usize,
323
324    /// Maximum age of buffer entries in hours.
325    #[serde(default = "default_buffer_max_age_hours")]
326    pub max_age_hours: u32,
327
328    /// Additional command patterns to exclude from buffering.
329    /// These are checked in addition to hooks.ignore_patterns.
330    /// Commands matching these patterns may contain sensitive output.
331    #[serde(default = "default_buffer_exclude_patterns")]
332    pub exclude_patterns: Vec<String>,
333}
334
335impl Default for BufferConfig {
336    fn default() -> Self {
337        Self {
338            enabled: false,
339            max_entries: default_buffer_max_entries(),
340            max_size_mb: default_buffer_max_size_mb(),
341            max_age_hours: default_buffer_max_age_hours(),
342            exclude_patterns: default_buffer_exclude_patterns(),
343        }
344    }
345}
346
/// Serde default for `buffer.max_entries`: 100 commands.
fn default_buffer_max_entries() -> usize {
    const MAX_ENTRIES: usize = 100;
    MAX_ENTRIES
}
350
/// Serde default for `buffer.max_size_mb`: 100 megabytes.
fn default_buffer_max_size_mb() -> usize {
    const MAX_SIZE_MB: usize = 100;
    MAX_SIZE_MB
}
354
/// Serde default for `buffer.max_age_hours`: 24 hours.
fn default_buffer_max_age_hours() -> u32 {
    const MAX_AGE_HOURS: u32 = 24;
    MAX_AGE_HOURS
}
358
/// Built-in glob patterns for commands whose output must not be buffered
/// because it may contain secrets.
fn default_buffer_exclude_patterns() -> Vec<String> {
    const PATTERNS: &[&str] = &[
        // Password/credential patterns
        "*password*",
        "*passwd*",
        "*secret*",
        "*credential*",
        // Token/key patterns
        "*token*",
        "*bearer*",
        "*api_key*",
        "*apikey*",
        "*api-key*",
        "*private_key*",
        "*privatekey*",
        // Security tool commands
        "ssh *",
        "ssh-*",
        "gpg *",
        "pass *",
        "vault *",
        "aws sts *",
        "aws secretsmanager *",
        // Environment variable commands that might leak secrets
        "export *SECRET*",
        "export *TOKEN*",
        "export *KEY*",
        "export *PASSWORD*",
        "printenv",
        "env",
    ];

    PATTERNS.iter().map(|pattern| pattern.to_string()).collect()
}
391
392/// BIRD configuration.
393#[derive(Debug, Clone, Serialize, Deserialize)]
394pub struct Config {
395    /// Root directory for all BIRD data.
396    pub bird_root: PathBuf,
397
398    /// Client identifier for this machine.
399    #[serde(default = "default_client_id")]
400    pub client_id: String,
401
402    /// Days to keep data in hot tier before archiving.
403    #[serde(default = "default_hot_days")]
404    pub hot_days: u32,
405
406    /// Threshold in bytes for inline vs blob storage.
407    #[serde(default = "default_inline_threshold")]
408    pub inline_threshold: usize,
409
410    /// Automatically extract events after `shq run` commands.
411    #[serde(default)]
412    pub auto_extract: bool,
413
414    /// Storage mode for writing data.
415    /// - parquet: Multi-writer safe, requires compaction (default)
416    /// - duckdb: Single-writer, no compaction needed
417    #[serde(default)]
418    pub storage_mode: StorageMode,
419
420    /// Remote storage configurations.
421    #[serde(default)]
422    pub remotes: Vec<RemoteConfig>,
423
424    /// Sync configuration for push/pull operations.
425    #[serde(default)]
426    pub sync: SyncConfig,
427
428    /// Shell hook configuration.
429    #[serde(default)]
430    pub hooks: HooksConfig,
431
432    /// Retrospective buffer configuration.
433    #[serde(default)]
434    pub buffer: BufferConfig,
435}
436
437fn default_client_id() -> String {
438    // Deterministic: username@hostname
439    let username = std::env::var("USER")
440        .or_else(|_| std::env::var("USERNAME"))
441        .unwrap_or_else(|_| "unknown".to_string());
442    let hostname = gethostname::gethostname()
443        .to_string_lossy()
444        .to_string();
445    format!("{}@{}", username, hostname)
446}
447
/// Serde default for `hot_days`: 14 days in the hot tier before archiving.
fn default_hot_days() -> u32 {
    const HOT_DAYS: u32 = 14;
    HOT_DAYS
}
451
/// Serde default for `inline_threshold`: 4KB, kept small so blob storage is
/// easy to exercise in tests.
fn default_inline_threshold() -> usize {
    4 * 1024
}
455
456impl Config {
457    /// Create a new config with the given BIRD_ROOT.
458    pub fn with_root(bird_root: impl Into<PathBuf>) -> Self {
459        Self {
460            bird_root: bird_root.into(),
461            client_id: default_client_id(),
462            hot_days: default_hot_days(),
463            inline_threshold: default_inline_threshold(),
464            auto_extract: true,
465            storage_mode: StorageMode::default(),
466            remotes: Vec::new(),
467            sync: SyncConfig::default(),
468            hooks: HooksConfig::default(),
469            buffer: BufferConfig::default(),
470        }
471    }
472
473    /// Create a new config with DuckDB storage mode.
474    pub fn with_duckdb_mode(bird_root: impl Into<PathBuf>) -> Self {
475        Self {
476            bird_root: bird_root.into(),
477            client_id: default_client_id(),
478            hot_days: default_hot_days(),
479            inline_threshold: default_inline_threshold(),
480            auto_extract: true,
481            storage_mode: StorageMode::DuckDB,
482            remotes: Vec::new(),
483            sync: SyncConfig::default(),
484            hooks: HooksConfig::default(),
485            buffer: BufferConfig::default(),
486        }
487    }
488
489    /// Create a config using default BIRD_ROOT resolution.
490    pub fn default_location() -> Result<Self> {
491        let bird_root = resolve_bird_root()?;
492        Ok(Self::with_root(bird_root))
493    }
494
495    /// Load config from BIRD_ROOT/config.toml, or create default.
496    pub fn load() -> Result<Self> {
497        let bird_root = resolve_bird_root()?;
498        Self::load_from(&bird_root)
499    }
500
501    /// Load config from a specific BIRD_ROOT.
502    pub fn load_from(bird_root: &Path) -> Result<Self> {
503        let config_path = bird_root.join("config.toml");
504
505        if config_path.exists() {
506            let contents = std::fs::read_to_string(&config_path)?;
507            let mut config: Config = toml::from_str(&contents)
508                .map_err(|e| Error::Config(format!("Failed to parse config: {}", e)))?;
509            // Ensure bird_root matches the actual location
510            config.bird_root = bird_root.to_path_buf();
511            Ok(config)
512        } else {
513            Ok(Self::with_root(bird_root))
514        }
515    }
516
517    /// Save config to BIRD_ROOT/config.toml.
518    pub fn save(&self) -> Result<()> {
519        let config_path = self.bird_root.join("config.toml");
520        let contents = toml::to_string_pretty(self)
521            .map_err(|e| Error::Config(format!("Failed to serialize config: {}", e)))?;
522        std::fs::write(config_path, contents)?;
523        Ok(())
524    }
525
526    // Path helpers
527
528    /// Path to the DuckDB database file.
529    pub fn db_path(&self) -> PathBuf {
530        self.bird_root.join("db/bird.duckdb")
531    }
532
533    /// Path to the data directory.
534    pub fn data_dir(&self) -> PathBuf {
535        self.bird_root.join("db/data")
536    }
537
538    /// Path to the recent (hot) data directory.
539    pub fn recent_dir(&self) -> PathBuf {
540        self.data_dir().join("recent")
541    }
542
543    /// Path to the archive (cold) data directory.
544    pub fn archive_dir(&self) -> PathBuf {
545        self.data_dir().join("archive")
546    }
547
548    // =========================================================================
549    // V5 Schema Paths (attempts/outcomes)
550    // =========================================================================
551
552    /// Path to attempts parquet files for a given date (v5 schema).
553    ///
554    /// Partitioning: `recent/attempts/date=YYYY-MM-DD/`
555    pub fn attempts_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
556        self.recent_dir()
557            .join("attempts")
558            .join(format!("date={}", date))
559    }
560
561    /// Path to outcomes parquet files for a given date (v5 schema).
562    ///
563    /// Partitioning: `recent/outcomes/date=YYYY-MM-DD/`
564    pub fn outcomes_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
565        self.recent_dir()
566            .join("outcomes")
567            .join(format!("date={}", date))
568    }
569
570    // =========================================================================
571    // V4 Schema Paths (legacy, for backwards compatibility)
572    // =========================================================================
573
574    /// Path to invocations parquet files for a given date and status.
575    ///
576    /// **V4 Schema**: Status partitioning: `recent/invocations/status=<status>/date=YYYY-MM-DD/`
577    ///
578    /// In v5 schema, use `attempts_dir()` and `outcomes_dir()` instead.
579    pub fn invocations_dir_with_status(&self, status: &str, date: &chrono::NaiveDate) -> PathBuf {
580        self.recent_dir()
581            .join("invocations")
582            .join(format!("status={}", status))
583            .join(format!("date={}", date))
584    }
585
586    /// Path to invocations parquet files for a given date (defaults to "completed" status).
587    ///
588    /// **V4 Schema**: For backwards compatibility - use `invocations_dir_with_status` for explicit status.
589    ///
590    /// In v5 schema, use `attempts_dir()` and `outcomes_dir()` instead.
591    pub fn invocations_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
592        self.invocations_dir_with_status("completed", date)
593    }
594
595    /// Path to the pending invocations directory (JSON files for crash recovery).
596    ///
597    /// **V4 Schema**: In v5, pending detection is via:
598    /// `SELECT * FROM attempts WHERE id NOT IN (SELECT attempt_id FROM outcomes)`
599    #[deprecated(
600        since = "0.2.0",
601        note = "V4 schema. In v5, pending detection is via the invocations view."
602    )]
603    pub fn pending_dir(&self) -> PathBuf {
604        self.bird_root.join("db/pending")
605    }
606
607    /// Path to outputs parquet files for a given date.
608    pub fn outputs_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
609        self.recent_dir()
610            .join("outputs")
611            .join(format!("date={}", date))
612    }
613
614    /// Path to sessions parquet files for a given date.
615    pub fn sessions_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
616        self.recent_dir()
617            .join("sessions")
618            .join(format!("date={}", date))
619    }
620
621    /// Path to the SQL files directory.
622    pub fn sql_dir(&self) -> PathBuf {
623        self.bird_root.join("db/sql")
624    }
625
626    /// Path to the DuckDB extensions directory.
627    pub fn extensions_dir(&self) -> PathBuf {
628        self.bird_root.join("db/extensions")
629    }
630
631    /// Path to the blobs content directory.
632    pub fn blobs_dir(&self) -> PathBuf {
633        self.recent_dir().join("blobs/content")
634    }
635
636    /// Path to the running directory for in-progress command output.
637    ///
638    /// Files here are temp files streamed during command execution:
639    /// `running/<invocation_id>.out`
640    ///
641    /// On completion, content is moved to blob storage. On crash, files survive
642    /// for recovery. Use `shq show --follow <id>` to tail while running.
643    pub fn running_dir(&self) -> PathBuf {
644        self.bird_root.join("running")
645    }
646
647    /// Path to a running output file for a specific invocation.
648    pub fn running_path(&self, invocation_id: &uuid::Uuid) -> PathBuf {
649        self.running_dir().join(format!("{}.out", invocation_id))
650    }
651
652    /// Path to the retrospective buffer directory.
653    ///
654    /// Contains output from recent shell commands that weren't explicitly captured.
655    /// Files: `<uuid>.out` (output) and `<uuid>.meta` (JSON metadata).
656    /// Protected with 700 permissions since buffer may contain sensitive output.
657    pub fn buffer_dir(&self) -> PathBuf {
658        self.bird_root.join("buffer")
659    }
660
661    /// Path to a buffer entry's output file.
662    pub fn buffer_output_path(&self, id: &uuid::Uuid) -> PathBuf {
663        self.buffer_dir().join(format!("{}.out", id))
664    }
665
666    /// Path to a buffer entry's metadata file.
667    pub fn buffer_meta_path(&self, id: &uuid::Uuid) -> PathBuf {
668        self.buffer_dir().join(format!("{}.meta", id))
669    }
670
671    /// Path to a specific blob file by hash and command.
672    pub fn blob_path(&self, hash: &str, cmd_hint: &str) -> PathBuf {
673        let prefix = &hash[..2.min(hash.len())];
674        let sanitized_cmd = sanitize_for_filename(cmd_hint);
675        self.blobs_dir()
676            .join(prefix)
677            .join(format!("{}--{}.bin", hash, sanitized_cmd))
678    }
679
680    /// Path to the event-formats.toml config file (legacy).
681    pub fn event_formats_path(&self) -> PathBuf {
682        self.bird_root.join("event-formats.toml")
683    }
684
685    /// Path to the format-hints.toml config file.
686    pub fn format_hints_path(&self) -> PathBuf {
687        self.bird_root.join("format-hints.toml")
688    }
689
690    /// Path to events parquet files for a given date.
691    pub fn events_dir(&self, date: &chrono::NaiveDate) -> PathBuf {
692        self.recent_dir()
693            .join("events")
694            .join(format!("date={}", date))
695    }
696
697    // Remote management helpers
698
699    /// Get a remote by name.
700    pub fn get_remote(&self, name: &str) -> Option<&RemoteConfig> {
701        self.remotes.iter().find(|r| r.name == name)
702    }
703
704    /// Add a remote configuration.
705    pub fn add_remote(&mut self, remote: RemoteConfig) {
706        // Remove existing remote with same name
707        self.remotes.retain(|r| r.name != remote.name);
708        self.remotes.push(remote);
709    }
710
711    /// Remove a remote by name. Returns true if removed.
712    pub fn remove_remote(&mut self, name: &str) -> bool {
713        let len_before = self.remotes.len();
714        self.remotes.retain(|r| r.name != name);
715        self.remotes.len() < len_before
716    }
717
718    /// Get all blob roots for multi-location resolution.
719    /// Returns local blobs dir first, then remote blob URLs.
720    pub fn blob_roots(&self) -> Vec<String> {
721        let mut roots = vec![self.blobs_dir().to_string_lossy().to_string()];
722
723        for remote in &self.remotes {
724            if let Some(blob_url) = remote.blob_base_url() {
725                roots.push(blob_url);
726            }
727        }
728
729        roots
730    }
731
732    /// Get remotes that should be auto-attached.
733    pub fn auto_attach_remotes(&self) -> Vec<&RemoteConfig> {
734        self.remotes.iter().filter(|r| r.auto_attach).collect()
735    }
736}
737
/// Turn an arbitrary string into a short, filename-safe fragment (used for
/// blob naming).
///
/// Only the first 32 characters are considered. Spaces become `-`;
/// alphanumerics, `-`, `_` and `.` are kept; every other character becomes `_`.
fn sanitize_for_filename(s: &str) -> String {
    let mut sanitized = String::new();
    // Shorter for blob filenames
    for ch in s.chars().take(32) {
        let mapped = if ch == ' ' {
            '-'
        } else if ch.is_alphanumeric() || matches!(ch, '-' | '_' | '.') {
            ch
        } else {
            '_'
        };
        sanitized.push(mapped);
    }
    sanitized
}
750
751/// Resolve BIRD_ROOT using the standard resolution order.
752fn resolve_bird_root() -> Result<PathBuf> {
753    // 1. Environment variable
754    if let Ok(path) = std::env::var("BIRD_ROOT") {
755        return Ok(PathBuf::from(path));
756    }
757
758    // 2. XDG data directory (via directories crate)
759    if let Some(proj_dirs) = ProjectDirs::from("", "", "bird") {
760        return Ok(proj_dirs.data_dir().to_path_buf());
761    }
762
763    // 3. Fallback to ~/.local/share/bird
764    let home = std::env::var("HOME")
765        .map_err(|_| Error::Config("Could not determine home directory".to_string()))?;
766    Ok(PathBuf::from(home).join(".local/share/bird"))
767}
768
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Root used by the pure path tests; nothing is created under it.
    const TEST_ROOT: &str = "/tmp/test-bird";

    /// Default config rooted at `TEST_ROOT`.
    fn test_config() -> Config {
        Config::with_root(TEST_ROOT)
    }

    /// Fixed date used by the partition-path tests.
    fn sample_date() -> chrono::NaiveDate {
        chrono::NaiveDate::from_ymd_opt(2024, 1, 15).unwrap()
    }

    #[test]
    fn test_config_with_root() {
        let config = test_config();
        assert_eq!(config.bird_root, PathBuf::from("/tmp/test-bird"));
        assert_eq!(config.hot_days, 14);
        assert_eq!(config.inline_threshold, 4_096);
    }

    #[test]
    fn test_blob_path() {
        let expected = PathBuf::from(
            "/tmp/test-bird/db/data/recent/blobs/content/ab/abcdef123456--make-test.bin",
        );
        assert_eq!(test_config().blob_path("abcdef123456", "make test"), expected);
    }

    #[test]
    fn test_config_paths() {
        let config = test_config();
        assert_eq!(config.db_path(), PathBuf::from("/tmp/test-bird/db/bird.duckdb"));
        assert_eq!(config.recent_dir(), PathBuf::from("/tmp/test-bird/db/data/recent"));
    }

    #[test]
    fn test_config_save_load() {
        let tmp = TempDir::new().unwrap();
        let bird_root = tmp.path().to_path_buf();

        // `save` does not create the root directory, so make sure it exists.
        std::fs::create_dir_all(&bird_root).unwrap();

        let original = Config::with_root(&bird_root);
        original.save().unwrap();

        let reloaded = Config::load_from(&bird_root).unwrap();
        assert_eq!(reloaded.hot_days, original.hot_days);
        assert_eq!(reloaded.inline_threshold, original.inline_threshold);
    }

    // V5 schema path tests

    #[test]
    fn test_attempts_dir() {
        let expected = PathBuf::from("/tmp/test-bird/db/data/recent/attempts/date=2024-01-15");
        assert_eq!(test_config().attempts_dir(&sample_date()), expected);
    }

    #[test]
    fn test_outcomes_dir() {
        let expected = PathBuf::from("/tmp/test-bird/db/data/recent/outcomes/date=2024-01-15");
        assert_eq!(test_config().outcomes_dir(&sample_date()), expected);
    }

    #[test]
    fn test_running_dir() {
        let expected = PathBuf::from("/tmp/test-bird/running");
        assert_eq!(test_config().running_dir(), expected);
    }

    #[test]
    fn test_running_path() {
        let id = uuid::Uuid::parse_str("01234567-89ab-cdef-0123-456789abcdef").unwrap();
        let expected =
            PathBuf::from("/tmp/test-bird/running/01234567-89ab-cdef-0123-456789abcdef.out");
        assert_eq!(test_config().running_path(&id), expected);
    }
}
855}