// magic_bird/init.rs
1//! BIRD initialization - creates directory structure and database.
2//!
3//! # Schema Architecture
4//!
5//! BIRD uses a multi-schema architecture for flexible data organization:
6//!
7//! ## Data Schemas (contain actual tables)
8//! - `local` - Locally generated data (tables in DuckDB mode, parquet views in parquet mode)
9//! - `cached_<name>` - One per remote, contains data pulled/synced from that remote
10//! - `cached_placeholder` - Empty tables (ensures `caches` views work with no cached data)
11//!
12//! ## Attached Schemas (live remote connections)
13//! - `remote_<name>` - Attached remote databases (read-only)
14//! - `remote_placeholder` - Empty tables (ensures `remotes` views work with no remotes)
15//!
16//! ## Union Schemas (dynamic views)
17//! - `caches` - Union of all `cached_*` schemas
18//! - `remotes` - Union of all `remote_*` schemas
19//! - `main` - Union of `local` + `caches` (all data we own locally)
20//! - `unified` - Union of `main` + `remotes` (everything)
21//! - `cwd` - Views filtered to current working directory
22//!
23//! ## Reserved Schema Names
24//! - `local`, `main`, `unified`, `cwd`, `caches`, `remotes` - Core schemas
25//! - `cached_*` - Reserved prefix for cached remote data
26//! - `remote_*` - Reserved prefix for attached remotes
27//! - `project` - Reserved for attached project-level database
28
29use std::fs;
30
31use crate::config::StorageMode;
32use crate::{Config, Error, Result};
33
34/// Initialize a new BIRD installation.
35///
36/// Creates the directory structure and initializes the DuckDB database
37/// with the schema architecture.
38pub fn initialize(config: &Config) -> Result<()> {
39    let bird_root = &config.bird_root;
40
41    // Check if already initialized
42    if config.db_path().exists() {
43        return Err(Error::AlreadyInitialized(bird_root.clone()));
44    }
45
46    // Create directory structure
47    create_directories(config)?;
48
49    // Initialize DuckDB with schemas
50    init_database(config)?;
51
52    // Save config
53    config.save()?;
54
55    // Create default event-formats.toml
56    create_event_formats_config(config)?;
57
58    Ok(())
59}
60
61/// Create the BIRD directory structure.
62fn create_directories(config: &Config) -> Result<()> {
63    // Common directories for both modes
64    let mut dirs = vec![
65        config.bird_root.join("db"),
66        config.blobs_dir(), // blobs/content
67        config.archive_dir().join("blobs/content"),
68        config.extensions_dir(),
69        config.sql_dir(),
70    ];
71
72    // Parquet mode needs partition directories
73    if config.storage_mode == StorageMode::Parquet {
74        dirs.extend([
75            config.recent_dir().join("invocations"),
76            config.recent_dir().join("outputs"),
77            config.recent_dir().join("sessions"),
78            config.recent_dir().join("events"),
79        ]);
80    }
81
82    for dir in &dirs {
83        fs::create_dir_all(dir)?;
84    }
85
86    Ok(())
87}
88
89/// Initialize the DuckDB database with schema architecture.
90fn init_database(config: &Config) -> Result<()> {
91    let conn = duckdb::Connection::open(config.db_path())?;
92
93    // Enable community extensions
94    conn.execute("SET allow_community_extensions = true", [])?;
95
96    // Install and load required extensions
97    // This pre-installs to the default location so connect() is fast
98    install_extensions(&conn)?;
99
100    // Set file search path so views use relative paths
101    let data_dir = config.data_dir();
102    conn.execute(
103        &format!("SET file_search_path = '{}'", data_dir.display()),
104        [],
105    )?;
106
107    // Create core schemas
108    create_core_schemas(&conn)?;
109
110    // Create blob_registry table in main schema (used by both modes)
111    create_blob_registry(&conn)?;
112
113    // Mode-specific initialization for local schema
114    match config.storage_mode {
115        StorageMode::Parquet => {
116            // Create seed parquet files with correct schema but no rows
117            create_seed_files(&conn, config)?;
118            // Create local schema with views over parquet files
119            create_local_parquet_views(&conn)?;
120        }
121        StorageMode::DuckDB => {
122            // Create local schema with tables for direct storage
123            create_local_tables(&conn)?;
124        }
125    }
126
127    // Create placeholder schemas (for empty unions)
128    create_placeholder_schemas(&conn)?;
129
130    // Create union schemas (caches, remotes, main, bird)
131    create_union_schemas(&conn)?;
132
133    // Create helper views in main schema
134    create_helper_views(&conn)?;
135
136    // Create cwd schema views (placeholders, rebuilt at connection time)
137    create_cwd_views(&conn)?;
138
139    Ok(())
140}
141
/// Create core schemas used by BIRD.
///
/// Creates the fixed schemas described in the module docs: the `local`
/// data schema, the two placeholder schemas, and the union schemas
/// (`caches`, `remotes`, `unified`, `cwd`). DuckDB's default `main`
/// schema already exists, so it is not created here.
///
/// Every statement uses IF NOT EXISTS, so this is idempotent.
fn create_core_schemas(conn: &duckdb::Connection) -> Result<()> {
    conn.execute_batch(
        r#"
        -- Data schemas
        CREATE SCHEMA IF NOT EXISTS local;
        CREATE SCHEMA IF NOT EXISTS cached_placeholder;
        CREATE SCHEMA IF NOT EXISTS remote_placeholder;

        -- Union schemas
        CREATE SCHEMA IF NOT EXISTS caches;
        CREATE SCHEMA IF NOT EXISTS remotes;
        -- main already exists as default schema
        CREATE SCHEMA IF NOT EXISTS unified;
        CREATE SCHEMA IF NOT EXISTS cwd;
        "#,
    )?;
    Ok(())
}
161
162/// Create placeholder schemas with empty tables.
163/// These ensure union views work even when no cached/remote schemas exist.
164fn create_placeholder_schemas(conn: &duckdb::Connection) -> Result<()> {
165    // Cached placeholder - empty tables with correct schema
166    conn.execute_batch(
167        r#"
168        CREATE TABLE cached_placeholder.sessions (
169            session_id VARCHAR, client_id VARCHAR, invoker VARCHAR, invoker_pid INTEGER,
170            invoker_type VARCHAR, registered_at TIMESTAMP, cwd VARCHAR, date DATE,
171            _source VARCHAR
172        );
173        CREATE TABLE cached_placeholder.invocations (
174            id UUID, session_id VARCHAR, timestamp TIMESTAMP, duration_ms BIGINT,
175            cwd VARCHAR, cmd VARCHAR, executable VARCHAR, exit_code INTEGER,
176            format_hint VARCHAR, client_id VARCHAR, hostname VARCHAR, username VARCHAR,
177            tag VARCHAR, date DATE, _source VARCHAR
178        );
179        CREATE TABLE cached_placeholder.outputs (
180            id UUID, invocation_id UUID, stream VARCHAR, content_hash VARCHAR,
181            byte_length BIGINT, storage_type VARCHAR, storage_ref VARCHAR,
182            content_type VARCHAR, date DATE, _source VARCHAR
183        );
184        CREATE TABLE cached_placeholder.events (
185            id UUID, invocation_id UUID, client_id VARCHAR, hostname VARCHAR,
186            event_type VARCHAR, severity VARCHAR, ref_file VARCHAR, ref_line INTEGER,
187            ref_column INTEGER, message VARCHAR, error_code VARCHAR, test_name VARCHAR,
188            status VARCHAR, format_used VARCHAR, date DATE, _source VARCHAR
189        );
190        "#,
191    )?;
192
193    // Remote placeholder - same structure
194    conn.execute_batch(
195        r#"
196        CREATE TABLE remote_placeholder.sessions (
197            session_id VARCHAR, client_id VARCHAR, invoker VARCHAR, invoker_pid INTEGER,
198            invoker_type VARCHAR, registered_at TIMESTAMP, cwd VARCHAR, date DATE,
199            _source VARCHAR
200        );
201        CREATE TABLE remote_placeholder.invocations (
202            id UUID, session_id VARCHAR, timestamp TIMESTAMP, duration_ms BIGINT,
203            cwd VARCHAR, cmd VARCHAR, executable VARCHAR, exit_code INTEGER,
204            format_hint VARCHAR, client_id VARCHAR, hostname VARCHAR, username VARCHAR,
205            tag VARCHAR, date DATE, _source VARCHAR
206        );
207        CREATE TABLE remote_placeholder.outputs (
208            id UUID, invocation_id UUID, stream VARCHAR, content_hash VARCHAR,
209            byte_length BIGINT, storage_type VARCHAR, storage_ref VARCHAR,
210            content_type VARCHAR, date DATE, _source VARCHAR
211        );
212        CREATE TABLE remote_placeholder.events (
213            id UUID, invocation_id UUID, client_id VARCHAR, hostname VARCHAR,
214            event_type VARCHAR, severity VARCHAR, ref_file VARCHAR, ref_line INTEGER,
215            ref_column INTEGER, message VARCHAR, error_code VARCHAR, test_name VARCHAR,
216            status VARCHAR, format_used VARCHAR, date DATE, _source VARCHAR
217        );
218        "#,
219    )?;
220
221    Ok(())
222}
223
/// Create union schemas that combine data from multiple sources.
/// Initially these just reference placeholders; they get rebuilt when remotes are added.
///
/// View hierarchy (see module docs):
/// - `caches.*`  = union of all `cached_*` schemas (initially the placeholder)
/// - `remotes.*` = union of all `remote_*` schemas (initially the placeholder)
/// - `main.*`    = `local.*` tagged with `_source = 'local'` + `caches.*`
/// - `unified.*` = `main.*` + `remotes.*`
/// - `unified.qualified_*` = `unified.*` deduplicated across sources, with
///   the distinct `_source` values collected into a `_sources` list column
///
/// `UNION ALL BY NAME` matches columns by name rather than position, so the
/// branches may list columns in different orders.
fn create_union_schemas(conn: &duckdb::Connection) -> Result<()> {
    // caches = union of all cached_* schemas (initially just placeholder)
    conn.execute_batch(
        r#"
        CREATE OR REPLACE VIEW caches.sessions AS SELECT * FROM cached_placeholder.sessions;
        CREATE OR REPLACE VIEW caches.invocations AS SELECT * FROM cached_placeholder.invocations;
        CREATE OR REPLACE VIEW caches.outputs AS SELECT * FROM cached_placeholder.outputs;
        CREATE OR REPLACE VIEW caches.events AS SELECT * FROM cached_placeholder.events;
        "#,
    )?;

    // remotes = union of all remote_* schemas (initially just placeholder)
    conn.execute_batch(
        r#"
        CREATE OR REPLACE VIEW remotes.sessions AS SELECT * FROM remote_placeholder.sessions;
        CREATE OR REPLACE VIEW remotes.invocations AS SELECT * FROM remote_placeholder.invocations;
        CREATE OR REPLACE VIEW remotes.outputs AS SELECT * FROM remote_placeholder.outputs;
        CREATE OR REPLACE VIEW remotes.events AS SELECT * FROM remote_placeholder.events;
        "#,
    )?;

    // main = local + caches (all data we own)
    // local tables have no _source column, so it is injected here.
    conn.execute_batch(
        r#"
        CREATE OR REPLACE VIEW main.sessions AS
            SELECT *, 'local' as _source FROM local.sessions
            UNION ALL BY NAME SELECT * FROM caches.sessions;
        CREATE OR REPLACE VIEW main.invocations AS
            SELECT *, 'local' as _source FROM local.invocations
            UNION ALL BY NAME SELECT * FROM caches.invocations;
        CREATE OR REPLACE VIEW main.outputs AS
            SELECT *, 'local' as _source FROM local.outputs
            UNION ALL BY NAME SELECT * FROM caches.outputs;
        CREATE OR REPLACE VIEW main.events AS
            SELECT *, 'local' as _source FROM local.events
            UNION ALL BY NAME SELECT * FROM caches.events;
        "#,
    )?;

    // unified = main + remotes (everything)
    conn.execute_batch(
        r#"
        CREATE OR REPLACE VIEW unified.sessions AS
            SELECT * FROM main.sessions
            UNION ALL BY NAME SELECT * FROM remotes.sessions;
        CREATE OR REPLACE VIEW unified.invocations AS
            SELECT * FROM main.invocations
            UNION ALL BY NAME SELECT * FROM remotes.invocations;
        CREATE OR REPLACE VIEW unified.outputs AS
            SELECT * FROM main.outputs
            UNION ALL BY NAME SELECT * FROM remotes.outputs;
        CREATE OR REPLACE VIEW unified.events AS
            SELECT * FROM main.events
            UNION ALL BY NAME SELECT * FROM remotes.events;
        "#,
    )?;

    // unified.qualified_* views - deduplicated with source list
    // GROUP BY ALL groups on every non-aggregated column, so rows identical
    // in everything but _source collapse to one row listing all sources.
    conn.execute_batch(
        r#"
        CREATE OR REPLACE VIEW unified.qualified_sessions AS
            SELECT * EXCLUDE (_source), list(DISTINCT _source) as _sources
            FROM unified.sessions
            GROUP BY ALL;
        CREATE OR REPLACE VIEW unified.qualified_invocations AS
            SELECT * EXCLUDE (_source), list(DISTINCT _source) as _sources
            FROM unified.invocations
            GROUP BY ALL;
        CREATE OR REPLACE VIEW unified.qualified_outputs AS
            SELECT * EXCLUDE (_source), list(DISTINCT _source) as _sources
            FROM unified.outputs
            GROUP BY ALL;
        CREATE OR REPLACE VIEW unified.qualified_events AS
            SELECT * EXCLUDE (_source), list(DISTINCT _source) as _sources
            FROM unified.events
            GROUP BY ALL;
        "#,
    )?;

    Ok(())
}
307
308/// Create local schema with views over Parquet files (for Parquet mode).
309///
310/// In parquet mode, local data is stored in parquet files.
311/// Views in the local schema read from these files.
312fn create_local_parquet_views(conn: &duckdb::Connection) -> Result<()> {
313    conn.execute_batch(
314        r#"
315        -- Sessions view: read from parquet files
316        CREATE OR REPLACE VIEW local.sessions AS
317        SELECT * EXCLUDE (filename)
318        FROM read_parquet(
319            'recent/sessions/**/*.parquet',
320            union_by_name = true,
321            hive_partitioning = true,
322            filename = true
323        );
324
325        -- Invocations view: read from parquet files
326        CREATE OR REPLACE VIEW local.invocations AS
327        SELECT * EXCLUDE (filename)
328        FROM read_parquet(
329            'recent/invocations/**/*.parquet',
330            union_by_name = true,
331            hive_partitioning = true,
332            filename = true
333        );
334
335        -- Outputs view: read from parquet files
336        CREATE OR REPLACE VIEW local.outputs AS
337        SELECT * EXCLUDE (filename)
338        FROM read_parquet(
339            'recent/outputs/**/*.parquet',
340            union_by_name = true,
341            hive_partitioning = true,
342            filename = true
343        );
344
345        -- Events view: read from parquet files
346        CREATE OR REPLACE VIEW local.events AS
347        SELECT * EXCLUDE (filename)
348        FROM read_parquet(
349            'recent/events/**/*.parquet',
350            union_by_name = true,
351            hive_partitioning = true,
352            filename = true
353        );
354        "#,
355    )?;
356    Ok(())
357}
358
/// Create local schema with tables for direct storage (for DuckDB mode).
///
/// Columns mirror the placeholder tables minus `_source`, which is added
/// by the `main.*` union views (`SELECT *, 'local' as _source ...`).
/// All statements use IF NOT EXISTS, so this is idempotent.
fn create_local_tables(conn: &duckdb::Connection) -> Result<()> {
    conn.execute_batch(
        r#"
        -- Sessions table
        CREATE TABLE IF NOT EXISTS local.sessions (
            session_id VARCHAR,
            client_id VARCHAR,
            invoker VARCHAR,
            invoker_pid INTEGER,
            invoker_type VARCHAR,
            registered_at TIMESTAMP,
            cwd VARCHAR,
            date DATE
        );

        -- Invocations table
        CREATE TABLE IF NOT EXISTS local.invocations (
            id UUID,
            session_id VARCHAR,
            timestamp TIMESTAMP,
            duration_ms BIGINT,
            cwd VARCHAR,
            cmd VARCHAR,
            executable VARCHAR,
            exit_code INTEGER,
            format_hint VARCHAR,
            client_id VARCHAR,
            hostname VARCHAR,
            username VARCHAR,
            tag VARCHAR,
            date DATE
        );

        -- Outputs table
        CREATE TABLE IF NOT EXISTS local.outputs (
            id UUID,
            invocation_id UUID,
            stream VARCHAR,
            content_hash VARCHAR,
            byte_length BIGINT,
            storage_type VARCHAR,
            storage_ref VARCHAR,
            content_type VARCHAR,
            date DATE
        );

        -- Events table
        CREATE TABLE IF NOT EXISTS local.events (
            id UUID,
            invocation_id UUID,
            client_id VARCHAR,
            hostname VARCHAR,
            event_type VARCHAR,
            severity VARCHAR,
            ref_file VARCHAR,
            ref_line INTEGER,
            ref_column INTEGER,
            message VARCHAR,
            error_code VARCHAR,
            test_name VARCHAR,
            status VARCHAR,
            format_used VARCHAR,
            date DATE
        );
        "#,
    )?;
    Ok(())
}
428
/// Create helper views in main schema.
///
/// Convenience views layered over the `main.*` union views:
/// - `recent_invocations`       - last 7 days, newest first
/// - `invocations_today`        - today's invocations, newest first
/// - `failed_invocations`       - non-zero exit codes, newest first
/// - `invocations_with_outputs` - invocations LEFT JOINed to their outputs
/// - `clients`                  - per-client first/last seen and session count
/// - `events_with_context`      - events joined to their owning invocation
fn create_helper_views(conn: &duckdb::Connection) -> Result<()> {
    conn.execute_batch(
        r#"
        -- Recent invocations helper view
        CREATE OR REPLACE VIEW main.recent_invocations AS
        SELECT *
        FROM main.invocations
        WHERE date >= CURRENT_DATE - INTERVAL '7 days'
        ORDER BY timestamp DESC;

        -- Invocations today helper view
        CREATE OR REPLACE VIEW main.invocations_today AS
        SELECT *
        FROM main.invocations
        WHERE date = CURRENT_DATE
        ORDER BY timestamp DESC;

        -- Failed invocations helper view
        CREATE OR REPLACE VIEW main.failed_invocations AS
        SELECT *
        FROM main.invocations
        WHERE exit_code != 0
        ORDER BY timestamp DESC;

        -- Invocations with outputs (joined view)
        CREATE OR REPLACE VIEW main.invocations_with_outputs AS
        SELECT
            i.*,
            o.id as output_id,
            o.stream,
            o.byte_length,
            o.storage_type,
            o.storage_ref
        FROM main.invocations i
        LEFT JOIN main.outputs o ON i.id = o.invocation_id;

        -- Clients view (derived from sessions)
        CREATE OR REPLACE VIEW main.clients AS
        SELECT
            client_id,
            MIN(registered_at) as first_seen,
            MAX(registered_at) as last_seen,
            COUNT(DISTINCT session_id) as session_count
        FROM main.sessions
        GROUP BY client_id;

        -- Events with invocation context (joined view)
        CREATE OR REPLACE VIEW main.events_with_context AS
        SELECT
            e.*,
            i.cmd,
            i.timestamp,
            i.cwd,
            i.exit_code
        FROM main.events e
        JOIN main.invocations i ON e.invocation_id = i.id;
        "#,
    )?;
    Ok(())
}
490
491/// Create cwd schema views filtered to current working directory.
492/// These views are dynamically regenerated when the connection opens.
493/// Note: Initial creation uses a placeholder; actual filtering happens at connection time.
494fn create_cwd_views(conn: &duckdb::Connection) -> Result<()> {
495    // cwd views filter main data to entries where cwd starts with current directory
496    // The actual current directory is set via a variable at connection time
497    conn.execute_batch(
498        r#"
499        -- Placeholder views - these get rebuilt with actual cwd at connection time
500        CREATE OR REPLACE VIEW cwd.sessions AS
501        SELECT * FROM main.sessions WHERE false;
502        CREATE OR REPLACE VIEW cwd.invocations AS
503        SELECT * FROM main.invocations WHERE false;
504        CREATE OR REPLACE VIEW cwd.outputs AS
505        SELECT * FROM main.outputs WHERE false;
506        CREATE OR REPLACE VIEW cwd.events AS
507        SELECT * FROM main.events WHERE false;
508        "#,
509    )?;
510    Ok(())
511}
512
513/// Ensure a DuckDB extension is loaded, installing if necessary.
514///
515/// Attempts in order:
516/// 1. LOAD (extension might already be available)
517/// 2. INSTALL from default repository, then LOAD
518/// 3. INSTALL FROM community, then LOAD
519fn ensure_extension(conn: &duckdb::Connection, name: &str) -> Result<bool> {
520    // Try loading directly first (already installed/cached)
521    if conn.execute(&format!("LOAD {}", name), []).is_ok() {
522        return Ok(true);
523    }
524
525    // Try installing from default repository
526    if conn.execute(&format!("INSTALL {}", name), []).is_ok()
527        && conn.execute(&format!("LOAD {}", name), []).is_ok()
528    {
529        return Ok(true);
530    }
531
532    // Try installing from community repository
533    if conn.execute(&format!("INSTALL {} FROM community", name), []).is_ok()
534        && conn.execute(&format!("LOAD {}", name), []).is_ok()
535    {
536        return Ok(true);
537    }
538
539    Ok(false)
540}
541
542/// Install and load all required extensions during initialization.
543/// This pre-populates the extension cache so connect() is fast.
544fn install_extensions(conn: &duckdb::Connection) -> Result<()> {
545    // Required extensions - fail if not available
546    for name in ["parquet", "icu", "httpfs", "json"] {
547        if !ensure_extension(conn, name)? {
548            return Err(Error::Config(format!(
549                "Required extension '{}' could not be installed",
550                name
551            )));
552        }
553    }
554
555    // Optional community extensions - warn if not available
556    for (name, desc) in [
557        ("scalarfs", "data: URL support for inline blobs"),
558        ("duck_hunt", "log/output parsing for event extraction"),
559    ] {
560        if !ensure_extension(conn, name)? {
561            eprintln!("Warning: {} extension not available ({})", name, desc);
562        }
563    }
564
565    Ok(())
566}
567
568
/// Create the blob_registry table for tracking deduplicated blobs.
///
/// The table name is unqualified, so it is created in the connection's
/// default (`main`) schema. Used by both storage modes; idempotent via
/// IF NOT EXISTS.
fn create_blob_registry(conn: &duckdb::Connection) -> Result<()> {
    conn.execute_batch(
        r#"
        CREATE TABLE IF NOT EXISTS blob_registry (
            content_hash  VARCHAR PRIMARY KEY,  -- BLAKE3 hash
            byte_length   BIGINT NOT NULL,      -- Original uncompressed size
            ref_count     INTEGER DEFAULT 1,    -- Number of outputs referencing this blob
            first_seen    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            storage_path  VARCHAR NOT NULL      -- Relative path to blob file
        );
        "#,
    )?;
    Ok(())
}
585
586/// Create seed parquet files with correct schema but no rows.
587fn create_seed_files(conn: &duckdb::Connection, config: &Config) -> Result<()> {
588    // Create invocations seed
589    let invocations_seed_dir = config
590        .recent_dir()
591        .join("invocations")
592        .join("date=1970-01-01");
593    fs::create_dir_all(&invocations_seed_dir)?;
594
595    let invocations_seed_path = invocations_seed_dir.join("_seed.parquet");
596    conn.execute_batch(&format!(
597        r#"
598        COPY (
599            SELECT
600                NULL::UUID as id,
601                NULL::VARCHAR as session_id,
602                NULL::TIMESTAMP as timestamp,
603                NULL::BIGINT as duration_ms,
604                NULL::VARCHAR as cwd,
605                NULL::VARCHAR as cmd,
606                NULL::VARCHAR as executable,
607                NULL::INTEGER as exit_code,
608                NULL::VARCHAR as format_hint,
609                NULL::VARCHAR as client_id,
610                NULL::VARCHAR as hostname,
611                NULL::VARCHAR as username,
612                NULL::VARCHAR as tag,
613                NULL::DATE as date
614            WHERE false
615        ) TO '{}' (FORMAT PARQUET);
616        "#,
617        invocations_seed_path.display()
618    ))?;
619
620    // Create outputs seed
621    let outputs_seed_dir = config.recent_dir().join("outputs").join("date=1970-01-01");
622    fs::create_dir_all(&outputs_seed_dir)?;
623
624    let outputs_seed_path = outputs_seed_dir.join("_seed.parquet");
625    conn.execute_batch(&format!(
626        r#"
627        COPY (
628            SELECT
629                NULL::UUID as id,
630                NULL::UUID as invocation_id,
631                NULL::VARCHAR as stream,
632                NULL::VARCHAR as content_hash,
633                NULL::BIGINT as byte_length,
634                NULL::VARCHAR as storage_type,
635                NULL::VARCHAR as storage_ref,
636                NULL::VARCHAR as content_type,
637                NULL::DATE as date
638            WHERE false
639        ) TO '{}' (FORMAT PARQUET);
640        "#,
641        outputs_seed_path.display()
642    ))?;
643
644    // Create sessions seed
645    let sessions_seed_dir = config.recent_dir().join("sessions").join("date=1970-01-01");
646    fs::create_dir_all(&sessions_seed_dir)?;
647
648    let sessions_seed_path = sessions_seed_dir.join("_seed.parquet");
649    conn.execute_batch(&format!(
650        r#"
651        COPY (
652            SELECT
653                NULL::VARCHAR as session_id,
654                NULL::VARCHAR as client_id,
655                NULL::VARCHAR as invoker,
656                NULL::INTEGER as invoker_pid,
657                NULL::VARCHAR as invoker_type,
658                NULL::TIMESTAMP as registered_at,
659                NULL::VARCHAR as cwd,
660                NULL::DATE as date
661            WHERE false
662        ) TO '{}' (FORMAT PARQUET);
663        "#,
664        sessions_seed_path.display()
665    ))?;
666
667    // Create events seed
668    let events_seed_dir = config.recent_dir().join("events").join("date=1970-01-01");
669    fs::create_dir_all(&events_seed_dir)?;
670
671    let events_seed_path = events_seed_dir.join("_seed.parquet");
672    conn.execute_batch(&format!(
673        r#"
674        COPY (
675            SELECT
676                NULL::UUID as id,
677                NULL::UUID as invocation_id,
678                NULL::VARCHAR as client_id,
679                NULL::VARCHAR as hostname,
680                NULL::VARCHAR as event_type,
681                NULL::VARCHAR as severity,
682                NULL::VARCHAR as ref_file,
683                NULL::INTEGER as ref_line,
684                NULL::INTEGER as ref_column,
685                NULL::VARCHAR as message,
686                NULL::VARCHAR as error_code,
687                NULL::VARCHAR as test_name,
688                NULL::VARCHAR as status,
689                NULL::VARCHAR as format_used,
690                NULL::DATE as date
691            WHERE false
692        ) TO '{}' (FORMAT PARQUET);
693        "#,
694        events_seed_path.display()
695    ))?;
696
697    Ok(())
698}
699
700/// Create the default event-formats.toml configuration file.
701fn create_event_formats_config(config: &Config) -> Result<()> {
702    let path = config.event_formats_path();
703    if !path.exists() {
704        fs::write(&path, DEFAULT_EVENT_FORMATS_CONFIG)?;
705    }
706    Ok(())
707}
708
/// Default content for event-formats.toml.
///
/// Rules are matched top-to-bottom and the first match wins, so more
/// specific patterns must precede broader ones. In particular `*cmake*`
/// must come before `*make*` (which would otherwise match cmake commands
/// and mis-tag them as `make_error`), and the cargo rules must come
/// before the go rules ("cargo build" contains "go build" as a
/// substring).
pub const DEFAULT_EVENT_FORMATS_CONFIG: &str = r#"# Event format detection rules for duck_hunt
# Patterns are glob-matched against the command string.
# First matching rule wins, so keep specific patterns before broader ones
# (e.g. "*cmake*" before "*make*"). Use 'auto' for duck_hunt's built-in
# detection.

# C/C++ compilers
[[rules]]
pattern = "*gcc*"
format = "gcc"

[[rules]]
pattern = "*g++*"
format = "gcc"

[[rules]]
pattern = "*clang++*"
format = "gcc"

[[rules]]
pattern = "*clang*"
format = "gcc"

# Rust (must precede the Go rules: "cargo build" contains "go build")
[[rules]]
pattern = "*cargo build*"
format = "cargo_build"

[[rules]]
pattern = "*cargo test*"
format = "cargo_test_json"

[[rules]]
pattern = "*cargo check*"
format = "cargo_build"

[[rules]]
pattern = "*rustc*"
format = "rustc"

# Python
[[rules]]
pattern = "*python*-m*pytest*"
format = "pytest_text"

[[rules]]
pattern = "*pytest*"
format = "pytest_text"

[[rules]]
pattern = "*mypy*"
format = "mypy"

[[rules]]
pattern = "*flake8*"
format = "flake8"

[[rules]]
pattern = "*pylint*"
format = "pylint"

# JavaScript/TypeScript
[[rules]]
pattern = "*eslint*"
format = "eslint"

[[rules]]
pattern = "*tsc*"
format = "typescript"

[[rules]]
pattern = "*jest*"
format = "jest"

# Build systems: cmake before the broad "*make*" rule, which would
# otherwise claim cmake commands too
[[rules]]
pattern = "*cmake*"
format = "cmake"

[[rules]]
pattern = "*ninja*"
format = "ninja"

[[rules]]
pattern = "*make*"
format = "make_error"

# Go
[[rules]]
pattern = "*go build*"
format = "go_build"

[[rules]]
pattern = "*go test*"
format = "go_test"

# Default: use duck_hunt's auto-detection
[default]
format = "auto"
"#;
808
809/// Check if BIRD is initialized at the given location.
810pub fn is_initialized(config: &Config) -> bool {
811    config.db_path().exists()
812}
813
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // initialize() must create the database file, the full directory
    // layout, and the saved config.toml.
    #[test]
    fn test_initialize_creates_structure() {
        let tmp = TempDir::new().unwrap();
        let config = Config::with_root(tmp.path());

        initialize(&config).unwrap();

        // Check directories exist
        assert!(config.db_path().exists());
        assert!(config.recent_dir().join("invocations").exists());
        assert!(config.recent_dir().join("outputs").exists());
        assert!(config.recent_dir().join("sessions").exists());
        assert!(config.blobs_dir().exists());
        assert!(config.extensions_dir().exists());
        assert!(config.sql_dir().exists());
        assert!(config.bird_root.join("config.toml").exists());
    }

    // Re-running initialize() over an existing database must fail with
    // AlreadyInitialized rather than clobbering data.
    #[test]
    fn test_initialize_twice_fails() {
        let tmp = TempDir::new().unwrap();
        let config = Config::with_root(tmp.path());

        initialize(&config).unwrap();

        // Second init should fail
        let result = initialize(&config);
        assert!(matches!(result, Err(Error::AlreadyInitialized(_))));
    }

    // is_initialized() flips from false to true once initialize() runs.
    #[test]
    fn test_is_initialized() {
        let tmp = TempDir::new().unwrap();
        let config = Config::with_root(tmp.path());

        assert!(!is_initialized(&config));
        initialize(&config).unwrap();
        assert!(is_initialized(&config));
    }
}