// magic_bird/init.rs
1//! BIRD initialization - creates directory structure and database.
2//!
3//! # Schema Architecture
4//!
5//! BIRD uses a multi-schema architecture for flexible data organization:
6//!
7//! ## Data Schemas (contain actual tables)
8//! - `local` - Locally generated data (tables in DuckDB mode, parquet views in parquet mode)
9//! - `cached_<name>` - One per remote, contains data pulled/synced from that remote
10//! - `cached_placeholder` - Empty tables (ensures `caches` views work with no cached data)
11//!
12//! ## Attached Schemas (live remote connections)
13//! - `remote_<name>` - Attached remote databases (read-only)
14//! - `remote_placeholder` - Empty tables (ensures `remotes` views work with no remotes)
15//!
16//! ## Union Schemas (dynamic views)
17//! - `caches` - Union of all `cached_*` schemas
18//! - `remotes` - Union of all `remote_*` schemas
19//! - `main` - Union of `local` + `caches` (all data we own locally)
20//! - `unified` - Union of `main` + `remotes` (everything)
21//! - `cwd` - Views filtered to current working directory
22//!
23//! ## Reserved Schema Names
24//! - `local`, `main`, `unified`, `cwd`, `caches`, `remotes` - Core schemas
25//! - `cached_*` - Reserved prefix for cached remote data
26//! - `remote_*` - Reserved prefix for attached remotes
27//! - `project` - Reserved for attached project-level database
28
29use std::fs;
30
31use crate::config::StorageMode;
32use crate::{Config, Error, Result};
33
34/// Initialize a new BIRD installation.
35///
36/// Creates the directory structure and initializes the DuckDB database
37/// with the schema architecture.
38pub fn initialize(config: &Config) -> Result<()> {
39    let bird_root = &config.bird_root;
40
41    // Check if already initialized
42    if config.db_path().exists() {
43        return Err(Error::AlreadyInitialized(bird_root.clone()));
44    }
45
46    // Create directory structure
47    create_directories(config)?;
48
49    // Initialize DuckDB with schemas
50    init_database(config)?;
51
52    // Save config
53    config.save()?;
54
55    // Create default event-formats.toml
56    create_event_formats_config(config)?;
57
58    Ok(())
59}
60
61/// Create the BIRD directory structure.
62fn create_directories(config: &Config) -> Result<()> {
63    // Common directories for both modes
64    let mut dirs = vec![
65        config.bird_root.join("db"),
66        config.blobs_dir(), // blobs/content
67        config.archive_dir().join("blobs/content"),
68        config.extensions_dir(),
69        config.sql_dir(),
70    ];
71
72    // Parquet mode needs partition directories
73    if config.storage_mode == StorageMode::Parquet {
74        dirs.extend([
75            config.recent_dir().join("invocations"),
76            config.recent_dir().join("outputs"),
77            config.recent_dir().join("sessions"),
78            config.recent_dir().join("events"),
79        ]);
80    }
81
82    for dir in &dirs {
83        fs::create_dir_all(dir)?;
84    }
85
86    Ok(())
87}
88
89/// Initialize the DuckDB database with schema architecture.
90fn init_database(config: &Config) -> Result<()> {
91    let conn = duckdb::Connection::open(config.db_path())?;
92
93    // Enable community extensions
94    conn.execute("SET allow_community_extensions = true", [])?;
95
96    // Install and load required extensions
97    // This pre-installs to the default location so connect() is fast
98    install_extensions(&conn)?;
99
100    // Set file search path so views use relative paths
101    let data_dir = config.data_dir();
102    conn.execute(
103        &format!("SET file_search_path = '{}'", data_dir.display()),
104        [],
105    )?;
106
107    // Create core schemas
108    create_core_schemas(&conn)?;
109
110    // Create blob_registry table in main schema (used by both modes)
111    create_blob_registry(&conn)?;
112
113    // Mode-specific initialization for local schema
114    match config.storage_mode {
115        StorageMode::Parquet => {
116            // Create seed parquet files with correct schema but no rows
117            create_seed_files(&conn, config)?;
118            // Create local schema with views over parquet files
119            create_local_parquet_views(&conn)?;
120        }
121        StorageMode::DuckDB => {
122            // Create local schema with tables for direct storage
123            create_local_tables(&conn)?;
124        }
125    }
126
127    // Create placeholder schemas (for empty unions)
128    create_placeholder_schemas(&conn)?;
129
130    // Create union schemas (caches, remotes, main, bird)
131    create_union_schemas(&conn)?;
132
133    // Create helper views in main schema
134    create_helper_views(&conn)?;
135
136    // Create cwd schema views (placeholders, rebuilt at connection time)
137    create_cwd_views(&conn)?;
138
139    Ok(())
140}
141
/// Create core schemas used by BIRD.
///
/// Idempotent (`IF NOT EXISTS`). `main` is DuckDB's built-in default schema
/// and is therefore not created here. Per-remote `cached_<name>` and
/// `remote_<name>` schemas are created later, as remotes are added.
fn create_core_schemas(conn: &duckdb::Connection) -> Result<()> {
    conn.execute_batch(
        r#"
        -- Data schemas
        CREATE SCHEMA IF NOT EXISTS local;
        CREATE SCHEMA IF NOT EXISTS cached_placeholder;
        CREATE SCHEMA IF NOT EXISTS remote_placeholder;

        -- Union schemas
        CREATE SCHEMA IF NOT EXISTS caches;
        CREATE SCHEMA IF NOT EXISTS remotes;
        -- main already exists as default schema
        CREATE SCHEMA IF NOT EXISTS unified;
        CREATE SCHEMA IF NOT EXISTS cwd;
        "#,
    )?;
    Ok(())
}
161
/// Create placeholder schemas with empty tables.
/// These ensure union views work even when no cached/remote schemas exist.
///
/// The column lists mirror the `local` tables plus a trailing `_source`
/// VARCHAR identifying where each row came from; keep them in sync with
/// `create_local_tables` and the parquet seed-file schemas.
fn create_placeholder_schemas(conn: &duckdb::Connection) -> Result<()> {
    // Cached placeholder - empty tables with correct schema
    conn.execute_batch(
        r#"
        CREATE TABLE cached_placeholder.sessions (
            session_id VARCHAR, client_id VARCHAR, invoker VARCHAR, invoker_pid INTEGER,
            invoker_type VARCHAR, registered_at TIMESTAMP, cwd VARCHAR, date DATE,
            _source VARCHAR
        );
        CREATE TABLE cached_placeholder.invocations (
            id UUID, session_id VARCHAR, timestamp TIMESTAMP, duration_ms BIGINT,
            cwd VARCHAR, cmd VARCHAR, executable VARCHAR, runner_id VARCHAR, exit_code INTEGER,
            status VARCHAR, format_hint VARCHAR, client_id VARCHAR, hostname VARCHAR,
            username VARCHAR, tag VARCHAR, date DATE, _source VARCHAR
        );
        CREATE TABLE cached_placeholder.outputs (
            id UUID, invocation_id UUID, stream VARCHAR, content_hash VARCHAR,
            byte_length BIGINT, storage_type VARCHAR, storage_ref VARCHAR,
            content_type VARCHAR, date DATE, _source VARCHAR
        );
        CREATE TABLE cached_placeholder.events (
            id UUID, invocation_id UUID, client_id VARCHAR, hostname VARCHAR,
            event_type VARCHAR, severity VARCHAR, ref_file VARCHAR, ref_line INTEGER,
            ref_column INTEGER, message VARCHAR, error_code VARCHAR, test_name VARCHAR,
            status VARCHAR, format_used VARCHAR, date DATE, _source VARCHAR
        );
        "#,
    )?;

    // Remote placeholder - same structure
    conn.execute_batch(
        r#"
        CREATE TABLE remote_placeholder.sessions (
            session_id VARCHAR, client_id VARCHAR, invoker VARCHAR, invoker_pid INTEGER,
            invoker_type VARCHAR, registered_at TIMESTAMP, cwd VARCHAR, date DATE,
            _source VARCHAR
        );
        CREATE TABLE remote_placeholder.invocations (
            id UUID, session_id VARCHAR, timestamp TIMESTAMP, duration_ms BIGINT,
            cwd VARCHAR, cmd VARCHAR, executable VARCHAR, runner_id VARCHAR, exit_code INTEGER,
            status VARCHAR, format_hint VARCHAR, client_id VARCHAR, hostname VARCHAR,
            username VARCHAR, tag VARCHAR, date DATE, _source VARCHAR
        );
        CREATE TABLE remote_placeholder.outputs (
            id UUID, invocation_id UUID, stream VARCHAR, content_hash VARCHAR,
            byte_length BIGINT, storage_type VARCHAR, storage_ref VARCHAR,
            content_type VARCHAR, date DATE, _source VARCHAR
        );
        CREATE TABLE remote_placeholder.events (
            id UUID, invocation_id UUID, client_id VARCHAR, hostname VARCHAR,
            event_type VARCHAR, severity VARCHAR, ref_file VARCHAR, ref_line INTEGER,
            ref_column INTEGER, message VARCHAR, error_code VARCHAR, test_name VARCHAR,
            status VARCHAR, format_used VARCHAR, date DATE, _source VARCHAR
        );
        "#,
    )?;

    Ok(())
}
223
/// Create union schemas that combine data from multiple sources.
/// Initially these just reference placeholders; they get rebuilt when remotes are added.
///
/// Layering:
/// - `caches`  = all `cached_*` schemas (here: just the placeholder)
/// - `remotes` = all `remote_*` schemas (here: just the placeholder)
/// - `main`    = `local` rows tagged `_source = 'local'` + `caches`
/// - `unified` = `main` + `remotes`
///
/// `UNION ALL BY NAME` matches columns by name rather than position, so the
/// underlying schemas may order columns differently. The
/// `unified.qualified_*` views deduplicate identical rows seen from several
/// sources, collapsing `_source` into a `_sources` list via `GROUP BY ALL`.
fn create_union_schemas(conn: &duckdb::Connection) -> Result<()> {
    // caches = union of all cached_* schemas (initially just placeholder)
    conn.execute_batch(
        r#"
        CREATE OR REPLACE VIEW caches.sessions AS SELECT * FROM cached_placeholder.sessions;
        CREATE OR REPLACE VIEW caches.invocations AS SELECT * FROM cached_placeholder.invocations;
        CREATE OR REPLACE VIEW caches.outputs AS SELECT * FROM cached_placeholder.outputs;
        CREATE OR REPLACE VIEW caches.events AS SELECT * FROM cached_placeholder.events;
        "#,
    )?;

    // remotes = union of all remote_* schemas (initially just placeholder)
    conn.execute_batch(
        r#"
        CREATE OR REPLACE VIEW remotes.sessions AS SELECT * FROM remote_placeholder.sessions;
        CREATE OR REPLACE VIEW remotes.invocations AS SELECT * FROM remote_placeholder.invocations;
        CREATE OR REPLACE VIEW remotes.outputs AS SELECT * FROM remote_placeholder.outputs;
        CREATE OR REPLACE VIEW remotes.events AS SELECT * FROM remote_placeholder.events;
        "#,
    )?;

    // main = local + caches (all data we own)
    conn.execute_batch(
        r#"
        CREATE OR REPLACE VIEW main.sessions AS
            SELECT *, 'local' as _source FROM local.sessions
            UNION ALL BY NAME SELECT * FROM caches.sessions;
        CREATE OR REPLACE VIEW main.invocations AS
            SELECT *, 'local' as _source FROM local.invocations
            UNION ALL BY NAME SELECT * FROM caches.invocations;
        CREATE OR REPLACE VIEW main.outputs AS
            SELECT *, 'local' as _source FROM local.outputs
            UNION ALL BY NAME SELECT * FROM caches.outputs;
        CREATE OR REPLACE VIEW main.events AS
            SELECT *, 'local' as _source FROM local.events
            UNION ALL BY NAME SELECT * FROM caches.events;
        "#,
    )?;

    // unified = main + remotes (everything)
    conn.execute_batch(
        r#"
        CREATE OR REPLACE VIEW unified.sessions AS
            SELECT * FROM main.sessions
            UNION ALL BY NAME SELECT * FROM remotes.sessions;
        CREATE OR REPLACE VIEW unified.invocations AS
            SELECT * FROM main.invocations
            UNION ALL BY NAME SELECT * FROM remotes.invocations;
        CREATE OR REPLACE VIEW unified.outputs AS
            SELECT * FROM main.outputs
            UNION ALL BY NAME SELECT * FROM remotes.outputs;
        CREATE OR REPLACE VIEW unified.events AS
            SELECT * FROM main.events
            UNION ALL BY NAME SELECT * FROM remotes.events;
        "#,
    )?;

    // unified.qualified_* views - deduplicated with source list
    conn.execute_batch(
        r#"
        CREATE OR REPLACE VIEW unified.qualified_sessions AS
            SELECT * EXCLUDE (_source), list(DISTINCT _source) as _sources
            FROM unified.sessions
            GROUP BY ALL;
        CREATE OR REPLACE VIEW unified.qualified_invocations AS
            SELECT * EXCLUDE (_source), list(DISTINCT _source) as _sources
            FROM unified.invocations
            GROUP BY ALL;
        CREATE OR REPLACE VIEW unified.qualified_outputs AS
            SELECT * EXCLUDE (_source), list(DISTINCT _source) as _sources
            FROM unified.outputs
            GROUP BY ALL;
        CREATE OR REPLACE VIEW unified.qualified_events AS
            SELECT * EXCLUDE (_source), list(DISTINCT _source) as _sources
            FROM unified.events
            GROUP BY ALL;
        "#,
    )?;

    Ok(())
}
307
/// Create local schema with views over Parquet files (for Parquet mode).
///
/// In parquet mode, local data is stored in parquet files.
/// Views in the local schema read from these files.
/// Uses `file_row_number = true` to handle empty directories gracefully.
///
/// The `recent/...` globs are relative; they resolve against the
/// `file_search_path` set during `init_database`.
fn create_local_parquet_views(conn: &duckdb::Connection) -> Result<()> {
    // Note: the globs below also match the zero-row seed files written by
    // `create_seed_files` into date=1970-01-01 partitions, so the views
    // remain valid (just empty) before any real data has been recorded.
    conn.execute_batch(
        r#"
        -- Sessions view: read from parquet files
        CREATE OR REPLACE VIEW local.sessions AS
        SELECT * EXCLUDE (filename, file_row_number)
        FROM read_parquet(
            'recent/sessions/**/*.parquet',
            union_by_name = true,
            hive_partitioning = true,
            filename = true,
            file_row_number = true
        );

        -- Invocations view: read from parquet files
        CREATE OR REPLACE VIEW local.invocations AS
        SELECT * EXCLUDE (filename, file_row_number)
        FROM read_parquet(
            'recent/invocations/**/*.parquet',
            union_by_name = true,
            hive_partitioning = true,
            filename = true,
            file_row_number = true
        );

        -- Outputs view: read from parquet files
        CREATE OR REPLACE VIEW local.outputs AS
        SELECT * EXCLUDE (filename, file_row_number)
        FROM read_parquet(
            'recent/outputs/**/*.parquet',
            union_by_name = true,
            hive_partitioning = true,
            filename = true,
            file_row_number = true
        );

        -- Events view: read from parquet files
        CREATE OR REPLACE VIEW local.events AS
        SELECT * EXCLUDE (filename, file_row_number)
        FROM read_parquet(
            'recent/events/**/*.parquet',
            union_by_name = true,
            hive_partitioning = true,
            filename = true,
            file_row_number = true
        );
        "#,
    )?;
    Ok(())
}
366
/// Create local schema with tables for direct storage (for DuckDB mode).
///
/// Column definitions must stay in sync with the placeholder tables in
/// `create_placeholder_schemas` (which add a trailing `_source` column)
/// and the parquet seed schemas in `create_seed_files`.
fn create_local_tables(conn: &duckdb::Connection) -> Result<()> {
    conn.execute_batch(
        r#"
        -- Sessions table
        CREATE TABLE IF NOT EXISTS local.sessions (
            session_id VARCHAR,
            client_id VARCHAR,
            invoker VARCHAR,
            invoker_pid INTEGER,
            invoker_type VARCHAR,
            registered_at TIMESTAMP,
            cwd VARCHAR,
            date DATE
        );

        -- Invocations table
        CREATE TABLE IF NOT EXISTS local.invocations (
            id UUID,
            session_id VARCHAR,
            timestamp TIMESTAMP,
            duration_ms BIGINT,
            cwd VARCHAR,
            cmd VARCHAR,
            executable VARCHAR,
            runner_id VARCHAR,
            exit_code INTEGER,
            status VARCHAR DEFAULT 'completed',
            format_hint VARCHAR,
            client_id VARCHAR,
            hostname VARCHAR,
            username VARCHAR,
            tag VARCHAR,
            date DATE
        );

        -- Outputs table
        CREATE TABLE IF NOT EXISTS local.outputs (
            id UUID,
            invocation_id UUID,
            stream VARCHAR,
            content_hash VARCHAR,
            byte_length BIGINT,
            storage_type VARCHAR,
            storage_ref VARCHAR,
            content_type VARCHAR,
            date DATE
        );

        -- Events table
        CREATE TABLE IF NOT EXISTS local.events (
            id UUID,
            invocation_id UUID,
            client_id VARCHAR,
            hostname VARCHAR,
            event_type VARCHAR,
            severity VARCHAR,
            ref_file VARCHAR,
            ref_line INTEGER,
            ref_column INTEGER,
            message VARCHAR,
            error_code VARCHAR,
            test_name VARCHAR,
            status VARCHAR,
            format_used VARCHAR,
            date DATE
        );
        "#,
    )?;
    Ok(())
}
438
/// Create helper views in main schema.
///
/// Convenience views layered over the `main.*` union views:
/// recency filters, a failure filter, two joins, and a per-client rollup.
///
/// NOTE: `failed_invocations` filters `exit_code != 0`, which under SQL
/// three-valued logic also drops rows whose `exit_code` is NULL
/// (e.g. invocations without a recorded exit code).
fn create_helper_views(conn: &duckdb::Connection) -> Result<()> {
    conn.execute_batch(
        r#"
        -- Recent invocations helper view
        CREATE OR REPLACE VIEW main.recent_invocations AS
        SELECT *
        FROM main.invocations
        WHERE date >= CURRENT_DATE - INTERVAL '7 days'
        ORDER BY timestamp DESC;

        -- Invocations today helper view
        CREATE OR REPLACE VIEW main.invocations_today AS
        SELECT *
        FROM main.invocations
        WHERE date = CURRENT_DATE
        ORDER BY timestamp DESC;

        -- Failed invocations helper view
        CREATE OR REPLACE VIEW main.failed_invocations AS
        SELECT *
        FROM main.invocations
        WHERE exit_code != 0
        ORDER BY timestamp DESC;

        -- Invocations with outputs (joined view)
        CREATE OR REPLACE VIEW main.invocations_with_outputs AS
        SELECT
            i.*,
            o.id as output_id,
            o.stream,
            o.byte_length,
            o.storage_type,
            o.storage_ref
        FROM main.invocations i
        LEFT JOIN main.outputs o ON i.id = o.invocation_id;

        -- Clients view (derived from sessions)
        CREATE OR REPLACE VIEW main.clients AS
        SELECT
            client_id,
            MIN(registered_at) as first_seen,
            MAX(registered_at) as last_seen,
            COUNT(DISTINCT session_id) as session_count
        FROM main.sessions
        GROUP BY client_id;

        -- Events with invocation context (joined view)
        CREATE OR REPLACE VIEW main.events_with_context AS
        SELECT
            e.*,
            i.cmd,
            i.timestamp,
            i.cwd,
            i.exit_code
        FROM main.events e
        JOIN main.invocations i ON e.invocation_id = i.id;
        "#,
    )?;
    Ok(())
}
500
/// Create cwd schema views filtered to current working directory.
/// These views are dynamically regenerated when the connection opens.
/// Note: Initial creation uses a placeholder; actual filtering happens at connection time.
///
/// The `WHERE false` bodies deliberately return zero rows while preserving
/// the column shape of the corresponding `main.*` views, so queries against
/// `cwd.*` are valid even before a connection rebuilds them.
fn create_cwd_views(conn: &duckdb::Connection) -> Result<()> {
    // cwd views filter main data to entries where cwd starts with current directory
    // The actual current directory is set via a variable at connection time
    conn.execute_batch(
        r#"
        -- Placeholder views - these get rebuilt with actual cwd at connection time
        CREATE OR REPLACE VIEW cwd.sessions AS
        SELECT * FROM main.sessions WHERE false;
        CREATE OR REPLACE VIEW cwd.invocations AS
        SELECT * FROM main.invocations WHERE false;
        CREATE OR REPLACE VIEW cwd.outputs AS
        SELECT * FROM main.outputs WHERE false;
        CREATE OR REPLACE VIEW cwd.events AS
        SELECT * FROM main.events WHERE false;
        "#,
    )?;
    Ok(())
}
522
523/// Ensure a DuckDB extension is loaded, installing if necessary.
524///
525/// Attempts in order:
526/// 1. LOAD (extension might already be available)
527/// 2. INSTALL from default repository, then LOAD
528/// 3. INSTALL FROM community, then LOAD
529///
530/// Includes retry logic to handle race conditions when multiple processes
531/// try to install extensions concurrently.
532fn ensure_extension(conn: &duckdb::Connection, name: &str) -> Result<bool> {
533    // Retry up to 3 times to handle concurrent installation races
534    for attempt in 0..3 {
535        // Try loading directly first (already installed/cached)
536        if conn.execute(&format!("LOAD {}", name), []).is_ok() {
537            return Ok(true);
538        }
539
540        // Try installing from default repository
541        if conn.execute(&format!("INSTALL {}", name), []).is_ok()
542            && conn.execute(&format!("LOAD {}", name), []).is_ok()
543        {
544            return Ok(true);
545        }
546
547        // Try installing from community repository
548        if conn.execute(&format!("INSTALL {} FROM community", name), []).is_ok()
549            && conn.execute(&format!("LOAD {}", name), []).is_ok()
550        {
551            return Ok(true);
552        }
553
554        // If not the last attempt, wait a bit before retrying
555        if attempt < 2 {
556            std::thread::sleep(std::time::Duration::from_millis(100 * (attempt as u64 + 1)));
557        }
558    }
559
560    Ok(false)
561}
562
563/// Install and load all required extensions during initialization.
564/// This pre-populates the extension cache so connect() is fast.
565fn install_extensions(conn: &duckdb::Connection) -> Result<()> {
566    // Required extensions - fail if not available
567    for name in ["parquet", "icu", "httpfs", "json"] {
568        if !ensure_extension(conn, name)? {
569            return Err(Error::Config(format!(
570                "Required extension '{}' could not be installed",
571                name
572            )));
573        }
574    }
575
576    // Optional community extensions - warn if not available
577    for (name, desc) in [
578        ("scalarfs", "data: URL support for inline blobs"),
579        ("duck_hunt", "log/output parsing for event extraction"),
580    ] {
581        if !ensure_extension(conn, name)? {
582            eprintln!("Warning: {} extension not available ({})", name, desc);
583        }
584    }
585
586    Ok(())
587}
588
589
/// Create the blob_registry table for tracking deduplicated blobs.
///
/// The table name is unqualified, so it lands in the connection's default
/// schema (`main`). Used by both storage modes. `ref_count` supports
/// reference-counted garbage collection of blob files.
fn create_blob_registry(conn: &duckdb::Connection) -> Result<()> {
    conn.execute_batch(
        r#"
        CREATE TABLE IF NOT EXISTS blob_registry (
            content_hash  VARCHAR PRIMARY KEY,  -- BLAKE3 hash
            byte_length   BIGINT NOT NULL,      -- Original uncompressed size
            ref_count     INTEGER DEFAULT 1,    -- Number of outputs referencing this blob
            first_seen    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            storage_path  VARCHAR NOT NULL      -- Relative path to blob file
        );
        "#,
    )?;
    Ok(())
}
606
607/// Create seed parquet files with correct schema but no rows.
608fn create_seed_files(conn: &duckdb::Connection, config: &Config) -> Result<()> {
609    // Create invocations seed (in status=completed partition)
610    let invocations_seed_dir = config
611        .recent_dir()
612        .join("invocations")
613        .join("status=completed")
614        .join("date=1970-01-01");
615    fs::create_dir_all(&invocations_seed_dir)?;
616
617    let invocations_seed_path = invocations_seed_dir.join("_seed.parquet");
618    conn.execute_batch(&format!(
619        r#"
620        COPY (
621            SELECT
622                NULL::UUID as id,
623                NULL::VARCHAR as session_id,
624                NULL::TIMESTAMP as timestamp,
625                NULL::BIGINT as duration_ms,
626                NULL::VARCHAR as cwd,
627                NULL::VARCHAR as cmd,
628                NULL::VARCHAR as executable,
629                NULL::VARCHAR as runner_id,
630                NULL::INTEGER as exit_code,
631                NULL::VARCHAR as status,
632                NULL::VARCHAR as format_hint,
633                NULL::VARCHAR as client_id,
634                NULL::VARCHAR as hostname,
635                NULL::VARCHAR as username,
636                NULL::VARCHAR as tag,
637                NULL::DATE as date
638            WHERE false
639        ) TO '{}' (FORMAT PARQUET);
640        "#,
641        invocations_seed_path.display()
642    ))?;
643
644    // Create outputs seed
645    let outputs_seed_dir = config.recent_dir().join("outputs").join("date=1970-01-01");
646    fs::create_dir_all(&outputs_seed_dir)?;
647
648    let outputs_seed_path = outputs_seed_dir.join("_seed.parquet");
649    conn.execute_batch(&format!(
650        r#"
651        COPY (
652            SELECT
653                NULL::UUID as id,
654                NULL::UUID as invocation_id,
655                NULL::VARCHAR as stream,
656                NULL::VARCHAR as content_hash,
657                NULL::BIGINT as byte_length,
658                NULL::VARCHAR as storage_type,
659                NULL::VARCHAR as storage_ref,
660                NULL::VARCHAR as content_type,
661                NULL::DATE as date
662            WHERE false
663        ) TO '{}' (FORMAT PARQUET);
664        "#,
665        outputs_seed_path.display()
666    ))?;
667
668    // Create sessions seed
669    let sessions_seed_dir = config.recent_dir().join("sessions").join("date=1970-01-01");
670    fs::create_dir_all(&sessions_seed_dir)?;
671
672    let sessions_seed_path = sessions_seed_dir.join("_seed.parquet");
673    conn.execute_batch(&format!(
674        r#"
675        COPY (
676            SELECT
677                NULL::VARCHAR as session_id,
678                NULL::VARCHAR as client_id,
679                NULL::VARCHAR as invoker,
680                NULL::INTEGER as invoker_pid,
681                NULL::VARCHAR as invoker_type,
682                NULL::TIMESTAMP as registered_at,
683                NULL::VARCHAR as cwd,
684                NULL::DATE as date
685            WHERE false
686        ) TO '{}' (FORMAT PARQUET);
687        "#,
688        sessions_seed_path.display()
689    ))?;
690
691    // Create events seed
692    let events_seed_dir = config.recent_dir().join("events").join("date=1970-01-01");
693    fs::create_dir_all(&events_seed_dir)?;
694
695    let events_seed_path = events_seed_dir.join("_seed.parquet");
696    conn.execute_batch(&format!(
697        r#"
698        COPY (
699            SELECT
700                NULL::UUID as id,
701                NULL::UUID as invocation_id,
702                NULL::VARCHAR as client_id,
703                NULL::VARCHAR as hostname,
704                NULL::VARCHAR as event_type,
705                NULL::VARCHAR as severity,
706                NULL::VARCHAR as ref_file,
707                NULL::INTEGER as ref_line,
708                NULL::INTEGER as ref_column,
709                NULL::VARCHAR as message,
710                NULL::VARCHAR as error_code,
711                NULL::VARCHAR as test_name,
712                NULL::VARCHAR as status,
713                NULL::VARCHAR as format_used,
714                NULL::DATE as date
715            WHERE false
716        ) TO '{}' (FORMAT PARQUET);
717        "#,
718        events_seed_path.display()
719    ))?;
720
721    Ok(())
722}
723
724/// Create the default event-formats.toml configuration file.
725fn create_event_formats_config(config: &Config) -> Result<()> {
726    let path = config.event_formats_path();
727    if !path.exists() {
728        fs::write(&path, DEFAULT_EVENT_FORMATS_CONFIG)?;
729    }
730    Ok(())
731}
732
/// Default content for event-formats.toml.
///
/// Glob rules mapping command strings to duck_hunt parser formats; the
/// `[default]` section falls back to duck_hunt's auto-detection. Written
/// verbatim by `create_event_formats_config` unless the file already exists.
pub const DEFAULT_EVENT_FORMATS_CONFIG: &str = r#"# Event format detection rules for duck_hunt
# Patterns are glob-matched against the command string
# First matching rule wins; use 'auto' for duck_hunt's built-in detection

# C/C++ compilers
[[rules]]
pattern = "*gcc*"
format = "gcc"

[[rules]]
pattern = "*g++*"
format = "gcc"

[[rules]]
pattern = "*clang*"
format = "gcc"

[[rules]]
pattern = "*clang++*"
format = "gcc"

# Rust
[[rules]]
pattern = "*cargo build*"
format = "cargo_build"

[[rules]]
pattern = "*cargo test*"
format = "cargo_test_json"

[[rules]]
pattern = "*cargo check*"
format = "cargo_build"

[[rules]]
pattern = "*rustc*"
format = "rustc"

# Python
[[rules]]
pattern = "*pytest*"
format = "pytest_text"

[[rules]]
pattern = "*python*-m*pytest*"
format = "pytest_text"

[[rules]]
pattern = "*mypy*"
format = "mypy"

[[rules]]
pattern = "*flake8*"
format = "flake8"

[[rules]]
pattern = "*pylint*"
format = "pylint"

# JavaScript/TypeScript
[[rules]]
pattern = "*eslint*"
format = "eslint"

[[rules]]
pattern = "*tsc*"
format = "typescript"

[[rules]]
pattern = "*jest*"
format = "jest"

# Build systems
[[rules]]
pattern = "*make*"
format = "make_error"

[[rules]]
pattern = "*cmake*"
format = "cmake"

[[rules]]
pattern = "*ninja*"
format = "ninja"

# Go
[[rules]]
pattern = "*go build*"
format = "go_build"

[[rules]]
pattern = "*go test*"
format = "go_test"

# Default: use duck_hunt's auto-detection
[default]
format = "auto"
"#;
832
833/// Check if BIRD is initialized at the given location.
834pub fn is_initialized(config: &Config) -> bool {
835    config.db_path().exists()
836}
837
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    // Build a config rooted in a fresh temporary directory.
    // The TempDir must outlive the test body, so it is returned alongside.
    fn scratch() -> (TempDir, Config) {
        let dir = TempDir::new().unwrap();
        let config = Config::with_root(dir.path());
        (dir, config)
    }

    #[test]
    fn test_initialize_creates_structure() {
        let (_dir, config) = scratch();

        initialize(&config).unwrap();

        // Database file plus every expected directory should now exist.
        assert!(config.db_path().exists());
        for table in ["invocations", "outputs", "sessions"] {
            assert!(config.recent_dir().join(table).exists());
        }
        assert!(config.blobs_dir().exists());
        assert!(config.extensions_dir().exists());
        assert!(config.sql_dir().exists());
        assert!(config.bird_root.join("config.toml").exists());
    }

    #[test]
    fn test_initialize_twice_fails() {
        let (_dir, config) = scratch();

        initialize(&config).unwrap();

        // A second initialization must be rejected.
        assert!(matches!(
            initialize(&config),
            Err(Error::AlreadyInitialized(_))
        ));
    }

    #[test]
    fn test_is_initialized() {
        let (_dir, config) = scratch();

        assert!(!is_initialized(&config));
        initialize(&config).unwrap();
        assert!(is_initialized(&config));
    }
}