Skip to main content

magic_bird/
schema.rs

1//! Schema definitions for BIRD tables.
2//!
3//! # BIRD v5 Schema
4//!
5//! The v5 schema splits invocations into attempts + outcomes:
6//!
7//! - **attempts**: Record of invocation start (cmd, cwd, timestamp, etc.)
8//! - **outcomes**: Record of invocation completion (exit_code, duration, etc.)
9//! - **invocations**: VIEW joining attempts LEFT JOIN outcomes with derived status
10//!
11//! Status is derived from the join:
12//! - `pending`: attempt exists but no outcome
13//! - `completed`: outcome exists with exit_code
14//! - `orphaned`: outcome exists but exit_code is NULL (signal/crash)
15
16use chrono::{DateTime, NaiveDate, Utc};
17use serde::{Deserialize, Serialize};
18use std::collections::HashMap;
19use uuid::Uuid;
20
/// An invocation record (a captured command/process execution).
///
/// This is the flat, single-row form of an invocation. It can be split into
/// the v5 pair with `to_attempt` / `to_outcome` and rebuilt with
/// `from_attempt_outcome`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InvocationRecord {
    /// Unique identifier (UUIDv7 for time-ordering).
    pub id: Uuid,

    /// Session identifier (groups related invocations).
    pub session_id: String,

    /// When the invocation started.
    pub timestamp: DateTime<Utc>,

    /// How long the invocation took in milliseconds.
    /// The constructors leave this as `None`; it is filled in by `complete`
    /// or `with_duration`.
    pub duration_ms: Option<i64>,

    /// Working directory when invocation was executed.
    pub cwd: String,

    /// The full command string.
    pub cmd: String,

    /// Extracted executable name (e.g., "make" from "make test").
    /// Derived from `cmd` by the constructors.
    pub executable: Option<String>,

    /// Runner identifier for liveness checking of pending invocations.
    /// Format depends on execution context:
    /// - Local process: "pid:12345"
    /// - GitHub Actions: "gha:run:12345678"
    /// - Kubernetes: "k8s:pod:abc123"
    pub runner_id: Option<String>,

    /// Exit code (None while pending).
    pub exit_code: Option<i32>,

    /// Invocation status: "pending", "completed", "orphaned".
    /// `new`/`with_id`/`complete` set "completed", `new_pending` sets
    /// "pending", and `mark_orphaned` sets "orphaned".
    pub status: String,

    /// Detected output format (e.g., "gcc", "pytest").
    pub format_hint: Option<String>,

    /// Client identifier (user@hostname).
    pub client_id: String,

    /// Hostname where invocation was executed.
    pub hostname: Option<String>,

    /// Username who executed the invocation.
    /// The constructors read it from the `USER` environment variable.
    pub username: Option<String>,

    /// User-defined tag (unique alias for this invocation, like git tags).
    pub tag: Option<String>,

    /// Extensible metadata (VCS info, CI context, etc.).
    /// Stored on the AttemptRecord when converted to v5 schema.
    /// A missing field deserializes to an empty map (`#[serde(default)]`).
    #[serde(default)]
    pub metadata: HashMap<String, serde_json::Value>,
}
78
79// =============================================================================
80// BIRD v5 Schema Types: Attempts and Outcomes
81// =============================================================================
82
/// An attempt record (the start of a command execution).
///
/// This represents the "attempt" to run a command - recorded at invocation start.
/// The outcome (completion) is recorded separately in `OutcomeRecord`.
/// The fields correspond one-to-one to the columns of `ATTEMPTS_SCHEMA`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttemptRecord {
    /// Unique identifier (UUIDv7 for time-ordering).
    pub id: Uuid,

    /// When the attempt started.
    pub timestamp: DateTime<Utc>,

    /// The full command string.
    pub cmd: String,

    /// Working directory when invocation was executed.
    pub cwd: String,

    /// Session identifier (groups related invocations).
    pub session_id: String,

    /// User-defined tag (unique alias for this invocation, like git tags).
    pub tag: Option<String>,

    /// Client identifier (user@hostname or application name).
    pub source_client: String,

    /// Machine identifier (for multi-machine setups).
    /// Note: `InvocationRecord::to_attempt` stores the invocation's
    /// `runner_id` in this field.
    pub machine_id: Option<String>,

    /// Hostname where invocation was executed.
    pub hostname: Option<String>,

    /// Extracted executable name (e.g., "make" from "make test").
    pub executable: Option<String>,

    /// Detected output format (e.g., "gcc", "pytest").
    pub format_hint: Option<String>,

    /// Extensible metadata (user-defined key-value pairs).
    /// Stored as MAP(VARCHAR, JSON) in DuckDB.
    pub metadata: HashMap<String, serde_json::Value>,

    /// Date for partitioning.
    /// The constructors set it to the UTC calendar date of `timestamp`.
    pub date: NaiveDate,
}
129
130impl AttemptRecord {
131    /// Create a new attempt record.
132    ///
133    /// If `BIRD_INVOCATION_UUID` is set in the environment, uses that UUID
134    /// to enable deduplication across nested BIRD clients.
135    pub fn new(
136        session_id: impl Into<String>,
137        cmd: impl Into<String>,
138        cwd: impl Into<String>,
139        source_client: impl Into<String>,
140    ) -> Self {
141        let cmd = cmd.into();
142        let now = Utc::now();
143
144        // Check for inherited invocation UUID from parent BIRD client
145        let id = if let Ok(uuid_str) = std::env::var(BIRD_INVOCATION_UUID_VAR) {
146            Uuid::parse_str(&uuid_str).unwrap_or_else(|_| Uuid::now_v7())
147        } else {
148            Uuid::now_v7()
149        };
150
151        Self {
152            id,
153            timestamp: now,
154            executable: extract_executable(&cmd),
155            cmd,
156            cwd: cwd.into(),
157            session_id: session_id.into(),
158            tag: None,
159            source_client: source_client.into(),
160            machine_id: None,
161            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
162            format_hint: None,
163            metadata: HashMap::new(),
164            date: now.date_naive(),
165        }
166    }
167
168    /// Create an attempt record with an explicit UUID.
169    pub fn with_id(
170        id: Uuid,
171        session_id: impl Into<String>,
172        cmd: impl Into<String>,
173        cwd: impl Into<String>,
174        source_client: impl Into<String>,
175    ) -> Self {
176        let cmd = cmd.into();
177        let now = Utc::now();
178
179        Self {
180            id,
181            timestamp: now,
182            executable: extract_executable(&cmd),
183            cmd,
184            cwd: cwd.into(),
185            session_id: session_id.into(),
186            tag: None,
187            source_client: source_client.into(),
188            machine_id: None,
189            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
190            format_hint: None,
191            metadata: HashMap::new(),
192            date: now.date_naive(),
193        }
194    }
195
196    /// Set the tag (unique alias for this invocation).
197    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
198        self.tag = Some(tag.into());
199        self
200    }
201
202    /// Set the machine ID.
203    pub fn with_machine_id(mut self, machine_id: impl Into<String>) -> Self {
204        self.machine_id = Some(machine_id.into());
205        self
206    }
207
208    /// Set the format hint.
209    pub fn with_format_hint(mut self, hint: impl Into<String>) -> Self {
210        self.format_hint = Some(hint.into());
211        self
212    }
213
214    /// Add metadata entry.
215    pub fn with_metadata(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
216        self.metadata.insert(key.into(), value);
217        self
218    }
219
220    /// Get the date portion of the timestamp (for partitioning).
221    pub fn date(&self) -> NaiveDate {
222        self.date
223    }
224}
225
/// An outcome record (the completion of a command execution).
///
/// This represents the "outcome" of a command - recorded at invocation end.
/// Links back to an `AttemptRecord` via `attempt_id`.
/// The fields correspond one-to-one to the columns of `OUTCOMES_SCHEMA`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutcomeRecord {
    /// The attempt this outcome is for.
    pub attempt_id: Uuid,

    /// When the invocation completed.
    pub completed_at: DateTime<Utc>,

    /// Exit code (None if killed by signal or crashed).
    /// The `killed`, `timed_out` and `orphaned` constructors all leave this `None`.
    pub exit_code: Option<i32>,

    /// How long the invocation took in milliseconds.
    pub duration_ms: Option<i64>,

    /// Signal that terminated the process (if killed by signal).
    pub signal: Option<i32>,

    /// Whether the process was terminated due to timeout.
    pub timeout: bool,

    /// Extensible metadata (user-defined key-value pairs).
    /// Stored as MAP(VARCHAR, JSON) in DuckDB.
    pub metadata: HashMap<String, serde_json::Value>,

    /// Date for partitioning (matches the attempt date).
    /// Supplied by the caller of each constructor.
    pub date: NaiveDate,
}
257
258impl OutcomeRecord {
259    /// Create a new completed outcome record.
260    pub fn completed(attempt_id: Uuid, exit_code: i32, duration_ms: Option<i64>, date: NaiveDate) -> Self {
261        Self {
262            attempt_id,
263            completed_at: Utc::now(),
264            exit_code: Some(exit_code),
265            duration_ms,
266            signal: None,
267            timeout: false,
268            metadata: HashMap::new(),
269            date,
270        }
271    }
272
273    /// Create an outcome for a process killed by signal.
274    pub fn killed(attempt_id: Uuid, signal: i32, duration_ms: Option<i64>, date: NaiveDate) -> Self {
275        Self {
276            attempt_id,
277            completed_at: Utc::now(),
278            exit_code: None,
279            duration_ms,
280            signal: Some(signal),
281            timeout: false,
282            metadata: HashMap::new(),
283            date,
284        }
285    }
286
287    /// Create an outcome for a timed-out process.
288    pub fn timed_out(attempt_id: Uuid, duration_ms: i64, date: NaiveDate) -> Self {
289        Self {
290            attempt_id,
291            completed_at: Utc::now(),
292            exit_code: None,
293            duration_ms: Some(duration_ms),
294            signal: None,
295            timeout: true,
296            metadata: HashMap::new(),
297            date,
298        }
299    }
300
301    /// Create an orphaned outcome (process crashed or was killed without cleanup).
302    pub fn orphaned(attempt_id: Uuid, date: NaiveDate) -> Self {
303        Self {
304            attempt_id,
305            completed_at: Utc::now(),
306            exit_code: None,
307            duration_ms: None,
308            signal: None,
309            timeout: false,
310            metadata: HashMap::new(),
311            date,
312        }
313    }
314
315    /// Add metadata entry.
316    pub fn with_metadata(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
317        self.metadata.insert(key.into(), value);
318        self
319    }
320}
321
322// =============================================================================
323// Legacy v4 Schema Types (for backwards compatibility)
324// =============================================================================
325
/// Environment variable for sharing invocation UUID between nested BIRD clients.
///
/// When set, nested BIRD clients (e.g., `shq run blq run ...`) will use this UUID
/// instead of generating a new one, allowing the invocation to be deduplicated
/// across databases.
///
/// The record constructors in this file parse the value with `Uuid::parse_str`;
/// a value that does not parse is ignored and a fresh UUIDv7 is generated instead.
pub const BIRD_INVOCATION_UUID_VAR: &str = "BIRD_INVOCATION_UUID";
332
/// Environment variable for the parent BIRD client name.
///
/// When set, indicates which BIRD client initiated this invocation.
/// Used to avoid duplicate recording in nested scenarios.
///
/// Read by `InvocationRecord::parent_client`, which returns the raw value.
pub const BIRD_PARENT_CLIENT_VAR: &str = "BIRD_PARENT_CLIENT";
338
339impl InvocationRecord {
340    /// Create a new invocation record.
341    ///
342    /// If `BIRD_INVOCATION_UUID` is set in the environment, uses that UUID
343    /// to enable deduplication across nested BIRD clients.
344    pub fn new(
345        session_id: impl Into<String>,
346        cmd: impl Into<String>,
347        cwd: impl Into<String>,
348        exit_code: i32,
349        client_id: impl Into<String>,
350    ) -> Self {
351        let cmd = cmd.into();
352
353        // Check for inherited invocation UUID from parent BIRD client
354        let id = if let Ok(uuid_str) = std::env::var(BIRD_INVOCATION_UUID_VAR) {
355            Uuid::parse_str(&uuid_str).unwrap_or_else(|_| Uuid::now_v7())
356        } else {
357            Uuid::now_v7()
358        };
359
360        Self {
361            id,
362            session_id: session_id.into(),
363            timestamp: Utc::now(),
364            duration_ms: None,
365            cwd: cwd.into(),
366            executable: extract_executable(&cmd),
367            cmd,
368            runner_id: None,
369            exit_code: Some(exit_code),
370            status: "completed".to_string(),
371            format_hint: None,
372            client_id: client_id.into(),
373            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
374            username: std::env::var("USER").ok(),
375            tag: None,
376            metadata: HashMap::new(),
377        }
378    }
379
380    /// Create a new invocation record with an explicit UUID.
381    ///
382    /// Use this when you need to control the UUID (e.g., for testing or
383    /// when the UUID is provided externally).
384    pub fn with_id(
385        id: Uuid,
386        session_id: impl Into<String>,
387        cmd: impl Into<String>,
388        cwd: impl Into<String>,
389        exit_code: i32,
390        client_id: impl Into<String>,
391    ) -> Self {
392        let cmd = cmd.into();
393        Self {
394            id,
395            session_id: session_id.into(),
396            timestamp: Utc::now(),
397            duration_ms: None,
398            cwd: cwd.into(),
399            executable: extract_executable(&cmd),
400            cmd,
401            runner_id: None,
402            exit_code: Some(exit_code),
403            status: "completed".to_string(),
404            format_hint: None,
405            client_id: client_id.into(),
406            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
407            username: std::env::var("USER").ok(),
408            tag: None,
409            metadata: HashMap::new(),
410        }
411    }
412
413    /// Create a new pending invocation record.
414    ///
415    /// Use this when a command starts but hasn't completed yet.
416    /// The exit_code is None and status is "pending".
417    ///
418    /// `runner_id` identifies the execution context for liveness checking:
419    /// - Local process: "pid:12345"
420    /// - GitHub Actions: "gha:run:12345678"
421    /// - Kubernetes: "k8s:pod:abc123"
422    pub fn new_pending(
423        session_id: impl Into<String>,
424        cmd: impl Into<String>,
425        cwd: impl Into<String>,
426        runner_id: impl Into<String>,
427        client_id: impl Into<String>,
428    ) -> Self {
429        let cmd = cmd.into();
430
431        // Check for inherited invocation UUID from parent BIRD client
432        let id = if let Ok(uuid_str) = std::env::var(BIRD_INVOCATION_UUID_VAR) {
433            Uuid::parse_str(&uuid_str).unwrap_or_else(|_| Uuid::now_v7())
434        } else {
435            Uuid::now_v7()
436        };
437
438        Self {
439            id,
440            session_id: session_id.into(),
441            timestamp: Utc::now(),
442            duration_ms: None,
443            cwd: cwd.into(),
444            executable: extract_executable(&cmd),
445            cmd,
446            runner_id: Some(runner_id.into()),
447            exit_code: None,
448            status: "pending".to_string(),
449            format_hint: None,
450            client_id: client_id.into(),
451            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
452            username: std::env::var("USER").ok(),
453            tag: None,
454            metadata: HashMap::new(),
455        }
456    }
457
458    /// Create a pending invocation for a local process.
459    ///
460    /// Convenience method that formats the PID as "pid:{pid}".
461    pub fn new_pending_local(
462        session_id: impl Into<String>,
463        cmd: impl Into<String>,
464        cwd: impl Into<String>,
465        pid: i32,
466        client_id: impl Into<String>,
467    ) -> Self {
468        Self::new_pending(session_id, cmd, cwd, format!("pid:{}", pid), client_id)
469    }
470
471    /// Mark this invocation as completed with the given exit code.
472    pub fn complete(mut self, exit_code: i32, duration_ms: Option<i64>) -> Self {
473        self.exit_code = Some(exit_code);
474        self.duration_ms = duration_ms;
475        self.status = "completed".to_string();
476        self
477    }
478
479    /// Mark this invocation as orphaned (process died without cleanup).
480    pub fn mark_orphaned(mut self) -> Self {
481        self.status = "orphaned".to_string();
482        self
483    }
484
485    /// Set the runner ID.
486    pub fn with_runner_id(mut self, runner_id: impl Into<String>) -> Self {
487        self.runner_id = Some(runner_id.into());
488        self
489    }
490
491    /// Check if this invocation was inherited from a parent BIRD client.
492    pub fn is_inherited() -> bool {
493        std::env::var(BIRD_INVOCATION_UUID_VAR).is_ok()
494    }
495
496    /// Get the parent BIRD client name, if any.
497    pub fn parent_client() -> Option<String> {
498        std::env::var(BIRD_PARENT_CLIENT_VAR).ok()
499    }
500
501    /// Set the duration.
502    pub fn with_duration(mut self, duration_ms: i64) -> Self {
503        self.duration_ms = Some(duration_ms);
504        self
505    }
506
507    /// Set the format hint.
508    pub fn with_format_hint(mut self, hint: impl Into<String>) -> Self {
509        self.format_hint = Some(hint.into());
510        self
511    }
512
513    /// Set the tag (unique alias for this invocation).
514    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
515        self.tag = Some(tag.into());
516        self
517    }
518
519    /// Add a single metadata entry.
520    pub fn with_metadata_entry(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
521        self.metadata.insert(key.into(), value);
522        self
523    }
524
525    /// Set all metadata from a HashMap.
526    pub fn with_metadata(mut self, metadata: HashMap<String, serde_json::Value>) -> Self {
527        self.metadata = metadata;
528        self
529    }
530
531    /// Merge metadata from a HashMap (existing entries are preserved).
532    pub fn merge_metadata(mut self, metadata: HashMap<String, serde_json::Value>) -> Self {
533        for (key, value) in metadata {
534            self.metadata.entry(key).or_insert(value);
535        }
536        self
537    }
538
539    /// Get the date portion of the timestamp (for partitioning).
540    pub fn date(&self) -> NaiveDate {
541        self.timestamp.date_naive()
542    }
543
544    // =========================================================================
545    // V5 Schema Conversion Methods
546    // =========================================================================
547
548    /// Convert this invocation record to an AttemptRecord (v5 schema).
549    pub fn to_attempt(&self) -> AttemptRecord {
550        AttemptRecord {
551            id: self.id,
552            timestamp: self.timestamp,
553            cmd: self.cmd.clone(),
554            cwd: self.cwd.clone(),
555            session_id: self.session_id.clone(),
556            tag: self.tag.clone(),
557            source_client: self.client_id.clone(),
558            // machine_id stores runner_id for liveness checking
559            machine_id: self.runner_id.clone(),
560            hostname: self.hostname.clone(),
561            executable: self.executable.clone(),
562            format_hint: self.format_hint.clone(),
563            metadata: self.metadata.clone(),
564            date: self.date(),
565        }
566    }
567
568    /// Convert this invocation record to an OutcomeRecord (v5 schema).
569    ///
570    /// Returns None if this is a pending invocation (no outcome yet).
571    pub fn to_outcome(&self) -> Option<OutcomeRecord> {
572        // Pending invocations don't have an outcome
573        if self.status == "pending" {
574            return None;
575        }
576
577        Some(OutcomeRecord {
578            attempt_id: self.id,
579            completed_at: self.timestamp + chrono::Duration::milliseconds(self.duration_ms.unwrap_or(0)),
580            exit_code: self.exit_code,
581            duration_ms: self.duration_ms,
582            signal: None,
583            timeout: false,
584            metadata: HashMap::new(),
585            date: self.date(),
586        })
587    }
588
589    /// Create an InvocationRecord from AttemptRecord and optional OutcomeRecord (v5 schema).
590    pub fn from_attempt_outcome(attempt: &AttemptRecord, outcome: Option<&OutcomeRecord>) -> Self {
591        let (exit_code, duration_ms, status) = match outcome {
592            Some(o) => {
593                let status = if o.exit_code.is_some() {
594                    "completed"
595                } else {
596                    "orphaned"
597                };
598                (o.exit_code, o.duration_ms, status.to_string())
599            }
600            None => (None, None, "pending".to_string()),
601        };
602
603        Self {
604            id: attempt.id,
605            session_id: attempt.session_id.clone(),
606            timestamp: attempt.timestamp,
607            duration_ms,
608            cwd: attempt.cwd.clone(),
609            cmd: attempt.cmd.clone(),
610            executable: attempt.executable.clone(),
611            runner_id: None,
612            exit_code,
613            status,
614            format_hint: attempt.format_hint.clone(),
615            client_id: attempt.source_client.clone(),
616            hostname: attempt.hostname.clone(),
617            username: None,
618            tag: attempt.tag.clone(),
619            metadata: attempt.metadata.clone(),
620        }
621    }
622}
623
/// A session record (a shell or process that captures invocations).
///
/// The fields correspond one-to-one to the columns of `SESSIONS_SCHEMA`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionRecord {
    /// Session identifier (e.g., "zsh-12345").
    pub session_id: String,

    /// Client identifier (user@hostname).
    pub client_id: String,

    /// Invoker name (e.g., "zsh", "bash", "shq", "python").
    pub invoker: String,

    /// Invoker PID.
    pub invoker_pid: u32,

    /// Invoker type: "shell", "cli", "hook", "script".
    pub invoker_type: String,

    /// When the session was first seen.
    pub registered_at: DateTime<Utc>,

    /// Initial working directory.
    /// `None` when the current directory could not be determined at creation.
    pub cwd: Option<String>,

    /// Date for partitioning.
    /// Set by `new` to the UTC calendar date of `registered_at`.
    pub date: NaiveDate,
}
651
652impl SessionRecord {
653    /// Create a new session record.
654    pub fn new(
655        session_id: impl Into<String>,
656        client_id: impl Into<String>,
657        invoker: impl Into<String>,
658        invoker_pid: u32,
659        invoker_type: impl Into<String>,
660    ) -> Self {
661        let now = Utc::now();
662        Self {
663            session_id: session_id.into(),
664            client_id: client_id.into(),
665            invoker: invoker.into(),
666            invoker_pid,
667            invoker_type: invoker_type.into(),
668            registered_at: now,
669            cwd: std::env::current_dir()
670                .ok()
671                .map(|p| p.display().to_string()),
672            date: now.date_naive(),
673        }
674    }
675}
676
/// Extract the executable name from a command string.
///
/// Leading `NAME=value` environment assignments are skipped, and the first
/// remaining whitespace-separated token is reduced to its basename
/// (`/usr/bin/make` becomes `make`).
///
/// A token counts as an assignment only when the text before its first `=`
/// is a valid variable name (ASCII letter or `_`, followed by ASCII letters,
/// digits or `_`). A path such as `./my=tool` is therefore treated as the
/// command rather than skipped.
///
/// Returns `None` when the command is empty, consists solely of environment
/// assignments, or its first command token has an empty basename (e.g. `dir/`).
///
/// Tokens are split on whitespace only; shell quoting is not interpreted, so
/// an assignment whose value contains quoted spaces is not handled.
fn extract_executable(cmd: &str) -> Option<String> {
    /// Whether `token` has the shell form `NAME=value` with a valid variable name.
    fn is_env_assignment(token: &str) -> bool {
        match token.split_once('=') {
            Some((name, _value)) => {
                let mut chars = name.chars();
                let starts_ok = matches!(chars.next(), Some(c) if c == '_' || c.is_ascii_alphabetic());
                starts_ok && chars.all(|c| c == '_' || c.is_ascii_alphanumeric())
            }
            None => false,
        }
    }

    // `split_whitespace` already ignores leading/trailing whitespace.
    let command_token = cmd
        .split_whitespace()
        .find(|token| !is_env_assignment(token))?;

    // Keep only the last path component.
    let basename = command_token.rsplit('/').next().unwrap_or(command_token);

    if basename.is_empty() {
        None
    } else {
        Some(basename.to_string())
    }
}
694
/// An output record (stdout/stderr from an invocation).
///
/// Describes where captured output is stored and how to identify it; see
/// `OutputRecord::decode_content` for reading inline content back.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputRecord {
    /// Unique identifier.
    pub id: Uuid,

    /// Invocation this output belongs to.
    pub invocation_id: Uuid,

    /// Stream type: "stdout", "stderr", or "combined".
    pub stream: String,

    /// BLAKE3 hash of the content.
    /// `new_inline` stores it as a hex string.
    pub content_hash: String,

    /// Size in bytes.
    pub byte_length: usize,

    /// Storage type: "inline" or "blob".
    pub storage_type: String,

    /// Storage reference (data: URI for inline, file:// for blob).
    pub storage_ref: String,

    /// Content type hint (e.g., "text/plain", "application/json").
    pub content_type: Option<String>,

    /// Date for partitioning.
    pub date: NaiveDate,
}
725
726impl OutputRecord {
727    /// Create a new inline output record.
728    ///
729    /// For small outputs, content is stored as a base64 data URI.
730    pub fn new_inline(
731        invocation_id: Uuid,
732        stream: impl Into<String>,
733        content: &[u8],
734        date: NaiveDate,
735    ) -> Self {
736        use base64::Engine;
737
738        let content_hash = blake3::hash(content).to_hex().to_string();
739        let byte_length = content.len();
740
741        // Encode as data URI
742        let b64 = base64::engine::general_purpose::STANDARD.encode(content);
743        let storage_ref = format!("data:application/octet-stream;base64,{}", b64);
744
745        Self {
746            id: Uuid::now_v7(),
747            invocation_id,
748            stream: stream.into(),
749            content_hash,
750            byte_length,
751            storage_type: "inline".to_string(),
752            storage_ref,
753            content_type: Some("text/plain".to_string()),
754            date,
755        }
756    }
757
758    /// Decode the content from storage_ref.
759    pub fn decode_content(&self) -> Option<Vec<u8>> {
760        use base64::Engine;
761
762        if self.storage_type == "inline" {
763            // Parse data: URI
764            if let Some(b64_part) = self.storage_ref.split(",").nth(1) {
765                base64::engine::general_purpose::STANDARD.decode(b64_part).ok()
766            } else {
767                None
768            }
769        } else {
770            // TODO: Handle blob storage
771            None
772        }
773    }
774}
775
/// An event record (a parsed log entry from an invocation output).
///
/// The fields correspond one-to-one to the columns of `EVENTS_SCHEMA`.
/// `EventRecord::new` fills in the identifying fields and leaves every
/// optional parsed field as `None` for the caller to populate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EventRecord {
    /// Unique identifier (UUIDv7 for time-ordering).
    pub id: Uuid,

    /// Invocation this event was parsed from.
    pub invocation_id: Uuid,

    /// Client identifier (for cross-client queries).
    pub client_id: String,

    /// Hostname where the invocation ran.
    pub hostname: Option<String>,

    /// Event type from duck_hunt (e.g., "diagnostic", "test_result").
    pub event_type: Option<String>,

    /// Severity level: error, warning, info, note.
    pub severity: Option<String>,

    /// Source file referenced by this event.
    pub ref_file: Option<String>,

    /// Line number in the source file.
    pub ref_line: Option<i32>,

    /// Column number in the source file.
    pub ref_column: Option<i32>,

    /// The event message.
    pub message: Option<String>,

    /// Error/warning code (e.g., "E0308", "W0401").
    pub error_code: Option<String>,

    /// Test name (for test results).
    pub test_name: Option<String>,

    /// Test status: passed, failed, skipped.
    pub status: Option<String>,

    /// Format used for parsing.
    pub format_used: String,

    /// Date for partitioning.
    pub date: NaiveDate,
}
824
825impl EventRecord {
826    /// Create a new event record with a fresh UUIDv7.
827    pub fn new(
828        invocation_id: Uuid,
829        client_id: impl Into<String>,
830        format_used: impl Into<String>,
831        date: NaiveDate,
832    ) -> Self {
833        Self {
834            id: Uuid::now_v7(),
835            invocation_id,
836            client_id: client_id.into(),
837            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
838            event_type: None,
839            severity: None,
840            ref_file: None,
841            ref_line: None,
842            ref_column: None,
843            message: None,
844            error_code: None,
845            test_name: None,
846            status: None,
847            format_used: format_used.into(),
848            date,
849        }
850    }
851}
852
/// SQL to create the events table schema (for documentation/reference).
///
/// The columns mirror the fields of `EventRecord`, in the same order.
pub const EVENTS_SCHEMA: &str = r#"
CREATE TABLE events (
    id                UUID PRIMARY KEY,
    invocation_id     UUID NOT NULL,
    client_id         VARCHAR NOT NULL,
    hostname          VARCHAR,
    event_type        VARCHAR,
    severity          VARCHAR,
    ref_file          VARCHAR,
    ref_line          INTEGER,
    ref_column        INTEGER,
    message           VARCHAR,
    error_code        VARCHAR,
    test_name         VARCHAR,
    status            VARCHAR,
    format_used       VARCHAR NOT NULL,
    date              DATE NOT NULL
);
"#;
873
/// SQL to create the invocations table schema (for documentation/reference).
///
/// The columns follow the fields of `InvocationRecord`, with two differences:
/// there is no `metadata` column, and a `date` partition column is added.
/// In the v5 schema `invocations` is a view instead; see `INVOCATIONS_VIEW_SCHEMA`.
pub const INVOCATIONS_SCHEMA: &str = r#"
CREATE TABLE invocations (
    id                UUID PRIMARY KEY,
    session_id        VARCHAR NOT NULL,
    timestamp         TIMESTAMP NOT NULL,
    duration_ms       BIGINT,
    cwd               VARCHAR NOT NULL,
    cmd               VARCHAR NOT NULL,
    executable        VARCHAR,
    runner_id         VARCHAR,
    exit_code         INTEGER,
    status            VARCHAR DEFAULT 'completed',
    format_hint       VARCHAR,
    client_id         VARCHAR NOT NULL,
    hostname          VARCHAR,
    username          VARCHAR,
    tag               VARCHAR,
    date              DATE NOT NULL
);
"#;
895
/// SQL to create the sessions table schema (for documentation/reference).
///
/// The columns mirror the fields of `SessionRecord`, in the same order.
pub const SESSIONS_SCHEMA: &str = r#"
CREATE TABLE sessions (
    session_id        VARCHAR PRIMARY KEY,
    client_id         VARCHAR NOT NULL,
    invoker           VARCHAR NOT NULL,
    invoker_pid       INTEGER NOT NULL,
    invoker_type      VARCHAR NOT NULL,
    registered_at     TIMESTAMP NOT NULL,
    cwd               VARCHAR,
    date              DATE NOT NULL
);
"#;
909
910// =============================================================================
911// BIRD v5 Schema SQL Constants
912// =============================================================================
913
/// SQL to create the attempts table (v5 schema).
///
/// The columns mirror the fields of `AttemptRecord`, in the same order.
pub const ATTEMPTS_SCHEMA: &str = r#"
CREATE TABLE attempts (
    id                UUID PRIMARY KEY,
    timestamp         TIMESTAMP NOT NULL,
    cmd               VARCHAR NOT NULL,
    cwd               VARCHAR NOT NULL,
    session_id        VARCHAR NOT NULL,
    tag               VARCHAR,
    source_client     VARCHAR NOT NULL,
    machine_id        VARCHAR,
    hostname          VARCHAR,
    executable        VARCHAR,
    format_hint       VARCHAR,
    metadata          MAP(VARCHAR, JSON),
    date              DATE NOT NULL
);
"#;
932
/// SQL to create the outcomes table (v5 schema).
///
/// An outcome is the record of an invocation completion. `attempt_id` is the
/// primary key, so there is at most one outcome row per attempt; the
/// `invocations` view joins it against `attempts.id`.
///
/// `exit_code` is nullable: the `invocations` view reports an outcome whose
/// `exit_code` is NULL as `orphaned`. `timeout` defaults to `FALSE`.
pub const OUTCOMES_SCHEMA: &str = r#"
CREATE TABLE outcomes (
    attempt_id        UUID PRIMARY KEY,
    completed_at      TIMESTAMP NOT NULL,
    exit_code         INTEGER,
    duration_ms       BIGINT,
    signal            INTEGER,
    timeout           BOOLEAN DEFAULT FALSE,
    metadata          MAP(VARCHAR, JSON),
    date              DATE NOT NULL
);
"#;
946
/// SQL to create the bird_meta table for schema versioning.
///
/// A plain key/value table: `key` is the primary key, `value` is a required
/// string, and `updated_at` defaults to `CURRENT_TIMESTAMP` when not supplied.
pub const BIRD_META_SCHEMA: &str = r#"
CREATE TABLE bird_meta (
    key               VARCHAR PRIMARY KEY,
    value             VARCHAR NOT NULL,
    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
"#;
955
/// SQL to create the invocations VIEW (v5 schema).
///
/// This joins attempts LEFT JOIN outcomes and derives the status:
/// - `pending`: attempt exists but no outcome
/// - `completed`: outcome exists with exit_code
/// - `orphaned`: outcome exists but exit_code is NULL
///
/// The order of the `CASE` branches matters: with no matching outcome row the
/// LEFT JOIN yields NULL for every `o.*` column, including `o.exit_code`, so
/// the "no outcome" test on `o.attempt_id` must run before the NULL
/// `exit_code` test.
///
/// For the same reason `duration_ms`, `exit_code`, `signal`, `timeout` and
/// `completed_at` are NULL on pending rows. `attempts.source_client` is
/// exposed as `client_id`, and the two `metadata` maps are merged into one
/// column (a missing map is treated as empty).
pub const INVOCATIONS_VIEW_SCHEMA: &str = r#"
CREATE VIEW invocations AS
SELECT
    a.id,
    a.session_id,
    a.timestamp,
    o.duration_ms,
    a.cwd,
    a.cmd,
    a.executable,
    o.exit_code,
    CASE
        WHEN o.attempt_id IS NULL THEN 'pending'
        WHEN o.exit_code IS NULL THEN 'orphaned'
        ELSE 'completed'
    END AS status,
    a.format_hint,
    a.source_client AS client_id,
    a.hostname,
    a.tag,
    o.signal,
    o.timeout,
    o.completed_at,
    -- Merge metadata from both attempt and outcome (outcome wins on conflict)
    map_concat(COALESCE(a.metadata, MAP{}), COALESCE(o.metadata, MAP{})) AS metadata,
    a.date
FROM attempts a
LEFT JOIN outcomes o ON a.id = o.attempt_id;
"#;
991
/// Current BIRD schema version.
///
/// Kept as a string rather than an integer; `"5"` corresponds to the v5
/// attempts/outcomes schema described in this module's docs.
pub const BIRD_SCHEMA_VERSION: &str = "5";
994
#[cfg(test)]
mod tests {
    //! Unit tests for `extract_executable`, the `InvocationRecord` /
    //! `SessionRecord` constructors and builders, and the v5
    //! `AttemptRecord` / `OutcomeRecord` types and their conversions.

    use super::*;

    // Fixture values shared by most tests below.
    const SESSION_ID: &str = "session-123";
    const CMD: &str = "make test";
    const CWD: &str = "/home/user/project";
    const CLIENT_ID: &str = "user@laptop";
    const RUNNER_ID: &str = "pid:12345";

    /// Invocation built with `InvocationRecord::new` and exit code 0.
    fn completed_invocation() -> InvocationRecord {
        InvocationRecord::new(SESSION_ID, CMD, CWD, 0, CLIENT_ID)
    }

    /// Invocation built with `InvocationRecord::new_pending` and `RUNNER_ID`.
    fn pending_invocation() -> InvocationRecord {
        InvocationRecord::new_pending(SESSION_ID, CMD, CWD, RUNNER_ID, CLIENT_ID)
    }

    /// Attempt built with `AttemptRecord::new` from the shared fixture values.
    fn sample_attempt() -> AttemptRecord {
        AttemptRecord::new(SESSION_ID, CMD, CWD, CLIENT_ID)
    }

    #[test]
    fn test_extract_executable() {
        // (command line, expected executable)
        let cases = [
            ("make test", Some("make")),
            ("/usr/bin/gcc -o foo foo.c", Some("gcc")),
            ("ENV=val make", Some("make")),
            ("CC=gcc CXX=g++ make", Some("make")),
            ("", None),
        ];

        for (cmd, expected) in cases {
            assert_eq!(extract_executable(cmd).as_deref(), expected, "cmd: {:?}", cmd);
        }
    }

    #[test]
    fn test_invocation_record_new() {
        let record = completed_invocation();

        assert_eq!(record.session_id, SESSION_ID);
        assert_eq!(record.cmd, CMD);
        assert_eq!(record.executable.as_deref(), Some("make"));
        assert_eq!(record.exit_code, Some(0));
        assert_eq!(record.status, "completed");
        assert!(record.duration_ms.is_none());
        assert!(record.runner_id.is_none());
    }

    #[test]
    fn test_invocation_record_pending() {
        let record = pending_invocation();

        assert_eq!(record.session_id, SESSION_ID);
        assert_eq!(record.cmd, CMD);
        assert_eq!(record.runner_id.as_deref(), Some(RUNNER_ID));
        assert_eq!(record.exit_code, None);
        assert_eq!(record.status, "pending");
    }

    #[test]
    fn test_invocation_record_pending_local() {
        // A numeric PID is expected to be rendered as "pid:<n>".
        let record = InvocationRecord::new_pending_local(SESSION_ID, CMD, CWD, 12345, CLIENT_ID);

        assert_eq!(record.runner_id.as_deref(), Some("pid:12345"));
        assert_eq!(record.status, "pending");
    }

    #[test]
    fn test_invocation_record_pending_gha() {
        let record = InvocationRecord::new_pending(
            "gha-session",
            CMD,
            "/github/workspace",
            "gha:run:123456789",
            "runner@github",
        );

        assert_eq!(record.runner_id.as_deref(), Some("gha:run:123456789"));
        assert_eq!(record.status, "pending");
    }

    #[test]
    fn test_invocation_record_complete() {
        let record = pending_invocation().complete(0, Some(1500));

        assert_eq!(record.exit_code, Some(0));
        assert_eq!(record.duration_ms, Some(1500));
        assert_eq!(record.status, "completed");
    }

    #[test]
    fn test_invocation_record_orphaned() {
        let record = pending_invocation().mark_orphaned();

        assert_eq!(record.exit_code, None);
        assert_eq!(record.status, "orphaned");
    }

    #[test]
    fn test_invocation_record_with_duration() {
        let record = completed_invocation().with_duration(1500);

        assert_eq!(record.duration_ms, Some(1500));
    }

    #[test]
    fn test_session_record_new() {
        let record = SessionRecord::new("zsh-12345", CLIENT_ID, "zsh", 12345, "shell");

        assert_eq!(record.session_id, "zsh-12345");
        assert_eq!(record.client_id, CLIENT_ID);
        assert_eq!(record.invoker, "zsh");
        assert_eq!(record.invoker_pid, 12345);
        assert_eq!(record.invoker_type, "shell");
    }

    // =========================================================================
    // V5 Schema Tests
    // =========================================================================

    #[test]
    fn test_attempt_record_new() {
        let attempt = sample_attempt();

        assert_eq!(attempt.session_id, SESSION_ID);
        assert_eq!(attempt.cmd, CMD);
        assert_eq!(attempt.cwd, CWD);
        assert_eq!(attempt.source_client, CLIENT_ID);
        assert_eq!(attempt.executable.as_deref(), Some("make"));
        assert!(attempt.metadata.is_empty());
    }

    #[test]
    fn test_attempt_record_with_metadata() {
        let attempt = sample_attempt()
            .with_metadata("git_branch", serde_json::json!("main"))
            .with_metadata("ci", serde_json::json!(true));

        assert_eq!(attempt.metadata.len(), 2);
        assert_eq!(attempt.metadata.get("git_branch"), Some(&serde_json::json!("main")));
        assert_eq!(attempt.metadata.get("ci"), Some(&serde_json::json!(true)));
    }

    #[test]
    fn test_outcome_record_completed() {
        let attempt_id = Uuid::now_v7();
        let date = Utc::now().date_naive();
        let outcome = OutcomeRecord::completed(attempt_id, 0, Some(1500), date);

        assert_eq!(outcome.attempt_id, attempt_id);
        assert_eq!(outcome.exit_code, Some(0));
        assert_eq!(outcome.duration_ms, Some(1500));
        assert_eq!(outcome.signal, None);
        assert!(!outcome.timeout);
    }

    #[test]
    fn test_outcome_record_killed() {
        let attempt_id = Uuid::now_v7();
        let date = Utc::now().date_naive();
        let outcome = OutcomeRecord::killed(attempt_id, 9, Some(500), date);

        // A signal-terminated outcome carries the signal and no exit code.
        assert_eq!(outcome.exit_code, None);
        assert_eq!(outcome.signal, Some(9));
        assert!(!outcome.timeout);
    }

    #[test]
    fn test_outcome_record_timed_out() {
        let attempt_id = Uuid::now_v7();
        let date = Utc::now().date_naive();
        let outcome = OutcomeRecord::timed_out(attempt_id, 30000, date);

        assert_eq!(outcome.exit_code, None);
        assert_eq!(outcome.duration_ms, Some(30000));
        assert!(outcome.timeout);
    }

    #[test]
    fn test_outcome_record_orphaned() {
        let attempt_id = Uuid::now_v7();
        let date = Utc::now().date_naive();
        let outcome = OutcomeRecord::orphaned(attempt_id, date);

        assert_eq!(outcome.exit_code, None);
        assert_eq!(outcome.duration_ms, None);
        assert_eq!(outcome.signal, None);
        assert!(!outcome.timeout);
    }

    #[test]
    fn test_invocation_to_attempt_conversion() {
        let invocation = completed_invocation();

        let attempt = invocation.to_attempt();

        assert_eq!(attempt.id, invocation.id);
        assert_eq!(attempt.session_id, invocation.session_id);
        assert_eq!(attempt.cmd, invocation.cmd);
        assert_eq!(attempt.cwd, invocation.cwd);
        assert_eq!(attempt.source_client, invocation.client_id);
    }

    #[test]
    fn test_invocation_to_outcome_conversion() {
        let invocation = completed_invocation().with_duration(1500);

        let outcome = invocation
            .to_outcome()
            .expect("Should have outcome for completed invocation");

        assert_eq!(outcome.attempt_id, invocation.id);
        assert_eq!(outcome.exit_code, Some(0));
        assert_eq!(outcome.duration_ms, Some(1500));
    }

    #[test]
    fn test_pending_invocation_has_no_outcome() {
        let invocation = pending_invocation();

        assert!(invocation.to_outcome().is_none());
    }

    #[test]
    fn test_invocation_from_attempt_outcome() {
        let attempt = sample_attempt();

        // Pending: no outcome
        let pending = InvocationRecord::from_attempt_outcome(&attempt, None);
        assert_eq!(pending.status, "pending");
        assert_eq!(pending.exit_code, None);

        // Completed: outcome with exit code
        let outcome = OutcomeRecord::completed(attempt.id, 0, Some(1500), attempt.date);
        let completed = InvocationRecord::from_attempt_outcome(&attempt, Some(&outcome));
        assert_eq!(completed.status, "completed");
        assert_eq!(completed.exit_code, Some(0));
        assert_eq!(completed.duration_ms, Some(1500));

        // Orphaned: outcome without exit code
        let orphaned_outcome = OutcomeRecord::orphaned(attempt.id, attempt.date);
        let orphaned = InvocationRecord::from_attempt_outcome(&attempt, Some(&orphaned_outcome));
        assert_eq!(orphaned.status, "orphaned");
        assert_eq!(orphaned.exit_code, None);
    }
}