Skip to main content

magic_bird/
schema.rs

1//! Schema definitions for BIRD tables.
2
3use chrono::{DateTime, NaiveDate, Utc};
4use serde::{Deserialize, Serialize};
5use uuid::Uuid;
6
7/// An invocation record (a captured command/process execution).
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct InvocationRecord {
10    /// Unique identifier (UUIDv7 for time-ordering).
11    pub id: Uuid,
12
13    /// Session identifier (groups related invocations).
14    pub session_id: String,
15
16    /// When the invocation started.
17    pub timestamp: DateTime<Utc>,
18
19    /// How long the invocation took in milliseconds.
20    pub duration_ms: Option<i64>,
21
22    /// Working directory when invocation was executed.
23    pub cwd: String,
24
25    /// The full command string.
26    pub cmd: String,
27
28    /// Extracted executable name (e.g., "make" from "make test").
29    pub executable: Option<String>,
30
31    /// Runner identifier for liveness checking of pending invocations.
32    /// Format depends on execution context:
33    /// - Local process: "pid:12345"
34    /// - GitHub Actions: "gha:run:12345678"
35    /// - Kubernetes: "k8s:pod:abc123"
36    pub runner_id: Option<String>,
37
38    /// Exit code (None while pending).
39    pub exit_code: Option<i32>,
40
41    /// Invocation status: "pending", "completed", "orphaned".
42    pub status: String,
43
44    /// Detected output format (e.g., "gcc", "pytest").
45    pub format_hint: Option<String>,
46
47    /// Client identifier (user@hostname).
48    pub client_id: String,
49
50    /// Hostname where invocation was executed.
51    pub hostname: Option<String>,
52
53    /// Username who executed the invocation.
54    pub username: Option<String>,
55
56    /// User-defined tag (unique alias for this invocation, like git tags).
57    pub tag: Option<String>,
58}
59
/// Name of the environment variable that carries an invocation UUID down to
/// nested BIRD clients.
///
/// A nested client (e.g., `shq run blq run ...`) that finds this variable set
/// reuses the UUID rather than minting its own, so the same invocation can be
/// deduplicated across databases.
pub const BIRD_INVOCATION_UUID_VAR: &str = "BIRD_INVOCATION_UUID";

/// Name of the environment variable that holds the parent BIRD client's name.
///
/// Its presence tells a nested client which BIRD client started the
/// invocation, which helps avoid recording the same invocation twice.
pub const BIRD_PARENT_CLIENT_VAR: &str = "BIRD_PARENT_CLIENT";
72
73impl InvocationRecord {
74    /// Create a new invocation record.
75    ///
76    /// If `BIRD_INVOCATION_UUID` is set in the environment, uses that UUID
77    /// to enable deduplication across nested BIRD clients.
78    pub fn new(
79        session_id: impl Into<String>,
80        cmd: impl Into<String>,
81        cwd: impl Into<String>,
82        exit_code: i32,
83        client_id: impl Into<String>,
84    ) -> Self {
85        let cmd = cmd.into();
86
87        // Check for inherited invocation UUID from parent BIRD client
88        let id = if let Ok(uuid_str) = std::env::var(BIRD_INVOCATION_UUID_VAR) {
89            Uuid::parse_str(&uuid_str).unwrap_or_else(|_| Uuid::now_v7())
90        } else {
91            Uuid::now_v7()
92        };
93
94        Self {
95            id,
96            session_id: session_id.into(),
97            timestamp: Utc::now(),
98            duration_ms: None,
99            cwd: cwd.into(),
100            executable: extract_executable(&cmd),
101            cmd,
102            runner_id: None,
103            exit_code: Some(exit_code),
104            status: "completed".to_string(),
105            format_hint: None,
106            client_id: client_id.into(),
107            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
108            username: std::env::var("USER").ok(),
109            tag: None,
110        }
111    }
112
113    /// Create a new invocation record with an explicit UUID.
114    ///
115    /// Use this when you need to control the UUID (e.g., for testing or
116    /// when the UUID is provided externally).
117    pub fn with_id(
118        id: Uuid,
119        session_id: impl Into<String>,
120        cmd: impl Into<String>,
121        cwd: impl Into<String>,
122        exit_code: i32,
123        client_id: impl Into<String>,
124    ) -> Self {
125        let cmd = cmd.into();
126        Self {
127            id,
128            session_id: session_id.into(),
129            timestamp: Utc::now(),
130            duration_ms: None,
131            cwd: cwd.into(),
132            executable: extract_executable(&cmd),
133            cmd,
134            runner_id: None,
135            exit_code: Some(exit_code),
136            status: "completed".to_string(),
137            format_hint: None,
138            client_id: client_id.into(),
139            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
140            username: std::env::var("USER").ok(),
141            tag: None,
142        }
143    }
144
145    /// Create a new pending invocation record.
146    ///
147    /// Use this when a command starts but hasn't completed yet.
148    /// The exit_code is None and status is "pending".
149    ///
150    /// `runner_id` identifies the execution context for liveness checking:
151    /// - Local process: "pid:12345"
152    /// - GitHub Actions: "gha:run:12345678"
153    /// - Kubernetes: "k8s:pod:abc123"
154    pub fn new_pending(
155        session_id: impl Into<String>,
156        cmd: impl Into<String>,
157        cwd: impl Into<String>,
158        runner_id: impl Into<String>,
159        client_id: impl Into<String>,
160    ) -> Self {
161        let cmd = cmd.into();
162
163        // Check for inherited invocation UUID from parent BIRD client
164        let id = if let Ok(uuid_str) = std::env::var(BIRD_INVOCATION_UUID_VAR) {
165            Uuid::parse_str(&uuid_str).unwrap_or_else(|_| Uuid::now_v7())
166        } else {
167            Uuid::now_v7()
168        };
169
170        Self {
171            id,
172            session_id: session_id.into(),
173            timestamp: Utc::now(),
174            duration_ms: None,
175            cwd: cwd.into(),
176            executable: extract_executable(&cmd),
177            cmd,
178            runner_id: Some(runner_id.into()),
179            exit_code: None,
180            status: "pending".to_string(),
181            format_hint: None,
182            client_id: client_id.into(),
183            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
184            username: std::env::var("USER").ok(),
185            tag: None,
186        }
187    }
188
189    /// Create a pending invocation for a local process.
190    ///
191    /// Convenience method that formats the PID as "pid:{pid}".
192    pub fn new_pending_local(
193        session_id: impl Into<String>,
194        cmd: impl Into<String>,
195        cwd: impl Into<String>,
196        pid: i32,
197        client_id: impl Into<String>,
198    ) -> Self {
199        Self::new_pending(session_id, cmd, cwd, format!("pid:{}", pid), client_id)
200    }
201
202    /// Mark this invocation as completed with the given exit code.
203    pub fn complete(mut self, exit_code: i32, duration_ms: Option<i64>) -> Self {
204        self.exit_code = Some(exit_code);
205        self.duration_ms = duration_ms;
206        self.status = "completed".to_string();
207        self
208    }
209
210    /// Mark this invocation as orphaned (process died without cleanup).
211    pub fn mark_orphaned(mut self) -> Self {
212        self.status = "orphaned".to_string();
213        self
214    }
215
216    /// Set the runner ID.
217    pub fn with_runner_id(mut self, runner_id: impl Into<String>) -> Self {
218        self.runner_id = Some(runner_id.into());
219        self
220    }
221
222    /// Check if this invocation was inherited from a parent BIRD client.
223    pub fn is_inherited() -> bool {
224        std::env::var(BIRD_INVOCATION_UUID_VAR).is_ok()
225    }
226
227    /// Get the parent BIRD client name, if any.
228    pub fn parent_client() -> Option<String> {
229        std::env::var(BIRD_PARENT_CLIENT_VAR).ok()
230    }
231
232    /// Set the duration.
233    pub fn with_duration(mut self, duration_ms: i64) -> Self {
234        self.duration_ms = Some(duration_ms);
235        self
236    }
237
238    /// Set the format hint.
239    pub fn with_format_hint(mut self, hint: impl Into<String>) -> Self {
240        self.format_hint = Some(hint.into());
241        self
242    }
243
244    /// Set the tag (unique alias for this invocation).
245    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
246        self.tag = Some(tag.into());
247        self
248    }
249
250    /// Get the date portion of the timestamp (for partitioning).
251    pub fn date(&self) -> NaiveDate {
252        self.timestamp.date_naive()
253    }
254}
255
256/// A session record (a shell or process that captures invocations).
257#[derive(Debug, Clone, Serialize, Deserialize)]
258pub struct SessionRecord {
259    /// Session identifier (e.g., "zsh-12345").
260    pub session_id: String,
261
262    /// Client identifier (user@hostname).
263    pub client_id: String,
264
265    /// Invoker name (e.g., "zsh", "bash", "shq", "python").
266    pub invoker: String,
267
268    /// Invoker PID.
269    pub invoker_pid: u32,
270
271    /// Invoker type: "shell", "cli", "hook", "script".
272    pub invoker_type: String,
273
274    /// When the session was first seen.
275    pub registered_at: DateTime<Utc>,
276
277    /// Initial working directory.
278    pub cwd: Option<String>,
279
280    /// Date for partitioning.
281    pub date: NaiveDate,
282}
283
284impl SessionRecord {
285    /// Create a new session record.
286    pub fn new(
287        session_id: impl Into<String>,
288        client_id: impl Into<String>,
289        invoker: impl Into<String>,
290        invoker_pid: u32,
291        invoker_type: impl Into<String>,
292    ) -> Self {
293        let now = Utc::now();
294        Self {
295            session_id: session_id.into(),
296            client_id: client_id.into(),
297            invoker: invoker.into(),
298            invoker_pid,
299            invoker_type: invoker_type.into(),
300            registered_at: now,
301            cwd: std::env::current_dir()
302                .ok()
303                .map(|p| p.display().to_string()),
304            date: now.date_naive(),
305        }
306    }
307}
308
/// Extract the executable name from a command string.
///
/// Leading shell-style environment assignments (`NAME=value` or
/// `NAME+=value`) are skipped, and the first remaining whitespace-separated
/// token is reduced to its basename (the text after the last `/`). Returns
/// `None` when the command is empty or consists solely of assignments.
///
/// A token only counts as an assignment when the text before its first `=`
/// is a valid variable name (`[A-Za-z_][A-Za-z0-9_]*`), so an executable path
/// that merely contains `=` (e.g. `/opt/app=v2/bin/tool`) is not skipped.
///
/// Splitting is purely on whitespace; shell quoting is not interpreted.
fn extract_executable(cmd: &str) -> Option<String> {
    /// Whether `token` has the shape of a shell variable assignment.
    fn is_env_assignment(token: &str) -> bool {
        match token.split_once('=') {
            Some((name, _value)) => {
                // Accept the append form `NAME+=value` as well.
                let name = name.strip_suffix('+').unwrap_or(name);
                let mut chars = name.chars();
                matches!(chars.next(), Some(c) if c.is_ascii_alphabetic() || c == '_')
                    && chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
            }
            None => false,
        }
    }

    cmd.split_whitespace()
        .find(|token| !is_env_assignment(token))
        // Keep only the basename when the token is a path.
        .map(|token| token.rsplit('/').next().unwrap_or(token).to_string())
}
326
327/// An output record (stdout/stderr from an invocation).
328#[derive(Debug, Clone, Serialize, Deserialize)]
329pub struct OutputRecord {
330    /// Unique identifier.
331    pub id: Uuid,
332
333    /// Invocation this output belongs to.
334    pub invocation_id: Uuid,
335
336    /// Stream type: "stdout", "stderr", or "combined".
337    pub stream: String,
338
339    /// BLAKE3 hash of the content.
340    pub content_hash: String,
341
342    /// Size in bytes.
343    pub byte_length: usize,
344
345    /// Storage type: "inline" or "blob".
346    pub storage_type: String,
347
348    /// Storage reference (data: URI for inline, file:// for blob).
349    pub storage_ref: String,
350
351    /// Content type hint (e.g., "text/plain", "application/json").
352    pub content_type: Option<String>,
353
354    /// Date for partitioning.
355    pub date: NaiveDate,
356}
357
358impl OutputRecord {
359    /// Create a new inline output record.
360    ///
361    /// For small outputs, content is stored as a base64 data URI.
362    pub fn new_inline(
363        invocation_id: Uuid,
364        stream: impl Into<String>,
365        content: &[u8],
366        date: NaiveDate,
367    ) -> Self {
368        use base64::Engine;
369
370        let content_hash = blake3::hash(content).to_hex().to_string();
371        let byte_length = content.len();
372
373        // Encode as data URI
374        let b64 = base64::engine::general_purpose::STANDARD.encode(content);
375        let storage_ref = format!("data:application/octet-stream;base64,{}", b64);
376
377        Self {
378            id: Uuid::now_v7(),
379            invocation_id,
380            stream: stream.into(),
381            content_hash,
382            byte_length,
383            storage_type: "inline".to_string(),
384            storage_ref,
385            content_type: Some("text/plain".to_string()),
386            date,
387        }
388    }
389
390    /// Decode the content from storage_ref.
391    pub fn decode_content(&self) -> Option<Vec<u8>> {
392        use base64::Engine;
393
394        if self.storage_type == "inline" {
395            // Parse data: URI
396            if let Some(b64_part) = self.storage_ref.split(",").nth(1) {
397                base64::engine::general_purpose::STANDARD.decode(b64_part).ok()
398            } else {
399                None
400            }
401        } else {
402            // TODO: Handle blob storage
403            None
404        }
405    }
406}
407
408/// An event record (a parsed log entry from an invocation output).
409#[derive(Debug, Clone, Serialize, Deserialize)]
410pub struct EventRecord {
411    /// Unique identifier (UUIDv7 for time-ordering).
412    pub id: Uuid,
413
414    /// Invocation this event was parsed from.
415    pub invocation_id: Uuid,
416
417    /// Client identifier (for cross-client queries).
418    pub client_id: String,
419
420    /// Hostname where the invocation ran.
421    pub hostname: Option<String>,
422
423    /// Event type from duck_hunt (e.g., "diagnostic", "test_result").
424    pub event_type: Option<String>,
425
426    /// Severity level: error, warning, info, note.
427    pub severity: Option<String>,
428
429    /// Source file referenced by this event.
430    pub ref_file: Option<String>,
431
432    /// Line number in the source file.
433    pub ref_line: Option<i32>,
434
435    /// Column number in the source file.
436    pub ref_column: Option<i32>,
437
438    /// The event message.
439    pub message: Option<String>,
440
441    /// Error/warning code (e.g., "E0308", "W0401").
442    pub error_code: Option<String>,
443
444    /// Test name (for test results).
445    pub test_name: Option<String>,
446
447    /// Test status: passed, failed, skipped.
448    pub status: Option<String>,
449
450    /// Format used for parsing.
451    pub format_used: String,
452
453    /// Date for partitioning.
454    pub date: NaiveDate,
455}
456
457impl EventRecord {
458    /// Create a new event record with a fresh UUIDv7.
459    pub fn new(
460        invocation_id: Uuid,
461        client_id: impl Into<String>,
462        format_used: impl Into<String>,
463        date: NaiveDate,
464    ) -> Self {
465        Self {
466            id: Uuid::now_v7(),
467            invocation_id,
468            client_id: client_id.into(),
469            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
470            event_type: None,
471            severity: None,
472            ref_file: None,
473            ref_line: None,
474            ref_column: None,
475            message: None,
476            error_code: None,
477            test_name: None,
478            status: None,
479            format_used: format_used.into(),
480            date,
481        }
482    }
483}
484
/// Reference DDL for the `events` table (kept for documentation purposes).
///
/// Columns correspond one-to-one with the fields of `EventRecord`.
pub const EVENTS_SCHEMA: &str = r#"
CREATE TABLE events (
    id                UUID PRIMARY KEY,
    invocation_id     UUID NOT NULL,
    client_id         VARCHAR NOT NULL,
    hostname          VARCHAR,
    event_type        VARCHAR,
    severity          VARCHAR,
    ref_file          VARCHAR,
    ref_line          INTEGER,
    ref_column        INTEGER,
    message           VARCHAR,
    error_code        VARCHAR,
    test_name         VARCHAR,
    status            VARCHAR,
    format_used       VARCHAR NOT NULL,
    date              DATE NOT NULL
);
"#;
505
/// Reference DDL for the `invocations` table (kept for documentation purposes).
///
/// Columns follow the fields of `InvocationRecord`, plus a `date` partition
/// column that has no field of its own on the struct.
pub const INVOCATIONS_SCHEMA: &str = r#"
CREATE TABLE invocations (
    id                UUID PRIMARY KEY,
    session_id        VARCHAR NOT NULL,
    timestamp         TIMESTAMP NOT NULL,
    duration_ms       BIGINT,
    cwd               VARCHAR NOT NULL,
    cmd               VARCHAR NOT NULL,
    executable        VARCHAR,
    runner_id         VARCHAR,
    exit_code         INTEGER,
    status            VARCHAR DEFAULT 'completed',
    format_hint       VARCHAR,
    client_id         VARCHAR NOT NULL,
    hostname          VARCHAR,
    username          VARCHAR,
    tag               VARCHAR,
    date              DATE NOT NULL
);
"#;
527
/// Reference DDL for the `sessions` table (kept for documentation purposes).
///
/// Columns correspond one-to-one with the fields of `SessionRecord`.
// NOTE(review): `invoker_pid` is `u32` on the Rust side but `INTEGER` here;
// confirm the column type can hold the full range if that matters.
pub const SESSIONS_SCHEMA: &str = r#"
CREATE TABLE sessions (
    session_id        VARCHAR PRIMARY KEY,
    client_id         VARCHAR NOT NULL,
    invoker           VARCHAR NOT NULL,
    invoker_pid       INTEGER NOT NULL,
    invoker_type      VARCHAR NOT NULL,
    registered_at     TIMESTAMP NOT NULL,
    cwd               VARCHAR,
    date              DATE NOT NULL
);
"#;
541
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the owned-string expectations used throughout these tests.
    fn some(text: &str) -> Option<String> {
        Some(text.to_owned())
    }

    /// A pending `make test` record with runner id "pid:12345".
    fn pending_make_test() -> InvocationRecord {
        InvocationRecord::new_pending(
            "session-123",
            "make test",
            "/home/user/project",
            "pid:12345",
            "user@laptop",
        )
    }

    /// A completed `make test` record with exit code 0.
    fn completed_make_test() -> InvocationRecord {
        InvocationRecord::new(
            "session-123",
            "make test",
            "/home/user/project",
            0,
            "user@laptop",
        )
    }

    #[test]
    fn test_extract_executable() {
        // (command line, expected executable)
        let cases = [
            ("make test", Some("make")),
            ("/usr/bin/gcc -o foo foo.c", Some("gcc")),
            ("ENV=val make", Some("make")),
            ("CC=gcc CXX=g++ make", Some("make")),
            ("", None),
        ];

        for &(cmd, expected) in cases.iter() {
            assert_eq!(extract_executable(cmd).as_deref(), expected, "cmd: {:?}", cmd);
        }
    }

    #[test]
    fn test_invocation_record_new() {
        let rec = completed_make_test();

        assert_eq!(rec.session_id, "session-123");
        assert_eq!(rec.cmd, "make test");
        assert_eq!(rec.executable, some("make"));
        assert_eq!(rec.exit_code, Some(0));
        assert_eq!(rec.status, "completed");
        assert!(rec.duration_ms.is_none());
        assert!(rec.runner_id.is_none());
    }

    #[test]
    fn test_invocation_record_pending() {
        let rec = pending_make_test();

        assert_eq!(rec.session_id, "session-123");
        assert_eq!(rec.cmd, "make test");
        assert_eq!(rec.runner_id, some("pid:12345"));
        assert_eq!(rec.exit_code, None);
        assert_eq!(rec.status, "pending");
    }

    #[test]
    fn test_invocation_record_pending_local() {
        let rec = InvocationRecord::new_pending_local(
            "session-123",
            "make test",
            "/home/user/project",
            12345,
            "user@laptop",
        );

        assert_eq!(rec.runner_id, some("pid:12345"));
        assert_eq!(rec.status, "pending");
    }

    #[test]
    fn test_invocation_record_pending_gha() {
        let rec = InvocationRecord::new_pending(
            "gha-session",
            "make test",
            "/github/workspace",
            "gha:run:123456789",
            "runner@github",
        );

        assert_eq!(rec.runner_id, some("gha:run:123456789"));
        assert_eq!(rec.status, "pending");
    }

    #[test]
    fn test_invocation_record_complete() {
        let rec = pending_make_test().complete(0, Some(1500));

        assert_eq!(rec.exit_code, Some(0));
        assert_eq!(rec.duration_ms, Some(1500));
        assert_eq!(rec.status, "completed");
    }

    #[test]
    fn test_invocation_record_orphaned() {
        let rec = pending_make_test().mark_orphaned();

        assert_eq!(rec.exit_code, None);
        assert_eq!(rec.status, "orphaned");
    }

    #[test]
    fn test_invocation_record_with_duration() {
        let rec = completed_make_test().with_duration(1500);

        assert_eq!(rec.duration_ms, Some(1500));
    }

    #[test]
    fn test_session_record_new() {
        let rec = SessionRecord::new("zsh-12345", "user@laptop", "zsh", 12345, "shell");

        assert_eq!(rec.session_id, "zsh-12345");
        assert_eq!(rec.client_id, "user@laptop");
        assert_eq!(rec.invoker, "zsh");
        assert_eq!(rec.invoker_pid, 12345);
        assert_eq!(rec.invoker_type, "shell");
    }
}