// magic_bird/schema.rs

//! Schema definitions for BIRD tables.
2
3use chrono::{DateTime, NaiveDate, Utc};
4use serde::{Deserialize, Serialize};
5use uuid::Uuid;
6
7/// An invocation record (a captured command/process execution).
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct InvocationRecord {
10    /// Unique identifier (UUIDv7 for time-ordering).
11    pub id: Uuid,
12
13    /// Session identifier (groups related invocations).
14    pub session_id: String,
15
16    /// When the invocation started.
17    pub timestamp: DateTime<Utc>,
18
19    /// How long the invocation took in milliseconds.
20    pub duration_ms: Option<i64>,
21
22    /// Working directory when invocation was executed.
23    pub cwd: String,
24
25    /// The full command string.
26    pub cmd: String,
27
28    /// Extracted executable name (e.g., "make" from "make test").
29    pub executable: Option<String>,
30
31    /// Exit code.
32    pub exit_code: i32,
33
34    /// Detected output format (e.g., "gcc", "pytest").
35    pub format_hint: Option<String>,
36
37    /// Client identifier (user@hostname).
38    pub client_id: String,
39
40    /// Hostname where invocation was executed.
41    pub hostname: Option<String>,
42
43    /// Username who executed the invocation.
44    pub username: Option<String>,
45
46    /// User-defined tag (unique alias for this invocation, like git tags).
47    pub tag: Option<String>,
48}
49
/// Name of the environment variable that carries an invocation UUID from a
/// parent BIRD client to the BIRD clients nested inside it.
///
/// A nested client (e.g., `shq run blq run ...`) that finds this variable set
/// reuses the UUID rather than generating its own, so the same invocation can
/// be deduplicated across databases.
pub const BIRD_INVOCATION_UUID_VAR: &str = "BIRD_INVOCATION_UUID";

/// Name of the environment variable that holds the parent BIRD client's name.
///
/// Its presence tells a nested client which BIRD client started the
/// invocation, which is used to avoid recording the same invocation twice.
pub const BIRD_PARENT_CLIENT_VAR: &str = "BIRD_PARENT_CLIENT";
62
63impl InvocationRecord {
64    /// Create a new invocation record.
65    ///
66    /// If `BIRD_INVOCATION_UUID` is set in the environment, uses that UUID
67    /// to enable deduplication across nested BIRD clients.
68    pub fn new(
69        session_id: impl Into<String>,
70        cmd: impl Into<String>,
71        cwd: impl Into<String>,
72        exit_code: i32,
73        client_id: impl Into<String>,
74    ) -> Self {
75        let cmd = cmd.into();
76
77        // Check for inherited invocation UUID from parent BIRD client
78        let id = if let Ok(uuid_str) = std::env::var(BIRD_INVOCATION_UUID_VAR) {
79            Uuid::parse_str(&uuid_str).unwrap_or_else(|_| Uuid::now_v7())
80        } else {
81            Uuid::now_v7()
82        };
83
84        Self {
85            id,
86            session_id: session_id.into(),
87            timestamp: Utc::now(),
88            duration_ms: None,
89            cwd: cwd.into(),
90            executable: extract_executable(&cmd),
91            cmd,
92            exit_code,
93            format_hint: None,
94            client_id: client_id.into(),
95            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
96            username: std::env::var("USER").ok(),
97            tag: None,
98        }
99    }
100
101    /// Create a new invocation record with an explicit UUID.
102    ///
103    /// Use this when you need to control the UUID (e.g., for testing or
104    /// when the UUID is provided externally).
105    pub fn with_id(
106        id: Uuid,
107        session_id: impl Into<String>,
108        cmd: impl Into<String>,
109        cwd: impl Into<String>,
110        exit_code: i32,
111        client_id: impl Into<String>,
112    ) -> Self {
113        let cmd = cmd.into();
114        Self {
115            id,
116            session_id: session_id.into(),
117            timestamp: Utc::now(),
118            duration_ms: None,
119            cwd: cwd.into(),
120            executable: extract_executable(&cmd),
121            cmd,
122            exit_code,
123            format_hint: None,
124            client_id: client_id.into(),
125            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
126            username: std::env::var("USER").ok(),
127            tag: None,
128        }
129    }
130
131    /// Check if this invocation was inherited from a parent BIRD client.
132    pub fn is_inherited() -> bool {
133        std::env::var(BIRD_INVOCATION_UUID_VAR).is_ok()
134    }
135
136    /// Get the parent BIRD client name, if any.
137    pub fn parent_client() -> Option<String> {
138        std::env::var(BIRD_PARENT_CLIENT_VAR).ok()
139    }
140
141    /// Set the duration.
142    pub fn with_duration(mut self, duration_ms: i64) -> Self {
143        self.duration_ms = Some(duration_ms);
144        self
145    }
146
147    /// Set the format hint.
148    pub fn with_format_hint(mut self, hint: impl Into<String>) -> Self {
149        self.format_hint = Some(hint.into());
150        self
151    }
152
153    /// Set the tag (unique alias for this invocation).
154    pub fn with_tag(mut self, tag: impl Into<String>) -> Self {
155        self.tag = Some(tag.into());
156        self
157    }
158
159    /// Get the date portion of the timestamp (for partitioning).
160    pub fn date(&self) -> NaiveDate {
161        self.timestamp.date_naive()
162    }
163}
164
165/// A session record (a shell or process that captures invocations).
166#[derive(Debug, Clone, Serialize, Deserialize)]
167pub struct SessionRecord {
168    /// Session identifier (e.g., "zsh-12345").
169    pub session_id: String,
170
171    /// Client identifier (user@hostname).
172    pub client_id: String,
173
174    /// Invoker name (e.g., "zsh", "bash", "shq", "python").
175    pub invoker: String,
176
177    /// Invoker PID.
178    pub invoker_pid: u32,
179
180    /// Invoker type: "shell", "cli", "hook", "script".
181    pub invoker_type: String,
182
183    /// When the session was first seen.
184    pub registered_at: DateTime<Utc>,
185
186    /// Initial working directory.
187    pub cwd: Option<String>,
188
189    /// Date for partitioning.
190    pub date: NaiveDate,
191}
192
193impl SessionRecord {
194    /// Create a new session record.
195    pub fn new(
196        session_id: impl Into<String>,
197        client_id: impl Into<String>,
198        invoker: impl Into<String>,
199        invoker_pid: u32,
200        invoker_type: impl Into<String>,
201    ) -> Self {
202        let now = Utc::now();
203        Self {
204            session_id: session_id.into(),
205            client_id: client_id.into(),
206            invoker: invoker.into(),
207            invoker_pid,
208            invoker_type: invoker_type.into(),
209            registered_at: now,
210            cwd: std::env::current_dir()
211                .ok()
212                .map(|p| p.display().to_string()),
213            date: now.date_naive(),
214        }
215    }
216}
217
/// Extract the executable name from a command string.
///
/// Leading `NAME=value` environment assignments are skipped; the first
/// remaining whitespace-separated token is taken as the command, and its
/// final path component is returned (e.g., "gcc" for "/usr/bin/gcc -o foo").
///
/// Returns `None` when the string is empty or consists only of assignments.
///
/// Tokens are split on whitespace only; shell quoting is not interpreted, so
/// an assignment whose quoted value contains spaces is not recognised as a
/// single token.
fn extract_executable(cmd: &str) -> Option<String> {
    let token = cmd
        .split_whitespace()
        .find(|token| !is_env_assignment(token))?;

    // Ignore trailing slashes so "/opt/tools/" yields "tools" rather than an
    // empty name. If nothing is left (the token was only slashes), keep the
    // token itself.
    let basename = token
        .trim_end_matches('/')
        .rsplit('/')
        .next()
        .filter(|name| !name.is_empty())
        .unwrap_or(token);

    Some(basename.to_string())
}

/// Whether `token` has the shape of a shell variable assignment: an
/// identifier (ASCII letter or `_`, then ASCII letters, digits or `_`)
/// immediately followed by `=`.
fn is_env_assignment(token: &str) -> bool {
    match token.split_once('=') {
        Some((name, _value)) => {
            let mut chars = name.chars();
            let starts_ok = matches!(chars.next(), Some(c) if c.is_ascii_alphabetic() || c == '_');
            starts_ok && chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        None => false,
    }
}
235
236/// An output record (stdout/stderr from an invocation).
237#[derive(Debug, Clone, Serialize, Deserialize)]
238pub struct OutputRecord {
239    /// Unique identifier.
240    pub id: Uuid,
241
242    /// Invocation this output belongs to.
243    pub invocation_id: Uuid,
244
245    /// Stream type: "stdout", "stderr", or "combined".
246    pub stream: String,
247
248    /// BLAKE3 hash of the content.
249    pub content_hash: String,
250
251    /// Size in bytes.
252    pub byte_length: usize,
253
254    /// Storage type: "inline" or "blob".
255    pub storage_type: String,
256
257    /// Storage reference (data: URI for inline, file:// for blob).
258    pub storage_ref: String,
259
260    /// Content type hint (e.g., "text/plain", "application/json").
261    pub content_type: Option<String>,
262
263    /// Date for partitioning.
264    pub date: NaiveDate,
265}
266
267impl OutputRecord {
268    /// Create a new inline output record.
269    ///
270    /// For small outputs, content is stored as a base64 data URI.
271    pub fn new_inline(
272        invocation_id: Uuid,
273        stream: impl Into<String>,
274        content: &[u8],
275        date: NaiveDate,
276    ) -> Self {
277        use base64::Engine;
278
279        let content_hash = blake3::hash(content).to_hex().to_string();
280        let byte_length = content.len();
281
282        // Encode as data URI
283        let b64 = base64::engine::general_purpose::STANDARD.encode(content);
284        let storage_ref = format!("data:application/octet-stream;base64,{}", b64);
285
286        Self {
287            id: Uuid::now_v7(),
288            invocation_id,
289            stream: stream.into(),
290            content_hash,
291            byte_length,
292            storage_type: "inline".to_string(),
293            storage_ref,
294            content_type: Some("text/plain".to_string()),
295            date,
296        }
297    }
298
299    /// Decode the content from storage_ref.
300    pub fn decode_content(&self) -> Option<Vec<u8>> {
301        use base64::Engine;
302
303        if self.storage_type == "inline" {
304            // Parse data: URI
305            if let Some(b64_part) = self.storage_ref.split(",").nth(1) {
306                base64::engine::general_purpose::STANDARD.decode(b64_part).ok()
307            } else {
308                None
309            }
310        } else {
311            // TODO: Handle blob storage
312            None
313        }
314    }
315}
316
317/// An event record (a parsed log entry from an invocation output).
318#[derive(Debug, Clone, Serialize, Deserialize)]
319pub struct EventRecord {
320    /// Unique identifier (UUIDv7 for time-ordering).
321    pub id: Uuid,
322
323    /// Invocation this event was parsed from.
324    pub invocation_id: Uuid,
325
326    /// Client identifier (for cross-client queries).
327    pub client_id: String,
328
329    /// Hostname where the invocation ran.
330    pub hostname: Option<String>,
331
332    /// Event type from duck_hunt (e.g., "diagnostic", "test_result").
333    pub event_type: Option<String>,
334
335    /// Severity level: error, warning, info, note.
336    pub severity: Option<String>,
337
338    /// Source file referenced by this event.
339    pub ref_file: Option<String>,
340
341    /// Line number in the source file.
342    pub ref_line: Option<i32>,
343
344    /// Column number in the source file.
345    pub ref_column: Option<i32>,
346
347    /// The event message.
348    pub message: Option<String>,
349
350    /// Error/warning code (e.g., "E0308", "W0401").
351    pub error_code: Option<String>,
352
353    /// Test name (for test results).
354    pub test_name: Option<String>,
355
356    /// Test status: passed, failed, skipped.
357    pub status: Option<String>,
358
359    /// Format used for parsing.
360    pub format_used: String,
361
362    /// Date for partitioning.
363    pub date: NaiveDate,
364}
365
366impl EventRecord {
367    /// Create a new event record with a fresh UUIDv7.
368    pub fn new(
369        invocation_id: Uuid,
370        client_id: impl Into<String>,
371        format_used: impl Into<String>,
372        date: NaiveDate,
373    ) -> Self {
374        Self {
375            id: Uuid::now_v7(),
376            invocation_id,
377            client_id: client_id.into(),
378            hostname: gethostname::gethostname().to_str().map(|s| s.to_string()),
379            event_type: None,
380            severity: None,
381            ref_file: None,
382            ref_line: None,
383            ref_column: None,
384            message: None,
385            error_code: None,
386            test_name: None,
387            status: None,
388            format_used: format_used.into(),
389            date,
390        }
391    }
392}
393
/// Reference DDL for the `events` table (kept for documentation/reference).
///
/// The columns correspond one-to-one to the fields of `EventRecord`.
pub const EVENTS_SCHEMA: &str = r#"
CREATE TABLE events (
    id                UUID PRIMARY KEY,
    invocation_id     UUID NOT NULL,
    client_id         VARCHAR NOT NULL,
    hostname          VARCHAR,
    event_type        VARCHAR,
    severity          VARCHAR,
    ref_file          VARCHAR,
    ref_line          INTEGER,
    ref_column        INTEGER,
    message           VARCHAR,
    error_code        VARCHAR,
    test_name         VARCHAR,
    status            VARCHAR,
    format_used       VARCHAR NOT NULL,
    date              DATE NOT NULL
);
"#;
414
/// Reference DDL for the `invocations` table (kept for documentation/reference).
///
/// The columns correspond to the fields of `InvocationRecord`, plus a `date`
/// column.
pub const INVOCATIONS_SCHEMA: &str = r#"
CREATE TABLE invocations (
    id                UUID PRIMARY KEY,
    session_id        VARCHAR NOT NULL,
    timestamp         TIMESTAMP NOT NULL,
    duration_ms       BIGINT,
    cwd               VARCHAR NOT NULL,
    cmd               VARCHAR NOT NULL,
    executable        VARCHAR,
    exit_code         INTEGER NOT NULL,
    format_hint       VARCHAR,
    client_id         VARCHAR NOT NULL,
    hostname          VARCHAR,
    username          VARCHAR,
    tag               VARCHAR,
    date              DATE NOT NULL
);
"#;
434
/// Reference DDL for the `sessions` table (kept for documentation/reference).
///
/// The columns correspond one-to-one to the fields of `SessionRecord`.
pub const SESSIONS_SCHEMA: &str = r#"
CREATE TABLE sessions (
    session_id        VARCHAR PRIMARY KEY,
    client_id         VARCHAR NOT NULL,
    invoker           VARCHAR NOT NULL,
    invoker_pid       INTEGER NOT NULL,
    invoker_type      VARCHAR NOT NULL,
    registered_at     TIMESTAMP NOT NULL,
    cwd               VARCHAR,
    date              DATE NOT NULL
);
"#;
448
#[cfg(test)]
mod tests {
    use super::*;

    /// Invocation fixture shared by the `InvocationRecord` tests.
    fn sample_invocation() -> InvocationRecord {
        InvocationRecord::new(
            "session-123",
            "make test",
            "/home/user/project",
            0,
            "user@laptop",
        )
    }

    #[test]
    fn test_extract_executable() {
        // (command line, expected executable)
        let cases: [(&str, Option<&str>); 5] = [
            ("make test", Some("make")),
            ("/usr/bin/gcc -o foo foo.c", Some("gcc")),
            ("ENV=val make", Some("make")),
            ("CC=gcc CXX=g++ make", Some("make")),
            ("", None),
        ];

        for &(cmd, expected) in cases.iter() {
            assert_eq!(extract_executable(cmd), expected.map(String::from));
        }
    }

    #[test]
    fn test_invocation_record_new() {
        let invocation = sample_invocation();

        assert_eq!(invocation.session_id, "session-123");
        assert_eq!(invocation.cmd, "make test");
        assert_eq!(invocation.executable, Some("make".to_string()));
        assert_eq!(invocation.exit_code, 0);
        assert!(invocation.duration_ms.is_none());
    }

    #[test]
    fn test_invocation_record_with_duration() {
        let invocation = sample_invocation().with_duration(1500);

        assert_eq!(invocation.duration_ms, Some(1500));
    }

    #[test]
    fn test_session_record_new() {
        let session = SessionRecord::new("zsh-12345", "user@laptop", "zsh", 12345, "shell");

        assert_eq!(session.session_id, "zsh-12345");
        assert_eq!(session.client_id, "user@laptop");
        assert_eq!(session.invoker, "zsh");
        assert_eq!(session.invoker_pid, 12345);
        assert_eq!(session.invoker_type, "shell");
    }
}
509}