Skip to main content

anomalyx_normalize/parsers/
sqlite.rs

1//! SQLite database parser — app/telemetry data that lives in `.db` files.
2//!
3//! The first user table (alphabetically, excluding the `sqlite_*` internals)
4//! is scanned straight into a RecordSet: `SELECT *` in rowid order, each SQLite
5//! value mapped to the closed [`Value`] set — so the full detector taxonomy
6//! applies. The database is loaded from the byte buffer via SQLite's deserialize
7//! API (no temp file). `REAL`→`Float`, `INTEGER`→`Int`, `TEXT`→`Str`, `BLOB`→hex
8//! `Str`, `NULL`/non-finite→`Null`.
9//!
10//! Detected by the `SQLite format 3\0` magic; extensions `.db`/`.sqlite`/
11//! `.sqlite3`/`.db3`. Behind the default-on `sqlite` feature (binary format).
12
13use crate::parser::{Confidence, FormatParser, MAGIC};
14use crate::table::TableBuilder;
15use ax_core::{AxError, Column, Value};
16use rusqlite::types::ValueRef;
17use rusqlite::{Connection, MAIN_DB};
18use std::collections::BTreeMap;
19use std::io::Cursor;
20
21/// The 16-byte SQLite database file header.
22const SQLITE_MAGIC: &[u8] = b"SQLite format 3\x00";
23
24#[derive(Debug, Default, Clone)]
25pub struct SqliteParser;
26
27/// Maps a SQLite cell to the closed [`Value`] set. A blob becomes its lowercase
28/// hex (deterministic; structural detectors still see the column exists).
29fn value_ref_to_value(v: ValueRef) -> Value {
30    match v {
31        ValueRef::Null => Value::Null,
32        ValueRef::Integer(i) => Value::Int(i),
33        ValueRef::Real(f) => {
34            if f.is_finite() {
35                Value::Float(f)
36            } else {
37                Value::Null
38            }
39        }
40        ValueRef::Text(bytes) => Value::Str(String::from_utf8_lossy(bytes).into_owned()),
41        ValueRef::Blob(bytes) => {
42            let hex: String = bytes.iter().map(|b| format!("{b:02x}")).collect();
43            Value::Str(hex)
44        }
45    }
46}
47
48impl SqliteParser {
49    fn err(&self, msg: impl std::fmt::Display) -> AxError {
50        AxError::Parse {
51            format: self.id().to_string(),
52            message: msg.to_string(),
53        }
54    }
55}
56
57impl FormatParser for SqliteParser {
58    fn id(&self) -> &'static str {
59        "sqlite"
60    }
61    fn extensions(&self) -> &'static [&'static str] {
62        &["db", "sqlite", "sqlite3", "db3"]
63    }
64    fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
65        bytes.starts_with(SQLITE_MAGIC).then_some(MAGIC)
66    }
67    fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
68        let mut conn = Connection::open_in_memory().map_err(|e| self.err(e))?;
69        conn.deserialize_read_exact(MAIN_DB, Cursor::new(bytes), bytes.len(), true)
70            .map_err(|e| self.err(e))?;
71
72        // First user table (deterministic order; internal tables excluded).
73        let table: String = conn
74            .query_row(
75                "SELECT name FROM sqlite_master WHERE type = 'table' \
76                 AND name NOT LIKE 'sqlite_%' ORDER BY name LIMIT 1",
77                [],
78                |row| row.get(0),
79            )
80            .map_err(|e| self.err(format!("no readable table: {e}")))?;
81
82        let sql = format!("SELECT * FROM \"{}\"", table.replace('"', "\"\""));
83        let mut stmt = conn.prepare(&sql).map_err(|e| self.err(e))?;
84        let names: Vec<String> = stmt.column_names().iter().map(|s| s.to_string()).collect();
85
86        let mut rows = stmt.query([]).map_err(|e| self.err(e))?;
87        let mut builder = TableBuilder::new();
88        while let Some(row) = rows.next().map_err(|e| self.err(e))? {
89            let mut record: BTreeMap<String, Value> = BTreeMap::new();
90            for (i, name) in names.iter().enumerate() {
91                let cell = row.get_ref(i).map_err(|e| self.err(e))?;
92                record.insert(name.clone(), value_ref_to_value(cell));
93            }
94            builder.push_row(record);
95        }
96        Ok(builder.finish())
97    }
98}
99
100#[cfg(test)]
101mod tests {
102    use super::*;
103    use ax_core::ColType;
104
105    /// Builds an in-memory SQLite DB and serializes it to bytes (no committed
106    /// binary fixture).
107    fn build_db(setup: &str) -> Vec<u8> {
108        let conn = Connection::open_in_memory().unwrap();
109        conn.execute_batch(setup).unwrap();
110        conn.serialize(MAIN_DB).unwrap().to_vec()
111    }
112
113    const EVENTS: &str = "CREATE TABLE events (id INTEGER, host TEXT, score REAL, ok INTEGER);\
114        INSERT INTO events VALUES (1,'a',9.5,1),(2,'b',3.0,0),(3,'c',7.5,1);";
115
116    fn col<'a>(cols: &'a [Column], name: &str) -> &'a Column {
117        cols.iter()
118            .find(|c| c.name == name)
119            .unwrap_or_else(|| panic!("missing column {name}"))
120    }
121
122    #[test]
123    fn scans_a_table_into_typed_columns() {
124        let cols = SqliteParser.parse("app.db", &build_db(EVENTS)).unwrap();
125        assert_eq!(col(&cols, "id").ty, ColType::Int);
126        assert_eq!(
127            col(&cols, "id").cells,
128            vec![Value::Int(1), Value::Int(2), Value::Int(3)]
129        );
130        assert_eq!(
131            col(&cols, "host").cells,
132            vec![
133                Value::Str("a".into()),
134                Value::Str("b".into()),
135                Value::Str("c".into())
136            ]
137        );
138        assert_eq!(col(&cols, "score").numeric(), vec![9.5, 3.0, 7.5]);
139        assert_eq!(col(&cols, "ok").cells[1], Value::Int(0));
140    }
141
142    #[test]
143    fn picks_the_first_table_alphabetically() {
144        let db = build_db(
145            "CREATE TABLE zeta (z INTEGER); INSERT INTO zeta VALUES (9);\
146             CREATE TABLE alpha (a TEXT); INSERT INTO alpha VALUES ('first');",
147        );
148        let cols = SqliteParser.parse("-", &db).unwrap();
149        // `alpha` sorts before `zeta`, so its column is the one scanned.
150        assert_eq!(col(&cols, "a").cells, vec![Value::Str("first".into())]);
151        assert!(cols.iter().all(|c| c.name != "z"));
152    }
153
154    #[test]
155    fn null_and_blob_cells() {
156        let db = build_db(
157            "CREATE TABLE t (v BLOB, n INTEGER);\
158             INSERT INTO t VALUES (x'01ff', NULL);",
159        );
160        let cols = SqliteParser.parse("-", &db).unwrap();
161        assert_eq!(col(&cols, "v").cells[0], Value::Str("01ff".into())); // blob → hex
162        assert_eq!(col(&cols, "n").cells[0], Value::Null);
163    }
164
165    #[test]
166    fn value_ref_units() {
167        assert_eq!(value_ref_to_value(ValueRef::Null), Value::Null);
168        assert_eq!(value_ref_to_value(ValueRef::Integer(7)), Value::Int(7));
169        assert_eq!(value_ref_to_value(ValueRef::Real(1.5)), Value::Float(1.5));
170        assert_eq!(value_ref_to_value(ValueRef::Real(f64::NAN)), Value::Null);
171        assert_eq!(
172            value_ref_to_value(ValueRef::Text(b"hi")),
173            Value::Str("hi".into())
174        );
175        assert_eq!(
176            value_ref_to_value(ValueRef::Blob(&[0x00, 0xab])),
177            Value::Str("00ab".into())
178        );
179    }
180
181    #[test]
182    fn malformed_input_errors() {
183        assert!(matches!(
184            SqliteParser.parse("app.db", b"this is not a sqlite database"),
185            Err(AxError::Parse { .. })
186        ));
187        // Magic header but otherwise garbage.
188        let mut bad = SQLITE_MAGIC.to_vec();
189        bad.extend_from_slice(b"corrupt");
190        assert!(matches!(
191            SqliteParser.parse("app.db", &bad),
192            Err(AxError::Parse { .. })
193        ));
194    }
195
196    #[test]
197    fn database_with_no_user_tables_errors() {
198        assert!(matches!(
199            SqliteParser.parse("-", &build_db("PRAGMA user_version = 1;")),
200            Err(AxError::Parse { .. })
201        ));
202    }
203
204    #[test]
205    fn sniff_keys_on_magic() {
206        assert_eq!(SqliteParser.sniff(&build_db(EVENTS)), Some(MAGIC));
207        assert_eq!(
208            SqliteParser.sniff(b"SQLite format 3\x00........"),
209            Some(MAGIC)
210        );
211        assert_eq!(SqliteParser.sniff(b"SQLite format 3"), None); // missing the NUL
212        assert_eq!(SqliteParser.sniff(b"PK\x03\x04"), None); // zip, not sqlite
213        assert_eq!(SqliteParser.sniff(b"{\"a\":1}"), None);
214    }
215
216    #[test]
217    fn claims_sqlite_extensions() {
218        assert_eq!(
219            SqliteParser.extensions(),
220            &["db", "sqlite", "sqlite3", "db3"]
221        );
222    }
223
224    #[test]
225    fn resolves_by_extension_and_magic() {
226        let reg = crate::parser::ParserRegistry::default();
227        assert_eq!(reg.resolve("app.db", b"zz").unwrap().id(), "sqlite");
228        assert_eq!(reg.resolve("data.sqlite3", b"zz").unwrap().id(), "sqlite");
229        assert_eq!(reg.resolve("-", &build_db(EVENTS)).unwrap().id(), "sqlite");
230    }
231}