anomalyx_normalize/parsers/
sqlite.rs1use crate::parser::{Confidence, FormatParser, MAGIC};
14use crate::table::TableBuilder;
15use ax_core::{AxError, Column, Value};
16use rusqlite::types::ValueRef;
17use rusqlite::{Connection, MAIN_DB};
18use std::collections::BTreeMap;
19use std::io::Cursor;
20
21const SQLITE_MAGIC: &[u8] = b"SQLite format 3\x00";
23
24#[derive(Debug, Default, Clone)]
25pub struct SqliteParser;
26
27fn value_ref_to_value(v: ValueRef) -> Value {
30 match v {
31 ValueRef::Null => Value::Null,
32 ValueRef::Integer(i) => Value::Int(i),
33 ValueRef::Real(f) => {
34 if f.is_finite() {
35 Value::Float(f)
36 } else {
37 Value::Null
38 }
39 }
40 ValueRef::Text(bytes) => Value::Str(String::from_utf8_lossy(bytes).into_owned()),
41 ValueRef::Blob(bytes) => {
42 let hex: String = bytes.iter().map(|b| format!("{b:02x}")).collect();
43 Value::Str(hex)
44 }
45 }
46}
47
48impl SqliteParser {
49 fn err(&self, msg: impl std::fmt::Display) -> AxError {
50 AxError::Parse {
51 format: self.id().to_string(),
52 message: msg.to_string(),
53 }
54 }
55}
56
57impl FormatParser for SqliteParser {
58 fn id(&self) -> &'static str {
59 "sqlite"
60 }
61 fn extensions(&self) -> &'static [&'static str] {
62 &["db", "sqlite", "sqlite3", "db3"]
63 }
64 fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
65 bytes.starts_with(SQLITE_MAGIC).then_some(MAGIC)
66 }
67 fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
68 let mut conn = Connection::open_in_memory().map_err(|e| self.err(e))?;
69 conn.deserialize_read_exact(MAIN_DB, Cursor::new(bytes), bytes.len(), true)
70 .map_err(|e| self.err(e))?;
71
72 let table: String = conn
74 .query_row(
75 "SELECT name FROM sqlite_master WHERE type = 'table' \
76 AND name NOT LIKE 'sqlite_%' ORDER BY name LIMIT 1",
77 [],
78 |row| row.get(0),
79 )
80 .map_err(|e| self.err(format!("no readable table: {e}")))?;
81
82 let sql = format!("SELECT * FROM \"{}\"", table.replace('"', "\"\""));
83 let mut stmt = conn.prepare(&sql).map_err(|e| self.err(e))?;
84 let names: Vec<String> = stmt.column_names().iter().map(|s| s.to_string()).collect();
85
86 let mut rows = stmt.query([]).map_err(|e| self.err(e))?;
87 let mut builder = TableBuilder::new();
88 while let Some(row) = rows.next().map_err(|e| self.err(e))? {
89 let mut record: BTreeMap<String, Value> = BTreeMap::new();
90 for (i, name) in names.iter().enumerate() {
91 let cell = row.get_ref(i).map_err(|e| self.err(e))?;
92 record.insert(name.clone(), value_ref_to_value(cell));
93 }
94 builder.push_row(record);
95 }
96 Ok(builder.finish())
97 }
98}
99
100#[cfg(test)]
101mod tests {
102 use super::*;
103 use ax_core::ColType;
104
105 fn build_db(setup: &str) -> Vec<u8> {
108 let conn = Connection::open_in_memory().unwrap();
109 conn.execute_batch(setup).unwrap();
110 conn.serialize(MAIN_DB).unwrap().to_vec()
111 }
112
113 const EVENTS: &str = "CREATE TABLE events (id INTEGER, host TEXT, score REAL, ok INTEGER);\
114 INSERT INTO events VALUES (1,'a',9.5,1),(2,'b',3.0,0),(3,'c',7.5,1);";
115
116 fn col<'a>(cols: &'a [Column], name: &str) -> &'a Column {
117 cols.iter()
118 .find(|c| c.name == name)
119 .unwrap_or_else(|| panic!("missing column {name}"))
120 }
121
122 #[test]
123 fn scans_a_table_into_typed_columns() {
124 let cols = SqliteParser.parse("app.db", &build_db(EVENTS)).unwrap();
125 assert_eq!(col(&cols, "id").ty, ColType::Int);
126 assert_eq!(
127 col(&cols, "id").cells,
128 vec![Value::Int(1), Value::Int(2), Value::Int(3)]
129 );
130 assert_eq!(
131 col(&cols, "host").cells,
132 vec![
133 Value::Str("a".into()),
134 Value::Str("b".into()),
135 Value::Str("c".into())
136 ]
137 );
138 assert_eq!(col(&cols, "score").numeric(), vec![9.5, 3.0, 7.5]);
139 assert_eq!(col(&cols, "ok").cells[1], Value::Int(0));
140 }
141
142 #[test]
143 fn picks_the_first_table_alphabetically() {
144 let db = build_db(
145 "CREATE TABLE zeta (z INTEGER); INSERT INTO zeta VALUES (9);\
146 CREATE TABLE alpha (a TEXT); INSERT INTO alpha VALUES ('first');",
147 );
148 let cols = SqliteParser.parse("-", &db).unwrap();
149 assert_eq!(col(&cols, "a").cells, vec![Value::Str("first".into())]);
151 assert!(cols.iter().all(|c| c.name != "z"));
152 }
153
154 #[test]
155 fn null_and_blob_cells() {
156 let db = build_db(
157 "CREATE TABLE t (v BLOB, n INTEGER);\
158 INSERT INTO t VALUES (x'01ff', NULL);",
159 );
160 let cols = SqliteParser.parse("-", &db).unwrap();
161 assert_eq!(col(&cols, "v").cells[0], Value::Str("01ff".into())); assert_eq!(col(&cols, "n").cells[0], Value::Null);
163 }
164
165 #[test]
166 fn value_ref_units() {
167 assert_eq!(value_ref_to_value(ValueRef::Null), Value::Null);
168 assert_eq!(value_ref_to_value(ValueRef::Integer(7)), Value::Int(7));
169 assert_eq!(value_ref_to_value(ValueRef::Real(1.5)), Value::Float(1.5));
170 assert_eq!(value_ref_to_value(ValueRef::Real(f64::NAN)), Value::Null);
171 assert_eq!(
172 value_ref_to_value(ValueRef::Text(b"hi")),
173 Value::Str("hi".into())
174 );
175 assert_eq!(
176 value_ref_to_value(ValueRef::Blob(&[0x00, 0xab])),
177 Value::Str("00ab".into())
178 );
179 }
180
181 #[test]
182 fn malformed_input_errors() {
183 assert!(matches!(
184 SqliteParser.parse("app.db", b"this is not a sqlite database"),
185 Err(AxError::Parse { .. })
186 ));
187 let mut bad = SQLITE_MAGIC.to_vec();
189 bad.extend_from_slice(b"corrupt");
190 assert!(matches!(
191 SqliteParser.parse("app.db", &bad),
192 Err(AxError::Parse { .. })
193 ));
194 }
195
196 #[test]
197 fn database_with_no_user_tables_errors() {
198 assert!(matches!(
199 SqliteParser.parse("-", &build_db("PRAGMA user_version = 1;")),
200 Err(AxError::Parse { .. })
201 ));
202 }
203
204 #[test]
205 fn sniff_keys_on_magic() {
206 assert_eq!(SqliteParser.sniff(&build_db(EVENTS)), Some(MAGIC));
207 assert_eq!(
208 SqliteParser.sniff(b"SQLite format 3\x00........"),
209 Some(MAGIC)
210 );
211 assert_eq!(SqliteParser.sniff(b"SQLite format 3"), None); assert_eq!(SqliteParser.sniff(b"PK\x03\x04"), None); assert_eq!(SqliteParser.sniff(b"{\"a\":1}"), None);
214 }
215
216 #[test]
217 fn claims_sqlite_extensions() {
218 assert_eq!(
219 SqliteParser.extensions(),
220 &["db", "sqlite", "sqlite3", "db3"]
221 );
222 }
223
224 #[test]
225 fn resolves_by_extension_and_magic() {
226 let reg = crate::parser::ParserRegistry::default();
227 assert_eq!(reg.resolve("app.db", b"zz").unwrap().id(), "sqlite");
228 assert_eq!(reg.resolve("data.sqlite3", b"zz").unwrap().id(), "sqlite");
229 assert_eq!(reg.resolve("-", &build_db(EVENTS)).unwrap().id(), "sqlite");
230 }
231}