anomalyx_normalize/parsers/
sqlite.rs1use crate::parser::{Confidence, FormatParser, MAGIC};
14use crate::table::TableBuilder;
15use ax_core::{AxError, Column, Value};
16use rusqlite::types::ValueRef;
17use rusqlite::{Connection, MAIN_DB};
18use std::collections::BTreeMap;
19use std::io::Cursor;
20
21const SQLITE_MAGIC: &[u8] = b"SQLite format 3\x00";
23
24#[derive(Debug, Default, Clone)]
25pub struct SqliteParser;
26
27fn value_ref_to_value(v: ValueRef) -> Value {
30 match v {
31 ValueRef::Null => Value::Null,
32 ValueRef::Integer(i) => Value::Int(i),
33 ValueRef::Real(f) => {
34 if f.is_finite() {
35 Value::Float(f)
36 } else {
37 Value::Null
38 }
39 }
40 ValueRef::Text(bytes) => Value::Str(String::from_utf8_lossy(bytes).into_owned()),
41 ValueRef::Blob(bytes) => {
42 let hex: String = bytes.iter().map(|b| format!("{b:02x}")).collect();
43 Value::Str(hex)
44 }
45 }
46}
47
48impl SqliteParser {
49 fn err(&self, msg: impl std::fmt::Display) -> AxError {
50 AxError::Parse {
51 format: self.id().to_string(),
52 message: msg.to_string(),
53 }
54 }
55}
56
57impl FormatParser for SqliteParser {
58 fn id(&self) -> &'static str {
59 "sqlite"
60 }
61 fn extensions(&self) -> &'static [&'static str] {
62 &["db", "sqlite", "sqlite3", "db3"]
63 }
64 fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
65 bytes.starts_with(SQLITE_MAGIC).then_some(MAGIC)
66 }
67 fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
68 let patched: Vec<u8>;
77 let data: &[u8] = if bytes.get(19) == Some(&2) {
78 let mut v = bytes.to_vec();
79 v[19] = 1;
80 patched = v;
81 &patched
82 } else {
83 bytes
84 };
85
86 let mut conn = Connection::open_in_memory().map_err(|e| self.err(e))?;
87 conn.deserialize_read_exact(MAIN_DB, Cursor::new(data), data.len(), true)
88 .map_err(|e| self.err(e))?;
89
90 let table: String = conn
92 .query_row(
93 "SELECT name FROM sqlite_master WHERE type = 'table' \
94 AND name NOT LIKE 'sqlite_%' ORDER BY name LIMIT 1",
95 [],
96 |row| row.get(0),
97 )
98 .map_err(|e| self.err(format!("no readable table: {e}")))?;
99
100 let sql = format!("SELECT * FROM \"{}\"", table.replace('"', "\"\""));
101 let mut stmt = conn.prepare(&sql).map_err(|e| self.err(e))?;
102 let names: Vec<String> = stmt.column_names().iter().map(|s| s.to_string()).collect();
103
104 let mut rows = stmt.query([]).map_err(|e| self.err(e))?;
105 let mut builder = TableBuilder::new();
106 while let Some(row) = rows.next().map_err(|e| self.err(e))? {
107 let mut record: BTreeMap<String, Value> = BTreeMap::new();
108 for (i, name) in names.iter().enumerate() {
109 let cell = row.get_ref(i).map_err(|e| self.err(e))?;
110 record.insert(name.clone(), value_ref_to_value(cell));
111 }
112 builder.push_row(record);
113 }
114 Ok(builder.finish())
115 }
116}
117
118#[cfg(test)]
119mod tests {
120 use super::*;
121 use ax_core::ColType;
122
123 fn build_db(setup: &str) -> Vec<u8> {
126 let conn = Connection::open_in_memory().unwrap();
127 conn.execute_batch(setup).unwrap();
128 conn.serialize(MAIN_DB).unwrap().to_vec()
129 }
130
131 const EVENTS: &str = "CREATE TABLE events (id INTEGER, host TEXT, score REAL, ok INTEGER);\
132 INSERT INTO events VALUES (1,'a',9.5,1),(2,'b',3.0,0),(3,'c',7.5,1);";
133
134 fn col<'a>(cols: &'a [Column], name: &str) -> &'a Column {
135 cols.iter()
136 .find(|c| c.name == name)
137 .unwrap_or_else(|| panic!("missing column {name}"))
138 }
139
140 #[test]
141 fn scans_a_table_into_typed_columns() {
142 let cols = SqliteParser.parse("app.db", &build_db(EVENTS)).unwrap();
143 assert_eq!(col(&cols, "id").ty, ColType::Int);
144 assert_eq!(
145 col(&cols, "id").cells,
146 vec![Value::Int(1), Value::Int(2), Value::Int(3)]
147 );
148 assert_eq!(
149 col(&cols, "host").cells,
150 vec![
151 Value::Str("a".into()),
152 Value::Str("b".into()),
153 Value::Str("c".into())
154 ]
155 );
156 assert_eq!(col(&cols, "score").numeric(), vec![9.5, 3.0, 7.5]);
157 assert_eq!(col(&cols, "ok").cells[1], Value::Int(0));
158 }
159
160 #[test]
161 fn picks_the_first_table_alphabetically() {
162 let db = build_db(
163 "CREATE TABLE zeta (z INTEGER); INSERT INTO zeta VALUES (9);\
164 CREATE TABLE alpha (a TEXT); INSERT INTO alpha VALUES ('first');",
165 );
166 let cols = SqliteParser.parse("-", &db).unwrap();
167 assert_eq!(col(&cols, "a").cells, vec![Value::Str("first".into())]);
169 assert!(cols.iter().all(|c| c.name != "z"));
170 }
171
172 #[test]
173 fn reads_a_wal_mode_database() {
174 let mut db = build_db(EVENTS);
181 assert_eq!(db[19], 1, "serialized fixture should start as legacy");
182 db[19] = 2;
183 let cols = SqliteParser.parse("app.db", &db).unwrap();
184 assert_eq!(
185 col(&cols, "id").cells,
186 vec![Value::Int(1), Value::Int(2), Value::Int(3)]
187 );
188 }
189
190 #[test]
191 fn null_and_blob_cells() {
192 let db = build_db(
193 "CREATE TABLE t (v BLOB, n INTEGER);\
194 INSERT INTO t VALUES (x'01ff', NULL);",
195 );
196 let cols = SqliteParser.parse("-", &db).unwrap();
197 assert_eq!(col(&cols, "v").cells[0], Value::Str("01ff".into())); assert_eq!(col(&cols, "n").cells[0], Value::Null);
199 }
200
201 #[test]
202 fn value_ref_units() {
203 assert_eq!(value_ref_to_value(ValueRef::Null), Value::Null);
204 assert_eq!(value_ref_to_value(ValueRef::Integer(7)), Value::Int(7));
205 assert_eq!(value_ref_to_value(ValueRef::Real(1.5)), Value::Float(1.5));
206 assert_eq!(value_ref_to_value(ValueRef::Real(f64::NAN)), Value::Null);
207 assert_eq!(
208 value_ref_to_value(ValueRef::Text(b"hi")),
209 Value::Str("hi".into())
210 );
211 assert_eq!(
212 value_ref_to_value(ValueRef::Blob(&[0x00, 0xab])),
213 Value::Str("00ab".into())
214 );
215 }
216
217 #[test]
218 fn malformed_input_errors() {
219 assert!(matches!(
220 SqliteParser.parse("app.db", b"this is not a sqlite database"),
221 Err(AxError::Parse { .. })
222 ));
223 let mut bad = SQLITE_MAGIC.to_vec();
225 bad.extend_from_slice(b"corrupt");
226 assert!(matches!(
227 SqliteParser.parse("app.db", &bad),
228 Err(AxError::Parse { .. })
229 ));
230 }
231
232 #[test]
233 fn database_with_no_user_tables_errors() {
234 assert!(matches!(
235 SqliteParser.parse("-", &build_db("PRAGMA user_version = 1;")),
236 Err(AxError::Parse { .. })
237 ));
238 }
239
240 #[test]
241 fn sniff_keys_on_magic() {
242 assert_eq!(SqliteParser.sniff(&build_db(EVENTS)), Some(MAGIC));
243 assert_eq!(
244 SqliteParser.sniff(b"SQLite format 3\x00........"),
245 Some(MAGIC)
246 );
247 assert_eq!(SqliteParser.sniff(b"SQLite format 3"), None); assert_eq!(SqliteParser.sniff(b"PK\x03\x04"), None); assert_eq!(SqliteParser.sniff(b"{\"a\":1}"), None);
250 }
251
252 #[test]
253 fn claims_sqlite_extensions() {
254 assert_eq!(
255 SqliteParser.extensions(),
256 &["db", "sqlite", "sqlite3", "db3"]
257 );
258 }
259
260 #[test]
261 fn resolves_by_extension_and_magic() {
262 let reg = crate::parser::ParserRegistry::default();
263 assert_eq!(reg.resolve("app.db", b"zz").unwrap().id(), "sqlite");
264 assert_eq!(reg.resolve("data.sqlite3", b"zz").unwrap().id(), "sqlite");
265 assert_eq!(reg.resolve("-", &build_db(EVENTS)).unwrap().id(), "sqlite");
266 }
267}