anomalyx_normalize/parsers/
toml.rs1use crate::infer;
16use crate::parser::{Confidence, FormatParser, STRONG, TEXT};
17use crate::table::TableBuilder;
18use ax_core::{AxError, Column, Value};
19use std::collections::BTreeMap;
20
/// Parser for TOML documents.
///
/// A stateless unit type; its behavior lives in the `FormatParser`
/// implementation in this file.
#[derive(Debug, Default, Clone)]
pub struct TomlParser;
25
26fn toml_to_json(v: toml::Value) -> serde_json::Value {
30 use serde_json::Value as J;
31 use toml::Value as T;
32 match v {
33 T::String(s) => J::String(s),
34 T::Integer(i) => J::Number(i.into()),
35 T::Float(f) => serde_json::Number::from_f64(f).map_or(J::Null, J::Number),
36 T::Boolean(b) => J::Bool(b),
37 T::Datetime(dt) => J::String(dt.to_string()),
38 T::Array(a) => J::Array(a.into_iter().map(toml_to_json).collect()),
39 T::Table(t) => J::Object(t.into_iter().map(|(k, v)| (k, toml_to_json(v))).collect()),
40 }
41}
42
43impl TomlParser {
44 fn err(&self, msg: impl std::fmt::Display) -> AxError {
45 AxError::Parse {
46 format: self.id().to_string(),
47 message: msg.to_string(),
48 }
49 }
50}
51
52impl FormatParser for TomlParser {
53 fn id(&self) -> &'static str {
54 "toml"
55 }
56 fn extensions(&self) -> &'static [&'static str] {
57 &["toml"]
58 }
59 fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
60 let text = std::str::from_utf8(bytes).ok()?;
61 let parsed = toml::from_str::<toml::Value>(text).ok()?;
65 let nonempty = parsed.as_table().is_some_and(|t| !t.is_empty());
66 nonempty.then_some(STRONG)
67 }
68 fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
69 let text = std::str::from_utf8(bytes).map_err(|e| self.err(e))?;
70 let value = toml::from_str::<toml::Value>(text).map_err(|e| self.err(e))?;
71 let mut builder = TableBuilder::new();
72 builder.push_value(toml_to_json(value));
73 Ok(builder.finish())
74 }
75}
76
/// Parser for INI-style configuration files.
///
/// A stateless unit type; its behavior lives in the `FormatParser`
/// implementation in this file.
#[derive(Debug, Default, Clone)]
pub struct IniParser;
81
/// Reports whether `line` is an INI comment, i.e. its first character is `;`
/// or `#`.
///
/// No trimming happens here, so leading whitespace makes the line a
/// non-comment; the callers in this file pass already-trimmed lines.
fn ini_is_comment(line: &str) -> bool {
    // Both markers are ASCII, so inspecting the first byte is equivalent to
    // inspecting the first character.
    matches!(line.as_bytes().first(), Some(b';' | b'#'))
}
86
/// Reports whether `line` is a section header: `[` … `]` with at least one
/// character between the brackets.
///
/// The content between the brackets is not inspected further, so `[ ]` counts
/// as a header while `[]` does not.
fn ini_is_section(line: &str) -> bool {
    line.strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .is_some_and(|inner| !inner.is_empty())
}
91
/// Splits `line` at its first `=` or `:` into `(key, value)`.
///
/// The key is trimmed; the value is returned exactly as it appears after the
/// separator (untrimmed). Returns `None` when the line has no separator or the
/// trimmed key is empty.
fn ini_kv_split(line: &str) -> Option<(&str, &str)> {
    let (lhs, rhs) = line.split_once(['=', ':'])?;
    let key = lhs.trim();
    if key.is_empty() {
        None
    } else {
        Some((key, rhs))
    }
}
100
101fn parse_ini_value(raw: &str) -> Value {
103 let quoted = raw.len() >= 2
104 && ((raw.starts_with('"') && raw.ends_with('"'))
105 || (raw.starts_with('\'') && raw.ends_with('\'')));
106 if quoted {
107 Value::Str(raw[1..raw.len() - 1].to_string())
108 } else {
109 infer::infer_scalar(raw)
110 }
111}
112
113impl IniParser {
114 fn err(&self, msg: impl std::fmt::Display) -> AxError {
115 AxError::Parse {
116 format: self.id().to_string(),
117 message: msg.to_string(),
118 }
119 }
120}
121
122impl FormatParser for IniParser {
123 fn id(&self) -> &'static str {
124 "ini"
125 }
126 fn extensions(&self) -> &'static [&'static str] {
127 &["ini", "cfg", "conf"]
128 }
129 fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
130 let text = std::str::from_utf8(bytes).ok()?;
131 let mut first: Option<&str> = None;
132 let mut has_section = false;
133 let mut has_kv = false;
134 for raw in text.lines() {
135 let l = raw.trim();
136 if l.is_empty() || ini_is_comment(l) {
137 continue;
138 }
139 if first.is_none() {
140 first = Some(l);
141 }
142 if ini_is_section(l) {
143 has_section = true;
144 } else if ini_kv_split(l).is_some() {
145 has_kv = true;
146 }
147 }
148 let first = first?;
149 if has_section && has_kv {
150 return Some(STRONG);
153 }
154 (ini_is_section(first) || ini_kv_split(first).is_some()).then_some(TEXT)
155 }
156 fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
157 let text = std::str::from_utf8(bytes).map_err(|e| self.err(e))?;
158 let mut section = String::new();
159 let mut row: BTreeMap<String, Value> = BTreeMap::new();
160 for raw in text.lines() {
161 let l = raw.trim();
162 if l.is_empty() || ini_is_comment(l) {
163 continue;
164 }
165 if ini_is_section(l) {
166 section = l[1..l.len() - 1].trim().to_string();
167 continue;
168 }
169 match ini_kv_split(l) {
170 Some((key, val)) => {
171 let column = if section.is_empty() {
172 key.to_string()
173 } else {
174 format!("{section}.{key}")
175 };
176 row.insert(column, parse_ini_value(val.trim()));
177 }
178 None => return Err(self.err(format!("malformed INI line: {l}"))),
179 }
180 }
181 let mut builder = TableBuilder::new();
182 builder.push_row(row);
183 Ok(builder.finish())
184 }
185}
186
// Unit tests for the TOML and INI parsers and for the INI helper functions.
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::ColType;

    /// Finds the column called `name`, panicking with that name if it is absent.
    fn col<'a>(cols: &'a [Column], name: &str) -> &'a Column {
        cols.iter()
            .find(|c| c.name == name)
            .unwrap_or_else(|| panic!("missing column {name}"))
    }

    // TOML fixture: one value of each scalar kind, an array, a datetime and a
    // nested `[server]` table.
    const CONFIG: &str = r#"
title = "anomalyx"
retries = 3
ratio = 0.5
enabled = true
notanum = nan
tags = ["a", "b"]
created = 2024-01-02T03:04:05Z

[server]
host = "localhost"
port = 8080
"#;

    /// Parses `s` with `TomlParser`, panicking on error.
    fn toml_parse(s: &str) -> Vec<Column> {
        TomlParser.parse("-", s.as_bytes()).unwrap()
    }

    // Scalars keep their TOML type; `nan` ends up as `Null`.
    #[test]
    fn toml_typed_scalars() {
        let cols = toml_parse(CONFIG);
        assert_eq!(col(&cols, "title").cells[0], Value::Str("anomalyx".into()));
        assert_eq!(col(&cols, "retries").ty, ColType::Int);
        assert_eq!(col(&cols, "retries").cells[0], Value::Int(3));
        assert_eq!(col(&cols, "ratio").cells[0], Value::Float(0.5));
        assert_eq!(col(&cols, "enabled").cells[0], Value::Bool(true));
        // `toml_to_json` maps a float that JSON cannot represent to null.
        assert_eq!(col(&cols, "notanum").cells[0], Value::Null);
    }

    // Non-scalar TOML values surface as strings: the datetime as its textual
    // form, arrays and nested tables as compact JSON text.
    #[test]
    fn toml_datetime_array_and_nested_table_are_strings() {
        let cols = toml_parse(CONFIG);
        match &col(&cols, "created").cells[0] {
            // Only the date part is checked, not the exact rendering.
            Value::Str(s) => assert!(s.contains("2024-01-02"), "got {s}"),
            other => panic!("expected Str datetime, got {other:?}"),
        }
        assert_eq!(
            col(&cols, "tags").cells[0],
            Value::Str("[\"a\",\"b\"]".into())
        );
        assert_eq!(
            col(&cols, "server").cells[0],
            Value::Str("{\"host\":\"localhost\",\"port\":8080}".into())
        );
    }

    // A whole TOML document produces exactly one row.
    #[test]
    fn toml_is_a_single_row() {
        assert_eq!(col(&toml_parse(CONFIG), "title").cells.len(), 1);
    }

    #[test]
    fn toml_sniff_confirms_by_parsing() {
        // Valid TOML with at least one key is a strong match.
        assert_eq!(TomlParser.sniff(CONFIG.as_bytes()), Some(STRONG));
        assert_eq!(TomlParser.sniff(b"key = \"v\"\n"), Some(STRONG));
        assert_eq!(TomlParser.sniff(b"key=1\n"), Some(STRONG));
        // Input with no keys at all is not claimed.
        assert_eq!(TomlParser.sniff(b""), None);
        assert_eq!(TomlParser.sniff(b"# just a comment\n"), None);
        // Inputs shaped like other formats (bracketed list, CSV, space-separated
        // key=value pairs, `key: value` mapping) are not claimed either.
        assert_eq!(TomlParser.sniff(b"[1,2,3]"), None);
        assert_eq!(TomlParser.sniff(b"a,b,c\n1,2,3"), None);
        assert_eq!(TomlParser.sniff(b"k=1 v=2\n"), None);
        assert_eq!(TomlParser.sniff(b"kind: Pod\n"), None);
    }

    // A missing value and a missing key are both reported as parse errors.
    #[test]
    fn toml_malformed_errors() {
        assert!(matches!(
            TomlParser.parse("-", b"a = \n"),
            Err(AxError::Parse { .. })
        ));
        assert!(matches!(
            TomlParser.parse("-", b"= 5\n"),
            Err(AxError::Parse { .. })
        ));
    }

    // The registry picks TOML both from a `.toml` name and, for the nameless
    // source "-", from the content alone.
    #[test]
    fn toml_resolves_by_extension_and_content() {
        let reg = crate::parser::ParserRegistry::default();
        assert_eq!(reg.resolve("app.toml", b"x = 1").unwrap().id(), "toml");
        assert_eq!(
            reg.resolve("-", b"[server]\nhost = \"x\"\n").unwrap().id(),
            "toml"
        );
    }

    // INI fixture: a comment, two keys before any section, then a `[database]`
    // section with string, boolean and integer values.
    const INI: &str = "\
; a comment
host = localhost
port = 8080

[database]
name = mydb
ssl = true
timeout = 30
";

    /// Parses `s` with `IniParser`, panicking on error.
    fn ini_parse(s: &str) -> Vec<Column> {
        IniParser.parse("-", s.as_bytes()).unwrap()
    }

    // Keys before any section keep their name, keys inside a section are
    // prefixed with `section.`, and unquoted values are type-inferred.
    #[test]
    fn ini_flattens_sections_and_infers_types() {
        let cols = ini_parse(INI);
        assert_eq!(col(&cols, "host").cells[0], Value::Str("localhost".into()));
        assert_eq!(col(&cols, "port").cells[0], Value::Int(8080));
        assert_eq!(
            col(&cols, "database.name").cells[0],
            Value::Str("mydb".into())
        );
        assert_eq!(col(&cols, "database.ssl").cells[0], Value::Bool(true));
        assert_eq!(col(&cols, "database.timeout").cells[0], Value::Int(30));
        assert_eq!(col(&cols, "host").cells.len(), 1, "one row per config");
    }

    #[test]
    fn ini_quotes_colons_and_empties() {
        let cols = ini_parse("a = \"123\"\nb : bare\nc =\n");
        // Quoting forces a string even when the content looks numeric.
        assert_eq!(col(&cols, "a").cells[0], Value::Str("123".into()));
        // `:` is accepted as a key/value separator.
        assert_eq!(col(&cols, "b").cells[0], Value::Str("bare".into()));
        // An empty value becomes `Null`.
        assert_eq!(col(&cols, "c").cells[0], Value::Null);
    }

    // A line with no `=` or `:` is rejected rather than skipped.
    #[test]
    fn ini_malformed_line_errors() {
        assert!(matches!(
            IniParser.parse("-", b"no separator here\n"),
            Err(AxError::Parse { .. })
        ));
    }

    #[test]
    fn ini_helper_classification() {
        assert!(ini_is_comment("; x"));
        assert!(ini_is_comment("# x"));
        assert!(!ini_is_comment("k = v"));
        assert!(ini_is_section("[db]"));
        // Empty brackets and a missing closing bracket are not section headers.
        assert!(!ini_is_section("[]"));
        assert!(!ini_is_section("[unclosed"));
        assert!(!ini_is_section("k = v"));
        // The key is trimmed; the value keeps its leading space.
        assert_eq!(ini_kv_split("k = v"), Some(("k", " v")));
        assert_eq!(ini_kv_split("k : v"), Some(("k", " v")));
        // An empty key or a missing separator yields no split.
        assert_eq!(ini_kv_split("= v"), None);
        assert_eq!(ini_kv_split("no sep"), None);
        assert_eq!(parse_ini_value("'q'"), Value::Str("q".into()));
        assert_eq!(parse_ini_value("42"), Value::Int(42));
        // An unbalanced quote is not stripped; the raw text comes back as a string.
        assert_eq!(parse_ini_value("\"abc"), Value::Str("\"abc".into()));
        assert_eq!(parse_ini_value("'abc"), Value::Str("'abc".into()));
    }

    #[test]
    fn ini_sniff() {
        // A section header plus a key/value line is a strong match.
        assert_eq!(IniParser.sniff(INI.as_bytes()), Some(STRONG));
        // Key/value lines alone are only a weak (text-level) match.
        assert_eq!(
            IniParser.sniff(b"host = localhost\nport = 8080\n"),
            Some(TEXT)
        );
        // Leading comments are skipped when looking for the first significant line.
        assert_eq!(IniParser.sniff(b"; c\nhost = localhost\n"), Some(TEXT));
        // A section header alone is likewise only a weak match.
        assert_eq!(IniParser.sniff(b"[only_section]\n"), Some(TEXT));
        // CSV, free text and comment-only input are not claimed.
        assert_eq!(IniParser.sniff(b"a,b,c\n1,2,3"), None);
        assert_eq!(IniParser.sniff(b"hello world\n"), None);
        assert_eq!(IniParser.sniff(b"; only a comment\n"), None);
    }

    // The registry picks INI for each claimed extension and, for the nameless
    // source "-", from the content alone.
    #[test]
    fn ini_resolves_by_extension() {
        let reg = crate::parser::ParserRegistry::default();
        assert_eq!(reg.resolve("app.ini", b"x = y").unwrap().id(), "ini");
        assert_eq!(reg.resolve("app.cfg", b"x = y").unwrap().id(), "ini");
        assert_eq!(reg.resolve("app.conf", b"x = y").unwrap().id(), "ini");
        assert_eq!(
            reg.resolve("-", b"[db]\nhost = localhost\n").unwrap().id(),
            "ini"
        );
    }

    // Pins the exact extension lists each parser advertises.
    #[test]
    fn parsers_claim_their_extensions() {
        assert_eq!(TomlParser.extensions(), &["toml"]);
        assert_eq!(IniParser.extensions(), &["ini", "cfg", "conf"]);
    }
}