anomalyx_normalize/parsers/
yaml.rs1use crate::parser::{Confidence, FormatParser, TEXT};
12use crate::table::TableBuilder;
13use ax_core::{AxError, Column};
14use serde::Deserialize;
15
/// Stateless handle for the YAML format parser.
///
/// The type carries no fields; all behaviour lives in its `impl` blocks.
#[derive(Clone, Debug, Default)]
pub struct YamlParser;
18
/// Reports whether `line` has the shape `key:` or `key: value`.
///
/// The text before the first `:` must be non-empty and consist solely of
/// ASCII alphanumerics, `.`, `_` or `-`. The colon must be followed either by
/// the end of the line or by a space, so strings such as `12:00` are rejected.
/// Lines without any `:` are never mapping keys.
fn is_mapping_key(line: &str) -> bool {
    // Characters permitted in the key portion.
    let key_char_ok = |c: char| c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-');

    // `split_once` cuts at the first ':' and yields `None` when there is none.
    line.split_once(':').map_or(false, |(key, rest)| {
        let key_ok = !key.is_empty() && key.chars().all(key_char_ok);
        let separator_ok = rest.is_empty() || rest.starts_with(' ');
        key_ok && separator_ok
    })
}
35
/// Reports whether `line` looks like a block-sequence entry: a lone `-`, or
/// a `-` immediately followed by a space.
fn is_list_item(line: &str) -> bool {
    match line.strip_prefix('-') {
        // After the dash: nothing at all, or a space introducing the item.
        Some(rest) => rest.is_empty() || rest.starts_with(' '),
        None => false,
    }
}
40
41impl YamlParser {
42 fn err(&self, msg: impl std::fmt::Display) -> AxError {
43 AxError::Parse {
44 format: self.id().to_string(),
45 message: msg.to_string(),
46 }
47 }
48}
49
50impl FormatParser for YamlParser {
51 fn id(&self) -> &'static str {
52 "yaml"
53 }
54 fn extensions(&self) -> &'static [&'static str] {
55 &["yaml", "yml"]
56 }
57 fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
58 let text = std::str::from_utf8(bytes).ok()?;
59 for line in text.lines() {
60 if line.trim().is_empty() {
61 continue;
62 }
63 let lt = line.trim_start();
64 if lt.starts_with('#') {
65 continue; }
67 let yaml_like =
70 lt == "---" || lt.starts_with("--- ") || is_mapping_key(lt) || is_list_item(lt);
71 return yaml_like.then_some(TEXT);
72 }
73 None
74 }
75 fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
76 let mut builder = TableBuilder::new();
77 for document in serde_yaml::Deserializer::from_slice(bytes) {
78 let val = serde_json::Value::deserialize(document).map_err(|e| self.err(e))?;
79 match val {
80 serde_json::Value::Array(items) => {
81 for item in items {
82 builder.push_value(item);
83 }
84 }
85 serde_json::Value::Null => {} other => builder.push_value(other),
87 }
88 }
89 Ok(builder.finish())
90 }
91}
92
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::{ColType, Value};

    /// Small mapping document with two string values and one integer value.
    const MANIFEST: &str = "apiVersion: apps/v1\nkind: Deployment\nreplicas: 3\n";

    /// Parses `s` with `YamlParser`, panicking if parsing fails.
    fn parse(s: &str) -> Vec<Column> {
        let bytes = s.as_bytes();
        YamlParser.parse("-", bytes).unwrap()
    }

    /// Looks up the column called `name`, panicking when it is absent.
    fn col<'a>(cols: &'a [Column], name: &str) -> &'a Column {
        match cols.iter().find(|c| c.name == name) {
            Some(found) => found,
            None => panic!("missing column {name}"),
        }
    }

    #[test]
    fn parses_a_mapping_document_with_typed_cells() {
        let cols = parse(MANIFEST);

        let replicas = col(&cols, "replicas");
        assert_eq!(replicas.ty, ColType::Int);
        assert_eq!(replicas.cells[0], Value::Int(3));

        assert_eq!(col(&cols, "kind").cells[0], Value::Str("Deployment".into()));
        assert_eq!(
            col(&cols, "apiVersion").cells[0],
            Value::Str("apps/v1".into())
        );
    }

    #[test]
    fn multi_document_stream_is_one_row_per_doc() {
        // Two documents; only the first one has `foo`.
        let cols = parse("kind: A\nfoo: 1\n---\nkind: B\n");

        let kind = col(&cols, "kind");
        assert_eq!(kind.cells.len(), 2);
        assert_eq!(kind.cells[0], Value::Str("A".into()));
        assert_eq!(kind.cells[1], Value::Str("B".into()));

        let foo = col(&cols, "foo");
        assert_eq!(foo.cells[1], Value::Null, "absent in doc 2");
    }

    #[test]
    fn sequence_document_expands_to_rows() {
        let cols = parse("- x: 1\n- x: 2\n");
        let expected = vec![Value::Int(1), Value::Int(2)];
        assert_eq!(col(&cols, "x").cells, expected);
    }

    #[test]
    fn empty_document_produces_no_row() {
        // The trailing `---` opens a second, empty document.
        let cols = parse("kind: A\n---\n");
        assert_eq!(col(&cols, "kind").cells.len(), 1);
    }

    #[test]
    fn malformed_yaml_errors() {
        let outcome = YamlParser.parse("-", b"a: b: c\n");
        assert!(matches!(outcome, Err(AxError::Parse { .. })));
    }

    #[test]
    fn mapping_key_classification() {
        let accepted = ["apiVersion: v1", "a.b-c_d: x", "kind:"];
        for line in accepted.iter().copied() {
            assert!(is_mapping_key(line), "expected a mapping key: {line:?}");
        }

        let rejected = ["12:00", ": x", "no colon here", "foo bar: x"];
        for line in rejected.iter().copied() {
            assert!(!is_mapping_key(line), "expected no mapping key: {line:?}");
        }
    }

    #[test]
    fn list_item_classification() {
        for line in ["- item", "-"].iter().copied() {
            assert!(is_list_item(line), "expected a list item: {line:?}");
        }
        for line in ["-nospace", "notalist"].iter().copied() {
            assert!(!is_list_item(line), "expected no list item: {line:?}");
        }
    }

    #[test]
    fn sniff_recognizes_yaml_shapes() {
        let accepted: [&[u8]; 6] = [
            MANIFEST.as_bytes(),
            b"---\nkind: Pod\n",
            b"--- {inline: 1}\n",
            b"- a\n- b\n",
            b"# header\nkind: Pod\n",
            b"\n\nkind: Pod\n",
        ];
        for input in accepted.iter().copied() {
            assert_eq!(
                YamlParser.sniff(input),
                Some(TEXT),
                "input: {:?}",
                String::from_utf8_lossy(input)
            );
        }
    }

    #[test]
    fn sniff_rejects_non_yaml() {
        let rejected: [&[u8]; 5] = [
            b"a,b,c\n1,2,3",
            b"k=1 v=2\n",
            b"{\"a\":1}",
            b"12:00 something\n",
            b"hello world\n",
        ];
        for input in rejected.iter().copied() {
            assert_eq!(
                YamlParser.sniff(input),
                None,
                "input: {:?}",
                String::from_utf8_lossy(input)
            );
        }

        assert_eq!(
            YamlParser.sniff(b"hello world\nkind: Pod\n"),
            None,
            "a non-YAML first line is decisive; we do not scan past it"
        );
    }

    #[test]
    fn claims_yaml_extensions() {
        assert_eq!(YamlParser.extensions(), &["yaml", "yml"]);
    }

    #[test]
    fn resolves_by_extension_and_content() {
        let reg = crate::parser::ParserRegistry::default();

        // Routed by file extension.
        let by_yaml = reg.resolve("deploy.yaml", b"x: 1").unwrap();
        assert_eq!(by_yaml.id(), "yaml");
        let by_yml = reg.resolve("deploy.yml", b"x: 1").unwrap();
        assert_eq!(by_yml.id(), "yaml");

        // No usable extension: the content sniff has to pick the parser.
        assert_eq!(
            reg.resolve("-", MANIFEST.as_bytes()).unwrap().id(),
            "yaml",
            "routed by content sniff"
        );
    }
}