use crate::parser::{Confidence, FormatParser, TEXT};
use crate::table::TableBuilder;
use ax_core::{AxError, Column};
use serde::Deserialize;
/// Stateless parser for YAML input.
///
/// It carries no configuration; all behaviour lives in its [`FormatParser`]
/// implementation, where it identifies itself with the `yaml` format id.
#[derive(Debug, Default, Clone)]
pub struct YamlParser;
/// Reports whether `line` looks like a plain block-mapping entry (`key:` or `key: value`).
///
/// Everything before the first `:` must be a non-empty run of ASCII
/// alphanumerics, `.`, `_` or `-`, and the colon must be either the last
/// character of the line or be followed by a space. That second condition
/// keeps inputs such as `12:00` from being classified as mappings.
fn is_mapping_key(line: &str) -> bool {
    let (key, rest) = match line.split_once(':') {
        Some(parts) => parts,
        None => return false,
    };
    if key.is_empty() {
        return false;
    }
    let colon_is_separator = rest.is_empty() || rest.starts_with(' ');
    // Byte-wise scan is sufficient: any non-ASCII character contributes bytes
    // >= 0x80, none of which are in the allowed set.
    colon_is_separator
        && key
            .bytes()
            .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'_' | b'-'))
}
/// Reports whether `line` looks like a block-sequence entry: a lone `-`, or a
/// `-` immediately followed by a space.
fn is_list_item(line: &str) -> bool {
    match line.strip_prefix('-') {
        Some(rest) => rest.is_empty() || rest.starts_with(' '),
        None => false,
    }
}
impl YamlParser {
    /// Builds an [`AxError::Parse`] whose `format` is this parser's id and
    /// whose `message` is the `Display` rendering of `msg`.
    fn err(&self, msg: impl std::fmt::Display) -> AxError {
        let format = self.id().to_owned();
        let message = msg.to_string();
        AxError::Parse { format, message }
    }
}
impl FormatParser for YamlParser {
fn id(&self) -> &'static str {
"yaml"
}
fn extensions(&self) -> &'static [&'static str] {
&["yaml", "yml"]
}
fn sniff(&self, bytes: &[u8]) -> Option<Confidence> {
let text = std::str::from_utf8(bytes).ok()?;
for line in text.lines() {
if line.trim().is_empty() {
continue;
}
let lt = line.trim_start();
if lt.starts_with('#') {
continue; }
let yaml_like =
lt == "---" || lt.starts_with("--- ") || is_mapping_key(lt) || is_list_item(lt);
return yaml_like.then_some(TEXT);
}
None
}
fn parse(&self, _source: &str, bytes: &[u8]) -> Result<Vec<Column>, AxError> {
let mut builder = TableBuilder::new();
for document in serde_yaml::Deserializer::from_slice(bytes) {
let val = serde_json::Value::deserialize(document).map_err(|e| self.err(e))?;
match val {
serde_json::Value::Array(items) => {
for item in items {
builder.push_value(item);
}
}
serde_json::Value::Null => {} other => builder.push_value(other),
}
}
Ok(builder.finish())
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use ax_core::{ColType, Value};

    /// Runs the YAML parser over `s` with the source name `"-"`, panicking on error.
    fn parse(s: &str) -> Vec<Column> {
        let outcome = YamlParser.parse("-", s.as_bytes());
        outcome.unwrap()
    }

    /// Looks up the column called `name`, panicking if it does not exist.
    fn col<'a>(cols: &'a [Column], name: &str) -> &'a Column {
        match cols.iter().find(|c| c.name == name) {
            Some(column) => column,
            None => panic!("missing column {name}"),
        }
    }

    // The continuation lines must stay at column 0: the `\` only swallows the
    // first newline, so any indentation below would become part of the YAML.
    const MANIFEST: &str = "\
apiVersion: apps/v1
kind: Deployment
replicas: 3
";

    #[test]
    fn parses_a_mapping_document_with_typed_cells() {
        let table = parse(MANIFEST);

        let kind = col(&table, "kind");
        assert_eq!(kind.cells[0], Value::Str("Deployment".into()));

        let replicas = col(&table, "replicas");
        assert_eq!(replicas.ty, ColType::Int);
        assert_eq!(replicas.cells[0], Value::Int(3));

        let api_version = col(&table, "apiVersion");
        assert_eq!(api_version.cells[0], Value::Str("apps/v1".into()));
    }

    #[test]
    fn multi_document_stream_is_one_row_per_doc() {
        let table = parse("kind: A\nfoo: 1\n---\nkind: B\n");

        let kind = col(&table, "kind");
        assert_eq!(kind.cells.len(), 2);
        assert_eq!(kind.cells[0], Value::Str("A".into()));
        assert_eq!(kind.cells[1], Value::Str("B".into()));

        let foo = col(&table, "foo");
        assert_eq!(foo.cells[1], Value::Null, "absent in doc 2");
    }

    #[test]
    fn sequence_document_expands_to_rows() {
        let table = parse("- x: 1\n- x: 2\n");
        let x = col(&table, "x");
        assert_eq!(x.cells, vec![Value::Int(1), Value::Int(2)]);
    }

    #[test]
    fn empty_document_produces_no_row() {
        let table = parse("kind: A\n---\n");
        let kind = col(&table, "kind");
        assert_eq!(kind.cells.len(), 1);
    }

    #[test]
    fn malformed_yaml_errors() {
        let outcome = YamlParser.parse("-", b"a: b: c\n");
        assert!(matches!(outcome, Err(AxError::Parse { .. })));
    }

    #[test]
    fn mapping_key_classification() {
        for accepted in ["apiVersion: v1", "a.b-c_d: x", "kind:"] {
            assert!(is_mapping_key(accepted), "should accept {accepted:?}");
        }
        for rejected in ["12:00", ": x", "no colon here", "foo bar: x"] {
            assert!(!is_mapping_key(rejected), "should reject {rejected:?}");
        }
    }

    #[test]
    fn list_item_classification() {
        for accepted in ["- item", "-"] {
            assert!(is_list_item(accepted), "should accept {accepted:?}");
        }
        for rejected in ["-nospace", "notalist"] {
            assert!(!is_list_item(rejected), "should reject {rejected:?}");
        }
    }

    #[test]
    fn sniff_recognizes_yaml_shapes() {
        let yaml_inputs: [&[u8]; 6] = [
            MANIFEST.as_bytes(),
            b"---\nkind: Pod\n",
            b"--- {inline: 1}\n",
            b"- a\n- b\n",
            b"# header\nkind: Pod\n",
            b"\n\nkind: Pod\n",
        ];
        for input in yaml_inputs {
            assert_eq!(
                YamlParser.sniff(input),
                Some(TEXT),
                "input: {:?}",
                String::from_utf8_lossy(input)
            );
        }
    }

    #[test]
    fn sniff_rejects_non_yaml() {
        let other_inputs: [&[u8]; 5] = [
            b"a,b,c\n1,2,3",
            b"k=1 v=2\n",
            b"{\"a\":1}",
            b"12:00 something\n",
            b"hello world\n",
        ];
        for input in other_inputs {
            assert_eq!(
                YamlParser.sniff(input),
                None,
                "input: {:?}",
                String::from_utf8_lossy(input)
            );
        }
        assert_eq!(
            YamlParser.sniff(b"hello world\nkind: Pod\n"),
            None,
            "a non-YAML first line is decisive; we do not scan past it"
        );
    }

    #[test]
    fn claims_yaml_extensions() {
        assert_eq!(YamlParser.extensions(), &["yaml", "yml"]);
    }

    #[test]
    fn resolves_by_extension_and_content() {
        let reg = crate::parser::ParserRegistry::default();
        for path in ["deploy.yaml", "deploy.yml"] {
            assert_eq!(reg.resolve(path, b"x: 1").unwrap().id(), "yaml");
        }
        assert_eq!(
            reg.resolve("-", MANIFEST.as_bytes()).unwrap().id(),
            "yaml",
            "routed by content sniff"
        );
    }
}