#![allow(dead_code)]
use std::collections::HashSet;
use crate::syntax::{SyntaxKind, SyntaxNode};
use rowan::NodeOrToken;
use super::model::{YamlDiagnostic, diagnostic_codes};
use super::parser::parse_stream;
use super::profile::{ConsumerSet, YamlConsumer, YamlValidationContext};
use super::scanner::{Scanner, Token, TokenKind};
/// Validates `input` using the substrate validation context.
///
/// Convenience entry point that forwards to [`validate_yaml_with_context`]
/// and returns whatever that function reports: the first diagnostic found,
/// or `None` when no check fires.
pub(crate) fn validate_yaml(input: &str) -> Option<YamlDiagnostic> {
    let substrate_ctx = YamlValidationContext::substrate();
    validate_yaml_with_context(input, substrate_ctx)
}
/// Runs every validation pass over `input` and returns the first diagnostic.
///
/// The passes run in a fixed order and stop at the first hit:
/// 1. token/scanner level checks (directives, tag handles, unterminated
///    quoted scalars, required simple keys);
/// 2. syntax-tree checks on the result of `parse_stream`;
/// 3. consumer-specific rejections, skipped when `ctx.is_substrate()`.
///
/// Each pass is evaluated lazily, so later passes (and the parse itself) are
/// not executed once an earlier pass has produced a diagnostic.
pub(crate) fn validate_yaml_with_context(
    input: &str,
    ctx: YamlValidationContext,
) -> Option<YamlDiagnostic> {
    let tokens = collect_tokens(input);

    // Scanner-level passes: these do not need the syntax tree.
    let lexical = check_directives(input, &tokens)
        .or_else(|| check_tag_handle_scope(input, &tokens))
        .or_else(|| check_unterminated_quoted(input))
        .or_else(|| check_required_simple_key(input));
    if lexical.is_some() {
        return lexical;
    }

    // Tree-level passes: the tree is only built once the lexical passes are clean.
    let tree = parse_stream(input);
    let structural = check_trailing_content(&tree)
        .or_else(|| check_flow_commas(&tree))
        .or_else(|| check_unterminated_flow(&tree))
        .or_else(|| check_flow_context_anomalies(&tree))
        .or_else(|| check_multiline_quoted_indent(&tree, input))
        .or_else(|| check_block_indent_anomalies(&tree))
        .or_else(|| check_block_scalar_header(&tree))
        .or_else(|| check_block_scalar_leading_indent(&tree))
        .or_else(|| check_doc_level_bare_scalar_then_colon_map(&tree))
        .or_else(|| check_block_collection_after_value_scalar(&tree))
        .or_else(|| check_flow_continuation_indent(&tree, input))
        .or_else(|| check_flow_doc_markers(&tree, input))
        .or_else(|| check_invalid_dq_escapes(&tree))
        .or_else(|| check_comment_not_preceded_by_space(&tree, input))
        .or_else(|| check_anchor_decorates_alias(&tree))
        .or_else(|| check_anchor_before_block_indicator(&tree))
        .or_else(|| check_anchor_without_target(&tree))
        .or_else(|| check_node_property_underindented(&tree, input))
        .or_else(|| check_invalid_tag_chars(&tree));
    if structural.is_some() {
        return structural;
    }

    // The substrate context carries no consumer-specific rules.
    if ctx.is_substrate() {
        return None;
    }
    check_consumer_rejections(&tree, ctx)
}
/// Applies the consumer-specific rejection rules selected by `ctx`.
///
/// Two rules are consulted, in order:
/// * implicit empty block-map keys, when any consumer in `ConsumerSet::all()`
///   rejects under `ctx`;
/// * duplicate mapping keys, when `Jsyaml` or `RYaml` rejects under `ctx`.
///
/// Returns the first diagnostic produced, or `None`.
fn check_consumer_rejections(
    tree: &SyntaxNode,
    ctx: YamlValidationContext,
) -> Option<YamlDiagnostic> {
    if ctx.any_rejects(ConsumerSet::all()) {
        if let Some(diag) = check_implicit_empty_block_key(tree) {
            return Some(diag);
        }
    }

    let duplicate_key_rejectors = ConsumerSet::of(YamlConsumer::Jsyaml).with(YamlConsumer::RYaml);
    if ctx.any_rejects(duplicate_key_rejectors) {
        if let Some(diag) = check_duplicate_keys(tree) {
            return Some(diag);
        }
    }

    None
}
/// Reports whether `kind` is ignored when inspecting the content of a
/// mapping key: whitespace, newlines, comments and line prefixes.
fn is_key_trivia(kind: SyntaxKind) -> bool {
    match kind {
        SyntaxKind::WHITESPACE
        | SyntaxKind::NEWLINE
        | SyntaxKind::YAML_COMMENT
        | SyntaxKind::YAML_LINE_PREFIX => true,
        _ => false,
    }
}
/// Finds the first block-map key whose only non-trivia child is a colon.
///
/// Such a key has no key content at all (an implicit empty key). The
/// diagnostic spans the colon token.
fn check_implicit_empty_block_key(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let block_keys = tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY);

    for key in block_keys {
        let mut significant = key
            .children_with_tokens()
            .filter(|el| !is_key_trivia(el.kind()));

        // The key must start with a colon ...
        let Some(first) = significant.next() else {
            continue;
        };
        // ... and contain nothing else of substance.
        if first.kind() != SyntaxKind::YAML_COLON || significant.next().is_some() {
            continue;
        }

        let range = first.text_range();
        return Some(YamlDiagnostic {
            code: diagnostic_codes::CONSUMER_IMPLICIT_EMPTY_KEY,
            message: "implicit empty mapping key is rejected by pandoc and quarto",
            byte_start: range.start().into(),
            byte_end: range.end().into(),
        });
    }
    None
}
/// Finds the first mapping (block or flow) that repeats a simple scalar key.
///
/// Keys are compared by their raw source text as returned by
/// `simple_scalar_key_text`; entries whose key is not a single plain scalar
/// node are skipped. The diagnostic spans the repeated key node.
fn check_duplicate_keys(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    for map in tree.descendants() {
        // Pick the entry/key kinds that belong to this flavour of mapping.
        let (entry_kind, key_kind) = match map.kind() {
            SyntaxKind::YAML_BLOCK_MAP => (
                SyntaxKind::YAML_BLOCK_MAP_ENTRY,
                SyntaxKind::YAML_BLOCK_MAP_KEY,
            ),
            SyntaxKind::YAML_FLOW_MAP => (
                SyntaxKind::YAML_FLOW_MAP_ENTRY,
                SyntaxKind::YAML_FLOW_MAP_KEY,
            ),
            _ => continue,
        };

        let keys = map
            .children()
            .filter(|n| n.kind() == entry_kind)
            .filter_map(|entry| entry.children().find(|n| n.kind() == key_kind));

        let mut names_seen: HashSet<String> = HashSet::new();
        for key in keys {
            let Some(name) = simple_scalar_key_text(&key) else {
                continue;
            };
            // `insert` returns true for a name not seen before in this map.
            if names_seen.insert(name) {
                continue;
            }
            let range = key.text_range();
            return Some(YamlDiagnostic {
                code: diagnostic_codes::CONSUMER_DUPLICATE_KEY,
                message: "duplicate mapping key is rejected by quarto and rmarkdown",
                byte_start: range.start().into(),
                byte_end: range.end().into(),
            });
        }
    }
    None
}
/// Returns the source text of `key` when it consists of a single scalar.
///
/// Colons and key trivia are ignored. Any other kind of child, or a second
/// scalar after one has been recorded, makes the key non-simple and yields
/// `None`. A key without a scalar node also yields `None`.
fn simple_scalar_key_text(key: &SyntaxNode) -> Option<String> {
    let mut found: Option<SyntaxNode> = None;
    for element in key.children_with_tokens() {
        let kind = element.kind();
        if kind == SyntaxKind::YAML_COLON || is_key_trivia(kind) {
            continue;
        }
        if kind != SyntaxKind::YAML_SCALAR || found.is_some() {
            return None;
        }
        found = element.into_node();
    }
    found.map(|node| node.text().to_string())
}
/// Scans `input` to exhaustion and returns every token the scanner yields,
/// in order.
fn collect_tokens(input: &str) -> Vec<Token> {
    let mut scanner = Scanner::new(input);
    std::iter::from_fn(|| scanner.next_token()).collect()
}
/// Re-scans `input` and returns the first scanner diagnostic whose code is
/// `LEX_UNTERMINATED_QUOTED_SCALAR`, if any.
fn check_unterminated_quoted(input: &str) -> Option<YamlDiagnostic> {
    let mut scanner = Scanner::new(input);
    // Drive the scanner to the end so that all diagnostics are recorded.
    loop {
        if scanner.next_token().is_none() {
            break;
        }
    }
    for diag in scanner.diagnostics().iter() {
        if diag.code == diagnostic_codes::LEX_UNTERMINATED_QUOTED_SCALAR {
            return Some(diag.clone());
        }
    }
    None
}
/// Re-scans `input` and returns the first scanner diagnostic whose code is
/// `LEX_REQUIRED_SIMPLE_KEY_NOT_FOUND`, if any.
fn check_required_simple_key(input: &str) -> Option<YamlDiagnostic> {
    let mut scanner = Scanner::new(input);
    // Exhaust the token stream; only the recorded diagnostics matter here.
    std::iter::from_fn(|| scanner.next_token()).for_each(drop);

    let wanted = diagnostic_codes::LEX_REQUIRED_SIMPLE_KEY_NOT_FOUND;
    let recorded = scanner.diagnostics();
    recorded.iter().find(|diag| diag.code == wanted).cloned()
}
/// Validates directive placement and `%YAML` directive shape.
///
/// Walking the token stream in order, this reports:
/// * a directive appearing after document content without an intervening
///   document-end marker;
/// * a second `%YAML` directive before the next document start/end;
/// * a `%YAML` directive flagged by `yaml_directive_has_trailing_content`.
///
/// After the walk, if the stream has a directive but no document-start token
/// anywhere, the first directive is reported as lacking an explicit document
/// start.
fn check_directives(input: &str, tokens: &[Token]) -> Option<YamlDiagnostic> {
    let mut content_seen = false;
    let mut yaml_directive_seen = false;

    for tok in tokens {
        match tok.kind {
            TokenKind::Trivia(_) | TokenKind::StreamStart | TokenKind::StreamEnd => {}
            TokenKind::DocumentStart => {
                content_seen = true;
                yaml_directive_seen = false;
            }
            TokenKind::DocumentEnd => {
                content_seen = false;
                yaml_directive_seen = false;
            }
            TokenKind::Directive => {
                if content_seen {
                    return Some(diag_at_token(
                        tok,
                        diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT,
                        "directive requires document end before subsequent directives",
                    ));
                }
                let text = &input[tok.start.index..tok.end.index];
                // Only `%YAML` directives get the extra shape checks.
                if directive_name(text) != "YAML" {
                    continue;
                }
                if yaml_directive_seen {
                    return Some(diag_at_token(
                        tok,
                        diagnostic_codes::PARSE_DUPLICATE_YAML_DIRECTIVE,
                        "a document may carry at most one %YAML directive",
                    ));
                }
                yaml_directive_seen = true;
                if yaml_directive_has_trailing_content(text) {
                    return Some(diag_at_token(
                        tok,
                        diagnostic_codes::PARSE_MALFORMED_YAML_DIRECTIVE,
                        "%YAML directive takes a single version argument",
                    ));
                }
            }
            _ => content_seen = true,
        }
    }

    // A directive is only meaningful when some document start marker exists.
    let has_document_start = tokens.iter().any(|t| t.kind == TokenKind::DocumentStart);
    if has_document_start {
        return None;
    }
    tokens
        .iter()
        .find(|t| t.kind == TokenKind::Directive)
        .map(|directive| {
            diag_at_token(
                directive,
                diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START,
                "directive requires an explicit document start marker",
            )
        })
}
/// Extracts the directive name from a directive's source text.
///
/// A leading `%` is dropped when present; the name is the first
/// whitespace-delimited word of what remains, or `""` when there is none.
fn directive_name(text: &str) -> &str {
    let body = match text.strip_prefix('%') {
        Some(rest) => rest,
        None => text,
    };
    match body.split_whitespace().next() {
        Some(name) => name,
        None => "",
    }
}
fn yaml_directive_has_trailing_content(text: &str) -> bool {
let mut fields = text.strip_prefix('%').unwrap_or(text).split_whitespace();
let _name = fields.next();
let version = fields.next();
if let Some(v) = version
&& !is_valid_yaml_version(v)
{
return true;
}
matches!(fields.next(), Some(field) if !field.starts_with('#'))
}
/// Reports whether `s` has the form `<digits>.<digits>`.
///
/// Both parts must be non-empty and consist solely of ASCII digits; the
/// split happens at the first `.`, so `1.2.3` is rejected.
fn is_valid_yaml_version(s: &str) -> bool {
    let Some((major, minor)) = s.split_once('.') else {
        return false;
    };
    let is_number = |part: &str| !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit());
    is_number(major) && is_number(minor)
}
/// Reports the first tag whose handle is not declared for its document.
///
/// `!` and `!!` are always available. Handles named by `%TAG` directives are
/// collected as pending and become active at the next document-start token,
/// at which point the active set is reset to the defaults plus the pending
/// handles. Verbatim tags (starting with `!<`) are never checked.
fn check_tag_handle_scope<'a>(input: &'a str, tokens: &[Token]) -> Option<YamlDiagnostic> {
    let mut upcoming: HashSet<&'a str> = HashSet::new();
    let mut active: HashSet<&'a str> = HashSet::from(["!", "!!"]);

    for tok in tokens {
        match tok.kind {
            TokenKind::DocumentStart => {
                // New document: defaults plus whatever the preceding directives declared.
                active = HashSet::from(["!", "!!"]);
                active.extend(upcoming.drain());
            }
            TokenKind::Directive => {
                let text = &input[tok.start.index..tok.end.index];
                if directive_name(text) == "TAG" {
                    let body = text.strip_prefix('%').unwrap_or(text);
                    // Field 0 is the directive name, field 1 the handle.
                    if let Some(handle) = body.split_whitespace().nth(1) {
                        upcoming.insert(handle);
                    }
                }
            }
            TokenKind::Tag => {
                let text = &input[tok.start.index..tok.end.index];
                let is_verbatim = text.starts_with("!<");
                if !is_verbatim && !active.contains(extract_tag_handle(text)) {
                    return Some(diag_at_token(
                        tok,
                        diagnostic_codes::PARSE_UNDEFINED_TAG_HANDLE,
                        "tag handle is not declared in the current document",
                    ));
                }
            }
            _ => {}
        }
    }
    None
}
/// Returns the handle portion of a tag's source text.
///
/// Texts shorter than two bytes are returned unchanged. Otherwise the handle
/// runs through the first `!` found after the first byte (so `!!str` gives
/// `!!` and `!e!x` gives `!e!`); when there is no such `!`, the handle is
/// just the first byte.
fn extract_tag_handle(text: &str) -> &str {
    if text.len() < 2 {
        return text;
    }
    let handle_len = match text[1..].find('!') {
        // `rel` is relative to byte 1; +2 includes both the skipped byte and the `!`.
        Some(rel) => rel + 2,
        None => 1,
    };
    &text[..handle_len]
}
/// Builds a diagnostic with the given `code` and `message` whose byte range
/// is taken from the start and end indices of `tok`.
fn diag_at_token(tok: &Token, code: &'static str, message: &'static str) -> YamlDiagnostic {
    let byte_start = tok.start.index;
    let byte_end = tok.end.index;
    YamlDiagnostic {
        code,
        message,
        byte_start,
        byte_end,
    }
}
/// Looks for content that illegally follows a closed construct.
///
/// Three passes run in order, each returning on its first hit:
/// 1. every document node, via `check_trailing_after_flow`;
/// 2. every block-map value and block-sequence item, via
///    `check_trailing_after_flow_in_container`;
/// 3. the whole tree, via `check_trailing_after_doc_end`.
fn check_trailing_content(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let at_document_level = tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_DOCUMENT)
        .find_map(|doc| check_trailing_after_flow(&doc));
    if at_document_level.is_some() {
        return at_document_level;
    }

    let in_block_container = tree
        .descendants()
        .filter(|n| {
            matches!(
                n.kind(),
                SyntaxKind::YAML_BLOCK_MAP_VALUE | SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
            )
        })
        .find_map(|container| check_trailing_after_flow_in_container(&container));
    if in_block_container.is_some() {
        return in_block_container;
    }

    check_trailing_after_doc_end(tree)
}
/// Checks the direct children of a block container for content that follows
/// a flow collection.
///
/// Once a flow sequence/map child has been seen, any later non-flow node or
/// scalar token is reported, and a comment is reported unless whitespace or
/// a newline separates it from the flow collection. A further flow
/// collection simply restarts the tracking.
fn check_trailing_after_flow_in_container(container: &SyntaxNode) -> Option<YamlDiagnostic> {
    let mut flow_closed = false;
    let mut separated = false;

    for element in container.children_with_tokens() {
        // Each arm either moves on to the next child or yields the offending span.
        let (span, message) = match element {
            NodeOrToken::Node(node) => {
                let is_flow = matches!(
                    node.kind(),
                    SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
                );
                if is_flow {
                    flow_closed = true;
                    separated = false;
                    continue;
                }
                if !flow_closed {
                    continue;
                }
                (
                    node.text_range(),
                    "unexpected content after flow-collection close in block context",
                )
            }
            NodeOrToken::Token(token) => {
                if !flow_closed {
                    continue;
                }
                match token.kind() {
                    SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE => {
                        separated = true;
                        continue;
                    }
                    SyntaxKind::YAML_COMMENT if !separated => (
                        token.text_range(),
                        "comment must be preceded by whitespace after flow-collection close",
                    ),
                    SyntaxKind::YAML_SCALAR => (
                        token.text_range(),
                        "unexpected content after flow-collection close in block context",
                    ),
                    _ => continue,
                }
            }
        };
        return Some(diag_at_range(
            span.start().into(),
            span.end().into(),
            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
            message,
        ));
    }
    None
}
/// Checks the direct children of a document node for content that follows a
/// flow collection.
///
/// State carried across children:
/// * `after_flow` - a flow sequence/map child has been seen and not yet
///   "closed off" by a document-end token or an accepted implicit-key map;
/// * `have_separator` - whitespace or a newline token has been seen since
///   that flow collection.
///
/// All diagnostics use `PARSE_TRAILING_CONTENT_AFTER_FLOW_END`.
fn check_trailing_after_flow(doc: &SyntaxNode) -> Option<YamlDiagnostic> {
let mut after_flow = false;
let mut have_separator = false;
for child in doc.children_with_tokens() {
match &child {
NodeOrToken::Node(n) => {
let kind = n.kind();
if matches!(
kind,
SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
) {
// A second flow collection directly after the first is trailing content.
if after_flow {
return Some(diag_at_range(
n.text_range().start().into(),
n.text_range().end().into(),
diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
"unexpected content after flow-collection close",
));
}
after_flow = true;
have_separator = false;
} else if after_flow {
// A block map whose first key holds only a colon and trivia is treated as
// `<flow>: value`, i.e. the flow collection acting as an implicit key.
if kind == SyntaxKind::YAML_BLOCK_MAP && is_implicit_flow_key_block_map(n) {
// NOTE(review): `last()` picks the final flow child of the whole document,
// not necessarily the one preceding `n` - confirm these always coincide.
let flow_nodes: Vec<SyntaxNode> = doc
.children()
.filter(|c| {
matches!(
c.kind(),
SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
)
})
.collect();
let preceding_flow_spans_lines = flow_nodes
.last()
.map(|f| f.text().to_string().contains('\n'))
.unwrap_or(false);
if preceding_flow_spans_lines {
return Some(diag_at_range(
n.text_range().start().into(),
n.text_range().end().into(),
diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
"implicit key flow node cannot span lines",
));
}
// Accepted as an implicit key: stop tracking this flow collection.
after_flow = false;
have_separator = false;
continue;
}
// Any other node after a flow collection is trailing content.
return Some(diag_at_range(
n.text_range().start().into(),
n.text_range().end().into(),
diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
"unexpected content after flow-collection close",
));
}
}
NodeOrToken::Token(t) => {
// Tokens only matter once a flow collection has been seen.
if !after_flow {
continue;
}
match t.kind() {
SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE => {
have_separator = true;
}
SyntaxKind::YAML_COMMENT => {
// A comment must be separated from the flow collection.
if !have_separator {
return Some(diag_at_range(
t.text_range().start().into(),
t.text_range().end().into(),
diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
"comment must be preceded by whitespace after flow-collection close",
));
}
}
// A document-end token resets the tracking for whatever follows.
SyntaxKind::YAML_DOCUMENT_END => {
after_flow = false;
have_separator = false;
}
// Every other token kind after a flow collection is rejected.
_ => {
return Some(diag_at_range(
t.text_range().start().into(),
t.text_range().end().into(),
diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
"unexpected content after flow-collection close",
));
}
}
}
}
}
None
}
/// Reports whether the first entry of `block_map` has a key made up solely
/// of a colon, whitespace, newlines and comments.
///
/// Returns `false` when the map has no entry or the first entry has no key
/// node.
fn is_implicit_flow_key_block_map(block_map: &SyntaxNode) -> bool {
    let first_key = block_map
        .children()
        .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
        .and_then(|entry| {
            entry
                .children()
                .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
        });

    match first_key {
        None => false,
        Some(key) => key.children_with_tokens().all(|element| {
            matches!(
                element.kind(),
                SyntaxKind::YAML_COLON
                    | SyntaxKind::WHITESPACE
                    | SyntaxKind::NEWLINE
                    | SyntaxKind::YAML_COMMENT
            )
        }),
    }
}
/// Checks what follows each document-end token up to the next newline.
///
/// Whitespace is allowed, and a comment is allowed once whitespace has been
/// seen. A comment without preceding whitespace, or any other token before
/// the newline, is reported with `LEX_TRAILING_CONTENT_AFTER_DOCUMENT_END`.
fn check_trailing_after_doc_end(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let tokens: Vec<_> = tree
        .descendants_with_tokens()
        .filter_map(|el| el.into_token())
        .collect();

    let marker_positions = tokens
        .iter()
        .enumerate()
        .filter(|(_, tok)| tok.kind() == SyntaxKind::YAML_DOCUMENT_END)
        .map(|(index, _)| index);

    for marker in marker_positions {
        let mut separated = false;
        for follower in tokens.iter().skip(marker + 1) {
            let message = match follower.kind() {
                // End of the marker's line: nothing illegal was found.
                SyntaxKind::NEWLINE => break,
                SyntaxKind::WHITESPACE => {
                    separated = true;
                    continue;
                }
                // A properly separated comment runs to the end of the line.
                SyntaxKind::YAML_COMMENT if separated => break,
                SyntaxKind::YAML_COMMENT => {
                    "comment must be preceded by whitespace after document end marker"
                }
                _ => "unexpected content on the same line as document end marker",
            };
            let span = follower.text_range();
            return Some(diag_at_range(
                span.start().into(),
                span.end().into(),
                diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_END,
                message,
            ));
        }
    }
    None
}
/// Runs `check_flow_node_commas` on every flow sequence and flow map in the
/// tree and returns the first diagnostic produced.
fn check_flow_commas(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    tree.descendants()
        .filter(|n| {
            matches!(
                n.kind(),
                SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
            )
        })
        .find_map(|flow| check_flow_node_commas(&flow))
}
/// Reports a comma in `flow` that is not preceded by an item.
///
/// Among the direct children, any node and any token other than trivia,
/// commas and the bracket/brace indicators counts as an item. A comma seen
/// before any item since the previous comma (or since the start) is
/// reported.
fn check_flow_node_commas(flow: &SyntaxNode) -> Option<YamlDiagnostic> {
    let mut item_pending = false;

    for element in flow.children_with_tokens() {
        let tok = match element {
            NodeOrToken::Node(_) => {
                item_pending = true;
                continue;
            }
            NodeOrToken::Token(tok) => tok,
        };

        match (tok.kind(), tok.text()) {
            (SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::YAML_COMMENT, _) => {}
            (SyntaxKind::YAML_FLOW_INDICATOR, ",") => {
                if !item_pending {
                    let span = tok.text_range();
                    return Some(diag_at_range(
                        span.start().into(),
                        span.end().into(),
                        diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA,
                        "comma must follow a flow-collection item",
                    ));
                }
                item_pending = false;
            }
            // Opening/closing indicators neither count as items nor as separators.
            (SyntaxKind::YAML_FLOW_INDICATOR, "[" | "]" | "{" | "}") => {}
            _ => item_pending = true,
        }
    }
    None
}
/// Reports the first flow sequence or flow map that has no closing indicator
/// (`]` or `}` respectively) among its direct child tokens.
///
/// The diagnostic spans the whole flow node.
fn check_unterminated_flow(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    for flow in tree.descendants() {
        let (closer, code, message) = match flow.kind() {
            SyntaxKind::YAML_FLOW_SEQUENCE => (
                "]",
                diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE,
                "flow sequence reached end of input without `]`",
            ),
            SyntaxKind::YAML_FLOW_MAP => (
                "}",
                diagnostic_codes::PARSE_UNTERMINATED_FLOW_MAP,
                "flow mapping reached end of input without `}`",
            ),
            _ => continue,
        };

        let is_closed = flow
            .children_with_tokens()
            .filter_map(|el| el.into_token())
            .any(|t| t.kind() == SyntaxKind::YAML_FLOW_INDICATOR && t.text() == closer);
        if is_closed {
            continue;
        }

        let span = flow.text_range();
        return Some(diag_at_range(
            span.start().into(),
            span.end().into(),
            code,
            message,
        ));
    }
    None
}
/// Runs the flow-context checks in order and returns the first diagnostic:
/// multi-line implicit keys in flow-sequence items, extra colons in flow-map
/// values, and lone `-` scalars.
fn check_flow_context_anomalies(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let multiline_key = || {
        tree.descendants()
            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
            .find_map(|item| check_flow_seq_item_multiline_key(&item))
    };
    let extra_colon = || {
        tree.descendants()
            .filter(|n| n.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
            .find_map(|value| check_flow_map_value_extra_colon(&value))
    };

    multiline_key()
        .or_else(extra_colon)
        .or_else(|| check_flow_lone_dash(tree))
}
/// Reports the first scalar node with text `-` that is a direct child of a
/// flow-sequence item, flow-map key or flow-map value.
fn check_flow_lone_dash(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let dash = tree
        .descendants()
        .filter(|n| {
            matches!(
                n.kind(),
                SyntaxKind::YAML_FLOW_SEQUENCE_ITEM
                    | SyntaxKind::YAML_FLOW_MAP_KEY
                    | SyntaxKind::YAML_FLOW_MAP_VALUE
            )
        })
        .find_map(|holder| {
            holder
                .children()
                .find(|n| n.kind() == SyntaxKind::YAML_SCALAR && n.text() == "-")
        })?;

    let span = dash.text_range();
    Some(diag_at_range(
        span.start().into(),
        span.end().into(),
        diagnostic_codes::PARSE_INVALID_PLAIN_SCALAR_IN_FLOW,
        "`-` cannot start a plain scalar in flow context",
    ))
}
/// Reports an implicit key in a flow-sequence item that spans lines.
///
/// Items containing an explicit key token are exempt. Otherwise the direct
/// children are scanned up to the first colon token: if a newline token, or a
/// scalar node whose text contains a newline, was seen before it, the colon
/// is reported. Only the first colon is considered.
fn check_flow_seq_item_multiline_key(item: &SyntaxNode) -> Option<YamlDiagnostic> {
    let has_explicit_key_marker = item
        .children_with_tokens()
        .filter_map(|el| el.into_token())
        .any(|t| t.kind() == SyntaxKind::YAML_KEY);
    if has_explicit_key_marker {
        return None;
    }

    let mut newline_seen = false;
    for element in item.children_with_tokens() {
        match element {
            NodeOrToken::Node(node) => {
                if node.kind() == SyntaxKind::YAML_SCALAR
                    && node.text().to_string().contains('\n')
                {
                    newline_seen = true;
                }
            }
            NodeOrToken::Token(tok) => {
                let kind = tok.kind();
                if kind == SyntaxKind::NEWLINE {
                    newline_seen = true;
                } else if kind == SyntaxKind::YAML_COLON {
                    // The first colon decides the outcome either way.
                    if !newline_seen {
                        return None;
                    }
                    let span = tok.text_range();
                    return Some(diag_at_range(
                        span.start().into(),
                        span.end().into(),
                        diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
                        "implicit key in flow context cannot span lines",
                    ));
                }
            }
        }
    }
    None
}
/// Reports the first colon token that appears after a scalar node among the
/// direct children of a flow-map value.
fn check_flow_map_value_extra_colon(value: &SyntaxNode) -> Option<YamlDiagnostic> {
    let stray_colon = value
        .children_with_tokens()
        // Ignore everything up to the first scalar node.
        .skip_while(|el| {
            !matches!(el, NodeOrToken::Node(n) if n.kind() == SyntaxKind::YAML_SCALAR)
        })
        .filter_map(|el| el.into_token())
        .find(|tok| tok.kind() == SyntaxKind::YAML_COLON)?;

    let span = stray_colon.text_range();
    Some(diag_at_range(
        span.start().into(),
        span.end().into(),
        diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA,
        "expected comma between flow-mapping entries",
    ))
}
/// Checks continuation-line indentation of multi-line quoted scalars held in
/// block containers.
///
/// All block-map values are visited first, then all block-sequence items.
/// For each, the indent of the parent collection is taken as the column (per
/// `column_of`) at which the enclosing block map / block sequence starts, and
/// the container is handed to `check_quoted_scalar_continuation`.
fn check_multiline_quoted_indent(tree: &SyntaxNode, input: &str) -> Option<YamlDiagnostic> {
    let map_values = tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE);
    for value in map_values {
        // value -> entry -> block map
        let enclosing_map = value
            .parent()
            .and_then(|entry| entry.parent())
            .filter(|map| map.kind() == SyntaxKind::YAML_BLOCK_MAP);
        let Some(enclosing_map) = enclosing_map else {
            continue;
        };
        let map_offset: usize = enclosing_map.text_range().start().into();
        let found = check_quoted_scalar_continuation(&value, input, column_of(input, map_offset));
        if found.is_some() {
            return found;
        }
    }

    let sequence_items = tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
    for item in sequence_items {
        let enclosing_seq = item
            .parent()
            .filter(|seq| seq.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE);
        let Some(enclosing_seq) = enclosing_seq else {
            continue;
        };
        let seq_offset: usize = enclosing_seq.text_range().start().into();
        let found = check_quoted_scalar_continuation(&item, input, column_of(input, seq_offset));
        if found.is_some() {
            return found;
        }
    }

    None
}
/// Checks the continuation lines of multi-line quoted scalars that are
/// direct children of `container`.
///
/// Only scalar nodes whose text contains a newline and starts with `"` or
/// `'` are examined. For every line after the first:
/// * lines made up only of spaces/tabs are skipped;
/// * a tab directly after at most `parent_indent` leading spaces is reported
///   as `PARSE_UNEXPECTED_INDENT` (one byte, at the tab);
/// * a first non-whitespace byte at column `<= parent_indent` is reported as
///   `PARSE_UNEXPECTED_DEDENT` (one byte, at that position).
///
/// `input` is the full source; line contents are read from it using the
/// scalar's start offset from its text range.
fn check_quoted_scalar_continuation(
container: &SyntaxNode,
input: &str,
parent_indent: usize,
) -> Option<YamlDiagnostic> {
for child in container.children() {
if child.kind() != SyntaxKind::YAML_SCALAR {
continue;
}
let text = child.text().to_string();
if !text.contains('\n') {
continue;
}
let starts_quoted = text.starts_with('"') || text.starts_with('\'');
if !starts_quoted {
continue;
}
let scalar_start: usize = child.text_range().start().into();
let bytes = text.as_bytes();
// `offset` walks the scalar text byte by byte; work happens at each '\n'.
let mut offset = 0usize;
while offset < bytes.len() {
if bytes[offset] != b'\n' {
offset += 1;
continue;
}
// The line under inspection starts right after this newline and ends at
// the next newline or at the end of the scalar text.
let line_start_in_src = scalar_start + offset + 1;
let line_end_in_text = text[offset + 1..]
.find('\n')
.map(|i| offset + 1 + i)
.unwrap_or(text.len());
let line_end_in_src = scalar_start + line_end_in_text.min(text.len());
let line_text_in_src = &input[line_start_in_src..line_end_in_src];
// Count of leading spaces and tabs together.
let leading_ws = line_text_in_src
.bytes()
.take_while(|b| *b == b' ' || *b == b'\t')
.count();
// Whitespace-only line: nothing to check.
if leading_ws == line_text_in_src.len() {
offset += 1;
continue;
}
// Count of leading spaces only; a tab right after them, at or below the
// parent indent, is treated as a tab used for indentation.
let leading_spaces = line_text_in_src.bytes().take_while(|b| *b == b' ').count();
if leading_spaces <= parent_indent
&& line_text_in_src.as_bytes().get(leading_spaces) == Some(&b'\t')
{
let tab_byte = line_start_in_src + leading_spaces;
return Some(diag_at_range(
tab_byte,
tab_byte + 1,
diagnostic_codes::PARSE_UNEXPECTED_INDENT,
"tab character used as indentation is not allowed in YAML",
));
}
// The column of the first content byte counts each space/tab as one column.
let first_non_ws_col = leading_ws;
let first_non_ws_byte = line_start_in_src + leading_ws;
if first_non_ws_col <= parent_indent {
return Some(diag_at_range(
first_non_ws_byte,
first_non_ws_byte + 1,
diagnostic_codes::PARSE_UNEXPECTED_DEDENT,
"multi-line quoted scalar continuation indented at or below parent block indent",
));
}
offset += 1;
}
}
None
}
/// Detects block-structure shapes in the tree that indicate broken
/// indentation.
///
/// Runs `check_tab_as_indent` and `check_inline_block_seq_in_value` first.
/// Then, for every block-map value, block-sequence item and document node, it
/// counts the direct children that are block collections (`struct_count`)
/// and scalars (`scalar_count`) and reports, in this order:
/// 1. more than one block collection in a non-document node
///    (`PARSE_UNEXPECTED_DEDENT`, spanning the last collection);
/// 2. a scalar following a block collection inside a block-map value
///    (`PARSE_INVALID_KEY_TOKEN`, spanning that scalar);
/// 3. more than one scalar (spanning the last scalar; code and message depend
///    on the node kind). Document nodes are only reported here when
///    `has_comment_between_scalars` holds.
fn check_block_indent_anomalies(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
if let Some(diag) = check_tab_as_indent(tree) {
return Some(diag);
}
if let Some(diag) = check_inline_block_seq_in_value(tree) {
return Some(diag);
}
for node in tree.descendants().filter(|n| {
matches!(
n.kind(),
SyntaxKind::YAML_BLOCK_MAP_VALUE
| SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
| SyntaxKind::YAML_DOCUMENT
)
}) {
// Tally the direct child nodes by category; tokens are ignored.
let mut struct_count = 0usize;
let mut scalar_count = 0usize;
let mut last_struct = None;
for child in node.children_with_tokens() {
match &child {
NodeOrToken::Node(n) => {
if matches!(
n.kind(),
SyntaxKind::YAML_BLOCK_MAP | SyntaxKind::YAML_BLOCK_SEQUENCE
) {
struct_count += 1;
last_struct = Some(n.clone());
} else if n.kind() == SyntaxKind::YAML_SCALAR {
scalar_count += 1;
}
}
NodeOrToken::Token(_) => {}
}
}
let is_doc = node.kind() == SyntaxKind::YAML_DOCUMENT;
// Case 1: several sibling block collections (documents are exempt).
if !is_doc && struct_count > 1 {
let n = last_struct.expect("struct_count > 1 implies last_struct set");
return Some(diag_at_range(
n.text_range().start().into(),
n.text_range().end().into(),
diagnostic_codes::PARSE_UNEXPECTED_DEDENT,
"block collection has mismatched indentation, splitting it into siblings",
));
}
// Case 2: a scalar that comes after a block collection in a map value.
if struct_count >= 1
&& scalar_count >= 1
&& node.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE
&& let Some(trailing_scalar) = scalar_after_structural_in_block_map_value(&node)
{
return Some(diag_at_range(
trailing_scalar.text_range().start().into(),
trailing_scalar.text_range().end().into(),
diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
"stray scalar after a block collection in a block-map value",
));
}
// Case 3: several sibling scalars.
if scalar_count > 1 {
// At document level this is only reported when a comment sits between two scalars.
if is_doc && !has_comment_between_scalars(&node) {
continue;
}
let scalars: Vec<_> = node
.children()
.filter(|n| n.kind() == SyntaxKind::YAML_SCALAR)
.collect();
let last_scalar = scalars
.last()
.expect("scalar_count > 1 implies at least one scalar child");
// Map values and documents get the "comment inside scalar" wording; the
// remaining kind filtered above (sequence items) gets the "stray content" wording.
let (code, message) = match node.kind() {
SyntaxKind::YAML_BLOCK_MAP_VALUE | SyntaxKind::YAML_DOCUMENT => (
diagnostic_codes::PARSE_UNEXPECTED_DEDENT,
"comment cannot appear inside a multi-line plain scalar",
),
_ => (
diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
"stray content following a block sequence item at its indent level",
),
};
return Some(diag_at_range(
last_scalar.text_range().start().into(),
last_scalar.text_range().end().into(),
code,
message,
));
}
}
None
}
/// Reports whether two scalar children of `node` have a comment token
/// between them.
///
/// Scalars whose text starts with `%` are ignored entirely. Document-start
/// and document-end tokens reset the tracking, so a comment only counts when
/// both scalars lie on the same side of such a marker.
fn has_comment_between_scalars(node: &SyntaxNode) -> bool {
    let mut scalar_pending = false;
    let mut comment_pending = false;

    for element in node.children_with_tokens() {
        match element {
            NodeOrToken::Token(tok) => {
                let kind = tok.kind();
                if kind == SyntaxKind::YAML_COMMENT {
                    // A comment only matters once a scalar has been seen.
                    if scalar_pending {
                        comment_pending = true;
                    }
                } else if kind == SyntaxKind::YAML_DOCUMENT_START
                    || kind == SyntaxKind::YAML_DOCUMENT_END
                {
                    scalar_pending = false;
                    comment_pending = false;
                }
            }
            NodeOrToken::Node(child) => {
                if child.kind() != SyntaxKind::YAML_SCALAR {
                    continue;
                }
                if child.text().to_string().starts_with('%') {
                    continue;
                }
                if scalar_pending && comment_pending {
                    return true;
                }
                scalar_pending = true;
                comment_pending = false;
            }
        }
    }
    false
}
/// Returns the first scalar child node of `value` that comes after a block
/// map or block sequence child, if any.
fn scalar_after_structural_in_block_map_value(value: &SyntaxNode) -> Option<SyntaxNode> {
    value
        .children()
        // Drop everything before the first block collection.
        .skip_while(|child| {
            !matches!(
                child.kind(),
                SyntaxKind::YAML_BLOCK_MAP | SyntaxKind::YAML_BLOCK_SEQUENCE
            )
        })
        .find(|child| child.kind() == SyntaxKind::YAML_SCALAR)
}
/// Reports a block sequence that begins on the same line as its implicit
/// key.
///
/// For every block-map value whose entry key is not explicit, the direct
/// children before the first newline token are searched for a block-sequence
/// node. The diagnostic is one byte wide and sits at the start of the
/// sequence's second item, or at the start of the sequence itself when it has
/// fewer than two items.
fn check_inline_block_seq_in_value(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let implicit_values = tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
        .filter(|value| !block_map_entry_key_is_explicit(value));

    for value in implicit_values {
        let inline_sequence = value
            .children_with_tokens()
            .take_while(|el| {
                !matches!(el, NodeOrToken::Token(t) if t.kind() == SyntaxKind::NEWLINE)
            })
            .filter_map(|el| el.into_node())
            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE);
        let Some(sequence) = inline_sequence else {
            continue;
        };

        let anchor = sequence
            .children()
            .filter(|c| c.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
            .nth(1)
            .unwrap_or_else(|| sequence.clone());
        let at: usize = anchor.text_range().start().into();
        return Some(diag_at_range(
            at,
            at + 1,
            diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
            "block sequence cannot start on the same line as its key",
        ));
    }
    None
}
/// Reports whether the entry that owns `value` has a block-map key node
/// containing an explicit key token.
///
/// Returns `false` when `value` has no parent.
fn block_map_entry_key_is_explicit(value: &SyntaxNode) -> bool {
    let Some(entry) = value.parent() else {
        return false;
    };
    for key in entry.children() {
        if key.kind() != SyntaxKind::YAML_BLOCK_MAP_KEY {
            continue;
        }
        let has_key_marker = key
            .children_with_tokens()
            .filter_map(|el| el.into_token())
            .any(|tok| tok.kind() == SyntaxKind::YAML_KEY);
        if has_key_marker {
            return true;
        }
    }
    false
}
/// Reports whitespace tokens containing a tab that act as indentation.
///
/// Scans the direct children of every block node (map, sequence, key, value,
/// sequence item) and of every flow sequence/map that has a block ancestor.
/// A whitespace token containing a tab is reported with
/// `PARSE_UNEXPECTED_INDENT` when any of these holds:
/// * it starts with a tab, follows a newline token, and is followed by
///   something other than a newline token (and is not the last child);
/// * it starts with a tab and follows a scalar node whose text starts with
///   `|` or `>` and ends with a newline;
/// * it is followed by a block map/sequence node and either follows a
///   sequence-entry, explicit-key or colon token, or is the first child of a
///   block-map value.
fn check_tab_as_indent(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
// Builds the diagnostic spanning the offending whitespace token.
fn tab_diag(t: &crate::syntax::SyntaxToken) -> YamlDiagnostic {
diag_at_range(
t.text_range().start().into(),
t.text_range().end().into(),
diagnostic_codes::PARSE_UNEXPECTED_INDENT,
"tab character used as indentation is not allowed in YAML",
)
}
// True when any node yielded by `ancestors()` is a block construct.
fn flow_has_block_ancestor(n: &SyntaxNode) -> bool {
n.ancestors().any(|a| {
matches!(
a.kind(),
SyntaxKind::YAML_BLOCK_MAP
| SyntaxKind::YAML_BLOCK_SEQUENCE
| SyntaxKind::YAML_BLOCK_MAP_KEY
| SyntaxKind::YAML_BLOCK_MAP_VALUE
| SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
)
})
}
for node in tree.descendants().filter(|n| {
let is_block = matches!(
n.kind(),
SyntaxKind::YAML_BLOCK_MAP_VALUE
| SyntaxKind::YAML_BLOCK_MAP_KEY
| SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
| SyntaxKind::YAML_BLOCK_MAP
| SyntaxKind::YAML_BLOCK_SEQUENCE
);
let is_nested_flow = matches!(
n.kind(),
SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
) && flow_has_block_ancestor(n);
is_block || is_nested_flow
}) {
// Collected up front so each token can look at its previous and next sibling.
let children: Vec<_> = node.children_with_tokens().collect();
for (i, child) in children.iter().enumerate() {
let NodeOrToken::Token(t) = child else {
continue;
};
// Only whitespace tokens that contain a tab are of interest.
if t.kind() != SyntaxKind::WHITESPACE || !t.text().contains('\t') {
continue;
}
let starts_with_tab = t.text().starts_with('\t');
// Kind and text of the previous sibling, if there is one.
let prev_kind = i
.checked_sub(1)
.and_then(|j| children.get(j))
.map(|c| match c {
NodeOrToken::Token(pt) => (pt.kind(), pt.text().to_string()),
NodeOrToken::Node(pn) => (pn.kind(), pn.text().to_string()),
});
let next = children.get(i + 1);
let next_is_newline = matches!(
next,
Some(NodeOrToken::Token(nt)) if nt.kind() == SyntaxKind::NEWLINE
);
let at_eof = next.is_none();
// Rule 1: tab at the start of a line that has further content.
let prev_is_newline_token = matches!(&prev_kind, Some((SyntaxKind::NEWLINE, _)));
if prev_is_newline_token && starts_with_tab && !next_is_newline && !at_eof {
return Some(tab_diag(t));
}
// Rule 2: tab directly after a block scalar that ended with a newline.
let prev_is_block_scalar_with_trailing_newline = matches!(&prev_kind, Some((SyntaxKind::YAML_SCALAR, text))
if (text.starts_with('|') || text.starts_with('>')) && text.ends_with('\n'));
if prev_is_block_scalar_with_trailing_newline && starts_with_tab {
return Some(tab_diag(t));
}
// Rule 3: tab between a block indicator (or the start of a map value) and
// a nested block collection.
let prev_is_block_indicator = matches!(
&prev_kind,
Some((
SyntaxKind::YAML_BLOCK_SEQ_ENTRY
| SyntaxKind::YAML_KEY
| SyntaxKind::YAML_COLON,
_,
))
);
let leads_block_map_value = i == 0 && node.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE;
let next_is_block_collection = matches!(
next,
Some(NodeOrToken::Node(n))
if matches!(
n.kind(),
SyntaxKind::YAML_BLOCK_SEQUENCE | SyntaxKind::YAML_BLOCK_MAP
)
);
if (prev_is_block_indicator || leads_block_map_value) && next_is_block_collection {
return Some(tab_diag(t));
}
}
}
None
}
/// Validates the header line of every block scalar (a scalar node whose text
/// starts with `>` or `|`).
///
/// The header is the scalar text up to its first newline. After the leading
/// indicator, a run of `+`, `-` and ASCII digits is consumed, and then the
/// following are reported, all with `PARSE_INVALID_KEY_TOKEN`:
/// * a single digit that is `0`;
/// * more than one digit in the run;
/// * a `#` immediately after the run (comment without preceding whitespace);
/// * any remaining text that, after optional spaces/tabs, is neither empty
///   nor a comment.
fn check_block_scalar_header(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
// Note: `token` is bound to scalar *nodes* yielded by `descendants()`.
for token in tree
.descendants()
.filter(|n| n.kind() == SyntaxKind::YAML_SCALAR)
{
let text = token.text().to_string();
if !text.starts_with('>') && !text.starts_with('|') {
continue;
}
let header_end = text.find('\n').unwrap_or(text.len());
let header = &text[..header_end];
let bytes = header.as_bytes();
// Start at 1 to skip the `>` / `|` indicator itself.
let mut i = 1usize;
// Byte offsets (within the header) describing the digits seen in the run.
let mut digit_start: Option<usize> = None;
let mut digit_end: usize = i;
let mut digit_count: usize = 0;
let mut zero_digit: Option<usize> = None;
while i < bytes.len() && (bytes[i] == b'+' || bytes[i] == b'-' || bytes[i].is_ascii_digit())
{
if bytes[i].is_ascii_digit() {
if digit_start.is_none() {
digit_start = Some(i);
}
digit_end = i + 1;
digit_count += 1;
if bytes[i] == b'0' && zero_digit.is_none() {
zero_digit = Some(i);
}
}
i += 1;
}
if digit_count > 0 {
let scalar_start: usize = token.text_range().start().into();
// Exactly one digit and it is `0`: flag that digit.
if let Some(zero_off) = zero_digit
&& digit_count == 1
{
return Some(diag_at_range(
scalar_start + zero_off,
scalar_start + zero_off + 1,
diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
"block scalar indent indicator must be in range 1-9",
));
}
// Several digits: flag from the first digit through the last one.
if digit_count > 1
&& let Some(start_off) = digit_start
{
return Some(diag_at_range(
scalar_start + start_off,
scalar_start + digit_end,
diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
"block scalar indent indicator must be a single digit in range 1-9",
));
}
}
// Whatever follows the indicator run on the header line.
let rest = &header[i..];
if rest.is_empty() {
continue;
}
// A `#` glued to the indicators is not a valid comment start.
if rest.starts_with('#') {
let scalar_start: usize = token.text_range().start().into();
return Some(diag_at_range(
scalar_start + i,
scalar_start + i + 1,
diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
"comment after block scalar indicator must be preceded by whitespace",
));
}
let leading_ws = rest
.bytes()
.take_while(|b| *b == b' ' || *b == b'\t')
.count();
let after_ws = &rest[leading_ws..];
// Trailing whitespace only, or a properly separated comment: header is fine.
if after_ws.is_empty() || after_ws.starts_with('#') {
continue;
}
// Anything else on the header line is reported up to the end of that line.
let scalar_start: usize = token.text_range().start().into();
let content_start = scalar_start + i + leading_ws;
let content_end = scalar_start + header_end;
return Some(diag_at_range(
content_start,
content_end,
diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
"block scalar header line must end at EOL or with a comment",
));
}
None
}
/// Flags block scalars (`|` / `>`) without an explicit indentation digit whose
/// leading blank lines carry more spaces than the first content line.
///
/// Returns a `PARSE_UNEXPECTED_INDENT` diagnostic covering the first byte of
/// the first over-indented blank line, or `None` when no scalar is affected.
fn check_block_scalar_leading_indent(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let scalars = tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_SCALAR);
    for scalar in scalars {
        let body = scalar.text().to_string();
        if !matches!(body.as_bytes().first(), Some(b'>' | b'|')) {
            continue;
        }
        let header_len = body.find('\n').unwrap_or(body.len());
        // Indicators directly after the `|`/`>`: chomping (`+`/`-`) and digits.
        // A digit means the indentation is explicit, so this check is skipped.
        let has_explicit_indent = body
            .as_bytes()
            .iter()
            .take(header_len)
            .skip(1)
            .take_while(|b| matches!(**b, b'+' | b'-') || b.is_ascii_digit())
            .any(|b| b.is_ascii_digit());
        if has_explicit_indent {
            continue;
        }
        let base: usize = scalar.text_range().start().into();
        // (space count, offset relative to the scalar) of each all-space line
        // seen before the first content line.
        let mut blanks: Vec<(usize, usize)> = Vec::new();
        let mut line_start = header_len + 1;
        while line_start <= body.len() {
            let line_end = body[line_start..]
                .find('\n')
                .map_or(body.len(), |rel| line_start + rel);
            let line = &body[line_start..line_end];
            let is_last_line = line_end >= body.len();
            if line.contains('\t') {
                // Whitespace-only lines containing a tab are stepped over
                // without being recorded; any other tab line ends the scan.
                let whitespace_only = line.trim_matches([' ', '\t']).is_empty();
                if !whitespace_only || is_last_line {
                    break;
                }
                line_start = line_end + 1;
                continue;
            }
            let indent = line.bytes().take_while(|b| *b == b' ').count();
            if indent < line.len() {
                // First content line: compare it against the recorded blanks.
                let offender = blanks.iter().find(|(spaces, _)| *spaces > indent);
                if let Some(&(_, rel)) = offender {
                    let at = base + rel;
                    return Some(diag_at_range(
                        at,
                        at + 1,
                        diagnostic_codes::PARSE_UNEXPECTED_INDENT,
                        "block scalar leading empty line is more indented than its content",
                    ));
                }
                break;
            }
            blanks.push((indent, line_start));
            if is_last_line {
                break;
            }
            line_start = line_end + 1;
        }
    }
    None
}
/// Detects a scalar followed by a block map whose first key consists of a
/// colon only, first inside block-map values and then directly under a document.
///
/// The value-level check runs first and wins if it reports anything. At
/// document level the diagnostic spans the scalar; its code and message depend
/// on whether a document-start token was seen earlier in the same document.
fn check_doc_level_bare_scalar_then_colon_map(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let value_level = check_value_level_scalar_then_colon_map(tree);
    if value_level.is_some() {
        return value_level;
    }
    let documents = tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_DOCUMENT);
    for document in documents {
        let mut after_doc_start = false;
        // Most recent scalar child not yet separated from what follows by a
        // significant token or another node.
        let mut pending: Option<SyntaxNode> = None;
        for element in document.children_with_tokens() {
            match element {
                NodeOrToken::Token(tok) => match tok.kind() {
                    SyntaxKind::YAML_DOCUMENT_START => after_doc_start = true,
                    // Properties, trivia and comments do not separate the
                    // scalar from a following block map.
                    SyntaxKind::YAML_ANCHOR
                    | SyntaxKind::YAML_ALIAS
                    | SyntaxKind::WHITESPACE
                    | SyntaxKind::NEWLINE
                    | SyntaxKind::YAML_COMMENT => {}
                    _ => pending = None,
                },
                NodeOrToken::Node(node) => {
                    if node.kind() == SyntaxKind::YAML_SCALAR {
                        pending = Some(node);
                        continue;
                    }
                    if node.kind() != SyntaxKind::YAML_BLOCK_MAP {
                        pending = None;
                        continue;
                    }
                    // Any non-scalar node consumes the pending scalar.
                    let Some(scalar) = pending.take() else {
                        continue;
                    };
                    if !first_entry_has_colon_only_key(&node) {
                        continue;
                    }
                    let (code, message) = if after_doc_start {
                        (
                            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_START,
                            "trailing content after document start marker",
                        )
                    } else {
                        (
                            diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
                            "unexpected scalar at block-map level (no key)",
                        )
                    };
                    let span = scalar.text_range();
                    return Some(diag_at_range(
                        span.start().into(),
                        span.end().into(),
                        code,
                        message,
                    ));
                }
            }
        }
    }
    None
}
/// Inside each block-map value, reports a content-bearing scalar that is
/// followed by a block map whose first key consists of a colon only.
///
/// The diagnostic spans the scalar and uses `PARSE_INVALID_KEY_TOKEN`. The
/// message depends on whether the scalar text contains a line break.
fn check_value_level_scalar_then_colon_map(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let values = tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE);
    for value in values {
        let mut pending: Option<SyntaxNode> = None;
        for element in value.children_with_tokens() {
            let node = match element {
                NodeOrToken::Token(tok) => {
                    // Properties, trivia and comments keep the pending scalar;
                    // every other token discards it.
                    let transparent = matches!(
                        tok.kind(),
                        SyntaxKind::YAML_ANCHOR
                            | SyntaxKind::YAML_ALIAS
                            | SyntaxKind::WHITESPACE
                            | SyntaxKind::NEWLINE
                            | SyntaxKind::YAML_COMMENT
                    );
                    if !transparent {
                        pending = None;
                    }
                    continue;
                }
                NodeOrToken::Node(node) => node,
            };
            if node.kind() == SyntaxKind::YAML_SCALAR {
                pending = Some(node);
                continue;
            }
            // Any non-scalar node consumes the pending scalar.
            let Some(scalar) = pending.take() else {
                continue;
            };
            if node.kind() != SyntaxKind::YAML_BLOCK_MAP || !first_entry_has_colon_only_key(&node)
            {
                continue;
            }
            let scalar_text = scalar.text().to_string();
            if !scalar_is_content_implicit_key(&scalar_text) {
                continue;
            }
            let message = if scalar_text.contains('\n') {
                "implicit key cannot span lines"
            } else {
                "mapping values are not allowed in this context"
            };
            let span = scalar.text_range();
            return Some(diag_at_range(
                span.start().into(),
                span.end().into(),
                diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
                message,
            ));
        }
    }
    None
}
/// Returns `true` when the first line of `text` contains at least one
/// whitespace-separated word that is not a node property or alias, i.e. that
/// does not start with `&`, `!` or `*`.
///
/// Empty or whitespace-only first lines yield `false`.
fn scalar_is_content_implicit_key(text: &str) -> bool {
    let first_line = text.split('\n').next().unwrap_or(text);
    first_line
        .split_whitespace()
        .any(|word| !word.starts_with(['&', '!', '*']))
}
/// Returns `true` when the key of the first entry of `block_map` contains a
/// colon token and otherwise only whitespace and newline tokens.
///
/// A missing entry or key, a child node inside the key, or any other token
/// kind yields `false`.
fn first_entry_has_colon_only_key(block_map: &SyntaxNode) -> bool {
    let first_key = block_map
        .children()
        .find(|c| c.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
        .and_then(|entry| {
            entry
                .children()
                .find(|c| c.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
        });
    let Some(key) = first_key else {
        return false;
    };
    let mut colon_seen = false;
    for element in key.children_with_tokens() {
        // A nested node means the key carries real content.
        let NodeOrToken::Token(tok) = element else {
            return false;
        };
        match tok.kind() {
            SyntaxKind::YAML_COLON => colon_seen = true,
            SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE => {}
            _ => return false,
        }
    }
    colon_seen
}
/// Reports a block map or block sequence that appears inside a block-map value
/// after a content-bearing scalar and at least one line break.
///
/// The diagnostic (`PARSE_UNEXPECTED_INDENT`) starts at the collection and
/// covers its first character. The end offset is advanced by that character's
/// UTF-8 length, so the range never ends inside a multi-byte character (for
/// example when the collection starts with a non-ASCII key).
fn check_block_collection_after_value_scalar(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    for value in tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE)
    {
        let mut last_scalar: Option<SyntaxNode> = None;
        let mut saw_newline_after_scalar = false;
        for child in value.children_with_tokens() {
            match child {
                NodeOrToken::Token(t) => match t.kind() {
                    // A newline only matters once a scalar has been seen.
                    SyntaxKind::NEWLINE => saw_newline_after_scalar |= last_scalar.is_some(),
                    SyntaxKind::WHITESPACE | SyntaxKind::YAML_COMMENT => {}
                    _ => {
                        last_scalar = None;
                        saw_newline_after_scalar = false;
                    }
                },
                NodeOrToken::Node(n) if n.kind() == SyntaxKind::YAML_SCALAR => {
                    last_scalar = Some(n);
                    saw_newline_after_scalar = false;
                }
                NodeOrToken::Node(n) => {
                    if saw_newline_after_scalar
                        && matches!(
                            n.kind(),
                            SyntaxKind::YAML_BLOCK_MAP | SyntaxKind::YAML_BLOCK_SEQUENCE
                        )
                        && let Some(scalar) = last_scalar.as_ref()
                        && scalar_is_content_implicit_key(&scalar.text().to_string())
                    {
                        let start: usize = n.text_range().start().into();
                        // Width of the collection's first character; falls back
                        // to one byte if the node has no text at all.
                        let first_char_len = n
                            .descendants_with_tokens()
                            .filter_map(|el| el.into_token())
                            .find_map(|tok| tok.text().chars().next())
                            .map_or(1, char::len_utf8);
                        return Some(diag_at_range(
                            start,
                            start + first_char_len,
                            diagnostic_codes::PARSE_UNEXPECTED_INDENT,
                            "block collection cannot follow a scalar value at a deeper indent",
                        ));
                    }
                    last_scalar = None;
                    saw_newline_after_scalar = false;
                }
            }
        }
    }
    None
}
/// Checks the indentation of continuation lines inside flow collections that
/// sit in a block-map value.
///
/// For each flow sequence or map with an enclosing block map, every line after
/// the first is inspected: leading spaces and tabs are counted (one column
/// each), blank lines and lines that begin with the matching closer (`]` /
/// `}`) are skipped, and a line whose indent is not greater than the byte
/// column of the enclosing block map is reported as `LEX_WRONG_INDENTED_FLOW`.
///
/// The diagnostic runs from the start of the offending line through its first
/// non-blank character. The end offset uses that character's UTF-8 length so
/// it cannot fall inside a multi-byte character.
fn check_flow_continuation_indent(tree: &SyntaxNode, input: &str) -> Option<YamlDiagnostic> {
    let bytes = input.as_bytes();
    for flow in tree.descendants().filter(|n| {
        matches!(
            n.kind(),
            SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
        )
    }) {
        let Some(block_map) = enclosing_block_map_for_flow(&flow) else {
            continue;
        };
        let block_map_start: usize = block_map.text_range().start().into();
        let threshold = column_of(input, block_map_start);
        let flow_start: usize = flow.text_range().start().into();
        let flow_end: usize = flow.text_range().end().into();
        let closer = match flow.kind() {
            SyntaxKind::YAML_FLOW_SEQUENCE => b']',
            SyntaxKind::YAML_FLOW_MAP => b'}',
            // The filter above admits only the two flow collection kinds.
            _ => unreachable!(),
        };
        let mut i = flow_start;
        while i < flow_end {
            if bytes[i] != b'\n' {
                i += 1;
                continue;
            }
            let line_start = i + 1;
            if line_start >= flow_end {
                break;
            }
            // Measure the indentation of the line that follows the newline.
            let mut col = 0usize;
            let mut j = line_start;
            while j < flow_end && (bytes[j] == b' ' || bytes[j] == b'\t') {
                col += 1;
                j += 1;
            }
            // Blank lines and lines starting with the closer are exempt.
            if j >= flow_end || bytes[j] == b'\n' || bytes[j] == closer {
                i = j;
                continue;
            }
            if col <= threshold {
                // `j` follows a newline plus ASCII blanks, so it is a char
                // boundary; `get` keeps this panic-free regardless.
                let first_char_len = input
                    .get(j..)
                    .and_then(|rest| rest.chars().next())
                    .map_or(1, char::len_utf8);
                return Some(diag_at_range(
                    line_start,
                    j + first_char_len,
                    diagnostic_codes::LEX_WRONG_INDENTED_FLOW,
                    "wrong indentation for continued flow collection",
                ));
            }
            i = j;
        }
    }
    None
}
fn enclosing_block_map_for_flow(flow: &SyntaxNode) -> Option<SyntaxNode> {
let mut node = flow.parent();
let mut saw_block_map_value = false;
while let Some(current) = node {
match current.kind() {
SyntaxKind::YAML_BLOCK_MAP_VALUE => saw_block_map_value = true,
SyntaxKind::YAML_BLOCK_MAP if saw_block_map_value => return Some(current),
_ => {}
}
node = current.parent();
}
None
}
/// Reports a `---` or `...` marker that starts a line inside a flow collection.
///
/// Only scalars that begin at byte 0 or directly after a newline are
/// considered. Scalars starting with a quote or a block scalar indicator are
/// ignored, and the marker must be followed by end of text, a space, a tab or
/// a newline. The diagnostic spans the three marker bytes.
fn check_flow_doc_markers(tree: &SyntaxNode, input: &str) -> Option<YamlDiagnostic> {
    const MARKER_LEN: usize = 3;
    let source = input.as_bytes();
    let flows = tree.descendants().filter(|n| {
        matches!(
            n.kind(),
            SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
        )
    });
    for flow in flows {
        let scalars = flow
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::YAML_SCALAR);
        for scalar in scalars {
            let start: usize = scalar.text_range().start().into();
            if start != 0 && source.get(start - 1) != Some(&b'\n') {
                continue;
            }
            let text = scalar.text().to_string();
            if text.starts_with(['"', '\'', '|', '>']) {
                continue;
            }
            let msg = if text.starts_with("---") {
                "`---` document marker not allowed in flow content"
            } else if text.starts_with("...") {
                "`...` document marker not allowed in flow content"
            } else {
                continue;
            };
            // The marker must stand alone: nothing, or blank space, after it.
            let standalone = matches!(
                text.as_bytes().get(MARKER_LEN),
                None | Some(b' ' | b'\t' | b'\n')
            );
            if !standalone {
                continue;
            }
            return Some(diag_at_range(
                start,
                start + MARKER_LEN,
                diagnostic_codes::PARSE_INVALID_PLAIN_SCALAR_IN_FLOW,
                msg,
            ));
        }
    }
    None
}
/// Reports the first invalid backslash escape found in a double-quoted scalar.
///
/// Only scalars whose text starts with `"` are examined. The diagnostic covers
/// the single byte at the offset returned by `invalid_dq_escape_offset`.
fn check_invalid_dq_escapes(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    tree.descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_SCALAR)
        .find_map(|scalar| {
            let text = scalar.text().to_string();
            if !text.starts_with('"') {
                return None;
            }
            let bad = invalid_dq_escape_offset(&text)?;
            let base: usize = scalar.text_range().start().into();
            Some(diag_at_range(
                base + bad,
                base + bad + 1,
                diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE,
                "invalid escape in double quoted scalar",
            ))
        })
}
/// Reports the first comment token that is not at the very start of the input
/// and is not directly preceded by a newline, carriage return, space or tab.
///
/// The diagnostic spans the whole comment token.
fn check_comment_not_preceded_by_space(tree: &SyntaxNode, input: &str) -> Option<YamlDiagnostic> {
    tree.descendants_with_tokens()
        .filter_map(|el| el.into_token())
        .filter(|t| t.kind() == SyntaxKind::YAML_COMMENT)
        .find_map(|comment| {
            let span = comment.text_range();
            let start: usize = span.start().into();
            let before = &input[..start];
            if before.is_empty() || before.ends_with(['\n', '\r', ' ', '\t']) {
                return None;
            }
            Some(diag_at_range(
                start,
                span.end().into(),
                diagnostic_codes::LEX_COMMENT_NOT_PRECEDED_BY_SPACE,
                "comment must be preceded by whitespace",
            ))
        })
}
/// Reports the first `,`, `{` or `}` found inside a tag token.
///
/// Tags beginning with `!<` are skipped entirely. The diagnostic spans the
/// offending character.
fn check_invalid_tag_chars(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let tags = tree
        .descendants_with_tokens()
        .filter_map(|c| c.into_token())
        .filter(|t| t.kind() == SyntaxKind::YAML_TAG);
    for tag in tags {
        let text = tag.text();
        if text.starts_with("!<") {
            continue;
        }
        let offender = text
            .char_indices()
            .find(|&(_, c)| matches!(c, ',' | '{' | '}'));
        let Some((rel, bad)) = offender else {
            continue;
        };
        let at = Into::<usize>::into(tag.text_range().start()) + rel;
        return Some(diag_at_range(
            at,
            at + bad.len_utf8(),
            diagnostic_codes::PARSE_INVALID_TAG_CHARACTER,
            "invalid character in tag",
        ));
    }
    None
}
/// Reports an anchor that is followed, within the same key/value/item
/// container, by a second anchor or by an alias.
///
/// Only whitespace, newlines and comments may sit between the two tokens. The
/// diagnostic spans the second token: `PARSE_MULTIPLE_ANCHORS_ON_NODE` for an
/// anchor, `PARSE_ANCHOR_DECORATES_ALIAS` for an alias.
fn check_anchor_decorates_alias(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let containers = tree.descendants().filter(|n| {
        matches!(
            n.kind(),
            SyntaxKind::YAML_BLOCK_MAP_KEY
                | SyntaxKind::YAML_BLOCK_MAP_VALUE
                | SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
                | SyntaxKind::YAML_FLOW_MAP_KEY
                | SyntaxKind::YAML_FLOW_MAP_VALUE
                | SyntaxKind::YAML_FLOW_SEQUENCE_ITEM
        )
    });
    for container in containers {
        // True while an anchor has been seen and nothing but trivia since.
        let mut anchor_open = false;
        for element in container.children_with_tokens() {
            match element {
                NodeOrToken::Node(_) => anchor_open = false,
                NodeOrToken::Token(tok) => {
                    let kind = tok.kind();
                    if matches!(
                        kind,
                        SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::YAML_COMMENT
                    ) {
                        continue;
                    }
                    let is_anchor = kind == SyntaxKind::YAML_ANCHOR;
                    if anchor_open && (is_anchor || kind == SyntaxKind::YAML_ALIAS) {
                        let (code, message) = if is_anchor {
                            (
                                diagnostic_codes::PARSE_MULTIPLE_ANCHORS_ON_NODE,
                                "node cannot have multiple anchors",
                            )
                        } else {
                            (
                                diagnostic_codes::PARSE_ANCHOR_DECORATES_ALIAS,
                                "alias node cannot be decorated with an anchor",
                            )
                        };
                        let span = tok.text_range();
                        return Some(diag_at_range(
                            span.start().into(),
                            span.end().into(),
                            code,
                            message,
                        ));
                    }
                    anchor_open = is_anchor;
                }
            }
        }
    }
    None
}
/// Reports an anchor that is followed by a block sequence node with only
/// whitespace or comments in between.
///
/// Checked containers are documents, block-map keys, block-map values and
/// block-sequence items. A newline or any other token between the anchor and
/// the sequence clears the pending anchor. The diagnostic spans the anchor.
fn check_anchor_before_block_indicator(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let containers = tree.descendants().filter(|n| {
        matches!(
            n.kind(),
            SyntaxKind::YAML_DOCUMENT
                | SyntaxKind::YAML_BLOCK_MAP_KEY
                | SyntaxKind::YAML_BLOCK_MAP_VALUE
                | SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
        )
    });
    for container in containers {
        // Byte range of the most recent anchor still awaiting its node.
        let mut pending: Option<(usize, usize)> = None;
        for element in container.children_with_tokens() {
            match element {
                NodeOrToken::Node(node) => {
                    // Every node consumes the pending anchor; only a block
                    // sequence makes it an error.
                    let anchor = pending.take();
                    if node.kind() != SyntaxKind::YAML_BLOCK_SEQUENCE {
                        continue;
                    }
                    if let Some((start, end)) = anchor {
                        return Some(diag_at_range(
                            start,
                            end,
                            diagnostic_codes::PARSE_ANCHOR_BEFORE_BLOCK_INDICATOR,
                            "anchor cannot precede a block sequence indicator on the same line",
                        ));
                    }
                }
                NodeOrToken::Token(tok) => {
                    let kind = tok.kind();
                    if kind == SyntaxKind::YAML_ANCHOR {
                        let span = tok.text_range();
                        pending = Some((span.start().into(), span.end().into()));
                    } else if !matches!(kind, SyntaxKind::WHITESPACE | SyntaxKind::YAML_COMMENT) {
                        pending = None;
                    }
                }
            }
        }
    }
    None
}
/// Reports an anchor token that appears inside a block-sequence item after the
/// item already has a value (a child node or an alias token).
///
/// The diagnostic spans the anchor token.
fn check_anchor_without_target(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let items = tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM);
    for item in items {
        let mut has_value = false;
        for element in item.children_with_tokens() {
            let tok = match element {
                NodeOrToken::Node(_) => {
                    has_value = true;
                    continue;
                }
                NodeOrToken::Token(tok) => tok,
            };
            let kind = tok.kind();
            if has_value && kind == SyntaxKind::YAML_ANCHOR {
                let span = tok.text_range();
                return Some(diag_at_range(
                    span.start().into(),
                    span.end().into(),
                    diagnostic_codes::PARSE_ANCHOR_WITHOUT_TARGET,
                    "anchor has no target node",
                ));
            }
            if kind == SyntaxKind::YAML_ALIAS {
                has_value = true;
            }
        }
    }
    None
}
/// Reports a tag or anchor token in a block-map value that appears after a
/// newline at a column not greater than the column of the entry's key.
///
/// Columns are byte columns as computed by `column_of`. Entries without both a
/// key and a value node are skipped. The diagnostic spans the property token.
fn check_node_property_underindented(tree: &SyntaxNode, input: &str) -> Option<YamlDiagnostic> {
    let entries = tree
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY);
    for entry in entries {
        let key = entry
            .children()
            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY);
        let value = entry
            .children()
            .find(|n| n.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE);
        let (Some(key), Some(value)) = (key, value) else {
            continue;
        };
        let key_col = column_of(input, key.text_range().start().into());
        // Set by the first newline token in the value and never cleared.
        let mut past_first_newline = false;
        let tokens = value
            .children_with_tokens()
            .filter_map(|el| el.into_token());
        for tok in tokens {
            let kind = tok.kind();
            if kind == SyntaxKind::NEWLINE {
                past_first_newline = true;
                continue;
            }
            let is_property = matches!(kind, SyntaxKind::YAML_TAG | SyntaxKind::YAML_ANCHOR);
            if !past_first_newline || !is_property {
                continue;
            }
            let span = tok.text_range();
            if column_of(input, span.start().into()) <= key_col {
                return Some(diag_at_range(
                    span.start().into(),
                    span.end().into(),
                    diagnostic_codes::PARSE_NODE_PROPERTY_UNDERINDENTED,
                    "node property must be indented more than the parent key",
                ));
            }
        }
    }
    None
}
/// Returns the byte offset of the backslash that starts the first invalid
/// escape inside a double-quoted section of `text`, or `None` if all escapes
/// are acceptable.
///
/// Text outside `"`…`"` is ignored. A backslash that is the last character of
/// `text` while inside quotes is also reported.
fn invalid_dq_escape_offset(text: &str) -> Option<usize> {
    let mut inside_quotes = false;
    // Offset of a backslash whose escaped character has not been read yet.
    let mut pending_backslash: Option<usize> = None;
    for (pos, c) in text.char_indices() {
        if !inside_quotes {
            inside_quotes = c == '"';
            continue;
        }
        if let Some(backslash) = pending_backslash.take() {
            if is_valid_dq_escape(c) {
                continue;
            }
            return Some(backslash);
        }
        if c == '"' {
            inside_quotes = false;
        } else if c == '\\' {
            pending_backslash = Some(pos);
        }
    }
    // Still pending here only when the backslash was the final character.
    pending_backslash
}
/// Returns `true` when `ch` is accepted directly after a backslash in a
/// double-quoted scalar.
///
/// The accepted set includes literal tab, line feed, carriage return and
/// space. For `x`, `u` and `U` only the introducer is checked here.
fn is_valid_dq_escape(ch: char) -> bool {
    const ESCAPE_CHARS: &str = "0abt\t\n\rnvfre \"/\\N_LPxuU";
    ESCAPE_CHARS.contains(ch)
}
/// Returns the zero-based column of `byte_offset` within its line, counted in
/// bytes from the character after the preceding `\n` (or from the start of
/// `input` when there is none).
///
/// Panics if `byte_offset` is past the end of `input` or not on a character
/// boundary, because the prefix is obtained by slicing.
fn column_of(input: &str, byte_offset: usize) -> usize {
    let line_start = input[..byte_offset].rfind('\n').map_or(0, |nl| nl + 1);
    byte_offset - line_start
}
/// Builds a `YamlDiagnostic` from a byte range, a diagnostic code and a
/// message, copying the four arguments into the fields of the same names.
fn diag_at_range(
    byte_start: usize,
    byte_end: usize,
    code: &'static str,
    message: &'static str,
) -> YamlDiagnostic {
    YamlDiagnostic {
        code,
        message,
        byte_start,
        byte_end,
    }
}
#[cfg(test)]
mod tests {
use super::*;
fn run(input: &str) -> Option<YamlDiagnostic> {
validate_yaml(input)
}
#[test]
fn validate_yaml_equals_substrate_context() {
let samples = [
"title: ok\n", "this\n is\n invalid: x\n", ": a\n", "a: 1\na: 2\n", ];
for input in samples {
assert_eq!(
validate_yaml(input),
validate_yaml_with_context(input, YamlValidationContext::substrate()),
"substrate wrapper drifted for {input:?}",
);
}
}
#[test]
fn substrate_never_runs_consumer_only_checks() {
assert!(validate_yaml(": a\n").is_none());
assert!(validate_yaml("a: 1\na: 2\n").is_none());
}
#[test]
fn required_simple_key_empty_value_then_unindented_line() {
let input = "\
description:
Basin is my new Rust library for numerical optimization
with pluggable linear-algebra backends
categories:
- Rust
";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::LEX_REQUIRED_SIMPLE_KEY_NOT_FOUND
);
}
#[test]
fn required_simple_key_indented_value_passes() {
let input = "\
description:
Basin is my new Rust library for numerical optimization
with pluggable linear-algebra backends
categories:
- Rust
";
assert!(run(input).is_none(), "got {:?}", run(input));
}
#[test]
fn unterminated_quoted_scalar_at_eof_cq3w() {
let input = "---\nkey: \"missing closing quote";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::LEX_UNTERMINATED_QUOTED_SCALAR);
}
#[test]
fn unterminated_quoted_scalar_aborted_by_doc_marker_5trb_rxy3() {
for input in ["---\n\"\n---\n\"\n", "---\n'\n...\n'\n"] {
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::LEX_UNTERMINATED_QUOTED_SCALAR,
"{input:?}"
);
}
}
#[test]
fn block_scalar_leading_blank_overindented_5llu() {
let input = "block scalar: >\n \n \n \n invalid\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_INDENT);
}
#[test]
fn block_scalar_leading_blank_overindented_w9l4() {
let input = "---\nblock scalar: |\n \n more spaces at the beginning\n are invalid\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_INDENT);
}
#[test]
fn block_scalar_leading_blank_overindented_s98z() {
let input = "empty block scalar: >\n \n \n \n # comment\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_INDENT);
}
#[test]
fn block_scalar_explicit_indent_indicator_not_flagged() {
let input = "a: |2\n \n more\n";
assert!(run(input).is_none(), "got {:?}", run(input));
}
#[test]
fn block_scalar_well_indented_leading_blank_passes() {
let input = "a: |\n \n body\n";
assert!(run(input).is_none(), "got {:?}", run(input));
}
#[test]
fn directive_after_content_eb22() {
let input = "---\nscalar1 # comment\n%YAML 1.2\n---\nscalar2\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT);
}
#[test]
fn directive_after_content_rhx7() {
let input = "---\nkey: value\n%YAML 1.2\n---\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT);
}
#[test]
fn directive_without_document_start_9mma() {
let input = "%YAML 1.2\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START
);
}
#[test]
fn directive_without_document_start_b63p() {
let input = "%YAML 1.2\n...\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START
);
}
#[test]
fn well_formed_directive_then_marker_passes() {
let input = "%YAML 1.2\n---\nfoo: bar\n";
assert!(run(input).is_none());
}
#[test]
fn directive_then_doc_then_directive_with_separator_passes() {
let input = "%YAML 1.2\n---\nfoo: 1\n...\n%YAML 1.2\n---\nbar: 2\n";
assert!(run(input).is_none());
}
#[test]
fn duplicate_yaml_directive_sf5v() {
let input = "%YAML 1.2\n%YAML 1.2\n---\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_DUPLICATE_YAML_DIRECTIVE);
}
#[test]
fn malformed_yaml_directive_trailing_content_h7tq() {
let input = "%YAML 1.2 foo\n---\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_MALFORMED_YAML_DIRECTIVE);
}
#[test]
fn yaml_directive_with_trailing_comment_passes() {
let input = "%YAML 1.2 # comment\n---\nfoo: bar\n";
assert!(run(input).is_none());
}
#[test]
fn yaml_then_tag_directive_passes() {
let input = "%YAML 1.2\n%TAG ! tag:example.com,2000:app/\n---\nfoo: bar\n";
assert!(run(input).is_none());
}
#[test]
fn yaml_directives_across_documents_pass() {
let input = "%YAML 1.2\n---\nfoo: 1\n...\n%YAML 1.2\n---\nbar: 2\n";
assert!(run(input).is_none());
}
#[test]
fn empty_input_passes() {
assert!(run("").is_none());
}
#[test]
fn plain_document_no_directives_passes() {
let input = "key: value\n";
assert!(run(input).is_none());
}
#[test]
fn plain_scalar_continuation_with_percent_passes_xlq9() {
let input = "---\nscalar\n%YAML 1.2\n";
assert!(run(input).is_none());
}
#[test]
fn percent_at_col0_inside_flow_map_is_content_ut92() {
let input = "---\n{ matches\n% : 20 }\n...\n---\n# Empty\n...\n";
assert!(run(input).is_none());
}
#[test]
fn trailing_content_after_doc_end_3hfz() {
let input = "---\nkey: value\n... invalid\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_END
);
}
#[test]
fn trailing_content_after_flow_seq_ks4u() {
let input = "---\n[\nsequence item\n]\ninvalid item\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
);
}
#[test]
fn trailing_extra_flow_closer_4h7k() {
let input = "---\n[ a, b, c ] ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
);
}
#[test]
fn trailing_spaceless_comment_after_flow_9jba() {
let input = "---\n[ a, b, c, ]#invalid\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
);
}
#[test]
fn flow_then_properly_spaced_comment_passes() {
let input = "---\n[ a, b ] # ok\n";
assert!(run(input).is_none());
}
#[test]
fn flow_then_doc_end_passes() {
let input = "---\n[ a, b ]\n...\n";
assert!(run(input).is_none());
}
#[test]
fn doc_end_then_newline_then_content_is_valid_new_doc() {
let input = "---\nfirst\n...\nsecond\n";
assert!(run(input).is_none());
}
#[test]
fn doc_end_with_trailing_spaced_comment_passes() {
let input = "---\nkey: value\n... # comment\n";
assert!(run(input).is_none());
}
#[test]
fn flow_seq_leading_comma_9mag() {
let input = "---\n[ , a, b, c ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
);
}
#[test]
fn flow_seq_double_comma_ctn5() {
let input = "---\n[ a, b, c, , ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
);
}
#[test]
fn flow_map_leading_comma_rejects() {
let input = "---\n{ , a: 1 }\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
);
}
#[test]
fn flow_map_double_comma_rejects() {
let input = "---\n{ a: 1, , b: 2 }\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
);
}
#[test]
fn flow_seq_trailing_comma_passes() {
let input = "---\n[ a, b, c, ]\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_trailing_comma_passes() {
let input = "---\n{ a: 1, b: 2, }\n";
assert!(run(input).is_none());
}
#[test]
fn flow_seq_well_formed_passes() {
let input = "---\n[ a, b, c ]\n";
assert!(run(input).is_none());
}
#[test]
fn flow_seq_empty_passes() {
let input = "---\n[ ]\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_implicit_null_entry_passes_8kb6() {
let input = "---\n- { single line, a: b}\n- { multi\n line, a: b}\n";
assert!(run(input).is_none());
}
#[test]
fn unterminated_flow_seq_6jtt() {
let input = "---\n[ [ a, b, c ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE
);
}
#[test]
fn unterminated_flow_map() {
let input = "---\n{ foo: 1\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNTERMINATED_FLOW_MAP);
}
#[test]
fn balanced_nested_flow_passes() {
let input = "---\n[ [ a, b, c ] ]\n";
assert!(run(input).is_none());
}
#[test]
fn empty_flow_seq_terminated_passes() {
let input = "---\n[ ]\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_plain_entry_passes_4abk() {
let input = "{\nunquoted : \"separate\",\nhttp://foo.com,\nomitted value:,\n}\n";
assert!(run(input).is_none());
}
#[test]
fn flow_seq_implicit_key_spans_lines_dk4h() {
let input = "---\n[ key\n : value ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_INVALID_KEY_TOKEN);
}
#[test]
fn flow_seq_implicit_key_quoted_spans_lines_zxt5() {
let input = "[ \"key\"\n :value ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_INVALID_KEY_TOKEN);
}
#[test]
fn flow_map_missing_comma_t833() {
let input = "---\n{\n foo: 1\n bar: 2 }\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
);
}
#[test]
fn flow_seq_single_line_implicit_key_passes() {
let input = "---\n[ key: value ]\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_well_formed_multiline_passes() {
let input = "---\n{\n foo: 1,\n bar: 2\n}\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_value_starting_with_colon_passes_58mp() {
let input = "{x: :x}\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_value_starting_with_double_colon_passes_5t43() {
let input = "- { \"key\":value }\n- { \"key\"::value }\n";
assert!(run(input).is_none());
}
#[test]
fn flow_seq_explicit_key_spans_lines_passes_ct4q() {
let input = "[\n? foo\n bar : baz\n]\n";
assert!(run(input).is_none());
}
#[test]
fn flow_seq_lone_dash_yjv2() {
let input = "[-]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_PLAIN_SCALAR_IN_FLOW
);
}
#[test]
fn flow_seq_lone_dash_items_g5u8() {
let input = "---\n- [-, -]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_PLAIN_SCALAR_IN_FLOW
);
}
#[test]
fn flow_seq_dash_prefixed_scalar_passes() {
let input = "[-1, -x]\n";
assert!(run(input).is_none(), "got {:?}", run(input));
}
#[test]
fn block_dash_prefixed_scalar_passes() {
let input = "key: -x\n";
assert!(run(input).is_none(), "got {:?}", run(input));
}
#[test]
fn multiline_quoted_under_indent_qb6e() {
let input = "---\nquoted: \"a\nb\nc\"\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn multiline_quoted_properly_indented_passes() {
let input = "---\nquoted: \"a\n b\n c\"\n";
assert!(run(input).is_none());
}
#[test]
fn singleline_quoted_passes() {
let input = "---\nquoted: \"a b c\"\n";
assert!(run(input).is_none());
}
#[test]
fn multiline_single_quoted_under_indent_rejects() {
let input = "---\nquoted: 'a\nb\nc'\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn tab_as_indent_4ejs() {
let input = "---\na:\n\tb:\n\t\tc: value\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_INDENT);
}
#[test]
fn map_under_indent_dmg6() {
let input = "key:\n ok: 1\n wrong: 2\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn map_under_indent_quoted_n4jp() {
let input = "map:\n key1: \"quoted1\"\n key2: \"bad indentation\"\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn seq_under_indent_4hvu() {
let input = "key:\n - ok\n - also ok\n - wrong\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn seq_item_with_extra_subseq_zvh3() {
let input = "- key: value\n - item1\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn comment_in_multiline_plain_8xdj() {
let input = "key: word1\n# xxx\n word2\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn trailing_comment_in_multiline_plain_bf9h() {
let input = "---\nplain: a\n b # end of scalar\n c\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn doc_level_comment_in_multiline_plain_bs4k() {
let input = "word1 # comment\nword2\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn doc_level_single_multiline_plain_passes() {
let input = "word1\nword2\n";
assert!(run(input).is_none(), "got {:?}", run(input));
}
#[test]
fn multi_document_each_with_single_scalar_passes() {
let input = "---\nfoo\n---\nbar\n";
assert!(run(input).is_none(), "got {:?}", run(input));
}
#[test]
fn block_map_with_well_formed_entries_passes() {
let input = "key:\n a: 1\n b: 2\n";
assert!(run(input).is_none());
}
#[test]
fn block_seq_with_well_formed_items_passes() {
let input = "key:\n - a\n - b\n";
assert!(run(input).is_none());
}
#[test]
fn nested_block_seq_in_seq_item_passes() {
let input = "- - x\n - y\n- z\n";
assert!(run(input).is_none());
}
#[test]
fn value_level_inline_nested_map_zcz6() {
let input = "a: b: c: d\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_INVALID_KEY_TOKEN);
}
#[test]
fn value_level_inline_nested_map_quoted_zl4z() {
let input = "---\na: 'b': c\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_INVALID_KEY_TOKEN);
}
#[test]
fn value_level_property_only_scalar_then_colon_passes_w5vh() {
let input = "a: &anchor: scalar a\n";
assert!(run(input).is_none());
}
#[test]
fn value_level_colon_without_space_passes() {
let input = "a: b:c\n";
assert!(run(input).is_none());
}
#[test]
fn block_scalar_header_content_s4gj() {
let input = "---\nfolded: > first line\n second line\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_INVALID_KEY_TOKEN);
}
#[test]
fn block_scalar_header_unspaced_comment_x4qw() {
let input = "block: ># comment\n scalar\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_INVALID_KEY_TOKEN);
}
#[test]
fn block_scalar_with_strip_chomp_and_body_passes() {
let input = "text: |-\n body\n";
assert!(run(input).is_none());
}
#[test]
fn block_scalar_with_indent_indicator_passes() {
let input = "text: |2\n body\n";
assert!(run(input).is_none());
}
#[test]
fn block_scalar_with_spaced_comment_passes() {
let input = "text: > # ok\n body\n";
assert!(run(input).is_none());
}
#[test]
fn block_scalar_bare_header_passes() {
let input = "text: >\n body\n";
assert!(run(input).is_none());
}
#[test]
fn dq_escaped_line_break_passes_np9h() {
let input = "\"folded \nto a space,\t\n \nto a line feed, or \t\\\n \\ \tnon-content\"\n";
assert!(run(input).is_none());
}
#[test]
fn dq_escaped_line_break_with_marker_passes_q8ad() {
let input =
"---\n\"folded \nto a space,\n \nto a line feed, or \t\\\n \\ \tnon-content\"\n";
assert!(run(input).is_none());
}
#[test]
fn dq_escaped_tab_passes() {
let input = "key: \"a\\\tb\"\n";
assert!(run(input).is_none());
}
/// Rejection case: the escape `\q` inside a double-quoted scalar must be
/// reported as `LEX_INVALID_DOUBLE_QUOTED_ESCAPE`.
#[test]
fn dq_truly_invalid_escape_still_rejected() {
    let expected = diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE;
    let reported = run("key: \"a\\qb\"\n").expect("expected diagnostic");
    assert_eq!(reported.code, expected);
}
/// Rejection case: a `#` placed directly after the closing double quote
/// (`"value"# invalid comment`) must be reported as
/// `LEX_COMMENT_NOT_PRECEDED_BY_SPACE`.
#[test]
fn comment_abutting_closing_quote_rejected_su5z() {
    let expected = diagnostic_codes::LEX_COMMENT_NOT_PRECEDED_BY_SPACE;
    let reported = run("key: \"value\"# invalid comment\n").expect("expected diagnostic");
    assert_eq!(reported.code, expected);
}
/// Rejection case: a `#` placed directly after a flow-sequence comma
/// (`c,#invalid`) must be reported as `LEX_COMMENT_NOT_PRECEDED_BY_SPACE`.
#[test]
fn comment_abutting_flow_comma_rejected_cvw2() {
    let expected = diagnostic_codes::LEX_COMMENT_NOT_PRECEDED_BY_SPACE;
    let reported = run("---\n[ a, b, c,#invalid\n]\n").expect("expected diagnostic");
    assert_eq!(reported.code, expected);
}
/// Acceptance cases: comments that follow a space, start a line, or follow a
/// tab must not produce a diagnostic. The offending input is printed when a
/// case fails.
#[test]
fn comment_preceded_by_space_passes() {
    let cases = [
        "key: value # ok\n",
        "# line-start comment\nkey: value\n",
        "key: value\t# tab-separated\n",
    ];
    for case in cases {
        let outcome = run(case);
        assert!(outcome.is_none(), "{case:?}");
    }
}
/// Rejection case: an anchor placed in front of an alias in value position
/// (`key2: &b *a`) must be reported as `PARSE_ANCHOR_DECORATES_ALIAS`.
#[test]
fn anchor_decorates_alias_sr86() {
    let reported = run("key1: &a value\nkey2: &b *a\n").expect("expected diagnostic");
    assert_eq!(reported.code, diagnostic_codes::PARSE_ANCHOR_DECORATES_ALIAS);
}
/// Rejection case: an anchor placed in front of an alias in key position
/// (`&b *alias : value2`) must be reported as `PARSE_ANCHOR_DECORATES_ALIAS`.
#[test]
fn anchor_decorates_alias_su74() {
    let reported =
        run("key1: &alias value1\n&b *alias : value2\n").expect("expected diagnostic");
    assert_eq!(reported.code, diagnostic_codes::PARSE_ANCHOR_DECORATES_ALIAS);
}
/// Acceptance case: an anchor followed by a plain scalar (`key: &a value`)
/// must not produce a diagnostic.
#[test]
fn anchor_followed_by_scalar_passes() {
    let outcome = run("key: &a value\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Acceptance case: an alias used as a value with no anchor in front of it
/// (`key2: *a`) must not produce a diagnostic.
#[test]
fn lone_alias_without_anchor_passes() {
    let outcome = run("key1: &a value\nkey2: *a\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Rejection case: a tag containing `{}` (`!invalid{}tag`) must be reported
/// as `PARSE_INVALID_TAG_CHARACTER`.
#[test]
fn invalid_tag_braces_lhl4() {
    let reported = run("---\n!invalid{}tag scalar\n").expect("expected diagnostic");
    assert_eq!(reported.code, diagnostic_codes::PARSE_INVALID_TAG_CHARACTER);
}
/// Rejection case: a tag immediately followed by a comma (`!!str,`) must be
/// reported as `PARSE_INVALID_TAG_CHARACTER`.
#[test]
fn invalid_tag_comma_u99r() {
    let reported = run("- !!str, xxx\n").expect("expected diagnostic");
    assert_eq!(reported.code, diagnostic_codes::PARSE_INVALID_TAG_CHARACTER);
}
/// Acceptance case: a `!!str` tag on a plain scalar value must not produce a
/// diagnostic.
#[test]
fn valid_tag_passes() {
    let outcome = run("key: !!str value\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Acceptance case: a verbatim tag (`!<...>`) whose body contains `,`, `:`,
/// `[` and `]` must not produce a diagnostic.
#[test]
fn verbatim_tag_with_uri_chars_passes() {
    let outcome = run("key: !<tag:example.com,2011:foo[bar]> value\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Rejection case: an anchor at the end of a key line followed by a second
/// anchor on the next line (`top2: &node2` then `&v2 val2`) must be reported
/// as `PARSE_MULTIPLE_ANCHORS_ON_NODE`.
#[test]
fn multiple_anchors_on_node_4jvg() {
    let reported = run("top1: &node1\n &k1 key1: val1\ntop2: &node2\n &v2 val2\n")
        .expect("expected diagnostic");
    assert_eq!(reported.code, diagnostic_codes::PARSE_MULTIPLE_ANCHORS_ON_NODE);
}
/// Acceptance case: two mapping values that each carry exactly one anchor
/// must not produce a diagnostic.
#[test]
fn single_anchor_per_node_passes() {
    let outcome = run("k1: &a 1\nk2: &b 2\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Rejection case: an anchor on the same line as, and in front of, a block
/// sequence `-` indicator must be reported as
/// `PARSE_ANCHOR_BEFORE_BLOCK_INDICATOR`.
#[test]
fn anchor_before_block_seq_indicator_sy6v() {
    let expected = diagnostic_codes::PARSE_ANCHOR_BEFORE_BLOCK_INDICATOR;
    let reported = run("&anchor - sequence entry\n").expect("expected diagnostic");
    assert_eq!(reported.code, expected);
}
/// Acceptance case: an anchor on its own line followed by a block sequence
/// on the next line must not produce a diagnostic.
#[test]
fn anchor_on_own_line_before_block_seq_passes() {
    let outcome = run("&anchor\n- item\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Rejection case: an anchor on its own line between two block sequence
/// entries must be reported as `LEX_REQUIRED_SIMPLE_KEY_NOT_FOUND`.
#[test]
fn anchor_without_target_gt5m() {
    let expected = diagnostic_codes::LEX_REQUIRED_SIMPLE_KEY_NOT_FOUND;
    let reported = run("- item1\n&node\n- item2\n").expect("expected diagnostic");
    assert_eq!(reported.code, expected);
}
/// Acceptance case: an anchor placed after the `-` indicator, on the entry's
/// value (`- &a item`), must not produce a diagnostic.
#[test]
fn anchor_before_block_seq_item_value_passes() {
    let outcome = run("- &a item\n- b\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Rejection case: a `%TAG !prefix!` directive appears before the first
/// document only, and the second document uses `!prefix!B`; this must be
/// reported as `PARSE_UNDEFINED_TAG_HANDLE`.
///
/// NOTE(review): presumably the handle is scoped to the document that
/// directly follows the directive — confirm against the handle-scope check.
#[test]
fn undefined_tag_handle_in_second_doc_qlj7() {
    let reported =
        run("%TAG !prefix! tag:example.com,2011:\n--- !prefix!A\na: b\n--- !prefix!B\nc: d\n")
            .expect("expected diagnostic");
    assert_eq!(reported.code, diagnostic_codes::PARSE_UNDEFINED_TAG_HANDLE);
}
/// Acceptance case: a `!prefix!` handle used in the document that directly
/// follows its `%TAG` directive must not produce a diagnostic.
#[test]
fn declared_tag_handle_in_same_doc_passes() {
    let outcome = run("%TAG !prefix! tag:example.com,2011:\n--- !prefix!a\nkey: value\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Acceptance cases: the `!!` handle and the bare `!` handle are used without
/// any `%TAG` directive and must not produce a diagnostic. Each case is
/// labelled so a failure names the handle kind that was rejected.
#[test]
fn builtin_tag_handles_always_ok() {
    let cases = [
        ("secondary", "--- !!str foo\n"),
        ("primary", "--- !local foo\n"),
    ];
    for (label, yaml) in cases {
        let outcome = run(yaml);
        assert!(outcome.is_none(), "{label}: got {outcome:?}");
    }
}
/// Acceptance case: a verbatim tag (`!<tag:example.com,2025:foo>`) used with
/// no `%TAG` directive in the input must not produce a diagnostic.
#[test]
fn verbatim_tag_bypasses_handle_lookup() {
    let outcome = run("--- !<tag:example.com,2025:foo> bar\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Acceptance case: a multi-line flow sequence under a top-level key whose
/// closing `]` sits at column zero must not produce a diagnostic.
#[test]
fn flow_continuation_accepts_closing_bracket_at_parent_indent_depth0() {
    let outcome = run("a: [\n 1,\n 2,\n 3,\n]\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Acceptance case: a multi-line flow sequence under a nested key whose
/// closing `]` is indented like the owning `inner:` key must not produce a
/// diagnostic.
#[test]
fn flow_continuation_accepts_closing_bracket_at_parent_indent_depth1() {
    let outcome = run("outer:\n inner: [\n a,\n b,\n ]\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Acceptance case: a multi-line flow mapping under a top-level key whose
/// closing `}` sits at column zero must not produce a diagnostic.
#[test]
fn flow_continuation_accepts_closing_brace_at_parent_indent() {
    let outcome = run("a: {\n k1: v1,\n k2: v2,\n}\n");
    assert!(outcome.is_none(), "got {:?}", outcome);
}
/// Rejection case: continuation lines of a flow sequence that start at the
/// same column as the owning `flow:` key must be reported as
/// `LEX_WRONG_INDENTED_FLOW`.
#[test]
fn flow_continuation_rejects_content_line_at_parent_indent_9c9n() {
    let reported = run("---\nflow: [a,\nb,\nc]\n").expect("expected diagnostic");
    assert_eq!(reported.code, diagnostic_codes::LEX_WRONG_INDENTED_FLOW);
}
/// Rejection case: a comment line at column zero inside an open flow
/// sequence must be reported as `LEX_WRONG_INDENTED_FLOW`.
#[test]
fn flow_continuation_rejects_comment_line_at_parent_indent_cml9() {
    let reported = run("key: [ word1\n# xxx\n word2 ]\n").expect("expected diagnostic");
    assert_eq!(reported.code, diagnostic_codes::LEX_WRONG_INDENTED_FLOW);
}
}