#![allow(dead_code)]
use crate::syntax::{SyntaxKind, SyntaxNode, SyntaxToken};
use rowan::NodeOrToken;
use super::model::{YamlDiagnostic, diagnostic_codes};
use super::parser_v2::parse_v2;
use super::scanner::{Scanner, Token, TokenKind};
/// Runs every validation pass over `input` and reports the first problem found.
///
/// The directive pass works on the raw scanner tokens and runs before the
/// input is parsed. The remaining passes inspect the tree returned by
/// `parse_v2` and are tried in a fixed order; the first one that yields a
/// diagnostic wins. Returns `None` when no pass fires.
pub(crate) fn validate_yaml(input: &str) -> Option<YamlDiagnostic> {
    let tokens = collect_tokens(input);
    let directive_problem = check_directives(&tokens);
    if directive_problem.is_some() {
        return directive_problem;
    }

    let tree = parse_v2(input);
    check_trailing_content(&tree)
        .or_else(|| check_flow_commas(&tree))
        .or_else(|| check_unterminated_flow(&tree))
        .or_else(|| check_flow_context_anomalies(&tree))
        .or_else(|| check_multiline_quoted_indent(&tree, input))
        .or_else(|| check_block_indent_anomalies(&tree))
        .or_else(|| check_block_scalar_header(&tree))
        .or_else(|| check_doc_level_bare_scalar_then_colon_map(&tree))
        .or_else(|| check_flow_continuation_indent(&tree, input))
        .or_else(|| check_invalid_dq_escapes(&tree))
}
/// Drains a fresh `Scanner` over `input`, returning every token it yields, in order.
fn collect_tokens(input: &str) -> Vec<Token> {
    let mut scanner = Scanner::new(input);
    // `next_token` returning `None` ends the stream.
    std::iter::from_fn(|| scanner.next_token()).collect()
}
/// Validates directive placement on the raw token stream.
///
/// Two rules are enforced, in this order:
///
/// 1. A directive may not appear while a document is still open. Any token
///    other than a directive, trivia, or a stream marker opens a document;
///    only a document-end marker closes it again.
/// 2. Every directive must be followed, somewhere later in the stream, by an
///    explicit document-start marker. A stream with directives and no
///    document start at all is reported at its first directive (as before);
///    a directive left dangling after the last document start is now
///    reported too, at the first such directive.
///
/// Returns the first violation found, or `None`.
fn check_directives(tokens: &[Token]) -> Option<YamlDiagnostic> {
    let mut seen_content = false;
    for tok in tokens {
        match tok.kind {
            TokenKind::Directive if seen_content => {
                return Some(diag_at_token(
                    tok,
                    diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT,
                    "directive requires document end before subsequent directives",
                ));
            }
            TokenKind::Directive
            | TokenKind::Trivia(_)
            | TokenKind::StreamStart
            | TokenKind::StreamEnd => {}
            TokenKind::DocumentEnd => seen_content = false,
            _ => seen_content = true,
        }
    }

    // Only directives located after the last `---` can lack a following
    // document start. With no `---` at all the whole stream is searched,
    // which selects the first directive.
    let after_last_start = tokens
        .iter()
        .rposition(|t| t.kind == TokenKind::DocumentStart)
        .map_or(0, |idx| idx + 1);
    tokens[after_last_start..]
        .iter()
        .find(|t| t.kind == TokenKind::Directive)
        .map(|directive| {
            diag_at_token(
                directive,
                diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START,
                "directive requires an explicit document start marker",
            )
        })
}
fn diag_at_token(tok: &Token, code: &'static str, message: &'static str) -> YamlDiagnostic {
YamlDiagnostic {
code,
message,
byte_start: tok.start.index,
byte_end: tok.end.index,
}
}
/// Looks for content that trails a construct which should have ended the line.
///
/// Three passes run in order: flow collections that are direct children of a
/// document, flow collections inside block-map values or block-sequence
/// items, and finally content following a document-end marker.
fn check_trailing_content(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let after_document_flow = || {
        tree.descendants()
            .filter(|node| node.kind() == SyntaxKind::YAML_DOCUMENT)
            .find_map(|doc| check_trailing_after_flow(&doc))
    };
    let after_nested_flow = || {
        tree.descendants()
            .filter(|node| {
                matches!(
                    node.kind(),
                    SyntaxKind::YAML_BLOCK_MAP_VALUE | SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
                )
            })
            .find_map(|container| check_trailing_after_flow_in_container(&container))
    };

    after_document_flow()
        .or_else(after_nested_flow)
        .or_else(|| check_trailing_after_doc_end(tree))
}
/// Flags content that follows a flow collection inside a block container.
///
/// Once a flow sequence or flow map child has been seen, any later non-flow
/// node or scalar token is reported, as is a comment that is not separated
/// from the collection by whitespace or a newline. Other token kinds are
/// ignored. A further flow collection simply restarts the tracking.
fn check_trailing_after_flow_in_container(container: &SyntaxNode) -> Option<YamlDiagnostic> {
    const STRAY_CONTENT: &str = "unexpected content after flow-collection close in block context";

    let mut flow_closed = false;
    let mut separated = false;
    for element in container.children_with_tokens() {
        match element {
            NodeOrToken::Node(node) => {
                let is_flow = matches!(
                    node.kind(),
                    SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
                );
                if is_flow {
                    flow_closed = true;
                    separated = false;
                } else if flow_closed {
                    return Some(diag_at_range(
                        node.text_range().start().into(),
                        node.text_range().end().into(),
                        diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
                        STRAY_CONTENT,
                    ));
                }
            }
            NodeOrToken::Token(token) if flow_closed => {
                let message = match token.kind() {
                    SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE => {
                        separated = true;
                        continue;
                    }
                    SyntaxKind::YAML_COMMENT if !separated => {
                        "comment must be preceded by whitespace after flow-collection close"
                    }
                    SyntaxKind::YAML_SCALAR => STRAY_CONTENT,
                    _ => continue,
                };
                return Some(diag_at_range(
                    token.text_range().start().into(),
                    token.text_range().end().into(),
                    diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
                    message,
                ));
            }
            // Tokens before the first flow collection are irrelevant here.
            NodeOrToken::Token(_) => {}
        }
    }
    None
}
/// Flags content that follows a flow collection at document level.
///
/// While a flow sequence/map child of `doc` is "pending" (seen, and not yet
/// cleared by a document-end marker or consumed as an implicit key):
///
/// * another node is an error, unless it is a block map whose first key is
///   colon-only (see `is_implicit_flow_key_block_map`), in which case the
///   pending flow collection is that map's implicit key. Such a key must not
///   span lines;
/// * whitespace/newline tokens count as a separator;
/// * a comment is an error unless a separator precedes it;
/// * a document-end marker clears the pending state;
/// * any other token is an error.
///
/// The flow node checked for line breaks is the one that actually precedes
/// the block map. It is tracked during the walk, so no re-scan of the
/// document's children is needed.
fn check_trailing_after_flow(doc: &SyntaxNode) -> Option<YamlDiagnostic> {
    const STRAY_CONTENT: &str = "unexpected content after flow-collection close";

    // The flow collection we are currently positioned after, if any.
    let mut pending_flow: Option<SyntaxNode> = None;
    let mut have_separator = false;
    for child in doc.children_with_tokens() {
        match &child {
            NodeOrToken::Node(n) => {
                let kind = n.kind();
                let is_flow = matches!(
                    kind,
                    SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
                );
                let Some(preceding_flow) = pending_flow.take() else {
                    // Nothing pending: only a flow collection changes state.
                    if is_flow {
                        pending_flow = Some(n.clone());
                        have_separator = false;
                    }
                    continue;
                };
                if kind == SyntaxKind::YAML_BLOCK_MAP && is_implicit_flow_key_block_map(n) {
                    if preceding_flow.text().to_string().contains('\n') {
                        return Some(diag_at_range(
                            n.text_range().start().into(),
                            n.text_range().end().into(),
                            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
                            "implicit key flow node cannot span lines",
                        ));
                    }
                    // The flow collection was consumed as the key; start over.
                    have_separator = false;
                    continue;
                }
                // A second flow collection or any other node is stray content.
                return Some(diag_at_range(
                    n.text_range().start().into(),
                    n.text_range().end().into(),
                    diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
                    STRAY_CONTENT,
                ));
            }
            NodeOrToken::Token(t) => {
                if pending_flow.is_none() {
                    continue;
                }
                match t.kind() {
                    SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE => {
                        have_separator = true;
                    }
                    SyntaxKind::YAML_COMMENT => {
                        if !have_separator {
                            return Some(diag_at_range(
                                t.text_range().start().into(),
                                t.text_range().end().into(),
                                diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
                                "comment must be preceded by whitespace after flow-collection close",
                            ));
                        }
                    }
                    SyntaxKind::YAML_DOCUMENT_END => {
                        pending_flow = None;
                        have_separator = false;
                    }
                    _ => {
                        return Some(diag_at_range(
                            t.text_range().start().into(),
                            t.text_range().end().into(),
                            diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END,
                            STRAY_CONTENT,
                        ));
                    }
                }
            }
        }
    }
    None
}
/// Returns `true` when the first entry of `block_map` has a key made up only
/// of a colon, whitespace, newlines and comments.
///
/// Returns `false` when the map has no entry or the first entry has no key node.
fn is_implicit_flow_key_block_map(block_map: &SyntaxNode) -> bool {
    block_map
        .children()
        .find(|node| node.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
        .and_then(|entry| {
            entry
                .children()
                .find(|node| node.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
        })
        .is_some_and(|key| {
            key.children_with_tokens().all(|element| {
                matches!(
                    element.kind(),
                    SyntaxKind::YAML_COLON
                        | SyntaxKind::WHITESPACE
                        | SyntaxKind::NEWLINE
                        | SyntaxKind::YAML_COMMENT
                )
            })
        })
}
/// Flags anything other than a properly separated comment on the same line
/// as a document-end marker.
///
/// For each document-end token, the tokens that follow are scanned up to the
/// next newline. Whitespace acts as a separator; a comment after a separator
/// ends the scan for that marker; a comment without one, or any other token,
/// is reported.
fn check_trailing_after_doc_end(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let all_tokens: Vec<_> = tree
        .descendants_with_tokens()
        .filter_map(|element| element.into_token())
        .collect();

    for (idx, marker) in all_tokens.iter().enumerate() {
        if marker.kind() != SyntaxKind::YAML_DOCUMENT_END {
            continue;
        }
        let mut separated = false;
        for follower in all_tokens.iter().skip(idx + 1) {
            let message = match follower.kind() {
                SyntaxKind::NEWLINE => break,
                SyntaxKind::WHITESPACE => {
                    separated = true;
                    continue;
                }
                SyntaxKind::YAML_COMMENT if separated => break,
                SyntaxKind::YAML_COMMENT => {
                    "comment must be preceded by whitespace after document end marker"
                }
                _ => "unexpected content on the same line as document end marker",
            };
            return Some(diag_at_range(
                follower.text_range().start().into(),
                follower.text_range().end().into(),
                diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_END,
                message,
            ));
        }
    }
    None
}
/// Applies `check_flow_node_commas` to every flow sequence and flow map in
/// the tree, returning the first diagnostic produced.
fn check_flow_commas(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    tree.descendants()
        .filter(|node| {
            matches!(
                node.kind(),
                SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
            )
        })
        .find_map(|flow| check_flow_node_commas(&flow))
}
/// Reports a comma inside `flow` that does not follow an item.
///
/// Child nodes, and any token that is not trivia, a comma or a bracket, count
/// as an item. A `,` scalar token consumes the pending item; finding one with
/// no pending item is the error. Bracket scalars (`[`, `]`, `{`, `}`) and
/// trivia leave the state untouched.
fn check_flow_node_commas(flow: &SyntaxNode) -> Option<YamlDiagnostic> {
    let mut item_pending = false;
    for element in flow.children_with_tokens() {
        let token = match element {
            NodeOrToken::Node(_) => {
                item_pending = true;
                continue;
            }
            NodeOrToken::Token(token) => token,
        };
        let kind = token.kind();
        if matches!(
            kind,
            SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::YAML_COMMENT
        ) {
            continue;
        }
        // Only scalar tokens can act as punctuation; anything else is an item.
        let punctuation = if kind == SyntaxKind::YAML_SCALAR {
            token.text()
        } else {
            ""
        };
        match punctuation {
            "," => {
                if !item_pending {
                    return Some(diag_at_range(
                        token.text_range().start().into(),
                        token.text_range().end().into(),
                        diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA,
                        "comma must follow a flow-collection item",
                    ));
                }
                item_pending = false;
            }
            "[" | "]" | "{" | "}" => {}
            _ => item_pending = true,
        }
    }
    None
}
/// Reports the first flow sequence or flow map that has no closing bracket
/// (`]` or `}` respectively) among its direct scalar-token children.
///
/// The diagnostic spans the whole flow node.
fn check_unterminated_flow(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    tree.descendants().find_map(|flow| {
        let (closer, code, message) = match flow.kind() {
            SyntaxKind::YAML_FLOW_SEQUENCE => (
                "]",
                diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE,
                "flow sequence reached end of input without `]`",
            ),
            SyntaxKind::YAML_FLOW_MAP => (
                "}",
                diagnostic_codes::PARSE_UNTERMINATED_FLOW_MAP,
                "flow mapping reached end of input without `}`",
            ),
            _ => return None,
        };
        let is_closed = flow
            .children_with_tokens()
            .filter_map(|element| element.into_token())
            .any(|token| token.kind() == SyntaxKind::YAML_SCALAR && token.text() == closer);
        if is_closed {
            return None;
        }
        Some(diag_at_range(
            flow.text_range().start().into(),
            flow.text_range().end().into(),
            code,
            message,
        ))
    })
}
/// Runs the flow-context checks: first multi-line implicit keys in every
/// flow-sequence item, then extra colons in every flow-map value.
fn check_flow_context_anomalies(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let multiline_key = tree
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::YAML_FLOW_SEQUENCE_ITEM)
        .find_map(|item| check_flow_seq_item_multiline_key(&item));

    multiline_key.or_else(|| {
        tree.descendants()
            .filter(|node| node.kind() == SyntaxKind::YAML_FLOW_MAP_VALUE)
            .find_map(|value| check_flow_map_value_extra_colon(&value))
    })
}
/// Reports a flow-sequence item whose implicit key crosses a line break.
///
/// Items containing an explicit key token are skipped. Otherwise the direct
/// tokens are scanned up to the first colon; if a newline token, or a scalar
/// whose text contains `'\n'`, came before it, the colon is reported.
fn check_flow_seq_item_multiline_key(item: &SyntaxNode) -> Option<YamlDiagnostic> {
    let direct_tokens = || {
        item.children_with_tokens()
            .filter_map(|element| element.into_token())
    };
    if direct_tokens().any(|token| token.kind() == SyntaxKind::YAML_KEY) {
        return None;
    }

    let mut crossed_line = false;
    for token in direct_tokens() {
        match token.kind() {
            // Only the first colon matters; it decides the outcome.
            SyntaxKind::YAML_COLON => {
                return crossed_line.then(|| {
                    diag_at_range(
                        token.text_range().start().into(),
                        token.text_range().end().into(),
                        diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
                        "implicit key in flow context cannot span lines",
                    )
                });
            }
            SyntaxKind::NEWLINE => crossed_line = true,
            SyntaxKind::YAML_SCALAR => crossed_line |= token.text().contains('\n'),
            _ => {}
        }
    }
    None
}
/// Reports a colon token that appears after a scalar token inside a flow-map
/// value, which is treated as a missing comma between entries.
fn check_flow_map_value_extra_colon(value: &SyntaxNode) -> Option<YamlDiagnostic> {
    let mut scalar_seen = false;
    let direct_tokens = value
        .children_with_tokens()
        .filter_map(|element| element.into_token());
    for token in direct_tokens {
        let kind = token.kind();
        if kind == SyntaxKind::YAML_SCALAR {
            scalar_seen = true;
        } else if kind == SyntaxKind::YAML_COLON && scalar_seen {
            return Some(diag_at_range(
                token.text_range().start().into(),
                token.text_range().end().into(),
                diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA,
                "expected comma between flow-mapping entries",
            ));
        }
    }
    None
}
/// Reports continuation lines of a multi-line quoted scalar that are not
/// indented deeper than the enclosing block map.
///
/// Only scalar tokens that are direct children of a block-map value, start
/// with `"` or `'`, and contain a newline are inspected. The reference
/// indent is the column at which the enclosing block map starts. Lines made
/// up solely of spaces/tabs are skipped. The diagnostic covers the first
/// non-whitespace byte of the offending line.
fn check_multiline_quoted_indent(tree: &SyntaxNode, input: &str) -> Option<YamlDiagnostic> {
    let map_values = tree
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE);

    for value in map_values {
        // value -> entry -> block map
        let Some(block_map) = value.parent().and_then(|entry| entry.parent()) else {
            continue;
        };
        if block_map.kind() != SyntaxKind::YAML_BLOCK_MAP {
            continue;
        }
        let parent_indent = column_of(input, block_map.text_range().start().into());

        let quoted_multiline = value
            .children_with_tokens()
            .filter_map(|element| element.into_token())
            .filter(|token| token.kind() == SyntaxKind::YAML_SCALAR)
            .filter(|token| {
                let text = token.text();
                text.contains('\n') && (text.starts_with('"') || text.starts_with('\''))
            });

        for token in quoted_multiline {
            let text = token.text();
            let scalar_start: usize = token.text_range().start().into();
            for (newline_at, _) in text.match_indices('\n') {
                // Bounds of the continuation line, relative to the scalar.
                let rel_start = newline_at + 1;
                let rel_end = text[rel_start..]
                    .find('\n')
                    .map_or(text.len(), |len| rel_start + len);
                let line = &input[scalar_start + rel_start..scalar_start + rel_end];
                let indent = line
                    .bytes()
                    .take_while(|b| *b == b' ' || *b == b'\t')
                    .count();
                if indent == line.len() {
                    // Blank (or whitespace-only) line: nothing to judge.
                    continue;
                }
                if indent <= parent_indent {
                    let offender = scalar_start + rel_start + indent;
                    return Some(diag_at_range(
                        offender,
                        offender + 1,
                        diagnostic_codes::PARSE_UNEXPECTED_DEDENT,
                        "multi-line quoted scalar continuation indented at or below parent block indent",
                    ));
                }
            }
        }
    }
    None
}
fn check_block_indent_anomalies(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
if let Some(diag) = check_tab_as_indent(tree) {
return Some(diag);
}
if let Some(diag) = check_inline_block_seq_in_value(tree) {
return Some(diag);
}
for node in tree.descendants().filter(|n| {
matches!(
n.kind(),
SyntaxKind::YAML_BLOCK_MAP_VALUE | SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
)
}) {
let mut struct_count = 0usize;
let mut scalar_count = 0usize;
let mut last_struct = None;
for child in node.children_with_tokens() {
match &child {
NodeOrToken::Node(n) => {
if matches!(
n.kind(),
SyntaxKind::YAML_BLOCK_MAP | SyntaxKind::YAML_BLOCK_SEQUENCE
) {
struct_count += 1;
last_struct = Some(n.clone());
}
}
NodeOrToken::Token(t) => {
if t.kind() == SyntaxKind::YAML_SCALAR {
scalar_count += 1;
}
}
}
}
if struct_count > 1 {
let n = last_struct.expect("struct_count > 1 implies last_struct set");
return Some(diag_at_range(
n.text_range().start().into(),
n.text_range().end().into(),
diagnostic_codes::PARSE_UNEXPECTED_DEDENT,
"block collection has mismatched indentation, splitting it into siblings",
));
}
if struct_count >= 1
&& scalar_count >= 1
&& node.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE
&& let Some(trailing_scalar) = scalar_after_structural_in_block_map_value(&node)
{
return Some(diag_at_range(
trailing_scalar.text_range().start().into(),
trailing_scalar.text_range().end().into(),
diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
"stray scalar after a block collection in a block-map value",
));
}
if scalar_count > 1 {
let scalars: Vec<_> = node
.children_with_tokens()
.filter_map(|c| c.into_token())
.filter(|t| t.kind() == SyntaxKind::YAML_SCALAR)
.collect();
let last_scalar = scalars
.last()
.expect("scalar_count > 1 implies at least one scalar child");
let (code, message) = if node.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE {
(
diagnostic_codes::PARSE_UNEXPECTED_DEDENT,
"comment cannot appear inside a multi-line plain scalar",
)
} else {
(
diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
"stray content following a block sequence item at its indent level",
)
};
return Some(diag_at_range(
last_scalar.text_range().start().into(),
last_scalar.text_range().end().into(),
code,
message,
));
}
}
None
}
/// Returns the first scalar token among `value`'s direct children that comes
/// after a block map or block sequence child, if there is one.
fn scalar_after_structural_in_block_map_value(value: &SyntaxNode) -> Option<SyntaxToken> {
    value
        .children_with_tokens()
        // Drop everything before the first block collection.
        .skip_while(|element| {
            !matches!(
                element,
                NodeOrToken::Node(node) if matches!(
                    node.kind(),
                    SyntaxKind::YAML_BLOCK_MAP | SyntaxKind::YAML_BLOCK_SEQUENCE
                )
            )
        })
        .filter_map(|element| element.into_token())
        .find(|token| token.kind() == SyntaxKind::YAML_SCALAR)
}
/// Reports a block sequence that appears in a block-map value before any
/// newline token, i.e. on the same line as the key.
///
/// The one-byte diagnostic is placed at the start of the sequence's second
/// item, or at the start of the sequence itself when it has fewer than two.
fn check_inline_block_seq_in_value(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let map_values = tree
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE);

    for value in map_values {
        // Only children that precede the first newline token are on the key's line.
        let same_line_sequence = value
            .children_with_tokens()
            .take_while(|element| element.kind() != SyntaxKind::NEWLINE)
            .filter_map(|element| match element {
                NodeOrToken::Node(node) => Some(node),
                NodeOrToken::Token(_) => None,
            })
            .find(|node| node.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE);
        let Some(sequence) = same_line_sequence else {
            continue;
        };

        let anchor = sequence
            .children()
            .filter(|child| child.kind() == SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM)
            .nth(1)
            .unwrap_or_else(|| sequence.clone());
        let anchor_start: usize = anchor.text_range().start().into();
        return Some(diag_at_range(
            anchor_start,
            anchor_start + 1,
            diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
            "block sequence cannot start on the same line as its key",
        ));
    }
    None
}
fn check_tab_as_indent(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
for node in tree.descendants().filter(|n| {
matches!(
n.kind(),
SyntaxKind::YAML_BLOCK_MAP_VALUE
| SyntaxKind::YAML_BLOCK_MAP_KEY
| SyntaxKind::YAML_BLOCK_SEQUENCE_ITEM
| SyntaxKind::YAML_BLOCK_MAP
| SyntaxKind::YAML_BLOCK_SEQUENCE
)
}) {
let mut prev_was_newline = false;
for child in node.children_with_tokens() {
if let NodeOrToken::Token(t) = &child {
match t.kind() {
SyntaxKind::NEWLINE => prev_was_newline = true,
SyntaxKind::WHITESPACE if prev_was_newline => {
if t.text().starts_with('\t') {
return Some(diag_at_range(
t.text_range().start().into(),
t.text_range().end().into(),
diagnostic_codes::PARSE_UNEXPECTED_INDENT,
"tab character used as indentation is not allowed in YAML",
));
}
prev_was_newline = false;
}
_ => {
prev_was_newline = false;
}
}
} else {
prev_was_newline = false;
}
}
}
None
}
/// Validates the header line of scalar tokens that start with `>` or `|`.
///
/// After the leading indicator and any run of `+`, `-` or ASCII digits, the
/// rest of the first line may only be empty, whitespace, or whitespace
/// followed by a `#` comment. A `#` directly after the indicators, or any
/// other content, is reported.
fn check_block_scalar_header(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let block_scalars = tree
        .descendants_with_tokens()
        .filter_map(|element| element.into_token())
        .filter(|token| token.kind() == SyntaxKind::YAML_SCALAR)
        .filter(|token| token.text().starts_with('>') || token.text().starts_with('|'));

    for token in block_scalars {
        let text = token.text();
        let header = text.split_once('\n').map_or(text, |(first_line, _)| first_line);
        // Style indicator plus chomping/indentation indicators.
        let indicators_len = 1 + header
            .bytes()
            .skip(1)
            .take_while(|b| *b == b'+' || *b == b'-' || b.is_ascii_digit())
            .count();
        let tail = &header[indicators_len..];
        let base: usize = token.text_range().start().into();

        if tail.starts_with('#') {
            return Some(diag_at_range(
                base + indicators_len,
                base + indicators_len + 1,
                diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
                "comment after block scalar indicator must be preceded by whitespace",
            ));
        }

        let content = tail.trim_start_matches([' ', '\t']);
        if content.is_empty() || content.starts_with('#') {
            continue;
        }
        return Some(diag_at_range(
            base + header.len() - content.len(),
            base + header.len(),
            diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
            "block scalar header line must end at EOL or with a comment",
        ));
    }
    None
}
/// Reports a document-level scalar token that is followed by a block map
/// whose first key is colon-only.
///
/// The value-level variant of the check runs first. Within each document the
/// most recent scalar token is remembered across trivia; any other token
/// (except the document-start marker) or any node forgets it. The code and
/// message depend on whether a document-start marker was seen earlier in the
/// same document.
fn check_doc_level_bare_scalar_then_colon_map(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let value_level = check_value_level_multiline_scalar_then_colon_map(tree);
    if value_level.is_some() {
        return value_level;
    }

    let documents = tree
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::YAML_DOCUMENT);
    for doc in documents {
        let mut explicit_start = false;
        let mut candidate: Option<SyntaxToken> = None;
        for element in doc.children_with_tokens() {
            match element {
                NodeOrToken::Token(token) => match token.kind() {
                    SyntaxKind::YAML_DOCUMENT_START => explicit_start = true,
                    SyntaxKind::YAML_SCALAR => candidate = Some(token),
                    SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::YAML_COMMENT => {}
                    _ => candidate = None,
                },
                NodeOrToken::Node(node) => {
                    // Every node consumes the remembered scalar, match or not.
                    let Some(scalar) = candidate.take() else {
                        continue;
                    };
                    if node.kind() != SyntaxKind::YAML_BLOCK_MAP
                        || !first_entry_has_colon_only_key(&node)
                    {
                        continue;
                    }
                    let (code, message) = if explicit_start {
                        (
                            diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_START,
                            "trailing content after document start marker",
                        )
                    } else {
                        (
                            diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
                            "unexpected scalar at block-map level (no key)",
                        )
                    };
                    return Some(diag_at_range(
                        scalar.text_range().start().into(),
                        scalar.text_range().end().into(),
                        code,
                        message,
                    ));
                }
            }
        }
    }
    None
}
/// Reports a multi-line scalar inside a block-map value that is followed by
/// a block map whose first key is colon-only.
///
/// The most recent scalar token is remembered across trivia; any other token
/// or any node forgets it. The scalar is reported only when
/// `scalar_text_spans_implicit_key_lines` accepts its text.
fn check_value_level_multiline_scalar_then_colon_map(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    let map_values = tree
        .descendants()
        .filter(|node| node.kind() == SyntaxKind::YAML_BLOCK_MAP_VALUE);

    for value in map_values {
        let mut candidate: Option<SyntaxToken> = None;
        for element in value.children_with_tokens() {
            match element {
                NodeOrToken::Token(token) => match token.kind() {
                    SyntaxKind::YAML_SCALAR => candidate = Some(token),
                    SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::YAML_COMMENT => {}
                    _ => candidate = None,
                },
                NodeOrToken::Node(node) => {
                    // Every node consumes the remembered scalar, match or not.
                    let Some(scalar) = candidate.take() else {
                        continue;
                    };
                    let offending = node.kind() == SyntaxKind::YAML_BLOCK_MAP
                        && first_entry_has_colon_only_key(&node)
                        && scalar_text_spans_implicit_key_lines(scalar.text());
                    if offending {
                        return Some(diag_at_range(
                            scalar.text_range().start().into(),
                            scalar.text_range().end().into(),
                            diagnostic_codes::PARSE_INVALID_KEY_TOKEN,
                            "implicit key cannot span lines",
                        ));
                    }
                }
            }
        }
    }
    None
}
/// Decides whether a multi-line scalar's first line carries real key content.
///
/// Returns `false` for text without a newline, and for text whose first line
/// consists only of whitespace-separated words starting with `&`, `!` or `*`.
/// Returns `true` as soon as the first line contains any other word, and also
/// when the first line is empty or begins with whitespace.
fn scalar_text_spans_implicit_key_lines(text: &str) -> bool {
    let Some((first_line, _)) = text.split_once('\n') else {
        return false;
    };
    let first_line = first_line.trim_end();
    // An empty or indented first line starts with an empty word, which is
    // not a property.
    if first_line.is_empty() || first_line.starts_with(char::is_whitespace) {
        return true;
    }
    first_line
        .split_whitespace()
        .any(|word| !(word.starts_with('&') || word.starts_with('!') || word.starts_with('*')))
}
/// Returns `true` when the key of `block_map`'s first entry contains at least
/// one colon token and otherwise nothing but whitespace and newline tokens.
///
/// Any child node or other token inside the key, a missing entry, or a
/// missing key node all yield `false`.
fn first_entry_has_colon_only_key(block_map: &SyntaxNode) -> bool {
    let first_key = block_map
        .children()
        .find(|node| node.kind() == SyntaxKind::YAML_BLOCK_MAP_ENTRY)
        .and_then(|entry| {
            entry
                .children()
                .find(|node| node.kind() == SyntaxKind::YAML_BLOCK_MAP_KEY)
        });
    let Some(key) = first_key else {
        return false;
    };

    let mut colon_seen = false;
    for element in key.children_with_tokens() {
        let NodeOrToken::Token(token) = element else {
            return false;
        };
        match token.kind() {
            SyntaxKind::YAML_COLON => colon_seen = true,
            SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE => {}
            _ => return false,
        }
    }
    colon_seen
}
/// Reports continuation lines of a flow collection that are not indented
/// deeper than the block map enclosing it.
///
/// Flow collections without an enclosing block map (see
/// `enclosing_block_map_for_flow`) are skipped. The threshold is the column
/// at which that block map starts. For each line break inside the flow
/// node's byte span, the following line's leading spaces/tabs are counted;
/// blank lines and a line cut off by the end of the span are ignored. The
/// diagnostic runs from the line start through the first non-blank byte.
fn check_flow_continuation_indent(tree: &SyntaxNode, input: &str) -> Option<YamlDiagnostic> {
    let flows = tree.descendants().filter(|node| {
        matches!(
            node.kind(),
            SyntaxKind::YAML_FLOW_SEQUENCE | SyntaxKind::YAML_FLOW_MAP
        )
    });

    for flow in flows {
        let Some(block_map) = enclosing_block_map_for_flow(&flow) else {
            continue;
        };
        let threshold = column_of(input, block_map.text_range().start().into());
        let flow_start: usize = flow.text_range().start().into();
        let flow_end: usize = flow.text_range().end().into();
        let span = &input.as_bytes()[flow_start..flow_end];

        let newline_positions = span
            .iter()
            .enumerate()
            .filter(|(_, byte)| **byte == b'\n')
            .map(|(pos, _)| pos);
        for newline_at in newline_positions {
            let continuation = &span[newline_at + 1..];
            let indent = continuation
                .iter()
                .take_while(|byte| **byte == b' ' || **byte == b'\t')
                .count();
            // Nothing but indentation before the span ends or the next break.
            let blank_or_cut_off = continuation
                .get(indent)
                .map_or(true, |byte| *byte == b'\n');
            if blank_or_cut_off {
                continue;
            }
            if indent <= threshold {
                let line_start = flow_start + newline_at + 1;
                return Some(diag_at_range(
                    line_start,
                    line_start + indent + 1,
                    diagnostic_codes::LEX_WRONG_INDENTED_FLOW,
                    "wrong indentation for continued flow collection",
                ));
            }
        }
    }
    None
}
fn enclosing_block_map_for_flow(flow: &SyntaxNode) -> Option<SyntaxNode> {
let mut node = flow.parent();
let mut saw_block_map_value = false;
while let Some(current) = node {
match current.kind() {
SyntaxKind::YAML_BLOCK_MAP_VALUE => saw_block_map_value = true,
SyntaxKind::YAML_BLOCK_MAP if saw_block_map_value => return Some(current),
_ => {}
}
node = current.parent();
}
None
}
/// Reports the first invalid backslash escape in any scalar token whose text
/// starts with `"`. The one-byte diagnostic sits on the backslash.
fn check_invalid_dq_escapes(tree: &SyntaxNode) -> Option<YamlDiagnostic> {
    tree.descendants_with_tokens()
        .filter_map(|element| element.into_token())
        .filter(|token| token.kind() == SyntaxKind::YAML_SCALAR)
        .find_map(|token| {
            let text = token.text();
            if !text.starts_with('"') {
                return None;
            }
            let escape_at = invalid_dq_escape_offset(text)?;
            let base: usize = token.text_range().start().into();
            Some(diag_at_range(
                base + escape_at,
                base + escape_at + 1,
                diagnostic_codes::LEX_INVALID_DOUBLE_QUOTED_ESCAPE,
                "invalid escape in double quoted scalar",
            ))
        })
}
/// Returns the byte offset of the first bad backslash escape inside the
/// double-quoted sections of `text`.
///
/// A `"` toggles between outside and inside a quoted section. Inside, a
/// backslash must be followed by a character accepted by
/// `is_valid_dq_escape`; otherwise the backslash's offset is returned. A
/// backslash that is the very last character of `text` is reported as well.
fn invalid_dq_escape_offset(text: &str) -> Option<usize> {
    enum Scan {
        Outside,
        Inside,
        /// Just saw a backslash at this byte offset.
        Escaped(usize),
    }

    let mut state = Scan::Outside;
    for (idx, ch) in text.char_indices() {
        state = match state {
            Scan::Outside if ch == '"' => Scan::Inside,
            Scan::Outside => Scan::Outside,
            Scan::Escaped(backslash_at) => {
                if !is_valid_dq_escape(ch) {
                    return Some(backslash_at);
                }
                Scan::Inside
            }
            Scan::Inside => match ch {
                '\\' => Scan::Escaped(idx),
                '"' => Scan::Outside,
                _ => Scan::Inside,
            },
        };
    }
    // A dangling backslash at the end of the text has nothing to escape.
    match state {
        Scan::Escaped(backslash_at) => Some(backslash_at),
        Scan::Outside | Scan::Inside => None,
    }
}
/// Returns `true` when `ch` may directly follow a backslash in a
/// double-quoted scalar.
///
/// Besides the named escapes (`\0`, `\a`, `\n`, `\x`, `\u`, ...), YAML 1.2
/// also allows a backslash before a literal tab (same as `\t`) and before a
/// line break (an escaped line break, used to fold long lines), so `'\t'`,
/// `'\n'` and `'\r'` are accepted. The hex digits after `x`/`u`/`U` are not
/// validated here.
fn is_valid_dq_escape(ch: char) -> bool {
    matches!(
        ch,
        '0' | 'a'
            | 'b'
            | 't'
            | '\t'
            | 'n'
            | 'v'
            | 'f'
            | 'r'
            | 'e'
            | ' '
            | '"'
            | '/'
            | '\\'
            | 'N'
            | '_'
            | 'L'
            | 'P'
            | 'x'
            | 'u'
            | 'U'
            // Escaped line break: backslash immediately before LF or CR(LF).
            | '\n'
            | '\r'
    )
}
/// Returns the number of bytes between the start of the line containing
/// `byte_offset` and `byte_offset` itself.
///
/// Lines are delimited by `'\n'`. Slicing panics if `byte_offset` is past the
/// end of `input` or not on a character boundary.
fn column_of(input: &str, byte_offset: usize) -> usize {
    input[..byte_offset]
        .rfind('\n')
        .map_or(byte_offset, |newline_at| byte_offset - newline_at - 1)
}
/// Builds a `YamlDiagnostic` with the given code and message, spanning from
/// `byte_start` to `byte_end`.
fn diag_at_range(
    byte_start: usize,
    byte_end: usize,
    code: &'static str,
    message: &'static str,
) -> YamlDiagnostic {
    YamlDiagnostic {
        byte_start,
        byte_end,
        code,
        message,
    }
}
// Unit tests for `validate_yaml`. Each test feeds one YAML snippet through the
// validator and asserts either the reported diagnostic code or that nothing is
// reported.
// NOTE(review): the four-character suffixes on many test names (e.g. `eb22`,
// `9mma`) look like yaml-test-suite case IDs — confirm.
#[cfg(test)]
mod tests {
use super::*;
// Shorthand for running the validator on `input`.
fn run(input: &str) -> Option<YamlDiagnostic> {
validate_yaml(input)
}

// ---- Directives ----
#[test]
fn directive_after_content_eb22() {
let input = "---\nscalar1 # comment\n%YAML 1.2\n---\nscalar2\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT);
}
#[test]
fn directive_after_content_rhx7() {
let input = "---\nkey: value\n%YAML 1.2\n---\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_DIRECTIVE_AFTER_CONTENT);
}
#[test]
fn directive_without_document_start_9mma() {
let input = "%YAML 1.2\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START
);
}
#[test]
fn directive_without_document_start_b63p() {
let input = "%YAML 1.2\n...\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_DIRECTIVE_WITHOUT_DOCUMENT_START
);
}
#[test]
fn well_formed_directive_then_marker_passes() {
let input = "%YAML 1.2\n---\nfoo: bar\n";
assert!(run(input).is_none());
}
#[test]
fn directive_then_doc_then_directive_with_separator_passes() {
let input = "%YAML 1.2\n---\nfoo: 1\n...\n%YAML 1.2\n---\nbar: 2\n";
assert!(run(input).is_none());
}
#[test]
fn empty_input_passes() {
assert!(run("").is_none());
}
#[test]
fn plain_document_no_directives_passes() {
let input = "key: value\n";
assert!(run(input).is_none());
}
// A `%` line inside content must not be reported as a directive.
#[test]
fn plain_scalar_continuation_with_percent_passes_xlq9() {
let input = "---\nscalar\n%YAML 1.2\n";
assert!(run(input).is_none());
}
#[test]
fn percent_at_col0_inside_flow_map_is_content_ut92() {
let input = "---\n{ matches\n% : 20 }\n...\n---\n# Empty\n...\n";
assert!(run(input).is_none());
}

// ---- Trailing content after document end / flow close ----
#[test]
fn trailing_content_after_doc_end_3hfz() {
let input = "---\nkey: value\n... invalid\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::LEX_TRAILING_CONTENT_AFTER_DOCUMENT_END
);
}
#[test]
fn trailing_content_after_flow_seq_ks4u() {
let input = "---\n[\nsequence item\n]\ninvalid item\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
);
}
#[test]
fn trailing_extra_flow_closer_4h7k() {
let input = "---\n[ a, b, c ] ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
);
}
#[test]
fn trailing_spaceless_comment_after_flow_9jba() {
let input = "---\n[ a, b, c, ]#invalid\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_TRAILING_CONTENT_AFTER_FLOW_END
);
}
#[test]
fn flow_then_properly_spaced_comment_passes() {
let input = "---\n[ a, b ] # ok\n";
assert!(run(input).is_none());
}
#[test]
fn flow_then_doc_end_passes() {
let input = "---\n[ a, b ]\n...\n";
assert!(run(input).is_none());
}
#[test]
fn doc_end_then_newline_then_content_is_valid_new_doc() {
let input = "---\nfirst\n...\nsecond\n";
assert!(run(input).is_none());
}
#[test]
fn doc_end_with_trailing_spaced_comment_passes() {
let input = "---\nkey: value\n... # comment\n";
assert!(run(input).is_none());
}

// ---- Commas in flow collections ----
#[test]
fn flow_seq_leading_comma_9mag() {
let input = "---\n[ , a, b, c ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
);
}
#[test]
fn flow_seq_double_comma_ctn5() {
let input = "---\n[ a, b, c, , ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
);
}
#[test]
fn flow_map_leading_comma_rejects() {
let input = "---\n{ , a: 1 }\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
);
}
#[test]
fn flow_map_double_comma_rejects() {
let input = "---\n{ a: 1, , b: 2 }\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
);
}
#[test]
fn flow_seq_trailing_comma_passes() {
let input = "---\n[ a, b, c, ]\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_trailing_comma_passes() {
let input = "---\n{ a: 1, b: 2, }\n";
assert!(run(input).is_none());
}
#[test]
fn flow_seq_well_formed_passes() {
let input = "---\n[ a, b, c ]\n";
assert!(run(input).is_none());
}
#[test]
fn flow_seq_empty_passes() {
let input = "---\n[ ]\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_implicit_null_entry_passes_8kb6() {
let input = "---\n- { single line, a: b}\n- { multi\n line, a: b}\n";
assert!(run(input).is_none());
}

// ---- Unterminated flow collections ----
#[test]
fn unterminated_flow_seq_6jtt() {
let input = "---\n[ [ a, b, c ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_UNTERMINATED_FLOW_SEQUENCE
);
}
#[test]
fn unterminated_flow_map() {
let input = "---\n{ foo: 1\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNTERMINATED_FLOW_MAP);
}
#[test]
fn balanced_nested_flow_passes() {
let input = "---\n[ [ a, b, c ] ]\n";
assert!(run(input).is_none());
}
// Same input as `flow_seq_empty_passes` above.
#[test]
fn empty_flow_seq_terminated_passes() {
let input = "---\n[ ]\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_plain_entry_passes_4abk() {
let input = "{\nunquoted : \"separate\",\nhttp://foo.com,\nomitted value:,\n}\n";
assert!(run(input).is_none());
}

// ---- Flow-context keys and colons ----
#[test]
fn flow_seq_implicit_key_spans_lines_dk4h() {
let input = "---\n[ key\n : value ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_INVALID_KEY_TOKEN);
}
#[test]
fn flow_seq_implicit_key_quoted_spans_lines_zxt5() {
let input = "[ \"key\"\n :value ]\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_INVALID_KEY_TOKEN);
}
#[test]
fn flow_map_missing_comma_t833() {
let input = "---\n{\n foo: 1\n bar: 2 }\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(
diag.code,
diagnostic_codes::PARSE_INVALID_FLOW_SEQUENCE_COMMA
);
}
#[test]
fn flow_seq_single_line_implicit_key_passes() {
let input = "---\n[ key: value ]\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_well_formed_multiline_passes() {
let input = "---\n{\n foo: 1,\n bar: 2\n}\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_value_starting_with_colon_passes_58mp() {
let input = "{x: :x}\n";
assert!(run(input).is_none());
}
#[test]
fn flow_map_value_starting_with_double_colon_passes_5t43() {
let input = "- { \"key\":value }\n- { \"key\"::value }\n";
assert!(run(input).is_none());
}
#[test]
fn flow_seq_explicit_key_spans_lines_passes_ct4q() {
let input = "[\n? foo\n bar : baz\n]\n";
assert!(run(input).is_none());
}

// ---- Multi-line quoted scalar indentation ----
#[test]
fn multiline_quoted_under_indent_qb6e() {
let input = "---\nquoted: \"a\nb\nc\"\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn multiline_quoted_properly_indented_passes() {
let input = "---\nquoted: \"a\n b\n c\"\n";
assert!(run(input).is_none());
}
#[test]
fn singleline_quoted_passes() {
let input = "---\nquoted: \"a b c\"\n";
assert!(run(input).is_none());
}
#[test]
fn multiline_single_quoted_under_indent_rejects() {
let input = "---\nquoted: 'a\nb\nc'\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}

// ---- Block indentation anomalies ----
#[test]
fn tab_as_indent_4ejs() {
let input = "---\na:\n\tb:\n\t\tc: value\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_INDENT);
}
#[test]
fn map_under_indent_dmg6() {
let input = "key:\n ok: 1\n wrong: 2\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn map_under_indent_quoted_n4jp() {
let input = "map:\n key1: \"quoted1\"\n key2: \"bad indentation\"\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn seq_under_indent_4hvu() {
let input = "key:\n - ok\n - also ok\n - wrong\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn seq_item_with_extra_subseq_zvh3() {
let input = "- key: value\n - item1\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn comment_in_multiline_plain_8xdj() {
let input = "key: word1\n# xxx\n word2\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn trailing_comment_in_multiline_plain_bf9h() {
let input = "---\nplain: a\n b # end of scalar\n c\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_UNEXPECTED_DEDENT);
}
#[test]
fn block_map_with_well_formed_entries_passes() {
let input = "key:\n a: 1\n b: 2\n";
assert!(run(input).is_none());
}
#[test]
fn block_seq_with_well_formed_items_passes() {
let input = "key:\n - a\n - b\n";
assert!(run(input).is_none());
}
#[test]
fn nested_block_seq_in_seq_item_passes() {
let input = "- - x\n - y\n- z\n";
assert!(run(input).is_none());
}

// ---- Block scalar headers ----
#[test]
fn block_scalar_header_content_s4gj() {
let input = "---\nfolded: > first line\n second line\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_INVALID_KEY_TOKEN);
}
#[test]
fn block_scalar_header_unspaced_comment_x4qw() {
let input = "block: ># comment\n scalar\n";
let diag = run(input).expect("expected diagnostic");
assert_eq!(diag.code, diagnostic_codes::PARSE_INVALID_KEY_TOKEN);
}
#[test]
fn block_scalar_with_strip_chomp_and_body_passes() {
let input = "text: |-\n body\n";
assert!(run(input).is_none());
}
#[test]
fn block_scalar_with_indent_indicator_passes() {
let input = "text: |2\n body\n";
assert!(run(input).is_none());
}
#[test]
fn block_scalar_with_spaced_comment_passes() {
let input = "text: > # ok\n body\n";
assert!(run(input).is_none());
}
#[test]
fn block_scalar_bare_header_passes() {
let input = "text: >\n body\n";
assert!(run(input).is_none());
}
}