use rlsp_yaml_parser::{Document, Event, LineIndex, Node, ScalarStyle, Span};
use tower_lsp::lsp_types::{
SemanticToken, SemanticTokenModifier, SemanticTokenType, SemanticTokensLegend,
};
// Token type indices. Each value is the position of the matching entry in the
// `token_types` vector built by `legend()`, so the two lists must stay in the
// same order.

// Scalar mapping keys.
const TOKEN_PROPERTY: u32 = 0;
// Quoted scalars, and plain scalars that are neither keywords nor numbers.
const TOKEN_STRING: u32 = 1;
// Plain scalars accepted by `is_number`.
const TOKEN_NUMBER: u32 = 2;
// Plain boolean/null-like words (`true`, `no`, `null`, `~`, ...).
const TOKEN_KEYWORD: u32 = 3;
// Anchors (with `MOD_DECLARATION`) and aliases (without a modifier).
const TOKEN_VARIABLE: u32 = 4;
// Tags.
const TOKEN_TYPE: u32 = 5;
// Comments.
const TOKEN_COMMENT: u32 = 6;
// Literal and folded block scalars.
const TOKEN_OPERATOR: u32 = 7;
// Modifier value stored in `token_modifiers_bitset`; `DECLARATION` is the only
// modifier listed by `legend()`.
const MOD_DECLARATION: u32 = 1;
/// Builds the semantic-token legend advertised to the client.
///
/// The position of each entry in `token_types` is the numeric value of the
/// corresponding `TOKEN_*` constant, so the order here must not change
/// independently of those constants.
#[must_use]
pub fn legend() -> SemanticTokensLegend {
    let token_types = vec![
        SemanticTokenType::PROPERTY, // TOKEN_PROPERTY
        SemanticTokenType::STRING,   // TOKEN_STRING
        SemanticTokenType::NUMBER,   // TOKEN_NUMBER
        SemanticTokenType::KEYWORD,  // TOKEN_KEYWORD
        SemanticTokenType::VARIABLE, // TOKEN_VARIABLE
        SemanticTokenType::TYPE,     // TOKEN_TYPE
        SemanticTokenType::COMMENT,  // TOKEN_COMMENT
        SemanticTokenType::OPERATOR, // TOKEN_OPERATOR
    ];
    let token_modifiers = vec![SemanticTokenModifier::DECLARATION];

    SemanticTokensLegend {
        token_types,
        token_modifiers,
    }
}
/// A token at an absolute position, before delta encoding.
///
/// `semantic_tokens` collects these, sorts them by `(line, start)`, and then
/// converts each one into a relative `SemanticToken`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct RawToken {
    /// Zero-based line on which the token starts.
    line: u32,
    /// Start column on that line, as reported by `LineIndex::line_column`.
    start: u32,
    /// Token length, in the same column unit as `start`.
    length: u32,
    /// One of the `TOKEN_*` indices.
    token_type: u32,
    /// Modifier bits (`0` or `MOD_DECLARATION`).
    token_modifiers_bitset: u32,
}
/// Produces the delta-encoded semantic tokens for a YAML text.
///
/// * `docs` - the parsed documents; node tokens come from walking each root.
/// * `text` - the source text; it is parsed again as an event stream to find
///   comments.
///
/// Tokens are gathered with absolute positions, sorted by `(line, start)`, and
/// then rewritten so each token's position is relative to the one before it.
#[must_use]
pub fn semantic_tokens(docs: &[Document<Span>], text: &str) -> Vec<SemanticToken> {
    let mut collected: Vec<RawToken> = Vec::new();
    for doc in docs {
        let idx = doc.line_index();
        collect_node_tokens(&doc.root, &mut collected, idx);
    }
    collect_comment_tokens(text, &mut collected);
    collected.sort_by_key(|tok| (tok.line, tok.start));

    // (line, start) of the previously emitted token; the first token is
    // measured from the origin.
    let mut previous = (0u32, 0u32);
    collected
        .into_iter()
        .map(|tok| {
            let (last_line, last_start) = previous;
            previous = (tok.line, tok.start);

            let delta_line = tok.line - last_line;
            // The start column is only relative while staying on the same line.
            let delta_start = match delta_line {
                0 => tok.start - last_start,
                _ => tok.start,
            };
            SemanticToken {
                delta_line,
                delta_start,
                length: tok.length,
                token_type: tok.token_type,
                token_modifiers_bitset: tok.token_modifiers_bitset,
            }
        })
        .collect()
}
/// Emits the tokens attached to `node` itself: its anchor (if any) as a
/// declared variable, then its tag (if any) as a type.
fn push_anchor_and_tag_tokens(node: &Node<Span>, out: &mut Vec<RawToken>, idx: &LineIndex) {
    let anchor = node
        .anchor_loc()
        .map(|span| span_to_raw(span, TOKEN_VARIABLE, MOD_DECLARATION, idx));
    let tag = node
        .tag_loc()
        .map(|span| span_to_raw(span, TOKEN_TYPE, 0, idx));
    out.extend(anchor);
    out.extend(tag);
}

/// Recursively walks `node` and appends a token for everything highlightable
/// beneath it.
///
/// * Anchors and tags on any node become variable-declaration / type tokens.
/// * Scalars are classified by `classify_scalar_node`.
/// * Scalar mapping keys become property tokens instead of being classified;
///   non-scalar keys are walked like any other node.
/// * Aliases become variable tokens without a modifier.
fn collect_node_tokens(node: &Node<Span>, out: &mut Vec<RawToken>, idx: &LineIndex) {
    push_anchor_and_tag_tokens(node, out, idx);

    match node {
        Node::Scalar {
            loc, style, value, ..
        } => {
            out.extend(classify_scalar_node(value, *style, *loc, idx));
        }
        Node::Mapping { entries, .. } => {
            for (key, value) in entries {
                match key {
                    Node::Scalar { loc, .. } => {
                        // The key is not walked recursively, so its own
                        // anchor/tag are emitted here.
                        out.push(span_to_raw(*loc, TOKEN_PROPERTY, 0, idx));
                        push_anchor_and_tag_tokens(key, out, idx);
                    }
                    Node::Mapping { .. } | Node::Sequence { .. } | Node::Alias { .. } => {
                        collect_node_tokens(key, out, idx);
                    }
                }
                collect_node_tokens(value, out, idx);
            }
        }
        Node::Sequence { items, .. } => {
            for item in items {
                collect_node_tokens(item, out, idx);
            }
        }
        Node::Alias { loc, .. } => {
            out.push(span_to_raw(*loc, TOKEN_VARIABLE, 0, idx));
        }
    }
}
/// Appends one comment token for every `Event::Comment` in the event stream of
/// `yaml`.
///
/// Events that are errors, or that are not comments, are skipped; scanning
/// continues past them.
fn collect_comment_tokens(yaml: &str, out: &mut Vec<RawToken>) {
    let line_index = LineIndex::new(yaml);
    let comment_spans = rlsp_yaml_parser::parse_events(yaml).filter_map(|event| {
        if let Ok((Event::Comment { .. }, span)) = event {
            Some(span)
        } else {
            None
        }
    });
    out.extend(comment_spans.map(|span| span_to_raw(span, TOKEN_COMMENT, 0, &line_index)));
}
/// Converts a source span into a `RawToken` with the given type and modifiers.
///
/// The line reported by `idx` is reduced by one (saturating at zero) to get the
/// token line. The length is the end column minus the start column, saturating
/// at zero.
///
/// NOTE(review): the line of `span.end` is ignored, so for a span that ends on
/// a later line the length is computed from columns on different lines.
fn span_to_raw(
    span: Span,
    token_type: u32,
    token_modifiers_bitset: u32,
    idx: &LineIndex,
) -> RawToken {
    let (reported_line, column) = idx.line_column(span.start);
    let end_column = idx.line_column(span.end).1;

    RawToken {
        line: reported_line.saturating_sub(1),
        start: column,
        length: end_column.saturating_sub(column),
        token_type,
        token_modifiers_bitset,
    }
}
/// Chooses the token for a scalar node, or `None` when nothing should be
/// highlighted.
///
/// * A scalar whose column span is empty yields no token.
/// * Literal / folded block scalars yield a one-column operator token at the
///   start of the span (presumably the `|` / `>` indicator; confirm against
///   the parser's spans).
/// * Quoted scalars are strings.
/// * Plain scalars are keywords (`true`, `false`, `yes`, `no`, `on`, `off`,
///   `null`, `~`, compared ASCII case-insensitively), numbers (per
///   `is_number`), or otherwise strings. A plain scalar whose value is blank
///   yields no token.
fn classify_scalar_node(
    value: &str,
    style: rlsp_yaml_parser::ScalarStyle,
    loc: Span,
    idx: &LineIndex,
) -> Option<RawToken> {
    const KEYWORDS: [&str; 8] = ["true", "false", "yes", "no", "on", "off", "null", "~"];

    let (reported_line, column) = idx.line_column(loc.start);
    let end_column = idx.line_column(loc.end).1;
    let span_len = end_column.saturating_sub(column);
    if span_len == 0 {
        return None;
    }
    let line = reported_line.saturating_sub(1);

    // Every token produced here starts at the scalar's start position.
    let token_at_start = |token_type: u32, length: u32| RawToken {
        line,
        start: column,
        length,
        token_type,
        token_modifiers_bitset: 0,
    };

    let token_type = match style {
        ScalarStyle::Literal(_) | ScalarStyle::Folded(_) => {
            return Some(token_at_start(TOKEN_OPERATOR, 1));
        }
        ScalarStyle::SingleQuoted | ScalarStyle::DoubleQuoted => TOKEN_STRING,
        ScalarStyle::Plain => {
            let text = value.trim();
            if text.is_empty() {
                return None;
            }
            if KEYWORDS.iter().any(|kw| text.eq_ignore_ascii_case(kw)) {
                TOKEN_KEYWORD
            } else if is_number(text) {
                TOKEN_NUMBER
            } else {
                TOKEN_STRING
            }
        }
    };

    Some(token_at_start(token_type, span_len))
}
/// Returns `true` when `s`, ignoring surrounding whitespace, is a decimal
/// number.
///
/// Accepted shape:
/// an optional single sign (`+` or `-`), one or more ASCII digits, an optional
/// fraction (`.` followed by at least one digit), and an optional exponent
/// (`e` or `E`, an optional sign, at least one digit).
///
/// Not accepted: hexadecimal / octal literals, `.inf` / `.nan`, a leading or
/// trailing bare dot (`.5`, `1.`), and anything with trailing characters.
fn is_number(s: &str) -> bool {
    let s = s.trim();

    // YAML's core schema allows either sign on integers and floats, so `+42`
    // is a number just like `-42`. Only one sign character is consumed.
    let unsigned = s.strip_prefix(['+', '-']).unwrap_or(s);

    // Covers the empty string and a lone sign as well: no digits, no number.
    let (int_part, rest) = split_digits(unsigned);
    if int_part.is_empty() {
        return false;
    }

    let rest = match rest.strip_prefix('.') {
        Some(after_dot) => {
            let (frac, tail) = split_digits(after_dot);
            if frac.is_empty() {
                return false;
            }
            tail
        }
        None => rest,
    };
    if rest.is_empty() {
        return true;
    }

    // Whatever is left must be a complete exponent.
    let Some(exponent) = rest.strip_prefix(['e', 'E']) else {
        return false;
    };
    let exponent = exponent.strip_prefix(['+', '-']).unwrap_or(exponent);
    let (exp_digits, leftover) = split_digits(exponent);
    !exp_digits.is_empty() && leftover.is_empty()
}

/// Splits `s` into its leading run of ASCII digits and the remainder.
///
/// Either part may be empty.
fn split_digits(s: &str) -> (&str, &str) {
    let end = s.find(|c: char| !c.is_ascii_digit()).unwrap_or(s.len());
    s.split_at(end)
}
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;
    use crate::test_utils::parse_docs;

    /// Undoes the delta encoding so tests can assert on absolute positions.
    ///
    /// Each tuple is `(line, start, length, token_type, token_modifiers_bitset)`.
    fn absolute(tokens: &[SemanticToken]) -> Vec<(u32, u32, u32, u32, u32)> {
        let mut line = 0u32;
        let mut start = 0u32;
        tokens
            .iter()
            .map(|t| {
                line += t.delta_line;
                start = if t.delta_line == 0 {
                    start + t.delta_start
                } else {
                    t.delta_start
                };
                (
                    line,
                    start,
                    t.length,
                    t.token_type,
                    t.token_modifiers_bitset,
                )
            })
            .collect()
    }

    // ---- Legend ----

    #[test]
    fn legend_has_correct_token_type_count() {
        assert_eq!(legend().token_types.len(), 8);
    }

    #[test]
    fn legend_has_correct_modifier_count() {
        assert_eq!(legend().token_modifiers.len(), 1);
    }

    // ---- Basic token kinds ----

    #[test]
    fn empty_document_produces_no_tokens() {
        assert!(semantic_tokens(&parse_docs(""), "").is_empty());
    }

    #[test]
    fn comment_line_produces_comment_token() {
        let text = "# comment";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let comments: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_COMMENT).collect();
        assert_eq!(comments.len(), 1);
        assert_eq!(comments[0].0, 0); // line
        assert_eq!(comments[0].1, 0); // start column
    }

    #[test]
    fn comment_line_with_indent_starts_at_hash() {
        let text = "  # indented comment";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let comments: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_COMMENT).collect();
        assert_eq!(comments.len(), 1);
        assert_eq!(comments[0].1, 2); // column of '#'
    }

    #[test]
    fn mapping_key_produces_property_token() {
        let text = "name: value";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let keys: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_PROPERTY).collect();
        assert_eq!(keys.len(), 1);
        assert_eq!(keys[0].0, 0); // line
        assert_eq!(keys[0].1, 0); // start column
        assert_eq!(keys[0].2, 4); // "name"
    }

    #[test]
    fn string_value_produces_string_token() {
        let text = "key: hello";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let strings: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_STRING).collect();
        assert!(!strings.is_empty());
        assert_eq!(strings[0].2, 5); // "hello"
    }

    #[test]
    fn quoted_string_value_produces_string_token() {
        let text = r#"key: "quoted""#;
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        assert!(abs.iter().any(|t| t.3 == TOKEN_STRING));
    }

    #[test]
    fn integer_value_produces_number_token() {
        let text = "count: 42";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let nums: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_NUMBER).collect();
        assert_eq!(nums.len(), 1);
        assert_eq!(nums[0].2, 2); // "42"
    }

    #[test]
    fn float_value_produces_number_token() {
        let text = "pi: 3.14";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let nums: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_NUMBER).collect();
        assert_eq!(nums.len(), 1);
        assert_eq!(nums[0].2, 4); // "3.14"
    }

    #[rstest]
    #[case::true_keyword("flag: true")]
    #[case::false_keyword("flag: false")]
    #[case::yes_keyword("flag: yes")]
    #[case::no_keyword("flag: no")]
    #[case::null_keyword("x: null")]
    #[case::tilde_null("x: ~")]
    #[case::on_keyword("flag: on")]
    #[case::off_keyword("flag: off")]
    fn produces_keyword_token(#[case] input: &str) {
        let abs = absolute(&semantic_tokens(&parse_docs(input), input));
        assert!(abs.iter().any(|t| t.3 == TOKEN_KEYWORD));
    }

    #[test]
    fn anchor_produces_variable_with_declaration_modifier() {
        let text = "base: &anchor value";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        assert!(
            abs.iter()
                .any(|t| t.3 == TOKEN_VARIABLE && t.4 == MOD_DECLARATION)
        );
    }

    #[test]
    fn alias_produces_variable_without_modifier() {
        let text = "a: &x val\nb: *x\n";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        assert!(abs.iter().any(|t| t.3 == TOKEN_VARIABLE && t.4 == 0));
    }

    #[test]
    fn tag_produces_type_token() {
        let text = "value: !include file.yaml";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        assert!(abs.iter().any(|t| t.3 == TOKEN_TYPE));
    }

    #[rstest]
    #[case::pipe("text: |")]
    #[case::gt("text: >")]
    #[case::pipe_minus("text: |-")]
    #[case::gt_minus("text: >-")]
    #[case::pipe_plus("text: |+")]
    #[case::gt_plus("text: >+")]
    fn block_scalar_produces_operator_token(#[case] input: &str) {
        let abs = absolute(&semantic_tokens(&parse_docs(input), input));
        assert!(abs.iter().any(|t| t.3 == TOKEN_OPERATOR));
    }

    // ---- Delta encoding ----

    #[test]
    fn delta_encoding_correct_for_multi_line_document() {
        let text = "a: 1\nb: 2\n";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let keys: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_PROPERTY).collect();
        assert!(keys.iter().any(|k| k.0 == 0 && k.1 == 0)); // "a"
        assert!(keys.iter().any(|k| k.0 == 1 && k.1 == 0)); // "b"
    }

    #[test]
    fn delta_line_is_zero_for_tokens_on_same_line() {
        let text = "key: value";
        let tokens = semantic_tokens(&parse_docs(text), text);
        for t in tokens.iter().skip(1) {
            assert_eq!(t.delta_line, 0);
        }
    }

    #[test]
    fn delta_start_is_relative_to_previous_token_on_same_line() {
        let text = "key: value";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let prop = abs.iter().find(|t| t.3 == TOKEN_PROPERTY).unwrap();
        let str_tok = abs.iter().find(|t| t.3 == TOKEN_STRING).unwrap();
        assert_eq!(prop.1, 0);
        assert_eq!(str_tok.1, 5);
    }

    // ---- Scalars in other positions ----

    #[test]
    fn bare_dash_sequence_item_produces_no_token() {
        let text = "items:\n  -\n";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let non_property: Vec<_> = abs.iter().filter(|t| t.3 != TOKEN_PROPERTY).collect();
        assert!(
            non_property.is_empty(),
            "bare '-' should produce no scalar token, got: {non_property:?}"
        );
    }

    #[test]
    fn negative_integer_produces_number_token() {
        let text = "temp: -42";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let nums: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_NUMBER).collect();
        assert_eq!(nums.len(), 1);
        assert_eq!(nums[0].2, 3); // "-42"
    }

    #[test]
    fn scientific_notation_produces_number_token() {
        let text = "val: 1.5e10";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        assert_eq!(abs.iter().filter(|t| t.3 == TOKEN_NUMBER).count(), 1);
    }

    #[test]
    fn negative_float_produces_number_token() {
        let text = "val: -3.14";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let nums: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_NUMBER).collect();
        assert_eq!(nums.len(), 1);
        assert_eq!(nums[0].2, 5); // "-3.14"
    }

    #[test]
    fn sequence_item_string_value_produces_string_token() {
        let text = "- hello";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let strings: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_STRING).collect();
        assert_eq!(strings.len(), 1);
        assert_eq!(strings[0].2, 5); // "hello"
    }

    #[test]
    fn sequence_item_number_value_produces_number_token() {
        let text = "- 42";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        assert_eq!(abs.iter().filter(|t| t.3 == TOKEN_NUMBER).count(), 1);
    }

    #[test]
    fn sequence_item_keyword_value_produces_keyword_token() {
        let text = "- true";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        assert!(abs.iter().any(|t| t.3 == TOKEN_KEYWORD));
    }

    #[test]
    fn tag_on_value_side_of_mapping_produces_type_token() {
        let text = "key: !str hello";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        assert!(
            abs.iter().any(|t| t.3 == TOKEN_TYPE),
            "tag on value side should produce type token"
        );
    }

    #[test]
    fn anchor_on_sequence_item_produces_variable_with_declaration() {
        let text = "- &myanchor value";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        assert!(
            abs.iter()
                .any(|t| t.3 == TOKEN_VARIABLE && t.4 == MOD_DECLARATION),
            "anchor on sequence item should produce variable with declaration modifier"
        );
    }

    /// An anchor-like marker (`&...`) that appears inside a comment must not
    /// be reported as an anchor.
    #[test]
    fn inline_comment_stops_marker_scan() {
        // The comment must actually contain an `&` marker, otherwise this
        // test cannot fail.
        let text = "key: value # &notananchor";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        assert!(
            abs.iter().all(|t| t.3 != TOKEN_VARIABLE),
            "marker inside comment should not produce variable token"
        );
    }

    #[test]
    fn delta_line_correct_across_multiple_lines() {
        let text = "a: 1\n\nb: 2\n";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let keys: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_PROPERTY).collect();
        assert!(keys.iter().any(|k| k.0 == 0)); // "a"
        assert!(keys.iter().any(|k| k.0 == 2)); // "b", after the blank line
    }

    // ---- Token positions checked against AST / event spans ----

    #[rstest]
    #[case::simple_key("name: value\n", 0u32, 0u32, 4u32)]
    fn mapping_key_token_position_matches_ast_key_loc(
        #[case] text: &str,
        #[case] expected_line: u32,
        #[case] expected_col: u32,
        #[case] expected_len: u32,
    ) {
        let docs = parse_docs(text);
        let abs = absolute(&semantic_tokens(&docs, text));
        let keys: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_PROPERTY).collect();
        assert!(!keys.is_empty(), "expected at least one property token");
        let key = keys[0];
        assert_eq!(key.0, expected_line, "key line mismatch");
        assert_eq!(key.1, expected_col, "key col mismatch");
        assert_eq!(key.2, expected_len, "key length mismatch");
    }

    #[rstest]
    #[case::anchor_before_value("base: &anchor value\n")]
    fn anchor_token_position_matches_anchor_loc_not_scalar_loc(#[case] text: &str) {
        let docs = parse_docs(text);
        let abs = absolute(&semantic_tokens(&docs, text));
        let idx = docs[0].line_index();
        let anchor_tokens: Vec<_> = abs
            .iter()
            .filter(|t| t.3 == TOKEN_VARIABLE && t.4 == MOD_DECLARATION)
            .collect();
        assert_eq!(anchor_tokens.len(), 1, "expected exactly one anchor token");
        let tok = anchor_tokens[0];

        let Node::Mapping { entries, .. } = &docs[0].root else {
            panic!("expected mapping root");
        };
        let (_, value) = &entries[0];
        let anchor_loc = value.anchor_loc().expect("expected anchor_loc on value");
        let scalar_loc = match value {
            Node::Scalar { loc, .. } => *loc,
            Node::Mapping { .. } | Node::Sequence { .. } | Node::Alias { .. } => {
                panic!("expected scalar value")
            }
        };

        let (anchor_start_line, anchor_start_col) = idx.line_column(anchor_loc.start);
        let (_, anchor_end_col) = idx.line_column(anchor_loc.end);
        let scalar_start_col = idx.line_column(scalar_loc.start).1;
        assert_eq!(
            tok.0,
            anchor_start_line.saturating_sub(1),
            "anchor token line must match anchor_loc"
        );
        assert_eq!(
            tok.1, anchor_start_col,
            "anchor token col must match anchor_loc, not scalar_loc col {scalar_start_col}"
        );
        assert_eq!(
            tok.2,
            anchor_end_col.saturating_sub(anchor_start_col),
            "anchor token length must span '&anchor'"
        );
    }

    #[rstest]
    #[case::alias_reference("a: &x val\nb: *x\n")]
    fn alias_token_position_matches_alias_loc(#[case] text: &str) {
        let docs = parse_docs(text);
        let abs = absolute(&semantic_tokens(&docs, text));
        let idx = docs[0].line_index();
        let alias_tokens: Vec<_> = abs
            .iter()
            .filter(|t| t.3 == TOKEN_VARIABLE && t.4 == 0)
            .collect();
        assert!(
            !alias_tokens.is_empty(),
            "expected at least one alias token"
        );
        let tok = alias_tokens
            .iter()
            .find(|t| t.0 == 1)
            .expect("alias on line 1");

        let Node::Mapping { entries, .. } = &docs[0].root else {
            panic!("expected mapping root");
        };
        let (_, alias_node) = &entries[1];
        let Node::Alias { loc, .. } = alias_node else {
            panic!("expected alias node");
        };

        let (alias_start_col, alias_end_col) =
            (idx.line_column(loc.start).1, idx.line_column(loc.end).1);
        assert_eq!(
            tok.1, alias_start_col,
            "alias token col must match alias.loc"
        );
        assert_eq!(
            tok.2,
            alias_end_col.saturating_sub(alias_start_col),
            "alias token length must span '*x'"
        );
    }

    #[rstest]
    #[case::simple_comment("# hello\n", 0u32, 0u32, 7u32)]
    fn comment_token_position_matches_event_comment_span(
        #[case] text: &str,
        #[case] expected_line: u32,
        #[case] expected_col: u32,
        #[case] expected_len: u32,
    ) {
        let docs = parse_docs(text);
        let abs = absolute(&semantic_tokens(&docs, text));
        let idx = LineIndex::new(text);
        let comment_tok = abs
            .iter()
            .find(|t| t.3 == TOKEN_COMMENT)
            .expect("expected a comment token");

        let comment_span = rlsp_yaml_parser::parse_events(text)
            .find_map(|r| {
                if let Ok((Event::Comment { .. }, span)) = r {
                    Some(span)
                } else {
                    None
                }
            })
            .expect("expected Event::Comment");

        let (comment_start_line, comment_start_col) = idx.line_column(comment_span.start);
        assert_eq!(
            comment_tok.0,
            comment_start_line.saturating_sub(1),
            "comment token line must match event span"
        );
        assert_eq!(
            comment_tok.1, comment_start_col,
            "comment token col must match event span"
        );
        assert_eq!(comment_tok.0, expected_line, "comment line");
        assert_eq!(comment_tok.1, expected_col, "comment col");
        assert_eq!(comment_tok.2, expected_len, "comment length");
    }

    #[rstest]
    #[case::int_tag("n: !!int 42\n")]
    fn tagged_scalar_tag_token_at_tag_loc_not_scalar_loc(#[case] text: &str) {
        let docs = parse_docs(text);
        let abs = absolute(&semantic_tokens(&docs, text));
        let idx = docs[0].line_index();

        let Node::Mapping { entries, .. } = &docs[0].root else {
            panic!("expected mapping root");
        };
        let (_, value) = &entries[0];
        let tag_loc = value.tag_loc().expect("expected tag_loc");
        let scalar_loc = match value {
            Node::Scalar { loc, .. } => *loc,
            Node::Mapping { .. } | Node::Sequence { .. } | Node::Alias { .. } => {
                panic!("expected scalar value")
            }
        };

        let tag_tokens: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_TYPE).collect();
        assert_eq!(tag_tokens.len(), 1, "expected exactly one type token");
        let tag_tok = tag_tokens[0];

        let (tag_start_col, tag_end_col) = (
            idx.line_column(tag_loc.start).1,
            idx.line_column(tag_loc.end).1,
        );
        let scalar_start_col = idx.line_column(scalar_loc.start).1;
        assert_eq!(
            tag_tok.1, tag_start_col,
            "tag token col must match tag_loc (col {tag_start_col}), not scalar col {scalar_start_col}"
        );
        assert_eq!(
            tag_tok.2,
            tag_end_col.saturating_sub(tag_start_col),
            "tag length must span '!!int'"
        );

        // Only checked when the scalar was classified as a number.
        let number_tok = abs.iter().find(|t| t.3 == TOKEN_NUMBER);
        if let Some(num) = number_tok {
            assert!(
                num.1 > tag_tok.1,
                "scalar token (col {}) must start after tag token (col {})",
                num.1,
                tag_tok.1
            );
        }
    }

    // ---- Anchors and tags on collection nodes ----

    #[test]
    fn anchor_on_mapping_node_produces_anchor_token() {
        let text = "&m\na: 1\n";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let anchor_tokens: Vec<_> = abs
            .iter()
            .filter(|t| t.3 == TOKEN_VARIABLE && t.4 == MOD_DECLARATION)
            .collect();
        assert!(
            !anchor_tokens.is_empty(),
            "expected anchor token for mapping node"
        );
        assert!(
            anchor_tokens
                .iter()
                .any(|t| t.0 == 0 && t.1 == 0 && t.2 == 2),
            "anchor '&m' must be at (0, 0) len 2, got: {anchor_tokens:?}"
        );
    }

    #[test]
    fn anchor_on_sequence_node_produces_anchor_token() {
        let text = "items: &seq\n  - one\n";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        assert!(
            abs.iter()
                .any(|t| t.3 == TOKEN_VARIABLE && t.4 == MOD_DECLARATION),
            "anchor on sequence node should produce variable with declaration modifier"
        );
    }

    #[test]
    fn tag_on_mapping_node_produces_tag_token() {
        let text = "!!map\na: 1\n";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let type_tokens: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_TYPE).collect();
        assert!(
            !type_tokens.is_empty(),
            "expected tag token for mapping node"
        );
        assert!(
            type_tokens.iter().any(|t| t.0 == 0 && t.1 == 0 && t.2 == 5),
            "tag '!!map' must be at (0, 0) len 5, got: {type_tokens:?}"
        );
    }

    #[test]
    fn nested_mapping_keys_all_produce_property_tokens() {
        let text = "outer:\n  inner: value\n";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let keys: Vec<_> = abs.iter().filter(|t| t.3 == TOKEN_PROPERTY).collect();
        assert_eq!(keys.len(), 2, "expected 2 property tokens, got: {keys:?}");
        assert!(
            keys.iter().any(|k| k.0 == 0 && k.1 == 0),
            "'outer' at (0, 0) missing, got: {keys:?}"
        );
        assert!(
            keys.iter().any(|k| k.0 == 1 && k.1 == 2),
            "'inner' at (1, 2) missing, got: {keys:?}"
        );
    }

    #[test]
    fn indented_comment_position_from_event_span() {
        let text = "  # indented\n";
        let abs = absolute(&semantic_tokens(&parse_docs(text), text));
        let comment_tok = abs
            .iter()
            .find(|t| t.3 == TOKEN_COMMENT)
            .expect("expected comment token");
        assert_eq!(comment_tok.0, 0, "comment on line 0");
        assert_eq!(comment_tok.1, 2, "comment starts at col 2 (the '#')");
        assert_eq!(comment_tok.2, 10, "length of '# indented'");
    }

    // ---- Ordering and multibyte text ----

    #[test]
    fn multiple_tokens_on_same_document_sorted_by_position() {
        let text = "key: &anchor value\n";
        let docs = parse_docs(text);
        let abs = absolute(&semantic_tokens(&docs, text));
        assert!(!abs.is_empty(), "expected tokens");
        for window in abs.windows(2) {
            let a = window[0];
            let b = window[1];
            assert!(
                (a.0, a.1) <= (b.0, b.1),
                "tokens not sorted: {:?} > {:?}",
                (a.0, a.1),
                (b.0, b.1)
            );
        }
    }

    #[test]
    fn semantic_tokens_multibyte_key_delta_correct() {
        let text = "日本語: val\n";
        let docs = parse_docs(text);
        let abs = absolute(&semantic_tokens(&docs, text));
        let key_tok = abs
            .iter()
            .find(|t| t.3 == TOKEN_PROPERTY && t.0 == 0 && t.1 == 0)
            .expect("expected a property token for '日本語' on line 0, col 0");
        assert_eq!(
            key_tok.2, 3,
            "token length for '日本語' must be 3 (codepoints), not 9 (bytes), got {}",
            key_tok.2
        );
    }
}