use glua_parser::LuaSyntaxToken;
use lsp_types::{SemanticTokenModifier, SemanticTokenType};
use rowan::{TextRange, TextSize};
use crate::handlers::semantic_token::semantic_token_builder::SemanticBuilder;
/// Emits semantic tokens for a string token, separating escape sequences from
/// the literal text around them.
///
/// The token text is scanned for backslashes. Text between escapes is pushed
/// as a plain literal segment; every escape is pushed as its own segment,
/// classified as valid or invalid by `consume_escape`. All offsets are byte
/// offsets into the token text, shifted by the token's start position.
pub fn highlight_string_escapes(builder: &mut SemanticBuilder, token: &LuaSyntaxToken) {
    let source = token.text();
    let origin = token.text_range().start();
    let mut cursor = source.char_indices().peekable();
    // Byte offset where the literal run currently being accumulated begins.
    let mut literal_from = 0usize;

    loop {
        let backslash_at = match cursor.next() {
            Some((at, '\\')) => at,
            Some(_) => continue,
            None => break,
        };

        // Flush the literal text that preceded this escape.
        push_segment(
            builder,
            source,
            origin,
            literal_from,
            backslash_at,
            SegmentKind::Literal,
        );

        let kind = if consume_escape(&mut cursor) {
            SegmentKind::ValidEscape
        } else {
            SegmentKind::InvalidEscape
        };
        // The escape ends where the next unconsumed character starts, or at
        // the end of the text when the escape ran to the end.
        let escape_end = cursor.peek().map_or(source.len(), |&(at, _)| at);
        push_segment(builder, source, origin, backslash_at, escape_end, kind);
        literal_from = escape_end;
    }

    // Whatever follows the last escape (or the whole text if none was found).
    push_segment(
        builder,
        source,
        origin,
        literal_from,
        source.len(),
        SegmentKind::Literal,
    );
}
/// Classification of one contiguous span inside a string token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SegmentKind {
    /// Ordinary string content outside of any escape sequence.
    Literal,
    /// An escape sequence that was recognised as well-formed.
    ValidEscape,
    /// A backslash sequence that was not recognised as a well-formed escape.
    InvalidEscape,
}
/// Pushes the byte span `start..end` of `text` to `builder` as a `STRING`
/// semantic token, with a modifier chosen from `kind`.
///
/// `start` and `end` are byte offsets into `text`; `base` is added to both to
/// form the range handed to the builder. A span with `start >= end` is
/// skipped without pushing anything.
fn push_segment(
    builder: &mut SemanticBuilder,
    text: &str,
    base: TextSize,
    start: usize,
    end: usize,
    kind: SegmentKind,
) {
    if end <= start {
        return;
    }

    let modifiers: &[SemanticTokenModifier] = match kind {
        SegmentKind::Literal => &[],
        SegmentKind::ValidEscape => &[SemanticTokenModifier::MODIFICATION],
        SegmentKind::InvalidEscape => &[SemanticTokenModifier::DEPRECATED],
    };

    // Shift a token-local byte offset by the token's start position.
    let shifted = |byte: usize| base + TextSize::from(byte as u32);
    let range = TextRange::new(shifted(start), shifted(end));

    builder.push_at_range(
        &text[start..end],
        range,
        SemanticTokenType::STRING,
        modifiers,
    );
}
/// Consumes the body of one escape sequence and reports whether it is valid.
///
/// The caller must already have taken the leading backslash from `chars`;
/// this function advances the iterator past every character that belongs to
/// the escape, so that the next item yielded is the first character after it.
///
/// Recognised forms:
/// - single-character escapes: `a b f n r t v \ ' "`
/// - a line break, where a `\r\n` or `\n\r` pair counts as one line break
/// - `z`, followed by any run of ASCII whitespace (including vertical tab)
/// - `x` followed by exactly two hexadecimal digits
/// - `u{…}` with at least one hexadecimal digit naming a Unicode scalar value
/// - one to three decimal digits whose value is at most 255
///
/// Returns `false` for anything else, including a backslash at the very end
/// of the input. For malformed `x`/`u` escapes only the characters that were
/// accepted before the error are consumed.
fn consume_escape<I>(chars: &mut std::iter::Peekable<I>) -> bool
where
    I: Iterator<Item = (usize, char)>,
{
    /// Whitespace as C's `isspace` defines it in the "C" locale. Unlike
    /// `char::is_ascii_whitespace`, this includes vertical tab (U+000B).
    fn is_lua_space(c: char) -> bool {
        c.is_ascii_whitespace() || c == '\x0B'
    }

    let Some((_, next)) = chars.next() else {
        // Lone backslash at the end of the token.
        return false;
    };

    match next {
        'a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' | '\\' | '\'' | '"' => true,
        '\r' | '\n' => {
            // "\r\n" and "\n\r" form a single line break, so the second half
            // of such a pair is part of this escape rather than literal text.
            let partner = if next == '\r' { '\n' } else { '\r' };
            let _ = chars.next_if(|&(_, c)| c == partner);
            true
        }
        'z' => {
            while chars.next_if(|&(_, c)| is_lua_space(c)).is_some() {}
            true
        }
        'x' => {
            let mut digits = 0;
            while digits < 2 && chars.next_if(|&(_, c)| c.is_ascii_hexdigit()).is_some() {
                digits += 1;
            }
            digits == 2
        }
        'u' => {
            if chars.next_if(|&(_, c)| c == '{').is_none() {
                return false;
            }
            // `None` once the accumulated value no longer fits in a `u32`;
            // digits keep being consumed so the whole escape is still covered.
            let mut value: Option<u32> = Some(0);
            let mut saw_digit = false;
            while let Some((_, d)) = chars.next_if(|&(_, c)| c.is_ascii_hexdigit()) {
                saw_digit = true;
                value = match (value, d.to_digit(16)) {
                    (Some(v), Some(digit)) => {
                        v.checked_mul(16).and_then(|v| v.checked_add(digit))
                    }
                    _ => None,
                };
            }
            let closed = chars.next_if(|&(_, c)| c == '}').is_some();
            // `char::from_u32` rejects surrogates and values above U+10FFFF.
            closed && saw_digit && value.and_then(char::from_u32).is_some()
        }
        '0'..='9' => {
            let mut value = u32::from(next) - u32::from('0');
            let mut digits = 1;
            while digits < 3 {
                let Some((_, d)) = chars.next_if(|&(_, c)| c.is_ascii_digit()) else {
                    break;
                };
                value = value * 10 + (u32::from(d) - u32::from('0'));
                digits += 1;
            }
            // A decimal escape denotes a single byte.
            value <= u32::from(u8::MAX)
        }
        _ => false,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Splits `text` the same way the loop in `highlight_string_escapes`
    /// does, but records the result instead of pushing to a builder.
    ///
    /// Each entry is `(byte_offset, byte_len, label)` where the label is
    /// `"string"` for literal text, `"escape"` for a valid escape and
    /// `"invalid"` for a rejected one. Empty literal runs are omitted.
    fn segments(text: &str) -> Vec<(usize, usize, &'static str)> {
        let mut out = Vec::new();
        let mut run_start = 0usize;
        let mut chars = text.char_indices().peekable();
        while let Some((idx, c)) = chars.next() {
            if c != '\\' {
                continue;
            }
            if idx > run_start {
                out.push((run_start, idx - run_start, "string"));
            }
            let valid = consume_escape(&mut chars);
            let escape_end = chars.peek().map(|(i, _)| *i).unwrap_or(text.len());
            out.push((
                idx,
                escape_end - idx,
                if valid { "escape" } else { "invalid" },
            ));
            run_start = escape_end;
        }
        if text.len() > run_start {
            out.push((run_start, text.len() - run_start, "string"));
        }
        out
    }

    #[test]
    fn plain_string_is_single_run() {
        assert_eq!(segments("\"abc\""), vec![(0, 5, "string")]);
    }

    #[test]
    fn empty_and_unterminated() {
        assert_eq!(segments("\"\""), vec![(0, 2, "string")]);
        assert_eq!(segments("\""), vec![(0, 1, "string")]);
        assert_eq!(segments("\"abc"), vec![(0, 4, "string")]);
    }

    #[test]
    fn simple_escapes() {
        assert_eq!(
            segments("\"a\\n\""),
            vec![(0, 2, "string"), (2, 2, "escape"), (4, 1, "string")]
        );
        for esc in ["\\\\", "\\\"", "\\t", "\\z", "\\a"] {
            let s = format!("\"{esc}\"");
            assert_eq!(
                segments(&s),
                vec![(0, 1, "string"), (1, 2, "escape"), (3, 1, "string")],
                "escape {esc}"
            );
        }
    }

    #[test]
    fn line_continuation_escape() {
        assert_eq!(
            segments("\"\\\n\""),
            vec![(0, 1, "string"), (1, 2, "escape"), (3, 1, "string")]
        );
    }

    #[test]
    fn z_escape_consumes_following_whitespace() {
        // The skipped run is newline, space, tab. The tab is written as an
        // escape so the number of whitespace bytes cannot be altered by
        // source reformatting.
        assert_eq!(
            segments("\"a\\z\n \tb\""),
            vec![(0, 2, "string"), (2, 5, "escape"), (7, 2, "string")]
        );
    }

    #[test]
    fn z_escape_does_not_consume_unicode_whitespace() {
        // U+00A0 is two bytes in UTF-8 and stays part of the literal run.
        assert_eq!(
            segments("\"\\z\u{00a0}x\""),
            vec![(0, 1, "string"), (1, 2, "escape"), (3, 4, "string")]
        );
    }

    #[test]
    fn decimal_escapes() {
        assert_eq!(
            segments("\"\\65\""),
            vec![(0, 1, "string"), (1, 3, "escape"), (4, 1, "string")]
        );
        assert_eq!(
            segments("\"\\9\""),
            vec![(0, 1, "string"), (1, 2, "escape"), (3, 1, "string")]
        );
        assert_eq!(
            segments("\"\\255\""),
            vec![(0, 1, "string"), (1, 4, "escape"), (5, 1, "string")]
        );
        // Three digits are still consumed, but the value exceeds one byte.
        assert_eq!(
            segments("\"\\256\""),
            vec![(0, 1, "string"), (1, 4, "invalid"), (5, 1, "string")]
        );
    }

    #[test]
    fn hex_escapes() {
        assert_eq!(
            segments("\"\\x41\""),
            vec![(0, 1, "string"), (1, 4, "escape"), (5, 1, "string")]
        );
        assert_eq!(
            segments("\"\\x4\""),
            vec![(0, 1, "string"), (1, 3, "invalid"), (4, 1, "string")]
        );
        assert_eq!(
            segments("\"\\xZZ\""),
            vec![(0, 1, "string"), (1, 2, "invalid"), (3, 3, "string")]
        );
    }

    #[test]
    fn unicode_escapes() {
        assert_eq!(
            segments("\"\\u{48}\""),
            vec![(0, 1, "string"), (1, 6, "escape"), (7, 1, "string")]
        );
        assert_eq!(
            segments("\"\\u{}\""),
            vec![(0, 1, "string"), (1, 4, "invalid"), (5, 1, "string")]
        );
        assert_eq!(
            segments("\"\\u48\""),
            vec![(0, 1, "string"), (1, 2, "invalid"), (3, 3, "string")]
        );
        assert_eq!(
            segments("\"\\u{110000}\""),
            vec![(0, 1, "string"), (1, 10, "invalid"), (11, 1, "string")]
        );
        // Surrogate code points are not Unicode scalar values.
        assert_eq!(
            segments("\"\\u{D800}\""),
            vec![(0, 1, "string"), (1, 8, "invalid"), (9, 1, "string")]
        );
    }

    #[test]
    fn invalid_escape() {
        assert_eq!(
            segments("\"\\q\""),
            vec![(0, 1, "string"), (1, 2, "invalid"), (3, 1, "string")]
        );
        // A trailing backslash with nothing after it.
        assert_eq!(segments("\"\\"), vec![(0, 1, "string"), (1, 1, "invalid")]);
    }

    #[test]
    fn multibyte_literal_between_escapes() {
        // U+00E9 occupies two bytes, so the escape starts at byte 3.
        let s = "\"\u{e9}\\n\"";
        assert_eq!(
            segments(s),
            vec![(0, 3, "string"), (3, 2, "escape"), (5, 1, "string")]
        );
    }

    #[test]
    fn consecutive_escapes() {
        // No empty literal segment is reported between adjacent escapes.
        assert_eq!(
            segments("\"\\n\\t\""),
            vec![
                (0, 1, "string"),
                (1, 2, "escape"),
                (3, 2, "escape"),
                (5, 1, "string")
            ]
        );
    }
}