use crate::{ScopeId, ScopeSpan};
// Scope ids handed out by the tokenizer. INVARIANT: the integer passed to
// `ScopeId::new` must equal the index of the matching selector in `SCOPES`
// below — keep the two lists in the same order when adding entries.
pub const PROPERTY_NAME: ScopeId = ScopeId::new(0);
pub const STRING_PUNCTUATION: ScopeId = ScopeId::new(1);
pub const STRING: ScopeId = ScopeId::new(2);
pub const NUMBER: ScopeId = ScopeId::new(3);
pub const CONSTANT: ScopeId = ScopeId::new(4);
// TextMate-style scope selectors, indexed by the `ScopeId` constants above.
pub const SCOPES: &[&str] = &[
    "support.type.property-name.json",
    "punctuation.definition.string.json",
    "string.quoted.double.json",
    "constant.numeric.json",
    "constant.language.json",
];
/// Tokenizes one line of JSON into highlight spans, appending into `spans`
/// (cleared first). Span offsets are byte indices into `line` after any
/// trailing `'\r'`/`'\n'` bytes have been stripped.
///
/// Bytes that match no token class (structural punctuation, whitespace,
/// invalid input) are skipped and receive no scope.
pub fn tokenize_line_into(line: &str, spans: &mut Vec<ScopeSpan>) {
    spans.clear();
    // Strip the line terminator so spans never cover invisible characters.
    let line = line.trim_end_matches(['\r', '\n']);
    let bytes = line.as_bytes();
    let mut pos = 0;
    while pos < bytes.len() {
        match bytes[pos] {
            b'"' => {
                let end = string_end(bytes, pos + 1);
                // A string followed (after whitespace) by ':' is an object
                // key; everything else is a plain string value.
                let content_scope = if string_is_key(bytes, end) {
                    PROPERTY_NAME
                } else {
                    STRING
                };
                push(spans, pos, pos + 1, STRING_PUNCTUATION);
                push(spans, pos + 1, end, content_scope);
                if end < bytes.len() {
                    push(spans, end, end + 1, STRING_PUNCTUATION);
                    pos = end + 1;
                } else {
                    // Unterminated string: content highlight runs to the end
                    // of the line and there is no closing-quote span.
                    pos = end;
                }
            }
            b'0'..=b'9' => {
                let end = number_end(bytes, pos);
                push(spans, pos, end, NUMBER);
                pos = end;
            }
            // Only start a number at '-' when a digit follows; previously a
            // stray '-' (e.g. in "-abc") was pushed as a one-byte NUMBER span.
            b'-' if matches!(bytes.get(pos + 1), Some(digit) if digit.is_ascii_digit()) => {
                let end = number_end(bytes, pos);
                push(spans, pos, end, NUMBER);
                pos = end;
            }
            b't' if bytes[pos..].starts_with(b"true") => {
                push(spans, pos, pos + 4, CONSTANT);
                pos += 4;
            }
            b'f' if bytes[pos..].starts_with(b"false") => {
                push(spans, pos, pos + 5, CONSTANT);
                pos += 5;
            }
            b'n' if bytes[pos..].starts_with(b"null") => {
                push(spans, pos, pos + 4, CONSTANT);
                pos += 4;
            }
            _ => pos += 1,
        }
    }
}
/// Appends a span, silently discarding degenerate (empty) ranges so callers
/// can pass `start == end` without special-casing empty string contents.
fn push(spans: &mut Vec<ScopeSpan>, start: usize, end: usize, scope: ScopeId) {
    if start >= end {
        return;
    }
    spans.push(ScopeSpan { start, end, scope });
}
/// Returns the byte index of the closing `"` of a string whose contents begin
/// at `start`, honoring backslash escapes (`\"` does not terminate). If the
/// line ends before the string closes, returns the index one past the last
/// byte examined.
fn string_end(bytes: &[u8], start: usize) -> usize {
    let mut idx = start;
    loop {
        let Some(&byte) = bytes.get(idx) else {
            return idx;
        };
        match byte {
            b'"' => return idx,
            b'\\' => {
                // Skip the escape introducer and the byte it escapes, if any.
                idx += 1;
                if idx >= bytes.len() {
                    return idx;
                }
                idx += 1;
            }
            _ => idx += 1,
        }
    }
}
/// Reports whether the string whose closing quote sits at `end` is an object
/// key, i.e. whether the first non-whitespace byte after the quote is `:`.
/// `end` at or past the end of the line (unterminated string) yields `false`.
fn string_is_key(bytes: &[u8], end: usize) -> bool {
    let after = bytes.get(end.saturating_add(1)..).unwrap_or(&[]);
    matches!(
        after.iter().find(|byte| !byte.is_ascii_whitespace()),
        Some(&b':')
    )
}
/// Returns the index one past a JSON-style number starting at `start`:
/// optional leading `-`, integer digits, optional `.fraction`, optional
/// `e`/`E` exponent with optional sign. Lenient by design — it accepts
/// degenerate forms such as a bare `-` or a trailing `e`, consuming them.
fn number_end(bytes: &[u8], start: usize) -> usize {
    // Advances past any run of ASCII digits beginning at `idx`.
    let digits_after = |mut idx: usize| {
        while matches!(bytes.get(idx), Some(b) if b.is_ascii_digit()) {
            idx += 1;
        }
        idx
    };
    let mut idx = start;
    if bytes.get(idx) == Some(&b'-') {
        idx += 1;
    }
    idx = digits_after(idx);
    if bytes.get(idx) == Some(&b'.') {
        idx = digits_after(idx + 1);
    }
    if matches!(bytes.get(idx), Some(b'e' | b'E')) {
        idx += 1;
        if matches!(bytes.get(idx), Some(b'+' | b'-')) {
            idx += 1;
        }
        idx = digits_after(idx);
    }
    idx
}
#[cfg(test)]
mod tests {
    use super::*;

    // End-to-end check: each token class in a representative line receives
    // the expected scope, located by slicing the line with the span offsets.
    #[test]
    fn tokenizes_keys_strings_numbers_and_constants() {
        let line = r#" "a": "b\"c", "n": 12.5e-1, "ok": true"#;
        let mut spans = Vec::new();
        tokenize_line_into(line, &mut spans);
        assert!(
            spans
                .iter()
                .any(|span| span.scope == PROPERTY_NAME && &line[span.start..span.end] == "a")
        );
        // The escaped quote must not terminate the string early.
        assert!(
            spans
                .iter()
                .any(|span| span.scope == STRING && &line[span.start..span.end] == r#"b\"c"#)
        );
        // Fraction and signed exponent are consumed as one number token.
        assert!(
            spans
                .iter()
                .any(|span| span.scope == NUMBER && &line[span.start..span.end] == "12.5e-1")
        );
        assert!(
            spans
                .iter()
                .any(|span| span.scope == CONSTANT && &line[span.start..span.end] == "true")
        );
    }

    // A string with no closing quote is still highlighted to end of line
    // rather than being dropped or spilling onto following lines.
    #[test]
    fn keeps_unterminated_strings_on_the_same_line() {
        let line = r#" "a": "unterminated"#;
        let mut spans = Vec::new();
        tokenize_line_into(line, &mut spans);
        assert!(
            spans
                .iter()
                .any(|span| span.scope == STRING && &line[span.start..span.end] == "unterminated")
        );
    }
}