1use crate::{ScopeId, ScopeSpan};
8
/// Scope id given to the contents of a string that is followed by `:` on the
/// same line, i.e. a string used as an object key.
pub const PROPERTY_NAME: ScopeId = ScopeId::new(0);
/// Scope id given to the opening and closing `"` of every string.
pub const STRING_PUNCTUATION: ScopeId = ScopeId::new(1);
/// Scope id given to the contents of a string that is not used as a key.
pub const STRING: ScopeId = ScopeId::new(2);
/// Scope id given to numeric literals.
pub const NUMBER: ScopeId = ScopeId::new(3);
/// Scope id given to the literals `true`, `false` and `null`.
pub const CONSTANT: ScopeId = ScopeId::new(4);
19
/// TextMate-style scope names used for JSON tokens.
///
/// The entries are listed in the same order as the numeric values of
/// `PROPERTY_NAME`, `STRING_PUNCTUATION`, `STRING`, `NUMBER` and `CONSTANT`
/// (0 through 4).
// NOTE(review): presumably a `ScopeId` is an index into this slice — confirm
// against the code that consumes `ScopeId`, and keep both lists in sync.
pub const SCOPES: &[&str] = &[
    "support.type.property-name.json",
    "punctuation.definition.string.json",
    "string.quoted.double.json",
    "constant.numeric.json",
    "constant.language.json",
];
28
29pub fn tokenize_line_into(line: &str, spans: &mut Vec<ScopeSpan>) {
31 spans.clear();
32 let line = line.trim_end_matches(['\r', '\n']);
33 let bytes = line.as_bytes();
34 let mut pos = 0;
35
36 while pos < bytes.len() {
37 match bytes[pos] {
38 b'"' => {
39 let end = string_end(bytes, pos + 1);
40 let content_scope = if string_is_key(bytes, end) {
41 PROPERTY_NAME
42 } else {
43 STRING
44 };
45 push(spans, pos, pos + 1, STRING_PUNCTUATION);
46 push(spans, pos + 1, end, content_scope);
47
48 if end < bytes.len() {
49 push(spans, end, end + 1, STRING_PUNCTUATION);
50 pos = end + 1;
51 } else {
52 pos = end;
53 }
54 }
55 b'-' | b'0'..=b'9' => {
56 let end = number_end(bytes, pos);
57 push(spans, pos, end, NUMBER);
58 pos = end;
59 }
60 b't' if bytes[pos..].starts_with(b"true") => {
61 push(spans, pos, pos + 4, CONSTANT);
62 pos += 4;
63 }
64 b'f' if bytes[pos..].starts_with(b"false") => {
65 push(spans, pos, pos + 5, CONSTANT);
66 pos += 5;
67 }
68 b'n' if bytes[pos..].starts_with(b"null") => {
69 push(spans, pos, pos + 4, CONSTANT);
70 pos += 4;
71 }
72 _ => pos += 1,
73 }
74 }
75}
76
77fn push(spans: &mut Vec<ScopeSpan>, start: usize, end: usize, scope: ScopeId) {
79 if start < end {
80 spans.push(ScopeSpan { start, end, scope });
81 }
82}
83
/// Finds where the string whose contents begin at `pos` ends.
///
/// Returns the index of the first unescaped `"` at or after `pos`. When the
/// string is not terminated, the scan runs off the slice and `bytes.len()` is
/// returned (or `pos` itself if it already lies past the end).
fn string_end(bytes: &[u8], mut pos: usize) -> usize {
    while let Some(&byte) = bytes.get(pos) {
        match byte {
            b'"' => return pos,
            // A backslash escapes the byte after it, so step over both; the
            // clamp covers a backslash that is the last byte of the line.
            b'\\' => pos = (pos + 2).min(bytes.len()),
            _ => pos += 1,
        }
    }
    pos
}
98
/// Reports whether the string whose closing quote sits at index `end` is an
/// object key.
///
/// That is the case when the first byte after `end` that is not ASCII
/// whitespace is a `:`. An `end` at or beyond the end of the slice (an
/// unterminated string) yields `false`.
fn string_is_key(bytes: &[u8], end: usize) -> bool {
    let next_significant = bytes
        .get(end.saturating_add(1)..)
        .and_then(|rest| rest.iter().find(|byte| !byte.is_ascii_whitespace()));
    next_significant == Some(&b':')
}
107
/// Returns the index one past the numeric literal that starts at `pos`.
///
/// The accepted shape follows the JSON number grammar:
/// `[-] digits [. digits] [(e|E) [+|-] digits]`. A fraction or an exponent is
/// only consumed when it contains at least one digit, so for `1.`, `1e` or
/// `1e+` the literal ends right after the `1` and the dangling characters are
/// left for the caller.
///
/// A leading `-` is always consumed, even without digits after it, so the
/// result is greater than `pos` whenever `bytes[pos]` is `-` or a digit.
fn number_end(bytes: &[u8], mut pos: usize) -> usize {
    // Index of the first non-digit at or after `at`.
    let skip_digits = |mut at: usize| -> usize {
        while matches!(bytes.get(at), Some(b'0'..=b'9')) {
            at += 1;
        }
        at
    };

    if bytes.get(pos) == Some(&b'-') {
        pos += 1;
    }
    pos = skip_digits(pos);

    if bytes.get(pos) == Some(&b'.') {
        let fraction_start = pos + 1;
        let fraction_end = skip_digits(fraction_start);
        // Accept the '.' only together with at least one fraction digit.
        if fraction_end > fraction_start {
            pos = fraction_end;
        }
    }

    if matches!(bytes.get(pos), Some(b'e' | b'E')) {
        let mut exponent_start = pos + 1;
        if matches!(bytes.get(exponent_start), Some(b'+' | b'-')) {
            exponent_start += 1;
        }
        let exponent_end = skip_digits(exponent_start);
        // Accept the marker (and sign) only together with exponent digits.
        if exponent_end > exponent_start {
            pos = exponent_end;
        }
    }

    pos
}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes `line` and returns, for each requested `(scope, text)` pair,
    /// whether some span with that scope covers exactly that text.
    fn found(line: &str, wanted: &[(ScopeId, &str)]) -> Vec<bool> {
        let mut spans = Vec::new();
        tokenize_line_into(line, &mut spans);

        wanted
            .iter()
            .map(|(scope, text)| {
                spans
                    .iter()
                    .any(|span| span.scope == *scope && &line[span.start..span.end] == *text)
            })
            .collect()
    }

    #[test]
    fn tokenizes_keys_strings_numbers_and_constants() {
        let line = r#" "a": "b\"c", "n": 12.5e-1, "ok": true"#;

        let hits = found(
            line,
            &[
                (PROPERTY_NAME, "a"),
                (STRING, r#"b\"c"#),
                (NUMBER, "12.5e-1"),
                (CONSTANT, "true"),
            ],
        );

        assert!(hits[0]);
        assert!(hits[1]);
        assert!(hits[2]);
        assert!(hits[3]);
    }

    #[test]
    fn keeps_unterminated_strings_on_the_same_line() {
        let line = r#" "a": "unterminated"#;

        let hits = found(line, &[(STRING, "unterminated")]);

        assert!(hits[0]);
    }
}