1#![allow(non_camel_case_types)]
4
5use logos::{Lexer, Logos};
6
/// Every token and node kind used by the syntax tree.
///
/// Variants carrying a `#[regex]` or `#[token]` attribute are produced by the
/// logos-generated lexer. Variants without one are never produced by the lexer
/// itself; presumably they are assigned by the parser when it builds the tree
/// (NOTE(review): the parser is not part of this file, confirm there).
///
/// The enum is `#[repr(u16)]` and starts at `0` with implicit discriminants,
/// so the discriminants are contiguous in declaration order. `VALUE` must stay
/// the last variant because `Lang::kind_from_raw` uses it as the upper bound
/// before transmuting a raw `u16` back into a `SyntaxKind`.
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
pub enum SyntaxKind {
    /// One or more spaces or tabs.
    #[regex(r"([ \t])+")]
    WHITESPACE = 0,

    /// One or more consecutive line breaks (`\n` or `\r\n`).
    #[regex(r"(\n|\r\n)+")]
    NEWLINE,

    /// Block comment: the pattern matches the opening `/*`, then
    /// `lex_comment_block` consumes up to and including the closing `*/`.
    #[regex(r"/\*", lex_comment_block)]
    BLOCK_COMMENT,

    /// Line comment: `//` followed by everything up to (not including) the
    /// next `\n` or `\r`.
    #[regex(r"//[^\n\r]*")]
    LINE_COMMENT,

    /// One or more ASCII letters, digits or underscores (explicit logos
    /// priority 2).
    #[regex(r"[A-Za-z0-9_]+", priority = 2)]
    IDENT,

    /// `@` followed by zero or more ASCII letters, digits or underscores.
    #[regex(r"@[A-Za-z0-9_]*")]
    ANNOTATION_KEY,

    /// Same character set as `IDENT` plus the glob characters `*` and `?`.
    #[regex(r"[*?A-Za-z0-9_]+")]
    IDENT_WITH_GLOB,

    /// The `.` character.
    #[token(".")]
    PERIOD,

    /// The `,` character.
    #[token(",")]
    COMMA,

    /// The `:` character.
    #[token(":")]
    COLON,

    /// String opened by `'`; the remainder is consumed by `lex_single_quote`.
    #[regex(r#"'"#, lex_single_quote)]
    SINGLE_QUOTE,

    /// String opened by `"`; the remainder is consumed by `lex_double_quote`.
    #[regex(r#"""#, lex_double_quote)]
    DOUBLE_QUOTE,

    /// String opened by a backtick; the remainder is consumed by
    /// `lex_backtick_quote`.
    #[regex(r#"`"#, lex_backtick_quote)]
    BACKTICK_QUOTE,

    /// Optionally signed run of decimal digits and underscores (explicit
    /// logos priority 4).
    #[regex(r"[+-]?[0-9_]+", priority = 4)]
    INTEGER,

    /// `0x` followed by hexadecimal digits and underscores.
    #[regex(r"0x[0-9A-Fa-f_]+")]
    INTEGER_HEX,

    /// `0o` followed by octal digits and underscores.
    #[regex(r"0o[0-7_]+")]
    INTEGER_OCT,

    /// `0b` followed by `0`, `1` and underscores.
    #[regex(r"0b(0|1|_)+")]
    INTEGER_BIN,

    /// Optionally signed floating point literal: a mantissa containing a
    /// decimal point and/or an exponent, or the words `nan` / `inf`
    /// (explicit logos priority 3).
    #[regex(
        r"[-+]?((([0-9_]+)?(\.[0-9_]+)|([0-9_]+\.)([0-9_]+)?)?([eE][+-]?[0-9_]+)?|nan|inf)",
        priority = 3
    )]
    FLOAT,

    /// The literals `true` and `false`.
    #[regex(r"true|false")]
    BOOL,

    /// The literal `null`.
    #[token("null")]
    NULL,

    /// The `(` character.
    #[token("(")]
    PARENTHESES_START,

    /// The `)` character.
    #[token(")")]
    PARENTHESES_END,

    /// The `[` character.
    #[token("[")]
    BRACKET_START,

    /// The `]` character.
    #[token("]")]
    BRACKET_END,

    /// The `{` character.
    #[token("{")]
    BRACE_START,

    /// The `}` character.
    #[token("}")]
    BRACE_END,

    // The kinds below have no lexer pattern attached.
    KEY,
    SCALAR,
    PROPERTY,
    OBJECT,
    ARRAY,

    ANNOTATION_PROPERTY,
    ANNOTATION_VALUE,

    /// The logos error variant (marked with `#[error]`).
    #[error]
    ERROR,

    // More kinds without a lexer pattern.
    KEYS,
    ANNOTATIONS,
    /// Last variant; `Lang::kind_from_raw` relies on this being the largest
    /// discriminant.
    VALUE,
}
111
112impl SyntaxKind {
113 pub fn is_comment(self) -> bool {
114 use SyntaxKind::*;
115 matches!(self, LINE_COMMENT | BLOCK_COMMENT)
116 }
117
118 pub fn is_ws(self) -> bool {
119 use SyntaxKind::*;
120 matches!(self, WHITESPACE | NEWLINE)
121 }
122
123 pub fn is_compose(self) -> bool {
124 use SyntaxKind::*;
125 matches!(self, OBJECT | ARRAY)
126 }
127
128 pub fn is_key(self) -> bool {
129 use SyntaxKind::*;
130 matches!(
131 self,
132 IDENT
133 | IDENT_WITH_GLOB
134 | NULL
135 | BOOL
136 | INTEGER_HEX
137 | INTEGER_BIN
138 | INTEGER_OCT
139 | INTEGER
140 | SINGLE_QUOTE
141 | DOUBLE_QUOTE
142 | BACKTICK_QUOTE
143 | FLOAT
144 )
145 }
146
147 pub fn is_ws_or_comment(self) -> bool {
148 self.is_ws() || self.is_comment()
149 }
150}
151
152impl From<SyntaxKind> for rowan::SyntaxKind {
153 fn from(kind: SyntaxKind) -> Self {
154 Self(kind as u16)
155 }
156}
157
158#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
159pub enum Lang {}
160impl rowan::Language for Lang {
161 type Kind = SyntaxKind;
162 fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
163 assert!(raw.0 <= SyntaxKind::VALUE as u16);
164 unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
165 }
166 fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
167 kind.into()
168 }
169}
170
/// rowan syntax node specialised to [`Lang`].
pub type SyntaxNode = rowan::SyntaxNode<Lang>;
/// rowan syntax token specialised to [`Lang`].
pub type SyntaxToken = rowan::SyntaxToken<Lang>;
/// Either a [`SyntaxNode`] or a [`SyntaxToken`].
pub type SyntaxElement = rowan::NodeOrToken<SyntaxNode, SyntaxToken>;
174
175pub fn stringify_syntax(
176 indent: usize,
177 element: SyntaxElement,
178) -> Result<String, Box<dyn std::error::Error>> {
179 let mut buf: Vec<u8> = vec![];
180 write_syntax(&mut buf, indent, element)?;
181 Ok(std::str::from_utf8(&buf)?.to_string())
182}
183
184pub fn write_syntax<T: std::io::Write>(
185 w: &mut T,
186 indent: usize,
187 element: SyntaxElement,
188) -> Result<(), Box<dyn std::error::Error>> {
189 let kind: SyntaxKind = element.kind();
190 write!(w, "{:indent$}", "", indent = indent)?;
191 match element {
192 rowan::NodeOrToken::Node(node) => {
193 writeln!(w, "{:?}@{:?}", kind, node.text_range())?;
194 for child in node.children_with_tokens() {
195 write_syntax(w, indent + 2, child)?;
196 }
197 }
198
199 rowan::NodeOrToken::Token(token) => {
200 writeln!(w, "{:?}@{:?} {:?}", kind, token.text_range(), token.text())?;
201 }
202 }
203 Ok(())
204}
205
206fn lex_comment_block(lex: &mut Lexer<SyntaxKind>) -> bool {
207 let remainder: &str = lex.remainder();
208
209 let mut asterisk_found = false;
210
211 let mut total_len = 0;
212
213 for c in remainder.chars() {
214 total_len += c.len_utf8();
215
216 if c == '*' {
217 asterisk_found = true;
218 continue;
219 }
220
221 if c == '/' && asterisk_found {
222 lex.bump(remainder[0..total_len].as_bytes().len());
223 return true;
224 }
225
226 asterisk_found = false;
227 }
228 lex.bump(remainder[0..total_len].as_bytes().len());
229 false
230}
231
232fn lex_backtick_quote(lex: &mut Lexer<SyntaxKind>) -> bool {
233 lex_string(lex, '`', true)
234}
235
236fn lex_single_quote(lex: &mut Lexer<SyntaxKind>) -> bool {
237 lex_string(lex, '\'', false)
238}
239
240fn lex_double_quote(lex: &mut Lexer<SyntaxKind>) -> bool {
241 lex_string(lex, '"', false)
242}
243
244fn lex_string(lex: &mut Lexer<SyntaxKind>, quote: char, multiline: bool) -> bool {
245 let remainder: &str = lex.remainder();
246 let mut escaped = false;
247
248 let mut total_len = 0;
249
250 for c in remainder.chars() {
251 total_len += c.len_utf8();
252
253 if c == '\\' {
254 escaped = !escaped;
255 continue;
256 }
257
258 if (c == quote && !escaped) || (c == '\n' && !multiline) {
259 lex.bump(remainder[0..total_len].as_bytes().len());
260 return true;
261 }
262
263 escaped = false;
264 }
265 lex.bump(remainder[0..total_len].as_bytes().len());
266 false
267}
268
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `text` and returns the kind of its first token, if any.
    fn first_kind(text: &str) -> Option<SyntaxKind> {
        SyntaxKind::lexer(text).next()
    }

    /// Checks that each sample input starts with the expected token kind.
    #[test]
    fn test_lex() {
        let cases: &[(&str, SyntaxKind)] = &[
            ("/* comment */", SyntaxKind::BLOCK_COMMENT),
            ("// comment", SyntaxKind::LINE_COMMENT),
            ("foo", SyntaxKind::IDENT),
            (r#""I'm a string\u00E9""#, SyntaxKind::DOUBLE_QUOTE),
            (r#"'Say "hello"'"#, SyntaxKind::SINGLE_QUOTE),
            (r#"`hello world`"#, SyntaxKind::BACKTICK_QUOTE),
            ("123", SyntaxKind::INTEGER),
            ("0xDEADBEEF", SyntaxKind::INTEGER_HEX),
            ("0xDE_ADBE", SyntaxKind::INTEGER_HEX),
            ("0o4567", SyntaxKind::INTEGER_OCT),
            ("0o45_67", SyntaxKind::INTEGER_OCT),
            ("0b11010110", SyntaxKind::INTEGER_BIN),
            ("0b1101_0110", SyntaxKind::INTEGER_BIN),
            ("3.14", SyntaxKind::FLOAT),
            ("-.14", SyntaxKind::FLOAT),
            ("-3.", SyntaxKind::FLOAT),
            ("true", SyntaxKind::BOOL),
            ("false", SyntaxKind::BOOL),
            ("null", SyntaxKind::NULL),
            ("api*", SyntaxKind::IDENT_WITH_GLOB),
            ("a?i*", SyntaxKind::IDENT_WITH_GLOB),
            ("*", SyntaxKind::IDENT_WITH_GLOB),
            ("**", SyntaxKind::IDENT_WITH_GLOB),
        ];

        for &(text, expected) in cases {
            assert_eq!(first_kind(text), Some(expected), "input: {:?}", text);
        }
    }
}