use logos::Logos;

#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r"[ \t]+")]
pub enum TokenKind {
    // Top-level section headers.
    #[regex("(?i)STRUCTURE")]
    Structure,
    #[regex("(?i)BLOCKS")]
    Blocks,
    #[regex("(?i)COMMANDS")]
    Commands,

    // Editing commands and their modifiers.
    #[regex("(?i)EDIT")]
    Edit,
    #[regex("(?i)SET")]
    Set,
    #[regex("(?i)MOVE")]
    Move,
    #[regex("(?i)TO")]
    To,
    #[regex("(?i)AT")]
    At,
    #[regex("(?i)BEFORE")]
    Before,
    #[regex("(?i)AFTER")]
    After,
    #[regex("(?i)SWAP")]
    Swap,
    #[regex("(?i)APPEND")]
    Append,
    #[regex("(?i)WITH")]
    With,
    #[regex("(?i)DELETE")]
    Delete,
    #[regex("(?i)CASCADE")]
    Cascade,
    #[regex("(?i)PRESERVE_CHILDREN")]
    PreserveChildren,
    #[regex("(?i)PRUNE")]
    Prune,
    #[regex("(?i)UNREACHABLE")]
    Unreachable,
    #[regex("(?i)WHERE")]
    Where,
    #[regex("(?i)DRY_RUN")]
    DryRun,
    #[regex("(?i)FOLD")]
    Fold,
    #[regex("(?i)DEPTH")]
    Depth,
    #[regex("(?i)MAX_TOKENS")]
    MaxTokens,
    #[regex("(?i)PRESERVE_TAGS")]
    PreserveTags,
    #[regex("(?i)LINK")]
    Link,
    #[regex("(?i)UNLINK")]
    Unlink,
    #[regex("(?i)SNAPSHOT")]
    Snapshot,
    #[regex("(?i)CREATE")]
    Create,
    #[regex("(?i)RESTORE")]
    Restore,
    #[regex("(?i)LIST")]
    List,
    #[regex("(?i)DIFF")]
    Diff,
    #[regex("(?i)BEGIN")]
    Begin,
    #[regex("(?i)TRANSACTION")]
    Transaction,
    #[regex("(?i)COMMIT")]
    Commit,
    #[regex("(?i)ROLLBACK")]
    Rollback,
    #[regex("(?i)ATOMIC")]
    Atomic,
    #[regex("(?i)VIEW")]
    View,
    #[regex("(?i)FOLDED")]
    Folded,
    #[regex("(?i)FROM")]
    From,
    #[regex("(?i)TEMPLATE")]
    Template,
    #[regex("(?i)FIRST")]
    First,
    #[regex("(?i)LAST")]
    Last,
    #[regex("(?i)WRITE_SECTION")]
    WriteSection,
    #[regex("(?i)BASE_LEVEL")]
    BaseLevel,

    // Navigation commands.
    #[regex("(?i)GOTO")]
    Goto,
    #[regex("(?i)BACK")]
    Back,
    #[regex("(?i)EXPAND")]
    Expand,
    #[regex("(?i)FOLLOW")]
    Follow,
    #[regex("(?i)PATH")]
    Path,
    #[regex("(?i)SEARCH")]
    Search,
    #[regex("(?i)FIND")]
    Find,
    #[regex("(?i)CTX")]
    Ctx,

    // Directions.
    #[regex("(?i)DOWN")]
    Down,
    #[regex("(?i)UP")]
    Up,
    #[regex("(?i)SEMANTIC")]
    Semantic,

    // Search and filter parameters.
    #[regex("(?i)MODE")]
    Mode,
    #[regex("(?i)LIMIT")]
    Limit,
    #[regex("(?i)MIN_SIMILARITY")]
    MinSimilarity,
    #[regex("(?i)ROLES")]
    Roles,
    #[regex("(?i)TAGS")]
    Tags,
    #[regex("(?i)ROLE")]
    Role,
    #[regex("(?i)TAG")]
    Tag,
    #[regex("(?i)LABEL")]
    Label,
    #[regex("(?i)PATTERN")]
    Pattern,
    #[regex("(?i)MAX")]
    Max,
    #[regex("(?i)NEIGHBORHOOD")]
    Neighborhood,

    // Context management and rendering options.
    #[regex("(?i)ADD")]
    Add,
    #[regex("(?i)REMOVE")]
    Remove,
    #[regex("(?i)CLEAR")]
    Clear,
    #[regex("(?i)COMPRESS")]
    Compress,
    #[regex("(?i)RENDER")]
    Render,
    #[regex("(?i)STATS")]
    Stats,
    #[regex("(?i)FOCUS")]
    Focus,
    #[regex("(?i)RESULTS")]
    Results,
    #[regex("(?i)CHILDREN")]
    Children,
    #[regex("(?i)AUTO")]
    Auto,
    #[regex("(?i)TOKENS")]
    Tokens,
    #[regex("(?i)MAX_AGE")]
    MaxAge,
    #[regex("(?i)RELEVANCE")]
    Relevance,
    #[regex("(?i)REASON")]
    Reason,
    #[regex("(?i)METHOD")]
    Method,
    #[regex("(?i)FORMAT")]
    Format,
    #[regex("(?i)TRUNCATE")]
    Truncate,
    #[regex("(?i)SUMMARIZE")]
    Summarize,
    #[regex("(?i)STRUCTURE_ONLY")]
    StructureOnly,
    #[regex("(?i)SHORT_IDS")]
    ShortIds,
    #[regex("(?i)MARKDOWN")]
    Markdown,
    #[regex("(?i)FULL")]
    Full,
    #[regex("(?i)PREVIEW")]
    Preview,
    #[regex("(?i)METADATA")]
    MetadataToken,
    #[regex("(?i)IDS")]
    Ids,
    #[regex("(?i)BOTH")]
    Both,

    // Comparison and mutation operators.
    #[token("=")]
    Eq,
    #[token("!=")]
    Ne,
    #[token(">")]
    Gt,
    #[token(">=")]
    Ge,
    #[token("<")]
    Lt,
    #[token("<=")]
    Le,
    #[token("+=")]
    PlusEq,
    #[token("-=")]
    MinusEq,
    #[token("++")]
    PlusPlus,
    #[token("--")]
    MinusMinus,

    // Logical and predicate keywords.
    #[regex("(?i)AND")]
    And,
    #[regex("(?i)OR")]
    Or,
    #[regex("(?i)NOT")]
    Not,
    #[regex("(?i)CONTAINS")]
    Contains,
    #[regex("(?i)STARTS_WITH")]
    StartsWith,
    #[regex("(?i)ENDS_WITH")]
    EndsWith,
    #[regex("(?i)MATCHES")]
    Matches,
    #[regex("(?i)EXISTS")]
    Exists,
    #[regex("(?i)IS_NULL")]
    IsNull,
    #[regex("(?i)IS_NOT_NULL")]
    IsNotNull,
    #[regex("(?i)IS_EMPTY")]
    IsEmpty,
    #[regex("(?i)LENGTH")]
    Length,

    // Punctuation.
    #[token("::")]
    DoubleColon,
    #[token(":")]
    Colon,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("#")]
    Hash,
    // Named `At_` to avoid clashing with the `At` keyword above.
    #[token("@")]
    At_,
    #[token("$")]
    Dollar,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,

    // Block content types (lowercase only, unlike the keywords above).
    #[token("text")]
    TextType,
    #[token("table")]
    TableType,
    #[token("code")]
    CodeType,
    #[token("math")]
    MathType,
    #[token("media")]
    MediaType,
    #[token("json")]
    JsonType,
    #[token("binary")]
    BinaryType,
    #[token("composite")]
    CompositeType,

    // Literals.
    #[token("true")]
    True,
    #[token("false")]
    False,
    #[token("null")]
    Null,

    // Block identifiers: a `blk_` prefix followed by hex digits.
    #[regex(r"blk_[a-fA-F0-9]+")]
    BlockId,

    // Bare identifiers. Exact keyword spellings win on priority; anything
    // strictly longer wins here by longest match.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
    Identifier,

    #[regex(r"-?[0-9]+\.[0-9]+", |lex| lex.slice().parse::<f64>().ok())]
    Float(f64),

    #[regex(r"-?[0-9]+", |lex| lex.slice().parse::<i64>().ok())]
    Integer(i64),

    // Quoted strings; the callbacks strip the surrounding quotes.
    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        Some(s[1..s.len() - 1].to_string())
    })]
    DoubleString(String),

    #[regex(r#"'([^'\\]|\\.)*'"#, |lex| {
        let s = lex.slice();
        Some(s[1..s.len() - 1].to_string())
    })]
    SingleString(String),

    // No logos pattern: these variants are never produced by the derived
    // lexer itself and carry values filled in by a separate pass.
    TripleString(String),

    CodeBlock(String),

    #[regex(r"\|[^\n]+\|(\n\|[^\n]+\|)+", |lex| {
        Some(lex.slice().to_string())
    })]
    TableLiteral(String),

    // Newlines are significant (they separate commands), so they are
    // tokens rather than skipped whitespace.
    #[regex(r"\n")]
    Newline,

    // Line comments; dropped by the `Lexer` wrapper below.
    #[regex(r"//[^\n]*")]
    Comment,
}
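
// A minimal sketch of driving the derived lexer directly, without the
// position-tracking wrapper below (everything here comes from the derive
// above; `blk_ab12` is just a made-up id):
//
//     let mut lex = TokenKind::lexer("EDIT blk_ab12 SET");
//     assert_eq!(lex.next(), Some(Ok(TokenKind::Edit)));
//     assert_eq!(lex.slice(), "EDIT");
//     assert_eq!(lex.next(), Some(Ok(TokenKind::BlockId)));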

/// A lexed token together with its byte span in the source and its
/// 1-based line/column position.
#[derive(Debug, Clone)]
pub struct Token {
    pub kind: TokenKind,
    pub span: std::ops::Range<usize>,
    pub line: usize,
    pub column: usize,
}
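
// Because `span` indexes into the original input, the matched text can be
// recovered without storing it in the token (sketch, assuming `token` came
// from a `Lexer` built over `input`):
//
//     let text = &input[token.span.clone()];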

/// Wrapper around the raw logos lexer that tracks line/column positions
/// and drops comments.
pub struct Lexer<'a> {
    inner: logos::Lexer<'a, TokenKind>,
    line: usize,
    column: usize,
    /// Byte offset up to which `line`/`column` have already been advanced.
    scan_pos: usize,
}

impl<'a> Lexer<'a> {
    pub fn new(input: &'a str) -> Self {
        Self {
            inner: TokenKind::lexer(input),
            line: 1,
            column: 1,
            scan_pos: 0,
        }
    }

    pub fn source(&self) -> &'a str {
        self.inner.source()
    }
}

impl<'a> Iterator for Lexer<'a> {
    type Item = Result<Token, ()>;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let kind = self.inner.next()?;
            let span = self.inner.span();

            // Advance line/column over everything between the last scanned
            // position and the start of this token: skipped whitespace and
            // the text of previously returned tokens. Moving `scan_pos`
            // forward afterwards ensures no character is counted twice.
            let source = self.inner.source();
            for c in source[self.scan_pos..span.start].chars() {
                if c == '\n' {
                    self.line += 1;
                    self.column = 1;
                } else {
                    self.column += 1;
                }
            }
            self.scan_pos = span.start;

            match kind {
                Ok(TokenKind::Comment) => continue,
                Ok(TokenKind::Newline) => {
                    // Report the newline at its own position, then move the
                    // cursor to the start of the next line.
                    self.scan_pos = span.end;
                    let token = Token {
                        kind: TokenKind::Newline,
                        span,
                        line: self.line,
                        column: self.column,
                    };
                    self.line += 1;
                    self.column = 1;
                    return Some(Ok(token));
                }
                Ok(kind) => {
                    return Some(Ok(Token {
                        kind,
                        span,
                        line: self.line,
                        column: self.column,
                    }));
                }
                Err(_) => return Some(Err(())),
            }
        }
    }
}
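
// Usage sketch for the wrapper (hypothetical input; line/column are 1-based,
// as initialized in `Lexer::new`):
//
//     for tok in Lexer::new("GOTO blk_ff00\nBACK") {
//         let tok = tok.expect("lex error");
//         println!("{:?} at {}:{}", tok.kind, tok.line, tok.column);
//     }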

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lex_structure() {
        let input = "STRUCTURE\nblk_abc123def456: [blk_111222333444]";
        let lexer = Lexer::new(input);
        let tokens: Vec<_> = lexer.filter_map(|r| r.ok()).collect();

        assert!(matches!(tokens[0].kind, TokenKind::Structure));
        assert!(matches!(tokens[2].kind, TokenKind::BlockId));
    }

    #[test]
    fn test_lex_edit_command() {
        let input = r#"EDIT blk_abc123def456 SET content.text = "hello""#;
        let lexer = Lexer::new(input);
        let tokens: Vec<_> = lexer.filter_map(|r| r.ok()).collect();

        assert!(matches!(tokens[0].kind, TokenKind::Edit));
        assert!(matches!(tokens[1].kind, TokenKind::BlockId));
        assert!(matches!(tokens[2].kind, TokenKind::Set));
    }

    #[test]
    fn test_lex_string_types() {
        let input = r#""double" 'single'"#;
        let lexer = Lexer::new(input);
        let tokens: Vec<_> = lexer.filter_map(|r| r.ok()).collect();

        assert!(matches!(tokens[0].kind, TokenKind::DoubleString(_)));
        assert!(matches!(tokens[1].kind, TokenKind::SingleString(_)));
    }

    #[test]
    fn test_lex_operators() {
        let input = "= += -= != >= <=";
        let lexer = Lexer::new(input);
        let tokens: Vec<_> = lexer.filter_map(|r| r.ok()).collect();

        assert!(matches!(tokens[0].kind, TokenKind::Eq));
        assert!(matches!(tokens[1].kind, TokenKind::PlusEq));
        assert!(matches!(tokens[2].kind, TokenKind::MinusEq));
        assert!(matches!(tokens[3].kind, TokenKind::Ne));
    }
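
    // Sketch tests for behavior implied but not covered above: (?i) keyword
    // matching, logos' longest-match rule, comment skipping, and the
    // line/column bookkeeping in `Iterator::next`.
    #[test]
    fn test_case_insensitive_keywords() {
        let tokens: Vec<_> = Lexer::new("edit Goto ROLLBACK")
            .filter_map(|r| r.ok())
            .collect();

        assert!(matches!(tokens[0].kind, TokenKind::Edit));
        assert!(matches!(tokens[1].kind, TokenKind::Goto));
        assert!(matches!(tokens[2].kind, TokenKind::Rollback));
    }

    #[test]
    fn test_longer_identifier_beats_keyword() {
        // "structures" extends past STRUCTURE, so the longer Identifier
        // match wins under maximal munch.
        let tokens: Vec<_> = Lexer::new("structures").filter_map(|r| r.ok()).collect();

        assert!(matches!(tokens[0].kind, TokenKind::Identifier));
    }

    #[test]
    fn test_comments_skipped_and_positions_tracked() {
        let input = "EDIT // trailing comment\nSET";
        let tokens: Vec<_> = Lexer::new(input).filter_map(|r| r.ok()).collect();

        // The comment is dropped: EDIT, Newline, SET remain.
        assert!(matches!(tokens[0].kind, TokenKind::Edit));
        assert!(matches!(tokens[1].kind, TokenKind::Newline));
        assert!(matches!(tokens[2].kind, TokenKind::Set));
        assert_eq!((tokens[0].line, tokens[0].column), (1, 1));
        assert_eq!((tokens[2].line, tokens[2].column), (2, 1));
    }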
480 }
481}