Skip to main content

sochdb_query/sql/
lexer.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! SQL Lexer
19//!
20//! Converts SQL text into a stream of tokens.
21//! Handles string literals, numbers, identifiers, keywords, and operators.
22
23use super::token::{Span, Token, TokenKind};
24use std::borrow::Cow;
25
/// SQL Lexer errors
///
/// Pairs a human-readable message with the source span it refers to.
#[derive(Debug, Clone, PartialEq)]
pub struct LexError {
    /// Description of what went wrong.
    pub message: String,
    /// Source location of the error; its `line` and `column` are what
    /// the `Display` impl prints.
    pub span: Span,
}
32
33impl LexError {
34    pub fn new(message: impl Into<String>, span: Span) -> Self {
35        Self {
36            message: message.into(),
37            span,
38        }
39    }
40}
41
42impl std::fmt::Display for LexError {
43    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44        write!(
45            f,
46            "Lexer error at line {}, column {}: {}",
47            self.span.line, self.span.column, self.message
48        )
49    }
50}
51
// Empty impl: every `Error` method keeps its default, so no underlying
// source error is reported.
impl std::error::Error for LexError {}
53
/// SQL Lexer - tokenizes SQL input
///
/// Created with `Lexer::new` and consumed by `tokenize`, which walks the
/// input once and collects tokens and errors along the way.
pub struct Lexer<'a> {
    /// Full source text being scanned; token literals are sliced from it.
    input: &'a str,
    /// Byte view of `input` (`input.as_bytes()`), used for the ASCII fast path.
    bytes: &'a [u8],
    /// Byte offset into `input` of the next unread character.
    pos: usize,
    /// Line of the next unread character; starts at 1 and grows on each `\n`.
    line: usize,
    /// Column of the next unread character; starts at 1, is reset to 1 after
    /// `\n`, and otherwise advances by one per character consumed.
    column: usize,
    /// Tokens produced so far.
    tokens: Vec<Token<'a>>,
    /// Errors recorded so far; scanning carries on after an error is recorded.
    errors: Vec<LexError>,
    /// Counter for `?` style placeholders (auto-incrementing)
    placeholder_counter: u32,
}
66
67impl<'a> Lexer<'a> {
68    /// Create a new lexer for the given SQL input
69    pub fn new(input: &'a str) -> Self {
70        Self {
71            input,
72            bytes: input.as_bytes(),
73            pos: 0,
74            line: 1,
75            column: 1,
76            tokens: Vec::with_capacity(input.len() / 4),
77            errors: Vec::new(),
78            placeholder_counter: 0,
79        }
80    }
81
82    /// Tokenize the entire input
83    pub fn tokenize(mut self) -> Result<Vec<Token<'a>>, Vec<LexError>> {
84        while !self.is_at_end() {
85            self.scan_token();
86        }
87
88        // Add EOF token
89        self.tokens.push(Token::new(
90            TokenKind::Eof,
91            Span::new(self.pos, self.pos, self.line, self.column),
92            "",
93        ));
94
95        if self.errors.is_empty() {
96            Ok(self.tokens)
97        } else {
98            Err(self.errors)
99        }
100    }
101
102    fn is_at_end(&self) -> bool {
103        self.pos >= self.bytes.len()
104    }
105
106    fn advance(&mut self) -> Option<char> {
107        if self.pos >= self.bytes.len() {
108            return None;
109        }
110        let b = self.bytes[self.pos];
111        if b < 0x80 {
112            // ASCII fast path
113            self.pos += 1;
114            if b == b'\n' {
115                self.line += 1;
116                self.column = 1;
117            } else {
118                self.column += 1;
119            }
120            Some(b as char)
121        } else {
122            // Multi-byte UTF-8
123            let c = self.input[self.pos..].chars().next().unwrap();
124            self.pos += c.len_utf8();
125            self.column += 1;
126            Some(c)
127        }
128    }
129
130    fn peek(&self) -> Option<char> {
131        if self.pos >= self.bytes.len() {
132            return None;
133        }
134        let b = self.bytes[self.pos];
135        if b < 0x80 {
136            Some(b as char)
137        } else {
138            self.input[self.pos..].chars().next()
139        }
140    }
141
142    fn peek_next(&self) -> Option<char> {
143        if self.pos >= self.bytes.len() {
144            return None;
145        }
146        let first_len = if self.bytes[self.pos] < 0x80 {
147            1
148        } else {
149            self.input[self.pos..]
150                .chars()
151                .next()
152                .map_or(1, |c| c.len_utf8())
153        };
154        let next = self.pos + first_len;
155        if next >= self.bytes.len() {
156            return None;
157        }
158        let b = self.bytes[next];
159        if b < 0x80 {
160            Some(b as char)
161        } else {
162            self.input[next..].chars().next()
163        }
164    }
165
166    fn make_span(&self, start: usize, start_line: usize, start_col: usize) -> Span {
167        Span::new(start, self.pos, start_line, start_col)
168    }
169
170    fn scan_token(&mut self) {
171        let start = self.pos;
172        let start_line = self.line;
173        let start_col = self.column;
174
175        let c = match self.advance() {
176            Some(c) => c,
177            None => return,
178        };
179
180        match c {
181            // Whitespace
182            ' ' | '\t' | '\r' | '\n' => {
183                // Skip whitespace, don't emit token
184            }
185
186            // Single-character tokens
187            '(' => self.add_token(TokenKind::LParen, start, start_line, start_col),
188            ')' => self.add_token(TokenKind::RParen, start, start_line, start_col),
189            '[' => self.add_token(TokenKind::LBracket, start, start_line, start_col),
190            ']' => self.add_token(TokenKind::RBracket, start, start_line, start_col),
191            ',' => self.add_token(TokenKind::Comma, start, start_line, start_col),
192            ';' => self.add_token(TokenKind::Semicolon, start, start_line, start_col),
193            '+' => self.add_token(TokenKind::Plus, start, start_line, start_col),
194            '*' => self.add_token(TokenKind::Star, start, start_line, start_col),
195            '/' => {
196                if self.peek() == Some('/') || self.peek() == Some('*') {
197                    self.scan_comment(start, start_line, start_col);
198                } else {
199                    self.add_token(TokenKind::Slash, start, start_line, start_col);
200                }
201            }
202            '%' => self.add_token(TokenKind::Percent, start, start_line, start_col),
203            '&' => self.add_token(TokenKind::BitAnd, start, start_line, start_col),
204            '~' => self.add_token(TokenKind::BitNot, start, start_line, start_col),
205            '?' => {
206                // Auto-incrementing placeholder for JDBC/ODBC style ?
207                self.placeholder_counter += 1;
208                let span = self.make_span(start, start_line, start_col);
209                self.tokens.push(Token::new(
210                    TokenKind::Placeholder(self.placeholder_counter),
211                    span,
212                    "?",
213                ));
214            }
215            '@' => self.add_token(TokenKind::At, start, start_line, start_col),
216
217            // Two-character tokens
218            '-' => {
219                if self.peek() == Some('-') {
220                    // Line comment
221                    self.scan_line_comment(start, start_line, start_col);
222                } else if self.peek() == Some('>') {
223                    self.advance();
224                    if self.peek() == Some('>') {
225                        self.advance();
226                        self.add_token(TokenKind::DoubleArrow, start, start_line, start_col);
227                    } else {
228                        self.add_token(TokenKind::Arrow, start, start_line, start_col);
229                    }
230                } else {
231                    self.add_token(TokenKind::Minus, start, start_line, start_col);
232                }
233            }
234
235            '=' => self.add_token(TokenKind::Eq, start, start_line, start_col),
236
237            '!' => {
238                if self.peek() == Some('=') {
239                    self.advance();
240                    self.add_token(TokenKind::Ne, start, start_line, start_col);
241                } else {
242                    self.add_error("Unexpected character '!'", start, start_line, start_col);
243                }
244            }
245
246            '<' => {
247                if self.peek() == Some('=') {
248                    self.advance();
249                    self.add_token(TokenKind::Le, start, start_line, start_col);
250                } else if self.peek() == Some('-') {
251                    self.advance(); // consume '-'
252                    if self.peek() == Some('>') {
253                        self.advance(); // consume '>'
254                        self.add_token(TokenKind::BiArrow, start, start_line, start_col);
255                    } else {
256                        self.add_token(TokenKind::LeftArrow, start, start_line, start_col);
257                    }
258                } else if self.peek() == Some('>') {
259                    self.advance();
260                    self.add_token(TokenKind::Ne, start, start_line, start_col);
261                } else if self.peek() == Some('<') {
262                    self.advance();
263                    self.add_token(TokenKind::LeftShift, start, start_line, start_col);
264                } else {
265                    self.add_token(TokenKind::Lt, start, start_line, start_col);
266                }
267            }
268
269            '>' => {
270                if self.peek() == Some('=') {
271                    self.advance();
272                    self.add_token(TokenKind::Ge, start, start_line, start_col);
273                } else if self.peek() == Some('>') {
274                    self.advance();
275                    self.add_token(TokenKind::RightShift, start, start_line, start_col);
276                } else {
277                    self.add_token(TokenKind::Gt, start, start_line, start_col);
278                }
279            }
280
281            '|' => {
282                if self.peek() == Some('|') {
283                    self.advance();
284                    self.add_token(TokenKind::Concat, start, start_line, start_col);
285                } else {
286                    self.add_token(TokenKind::BitOr, start, start_line, start_col);
287                }
288            }
289
290            ':' => {
291                if self.peek() == Some(':') {
292                    self.advance();
293                    self.add_token(TokenKind::DoubleColon, start, start_line, start_col);
294                } else {
295                    self.add_token(TokenKind::Colon, start, start_line, start_col);
296                }
297            }
298
299            '.' => {
300                if self.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
301                    self.scan_number(start, start_line, start_col, true);
302                } else {
303                    self.add_token(TokenKind::Dot, start, start_line, start_col);
304                }
305            }
306
307            // String literals
308            '\'' => self.scan_string(start, start_line, start_col, '\''),
309            '"' => self.scan_quoted_identifier(start, start_line, start_col, '"'),
310            '`' => self.scan_quoted_identifier(start, start_line, start_col, '`'),
311
312            // Blob literal (X'...')
313            'X' | 'x' if self.peek() == Some('\'') => {
314                self.advance();
315                self.scan_blob(start, start_line, start_col);
316            }
317
318            // Numbers
319            '0'..='9' => self.scan_number(start, start_line, start_col, false),
320
321            // Identifiers and keywords
322            'a'..='z' | 'A'..='Z' | '_' => self.scan_identifier(start, start_line, start_col),
323
324            // Placeholder ($1, $2, ...)
325            '$' => self.scan_placeholder(start, start_line, start_col),
326
327            _ => {
328                self.add_error(
329                    format!("Unexpected character '{}'", c),
330                    start,
331                    start_line,
332                    start_col,
333                );
334            }
335        }
336    }
337
338    fn scan_string(&mut self, start: usize, start_line: usize, start_col: usize, quote: char) {
339        let mut value = String::new();
340
341        while let Some(c) = self.peek() {
342            if c == quote {
343                self.advance();
344                // Check for escaped quote ('')
345                if self.peek() == Some(quote) {
346                    self.advance();
347                    value.push(quote);
348                } else {
349                    // End of string
350                    let span = self.make_span(start, start_line, start_col);
351                    let literal = &self.input[start..self.pos];
352                    self.tokens.push(Token::new(
353                        TokenKind::String(Cow::Owned(value)),
354                        span,
355                        literal,
356                    ));
357                    return;
358                }
359            } else if c == '\\' {
360                self.advance();
361                // Handle escape sequences
362                if let Some(escaped) = self.advance() {
363                    match escaped {
364                        'n' => value.push('\n'),
365                        'r' => value.push('\r'),
366                        't' => value.push('\t'),
367                        '\\' => value.push('\\'),
368                        '\'' => value.push('\''),
369                        '"' => value.push('"'),
370                        '0' => value.push('\0'),
371                        _ => {
372                            value.push('\\');
373                            value.push(escaped);
374                        }
375                    }
376                }
377            } else {
378                self.advance();
379                value.push(c);
380            }
381        }
382
383        self.add_error("Unterminated string literal", start, start_line, start_col);
384    }
385
386    fn scan_quoted_identifier(
387        &mut self,
388        start: usize,
389        start_line: usize,
390        start_col: usize,
391        quote: char,
392    ) {
393        let mut value = String::new();
394
395        while let Some(c) = self.peek() {
396            if c == quote {
397                self.advance();
398                // Check for escaped quote
399                if self.peek() == Some(quote) {
400                    self.advance();
401                    value.push(quote);
402                } else {
403                    let span = self.make_span(start, start_line, start_col);
404                    let literal = &self.input[start..self.pos];
405                    self.tokens.push(Token::new(
406                        TokenKind::QuotedIdentifier(Cow::Owned(value)),
407                        span,
408                        literal,
409                    ));
410                    return;
411                }
412            } else {
413                self.advance();
414                value.push(c);
415            }
416        }
417
418        self.add_error(
419            "Unterminated quoted identifier",
420            start,
421            start_line,
422            start_col,
423        );
424    }
425
426    fn scan_number(
427        &mut self,
428        start: usize,
429        start_line: usize,
430        start_col: usize,
431        started_with_dot: bool,
432    ) {
433        let num_start = start;
434        let mut has_dot = started_with_dot;
435        let mut has_exp = false;
436
437        // Consume integer part
438        while let Some(c) = self.peek() {
439            if c.is_ascii_digit() {
440                self.advance();
441            } else if c == '.' && !has_dot && !has_exp {
442                // Check it's not a range operator (..)
443                if self.peek_next() == Some('.') {
444                    break;
445                }
446                has_dot = true;
447                self.advance();
448            } else if (c == 'e' || c == 'E') && !has_exp {
449                has_exp = true;
450                self.advance();
451                // Optional sign
452                if self.peek() == Some('+') || self.peek() == Some('-') {
453                    self.advance();
454                }
455            } else {
456                break;
457            }
458        }
459
460        let literal = &self.input[num_start..self.pos];
461        let span = self.make_span(start, start_line, start_col);
462
463        if has_dot || has_exp {
464            match literal.parse::<f64>() {
465                Ok(n) => self
466                    .tokens
467                    .push(Token::new(TokenKind::Float(n), span, literal)),
468                Err(_) => self.add_error("Invalid float literal", start, start_line, start_col),
469            }
470        } else {
471            match literal.parse::<i64>() {
472                Ok(n) => self
473                    .tokens
474                    .push(Token::new(TokenKind::Integer(n), span, literal)),
475                Err(_) => self.add_error("Invalid integer literal", start, start_line, start_col),
476            }
477        }
478    }
479
480    fn scan_identifier(&mut self, start: usize, start_line: usize, start_col: usize) {
481        while let Some(c) = self.peek() {
482            if c.is_ascii_alphanumeric() || c == '_' {
483                self.advance();
484            } else {
485                break;
486            }
487        }
488
489        let literal = &self.input[start..self.pos];
490        let span = self.make_span(start, start_line, start_col);
491
492        // Check for keyword — zero allocation
493        let kind = TokenKind::from_keyword(literal).unwrap_or(TokenKind::Identifier(literal));
494
495        self.tokens.push(Token::new(kind, span, literal));
496    }
497
498    fn scan_placeholder(&mut self, start: usize, start_line: usize, start_col: usize) {
499        let mut num = String::new();
500
501        while let Some(c) = self.peek() {
502            if c.is_ascii_digit() {
503                self.advance();
504                num.push(c);
505            } else {
506                break;
507            }
508        }
509
510        let span = self.make_span(start, start_line, start_col);
511
512        if num.is_empty() {
513            self.add_error("Expected number after $", start, start_line, start_col);
514        } else if let Ok(n) = num.parse::<u32>() {
515            self.tokens.push(Token::new(
516                TokenKind::Placeholder(n),
517                span,
518                &self.input[start..self.pos],
519            ));
520        } else {
521            self.add_error("Invalid placeholder number", start, start_line, start_col);
522        }
523    }
524
525    fn scan_comment(&mut self, start: usize, start_line: usize, start_col: usize) {
526        self.advance(); // consume second / or *
527
528        if self.peek() == Some('*') || self.input[start..self.pos].ends_with('*') {
529            // Block comment /* ... */
530            let mut depth = 1;
531
532            while depth > 0 && !self.is_at_end() {
533                let c = self.peek();
534                let next = self.peek_next();
535
536                if c == Some('*') && next == Some('/') {
537                    self.advance();
538                    self.advance();
539                    depth -= 1;
540                } else if c == Some('/') && next == Some('*') {
541                    self.advance();
542                    self.advance();
543                    depth += 1;
544                } else {
545                    self.advance();
546                }
547            }
548
549            if depth > 0 {
550                self.add_error("Unterminated block comment", start, start_line, start_col);
551            }
552        } else {
553            // Line comment //
554            while let Some(c) = self.peek() {
555                if c == '\n' {
556                    break;
557                }
558                self.advance();
559            }
560        }
561        // Don't emit comment tokens
562    }
563
564    fn scan_line_comment(&mut self, _start: usize, _start_line: usize, _start_col: usize) {
565        self.advance(); // consume second -
566
567        while let Some(c) = self.peek() {
568            if c == '\n' {
569                break;
570            }
571            self.advance();
572        }
573        // Don't emit comment tokens
574    }
575
576    fn scan_blob(&mut self, start: usize, start_line: usize, start_col: usize) {
577        let mut hex = String::new();
578
579        while let Some(c) = self.peek() {
580            if c == '\'' {
581                self.advance();
582                break;
583            } else if c.is_ascii_hexdigit() {
584                self.advance();
585                hex.push(c);
586            } else if c.is_whitespace() {
587                self.advance(); // Allow whitespace in blob
588            } else {
589                self.add_error(
590                    "Invalid hex digit in blob literal",
591                    start,
592                    start_line,
593                    start_col,
594                );
595                return;
596            }
597        }
598
599        if !hex.len().is_multiple_of(2) {
600            self.add_error(
601                "Blob literal must have even number of hex digits",
602                start,
603                start_line,
604                start_col,
605            );
606            return;
607        }
608
609        let bytes: Result<Vec<u8>, _> = (0..hex.len())
610            .step_by(2)
611            .map(|i| u8::from_str_radix(&hex[i..i + 2], 16))
612            .collect();
613
614        match bytes {
615            Ok(data) => {
616                let span = self.make_span(start, start_line, start_col);
617                let literal = &self.input[start..self.pos];
618                self.tokens
619                    .push(Token::new(TokenKind::Blob(data), span, literal));
620            }
621            Err(_) => {
622                self.add_error("Invalid blob literal", start, start_line, start_col);
623            }
624        }
625    }
626
627    fn add_token(
628        &mut self,
629        kind: TokenKind<'a>,
630        start: usize,
631        start_line: usize,
632        start_col: usize,
633    ) {
634        let span = self.make_span(start, start_line, start_col);
635        let literal = &self.input[start..self.pos];
636        self.tokens.push(Token::new(kind, span, literal));
637    }
638
639    fn add_error(
640        &mut self,
641        message: impl Into<String>,
642        start: usize,
643        start_line: usize,
644        start_col: usize,
645    ) {
646        let span = self.make_span(start, start_line, start_col);
647        self.errors.push(LexError::new(message, span));
648    }
649}
650
#[cfg(test)]
mod tests {
    use super::*;

    /// Lex `sql`, panicking if the lexer reports any error.
    fn lex(sql: &str) -> Vec<Token<'_>> {
        Lexer::new(sql).tokenize().unwrap()
    }

    #[test]
    fn test_simple_select() {
        let tokens = lex("SELECT * FROM users");
        // SELECT, *, FROM, users, EOF
        assert_eq!(tokens.len(), 5);

        let expected = [TokenKind::Select, TokenKind::Star, TokenKind::From];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(&tokens[idx].kind, want);
        }
        assert!(matches!(tokens[3].kind, TokenKind::Identifier(_)));
    }

    #[test]
    fn test_string_literal() {
        // A doubled quote inside the literal decodes to a single quote.
        let tokens = lex("SELECT 'hello''world'");
        assert!(matches!(&tokens[1].kind, TokenKind::String(s) if s == "hello'world"));
    }

    #[test]
    #[allow(clippy::approx_constant)]
    fn test_numbers() {
        let tokens = lex("42 3.14 1e10 .5");
        assert!(matches!(tokens[0].kind, TokenKind::Integer(42)));
        assert!(matches!(tokens[1].kind, TokenKind::Float(f) if (f - 3.14).abs() < 0.001));
        assert!(matches!(tokens[2].kind, TokenKind::Float(_)));
        assert!(matches!(tokens[3].kind, TokenKind::Float(f) if (f - 0.5).abs() < 0.001));
    }

    #[test]
    fn test_operators() {
        let tokens = lex("= != <> < <= > >= || ->");
        let expected = [
            TokenKind::Eq,
            TokenKind::Ne,
            TokenKind::Ne,
            TokenKind::Lt,
            TokenKind::Le,
            TokenKind::Gt,
            TokenKind::Ge,
            TokenKind::Concat,
            TokenKind::Arrow,
        ];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(&tokens[idx].kind, want);
        }
    }

    #[test]
    fn test_keywords() {
        let tokens = lex("SELECT INSERT UPDATE DELETE FROM WHERE");
        let expected = [
            TokenKind::Select,
            TokenKind::Insert,
            TokenKind::Update,
            TokenKind::Delete,
            TokenKind::From,
            TokenKind::Where,
        ];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(&tokens[idx].kind, want);
        }
    }

    #[test]
    fn test_placeholder() {
        let tokens = lex("$1 $2 $10");
        assert!(matches!(tokens[0].kind, TokenKind::Placeholder(1)));
        assert!(matches!(tokens[1].kind, TokenKind::Placeholder(2)));
        assert!(matches!(tokens[2].kind, TokenKind::Placeholder(10)));
    }

    #[test]
    fn test_line_comment() {
        let tokens = lex("SELECT -- comment\n* FROM users");
        // SELECT, *, FROM, users, EOF
        assert_eq!(tokens.len(), 5);
        assert_eq!(tokens[0].kind, TokenKind::Select);
        assert_eq!(tokens[1].kind, TokenKind::Star);
    }

    #[test]
    fn test_blob_literal() {
        let tokens = lex("X'48454C4C4F'");
        assert!(matches!(&tokens[0].kind, TokenKind::Blob(b) if b == b"HELLO"));
    }

    #[test]
    fn test_left_arrow() {
        let tokens = lex("<-");
        assert_eq!(tokens[0].kind, TokenKind::LeftArrow);
    }

    #[test]
    fn test_biarrow() {
        let tokens = lex("<->");
        assert_eq!(tokens[0].kind, TokenKind::BiArrow);
    }

    #[test]
    fn test_arrow_tokens_in_context() {
        let tokens = lex("a -> b <- c <-> d");

        // Identifiers sit at even indices, arrows at odd ones.
        assert!(matches!(tokens[0].kind, TokenKind::Identifier("a")));
        assert!(matches!(tokens[2].kind, TokenKind::Identifier("b")));
        assert!(matches!(tokens[4].kind, TokenKind::Identifier("c")));
        assert!(matches!(tokens[6].kind, TokenKind::Identifier("d")));

        assert_eq!(tokens[1].kind, TokenKind::Arrow);
        assert_eq!(tokens[3].kind, TokenKind::LeftArrow);
        assert_eq!(tokens[5].kind, TokenKind::BiArrow);
    }

    #[test]
    fn test_relate_keyword() {
        let tokens = lex("RELATE LIVE CONTENT EVENT DIFF");
        let expected = [
            TokenKind::Relate,
            TokenKind::Live,
            TokenKind::Content,
            TokenKind::Event,
            TokenKind::Diff,
        ];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(&tokens[idx].kind, want);
        }
    }
}