1use crate::errors::{Result, SqlglotError};
2use crate::tokens::{Token, TokenType};
3
/// A character-level SQL tokenizer with line/column tracking.
pub struct Tokenizer {
    input: Vec<char>,   // source text pre-collected into chars for O(1) indexed lookahead
    pos: usize,         // index of the next unread char in `input`
    line: usize,        // 1-based line number of the next unread char
    col: usize,         // 1-based column number of the next unread char
    pub preserve_comments: bool, // when true, tokenize() keeps comment tokens in the output
}
20
21impl Tokenizer {
22 #[must_use]
24 pub fn new(input: &str) -> Self {
25 Self {
26 input: input.chars().collect(),
27 pos: 0,
28 line: 1,
29 col: 1,
30 preserve_comments: false,
31 }
32 }
33
34 #[must_use]
36 pub fn with_comments(input: &str) -> Self {
37 Self {
38 input: input.chars().collect(),
39 pos: 0,
40 line: 1,
41 col: 1,
42 preserve_comments: true,
43 }
44 }
45
46 pub fn tokenize(&mut self) -> Result<Vec<Token>> {
50 let mut tokens = Vec::new();
51 loop {
52 let token = self.next_token()?;
53 match token.token_type {
54 TokenType::Eof => {
55 tokens.push(token);
56 break;
57 }
58 TokenType::Whitespace => continue,
59 TokenType::LineComment | TokenType::BlockComment => {
60 if self.preserve_comments {
61 tokens.push(token);
62 }
63 }
64 _ => tokens.push(token),
65 }
66 }
67 Ok(tokens)
68 }
69
70 fn peek(&self) -> Option<char> {
71 self.input.get(self.pos).copied()
72 }
73
74 fn peek_at(&self, offset: usize) -> Option<char> {
75 self.input.get(self.pos + offset).copied()
76 }
77
78 fn advance(&mut self) -> Option<char> {
79 let ch = self.input.get(self.pos).copied();
80 if let Some(c) = ch {
81 self.pos += 1;
82 if c == '\n' {
83 self.line += 1;
84 self.col = 1;
85 } else {
86 self.col += 1;
87 }
88 }
89 ch
90 }
91
    /// Build a [`Token`] stamped with the recorded start offset, line, and column.
    fn make_token(&self, token_type: TokenType, value: impl Into<String>, start: usize, start_line: usize, start_col: usize) -> Token {
        Token::with_location(token_type, value, start, start_line, start_col)
    }
95
    /// Scan and return the next token, skipping any leading whitespace.
    ///
    /// Returns an `Eof` token at end of input.
    ///
    /// # Errors
    /// Returns a `TokenizerError` for unterminated block comments or for
    /// characters with no token rule.
    fn next_token(&mut self) -> Result<Token> {
        // Consume the run of whitespace (if any) before the token proper.
        while self.peek().is_some_and(|c| c.is_whitespace()) {
            self.advance();
        }

        // Record where this token begins for location metadata.
        let start = self.pos;
        let start_line = self.line;
        let start_col = self.col;

        let Some(ch) = self.advance() else {
            return Ok(self.make_token(TokenType::Eof, "", start, start_line, start_col));
        };

        match ch {
            '(' => Ok(self.make_token(TokenType::LParen, "(", start, start_line, start_col)),
            ')' => Ok(self.make_token(TokenType::RParen, ")", start, start_line, start_col)),
            '[' => {
                // Heuristic: `[word]` is treated as a T-SQL-style bracket-quoted
                // identifier; a `[` whose contents hit a comma or newline before
                // the closing `]` is treated as an array/subscript opener instead.
                let mut looks_like_ident = false;
                if let Some(first_inner) = self.peek()
                    && (first_inner.is_ascii_alphabetic() || first_inner == '_')
                {
                    let mut scan = self.pos;
                    while scan < self.input.len() {
                        if self.input[scan] == ']' {
                            // Only a non-empty `[...]` counts as an identifier.
                            looks_like_ident = scan > self.pos;
                            break;
                        }
                        if self.input[scan] == ',' || self.input[scan] == '\n' {
                            break;
                        }
                        scan += 1;
                    }
                }
                if looks_like_ident {
                    self.read_quoted_identifier(start, start_line, start_col, '[')
                } else {
                    Ok(self.make_token(TokenType::LBracket, "[", start, start_line, start_col))
                }
            }
            ']' => Ok(self.make_token(TokenType::RBracket, "]", start, start_line, start_col)),
            '{' => Ok(self.make_token(TokenType::LBrace, "{", start, start_line, start_col)),
            '}' => Ok(self.make_token(TokenType::RBrace, "}", start, start_line, start_col)),
            ',' => Ok(self.make_token(TokenType::Comma, ",", start, start_line, start_col)),
            ';' => Ok(self.make_token(TokenType::Semicolon, ";", start, start_line, start_col)),
            '.' => Ok(self.make_token(TokenType::Dot, ".", start, start_line, start_col)),
            '+' => Ok(self.make_token(TokenType::Plus, "+", start, start_line, start_col)),
            '~' => Ok(self.make_token(TokenType::BitwiseNot, "~", start, start_line, start_col)),
            '@' => Ok(self.make_token(TokenType::AtSign, "@", start, start_line, start_col)),
            '=' => Ok(self.make_token(TokenType::Eq, "=", start, start_line, start_col)),
            '*' => Ok(self.make_token(TokenType::Star, "*", start, start_line, start_col)),
            '%' => Ok(self.make_token(TokenType::Percent2, "%", start, start_line, start_col)),
            '^' => Ok(self.make_token(TokenType::BitwiseXor, "^", start, start_line, start_col)),

            // `::` cast operator vs bare `:`.
            ':' => {
                if self.peek() == Some(':') {
                    self.advance();
                    Ok(self.make_token(TokenType::DoubleColon, "::", start, start_line, start_col))
                } else {
                    Ok(self.make_token(TokenType::Colon, ":", start, start_line, start_col))
                }
            }

            // `--` line comment, `->` / `->>` JSON access operators, or minus.
            '-' => {
                if self.peek() == Some('-') {
                    self.advance();
                    let mut value = String::from("--");
                    // A line comment runs to (but does not consume) the newline.
                    while self.peek().is_some_and(|c| c != '\n') {
                        value.push(self.advance().unwrap());
                    }
                    Ok(self.make_token(TokenType::LineComment, value, start, start_line, start_col))
                } else if self.peek() == Some('>') {
                    self.advance();
                    if self.peek() == Some('>') {
                        self.advance();
                        Ok(self.make_token(TokenType::DoubleArrow, "->>", start, start_line, start_col))
                    } else {
                        Ok(self.make_token(TokenType::Arrow, "->", start, start_line, start_col))
                    }
                } else {
                    Ok(self.make_token(TokenType::Minus, "-", start, start_line, start_col))
                }
            }

            // `/* ... */` block comment (nesting supported) or division.
            '/' => {
                if self.peek() == Some('*') {
                    self.advance();
                    let mut value = String::from("/*");
                    // Track nesting depth so `/* a /* b */ c */` is one comment.
                    let mut depth = 1;
                    while depth > 0 {
                        match self.advance() {
                            Some('*') if self.peek() == Some('/') => {
                                self.advance();
                                depth -= 1;
                                value.push_str("*/");
                            }
                            Some('/') if self.peek() == Some('*') => {
                                self.advance();
                                depth += 1;
                                value.push_str("/*");
                            }
                            Some(c) => value.push(c),
                            None => {
                                return Err(SqlglotError::TokenizerError {
                                    message: "Unterminated block comment".into(),
                                    position: start,
                                });
                            }
                        }
                    }
                    Ok(self.make_token(TokenType::BlockComment, value, start, start_line, start_col))
                } else {
                    Ok(self.make_token(TokenType::Slash, "/", start, start_line, start_col))
                }
            }

            // `<=`, `<>`, `<<`, or `<`.
            '<' => {
                if self.peek() == Some('=') {
                    self.advance();
                    Ok(self.make_token(TokenType::LtEq, "<=", start, start_line, start_col))
                } else if self.peek() == Some('>') {
                    self.advance();
                    Ok(self.make_token(TokenType::Neq, "<>", start, start_line, start_col))
                } else if self.peek() == Some('<') {
                    self.advance();
                    Ok(self.make_token(TokenType::ShiftLeft, "<<", start, start_line, start_col))
                } else {
                    Ok(self.make_token(TokenType::Lt, "<", start, start_line, start_col))
                }
            }

            // `>=`, `>>`, or `>`.
            '>' => {
                if self.peek() == Some('=') {
                    self.advance();
                    Ok(self.make_token(TokenType::GtEq, ">=", start, start_line, start_col))
                } else if self.peek() == Some('>') {
                    self.advance();
                    Ok(self.make_token(TokenType::ShiftRight, ">>", start, start_line, start_col))
                } else {
                    Ok(self.make_token(TokenType::Gt, ">", start, start_line, start_col))
                }
            }

            // `!` is only valid as part of `!=`.
            '!' => {
                if self.peek() == Some('=') {
                    self.advance();
                    Ok(self.make_token(TokenType::Neq, "!=", start, start_line, start_col))
                } else {
                    Err(SqlglotError::TokenizerError {
                        message: format!("Unexpected character: {ch}"),
                        position: start,
                    })
                }
            }

            // `||` string concat or bitwise `|`.
            '|' => {
                if self.peek() == Some('|') {
                    self.advance();
                    Ok(self.make_token(TokenType::Concat, "||", start, start_line, start_col))
                } else {
                    Ok(self.make_token(TokenType::BitwiseOr, "|", start, start_line, start_col))
                }
            }

            '&' => Ok(self.make_token(TokenType::BitwiseAnd, "&", start, start_line, start_col)),

            // `#>` / `#>>` JSON path operators; otherwise a MySQL-style `#` line comment.
            '#' => {
                if self.peek() == Some('>') {
                    self.advance();
                    if self.peek() == Some('>') {
                        self.advance();
                        Ok(self.make_token(TokenType::HashDoubleArrow, "#>>", start, start_line, start_col))
                    } else {
                        Ok(self.make_token(TokenType::HashArrow, "#>", start, start_line, start_col))
                    }
                } else {
                    let mut value = String::from("#");
                    while self.peek().is_some_and(|c| c != '\n') {
                        value.push(self.advance().unwrap());
                    }
                    Ok(self.make_token(TokenType::LineComment, value, start, start_line, start_col))
                }
            }

            '\'' => self.read_string(start, start_line, start_col),

            c if c.is_ascii_digit() => self.read_number(start, start_line, start_col, c),

            c if c.is_ascii_alphabetic() || c == '_' => self.read_identifier(start, start_line, start_col, c),

            '"' => self.read_quoted_identifier(start, start_line, start_col, '"'),

            '`' => self.read_quoted_identifier(start, start_line, start_col, '`'),

            // `$1`-style positional parameters, or a bare `$` parameter marker.
            '$' => {
                if self.peek().is_some_and(|c| c.is_ascii_digit()) {
                    let mut value = String::from("$");
                    while self.peek().is_some_and(|c| c.is_ascii_digit()) {
                        value.push(self.advance().unwrap());
                    }
                    Ok(self.make_token(TokenType::Parameter, value, start, start_line, start_col))
                } else {
                    Ok(self.make_token(TokenType::Parameter, "$", start, start_line, start_col))
                }
            }

            '?' => Ok(self.make_token(TokenType::Parameter, "?", start, start_line, start_col)),

            _ => Err(SqlglotError::TokenizerError {
                message: format!("Unexpected character: {ch}"),
                position: start,
            }),
        }
    }
329
330 fn read_string(&mut self, start: usize, start_line: usize, start_col: usize) -> Result<Token> {
331 let mut value = String::new();
332 loop {
333 match self.advance() {
334 Some('\'') => {
335 if self.peek() == Some('\'') {
336 self.advance();
337 value.push('\'');
338 } else {
339 return Ok(self.make_token(TokenType::String, value, start, start_line, start_col));
340 }
341 }
342 Some('\\') => {
343 match self.peek() {
344 Some('\\') => {
345 self.advance();
346 value.push('\\');
347 }
348 Some('n') => {
349 self.advance();
350 value.push('\n');
351 }
352 Some('t') => {
353 self.advance();
354 value.push('\t');
355 }
356 Some('r') => {
357 self.advance();
358 value.push('\r');
359 }
360 _ => {
361 value.push('\\');
362 }
363 }
364 }
365 Some(c) => value.push(c),
366 None => {
367 return Err(SqlglotError::TokenizerError {
368 message: "Unterminated string literal".into(),
369 position: start,
370 });
371 }
372 }
373 }
374 }
375
376 fn read_number(&mut self, start: usize, start_line: usize, start_col: usize, first: char) -> Result<Token> {
377 let mut value = String::new();
378 value.push(first);
379
380 if first == '0' && self.peek().is_some_and(|c| c == 'x' || c == 'X') {
381 value.push(self.advance().unwrap());
382 while self.peek().is_some_and(|c| c.is_ascii_hexdigit()) {
383 value.push(self.advance().unwrap());
384 }
385 return Ok(self.make_token(TokenType::HexString, value, start, start_line, start_col));
386 }
387
388 while self.peek().is_some_and(|c| c.is_ascii_digit()) {
389 value.push(self.advance().unwrap());
390 }
391
392 if self.peek() == Some('.') && self.peek_at(1).is_some_and(|c| c.is_ascii_digit()) {
393 value.push(self.advance().unwrap());
394 while self.peek().is_some_and(|c| c.is_ascii_digit()) {
395 value.push(self.advance().unwrap());
396 }
397 }
398
399 if self.peek().is_some_and(|c| c == 'e' || c == 'E') {
400 value.push(self.advance().unwrap());
401 if self.peek().is_some_and(|c| c == '+' || c == '-') {
402 value.push(self.advance().unwrap());
403 }
404 while self.peek().is_some_and(|c| c.is_ascii_digit()) {
405 value.push(self.advance().unwrap());
406 }
407 }
408
409 Ok(self.make_token(TokenType::Number, value, start, start_line, start_col))
410 }
411
412 fn read_identifier(&mut self, start: usize, start_line: usize, start_col: usize, first: char) -> Result<Token> {
413 let mut value = String::new();
414 value.push(first);
415 while self
416 .peek()
417 .is_some_and(|c| c.is_ascii_alphanumeric() || c == '_')
418 {
419 value.push(self.advance().unwrap());
420 }
421
422 let token_type = Self::keyword_type(&value);
423 Ok(self.make_token(token_type, value, start, start_line, start_col))
424 }
425
    /// Map a bare word to its keyword token type (case-insensitively), or
    /// `Identifier` when it is not a recognized keyword.
    ///
    /// Only ASCII words reach this function (see `read_identifier`), so
    /// `to_uppercase` behaves like an ASCII uppercase here.
    fn keyword_type(word: &str) -> TokenType {
        match word.to_uppercase().as_str() {
            // Core query clauses and logical operators.
            "SELECT" => TokenType::Select,
            "FROM" => TokenType::From,
            "WHERE" => TokenType::Where,
            "AND" => TokenType::And,
            "OR" => TokenType::Or,
            "NOT" => TokenType::Not,
            "AS" => TokenType::As,
            // Join forms.
            "JOIN" => TokenType::Join,
            "INNER" => TokenType::Inner,
            "LEFT" => TokenType::Left,
            "RIGHT" => TokenType::Right,
            "FULL" => TokenType::Full,
            "OUTER" => TokenType::Outer,
            "CROSS" => TokenType::Cross,
            "ON" => TokenType::On,
            // DML.
            "INSERT" => TokenType::Insert,
            "INTO" => TokenType::Into,
            "VALUES" => TokenType::Values,
            "UPDATE" => TokenType::Update,
            "SET" => TokenType::Set,
            "DELETE" => TokenType::Delete,
            // DDL.
            "CREATE" => TokenType::Create,
            "TABLE" => TokenType::Table,
            "DROP" => TokenType::Drop,
            "ALTER" => TokenType::Alter,
            "INDEX" => TokenType::Index,
            "IF" => TokenType::If,
            "EXISTS" => TokenType::Exists,
            // Predicates and expressions.
            "IN" => TokenType::In,
            "IS" => TokenType::Is,
            "NULL" => TokenType::Null,
            "LIKE" => TokenType::Like,
            "ILIKE" => TokenType::ILike,
            "ESCAPE" => TokenType::Escape,
            "BETWEEN" => TokenType::Between,
            "CASE" => TokenType::Case,
            "WHEN" => TokenType::When,
            "THEN" => TokenType::Then,
            "ELSE" => TokenType::Else,
            "END" => TokenType::End,
            // Ordering, grouping, limits.
            "ORDER" => TokenType::Order,
            "BY" => TokenType::By,
            "ASC" => TokenType::Asc,
            "DESC" => TokenType::Desc,
            "GROUP" => TokenType::Group,
            "HAVING" => TokenType::Having,
            "LIMIT" => TokenType::Limit,
            "OFFSET" => TokenType::Offset,
            // Set operations and quantifiers.
            "UNION" => TokenType::Union,
            "ALL" => TokenType::All,
            "DISTINCT" => TokenType::Distinct,
            "TRUE" => TokenType::True,
            "FALSE" => TokenType::False,
            "INTERSECT" => TokenType::Intersect,
            "EXCEPT" => TokenType::Except,
            "WITH" => TokenType::With,
            "RECURSIVE" => TokenType::Recursive,
            "ANY" => TokenType::Any,
            "SOME" => TokenType::Some,
            "CAST" => TokenType::Cast,
            // Window functions.
            "OVER" => TokenType::Over,
            "PARTITION" => TokenType::Partition,
            "WINDOW" => TokenType::Window,
            "ROWS" => TokenType::Rows,
            "RANGE" => TokenType::Range,
            "UNBOUNDED" => TokenType::Unbounded,
            "PRECEDING" => TokenType::Preceding,
            "FOLLOWING" => TokenType::Following,
            "FILTER" => TokenType::Filter,
            // Data types.
            "INT" => TokenType::Int,
            "INTEGER" => TokenType::Integer,
            "BIGINT" => TokenType::BigInt,
            "SMALLINT" => TokenType::SmallInt,
            "TINYINT" => TokenType::TinyInt,
            "FLOAT" => TokenType::Float,
            "DOUBLE" => TokenType::Double,
            "DECIMAL" => TokenType::Decimal,
            "NUMERIC" => TokenType::Numeric,
            "REAL" => TokenType::Real,
            "VARCHAR" => TokenType::Varchar,
            "CHAR" | "CHARACTER" => TokenType::Char,
            "TEXT" => TokenType::Text,
            "BOOLEAN" | "BOOL" => TokenType::Boolean,
            "DATE" => TokenType::Date,
            "TIMESTAMP" => TokenType::Timestamp,
            "TIMESTAMPTZ" => TokenType::TimestampTz,
            "TIME" => TokenType::Time,
            "INTERVAL" => TokenType::Interval,
            "BLOB" => TokenType::Blob,
            "BYTEA" => TokenType::Bytea,
            "JSON" => TokenType::Json,
            "JSONB" => TokenType::Jsonb,
            "UUID" => TokenType::Uuid,
            "ARRAY" => TokenType::Array,
            "MAP" => TokenType::Map,
            "STRUCT" => TokenType::Struct,
            // Constraints.
            "PRIMARY" => TokenType::Primary,
            "KEY" => TokenType::Key,
            "FOREIGN" => TokenType::Foreign,
            "REFERENCES" => TokenType::References,
            "UNIQUE" => TokenType::Unique,
            "CHECK" => TokenType::Check,
            "DEFAULT" => TokenType::Default,
            "CONSTRAINT" => TokenType::Constraint,
            "AUTO_INCREMENT" | "AUTOINCREMENT" => TokenType::AutoIncrement,
            "CASCADE" => TokenType::Cascade,
            "RESTRICT" => TokenType::Restrict,
            // Upsert / merge.
            "RETURNING" => TokenType::Returning,
            "CONFLICT" => TokenType::Conflict,
            "DO" => TokenType::Do,
            "NOTHING" => TokenType::Nothing,
            "REPLACE" => TokenType::Replace,
            "IGNORE" => TokenType::Ignore,
            "MERGE" => TokenType::Merge,
            "MATCHED" => TokenType::Matched,
            "USING" => TokenType::Using,
            "TRUNCATE" => TokenType::Truncate,
            // Schema objects.
            "SCHEMA" => TokenType::Schema,
            "DATABASE" => TokenType::Database,
            "VIEW" => TokenType::View,
            "MATERIALIZED" => TokenType::Materialized,
            "TEMPORARY" => TokenType::Temporary,
            "TEMP" => TokenType::Temp,
            // Transactions and admin statements.
            "BEGIN" => TokenType::Begin,
            "COMMIT" => TokenType::Commit,
            "ROLLBACK" => TokenType::Rollback,
            "SAVEPOINT" => TokenType::Savepoint,
            "TRANSACTION" => TokenType::Transaction,
            "EXPLAIN" => TokenType::Explain,
            "ANALYZE" => TokenType::Analyze,
            "SHOW" => TokenType::Show,
            "USE" => TokenType::Use,
            "GRANT" => TokenType::Grant,
            "REVOKE" => TokenType::Revoke,
            // Table expressions and misc clauses.
            "LATERAL" => TokenType::Lateral,
            "UNNEST" => TokenType::Unnest,
            "PIVOT" => TokenType::Pivot,
            "UNPIVOT" => TokenType::Unpivot,
            "TABLESAMPLE" => TokenType::Tablesample,
            "FETCH" => TokenType::Fetch,
            "FIRST" => TokenType::First,
            "NEXT" => TokenType::Next,
            "ONLY" => TokenType::Only,
            "NULLS" => TokenType::Nulls,
            "RESPECT" => TokenType::Respect,
            "TOP" => TokenType::Top,
            "COLLATE" => TokenType::Collate,
            "QUALIFY" => TokenType::Qualify,
            "XOR" => TokenType::Xor,
            // Date/time extraction fields.
            "EXTRACT" => TokenType::Extract,
            "EPOCH" => TokenType::Epoch,
            "YEAR" => TokenType::Year,
            "MONTH" => TokenType::Month,
            "DAY" => TokenType::Day,
            "HOUR" => TokenType::Hour,
            "MINUTE" => TokenType::Minute,
            "SECOND" => TokenType::Second,
            _ => TokenType::Identifier,
        }
    }
589
590 fn read_quoted_identifier(&mut self, start: usize, start_line: usize, start_col: usize, quote: char) -> Result<Token> {
591 let end_char = if quote == '[' { ']' } else { quote };
592 let mut value = String::new();
593 loop {
594 match self.advance() {
595 Some(c) if c == end_char => {
596 if self.peek() == Some(end_char) && end_char != ']' {
597 self.advance();
598 value.push(end_char);
599 } else {
600 return Ok(Token::with_quote(
601 TokenType::Identifier,
602 value,
603 start,
604 start_line,
605 start_col,
606 quote,
607 ));
608 }
609 }
610 Some(c) => value.push(c),
611 None => {
612 return Err(SqlglotError::TokenizerError {
613 message: format!("Unterminated quoted identifier (expected {end_char})"),
614 position: start,
615 });
616 }
617 }
618 }
619 }
620}
621
#[cfg(test)]
mod tests {
    use super::*;

    // Keyword, identifier, comma, and Eof sequencing for a basic query.
    #[test]
    fn test_tokenize_simple_select() {
        let mut tokenizer = Tokenizer::new("SELECT a, b FROM t");
        let tokens = tokenizer.tokenize().unwrap();
        assert_eq!(tokens[0].token_type, TokenType::Select);
        assert_eq!(tokens[1].token_type, TokenType::Identifier);
        assert_eq!(tokens[1].value, "a");
        assert_eq!(tokens[2].token_type, TokenType::Comma);
        assert_eq!(tokens[3].token_type, TokenType::Identifier);
        assert_eq!(tokens[3].value, "b");
        assert_eq!(tokens[4].token_type, TokenType::From);
        assert_eq!(tokens[5].token_type, TokenType::Identifier);
        assert_eq!(tokens[5].value, "t");
        assert_eq!(tokens[6].token_type, TokenType::Eof);
    }

    // String token value excludes the surrounding quotes.
    #[test]
    fn test_tokenize_string_literal() {
        let mut tokenizer = Tokenizer::new("'hello world'");
        let tokens = tokenizer.tokenize().unwrap();
        assert_eq!(tokens[0].token_type, TokenType::String);
        assert_eq!(tokens[0].value, "hello world");
    }

    // Multi-char comparison operators.
    #[test]
    fn test_tokenize_operators() {
        let mut tokenizer = Tokenizer::new("a >= 1 AND b != 2");
        let tokens = tokenizer.tokenize().unwrap();
        assert_eq!(tokens[1].token_type, TokenType::GtEq);
        assert_eq!(tokens[3].token_type, TokenType::And);
        assert_eq!(tokens[5].token_type, TokenType::Neq);
    }

    // Decimal numbers keep their fractional part in one token.
    #[test]
    fn test_tokenize_number() {
        let mut tokenizer = Tokenizer::new("123.45");
        let tokens = tokenizer.tokenize().unwrap();
        assert_eq!(tokens[0].token_type, TokenType::Number);
        assert_eq!(tokens[0].value, "123.45");
    }

    // `with_comments` keeps `--` line comments in the token stream.
    #[test]
    fn test_tokenize_line_comment() {
        let mut tok = Tokenizer::with_comments("SELECT 1 -- comment\nFROM t");
        let tokens = tok.tokenize().unwrap();
        assert!(tokens.iter().any(|t| t.token_type == TokenType::LineComment));
    }

    // `with_comments` keeps `/* ... */` block comments in the token stream.
    #[test]
    fn test_tokenize_block_comment() {
        let mut tok = Tokenizer::with_comments("SELECT /* hello */ 1");
        let tokens = tok.tokenize().unwrap();
        assert!(tokens.iter().any(|t| t.token_type == TokenType::BlockComment));
    }

    // WITH / AS keywords for common table expressions.
    #[test]
    fn test_tokenize_cte_keywords() {
        let mut tok = Tokenizer::new("WITH cte AS (SELECT 1) SELECT * FROM cte");
        let tokens = tok.tokenize().unwrap();
        assert_eq!(tokens[0].token_type, TokenType::With);
        assert_eq!(tokens[2].token_type, TokenType::As);
    }

    // `::` cast operator is one token, not two colons.
    #[test]
    fn test_tokenize_double_colon() {
        let mut tok = Tokenizer::new("x::int");
        let tokens = tok.tokenize().unwrap();
        assert_eq!(tokens[1].token_type, TokenType::DoubleColon);
    }

    // CAST is a keyword token, not an identifier.
    #[test]
    fn test_tokenize_cast() {
        let mut tok = Tokenizer::new("CAST(x AS INT)");
        let tokens = tok.tokenize().unwrap();
        assert_eq!(tokens[0].token_type, TokenType::Cast);
    }

    // Window-function keywords OVER / PARTITION are recognized.
    #[test]
    fn test_tokenize_window() {
        let mut tok = Tokenizer::new("ROW_NUMBER() OVER (PARTITION BY id ORDER BY name)");
        let tokens = tok.tokenize().unwrap();
        assert!(tokens.iter().any(|t| t.token_type == TokenType::Over));
        assert!(tokens.iter().any(|t| t.token_type == TokenType::Partition));
    }

    // A newline in the input bumps the tracked line number.
    #[test]
    fn test_line_tracking() {
        let mut tok = Tokenizer::new("SELECT\n 1");
        let tokens = tok.tokenize().unwrap();
        assert_eq!(tokens[0].line, 1);
        assert_eq!(tokens[1].line, 2);
    }

    // Set-operation keywords tokenize individually.
    #[test]
    fn test_tokenize_union_intersect_except() {
        let mut tok = Tokenizer::new("UNION INTERSECT EXCEPT");
        let tokens = tok.tokenize().unwrap();
        assert_eq!(tokens[0].token_type, TokenType::Union);
        assert_eq!(tokens[1].token_type, TokenType::Intersect);
        assert_eq!(tokens[2].token_type, TokenType::Except);
    }
}