1use crate::types::Span;
8use regex::Regex;
9
10pub fn find_identifier_span(sql: &str, identifier: &str, search_start: usize) -> Option<Span> {
29 if identifier.is_empty() || search_start >= sql.len() {
30 return None;
31 }
32
33 let search_text = &sql[search_start..];
34
35 if let Some(pos) = find_word_boundary_match(search_text, identifier) {
37 return Some(Span::new(
38 search_start + pos,
39 search_start + pos + identifier.len(),
40 ));
41 }
42
43 if identifier.contains('.') {
45 if let Some(pos) = find_qualified_name(search_text, identifier) {
46 return Some(Span::new(
47 search_start + pos,
48 search_start + pos + identifier.len(),
49 ));
50 }
51 }
52
53 None
54}
55
56pub fn find_cte_definition_span(sql: &str, identifier: &str, search_start: usize) -> Option<Span> {
62 if identifier.is_empty() || search_start >= sql.len() {
63 return None;
64 }
65
66 let search_text = &sql[search_start..];
67
68 let mut pos = 0;
70 while pos < search_text.len() {
71 if let Some(with_pos) = find_keyword_case_insensitive(&search_text[pos..], "WITH") {
73 let after_with = pos + with_pos + 4;
74 let after_ws = skip_whitespace_and_comments(search_text, after_with);
76
77 let after_recursive = if let Some(rec_pos) =
79 find_keyword_case_insensitive(&search_text[after_ws..], "RECURSIVE")
80 {
81 if rec_pos == 0 {
82 skip_whitespace_and_comments(search_text, after_ws + 9)
84 } else {
85 after_ws
86 }
87 } else {
88 after_ws
89 };
90
91 if let Some((start, end)) =
93 match_identifier_at(search_text, after_recursive, identifier)
94 {
95 return Some(Span::new(search_start + start, search_start + end));
96 }
97 pos = after_recursive.max(after_with);
98 continue;
99 }
100
101 if let Some(comma_pos) = search_text[pos..].find(',') {
103 let after_comma = pos + comma_pos + 1;
104 let after_ws = skip_whitespace_and_comments(search_text, after_comma);
106 if let Some((start, end)) = match_identifier_at(search_text, after_ws, identifier) {
107 return Some(Span::new(search_start + start, search_start + end));
108 }
109 pos = after_comma;
110 continue;
111 }
112
113 break;
114 }
115
116 None
117}
118
119pub fn find_derived_table_alias_span(
125 sql: &str,
126 identifier: &str,
127 search_start: usize,
128) -> Option<Span> {
129 if identifier.is_empty() || search_start >= sql.len() {
130 return None;
131 }
132
133 let search_text = &sql[search_start..];
134
135 let mut pos = 0;
137 while pos < search_text.len() {
138 if let Some(paren_pos) = search_text[pos..].find(')') {
139 let after_paren = pos + paren_pos + 1;
140 let ws_end = skip_whitespace_and_comments(search_text, after_paren);
142
143 if ws_end >= search_text.len() {
144 pos = after_paren;
145 continue;
146 }
147
148 let after_as = if search_text[ws_end..].to_ascii_uppercase().starts_with("AS") {
150 let potential_as_end = ws_end + 2;
151 let is_standalone_as = potential_as_end >= search_text.len()
152 || search_text.as_bytes()[potential_as_end].is_ascii_whitespace()
153 || search_text[potential_as_end..].starts_with("/*")
154 || search_text[potential_as_end..].starts_with("--");
155 if is_standalone_as {
156 skip_whitespace_and_comments(search_text, potential_as_end)
157 } else {
158 ws_end
159 }
160 } else {
161 ws_end
162 };
163
164 if let Some((start, end)) = match_identifier_at(search_text, after_as, identifier) {
165 return Some(Span::new(search_start + start, search_start + end));
166 }
167 pos = after_paren;
168 continue;
169 }
170 break;
171 }
172
173 None
174}
175
176fn find_keyword_case_insensitive(text: &str, keyword: &str) -> Option<usize> {
178 let text_upper = text.to_ascii_uppercase();
179 let mut search_pos = 0;
180
181 while let Some(pos) = text_upper[search_pos..].find(keyword) {
182 let abs_pos = search_pos + pos;
183 let before_ok = abs_pos == 0 || !text.as_bytes()[abs_pos - 1].is_ascii_alphanumeric();
185 let after_pos = abs_pos + keyword.len();
187 let after_ok =
188 after_pos >= text.len() || !text.as_bytes()[after_pos].is_ascii_alphanumeric();
189
190 if before_ok && after_ok {
191 return Some(abs_pos);
192 }
193 search_pos = abs_pos + 1;
194 }
195 None
196}
197
198fn skip_whitespace_and_comments(text: &str, pos: usize) -> usize {
201 let mut current = pos;
202
203 loop {
204 if current >= text.len() {
205 return current;
206 }
207
208 let remaining = &text[current..];
209
210 let ws_chars: usize = remaining
212 .chars()
213 .take_while(|c| c.is_whitespace())
214 .map(|c| c.len_utf8())
215 .sum();
216 if ws_chars > 0 {
217 current += ws_chars;
218 continue;
219 }
220
221 if let Some(after_open) = remaining.strip_prefix("/*") {
223 if let Some(end) = after_open.find("*/") {
224 current += 2 + end + 2; continue;
226 } else {
227 return text.len();
229 }
230 }
231
232 if remaining.starts_with("--") {
234 if let Some(newline) = remaining.find('\n') {
235 current += newline + 1;
236 continue;
237 } else {
238 return text.len();
240 }
241 }
242
243 break;
245 }
246
247 current
248}
249
250fn match_identifier_at(text: &str, pos: usize, identifier: &str) -> Option<(usize, usize)> {
252 if pos >= text.len() {
253 return None;
254 }
255
256 let remaining = &text[pos..];
257 let ident_upper = identifier.to_ascii_uppercase();
258
259 for (open, close) in [("\"", "\""), ("`", "`"), ("[", "]")] {
261 if remaining.starts_with(open) {
262 let after_open = open.len();
263 if remaining[after_open..]
264 .to_ascii_uppercase()
265 .starts_with(&ident_upper)
266 {
267 let ident_end = after_open + identifier.len();
268 if remaining[ident_end..].starts_with(close) {
269 return Some((pos + after_open, pos + ident_end));
270 }
271 }
272 }
273 }
274
275 if remaining.to_ascii_uppercase().starts_with(&ident_upper) {
277 let end_pos = identifier.len();
278 let after_ok = end_pos >= remaining.len()
280 || (!remaining.as_bytes()[end_pos].is_ascii_alphanumeric()
281 && remaining.as_bytes()[end_pos] != b'_');
282 if after_ok {
283 return Some((pos, pos + identifier.len()));
284 }
285 }
286
287 None
288}
289
290fn find_word_boundary_match(text: &str, identifier: &str) -> Option<usize> {
293 let pattern = format!(r"(?i)\b{}\b", regex::escape(identifier));
296
297 if let Ok(re) = Regex::new(&pattern) {
299 if let Some(m) = re.find(text) {
300 return Some(m.start());
301 }
302 }
303
304 None
307}
308
309fn find_qualified_name(text: &str, qualified_name: &str) -> Option<usize> {
311 let parts: Vec<&str> = qualified_name.split('.').collect();
313 if parts.is_empty() {
314 return None;
315 }
316
317 let pattern_parts: Vec<String> = parts
320 .iter()
321 .map(|part| {
322 format!(r#"(?:"?{}\"?)"#, regex::escape(part))
324 })
325 .collect();
326
327 let pattern = format!(r"(?i){}", pattern_parts.join(r"\."));
328
329 if let Ok(re) = Regex::new(&pattern) {
330 if let Some(m) = re.find(text) {
331 return Some(m.start());
332 }
333 }
334
335 None
336}
337
338pub fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
349 if line == 0 || column == 0 {
350 return None;
351 }
352
353 let bytes = sql.as_bytes();
354 let mut current_line = 1;
355 let mut offset = 0;
356
357 while current_line < line {
359 let remaining = bytes.get(offset..)?;
360 let newline_pos = remaining.iter().position(|&b| b == b'\n')?;
361 offset += newline_pos + 1;
362 current_line += 1;
363 }
364
365 let line_start = offset;
366 let remaining = bytes.get(line_start..)?;
367 let line_len = remaining
368 .iter()
369 .position(|&b| b == b'\n')
370 .unwrap_or(remaining.len());
371 let line_end = line_start + line_len;
372 let line_slice = &sql[line_start..line_end];
373
374 let mut current_column = 1;
377 for (rel_offset, _) in line_slice.char_indices() {
378 if current_column == column {
379 return Some(line_start + rel_offset);
380 }
381 current_column += 1;
382 }
383
384 if column == current_column {
385 return Some(line_end);
386 }
387
388 None
389}
390
391#[cfg(test)]
392mod tests {
393 use super::*;
394
395 #[test]
396 fn test_find_identifier_span_simple() {
397 let sql = "SELECT * FROM users WHERE id = 1";
398 let span = find_identifier_span(sql, "users", 0);
399 assert_eq!(span, Some(Span::new(14, 19)));
400 }
401
402 #[test]
403 fn test_find_identifier_span_case_insensitive() {
404 let sql = "SELECT * FROM Users WHERE id = 1";
405 let span = find_identifier_span(sql, "users", 0);
406 assert!(span.is_some());
407 }
408
409 #[test]
410 fn test_find_identifier_span_qualified() {
411 let sql = "SELECT * FROM public.users";
412 let span = find_identifier_span(sql, "public.users", 0);
413 assert_eq!(span, Some(Span::new(14, 26)));
414 }
415
416 #[test]
417 fn test_find_identifier_span_with_offset() {
418 let sql = "SELECT 1; SELECT * FROM users";
419 let span = find_identifier_span(sql, "users", 10);
420 assert_eq!(span, Some(Span::new(24, 29)));
421 }
422
423 #[test]
424 fn test_find_identifier_span_not_found() {
425 let sql = "SELECT * FROM users";
426 let span = find_identifier_span(sql, "orders", 0);
427 assert_eq!(span, None);
428 }
429
430 #[test]
431 fn test_find_identifier_word_boundary() {
432 let sql = "SELECT users_id FROM users";
433 let span = find_identifier_span(sql, "users", 0);
435 assert!(span.is_some());
436 let span = span.unwrap();
437 assert_eq!(&sql[span.start..span.end].to_lowercase(), "users");
439 }
440
441 #[test]
442 fn test_find_cte_definition_span_single() {
443 let sql = "WITH my_cte AS (SELECT 1) SELECT * FROM my_cte";
444 let span = find_cte_definition_span(sql, "my_cte", 0);
445 assert_eq!(span, Some(Span::new(5, 11)));
446 }
447
448 #[test]
449 fn test_find_cte_definition_span_multiple() {
450 let sql = "WITH cte1 AS (SELECT 1), cte2 AS (SELECT 2) SELECT * FROM cte1, cte2";
451 let first_span = find_cte_definition_span(sql, "cte1", 0).expect("cte1 span");
452 assert_eq!(first_span, Span::new(5, 9));
453
454 let second_span = find_cte_definition_span(sql, "cte2", first_span.end).expect("cte2 span");
455 assert_eq!(second_span, Span::new(25, 29));
456 }
457
458 #[test]
459 fn test_find_derived_table_alias_span() {
460 let sql = "SELECT * FROM (SELECT 1) AS derived";
461 let span = find_derived_table_alias_span(sql, "derived", 0);
462 assert_eq!(span, Some(Span::new(28, 35)));
463 let span = span.expect("derived span");
464 assert_eq!(&sql[span.start..span.end], "derived");
465 }
466
467 #[test]
468 fn test_find_cte_definition_span_quoted() {
469 let sql = r#"WITH "MyCte" AS (SELECT 1) SELECT * FROM "MyCte""#;
471 let span = find_cte_definition_span(sql, "MyCte", 0);
472 assert!(span.is_some(), "should find quoted CTE");
473 let span = span.unwrap();
474 assert_eq!(&sql[span.start..span.end], "MyCte");
475
476 let sql = "WITH `my_cte` AS (SELECT 1) SELECT * FROM `my_cte`";
478 let span = find_cte_definition_span(sql, "my_cte", 0);
479 assert!(span.is_some(), "should find backtick-quoted CTE");
480 let span = span.unwrap();
481 assert_eq!(&sql[span.start..span.end], "my_cte");
482
483 let sql = "WITH [my_cte] AS (SELECT 1) SELECT * FROM [my_cte]";
485 let span = find_cte_definition_span(sql, "my_cte", 0);
486 assert!(span.is_some(), "should find bracket-quoted CTE");
487 let span = span.unwrap();
488 assert_eq!(&sql[span.start..span.end], "my_cte");
489 }
490
491 #[test]
492 fn test_find_derived_table_alias_span_without_as() {
493 let sql = "SELECT * FROM (SELECT 1) derived";
495 let span = find_derived_table_alias_span(sql, "derived", 0);
496 assert!(span.is_some(), "should find derived alias without AS");
497 let span = span.unwrap();
498 assert_eq!(&sql[span.start..span.end], "derived");
499 }
500
501 #[test]
502 fn test_find_derived_table_alias_span_multiple() {
503 let sql = "SELECT * FROM (SELECT 1) AS a, (SELECT 2) AS b";
504 let first_span = find_derived_table_alias_span(sql, "a", 0).expect("first derived span");
505 assert_eq!(&sql[first_span.start..first_span.end], "a");
506
507 let second_span =
508 find_derived_table_alias_span(sql, "b", first_span.end).expect("second derived span");
509 assert_eq!(&sql[second_span.start..second_span.end], "b");
510 }
511
512 #[test]
513 fn test_find_derived_table_alias_span_quoted() {
514 let sql = r#"SELECT * FROM (SELECT 1) AS "Derived""#;
515 let span = find_derived_table_alias_span(sql, "Derived", 0);
516 assert!(span.is_some(), "should find quoted derived alias");
517 let span = span.unwrap();
518 assert_eq!(&sql[span.start..span.end], "Derived");
519 }
520
521 #[test]
522 fn test_line_col_to_offset_single_line() {
523 let sql = "SELECT * FROM users";
524 assert_eq!(line_col_to_offset(sql, 1, 1), Some(0));
525 assert_eq!(line_col_to_offset(sql, 1, 8), Some(7));
526 }
527
528 #[test]
529 fn test_line_col_to_offset_multi_line() {
530 let sql = "SELECT *\nFROM users\nWHERE id = 1";
531 assert_eq!(line_col_to_offset(sql, 1, 1), Some(0));
532 assert_eq!(line_col_to_offset(sql, 2, 1), Some(9));
533 assert_eq!(line_col_to_offset(sql, 3, 1), Some(20));
534 }
535
536 #[test]
537 fn test_line_col_to_offset_unicode_columns() {
538 let sql = "SELECT μ, FROM users";
539 assert_eq!(line_col_to_offset(sql, 1, 11), Some("SELECT μ, ".len()));
541 assert_eq!(line_col_to_offset(sql, 1, 12), Some("SELECT μ, F".len()));
543 }
544
545 #[test]
546 fn test_line_col_to_offset_invalid() {
547 let sql = "SELECT * FROM users";
548 assert_eq!(line_col_to_offset(sql, 0, 1), None);
549 assert_eq!(line_col_to_offset(sql, 1, 0), None);
550 assert_eq!(line_col_to_offset(sql, 5, 1), None);
551 }
552
553 #[test]
554 fn test_find_identifier_empty() {
555 let sql = "SELECT * FROM users";
556 assert_eq!(find_identifier_span(sql, "", 0), None);
557 assert_eq!(find_identifier_span("", "users", 0), None);
558 }
559
560 #[test]
566 fn test_find_cte_definition_span_recursive() {
567 let sql = "WITH RECURSIVE my_cte AS (SELECT 1 UNION ALL SELECT 2) SELECT * FROM my_cte";
568 let span = find_cte_definition_span(sql, "my_cte", 0);
569 assert!(
570 span.is_some(),
571 "should find CTE name after RECURSIVE keyword"
572 );
573 let span = span.unwrap();
574 assert_eq!(&sql[span.start..span.end], "my_cte");
575 }
576
577 #[test]
578 fn test_find_cte_definition_span_recursive_multiple() {
579 let sql = "WITH RECURSIVE cte1 AS (SELECT 1), cte2 AS (SELECT 2) SELECT * FROM cte1, cte2";
580 let first_span = find_cte_definition_span(sql, "cte1", 0);
581 assert!(
582 first_span.is_some(),
583 "should find first CTE after RECURSIVE"
584 );
585 let first_span = first_span.unwrap();
586 assert_eq!(&sql[first_span.start..first_span.end], "cte1");
587
588 let second_span = find_cte_definition_span(sql, "cte2", first_span.end);
589 assert!(second_span.is_some(), "should find second CTE after comma");
590 let second_span = second_span.unwrap();
591 assert_eq!(&sql[second_span.start..second_span.end], "cte2");
592 }
593
594 #[test]
596 fn test_find_cte_definition_span_search_start_at_end() {
597 let sql = "WITH cte AS (SELECT 1) SELECT * FROM cte";
598 let span = find_cte_definition_span(sql, "cte", sql.len());
600 assert_eq!(span, None);
601 }
602
603 #[test]
604 fn test_find_derived_table_alias_search_start_at_end() {
605 let sql = "SELECT * FROM (SELECT 1) AS derived";
606 let span = find_derived_table_alias_span(sql, "derived", sql.len());
608 assert_eq!(span, None);
609 }
610
611 #[test]
612 fn test_find_derived_table_alias_paren_at_end() {
613 let sql = "SELECT * FROM (SELECT 1)";
615 let span = find_derived_table_alias_span(sql, "anything", 0);
616 assert_eq!(span, None);
617 }
618
619 #[test]
621 fn test_word_boundary_underscore_prefix() {
622 let sql = "SELECT * FROM _users";
623 let span = find_identifier_span(sql, "_users", 0);
625 assert!(
626 span.is_some(),
627 "should find identifier starting with underscore"
628 );
629 }
630
631 #[test]
632 fn test_word_boundary_underscore_suffix_no_match() {
633 let sql = "SELECT * FROM users_table";
634 let span = find_identifier_span(sql, "users", 0);
636 assert!(
639 span.is_none() || {
640 let s = span.unwrap();
641 s.end == s.start + "users".len()
643 && (s.end >= sql.len()
644 || !sql.as_bytes()[s.end].is_ascii_alphanumeric()
645 && sql.as_bytes()[s.end] != b'_')
646 },
647 "should not match 'users' as part of 'users_table'"
648 );
649 }
650
651 #[test]
652 fn test_cte_name_with_underscore_suffix_no_match() {
653 let sql = "WITH cte_name AS (SELECT 1) SELECT * FROM cte_name";
655 let span = find_cte_definition_span(sql, "cte", 0);
656 assert!(
657 span.is_none(),
658 "should not match 'cte' as part of 'cte_name'"
659 );
660 }
661
662 #[test]
664 fn test_find_cte_definition_span_with_block_comment() {
665 let sql = "WITH /* comment */ my_cte AS (SELECT 1) SELECT * FROM my_cte";
666 let span = find_cte_definition_span(sql, "my_cte", 0);
667 assert!(span.is_some(), "should find CTE name after block comment");
668 let span = span.unwrap();
669 assert_eq!(&sql[span.start..span.end], "my_cte");
670 }
671
672 #[test]
673 fn test_find_cte_definition_span_with_line_comment() {
674 let sql = "WITH -- comment\nmy_cte AS (SELECT 1) SELECT * FROM my_cte";
675 let span = find_cte_definition_span(sql, "my_cte", 0);
676 assert!(span.is_some(), "should find CTE name after line comment");
677 let span = span.unwrap();
678 assert_eq!(&sql[span.start..span.end], "my_cte");
679 }
680
681 #[test]
682 fn test_find_derived_table_alias_with_comment() {
683 let sql = "SELECT * FROM (SELECT 1) /* comment */ AS derived";
684 let span = find_derived_table_alias_span(sql, "derived", 0);
685 assert!(span.is_some(), "should find alias after block comment");
686 let span = span.unwrap();
687 assert_eq!(&sql[span.start..span.end], "derived");
688 }
689
690 #[test]
692 fn test_find_cte_definition_not_in_string_literal() {
693 let sql = "WITH cte AS (SELECT 'cte' AS name) SELECT * FROM cte";
695 let span = find_cte_definition_span(sql, "cte", 0);
696 assert!(span.is_some(), "should find CTE definition");
697 let span = span.unwrap();
698 assert_eq!(
700 span.start, 5,
701 "should find CTE definition, not string literal"
702 );
703 assert_eq!(&sql[span.start..span.end], "cte");
704 }
705
706 #[test]
707 fn test_find_derived_alias_not_in_string_literal() {
708 let sql = "SELECT * FROM (SELECT 'derived' AS name) AS derived";
710 let span = find_derived_table_alias_span(sql, "derived", 0);
711 assert!(span.is_some(), "should find derived alias");
712 let span = span.unwrap();
713 assert_eq!(&sql[span.start..span.end], "derived");
715 assert!(
717 span.start > sql.find(')').unwrap(),
718 "span should be after closing paren"
719 );
720 }
721
722 #[test]
724 fn test_find_cte_definition_empty_identifier() {
725 let sql = "WITH cte AS (SELECT 1) SELECT * FROM cte";
726 let span = find_cte_definition_span(sql, "", 0);
727 assert_eq!(span, None, "empty identifier should return None");
728 }
729
730 #[test]
731 fn test_find_derived_table_alias_empty_identifier() {
732 let sql = "SELECT * FROM (SELECT 1) AS derived";
733 let span = find_derived_table_alias_span(sql, "", 0);
734 assert_eq!(span, None, "empty identifier should return None");
735 }
736
737 #[test]
738 fn test_find_cte_definition_empty_sql() {
739 let span = find_cte_definition_span("", "cte", 0);
740 assert_eq!(span, None, "empty SQL should return None");
741 }
742
743 #[test]
744 fn test_find_derived_table_alias_empty_sql() {
745 let span = find_derived_table_alias_span("", "derived", 0);
746 assert_eq!(span, None, "empty SQL should return None");
747 }
748
749 #[test]
750 fn test_find_cte_definition_search_start_beyond_bounds() {
751 let sql = "WITH cte AS (SELECT 1)";
752 let span = find_cte_definition_span(sql, "cte", sql.len() + 100);
753 assert_eq!(span, None, "search_start beyond bounds should return None");
754 }
755
756 #[test]
757 fn test_find_derived_table_alias_search_start_beyond_bounds() {
758 let sql = "SELECT * FROM (SELECT 1) AS derived";
759 let span = find_derived_table_alias_span(sql, "derived", sql.len() + 100);
760 assert_eq!(span, None, "search_start beyond bounds should return None");
761 }
762
763 #[test]
765 fn test_find_cte_at_end_of_sql() {
766 let sql = "WITH x AS (SELECT 1) SELECT * FROM x";
767 let span = find_cte_definition_span(sql, "x", 0);
768 assert!(span.is_some());
769 let span = span.unwrap();
770 assert_eq!(&sql[span.start..span.end], "x");
771 }
772
773 #[test]
775 fn test_match_identifier_at_short_remaining() {
776 let sql = "WITH a AS (SELECT 1) SELECT * FROM a";
777 let span = find_cte_definition_span(sql, "a", 0);
778 assert!(span.is_some());
779 let span = span.unwrap();
780 assert_eq!(&sql[span.start..span.end], "a");
781 }
782}