1use super::{split_qualified_identifiers, unquote_identifier};
8use crate::types::Span;
9
10pub fn find_identifier_span(sql: &str, identifier: &str, search_start: usize) -> Option<Span> {
29 if identifier.is_empty() || search_start >= sql.len() {
30 return None;
31 }
32 if !sql.is_char_boundary(search_start) {
33 #[cfg(feature = "tracing")]
34 tracing::warn!(
35 search_start,
36 sql_len = sql.len(),
37 "find_identifier_span: search_start is not on a UTF-8 char boundary"
38 );
39 return None;
40 }
41
42 let search_text = &sql[search_start..];
43 find_qualified_name(search_text, identifier)
44 .map(|(start, end)| Span::new(search_start + start, search_start + end))
45}
46
47pub fn find_all_identifier_spans(
61 sql: &str,
62 identifier: &str,
63 search_start: usize,
64 search_end: usize,
65) -> Vec<Span> {
66 let mut spans = Vec::new();
67 if identifier.is_empty() || search_start >= search_end || search_end > sql.len() {
68 return spans;
69 }
70 if !sql.is_char_boundary(search_start) || !sql.is_char_boundary(search_end) {
71 #[cfg(feature = "tracing")]
72 tracing::warn!(
73 search_start,
74 search_end,
75 sql_len = sql.len(),
76 "find_all_identifier_spans: search range is not on UTF-8 char boundaries"
77 );
78 return spans;
79 }
80
81 let scope = &sql[search_start..search_end];
82 let mut cursor = 0usize;
83 while let Some(occurrence) = find_identifier_occurrence(scope, identifier, cursor) {
84 spans.push(Span::new(
85 search_start + occurrence.full_start,
86 search_start + occurrence.full_end,
87 ));
88 cursor = occurrence.full_end;
89 }
90 spans
91}
92
93pub fn find_relation_occurrence_spans(
102 sql: &str,
103 identifier: &str,
104 search_start: usize,
105) -> Option<(Span, Span)> {
106 if identifier.is_empty() || search_start >= sql.len() {
107 return None;
108 }
109 if !sql.is_char_boundary(search_start) {
110 #[cfg(feature = "tracing")]
111 tracing::warn!(
112 search_start,
113 sql_len = sql.len(),
114 "find_relation_occurrence_spans: search_start is not on a UTF-8 char boundary"
115 );
116 return None;
117 }
118
119 let search_text = &sql[search_start..];
120 let occurrence = find_identifier_occurrence(search_text, identifier, 0)?;
121 Some((
122 Span::new(
123 search_start + occurrence.full_start,
124 search_start + occurrence.full_end,
125 ),
126 Span::new(
127 search_start + occurrence.tail_start,
128 search_start + occurrence.tail_end,
129 ),
130 ))
131}
132
133pub fn find_cte_body_span(sql: &str, name_span: Span) -> Option<Span> {
141 if name_span.end > sql.len() || !sql.is_char_boundary(name_span.end) {
142 #[cfg(feature = "tracing")]
143 tracing::warn!(
144 end = name_span.end,
145 sql_len = sql.len(),
146 "find_cte_body_span: name_span.end is not on a UTF-8 char boundary"
147 );
148 return None;
149 }
150
151 let bytes = sql.as_bytes();
152
153 let mut pos = skip_whitespace_and_comments(sql, name_span.end);
155
156 if pos < bytes.len() && bytes[pos] == b'(' {
158 let list_end = find_matching_paren(bytes, pos)?;
159 pos = skip_whitespace_and_comments(sql, list_end + 1);
160 }
161
162 pos = consume_ascii_keyword(sql, pos, "AS")?;
164
165 if let Some(after_not) = consume_ascii_keyword(sql, pos, "NOT") {
168 if let Some(after_materialized) = consume_ascii_keyword(sql, after_not, "MATERIALIZED") {
169 pos = after_materialized;
170 }
171 } else if let Some(after_materialized) = consume_ascii_keyword(sql, pos, "MATERIALIZED") {
172 pos = after_materialized;
173 }
174
175 if pos >= bytes.len() || bytes[pos] != b'(' {
176 return None;
177 }
178
179 let body_end = find_matching_paren(bytes, pos)?;
180 Some(Span::new(pos, body_end + 1))
181}
182
183fn find_matching_paren(bytes: &[u8], open: usize) -> Option<usize> {
189 if open >= bytes.len() || bytes[open] != b'(' {
190 return None;
191 }
192 let mut depth = 0i32;
193 let mut i = open;
194 while i < bytes.len() {
195 if let Some(skip_to) = skip_string_or_comment(bytes, i) {
196 debug_assert!(
197 skip_to > i,
198 "skip_string_or_comment must advance past the current index"
199 );
200 if skip_to <= i {
201 return None;
202 }
203 i = skip_to;
204 continue;
205 }
206 match bytes[i] {
207 b'(' => depth += 1,
208 b')' => {
209 depth -= 1;
210 if depth == 0 {
211 return Some(i);
212 }
213 }
214 _ => {}
215 }
216 i += 1;
217 }
218 None
219}
220
221fn skip_string_or_comment(bytes: &[u8], pos: usize) -> Option<usize> {
229 if pos >= bytes.len() {
230 return None;
231 }
232 if pos + 1 < bytes.len() && bytes[pos] == b'/' && bytes[pos + 1] == b'*' {
234 let mut i = pos + 2;
235 while i + 1 < bytes.len() {
236 if bytes[i] == b'*' && bytes[i + 1] == b'/' {
237 return Some(i + 2);
238 }
239 i += 1;
240 }
241 return Some(bytes.len());
242 }
243 if pos + 1 < bytes.len() && bytes[pos] == b'-' && bytes[pos + 1] == b'-' {
245 let mut i = pos + 2;
246 while i < bytes.len() {
247 if bytes[i] == b'\n' {
248 return Some(i + 1);
249 }
250 i += 1;
251 }
252 return Some(bytes.len());
253 }
254 if bytes[pos] == b'#' {
256 let mut i = pos + 1;
257 while i < bytes.len() {
258 if bytes[i] == b'\n' {
259 return Some(i + 1);
260 }
261 i += 1;
262 }
263 return Some(bytes.len());
264 }
265 if bytes[pos] == b'\'' {
267 let mut i = pos + 1;
268 while i < bytes.len() {
269 if bytes[i] == b'\'' {
270 if i + 1 < bytes.len() && bytes[i + 1] == b'\'' {
271 i += 2;
272 continue;
273 }
274 return Some(i + 1);
275 }
276 i += 1;
277 }
278 return Some(bytes.len());
279 }
280 if bytes[pos] == b'$' {
282 return skip_dollar_quoted_string(bytes, pos);
283 }
284 None
285}
286
287fn skip_dollar_quoted_string(bytes: &[u8], pos: usize) -> Option<usize> {
288 if pos >= bytes.len() || bytes[pos] != b'$' {
289 return None;
290 }
291
292 let mut tag_end = pos + 1;
293 while tag_end < bytes.len() {
294 match bytes[tag_end] {
295 b'$' => {
296 let delimiter = &bytes[pos..=tag_end];
297 let search_start = tag_end + 1;
298 let mut i = search_start;
299 while i + delimiter.len() <= bytes.len() {
300 if &bytes[i..i + delimiter.len()] == delimiter {
301 return Some(i + delimiter.len());
302 }
303 i += 1;
304 }
305 return Some(bytes.len());
306 }
307 b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' => tag_end += 1,
308 _ => return None,
309 }
310 }
311 None
312}
313
314pub fn find_cte_definition_span(sql: &str, identifier: &str, search_start: usize) -> Option<Span> {
320 if identifier.is_empty() || search_start >= sql.len() {
321 return None;
322 }
323
324 let search_text = &sql[search_start..];
325
326 let mut pos = 0;
328 while pos < search_text.len() {
329 if let Some(with_pos) = find_keyword_case_insensitive(&search_text[pos..], "WITH") {
331 let after_with = pos + with_pos + 4;
332 let after_ws = skip_whitespace_and_comments(search_text, after_with);
334
335 let after_recursive = if let Some(rec_pos) =
337 find_keyword_case_insensitive(&search_text[after_ws..], "RECURSIVE")
338 {
339 if rec_pos == 0 {
340 skip_whitespace_and_comments(search_text, after_ws + 9)
342 } else {
343 after_ws
344 }
345 } else {
346 after_ws
347 };
348
349 if let Some((start, end)) =
351 match_identifier_at(search_text, after_recursive, identifier)
352 {
353 return Some(Span::new(search_start + start, search_start + end));
354 }
355 pos = after_recursive.max(after_with);
356 continue;
357 }
358
359 if let Some(comma_pos) = search_text[pos..].find(',') {
361 let after_comma = pos + comma_pos + 1;
362 let after_ws = skip_whitespace_and_comments(search_text, after_comma);
364 if let Some((start, end)) = match_identifier_at(search_text, after_ws, identifier) {
365 return Some(Span::new(search_start + start, search_start + end));
366 }
367 pos = after_comma;
368 continue;
369 }
370
371 break;
372 }
373
374 None
375}
376
377pub fn find_derived_table_alias_span(
383 sql: &str,
384 identifier: &str,
385 search_start: usize,
386) -> Option<Span> {
387 if identifier.is_empty() || search_start >= sql.len() {
388 return None;
389 }
390
391 let search_text = &sql[search_start..];
392
393 let mut pos = 0;
395 while pos < search_text.len() {
396 if let Some(paren_pos) = search_text[pos..].find(')') {
397 let after_paren = pos + paren_pos + 1;
398 let ws_end = skip_whitespace_and_comments(search_text, after_paren);
400
401 if ws_end >= search_text.len() {
402 pos = after_paren;
403 continue;
404 }
405
406 let after_as = if search_text[ws_end..].to_ascii_uppercase().starts_with("AS") {
408 let potential_as_end = ws_end + 2;
409 let is_standalone_as = potential_as_end >= search_text.len()
410 || search_text.as_bytes()[potential_as_end].is_ascii_whitespace()
411 || search_text[potential_as_end..].starts_with("/*")
412 || search_text[potential_as_end..].starts_with("--");
413 if is_standalone_as {
414 skip_whitespace_and_comments(search_text, potential_as_end)
415 } else {
416 ws_end
417 }
418 } else {
419 ws_end
420 };
421
422 if let Some((start, end)) = match_identifier_at(search_text, after_as, identifier) {
423 return Some(Span::new(search_start + start, search_start + end));
424 }
425 pos = after_paren;
426 continue;
427 }
428 break;
429 }
430
431 None
432}
433
434fn find_keyword_case_insensitive(text: &str, keyword: &str) -> Option<usize> {
436 let text_upper = text.to_ascii_uppercase();
437 let mut search_pos = 0;
438
439 while let Some(pos) = text_upper[search_pos..].find(keyword) {
440 let abs_pos = search_pos + pos;
441 let before_ok = abs_pos == 0 || !is_identifier_char_before(text, abs_pos);
443 let after_pos = abs_pos + keyword.len();
445 let after_ok = after_pos >= text.len() || !is_identifier_char_at(text, after_pos);
446
447 if before_ok && after_ok {
448 return Some(abs_pos);
449 }
450 search_pos = abs_pos + 1;
451 }
452 None
453}
454
455fn skip_whitespace_and_comments(text: &str, pos: usize) -> usize {
458 let mut current = pos;
459
460 loop {
461 if current >= text.len() {
462 return current;
463 }
464
465 let remaining = &text[current..];
466
467 let ws_chars: usize = remaining
469 .chars()
470 .take_while(|c| c.is_whitespace())
471 .map(|c| c.len_utf8())
472 .sum();
473 if ws_chars > 0 {
474 current += ws_chars;
475 continue;
476 }
477
478 if let Some(after_open) = remaining.strip_prefix("/*") {
480 if let Some(end) = after_open.find("*/") {
481 current += 2 + end + 2; continue;
483 } else {
484 return text.len();
486 }
487 }
488
489 if remaining.starts_with("--") {
491 if let Some(newline) = remaining.find('\n') {
492 current += newline + 1;
493 continue;
494 } else {
495 return text.len();
497 }
498 }
499
500 break;
502 }
503
504 current
505}
506
507fn match_identifier_at(text: &str, pos: usize, identifier: &str) -> Option<(usize, usize)> {
509 if pos >= text.len() {
510 return None;
511 }
512
513 let remaining = &text[pos..];
514 let ident_upper = identifier.to_ascii_uppercase();
515
516 for (open, close) in [("\"", "\""), ("`", "`"), ("[", "]")] {
518 if remaining.starts_with(open) {
519 let after_open = open.len();
520 if remaining[after_open..]
521 .to_ascii_uppercase()
522 .starts_with(&ident_upper)
523 {
524 let ident_end = after_open + identifier.len();
525 if remaining[ident_end..].starts_with(close) {
526 return Some((pos + after_open, pos + ident_end));
527 }
528 }
529 }
530 }
531
532 if remaining.to_ascii_uppercase().starts_with(&ident_upper) {
534 let end_pos = identifier.len();
535 let after_ok = end_pos >= remaining.len() || !is_identifier_char_at(remaining, end_pos);
537 if after_ok {
538 return Some((pos, pos + identifier.len()));
539 }
540 }
541
542 None
543}
544
545fn find_qualified_name(text: &str, qualified_name: &str) -> Option<(usize, usize)> {
547 find_identifier_occurrence(text, qualified_name, 0)
548 .map(|occurrence| (occurrence.full_start, occurrence.full_end))
549}
550
551#[derive(Debug, Clone, Copy)]
552struct IdentifierOccurrence {
553 full_start: usize,
554 full_end: usize,
555 tail_start: usize,
556 tail_end: usize,
557}
558
559#[derive(Debug, Clone, Copy)]
560struct ParsedIdentifierPart {
561 content_start: usize,
562 content_end: usize,
563 full_end: usize,
564}
565
566fn find_identifier_occurrence(
567 text: &str,
568 identifier: &str,
569 search_start: usize,
570) -> Option<IdentifierOccurrence> {
571 if identifier.is_empty() || search_start >= text.len() || !text.is_char_boundary(search_start) {
572 return None;
573 }
574
575 let target_parts = parse_identifier_target_parts(identifier)?;
576 let bytes = text.as_bytes();
577 let mut cursor = search_start;
578 while cursor < bytes.len() {
579 if let Some(skip_to) = skip_string_or_comment(bytes, cursor) {
580 debug_assert!(skip_to > cursor);
581 if skip_to <= cursor {
582 return None;
583 }
584 cursor = skip_to;
585 continue;
586 }
587
588 if let Some(occurrence) = match_identifier_occurrence_at(text, cursor, &target_parts) {
589 return Some(occurrence);
590 }
591
592 cursor = advance_scan_cursor(text, cursor)?;
593 }
594
595 None
596}
597
598fn parse_identifier_target_parts(identifier: &str) -> Option<Vec<String>> {
599 let target_parts: Vec<String> = split_qualified_identifiers(identifier)
600 .into_iter()
601 .map(|part| unquote_identifier(&part))
602 .collect();
603 (!target_parts.is_empty() && target_parts.iter().all(|part| !part.is_empty()))
604 .then_some(target_parts)
605}
606
607fn match_identifier_occurrence_at(
608 text: &str,
609 start: usize,
610 target_parts: &[String],
611) -> Option<IdentifierOccurrence> {
612 if !text.is_char_boundary(start) {
613 return None;
614 }
615
616 let before_ok = start == 0 || !is_identifier_char_before(text, start);
617 if !before_ok {
618 return None;
619 }
620
621 let (full_end, tail_start, tail_end) = match_identifier_sequence_at(text, start, target_parts)?;
622 let after_ok = full_end == text.len() || !is_identifier_char_at(text, full_end);
623 after_ok.then_some(IdentifierOccurrence {
624 full_start: start,
625 full_end,
626 tail_start,
627 tail_end,
628 })
629}
630
631fn advance_scan_cursor(text: &str, cursor: usize) -> Option<usize> {
632 if !text.is_char_boundary(cursor) {
633 return Some(cursor + 1);
634 }
635 let ch = text.get(cursor..)?.chars().next()?;
636 Some(cursor + ch.len_utf8())
637}
638
639fn match_identifier_sequence_at(
640 text: &str,
641 start: usize,
642 parts: &[String],
643) -> Option<(usize, usize, usize)> {
644 if parts.is_empty() {
645 return None;
646 }
647
648 let bytes = text.as_bytes();
649 let mut current = start;
650 let mut tail = None;
651
652 for (idx, part) in parts.iter().enumerate() {
653 let parsed = match_identifier_part_at(text, current, part)?;
654
655 if idx == parts.len() - 1 {
656 tail = Some((parsed.content_start, parsed.content_end));
657 }
658
659 current = parsed.full_end;
660 if idx < parts.len() - 1 {
661 current = skip_whitespace_and_comments(text, current);
662 if current >= bytes.len() || bytes[current] != b'.' {
663 return None;
664 }
665 current = skip_whitespace_and_comments(text, current + 1);
666 }
667 }
668
669 let (tail_start, tail_end) = tail?;
670 Some((current, tail_start, tail_end))
671}
672
673fn match_identifier_part_at(
674 text: &str,
675 start: usize,
676 target_part: &str,
677) -> Option<ParsedIdentifierPart> {
678 if start >= text.len() || !text.is_char_boundary(start) || target_part.is_empty() {
679 return None;
680 }
681
682 let bytes = text.as_bytes();
683 match bytes[start] {
684 b'"' | b'`' | b'[' | b'\'' => {
685 let (close_quote, content_start) = match bytes[start] {
686 b'"' => (b'"', start + 1),
687 b'`' => (b'`', start + 1),
688 b'[' => (b']', start + 1),
689 b'\'' => (b'\'', start + 1),
690 _ => unreachable!(),
691 };
692
693 let mut i = content_start;
694 while i < bytes.len() {
695 if bytes[i] == close_quote {
696 if i + 1 < bytes.len() && bytes[i + 1] == close_quote {
697 i += 2;
698 continue;
699 }
700 let candidate = text.get(content_start..i)?;
701 if !candidate.eq_ignore_ascii_case(target_part) {
702 return None;
703 }
704 return Some(ParsedIdentifierPart {
705 content_start,
706 content_end: i,
707 full_end: i + 1,
708 });
709 }
710 i += 1;
711 }
712 None
713 }
714 _ if target_part.chars().all(is_identifier_char) => {
715 let mut end = start;
716 for ch in text.get(start..)?.chars() {
717 if !is_identifier_char(ch) {
718 break;
719 }
720 end += ch.len_utf8();
721 }
722 let candidate = text.get(start..end)?;
723 candidate
724 .eq_ignore_ascii_case(target_part)
725 .then_some(ParsedIdentifierPart {
726 content_start: start,
727 content_end: end,
728 full_end: end,
729 })
730 }
731 _ => {
732 let end = start + target_part.len();
733 let candidate = text.get(start..end)?;
734 candidate
735 .eq_ignore_ascii_case(target_part)
736 .then_some(ParsedIdentifierPart {
737 content_start: start,
738 content_end: end,
739 full_end: end,
740 })
741 }
742 }
743}
744
745fn consume_ascii_keyword(text: &str, pos: usize, keyword: &str) -> Option<usize> {
746 let pos = skip_whitespace_and_comments(text, pos);
747 let remaining = text.get(pos..)?;
748 if find_keyword_case_insensitive(remaining, keyword) != Some(0) {
749 return None;
750 }
751 Some(skip_whitespace_and_comments(text, pos + keyword.len()))
752}
753
754fn is_identifier_char(ch: char) -> bool {
755 ch.is_alphanumeric() || ch == '_'
756}
757
758fn is_identifier_char_before(text: &str, byte_offset: usize) -> bool {
759 text.get(..byte_offset)
760 .and_then(|prefix| prefix.chars().next_back())
761 .is_some_and(is_identifier_char)
762}
763
764fn is_identifier_char_at(text: &str, byte_offset: usize) -> bool {
765 text.get(byte_offset..)
766 .and_then(|suffix| suffix.chars().next())
767 .is_some_and(is_identifier_char)
768}
769
770pub fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
781 if line == 0 || column == 0 {
782 return None;
783 }
784
785 let bytes = sql.as_bytes();
786 let mut current_line = 1;
787 let mut offset = 0;
788
789 while current_line < line {
791 let remaining = bytes.get(offset..)?;
792 let newline_pos = remaining.iter().position(|&b| b == b'\n')?;
793 offset += newline_pos + 1;
794 current_line += 1;
795 }
796
797 let line_start = offset;
798 let remaining = bytes.get(line_start..)?;
799 let line_len = remaining
800 .iter()
801 .position(|&b| b == b'\n')
802 .unwrap_or(remaining.len());
803 let line_end = line_start + line_len;
804 let line_slice = &sql[line_start..line_end];
805
806 let mut current_column = 1;
809 for (rel_offset, _) in line_slice.char_indices() {
810 if current_column == column {
811 return Some(line_start + rel_offset);
812 }
813 current_column += 1;
814 }
815
816 if column == current_column {
817 return Some(line_end);
818 }
819
820 None
821}
822
823#[cfg(test)]
824mod tests {
825 use super::*;
826
827 #[test]
828 fn test_find_identifier_span_simple() {
829 let sql = "SELECT * FROM users WHERE id = 1";
830 let span = find_identifier_span(sql, "users", 0);
831 assert_eq!(span, Some(Span::new(14, 19)));
832 }
833
834 #[test]
835 fn test_find_identifier_span_case_insensitive() {
836 let sql = "SELECT * FROM Users WHERE id = 1";
837 let span = find_identifier_span(sql, "users", 0);
838 assert!(span.is_some());
839 }
840
841 #[test]
842 fn test_find_identifier_span_qualified() {
843 let sql = "SELECT * FROM public.users";
844 let span = find_identifier_span(sql, "public.users", 0);
845 assert_eq!(span, Some(Span::new(14, 26)));
846 }
847
848 #[test]
849 fn test_find_identifier_span_qualified_with_quotes() {
850 let sql = r#"SELECT * FROM "Public"."Users""#;
851 let span = find_identifier_span(sql, "public.users", 0);
852 assert_eq!(span, Some(Span::new(14, 30)));
853 let span = span.expect("quoted qualified span");
854 assert_eq!(&sql[span.start..span.end], r#""Public"."Users""#);
855 }
856
857 #[test]
858 fn test_find_identifier_span_with_offset() {
859 let sql = "SELECT 1; SELECT * FROM users";
860 let span = find_identifier_span(sql, "users", 10);
861 assert_eq!(span, Some(Span::new(24, 29)));
862 }
863
864 #[test]
865 fn test_find_identifier_span_not_found() {
866 let sql = "SELECT * FROM users";
867 let span = find_identifier_span(sql, "orders", 0);
868 assert_eq!(span, None);
869 }
870
871 #[test]
872 fn test_find_identifier_word_boundary() {
873 let sql = "SELECT users_id FROM users";
874 let span = find_identifier_span(sql, "users", 0);
876 assert!(span.is_some());
877 let span = span.unwrap();
878 assert_eq!(&sql[span.start..span.end].to_lowercase(), "users");
880 }
881
882 #[test]
883 fn test_find_cte_definition_span_single() {
884 let sql = "WITH my_cte AS (SELECT 1) SELECT * FROM my_cte";
885 let span = find_cte_definition_span(sql, "my_cte", 0);
886 assert_eq!(span, Some(Span::new(5, 11)));
887 }
888
889 #[test]
890 fn test_find_cte_definition_span_multiple() {
891 let sql = "WITH cte1 AS (SELECT 1), cte2 AS (SELECT 2) SELECT * FROM cte1, cte2";
892 let first_span = find_cte_definition_span(sql, "cte1", 0).expect("cte1 span");
893 assert_eq!(first_span, Span::new(5, 9));
894
895 let second_span = find_cte_definition_span(sql, "cte2", first_span.end).expect("cte2 span");
896 assert_eq!(second_span, Span::new(25, 29));
897 }
898
899 #[test]
900 fn test_find_derived_table_alias_span() {
901 let sql = "SELECT * FROM (SELECT 1) AS derived";
902 let span = find_derived_table_alias_span(sql, "derived", 0);
903 assert_eq!(span, Some(Span::new(28, 35)));
904 let span = span.expect("derived span");
905 assert_eq!(&sql[span.start..span.end], "derived");
906 }
907
908 #[test]
909 fn test_find_cte_definition_span_quoted() {
910 let sql = r#"WITH "MyCte" AS (SELECT 1) SELECT * FROM "MyCte""#;
912 let span = find_cte_definition_span(sql, "MyCte", 0);
913 assert!(span.is_some(), "should find quoted CTE");
914 let span = span.unwrap();
915 assert_eq!(&sql[span.start..span.end], "MyCte");
916
917 let sql = "WITH `my_cte` AS (SELECT 1) SELECT * FROM `my_cte`";
919 let span = find_cte_definition_span(sql, "my_cte", 0);
920 assert!(span.is_some(), "should find backtick-quoted CTE");
921 let span = span.unwrap();
922 assert_eq!(&sql[span.start..span.end], "my_cte");
923
924 let sql = "WITH [my_cte] AS (SELECT 1) SELECT * FROM [my_cte]";
926 let span = find_cte_definition_span(sql, "my_cte", 0);
927 assert!(span.is_some(), "should find bracket-quoted CTE");
928 let span = span.unwrap();
929 assert_eq!(&sql[span.start..span.end], "my_cte");
930 }
931
932 #[test]
933 fn test_find_derived_table_alias_span_without_as() {
934 let sql = "SELECT * FROM (SELECT 1) derived";
936 let span = find_derived_table_alias_span(sql, "derived", 0);
937 assert!(span.is_some(), "should find derived alias without AS");
938 let span = span.unwrap();
939 assert_eq!(&sql[span.start..span.end], "derived");
940 }
941
942 #[test]
943 fn test_find_derived_table_alias_span_multiple() {
944 let sql = "SELECT * FROM (SELECT 1) AS a, (SELECT 2) AS b";
945 let first_span = find_derived_table_alias_span(sql, "a", 0).expect("first derived span");
946 assert_eq!(&sql[first_span.start..first_span.end], "a");
947
948 let second_span =
949 find_derived_table_alias_span(sql, "b", first_span.end).expect("second derived span");
950 assert_eq!(&sql[second_span.start..second_span.end], "b");
951 }
952
953 #[test]
954 fn test_find_derived_table_alias_span_quoted() {
955 let sql = r#"SELECT * FROM (SELECT 1) AS "Derived""#;
956 let span = find_derived_table_alias_span(sql, "Derived", 0);
957 assert!(span.is_some(), "should find quoted derived alias");
958 let span = span.unwrap();
959 assert_eq!(&sql[span.start..span.end], "Derived");
960 }
961
962 #[test]
963 fn test_line_col_to_offset_single_line() {
964 let sql = "SELECT * FROM users";
965 assert_eq!(line_col_to_offset(sql, 1, 1), Some(0));
966 assert_eq!(line_col_to_offset(sql, 1, 8), Some(7));
967 }
968
969 #[test]
970 fn test_line_col_to_offset_multi_line() {
971 let sql = "SELECT *\nFROM users\nWHERE id = 1";
972 assert_eq!(line_col_to_offset(sql, 1, 1), Some(0));
973 assert_eq!(line_col_to_offset(sql, 2, 1), Some(9));
974 assert_eq!(line_col_to_offset(sql, 3, 1), Some(20));
975 }
976
977 #[test]
978 fn test_line_col_to_offset_unicode_columns() {
979 let sql = "SELECT μ, FROM users";
980 assert_eq!(line_col_to_offset(sql, 1, 11), Some("SELECT μ, ".len()));
982 assert_eq!(line_col_to_offset(sql, 1, 12), Some("SELECT μ, F".len()));
984 }
985
986 #[test]
987 fn test_line_col_to_offset_invalid() {
988 let sql = "SELECT * FROM users";
989 assert_eq!(line_col_to_offset(sql, 0, 1), None);
990 assert_eq!(line_col_to_offset(sql, 1, 0), None);
991 assert_eq!(line_col_to_offset(sql, 5, 1), None);
992 }
993
994 #[test]
995 fn test_find_identifier_empty() {
996 let sql = "SELECT * FROM users";
997 assert_eq!(find_identifier_span(sql, "", 0), None);
998 assert_eq!(find_identifier_span("", "users", 0), None);
999 }
1000
1001 #[test]
1007 fn test_find_cte_definition_span_recursive() {
1008 let sql = "WITH RECURSIVE my_cte AS (SELECT 1 UNION ALL SELECT 2) SELECT * FROM my_cte";
1009 let span = find_cte_definition_span(sql, "my_cte", 0);
1010 assert!(
1011 span.is_some(),
1012 "should find CTE name after RECURSIVE keyword"
1013 );
1014 let span = span.unwrap();
1015 assert_eq!(&sql[span.start..span.end], "my_cte");
1016 }
1017
1018 #[test]
1019 fn test_find_cte_definition_span_recursive_multiple() {
1020 let sql = "WITH RECURSIVE cte1 AS (SELECT 1), cte2 AS (SELECT 2) SELECT * FROM cte1, cte2";
1021 let first_span = find_cte_definition_span(sql, "cte1", 0);
1022 assert!(
1023 first_span.is_some(),
1024 "should find first CTE after RECURSIVE"
1025 );
1026 let first_span = first_span.unwrap();
1027 assert_eq!(&sql[first_span.start..first_span.end], "cte1");
1028
1029 let second_span = find_cte_definition_span(sql, "cte2", first_span.end);
1030 assert!(second_span.is_some(), "should find second CTE after comma");
1031 let second_span = second_span.unwrap();
1032 assert_eq!(&sql[second_span.start..second_span.end], "cte2");
1033 }
1034
1035 #[test]
1037 fn test_find_cte_definition_span_search_start_at_end() {
1038 let sql = "WITH cte AS (SELECT 1) SELECT * FROM cte";
1039 let span = find_cte_definition_span(sql, "cte", sql.len());
1041 assert_eq!(span, None);
1042 }
1043
1044 #[test]
1045 fn test_find_derived_table_alias_search_start_at_end() {
1046 let sql = "SELECT * FROM (SELECT 1) AS derived";
1047 let span = find_derived_table_alias_span(sql, "derived", sql.len());
1049 assert_eq!(span, None);
1050 }
1051
1052 #[test]
1053 fn test_find_derived_table_alias_paren_at_end() {
1054 let sql = "SELECT * FROM (SELECT 1)";
1056 let span = find_derived_table_alias_span(sql, "anything", 0);
1057 assert_eq!(span, None);
1058 }
1059
1060 #[test]
1062 fn test_word_boundary_underscore_prefix() {
1063 let sql = "SELECT * FROM _users";
1064 let span = find_identifier_span(sql, "_users", 0);
1066 assert!(
1067 span.is_some(),
1068 "should find identifier starting with underscore"
1069 );
1070 }
1071
1072 #[test]
1073 fn test_word_boundary_underscore_suffix_no_match() {
1074 let sql = "SELECT * FROM users_table";
1075 let span = find_identifier_span(sql, "users", 0);
1077 assert!(
1080 span.is_none() || {
1081 let s = span.unwrap();
1082 s.end == s.start + "users".len()
1084 && (s.end >= sql.len()
1085 || !sql.as_bytes()[s.end].is_ascii_alphanumeric()
1086 && sql.as_bytes()[s.end] != b'_')
1087 },
1088 "should not match 'users' as part of 'users_table'"
1089 );
1090 }
1091
1092 #[test]
1093 fn test_word_boundary_unicode_suffix_no_match() {
1094 let sql = "SELECT * FROM 表名";
1095 let span = find_identifier_span(sql, "表", 0);
1097 assert_eq!(span, None);
1098 }
1099
1100 #[test]
1101 fn test_word_boundary_unicode_search_offset_no_partial_match() {
1102 let sql = "SELECT 表 FROM 表名";
1103 let first = find_identifier_span(sql, "表", 0).expect("first 表");
1104 let second = find_identifier_span(sql, "表", first.end);
1105 assert_eq!(second, None);
1106 }
1107
1108 #[test]
1109 fn test_cte_name_with_underscore_suffix_no_match() {
1110 let sql = "WITH cte_name AS (SELECT 1) SELECT * FROM cte_name";
1112 let span = find_cte_definition_span(sql, "cte", 0);
1113 assert!(
1114 span.is_none(),
1115 "should not match 'cte' as part of 'cte_name'"
1116 );
1117 }
1118
1119 #[test]
1121 fn test_find_cte_definition_span_with_block_comment() {
1122 let sql = "WITH /* comment */ my_cte AS (SELECT 1) SELECT * FROM my_cte";
1123 let span = find_cte_definition_span(sql, "my_cte", 0);
1124 assert!(span.is_some(), "should find CTE name after block comment");
1125 let span = span.unwrap();
1126 assert_eq!(&sql[span.start..span.end], "my_cte");
1127 }
1128
1129 #[test]
1130 fn test_find_cte_definition_span_with_line_comment() {
1131 let sql = "WITH -- comment\nmy_cte AS (SELECT 1) SELECT * FROM my_cte";
1132 let span = find_cte_definition_span(sql, "my_cte", 0);
1133 assert!(span.is_some(), "should find CTE name after line comment");
1134 let span = span.unwrap();
1135 assert_eq!(&sql[span.start..span.end], "my_cte");
1136 }
1137
1138 #[test]
1139 fn test_find_derived_table_alias_with_comment() {
1140 let sql = "SELECT * FROM (SELECT 1) /* comment */ AS derived";
1141 let span = find_derived_table_alias_span(sql, "derived", 0);
1142 assert!(span.is_some(), "should find alias after block comment");
1143 let span = span.unwrap();
1144 assert_eq!(&sql[span.start..span.end], "derived");
1145 }
1146
1147 #[test]
1149 fn test_find_cte_definition_not_in_string_literal() {
1150 let sql = "WITH cte AS (SELECT 'cte' AS name) SELECT * FROM cte";
1152 let span = find_cte_definition_span(sql, "cte", 0);
1153 assert!(span.is_some(), "should find CTE definition");
1154 let span = span.unwrap();
1155 assert_eq!(
1157 span.start, 5,
1158 "should find CTE definition, not string literal"
1159 );
1160 assert_eq!(&sql[span.start..span.end], "cte");
1161 }
1162
1163 #[test]
1164 fn test_find_derived_alias_not_in_string_literal() {
1165 let sql = "SELECT * FROM (SELECT 'derived' AS name) AS derived";
1167 let span = find_derived_table_alias_span(sql, "derived", 0);
1168 assert!(span.is_some(), "should find derived alias");
1169 let span = span.unwrap();
1170 assert_eq!(&sql[span.start..span.end], "derived");
1172 assert!(
1174 span.start > sql.find(')').unwrap(),
1175 "span should be after closing paren"
1176 );
1177 }
1178
1179 #[test]
1181 fn test_find_cte_definition_empty_identifier() {
1182 let sql = "WITH cte AS (SELECT 1) SELECT * FROM cte";
1183 let span = find_cte_definition_span(sql, "", 0);
1184 assert_eq!(span, None, "empty identifier should return None");
1185 }
1186
1187 #[test]
1188 fn test_find_derived_table_alias_empty_identifier() {
1189 let sql = "SELECT * FROM (SELECT 1) AS derived";
1190 let span = find_derived_table_alias_span(sql, "", 0);
1191 assert_eq!(span, None, "empty identifier should return None");
1192 }
1193
1194 #[test]
1195 fn test_find_cte_definition_empty_sql() {
1196 let span = find_cte_definition_span("", "cte", 0);
1197 assert_eq!(span, None, "empty SQL should return None");
1198 }
1199
1200 #[test]
1201 fn test_find_derived_table_alias_empty_sql() {
1202 let span = find_derived_table_alias_span("", "derived", 0);
1203 assert_eq!(span, None, "empty SQL should return None");
1204 }
1205
1206 #[test]
1207 fn test_find_cte_definition_search_start_beyond_bounds() {
1208 let sql = "WITH cte AS (SELECT 1)";
1209 let span = find_cte_definition_span(sql, "cte", sql.len() + 100);
1210 assert_eq!(span, None, "search_start beyond bounds should return None");
1211 }
1212
1213 #[test]
1214 fn test_find_derived_table_alias_search_start_beyond_bounds() {
1215 let sql = "SELECT * FROM (SELECT 1) AS derived";
1216 let span = find_derived_table_alias_span(sql, "derived", sql.len() + 100);
1217 assert_eq!(span, None, "search_start beyond bounds should return None");
1218 }
1219
1220 #[test]
1222 fn test_find_cte_at_end_of_sql() {
1223 let sql = "WITH x AS (SELECT 1) SELECT * FROM x";
1224 let span = find_cte_definition_span(sql, "x", 0);
1225 assert!(span.is_some());
1226 let span = span.unwrap();
1227 assert_eq!(&sql[span.start..span.end], "x");
1228 }
1229
1230 #[test]
1232 fn test_match_identifier_at_short_remaining() {
1233 let sql = "WITH a AS (SELECT 1) SELECT * FROM a";
1234 let span = find_cte_definition_span(sql, "a", 0);
1235 assert!(span.is_some());
1236 let span = span.unwrap();
1237 assert_eq!(&sql[span.start..span.end], "a");
1238 }
1239
1240 #[test]
1241 fn test_find_all_identifier_spans_multiple_refs() {
1242 let sql = "SELECT * FROM users u WHERE u.id IN (SELECT id FROM users)";
1243 let spans = find_all_identifier_spans(sql, "users", 0, sql.len());
1244 assert_eq!(spans.len(), 2);
1245 for span in &spans {
1246 assert_eq!(&sql[span.start..span.end], "users");
1247 }
1248 }
1249
1250 #[test]
1251 fn test_find_all_identifier_spans_cte_declaration_and_refs() {
1252 let sql = "WITH a AS (SELECT 1) SELECT a.x FROM a";
1253 let spans = find_all_identifier_spans(sql, "a", 0, sql.len());
1254 assert_eq!(spans.len(), 3);
1256 assert!(spans
1257 .iter()
1258 .all(|s| &sql[s.start..s.end] == "a" && s.end > s.start));
1259 }
1260
1261 #[test]
1262 fn test_find_all_identifier_spans_ignores_string_literals_and_comments() {
1263 let sql = "SELECT * FROM users WHERE name = 'users' -- users\n/* users */";
1264 let spans = find_all_identifier_spans(sql, "users", 0, sql.len());
1265 assert_eq!(spans.len(), 1);
1267 assert_eq!(spans[0].start, 14);
1268 assert_eq!(spans[0].end, 19);
1269 }
1270
1271 #[test]
1272 fn test_find_all_identifier_spans_word_boundary() {
1273 let sql = "SELECT * FROM users_archive, users";
1274 let spans = find_all_identifier_spans(sql, "users", 0, sql.len());
1275 assert_eq!(spans.len(), 1);
1277 assert_eq!(&sql[spans[0].start..spans[0].end], "users");
1278 }
1279
1280 #[test]
1281 fn test_find_all_identifier_spans_case_insensitive() {
1282 let sql = "SELECT * FROM Users JOIN USERS u ON u.id = Users.id";
1283 let spans = find_all_identifier_spans(sql, "users", 0, sql.len());
1284 assert_eq!(spans.len(), 3);
1285 }
1286
1287 #[test]
1288 fn test_find_all_identifier_spans_respects_search_bounds() {
1289 let sql = "users users users";
1290 let spans = find_all_identifier_spans(sql, "users", 6, 12);
1291 assert_eq!(spans.len(), 1);
1292 assert_eq!(spans[0].start, 6);
1293 assert_eq!(spans[0].end, 11);
1294 }
1295
1296 #[test]
1297 fn test_find_cte_body_span_simple() {
1298 let sql = "WITH a AS (SELECT 1) SELECT * FROM a";
1299 let name_span = Span::new(5, 6);
1301 let body = find_cte_body_span(sql, name_span).expect("body span");
1302 assert_eq!(&sql[body.start..body.end], "(SELECT 1)");
1303 }
1304
1305 #[test]
1306 fn test_find_cte_body_span_nested_parens() {
1307 let sql = "WITH a AS (SELECT (1 + 2) AS x) SELECT * FROM a";
1308 let name_span = Span::new(5, 6);
1309 let body = find_cte_body_span(sql, name_span).expect("body span");
1310 assert_eq!(&sql[body.start..body.end], "(SELECT (1 + 2) AS x)");
1311 }
1312
1313 #[test]
1314 fn test_find_cte_body_span_paren_in_string_literal() {
1315 let sql = "WITH a AS (SELECT ')' AS c) SELECT * FROM a";
1316 let name_span = Span::new(5, 6);
1317 let body = find_cte_body_span(sql, name_span).expect("body span");
1318 assert_eq!(&sql[body.start..body.end], "(SELECT ')' AS c)");
1319 }
1320
1321 #[test]
1322 fn test_find_cte_body_span_missing_paren_returns_none() {
1323 let sql = "WITH a AS SELECT 1";
1325 let name_span = Span::new(5, 6);
1326 assert_eq!(find_cte_body_span(sql, name_span), None);
1327 }
1328
1329 #[test]
1330 fn test_find_cte_body_span_with_whitespace_and_comment() {
1331 let sql = "WITH a /* note */ AS (SELECT 1) SELECT * FROM a";
1332 let name_span = Span::new(5, 6);
1333 let body = find_cte_body_span(sql, name_span).expect("body span");
1334 assert_eq!(&sql[body.start..body.end], "(SELECT 1)");
1335 }
1336
1337 #[test]
1338 fn test_find_cte_body_span_with_column_list() {
1339 let sql = "WITH a(x, y) AS (SELECT 1, 2) SELECT * FROM a";
1340 let name_span = Span::new(5, 6);
1341 let body = find_cte_body_span(sql, name_span).expect("body span");
1342 assert_eq!(&sql[body.start..body.end], "(SELECT 1, 2)");
1343 }
1344
1345 #[test]
1346 fn test_find_cte_body_span_with_materialized_modifier() {
1347 let sql = "WITH a AS MATERIALIZED (SELECT 1) SELECT * FROM a";
1348 let name_span = Span::new(5, 6);
1349 let body = find_cte_body_span(sql, name_span).expect("body span");
1350 assert_eq!(&sql[body.start..body.end], "(SELECT 1)");
1351 }
1352
1353 #[test]
1354 fn test_find_cte_body_span_with_not_materialized_modifier() {
1355 let sql = "WITH a AS NOT MATERIALIZED (SELECT 1) SELECT * FROM a";
1356 let name_span = Span::new(5, 6);
1357 let body = find_cte_body_span(sql, name_span).expect("body span");
1358 assert_eq!(&sql[body.start..body.end], "(SELECT 1)");
1359 }
1360
1361 #[test]
1362 fn test_find_identifier_span_skips_string_literal_before_match() {
1363 let sql = "SELECT 'users' AS x FROM users";
1364 let span = find_identifier_span(sql, "users", 0).expect("users span");
1365 assert_eq!(&sql[span.start..span.end], "users");
1366 assert_eq!(span, Span::new(25, 30));
1367 }
1368
1369 #[test]
1370 fn test_find_relation_occurrence_spans_quoted_identifier_with_embedded_dots() {
1371 let sql = "SELECT * FROM \"my.schema\".\"my.table\"";
1372 let (full_span, name_span) =
1373 find_relation_occurrence_spans(sql, "\"my.schema\".\"my.table\"", 0)
1374 .expect("relation span");
1375 assert_eq!(
1376 &sql[full_span.start..full_span.end],
1377 "\"my.schema\".\"my.table\""
1378 );
1379 assert_eq!(&sql[name_span.start..name_span.end], "my.table");
1380 }
1381
1382 #[test]
1390 fn test_find_all_identifier_spans_skips_non_ascii_comment() {
1391 let sql = "SELECT * /* µ µµµ */ FROM users WHERE id = 1";
1395 let spans = find_all_identifier_spans(sql, "users", 0, sql.len());
1396 assert_eq!(spans.len(), 1);
1397 assert_eq!(&sql[spans[0].start..spans[0].end], "users");
1398 }
1399
1400 #[test]
1401 fn test_find_all_identifier_spans_skips_non_ascii_line_comment() {
1402 let sql = "SELECT * FROM users -- é comment\nJOIN users u";
1403 let spans = find_all_identifier_spans(sql, "users", 0, sql.len());
1404 assert_eq!(spans.len(), 2);
1405 }
1406
1407 #[test]
1408 fn test_find_all_identifier_spans_skips_non_ascii_string_literal() {
1409 let sql = "SELECT 'héllo users' FROM users";
1411 let spans = find_all_identifier_spans(sql, "users", 0, sql.len());
1412 assert_eq!(spans.len(), 1);
1414 assert_eq!(&sql[spans[0].start..spans[0].end], "users");
1415 }
1416
1417 #[test]
1418 fn test_find_cte_body_span_with_non_ascii_body_contents() {
1419 let sql = "WITH a AS (SELECT 'µ' AS x, (1 + 2) AS y) SELECT * FROM a";
1420 let name_span = Span::new(5, 6);
1421 let body = find_cte_body_span(sql, name_span).expect("body span");
1422 assert_eq!(
1423 &sql[body.start..body.end],
1424 "(SELECT 'µ' AS x, (1 + 2) AS y)"
1425 );
1426 }
1427
1428 #[test]
1429 fn test_find_all_identifier_spans_non_ascii_between_occurrences() {
1430 let sql = "SELECT users.id -- µ\nFROM users /* ñ */ JOIN users";
1432 let spans = find_all_identifier_spans(sql, "users", 0, sql.len());
1433 assert_eq!(spans.len(), 3);
1434 }
1435
1436 #[test]
1437 fn test_find_all_identifier_spans_ignores_hash_comments() {
1438 let sql = "SELECT 1 # users\nFROM users";
1439 let spans = find_all_identifier_spans(sql, "users", 0, sql.len());
1440 assert_eq!(spans.len(), 1);
1441 assert_eq!(&sql[spans[0].start..spans[0].end], "users");
1442 assert_eq!(spans[0], Span::new(22, 27));
1443 }
1444
1445 #[test]
1446 fn test_find_identifier_span_skips_dollar_quoted_string_literal() {
1447 let sql = "SELECT $$users$$ AS x FROM users";
1448 let span = find_identifier_span(sql, "users", 0).expect("users span");
1449 assert_eq!(&sql[span.start..span.end], "users");
1450 assert_eq!(span, Span::new(27, 32));
1451 }
1452
1453 #[test]
1454 fn test_find_cte_body_span_with_dollar_quoted_string() {
1455 let sql = "WITH a AS (SELECT $$)$$ AS x) SELECT * FROM a";
1456 let name_span = Span::new(5, 6);
1457 let body = find_cte_body_span(sql, name_span).expect("body span");
1458 assert_eq!(&sql[body.start..body.end], "(SELECT $$)$$ AS x)");
1459 }
1460}