flowscope_core/analyzer/helpers/
span.rs

1//! Utilities for finding identifier spans in SQL text.
2//!
3//! This module provides functions to locate identifiers in SQL source code
4//! for error reporting. Since sqlparser doesn't expose AST node locations,
5//! we use text search to find approximate positions.
6
7use crate::types::Span;
8use regex::Regex;
9
10/// Finds the byte offset span of an identifier in SQL text.
11///
12/// Searches for the identifier as a whole word (not part of another identifier).
13/// Returns the first match found, or `None` if not found.
14///
15/// # Arguments
16///
17/// * `sql` - The SQL source text
18/// * `identifier` - The identifier to find (table name, column name, etc.)
19/// * `search_start` - Byte offset to start searching from (for multi-statement SQL)
20///
21/// # Example
22///
23/// ```ignore
24/// let sql = "SELECT * FROM users WHERE id = 1";
25/// let span = find_identifier_span(sql, "users", 0);
26/// assert_eq!(span, Some(Span { start: 14, end: 19 }));
27/// ```
28pub fn find_identifier_span(sql: &str, identifier: &str, search_start: usize) -> Option<Span> {
29    if identifier.is_empty() || search_start >= sql.len() {
30        return None;
31    }
32
33    let search_text = &sql[search_start..];
34
35    // Try exact match first (case-insensitive, word boundary)
36    if let Some(pos) = find_word_boundary_match(search_text, identifier) {
37        return Some(Span::new(
38            search_start + pos,
39            search_start + pos + identifier.len(),
40        ));
41    }
42
43    // For qualified names like "schema.table", try to find the full pattern
44    if identifier.contains('.') {
45        if let Some(pos) = find_qualified_name(search_text, identifier) {
46            return Some(Span::new(
47                search_start + pos,
48                search_start + pos + identifier.len(),
49            ));
50        }
51    }
52
53    None
54}
55
56/// Finds the span of a CTE definition name in SQL text.
57///
58/// Matches `WITH name`, `WITH RECURSIVE name`, or `, name` patterns and returns the span for `name`.
59/// Handles SQL comments between keywords and identifiers.
60/// Uses string operations instead of regex for performance.
61pub fn find_cte_definition_span(sql: &str, identifier: &str, search_start: usize) -> Option<Span> {
62    if identifier.is_empty() || search_start >= sql.len() {
63        return None;
64    }
65
66    let search_text = &sql[search_start..];
67
68    // Find CTE anchors: "WITH" keyword or comma separator
69    let mut pos = 0;
70    while pos < search_text.len() {
71        // Look for "WITH" keyword (case-insensitive, word boundary)
72        if let Some(with_pos) = find_keyword_case_insensitive(&search_text[pos..], "WITH") {
73            let after_with = pos + with_pos + 4;
74            // Skip whitespace and comments after WITH
75            let after_ws = skip_whitespace_and_comments(search_text, after_with);
76
77            // Check for optional RECURSIVE keyword
78            let after_recursive = if let Some(rec_pos) =
79                find_keyword_case_insensitive(&search_text[after_ws..], "RECURSIVE")
80            {
81                if rec_pos == 0 {
82                    // RECURSIVE found immediately after whitespace
83                    skip_whitespace_and_comments(search_text, after_ws + 9)
84                } else {
85                    after_ws
86                }
87            } else {
88                after_ws
89            };
90
91            // Try to match the identifier at this position
92            if let Some((start, end)) =
93                match_identifier_at(search_text, after_recursive, identifier)
94            {
95                return Some(Span::new(search_start + start, search_start + end));
96            }
97            pos = after_recursive.max(after_with);
98            continue;
99        }
100
101        // Look for comma separator
102        if let Some(comma_pos) = search_text[pos..].find(',') {
103            let after_comma = pos + comma_pos + 1;
104            // Skip whitespace and comments after comma
105            let after_ws = skip_whitespace_and_comments(search_text, after_comma);
106            if let Some((start, end)) = match_identifier_at(search_text, after_ws, identifier) {
107                return Some(Span::new(search_start + start, search_start + end));
108            }
109            pos = after_comma;
110            continue;
111        }
112
113        break;
114    }
115
116    None
117}
118
119/// Finds the span of a derived table alias in SQL text.
120///
121/// Matches `) alias` or `) AS alias` patterns and returns the span for `alias`.
122/// Handles SQL comments between the closing paren and the alias.
123/// Uses string operations instead of regex for performance.
124pub fn find_derived_table_alias_span(
125    sql: &str,
126    identifier: &str,
127    search_start: usize,
128) -> Option<Span> {
129    if identifier.is_empty() || search_start >= sql.len() {
130        return None;
131    }
132
133    let search_text = &sql[search_start..];
134
135    // Find closing paren anchors
136    let mut pos = 0;
137    while pos < search_text.len() {
138        if let Some(paren_pos) = search_text[pos..].find(')') {
139            let after_paren = pos + paren_pos + 1;
140            // Skip whitespace and comments
141            let ws_end = skip_whitespace_and_comments(search_text, after_paren);
142
143            if ws_end >= search_text.len() {
144                pos = after_paren;
145                continue;
146            }
147
148            // Check for optional "AS" keyword (must be followed by whitespace or comment, not "ASC")
149            let after_as = if search_text[ws_end..].to_ascii_uppercase().starts_with("AS") {
150                let potential_as_end = ws_end + 2;
151                let is_standalone_as = potential_as_end >= search_text.len()
152                    || search_text.as_bytes()[potential_as_end].is_ascii_whitespace()
153                    || search_text[potential_as_end..].starts_with("/*")
154                    || search_text[potential_as_end..].starts_with("--");
155                if is_standalone_as {
156                    skip_whitespace_and_comments(search_text, potential_as_end)
157                } else {
158                    ws_end
159                }
160            } else {
161                ws_end
162            };
163
164            if let Some((start, end)) = match_identifier_at(search_text, after_as, identifier) {
165                return Some(Span::new(search_start + start, search_start + end));
166            }
167            pos = after_paren;
168            continue;
169        }
170        break;
171    }
172
173    None
174}
175
176/// Finds a keyword case-insensitively with word boundary check.
177fn find_keyword_case_insensitive(text: &str, keyword: &str) -> Option<usize> {
178    let text_upper = text.to_ascii_uppercase();
179    let mut search_pos = 0;
180
181    while let Some(pos) = text_upper[search_pos..].find(keyword) {
182        let abs_pos = search_pos + pos;
183        // Check word boundary before
184        let before_ok = abs_pos == 0 || !text.as_bytes()[abs_pos - 1].is_ascii_alphanumeric();
185        // Check word boundary after
186        let after_pos = abs_pos + keyword.len();
187        let after_ok =
188            after_pos >= text.len() || !text.as_bytes()[after_pos].is_ascii_alphanumeric();
189
190        if before_ok && after_ok {
191            return Some(abs_pos);
192        }
193        search_pos = abs_pos + 1;
194    }
195    None
196}
197
198/// Skips whitespace and SQL comments (block `/* */` and line `-- \n`).
199/// Returns the position after all whitespace and comments.
200fn skip_whitespace_and_comments(text: &str, pos: usize) -> usize {
201    let mut current = pos;
202
203    loop {
204        if current >= text.len() {
205            return current;
206        }
207
208        let remaining = &text[current..];
209
210        // Skip whitespace first
211        let ws_chars: usize = remaining
212            .chars()
213            .take_while(|c| c.is_whitespace())
214            .map(|c| c.len_utf8())
215            .sum();
216        if ws_chars > 0 {
217            current += ws_chars;
218            continue;
219        }
220
221        // Check for block comment /* ... */
222        if let Some(after_open) = remaining.strip_prefix("/*") {
223            if let Some(end) = after_open.find("*/") {
224                current += 2 + end + 2; // Skip /* + content + */
225                continue;
226            } else {
227                // Unclosed comment - skip to end
228                return text.len();
229            }
230        }
231
232        // Check for line comment -- ... \n
233        if remaining.starts_with("--") {
234            if let Some(newline) = remaining.find('\n') {
235                current += newline + 1;
236                continue;
237            } else {
238                // No newline - comment goes to end
239                return text.len();
240            }
241        }
242
243        // No more whitespace or comments
244        break;
245    }
246
247    current
248}
249
250/// Matches an identifier at the given position (case-insensitive, handles quoting).
251fn match_identifier_at(text: &str, pos: usize, identifier: &str) -> Option<(usize, usize)> {
252    if pos >= text.len() {
253        return None;
254    }
255
256    let remaining = &text[pos..];
257    let ident_upper = identifier.to_ascii_uppercase();
258
259    // Check for quoted variants first
260    for (open, close) in [("\"", "\""), ("`", "`"), ("[", "]")] {
261        if remaining.starts_with(open) {
262            let after_open = open.len();
263            if remaining[after_open..]
264                .to_ascii_uppercase()
265                .starts_with(&ident_upper)
266            {
267                let ident_end = after_open + identifier.len();
268                if remaining[ident_end..].starts_with(close) {
269                    return Some((pos + after_open, pos + ident_end));
270                }
271            }
272        }
273    }
274
275    // Check for unquoted identifier with word boundary
276    if remaining.to_ascii_uppercase().starts_with(&ident_upper) {
277        let end_pos = identifier.len();
278        // Ensure word boundary after identifier (not alphanumeric and not underscore)
279        let after_ok = end_pos >= remaining.len()
280            || (!remaining.as_bytes()[end_pos].is_ascii_alphanumeric()
281                && remaining.as_bytes()[end_pos] != b'_');
282        if after_ok {
283            return Some((pos, pos + identifier.len()));
284        }
285    }
286
287    None
288}
289
290/// Finds an identifier at a word boundary (not part of another word).
291/// Word boundaries consider underscores as part of identifiers (SQL convention).
292fn find_word_boundary_match(text: &str, identifier: &str) -> Option<usize> {
293    // For simple identifiers, use word boundary matching
294    // Note: \b in regex considers underscore as a word character, which is correct for SQL
295    let pattern = format!(r"(?i)\b{}\b", regex::escape(identifier));
296
297    // Try to compile the pattern
298    if let Ok(re) = Regex::new(&pattern) {
299        if let Some(m) = re.find(text) {
300            return Some(m.start());
301        }
302    }
303
304    // No fallback to simple substring search - we need word boundaries
305    // to avoid matching "users" inside "users_table"
306    None
307}
308
309/// Finds a qualified identifier (e.g., "schema.table") in text.
310fn find_qualified_name(text: &str, qualified_name: &str) -> Option<usize> {
311    // Split the qualified name and search for the pattern
312    let parts: Vec<&str> = qualified_name.split('.').collect();
313    if parts.is_empty() {
314        return None;
315    }
316
317    // Build a pattern that matches the qualified name with optional quotes
318    // e.g., "public.users" should match: public.users, "public"."users", public."users", etc.
319    let pattern_parts: Vec<String> = parts
320        .iter()
321        .map(|part| {
322            // Match the part with optional surrounding quotes
323            format!(r#"(?:"?{}\"?)"#, regex::escape(part))
324        })
325        .collect();
326
327    let pattern = format!(r"(?i){}", pattern_parts.join(r"\."));
328
329    if let Ok(re) = Regex::new(&pattern) {
330        if let Some(m) = re.find(text) {
331            return Some(m.start());
332        }
333    }
334
335    None
336}
337
338/// Calculates the byte offset for a given line and column in SQL text.
339///
340/// This is useful for converting line:column positions (from parse errors)
341/// to byte offsets for the Span type.
342///
343/// # Arguments
344///
345/// * `sql` - The SQL source text
346/// * `line` - Line number (1-indexed)
347/// * `column` - Column number (1-indexed)
348pub fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
349    if line == 0 || column == 0 {
350        return None;
351    }
352
353    let bytes = sql.as_bytes();
354    let mut current_line = 1;
355    let mut offset = 0;
356
357    // Advance `offset` to the start of the requested line.
358    while current_line < line {
359        let remaining = bytes.get(offset..)?;
360        let newline_pos = remaining.iter().position(|&b| b == b'\n')?;
361        offset += newline_pos + 1;
362        current_line += 1;
363    }
364
365    let line_start = offset;
366    let remaining = bytes.get(line_start..)?;
367    let line_len = remaining
368        .iter()
369        .position(|&b| b == b'\n')
370        .unwrap_or(remaining.len());
371    let line_end = line_start + line_len;
372    let line_slice = &sql[line_start..line_end];
373
374    // sqlparser reports columns in characters, so iterate char_indices to convert
375    // the 1-based column into a byte offset.
376    let mut current_column = 1;
377    for (rel_offset, _) in line_slice.char_indices() {
378        if current_column == column {
379            return Some(line_start + rel_offset);
380        }
381        current_column += 1;
382    }
383
384    if column == current_column {
385        return Some(line_end);
386    }
387
388    None
389}
390
391#[cfg(test)]
392mod tests {
393    use super::*;
394
395    #[test]
396    fn test_find_identifier_span_simple() {
397        let sql = "SELECT * FROM users WHERE id = 1";
398        let span = find_identifier_span(sql, "users", 0);
399        assert_eq!(span, Some(Span::new(14, 19)));
400    }
401
402    #[test]
403    fn test_find_identifier_span_case_insensitive() {
404        let sql = "SELECT * FROM Users WHERE id = 1";
405        let span = find_identifier_span(sql, "users", 0);
406        assert!(span.is_some());
407    }
408
409    #[test]
410    fn test_find_identifier_span_qualified() {
411        let sql = "SELECT * FROM public.users";
412        let span = find_identifier_span(sql, "public.users", 0);
413        assert_eq!(span, Some(Span::new(14, 26)));
414    }
415
416    #[test]
417    fn test_find_identifier_span_with_offset() {
418        let sql = "SELECT 1; SELECT * FROM users";
419        let span = find_identifier_span(sql, "users", 10);
420        assert_eq!(span, Some(Span::new(24, 29)));
421    }
422
423    #[test]
424    fn test_find_identifier_span_not_found() {
425        let sql = "SELECT * FROM users";
426        let span = find_identifier_span(sql, "orders", 0);
427        assert_eq!(span, None);
428    }
429
430    #[test]
431    fn test_find_identifier_word_boundary() {
432        let sql = "SELECT users_id FROM users";
433        // Should find "users" as whole word, not "users" in "users_id"
434        let span = find_identifier_span(sql, "users", 0);
435        assert!(span.is_some());
436        let span = span.unwrap();
437        // Should match the standalone "users", not the one in "users_id"
438        assert_eq!(&sql[span.start..span.end].to_lowercase(), "users");
439    }
440
441    #[test]
442    fn test_find_cte_definition_span_single() {
443        let sql = "WITH my_cte AS (SELECT 1) SELECT * FROM my_cte";
444        let span = find_cte_definition_span(sql, "my_cte", 0);
445        assert_eq!(span, Some(Span::new(5, 11)));
446    }
447
448    #[test]
449    fn test_find_cte_definition_span_multiple() {
450        let sql = "WITH cte1 AS (SELECT 1), cte2 AS (SELECT 2) SELECT * FROM cte1, cte2";
451        let first_span = find_cte_definition_span(sql, "cte1", 0).expect("cte1 span");
452        assert_eq!(first_span, Span::new(5, 9));
453
454        let second_span = find_cte_definition_span(sql, "cte2", first_span.end).expect("cte2 span");
455        assert_eq!(second_span, Span::new(25, 29));
456    }
457
458    #[test]
459    fn test_find_derived_table_alias_span() {
460        let sql = "SELECT * FROM (SELECT 1) AS derived";
461        let span = find_derived_table_alias_span(sql, "derived", 0);
462        assert_eq!(span, Some(Span::new(28, 35)));
463        let span = span.expect("derived span");
464        assert_eq!(&sql[span.start..span.end], "derived");
465    }
466
467    #[test]
468    fn test_find_cte_definition_span_quoted() {
469        // Double-quoted identifier
470        let sql = r#"WITH "MyCte" AS (SELECT 1) SELECT * FROM "MyCte""#;
471        let span = find_cte_definition_span(sql, "MyCte", 0);
472        assert!(span.is_some(), "should find quoted CTE");
473        let span = span.unwrap();
474        assert_eq!(&sql[span.start..span.end], "MyCte");
475
476        // Backtick-quoted identifier
477        let sql = "WITH `my_cte` AS (SELECT 1) SELECT * FROM `my_cte`";
478        let span = find_cte_definition_span(sql, "my_cte", 0);
479        assert!(span.is_some(), "should find backtick-quoted CTE");
480        let span = span.unwrap();
481        assert_eq!(&sql[span.start..span.end], "my_cte");
482
483        // Bracket-quoted identifier
484        let sql = "WITH [my_cte] AS (SELECT 1) SELECT * FROM [my_cte]";
485        let span = find_cte_definition_span(sql, "my_cte", 0);
486        assert!(span.is_some(), "should find bracket-quoted CTE");
487        let span = span.unwrap();
488        assert_eq!(&sql[span.start..span.end], "my_cte");
489    }
490
491    #[test]
492    fn test_find_derived_table_alias_span_without_as() {
493        // Derived table without AS keyword
494        let sql = "SELECT * FROM (SELECT 1) derived";
495        let span = find_derived_table_alias_span(sql, "derived", 0);
496        assert!(span.is_some(), "should find derived alias without AS");
497        let span = span.unwrap();
498        assert_eq!(&sql[span.start..span.end], "derived");
499    }
500
501    #[test]
502    fn test_find_derived_table_alias_span_multiple() {
503        let sql = "SELECT * FROM (SELECT 1) AS a, (SELECT 2) AS b";
504        let first_span = find_derived_table_alias_span(sql, "a", 0).expect("first derived span");
505        assert_eq!(&sql[first_span.start..first_span.end], "a");
506
507        let second_span =
508            find_derived_table_alias_span(sql, "b", first_span.end).expect("second derived span");
509        assert_eq!(&sql[second_span.start..second_span.end], "b");
510    }
511
512    #[test]
513    fn test_find_derived_table_alias_span_quoted() {
514        let sql = r#"SELECT * FROM (SELECT 1) AS "Derived""#;
515        let span = find_derived_table_alias_span(sql, "Derived", 0);
516        assert!(span.is_some(), "should find quoted derived alias");
517        let span = span.unwrap();
518        assert_eq!(&sql[span.start..span.end], "Derived");
519    }
520
521    #[test]
522    fn test_line_col_to_offset_single_line() {
523        let sql = "SELECT * FROM users";
524        assert_eq!(line_col_to_offset(sql, 1, 1), Some(0));
525        assert_eq!(line_col_to_offset(sql, 1, 8), Some(7));
526    }
527
528    #[test]
529    fn test_line_col_to_offset_multi_line() {
530        let sql = "SELECT *\nFROM users\nWHERE id = 1";
531        assert_eq!(line_col_to_offset(sql, 1, 1), Some(0));
532        assert_eq!(line_col_to_offset(sql, 2, 1), Some(9));
533        assert_eq!(line_col_to_offset(sql, 3, 1), Some(20));
534    }
535
536    #[test]
537    fn test_line_col_to_offset_unicode_columns() {
538        let sql = "SELECT μ, FROM users";
539        // Column 11 should point at the 'F' byte even though the line includes a multi-byte char.
540        assert_eq!(line_col_to_offset(sql, 1, 11), Some("SELECT μ, ".len()));
541        // Column 12 moves one character to the right (the 'R').
542        assert_eq!(line_col_to_offset(sql, 1, 12), Some("SELECT μ, F".len()));
543    }
544
545    #[test]
546    fn test_line_col_to_offset_invalid() {
547        let sql = "SELECT * FROM users";
548        assert_eq!(line_col_to_offset(sql, 0, 1), None);
549        assert_eq!(line_col_to_offset(sql, 1, 0), None);
550        assert_eq!(line_col_to_offset(sql, 5, 1), None);
551    }
552
553    #[test]
554    fn test_find_identifier_empty() {
555        let sql = "SELECT * FROM users";
556        assert_eq!(find_identifier_span(sql, "", 0), None);
557        assert_eq!(find_identifier_span("", "users", 0), None);
558    }
559
560    // ============================================================================
561    // Regression tests for prior code review findings
562    // ============================================================================
563
564    // Issue 1: WITH RECURSIVE not supported
565    #[test]
566    fn test_find_cte_definition_span_recursive() {
567        let sql = "WITH RECURSIVE my_cte AS (SELECT 1 UNION ALL SELECT 2) SELECT * FROM my_cte";
568        let span = find_cte_definition_span(sql, "my_cte", 0);
569        assert!(
570            span.is_some(),
571            "should find CTE name after RECURSIVE keyword"
572        );
573        let span = span.unwrap();
574        assert_eq!(&sql[span.start..span.end], "my_cte");
575    }
576
577    #[test]
578    fn test_find_cte_definition_span_recursive_multiple() {
579        let sql = "WITH RECURSIVE cte1 AS (SELECT 1), cte2 AS (SELECT 2) SELECT * FROM cte1, cte2";
580        let first_span = find_cte_definition_span(sql, "cte1", 0);
581        assert!(
582            first_span.is_some(),
583            "should find first CTE after RECURSIVE"
584        );
585        let first_span = first_span.unwrap();
586        assert_eq!(&sql[first_span.start..first_span.end], "cte1");
587
588        let second_span = find_cte_definition_span(sql, "cte2", first_span.end);
589        assert!(second_span.is_some(), "should find second CTE after comma");
590        let second_span = second_span.unwrap();
591        assert_eq!(&sql[second_span.start..second_span.end], "cte2");
592    }
593
594    // Issue 2: Bounds checking - search_start at end of string
595    #[test]
596    fn test_find_cte_definition_span_search_start_at_end() {
597        let sql = "WITH cte AS (SELECT 1) SELECT * FROM cte";
598        // Search starting at the very end should return None, not panic
599        let span = find_cte_definition_span(sql, "cte", sql.len());
600        assert_eq!(span, None);
601    }
602
603    #[test]
604    fn test_find_derived_table_alias_search_start_at_end() {
605        let sql = "SELECT * FROM (SELECT 1) AS derived";
606        // Search starting at the very end should return None, not panic
607        let span = find_derived_table_alias_span(sql, "derived", sql.len());
608        assert_eq!(span, None);
609    }
610
611    #[test]
612    fn test_find_derived_table_alias_paren_at_end() {
613        // Edge case: closing paren at end with no alias
614        let sql = "SELECT * FROM (SELECT 1)";
615        let span = find_derived_table_alias_span(sql, "anything", 0);
616        assert_eq!(span, None);
617    }
618
619    // Issue 3: Word boundary logic - underscore handling
620    #[test]
621    fn test_word_boundary_underscore_prefix() {
622        let sql = "SELECT * FROM _users";
623        // Should find "_users" as identifier, not fail to match
624        let span = find_identifier_span(sql, "_users", 0);
625        assert!(
626            span.is_some(),
627            "should find identifier starting with underscore"
628        );
629    }
630
631    #[test]
632    fn test_word_boundary_underscore_suffix_no_match() {
633        let sql = "SELECT * FROM users_table";
634        // Should NOT match "users" because it's followed by underscore
635        let span = find_identifier_span(sql, "users", 0);
636        // This tests the bug: the current code may incorrectly match "users" within "users_table"
637        // because of operator precedence: `!x && y != z` instead of `!(x || y == z)`
638        assert!(
639            span.is_none() || {
640                let s = span.unwrap();
641                // If it matched, verify it's the whole word not a prefix
642                s.end == s.start + "users".len()
643                    && (s.end >= sql.len()
644                        || !sql.as_bytes()[s.end].is_ascii_alphanumeric()
645                            && sql.as_bytes()[s.end] != b'_')
646            },
647            "should not match 'users' as part of 'users_table'"
648        );
649    }
650
651    #[test]
652    fn test_cte_name_with_underscore_suffix_no_match() {
653        // When searching for "cte" it should not match "cte_name"
654        let sql = "WITH cte_name AS (SELECT 1) SELECT * FROM cte_name";
655        let span = find_cte_definition_span(sql, "cte", 0);
656        assert!(
657            span.is_none(),
658            "should not match 'cte' as part of 'cte_name'"
659        );
660    }
661
662    // Issue 4: Comments not handled
663    #[test]
664    fn test_find_cte_definition_span_with_block_comment() {
665        let sql = "WITH /* comment */ my_cte AS (SELECT 1) SELECT * FROM my_cte";
666        let span = find_cte_definition_span(sql, "my_cte", 0);
667        assert!(span.is_some(), "should find CTE name after block comment");
668        let span = span.unwrap();
669        assert_eq!(&sql[span.start..span.end], "my_cte");
670    }
671
672    #[test]
673    fn test_find_cte_definition_span_with_line_comment() {
674        let sql = "WITH -- comment\nmy_cte AS (SELECT 1) SELECT * FROM my_cte";
675        let span = find_cte_definition_span(sql, "my_cte", 0);
676        assert!(span.is_some(), "should find CTE name after line comment");
677        let span = span.unwrap();
678        assert_eq!(&sql[span.start..span.end], "my_cte");
679    }
680
681    #[test]
682    fn test_find_derived_table_alias_with_comment() {
683        let sql = "SELECT * FROM (SELECT 1) /* comment */ AS derived";
684        let span = find_derived_table_alias_span(sql, "derived", 0);
685        assert!(span.is_some(), "should find alias after block comment");
686        let span = span.unwrap();
687        assert_eq!(&sql[span.start..span.end], "derived");
688    }
689
690    // Issue 5: String literals may contain false matches
691    #[test]
692    fn test_find_cte_definition_not_in_string_literal() {
693        // The CTE name "cte" appears in a string literal first, then as actual CTE
694        let sql = "WITH cte AS (SELECT 'cte' AS name) SELECT * FROM cte";
695        let span = find_cte_definition_span(sql, "cte", 0);
696        assert!(span.is_some(), "should find CTE definition");
697        let span = span.unwrap();
698        // Should find the definition at position 5, not the string literal
699        assert_eq!(
700            span.start, 5,
701            "should find CTE definition, not string literal"
702        );
703        assert_eq!(&sql[span.start..span.end], "cte");
704    }
705
706    #[test]
707    fn test_find_derived_alias_not_in_string_literal() {
708        // The alias appears in a string literal inside the subquery
709        let sql = "SELECT * FROM (SELECT 'derived' AS name) AS derived";
710        let span = find_derived_table_alias_span(sql, "derived", 0);
711        assert!(span.is_some(), "should find derived alias");
712        let span = span.unwrap();
713        // Should find the actual alias after the closing paren, not the string
714        assert_eq!(&sql[span.start..span.end], "derived");
715        // The alias position should be after the closing paren
716        assert!(
717            span.start > sql.find(')').unwrap(),
718            "span should be after closing paren"
719        );
720    }
721
722    // Issue 6: Edge cases for empty/malformed inputs
723    #[test]
724    fn test_find_cte_definition_empty_identifier() {
725        let sql = "WITH cte AS (SELECT 1) SELECT * FROM cte";
726        let span = find_cte_definition_span(sql, "", 0);
727        assert_eq!(span, None, "empty identifier should return None");
728    }
729
730    #[test]
731    fn test_find_derived_table_alias_empty_identifier() {
732        let sql = "SELECT * FROM (SELECT 1) AS derived";
733        let span = find_derived_table_alias_span(sql, "", 0);
734        assert_eq!(span, None, "empty identifier should return None");
735    }
736
737    #[test]
738    fn test_find_cte_definition_empty_sql() {
739        let span = find_cte_definition_span("", "cte", 0);
740        assert_eq!(span, None, "empty SQL should return None");
741    }
742
743    #[test]
744    fn test_find_derived_table_alias_empty_sql() {
745        let span = find_derived_table_alias_span("", "derived", 0);
746        assert_eq!(span, None, "empty SQL should return None");
747    }
748
749    #[test]
750    fn test_find_cte_definition_search_start_beyond_bounds() {
751        let sql = "WITH cte AS (SELECT 1)";
752        let span = find_cte_definition_span(sql, "cte", sql.len() + 100);
753        assert_eq!(span, None, "search_start beyond bounds should return None");
754    }
755
756    #[test]
757    fn test_find_derived_table_alias_search_start_beyond_bounds() {
758        let sql = "SELECT * FROM (SELECT 1) AS derived";
759        let span = find_derived_table_alias_span(sql, "derived", sql.len() + 100);
760        assert_eq!(span, None, "search_start beyond bounds should return None");
761    }
762
763    // Additional edge case: identifier at very end of SQL
764    #[test]
765    fn test_find_cte_at_end_of_sql() {
766        let sql = "WITH x AS (SELECT 1) SELECT * FROM x";
767        let span = find_cte_definition_span(sql, "x", 0);
768        assert!(span.is_some());
769        let span = span.unwrap();
770        assert_eq!(&sql[span.start..span.end], "x");
771    }
772
773    // Test for potential panic in match_identifier_at with short remaining text
774    #[test]
775    fn test_match_identifier_at_short_remaining() {
776        let sql = "WITH a AS (SELECT 1) SELECT * FROM a";
777        let span = find_cte_definition_span(sql, "a", 0);
778        assert!(span.is_some());
779        let span = span.unwrap();
780        assert_eq!(&sql[span.start..span.end], "a");
781    }
782}