// flowscope_core/completion/context.rs

1use sqlparser::keywords::Keyword;
2use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Word};
3
4use crate::analyzer::helpers::line_col_to_offset;
5use crate::analyzer::schema_registry::SchemaRegistry;
6use crate::types::{
7    AstContext, CompletionClause, CompletionColumn, CompletionContext, CompletionItem,
8    CompletionItemCategory, CompletionItemKind, CompletionItemsResult, CompletionKeywordHints,
9    CompletionKeywordSet, CompletionRequest, CompletionTable, CompletionToken, CompletionTokenKind,
10    Dialect, SchemaMetadata, Span,
11};
12
13use super::ast_extractor::{extract_ast_context, extract_lateral_aliases};
14use super::functions::{get_function_completions, FunctionCompletionContext};
15use super::parse_strategies::try_parse_for_completion;
16
/// Maximum SQL input size (10MB) to prevent memory exhaustion.
/// This matches the TypeScript validation limit.
const MAX_SQL_LENGTH: usize = 10 * 1024 * 1024;

// Scoring constants for completion item ranking.
// Higher scores = higher priority in completion list.
//
// Scoring guidelines:
// - Base category scores start at 1000 and decrease by 100 per rank
// - Prefix matches add 100-300 depending on match quality
// - Context-aware adjustments range from -300 to +800
// - Type compatibility adds +100 for matches, -50 for mismatches
//
// All adjustments below are added on top of a base category score; they are
// relative weights, not absolute ranks.

/// Bonus for column name prefix matches (when typing matches the column name portion of "table.column")
const SCORE_COLUMN_NAME_MATCH_BONUS: i32 = 150;
/// Bonus for items that are specific to the current clause context
const SCORE_CLAUSE_SPECIFIC_BONUS: i32 = 50;
/// Special boost for FROM keyword when typing 'f' in SELECT clause (most common transition)
const SCORE_FROM_KEYWORD_BOOST: i32 = 800;
/// Penalty for non-FROM keywords when typing 'f' in SELECT clause
const SCORE_OTHER_KEYWORD_PENALTY: i32 = -200;
/// Penalty for function names starting with 'f' to deprioritize vs FROM keyword
const SCORE_F_FUNCTION_PENALTY: i32 = -250;
/// Additional penalty for functions starting with 'from_' (e.g., from_json)
const SCORE_FROM_FUNCTION_PENALTY: i32 = -300;
/// Bonus for columns whose type matches the expected type in comparison context.
/// Applied when the column can be implicitly cast to the expected type (e.g., INT matches INT).
const SCORE_TYPE_COMPATIBLE: i32 = 100;
/// Penalty for columns whose type is incompatible with expected type.
/// Smaller magnitude than bonus to avoid completely hiding potentially useful columns.
const SCORE_TYPE_INCOMPATIBLE: i32 = -50;
48
/// A single SQL token paired with its byte-offset span in the original input.
#[derive(Debug, Clone)]
struct TokenInfo {
    // The raw sqlparser token (whitespace other than comments is filtered out
    // during tokenization).
    token: Token,
    // Byte-offset range of the token within the SQL string.
    span: Span,
}
54
/// One semicolon-delimited statement extracted from the token stream.
#[derive(Debug, Clone)]
struct StatementInfo {
    // Zero-based position of the statement within the input.
    index: usize,
    // Byte-offset range covered by the statement (excludes the trailing `;`).
    span: Span,
    // Tokens belonging to this statement, in source order.
    tokens: Vec<TokenInfo>,
}
61
/// Statement-level keywords offered regardless of the detected clause.
const GLOBAL_KEYWORDS: &[&str] = &[
    "SELECT",
    "FROM",
    "WHERE",
    "JOIN",
    "LEFT",
    "RIGHT",
    "FULL",
    "INNER",
    "CROSS",
    "OUTER",
    "ON",
    "USING",
    "GROUP",
    "BY",
    "HAVING",
    "ORDER",
    "LIMIT",
    "OFFSET",
    "QUALIFY",
    "WINDOW",
    "INSERT",
    "UPDATE",
    "DELETE",
    "CREATE",
    "ALTER",
    "DROP",
    "VALUES",
    "WITH",
    "DISTINCT",
    "UNION",
    "INTERSECT",
    "EXCEPT",
    "ATTACH",
    "DETACH",
    "COPY",
    "EXPORT",
    "IMPORT",
    "PIVOT",
    "UNPIVOT",
    "EXPLAIN",
    "SUMMARIZE",
    "DESCRIBE",
    "SHOW",
];

/// Operators surfaced as hints in expression positions.
const OPERATOR_HINTS: &[&str] = &[
    "=", "!=", "<>", "<", "<=", ">", ">=", "+", "-", "*", "/", "%", "||", "AND", "OR", "NOT", "IN",
    "LIKE", "ILIKE", "IS", "IS NOT", "BETWEEN",
];

/// Common aggregate function names surfaced as hints.
const AGGREGATE_HINTS: &[&str] = &[
    "COUNT",
    "SUM",
    "AVG",
    "MIN",
    "MAX",
    "ARRAY_AGG",
    "STRING_AGG",
    "BOOL_AND",
    "BOOL_OR",
    "STDDEV",
    "VARIANCE",
];

/// Multi-token snippet templates surfaced as hints.
const SNIPPET_HINTS: &[&str] = &[
    "CASE WHEN ... THEN ... END",
    "COALESCE(expr, ...)",
    "CAST(expr AS type)",
    "COUNT(*)",
    "FILTER (WHERE ...)",
    "OVER (PARTITION BY ...)",
];

/// Keywords relevant while typing inside a SELECT list.
const SELECT_KEYWORDS: &[&str] = &[
    "DISTINCT", "ALL", "AS", "CASE", "WHEN", "THEN", "ELSE", "END", "NULLIF", "COALESCE", "CAST",
    "FILTER", "OVER",
];

/// Keywords relevant while typing inside a FROM clause.
const FROM_KEYWORDS: &[&str] = &[
    "JOIN", "LEFT", "RIGHT", "FULL", "INNER", "CROSS", "OUTER", "LATERAL", "UNNEST", "AS", "ON",
    "USING",
];

/// Keywords relevant in predicate positions (WHERE / ON / HAVING).
const WHERE_KEYWORDS: &[&str] = &[
    "AND", "OR", "NOT", "IN", "EXISTS", "LIKE", "ILIKE", "IS", "NULL", "TRUE", "FALSE", "BETWEEN",
];

/// Keywords relevant inside a GROUP BY clause.
const GROUP_BY_KEYWORDS: &[&str] = &["HAVING", "ROLLUP", "CUBE", "GROUPING", "SETS"];

/// Keywords relevant inside an ORDER BY clause.
const ORDER_BY_KEYWORDS: &[&str] = &["ASC", "DESC", "NULLS", "FIRST", "LAST"];

/// Keywords relevant immediately after JOIN.
const JOIN_KEYWORDS: &[&str] = &["ON", "USING"];
155
156fn keyword_set_for_clause(clause: CompletionClause) -> CompletionKeywordSet {
157    let keywords = match clause {
158        CompletionClause::Select => SELECT_KEYWORDS,
159        CompletionClause::From => FROM_KEYWORDS,
160        CompletionClause::Where | CompletionClause::On => WHERE_KEYWORDS,
161        CompletionClause::GroupBy => GROUP_BY_KEYWORDS,
162        CompletionClause::OrderBy => ORDER_BY_KEYWORDS,
163        CompletionClause::Join => JOIN_KEYWORDS,
164        CompletionClause::Limit => &["OFFSET"],
165        CompletionClause::Qualify => &["OVER", "WINDOW"],
166        CompletionClause::Window => &["PARTITION", "ORDER", "ROWS", "RANGE"],
167        CompletionClause::Insert => &["INTO", "VALUES", "SELECT"],
168        CompletionClause::Update => &["SET", "WHERE"],
169        CompletionClause::Delete => &["FROM", "WHERE"],
170        CompletionClause::With => &["AS", "SELECT"],
171        CompletionClause::Having => WHERE_KEYWORDS,
172        CompletionClause::Unknown => &[],
173    };
174
175    CompletionKeywordSet {
176        keywords: keywords.iter().map(|k| k.to_string()).collect(),
177        operators: OPERATOR_HINTS.iter().map(|op| op.to_string()).collect(),
178        aggregates: AGGREGATE_HINTS.iter().map(|agg| agg.to_string()).collect(),
179        snippets: SNIPPET_HINTS
180            .iter()
181            .map(|snippet| snippet.to_string())
182            .collect(),
183    }
184}
185
186fn global_keyword_set() -> CompletionKeywordSet {
187    CompletionKeywordSet {
188        keywords: GLOBAL_KEYWORDS.iter().map(|k| k.to_string()).collect(),
189        operators: OPERATOR_HINTS.iter().map(|op| op.to_string()).collect(),
190        aggregates: AGGREGATE_HINTS.iter().map(|agg| agg.to_string()).collect(),
191        snippets: SNIPPET_HINTS
192            .iter()
193            .map(|snippet| snippet.to_string())
194            .collect(),
195    }
196}
197
198fn token_span_to_offsets(sql: &str, span: &sqlparser::tokenizer::Span) -> Option<Span> {
199    let start = line_col_to_offset(sql, span.start.line as usize, span.start.column as usize)?;
200    let end = line_col_to_offset(sql, span.end.line as usize, span.end.column as usize)?;
201    Some(Span::new(start, end))
202}
203
204fn tokenize_sql(sql: &str, dialect: Dialect) -> Result<Vec<TokenInfo>, String> {
205    use sqlparser::tokenizer::Whitespace;
206
207    let dialect = dialect.to_sqlparser_dialect();
208    let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
209    let tokens: Vec<TokenWithSpan> = tokenizer
210        .tokenize_with_location()
211        .map_err(|err| err.to_string())?;
212
213    let mut token_infos = Vec::new();
214    for token in tokens {
215        // Skip regular whitespace but keep comments for cursor detection
216        if let Token::Whitespace(ws) = &token.token {
217            match ws {
218                Whitespace::SingleLineComment { .. } | Whitespace::MultiLineComment(_) => {
219                    // Keep comment tokens
220                }
221                _ => continue, // Skip spaces, newlines, tabs
222            }
223        }
224        if let Some(span) = token_span_to_offsets(sql, &token.span) {
225            token_infos.push(TokenInfo {
226                token: token.token,
227                span,
228            });
229        }
230    }
231
232    Ok(token_infos)
233}
234
235/// Split tokenized SQL into statement boundaries.
236///
237/// Note: This is intentionally separate from `analyzer/input.rs::compute_statement_ranges`.
238/// That function operates on raw SQL text (for parsing before tokenization), while this
239/// function works with already-tokenized input and preserves per-statement token lists
240/// for clause detection and completion context building.
241fn split_statements(tokens: &[TokenInfo], sql_len: usize) -> Vec<StatementInfo> {
242    if tokens.is_empty() {
243        return vec![StatementInfo {
244            index: 0,
245            span: Span::new(0, sql_len),
246            tokens: Vec::new(),
247        }];
248    }
249
250    let mut statements = Vec::new();
251    let mut current_tokens = Vec::new();
252    let mut current_start: Option<usize> = None;
253    let mut statement_index = 0;
254
255    for token in tokens {
256        if current_start.is_none() {
257            current_start = Some(token.span.start);
258        }
259
260        if matches!(token.token, Token::SemiColon) {
261            let end = token.span.start;
262            if let Some(start) = current_start {
263                statements.push(StatementInfo {
264                    index: statement_index,
265                    span: Span::new(start, end.max(start)),
266                    tokens: current_tokens.clone(),
267                });
268                statement_index += 1;
269                current_tokens.clear();
270                current_start = None;
271            }
272            continue;
273        }
274
275        current_tokens.push(token.clone());
276    }
277
278    if let Some(start) = current_start {
279        let end = current_tokens
280            .last()
281            .map(|token| token.span.end)
282            .unwrap_or(start);
283        statements.push(StatementInfo {
284            index: statement_index,
285            span: Span::new(start, end.max(start)),
286            tokens: current_tokens,
287        });
288    }
289
290    statements
291}
292
293fn find_statement_for_cursor(statements: &[StatementInfo], cursor_offset: usize) -> StatementInfo {
294    if statements.is_empty() {
295        return StatementInfo {
296            index: 0,
297            span: Span::new(0, 0),
298            tokens: Vec::new(),
299        };
300    }
301
302    // Cursor is within a statement's bounds
303    for statement in statements {
304        if cursor_offset >= statement.span.start && cursor_offset <= statement.span.end {
305            return statement.clone();
306        }
307    }
308
309    // Cursor is between statements or after all statements - find the closest preceding statement
310    let mut candidate = &statements[0];
311    for statement in statements {
312        if cursor_offset < statement.span.start {
313            return candidate.clone();
314        }
315        candidate = statement;
316    }
317
318    // Cursor is after all statements - return the last one
319    candidate.clone()
320}
321
322fn keyword_from_token(token: &Token) -> Option<String> {
323    match token {
324        Token::Word(word) if word.keyword != Keyword::NoKeyword => Some(word.value.to_uppercase()),
325        _ => None,
326    }
327}
328
329fn is_identifier_word(word: &Word) -> bool {
330    word.quote_style.is_some() || word.keyword == Keyword::NoKeyword
331}
332
333fn detect_clause(tokens: &[TokenInfo], cursor_offset: usize) -> CompletionClause {
334    let mut clause = CompletionClause::Unknown;
335
336    for (index, token_info) in tokens.iter().enumerate() {
337        if token_info.span.start > cursor_offset {
338            break;
339        }
340
341        if let Some(keyword) = keyword_from_token(&token_info.token) {
342            match keyword.as_str() {
343                "SELECT" => clause = CompletionClause::Select,
344                "FROM" => clause = CompletionClause::From,
345                "WHERE" => clause = CompletionClause::Where,
346                "JOIN" => clause = CompletionClause::Join,
347                "ON" => clause = CompletionClause::On,
348                "HAVING" => clause = CompletionClause::Having,
349                "LIMIT" => clause = CompletionClause::Limit,
350                "QUALIFY" => clause = CompletionClause::Qualify,
351                "WINDOW" => clause = CompletionClause::Window,
352                "INSERT" => clause = CompletionClause::Insert,
353                "UPDATE" => clause = CompletionClause::Update,
354                "DELETE" => clause = CompletionClause::Delete,
355                "WITH" => clause = CompletionClause::With,
356                "GROUP" => {
357                    if let Some(next) = tokens.get(index + 1) {
358                        if keyword_from_token(&next.token).as_deref() == Some("BY") {
359                            clause = CompletionClause::GroupBy;
360                        }
361                    }
362                }
363                "ORDER" => {
364                    if let Some(next) = tokens.get(index + 1) {
365                        if keyword_from_token(&next.token).as_deref() == Some("BY") {
366                            clause = CompletionClause::OrderBy;
367                        }
368                    }
369                }
370                _ => {}
371            }
372        }
373    }
374
375    clause
376}
377
378/// Detects whether the statement contains a GROUP BY clause.
379///
380/// This is used for context-aware function scoring - aggregates get boosted
381/// when GROUP BY is present.
382fn has_group_by(tokens: &[TokenInfo]) -> bool {
383    for (index, token_info) in tokens.iter().enumerate() {
384        if let Some(keyword) = keyword_from_token(&token_info.token) {
385            if keyword == "GROUP" {
386                if let Some(next) = tokens.get(index + 1) {
387                    if keyword_from_token(&next.token).as_deref() == Some("BY") {
388                        return true;
389                    }
390                }
391            }
392        }
393    }
394    false
395}
396
/// Detects whether the cursor is currently inside an `OVER (...)` window clause.
///
/// Clause detection never reports `CompletionClause::Window` when typing inside
/// regular `OVER` expressions, so we manually track parentheses that follow an
/// `OVER` keyword before the cursor position.
fn in_over_clause(tokens: &[TokenInfo], cursor_offset: usize) -> bool {
    // True when the last significant token was the OVER keyword, meaning the
    // next `(` opens a window specification.
    let mut pending_over = false;
    // Current parenthesis nesting depth among tokens before the cursor.
    let mut paren_depth: usize = 0;
    // Depths at which an OVER-group paren was opened; non-empty after the scan
    // means the cursor sits inside at least one still-open OVER group.
    let mut over_stack: Vec<usize> = Vec::new();

    for token_info in tokens {
        // Only tokens strictly before the cursor matter.
        if token_info.span.start >= cursor_offset {
            break;
        }

        match &token_info.token {
            Token::Word(word) => {
                if word.keyword == Keyword::NoKeyword {
                    // A plain identifier between OVER and `(` cancels the
                    // pending window state.
                    pending_over = false;
                } else if keyword_from_token(&token_info.token).as_deref() == Some("OVER") {
                    pending_over = true;
                }
                // NOTE(review): other keywords intentionally leave
                // `pending_over` untouched here — confirm this matches the
                // intended grammar (e.g. `OVER w` named-window forms).
            }
            Token::LParen => {
                paren_depth = paren_depth.saturating_add(1);
                if pending_over {
                    // This paren opens the window spec for the preceding OVER.
                    over_stack.push(paren_depth);
                    pending_over = false;
                }
            }
            Token::RParen => {
                if paren_depth > 0 {
                    // Closing the paren that opened an OVER group pops it.
                    if over_stack.last() == Some(&paren_depth) {
                        over_stack.pop();
                    }
                    paren_depth -= 1;
                }
                if pending_over {
                    pending_over = false;
                }
            }
            // Comment tokens (the only whitespace kept by tokenize_sql) do not
            // cancel a pending OVER.
            Token::Whitespace(_) => {}
            _ => {
                // Any other token between OVER and `(` cancels the pending state.
                if pending_over {
                    pending_over = false;
                }
            }
        }
    }

    !over_stack.is_empty()
}
449
450use crate::generated::{can_implicitly_cast, normalize_type_name, CanonicalType};
451
/// Represents the expected type context for completion scoring.
///
/// When the cursor is in a binary expression context (e.g., `WHERE age > |`),
/// we can infer the expected type from the left operand and score columns
/// by type compatibility.
#[derive(Debug, Clone)]
pub(crate) struct TypeContext {
    /// The expected canonical type for completions
    pub expected_type: CanonicalType,
    /// The column/expression name that provided the expected type (for debugging)
    #[allow(dead_code)]
    pub source_name: String,
}
465
/// Attempts to infer the expected type context from the tokens before the cursor.
///
/// This is used in WHERE, HAVING, and ON clauses to boost type-compatible columns.
/// For example, in `WHERE age > |`, we detect that `age` is an INTEGER and boost
/// integer-compatible columns in the completion list.
///
/// # Supported patterns
/// - `column > |` - simple comparison
/// - `(column) > |` - parenthesized column
/// - `NOT column > |` - NOT prefix (skipped)
/// - `((column)) > |` - nested parentheses
///
/// # Boundary conditions
/// - `column > 10 AND |` - returns None (new expression after AND/OR)
/// - `WHERE |` - returns None (no comparison context)
fn infer_type_context(
    tokens: &[TokenInfo],
    cursor_offset: usize,
    sql: &str,
    registry: &SchemaRegistry,
    tables: &[CompletionTable],
) -> Option<TypeContext> {
    // Collect tokens before cursor
    let tokens_before: Vec<&TokenInfo> = tokens
        .iter()
        .filter(|t| t.span.end <= cursor_offset)
        .collect();

    if tokens_before.is_empty() {
        return None;
    }

    // Phase 1: Walk backward to find comparison operator, skipping balanced parentheses
    let mut idx = tokens_before.len();
    let mut paren_depth: i32 = 0;
    let mut comparison_idx: Option<usize> = None;

    while idx > 0 {
        idx -= 1;
        let token = &tokens_before[idx].token;

        match token {
            // Track parentheses (walking backward: ) increases depth, ( decreases)
            Token::RParen => {
                paren_depth += 1;
            }
            Token::LParen => {
                paren_depth -= 1;
                if paren_depth < 0 {
                    // Unbalanced - we've gone past the start of this expression
                    return None;
                }
            }
            // AND/OR mark a boolean boundary - cursor is in a new expression
            Token::Word(word)
                if paren_depth == 0 && matches!(word.keyword, Keyword::AND | Keyword::OR) =>
            {
                return None;
            }
            // Clause boundaries - stop searching
            Token::Word(word)
                if paren_depth == 0
                    && matches!(
                        word.keyword,
                        Keyword::WHERE
                            | Keyword::FROM
                            | Keyword::SELECT
                            | Keyword::HAVING
                            | Keyword::ON
                            | Keyword::JOIN
                    ) =>
            {
                return None;
            }
            // Found comparison operator at depth 0
            Token::Eq | Token::Neq | Token::Lt | Token::Gt | Token::LtEq | Token::GtEq
                if paren_depth == 0 =>
            {
                comparison_idx = Some(idx);
                break;
            }
            _ => {}
        }
    }

    let comp_idx = comparison_idx?;
    if comp_idx == 0 {
        return None; // No tokens before the operator
    }

    // Phase 2: Find identifier before the comparison operator, skipping NOT and parentheses
    // For `(age) > |`, we need to find `age` which is inside the parens
    idx = comp_idx;
    // Depth is reset: phase 2 walks a fresh sub-expression left of the operator.
    paren_depth = 0;

    while idx > 0 {
        idx -= 1;
        let token = &tokens_before[idx].token;

        match token {
            // Track closing parens (walking backward: ) increases depth)
            Token::RParen => {
                paren_depth += 1;
            }
            // Track opening parens (walking backward: ( decreases depth)
            Token::LParen => {
                paren_depth -= 1;
                if paren_depth < 0 {
                    return None; // Unbalanced - we've exited the expression
                }
            }
            // Skip NOT keyword (unary prefix)
            Token::Word(word) if word.keyword == Keyword::NOT => {
                continue;
            }
            // AND/OR boundary at depth 0 - stop
            Token::Word(word)
                if paren_depth == 0 && matches!(word.keyword, Keyword::AND | Keyword::OR) =>
            {
                return None;
            }
            // Found identifier - accept at any depth (it's inside grouping parens)
            // For `(age) > |`, we find `age` at depth 1
            Token::Word(word) if word.keyword == Keyword::NoKeyword => {
                // Prefer the source text (preserves original quoting/case);
                // fall back to the parsed word value if the span is invalid.
                let identifier = sql
                    .get(tokens_before[idx].span.start..tokens_before[idx].span.end)
                    .unwrap_or(&word.value)
                    .to_string();

                // Look up type in schema: first in-scope table that defines the
                // column wins.
                for table in tables {
                    if let Some(data_type) =
                        registry.lookup_column_type(&table.canonical, &identifier)
                    {
                        if let Some(canonical_type) = normalize_type_name(&data_type) {
                            return Some(TypeContext {
                                expected_type: canonical_type,
                                source_name: identifier,
                            });
                        }
                    }
                }
                return None; // Identifier found but not in schema
            }
            _ => {}
        }
    }

    None
}
616
617/// Calculates a type compatibility score for a column given an expected type.
618///
619/// Returns a positive score bonus for compatible types, negative for incompatible.
620/// Compatibility is determined by whether the column type can be implicitly cast
621/// to the expected type (one direction only).
622fn type_compatibility_score(column_type: Option<&str>, expected: &TypeContext) -> i32 {
623    match column_type.and_then(normalize_type_name) {
624        Some(col_type) => {
625            // Check if column type can be cast TO expected type
626            // (e.g., for "age > |" where age is INTEGER, we want other integers)
627            if col_type == expected.expected_type
628                || can_implicitly_cast(col_type, expected.expected_type)
629            {
630                SCORE_TYPE_COMPATIBLE
631            } else {
632                SCORE_TYPE_INCOMPATIBLE
633            }
634        }
635        None => {
636            // Unknown type - no adjustment
637            0
638        }
639    }
640}
641
642fn token_kind(token: &Token) -> CompletionTokenKind {
643    use sqlparser::tokenizer::Whitespace;
644
645    match token {
646        Token::Word(word) => {
647            // Quoted identifiers (double quotes, backticks, brackets depending on dialect)
648            // should suppress completions when cursor is inside them
649            if word.quote_style.is_some() {
650                CompletionTokenKind::QuotedIdentifier
651            } else if word.keyword == Keyword::NoKeyword {
652                CompletionTokenKind::Identifier
653            } else {
654                CompletionTokenKind::Keyword
655            }
656        }
657        Token::Number(_, _)
658        | Token::SingleQuotedString(_)
659        | Token::DoubleQuotedString(_)
660        | Token::NationalStringLiteral(_)
661        | Token::EscapedStringLiteral(_)
662        | Token::HexStringLiteral(_) => CompletionTokenKind::Literal,
663        Token::Eq
664        | Token::Neq
665        | Token::Lt
666        | Token::Gt
667        | Token::LtEq
668        | Token::GtEq
669        | Token::Plus
670        | Token::Minus
671        | Token::Mul
672        | Token::Div
673        | Token::Mod
674        | Token::StringConcat => CompletionTokenKind::Operator,
675        Token::Comma
676        | Token::Period
677        | Token::LParen
678        | Token::RParen
679        | Token::SemiColon
680        | Token::LBracket
681        | Token::RBracket
682        | Token::LBrace
683        | Token::RBrace
684        | Token::Colon
685        | Token::DoubleColon
686        | Token::Assignment => CompletionTokenKind::Symbol,
687        // Comments (line and block)
688        Token::Whitespace(Whitespace::SingleLineComment { .. })
689        | Token::Whitespace(Whitespace::MultiLineComment(_)) => CompletionTokenKind::Comment,
690        _ => CompletionTokenKind::Unknown,
691    }
692}
693
694fn find_token_at_cursor(
695    tokens: &[TokenInfo],
696    cursor_offset: usize,
697    sql: &str,
698) -> Option<CompletionToken> {
699    for token in tokens {
700        if cursor_offset >= token.span.start && cursor_offset <= token.span.end {
701            let value = sql
702                .get(token.span.start..token.span.end)
703                .unwrap_or_default()
704                .to_string();
705            return Some(CompletionToken {
706                value,
707                kind: token_kind(&token.token),
708                span: token.span,
709            });
710        }
711    }
712    None
713}
714
/// Scans the token stream for table references and returns `(name, alias)` pairs.
///
/// Table positions are recognized after FROM, JOIN, UPDATE, and INTO keywords,
/// and after commas inside a FROM clause. Parenthesized sources (subqueries)
/// are skipped over and recorded with an empty name but a captured alias.
fn parse_tables(tokens: &[TokenInfo]) -> Vec<(String, Option<String>)> {
    let mut tables = Vec::new();
    // True while between FROM and the next clause keyword (comma = new table).
    let mut in_from_clause = false;
    // True when the next token(s) should be parsed as a table reference.
    let mut expecting_table = false;
    let mut index = 0;

    while index < tokens.len() {
        let token = &tokens[index].token;
        let keyword = keyword_from_token(token);

        if let Some(keyword) = keyword.as_deref() {
            match keyword {
                "FROM" => {
                    in_from_clause = true;
                    expecting_table = true;
                    index += 1;
                    continue;
                }
                "JOIN" => {
                    expecting_table = true;
                    index += 1;
                    continue;
                }
                // Any of these keywords ends the FROM clause.
                "WHERE" | "GROUP" | "ORDER" | "HAVING" | "LIMIT" | "QUALIFY" | "WINDOW" => {
                    in_from_clause = false;
                    expecting_table = false;
                }
                // UPDATE t / INSERT INTO t also name tables.
                "UPDATE" | "INTO" => {
                    expecting_table = true;
                    index += 1;
                    continue;
                }
                _ => {}
            }
        }

        // A comma inside FROM introduces another table reference.
        if in_from_clause && matches!(token, Token::Comma) {
            expecting_table = true;
            index += 1;
            continue;
        }

        if !expecting_table {
            index += 1;
            continue;
        }

        // Parenthesized source (e.g. a subquery): skip the balanced paren
        // group, then capture any alias. The name is left empty since the
        // inner query is not analyzed here.
        if matches!(token, Token::LParen) {
            let mut depth = 1;
            index += 1;
            while index < tokens.len() && depth > 0 {
                match tokens[index].token {
                    Token::LParen => depth += 1,
                    Token::RParen => depth -= 1,
                    _ => {}
                }
                index += 1;
            }

            let (alias, consumed) = parse_alias(tokens, index);
            tables.push((String::new(), alias));
            index = consumed;

            expecting_table = false;
            continue;
        }

        // Plain (possibly dotted) table name; on failure just advance one token.
        let (table_name, consumed) = match parse_table_name(tokens, index) {
            Some(result) => result,
            None => {
                index += 1;
                continue;
            }
        };

        let (alias, consumed_alias) = parse_alias(tokens, consumed);
        tables.push((table_name, alias));
        index = consumed_alias;
        expecting_table = false;
    }

    tables
}
798
799fn parse_table_name(tokens: &[TokenInfo], start: usize) -> Option<(String, usize)> {
800    let mut parts = Vec::new();
801    let mut index = start;
802
803    loop {
804        let token = tokens.get(index)?;
805        match &token.token {
806            // Accept any word token in table name context.
807            // SQL keywords like PUBLIC, USER, TABLE are commonly used as schema/table names.
808            Token::Word(word) => {
809                parts.push(word.value.clone());
810                index += 1;
811            }
812            _ => break,
813        }
814
815        if matches!(tokens.get(index).map(|t| &t.token), Some(Token::Period)) {
816            index += 1;
817            continue;
818        }
819        break;
820    }
821
822    if parts.is_empty() {
823        None
824    } else {
825        Some((parts.join("."), index))
826    }
827}
828
829fn parse_alias(tokens: &[TokenInfo], start: usize) -> (Option<String>, usize) {
830    let mut index = start;
831
832    if let Some(token) = tokens.get(index) {
833        if keyword_from_token(&token.token).as_deref() == Some("AS") {
834            index += 1;
835        }
836    }
837
838    if let Some(token) = tokens.get(index) {
839        if let Token::Word(word) = &token.token {
840            if is_identifier_word(word) {
841                return (Some(word.value.clone()), index + 1);
842            }
843        }
844    }
845
846    (None, index)
847}
848
849fn build_columns(tables: &[CompletionTable], registry: &SchemaRegistry) -> Vec<CompletionColumn> {
850    let mut columns = Vec::new();
851    let mut column_counts = std::collections::HashMap::new();
852
853    for table in tables {
854        if table.canonical.is_empty() {
855            continue;
856        }
857        if let Some(entry) = registry.get(&table.canonical) {
858            for column in &entry.table.columns {
859                let normalized = registry.normalize_identifier(&column.name);
860                *column_counts.entry(normalized).or_insert(0usize) += 1;
861            }
862        }
863    }
864
865    for table in tables {
866        if table.canonical.is_empty() {
867            continue;
868        }
869        let table_label = table.alias.clone().unwrap_or_else(|| table.name.clone());
870        if let Some(entry) = registry.get(&table.canonical) {
871            for column in &entry.table.columns {
872                let normalized = registry.normalize_identifier(&column.name);
873                let is_ambiguous = column_counts.get(&normalized).copied().unwrap_or(0) > 1;
874                columns.push(CompletionColumn {
875                    name: column.name.clone(),
876                    data_type: column.data_type.clone(),
877                    table: Some(table_label.clone()),
878                    canonical_table: Some(table.canonical.clone()),
879                    is_ambiguous,
880                });
881            }
882        }
883    }
884
885    columns
886}
887
888fn token_list_for_statement(tokens: &[TokenInfo], span: &Span) -> Vec<TokenInfo> {
889    tokens
890        .iter()
891        .filter(|token| token.span.start >= span.start && token.span.end <= span.end)
892        .cloned()
893        .collect()
894}
895
896#[must_use]
897pub fn completion_context(request: &CompletionRequest) -> CompletionContext {
898    let sql = request.sql.as_str();
899    let sql_len = sql.len();
900
901    // Validate input size to prevent memory exhaustion
902    if sql_len > MAX_SQL_LENGTH {
903        return CompletionContext::from_error(format!(
904            "SQL exceeds maximum length of {} bytes ({} bytes provided)",
905            MAX_SQL_LENGTH, sql_len
906        ));
907    }
908
909    // Validate cursor_offset is within bounds and on a valid UTF-8 char boundary
910    if request.cursor_offset > sql_len {
911        return CompletionContext::from_error(format!(
912            "cursor_offset ({}) exceeds SQL length ({})",
913            request.cursor_offset, sql_len
914        ));
915    }
916    if !sql.is_char_boundary(request.cursor_offset) {
917        return CompletionContext::from_error(format!(
918            "cursor_offset ({}) does not land on a valid UTF-8 character boundary",
919            request.cursor_offset
920        ));
921    }
922
923    // SchemaRegistry::new returns (registry, issues) where issues contains schema validation
924    // warnings. We intentionally discard these for completion context since we want to
925    // provide completions even when schema metadata has minor issues.
926    let (registry, _schema_issues) = SchemaRegistry::new(request.schema.as_ref(), request.dialect);
927
928    let tokens = match tokenize_sql(sql, request.dialect) {
929        Ok(tokens) => tokens,
930        Err(_) => {
931            return CompletionContext::empty();
932        }
933    };
934
935    let statements = split_statements(&tokens, sql_len);
936    let statement = find_statement_for_cursor(&statements, request.cursor_offset);
937    let statement_tokens = if statement.tokens.is_empty() {
938        token_list_for_statement(&tokens, &statement.span)
939    } else {
940        statement.tokens.clone()
941    };
942
943    let clause = detect_clause(&statement_tokens, request.cursor_offset);
944    let token = find_token_at_cursor(&statement_tokens, request.cursor_offset, sql);
945
946    let tables_raw = parse_tables(&statement_tokens);
947    let mut tables = Vec::new();
948
949    for (name, alias) in tables_raw {
950        if name.is_empty() {
951            tables.push(CompletionTable {
952                name: name.clone(),
953                canonical: String::new(),
954                alias,
955                matched_schema: false,
956            });
957            continue;
958        }
959
960        let resolution = registry.canonicalize_table_reference(&name);
961        tables.push(CompletionTable {
962            name,
963            canonical: resolution.canonical,
964            alias,
965            matched_schema: resolution.matched_schema,
966        });
967    }
968
969    let columns = build_columns(&tables, &registry);
970
971    CompletionContext {
972        statement_index: statement.index,
973        statement_span: statement.span,
974        clause,
975        token,
976        tables_in_scope: tables,
977        columns_in_scope: columns,
978        keyword_hints: CompletionKeywordHints {
979            global: global_keyword_set(),
980            clause: keyword_set_for_clause(clause),
981        },
982        error: None,
983    }
984}
985
/// Returns the preferred ranking of completion categories for a clause.
///
/// The first category in the returned slice is ranked highest; `category_score`
/// converts a position in this slice into a numeric score (1000, 900, …).
/// Every arm lists all eight categories so each one receives a defined rank.
fn clause_category_order(clause: CompletionClause) -> &'static [CompletionItemCategory] {
    use CompletionItemCategory as Category;
    match clause {
        // SELECT list: column references and functions dominate.
        CompletionClause::Select => &[
            Category::Column,
            Category::Function,
            Category::Aggregate,
            Category::Table,
            Category::Keyword,
            Category::Operator,
            Category::Snippet,
            Category::SchemaTable,
        ],
        // FROM / JOIN: table references come first.
        CompletionClause::From | CompletionClause::Join => &[
            Category::Table,
            Category::SchemaTable,
            Category::Keyword,
            Category::Column,
            Category::Function,
            Category::Operator,
            Category::Aggregate,
            Category::Snippet,
        ],
        // Predicate positions: columns and comparison operators first.
        CompletionClause::On
        | CompletionClause::Where
        | CompletionClause::Having
        | CompletionClause::Qualify => &[
            Category::Column,
            Category::Operator,
            Category::Function,
            Category::Aggregate,
            Category::Keyword,
            Category::Table,
            Category::SchemaTable,
            Category::Snippet,
        ],
        // GROUP BY / ORDER BY: expressions over columns and functions.
        CompletionClause::GroupBy | CompletionClause::OrderBy => &[
            Category::Column,
            Category::Function,
            Category::Aggregate,
            Category::Keyword,
            Category::Table,
            Category::SchemaTable,
            Category::Operator,
            Category::Snippet,
        ],
        // LIMIT: keywords (e.g. OFFSET) are the most likely continuation.
        CompletionClause::Limit => &[
            Category::Keyword,
            Category::Column,
            Category::Function,
            Category::Aggregate,
            Category::Table,
            Category::SchemaTable,
            Category::Operator,
            Category::Snippet,
        ],
        // WINDOW: window functions lead.
        CompletionClause::Window => &[
            Category::Function,
            Category::Column,
            Category::Keyword,
            Category::Aggregate,
            Category::Table,
            Category::SchemaTable,
            Category::Operator,
            Category::Snippet,
        ],
        // INSERT / UPDATE: target tables first, then their columns.
        CompletionClause::Insert | CompletionClause::Update => &[
            Category::Table,
            Category::SchemaTable,
            Category::Column,
            Category::Keyword,
            Category::Function,
            Category::Operator,
            Category::Aggregate,
            Category::Snippet,
        ],
        // DELETE: target tables, then connective keywords.
        CompletionClause::Delete => &[
            Category::Table,
            Category::SchemaTable,
            Category::Keyword,
            Category::Column,
            Category::Function,
            Category::Operator,
            Category::Aggregate,
            Category::Snippet,
        ],
        // WITH: keywords (AS, RECURSIVE, …) then table names.
        CompletionClause::With => &[
            Category::Keyword,
            Category::Table,
            Category::SchemaTable,
            Category::Column,
            Category::Function,
            Category::Operator,
            Category::Aggregate,
            Category::Snippet,
        ],
        // No clause detected: a neutral, column-first ordering.
        CompletionClause::Unknown => &[
            Category::Column,
            Category::Table,
            Category::SchemaTable,
            Category::Keyword,
            Category::Function,
            Category::Operator,
            Category::Aggregate,
            Category::Snippet,
        ],
    }
}
1094
1095fn category_score(clause: CompletionClause, category: CompletionItemCategory) -> i32 {
1096    let order = clause_category_order(clause);
1097    let index = order
1098        .iter()
1099        .position(|item| *item == category)
1100        .unwrap_or(order.len());
1101    1000 - (index as i32 * 100)
1102}
1103
/// Scores how well the user's token matches a completion label.
///
/// `token` is expected to be pre-lowercased by the caller; the label is
/// lowercased here. Exact match (300) > prefix match (200) > substring
/// match (100) > no match (0). An empty token scores 0.
fn prefix_score(label: &str, token: &str) -> i32 {
    if token.is_empty() {
        return 0;
    }
    let haystack = label.to_lowercase();
    if haystack == token {
        300
    } else if haystack.starts_with(token) {
        200
    } else if haystack.contains(token) {
        100
    } else {
        0
    }
}
1120
/// Returns the final dot-separated segment of a (possibly qualified) label.
///
/// Completion labels may include table or schema qualifiers for
/// disambiguation; prefix scoring compares user input against just the
/// trailing column name.
///
/// # Examples
/// - `"name"` → `"name"`
/// - `"users.name"` → `"name"`
/// - `"public.users.name"` → `"name"`
fn column_name_from_label(label: &str) -> &str {
    match label.rfind('.') {
        Some(dot) => &label[dot + 1..],
        None => label,
    }
}
1133
/// Decides whether the completion popup should appear at the cursor.
///
/// Completions are shown when the user is mid-token (`token_value` is
/// non-empty) or when the character immediately before the cursor is a
/// trigger character (`.`, `(`, `,`) or whitespace (e.g. `SELECT |`).
/// They are suppressed at offset 0 and in the middle of an identifier.
///
/// Returns `false` when `cursor_offset` is out of bounds or does not land
/// on a valid UTF-8 character boundary — external clients (e.g. LSP) may
/// send a byte offset inside a multi-byte character.
fn should_show_for_cursor(sql: &str, cursor_offset: usize, token_value: &str) -> bool {
    if !token_value.is_empty() {
        return true;
    }
    // is_char_boundary also rejects out-of-bounds offsets
    // (it returns false for cursor_offset > sql.len()).
    if cursor_offset == 0 || !sql.is_char_boundary(cursor_offset) {
        return false;
    }

    // `chars().next_back()` decodes the preceding character in O(1)
    // (UTF-8 characters are at most 4 bytes), replacing the previous
    // hand-rolled backward continuation-byte scan.
    match sql[..cursor_offset].chars().next_back() {
        // Explicit trigger characters.
        Some('.' | '(' | ',') => true,
        // Any whitespace (ASCII or Unicode) is a valid completion position.
        // Fix: the old ASCII fast path used is_ascii_whitespace while the
        // slow path used is_whitespace, so U+000B (vertical tab) was
        // inconsistently rejected even though Unicode whitespace was
        // accepted; whitespace is now classified uniformly.
        Some(ch) => ch.is_whitespace(),
        // Unreachable: cursor_offset > 0 guarantees a preceding character.
        None => false,
    }
}
1197
/// Reports whether `ch` may appear in an unquoted SQL identifier.
///
/// Deliberately restricted to ASCII letters, digits, `_`, and `$` so
/// behavior is uniform across dialects even where Unicode identifiers are
/// allowed; quoted identifiers may still contain arbitrary Unicode.
fn is_identifier_char(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '$')
}
1207
/// Returns the trailing identifier of a SQL fragment, if any.
///
/// A trailing double-quoted identifier (e.g. `"My Table"`) is returned
/// without its quotes. Otherwise the longest run of unquoted identifier
/// characters (ASCII alphanumeric, `_`, `$`) at the end of the fragment is
/// returned. Yields `None` for empty input or when the fragment ends in
/// non-identifier characters.
///
/// # Examples
/// - `"SELECT users"` → `Some("users")`
/// - `"\"My Table\""` → `Some("My Table")`
/// - `"schema.table"` → `Some("table")`
fn extract_last_identifier(source: &str) -> Option<String> {
    let trimmed = source.trim_end();
    if trimmed.is_empty() {
        return None;
    }

    // Trailing quoted identifier: everything between the final quote pair.
    if let Some(inner) = trimmed.strip_suffix('"') {
        if let Some(open) = inner.rfind('"') {
            return Some(inner[open + 1..].to_string());
        }
    }

    // Unquoted: measure the trailing run of identifier characters. They
    // are all ASCII, so the count equals the suffix's byte length.
    let tail = trimmed
        .chars()
        .rev()
        .take_while(|&ch| ch.is_ascii_alphanumeric() || ch == '_' || ch == '$')
        .count();

    if tail == 0 {
        None
    } else {
        Some(trimmed[trimmed.len() - tail..].to_string())
    }
}
1245
1246/// Extracts the qualifier (table alias or schema name) from SQL at the cursor position.
1247///
1248/// This function identifies when the user is typing after a dot (`.`), indicating
1249/// they want completions scoped to a specific table, alias, or schema.
1250///
1251/// # Examples
1252/// - `"users."` at offset 6 → `Some("users")` (trailing dot)
1253/// - `"u.name"` at offset 6 → `Some("u")` (mid-token after dot)
1254/// - `"SELECT"` at offset 6 → `None` (no qualifier)
1255///
1256/// # Safety
1257/// Returns `None` if `cursor_offset` is out of bounds or not on a valid UTF-8 boundary.
1258fn extract_qualifier(sql: &str, cursor_offset: usize) -> Option<String> {
1259    if cursor_offset == 0 || cursor_offset > sql.len() {
1260        return None;
1261    }
1262    // Ensure cursor_offset lands on a valid UTF-8 char boundary to prevent panic
1263    if !sql.is_char_boundary(cursor_offset) {
1264        return None;
1265    }
1266
1267    let prefix = &sql[..cursor_offset];
1268    let trimmed = prefix.trim_end();
1269    if trimmed.is_empty() {
1270        return None;
1271    }
1272
1273    if let Some(stripped) = trimmed.strip_suffix('.') {
1274        let before_dot = stripped.trim_end();
1275        return extract_last_identifier(before_dot);
1276    }
1277
1278    if let Some(dot_idx) = trimmed.rfind('.') {
1279        let whitespace_idx = trimmed.rfind(|ch: char| ch.is_whitespace());
1280        let dot_after_space = whitespace_idx.is_none_or(|space| dot_idx > space);
1281        if dot_after_space {
1282            let before_dot = trimmed[..dot_idx].trim_end();
1283            return extract_last_identifier(before_dot);
1284        }
1285    }
1286
1287    None
1288}
1289
1290fn build_columns_from_schema(
1291    schema: &SchemaMetadata,
1292    registry: &SchemaRegistry,
1293) -> Vec<CompletionColumn> {
1294    let mut columns = Vec::new();
1295    let mut column_counts = std::collections::HashMap::new();
1296
1297    for table in &schema.tables {
1298        for column in &table.columns {
1299            let normalized = registry.normalize_identifier(&column.name);
1300            *column_counts.entry(normalized).or_insert(0usize) += 1;
1301        }
1302    }
1303
1304    for table in &schema.tables {
1305        let table_label = table.name.clone();
1306        for column in &table.columns {
1307            let normalized = registry.normalize_identifier(&column.name);
1308            let is_ambiguous = column_counts.get(&normalized).copied().unwrap_or(0) > 1;
1309            columns.push(CompletionColumn {
1310                name: column.name.clone(),
1311                data_type: column.data_type.clone(),
1312                table: Some(table_label.clone()),
1313                canonical_table: Some(table_label.clone()),
1314                is_ambiguous,
1315            });
1316        }
1317    }
1318
1319    columns
1320}
1321
1322fn build_columns_for_table(
1323    schema: &SchemaMetadata,
1324    registry: &SchemaRegistry,
1325    target_schema: Option<&str>,
1326    table_name: &str,
1327) -> Vec<CompletionColumn> {
1328    let normalized_target = registry.normalize_identifier(table_name);
1329    let mut columns = Vec::new();
1330
1331    for table in &schema.tables {
1332        let schema_matches = target_schema.is_none_or(|schema_name| {
1333            table
1334                .schema
1335                .as_ref()
1336                .map(|schema| {
1337                    registry.normalize_identifier(schema)
1338                        == registry.normalize_identifier(schema_name)
1339                })
1340                .unwrap_or(false)
1341        });
1342        if !schema_matches {
1343            continue;
1344        }
1345        if registry.normalize_identifier(&table.name) != normalized_target {
1346            continue;
1347        }
1348
1349        for column in &table.columns {
1350            columns.push(CompletionColumn {
1351                name: column.name.clone(),
1352                data_type: column.data_type.clone(),
1353                table: Some(table.name.clone()),
1354                canonical_table: Some(table.name.clone()),
1355                is_ambiguous: false,
1356            });
1357        }
1358    }
1359
1360    columns
1361}
1362
1363fn schema_tables_for_qualifier(
1364    schema: &SchemaMetadata,
1365    registry: &SchemaRegistry,
1366    qualifier: &str,
1367) -> Vec<(String, String)> {
1368    let normalized = registry.normalize_identifier(qualifier);
1369    let mut tables = Vec::new();
1370
1371    for table in &schema.tables {
1372        let schema_matches = table
1373            .schema
1374            .as_ref()
1375            .is_some_and(|table_schema| registry.normalize_identifier(table_schema) == normalized);
1376        let catalog_matches = table
1377            .catalog
1378            .as_ref()
1379            .is_some_and(|catalog| registry.normalize_identifier(catalog) == normalized);
1380
1381        if schema_matches {
1382            let label = match table.schema.as_ref() {
1383                Some(table_schema) => format!("{table_schema}.{}", table.name),
1384                None => table.name.clone(),
1385            };
1386            tables.push((label, table.name.clone()));
1387            continue;
1388        }
1389
1390        if catalog_matches {
1391            let label = match table.catalog.as_ref() {
1392                Some(catalog) => format!("{catalog}.{}", table.name),
1393                None => table.name.clone(),
1394            };
1395            tables.push((label, table.name.clone()));
1396        }
1397    }
1398
1399    tables
1400}
1401
/// Classification of what a dot-qualifier resolved to (see `resolve_qualifier`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum QualifierTarget {
    // Qualifier matched a table alias or a table name already in scope.
    ColumnLabel,
    // Qualifier matched a table known only from schema metadata.
    SchemaTable,
    // Qualifier matched a schema or catalog name.
    SchemaOnly,
}
1408
/// Outcome of `resolve_qualifier`: which entity a dot-qualifier named,
/// plus whichever identifying fields apply to that target kind.
#[derive(Debug)]
struct QualifierResolution {
    // What kind of entity the qualifier resolved to.
    target: QualifierTarget,
    // Column-scope label (alias or table name); set for ColumnLabel.
    label: Option<String>,
    // Schema/catalog name; set for SchemaOnly, and for SchemaTable when
    // the matched table declares a schema.
    schema: Option<String>,
    // Matched table name; set for SchemaTable.
    table: Option<String>,
}
1416
/// Resolves a dot-qualifier against the tables in scope and the schema
/// metadata.
///
/// Resolution precedence (first match wins):
/// 1. An alias of a table in scope → `ColumnLabel`.
/// 2. A schema or catalog name that does NOT collide with any table name
///    in the metadata → `SchemaOnly`.
/// 3. A table name in scope → `ColumnLabel` (labelled by its alias if any).
/// 4. A table name found only in the schema metadata → `SchemaTable`.
/// 5. A schema name that also collides with a table name → `SchemaOnly`.
///
/// Returns `None` when nothing matches, or when steps 2-5 would be needed
/// but no schema metadata was provided.
fn resolve_qualifier(
    qualifier: &str,
    tables: &[CompletionTable],
    schema: Option<&SchemaMetadata>,
    registry: &SchemaRegistry,
) -> Option<QualifierResolution> {
    let normalized = registry.normalize_identifier(qualifier);

    // Step 1: table aliases in scope take absolute precedence.
    for table in tables {
        if let Some(alias) = table.alias.as_ref() {
            if registry.normalize_identifier(alias) == normalized {
                return Some(QualifierResolution {
                    target: QualifierTarget::ColumnLabel,
                    label: Some(alias.clone()),
                    schema: None,
                    table: None,
                });
            }
        }
    }

    // All remaining steps require schema metadata.
    let schema = schema?;

    // Gather the first schema name and catalog name that match, and whether
    // any table NAME also matches (used to disambiguate schema vs. table).
    let schema_name = schema.tables.iter().find_map(|table| {
        table.schema.as_ref().and_then(|table_schema| {
            if registry.normalize_identifier(table_schema) == normalized {
                Some(table_schema.clone())
            } else {
                None
            }
        })
    });
    let catalog_name = schema.tables.iter().find_map(|table| {
        table.catalog.as_ref().and_then(|catalog| {
            if registry.normalize_identifier(catalog) == normalized {
                Some(catalog.clone())
            } else {
                None
            }
        })
    });
    let table_name_matches_schema = schema
        .tables
        .iter()
        .any(|table| registry.normalize_identifier(&table.name) == normalized);

    // Step 2a: unambiguous schema name (no table shares the name).
    if let Some(schema_name) = schema_name.as_ref() {
        if !table_name_matches_schema {
            return Some(QualifierResolution {
                target: QualifierTarget::SchemaOnly,
                label: None,
                schema: Some(schema_name.clone()),
                table: None,
            });
        }
    }

    // Step 2b: unambiguous catalog name, treated like a schema.
    if let Some(catalog_name) = catalog_name.as_ref() {
        if !table_name_matches_schema {
            return Some(QualifierResolution {
                target: QualifierTarget::SchemaOnly,
                label: None,
                schema: Some(catalog_name.clone()),
                table: None,
            });
        }
    }

    // Step 3: a table currently in scope, labelled by its alias if any.
    for table in tables {
        if registry.normalize_identifier(&table.name) == normalized {
            let label = table.alias.clone().unwrap_or_else(|| table.name.clone());
            return Some(QualifierResolution {
                target: QualifierTarget::ColumnLabel,
                label: Some(label),
                schema: None,
                table: None,
            });
        }
    }

    // Step 4: a table known only from schema metadata.
    for table in &schema.tables {
        if registry.normalize_identifier(&table.name) == normalized {
            return Some(QualifierResolution {
                target: QualifierTarget::SchemaTable,
                label: None,
                schema: table.schema.clone(),
                table: Some(table.name.clone()),
            });
        }
    }

    // Step 5: a schema name that also collided with a table name earlier;
    // fall back to treating it as a schema.
    if let Some(schema_name) = schema_name {
        return Some(QualifierResolution {
            target: QualifierTarget::SchemaOnly,
            label: None,
            schema: Some(schema_name),
            table: None,
        });
    }

    None
}
1519
/// ASCII-uppercases a keyword for display and insertion; non-ASCII
/// characters are left untouched.
fn uppercase_keyword(value: &str) -> String {
    let mut upper = value.to_owned();
    upper.make_ascii_uppercase();
    upper
}
1523
1524/// Determines if completions should be suppressed in SELECT clause.
1525///
1526/// Suppresses completions when schema metadata suggests columns should exist
1527/// but we couldn't derive any for this context. This prevents showing misleading
1528/// keyword-only completions when the user expects column suggestions.
1529///
1530/// Returns `true` (suppress) in these cases:
1531/// - Schema is provided but contains no column metadata at all
1532/// - Schema has columns but none could be derived for the current scope
1533///
1534/// Returns `false` (show completions) when:
1535/// - Not in SELECT clause
1536/// - A qualifier is present (e.g., `users.`)
1537/// - Columns were successfully derived
1538/// - No schema metadata was provided
1539fn should_suppress_select_completions(
1540    clause: CompletionClause,
1541    has_qualifier: bool,
1542    columns_empty: bool,
1543    schema_provided: bool,
1544    schema_has_columns: bool,
1545) -> bool {
1546    // Only applies to SELECT clause without qualifier and no columns
1547    if clause != CompletionClause::Select || has_qualifier || !columns_empty {
1548        return false;
1549    }
1550
1551    // Suppress when schema is provided but has no column metadata
1552    if schema_provided && !schema_has_columns {
1553        return true;
1554    }
1555
1556    // Suppress when schema has columns but we couldn't derive any for this context
1557    if schema_has_columns {
1558        return true;
1559    }
1560
1561    false
1562}
1563
1564/// Generate completion items from a keyword set with the given clause_specific flag.
1565fn items_from_keyword_set(
1566    keyword_set: &CompletionKeywordSet,
1567    clause_specific: bool,
1568) -> Vec<CompletionItem> {
1569    let mut items = Vec::new();
1570
1571    for keyword in &keyword_set.keywords {
1572        let label = uppercase_keyword(keyword);
1573        items.push(CompletionItem {
1574            label: label.clone(),
1575            insert_text: label,
1576            kind: CompletionItemKind::Keyword,
1577            category: CompletionItemCategory::Keyword,
1578            score: 0,
1579            clause_specific,
1580            detail: None,
1581        });
1582    }
1583
1584    for operator in &keyword_set.operators {
1585        items.push(CompletionItem {
1586            label: operator.clone(),
1587            insert_text: operator.clone(),
1588            kind: CompletionItemKind::Operator,
1589            category: CompletionItemCategory::Operator,
1590            score: 0,
1591            clause_specific,
1592            detail: None,
1593        });
1594    }
1595
1596    for aggregate in &keyword_set.aggregates {
1597        let label = uppercase_keyword(aggregate);
1598        items.push(CompletionItem {
1599            label: label.clone(),
1600            insert_text: format!("{label}("),
1601            kind: CompletionItemKind::Function,
1602            category: CompletionItemCategory::Aggregate,
1603            score: 0,
1604            clause_specific,
1605            detail: None,
1606        });
1607    }
1608
1609    for snippet in &keyword_set.snippets {
1610        items.push(CompletionItem {
1611            label: snippet.clone(),
1612            insert_text: snippet.clone(),
1613            kind: CompletionItemKind::Snippet,
1614            category: CompletionItemCategory::Snippet,
1615            score: 0,
1616            clause_specific,
1617            detail: None,
1618        });
1619    }
1620
1621    items
1622}
1623
1624/// Enrich columns with CTE and subquery columns from AST context.
1625///
1626/// Uses a HashSet for O(1) deduplication instead of O(n²) iteration.
1627fn enrich_columns_from_ast(
1628    columns: &mut Vec<CompletionColumn>,
1629    tables: &[CompletionTable],
1630    ast_ctx: &AstContext,
1631) {
1632    use std::collections::HashSet;
1633
1634    // Build a set of existing (table, column) pairs for O(1) dedup lookups
1635    // Key: (lowercased_table_name, lowercased_column_name)
1636    let mut seen: HashSet<(String, String)> = columns
1637        .iter()
1638        .filter_map(|c| {
1639            c.table
1640                .as_ref()
1641                .map(|t| (t.to_lowercase(), c.name.to_lowercase()))
1642        })
1643        .collect();
1644
1645    // Add columns from CTEs
1646    for (cte_name, cte_info) in &ast_ctx.cte_definitions {
1647        // Check if this CTE is referenced in tables
1648        let cte_in_scope = tables.iter().any(|t| {
1649            t.name.eq_ignore_ascii_case(cte_name) || t.canonical.eq_ignore_ascii_case(cte_name)
1650        });
1651
1652        if cte_in_scope {
1653            // Use declared columns if available, otherwise use projected columns
1654            let cte_columns = if !cte_info.declared_columns.is_empty() {
1655                cte_info
1656                    .declared_columns
1657                    .iter()
1658                    .map(|name| CompletionColumn {
1659                        name: name.clone(),
1660                        table: Some(cte_name.clone()),
1661                        canonical_table: Some(cte_name.clone()),
1662                        data_type: None,
1663                        is_ambiguous: false,
1664                    })
1665                    .collect::<Vec<_>>()
1666            } else {
1667                cte_info
1668                    .projected_columns
1669                    .iter()
1670                    .filter(|c| c.name != "*") // Skip wildcards
1671                    .map(|col| CompletionColumn {
1672                        name: col.name.clone(),
1673                        table: Some(cte_name.clone()),
1674                        canonical_table: Some(cte_name.clone()),
1675                        data_type: col.data_type.clone(),
1676                        is_ambiguous: false,
1677                    })
1678                    .collect::<Vec<_>>()
1679            };
1680
1681            for col in cte_columns {
1682                let key = (cte_name.to_lowercase(), col.name.to_lowercase());
1683                if seen.insert(key) {
1684                    columns.push(col);
1685                }
1686            }
1687        }
1688    }
1689
1690    // Add columns from subquery aliases
1691    for (alias, subquery_info) in &ast_ctx.subquery_aliases {
1692        let subquery_in_scope = tables.iter().any(|t| {
1693            t.name.eq_ignore_ascii_case(alias)
1694                || t.alias
1695                    .as_ref()
1696                    .map(|a| a.eq_ignore_ascii_case(alias))
1697                    .unwrap_or(false)
1698        });
1699
1700        if subquery_in_scope {
1701            for col in &subquery_info.projected_columns {
1702                if col.name == "*" {
1703                    continue; // Skip wildcards
1704                }
1705
1706                let key = (alias.to_lowercase(), col.name.to_lowercase());
1707                if seen.insert(key) {
1708                    columns.push(CompletionColumn {
1709                        name: col.name.clone(),
1710                        table: Some(alias.clone()),
1711                        canonical_table: Some(alias.clone()),
1712                        data_type: col.data_type.clone(),
1713                        is_ambiguous: false,
1714                    });
1715                }
1716            }
1717        }
1718    }
1719}
1720
1721/// Enrich tables with CTE definitions from AST context.
1722fn enrich_tables_from_ast(tables: &mut Vec<CompletionTable>, ast_ctx: &AstContext) {
1723    // Add CTE definitions as completable tables
1724    for cte_name in ast_ctx.cte_definitions.keys() {
1725        if !tables.iter().any(|t| t.name.eq_ignore_ascii_case(cte_name)) {
1726            tables.push(CompletionTable {
1727                name: cte_name.clone(),
1728                canonical: cte_name.clone(),
1729                alias: None,
1730                matched_schema: false,
1731            });
1732        }
1733    }
1734}
1735
/// Build the ranked completion item list for a completion request.
///
/// Pipeline: derive clause/token context, decide whether completions should be
/// shown at all, gather candidates (functions, keywords, columns, tables,
/// schema tables) subject to any `table.`/`schema.` qualifier before the
/// cursor, then score and sort them. Returns `should_show == false` with an
/// empty item list whenever completion must be suppressed.
#[must_use]
pub fn completion_items(request: &CompletionRequest) -> CompletionItemsResult {
    let context = completion_context(request);
    // Context-derivation errors (e.g. oversized input) short-circuit with no items.
    if let Some(error) = context.error.clone() {
        return CompletionItemsResult {
            clause: context.clause,
            token: context.token,
            should_show: false,
            items: Vec::new(),
            error: Some(error),
        };
    }

    // Lowercased, trimmed text of the token under the cursor; used below for
    // prefix scoring and the FROM-keyword special case.
    let token_value = context
        .token
        .as_ref()
        .map(|token| token.value.trim().to_lowercase())
        .unwrap_or_default();

    // Suppress completions when cursor is inside special tokens
    // (string literals, number literals, comments, quoted identifiers)
    if let Some(ref token) = context.token {
        let suppress_inside = matches!(
            token.kind,
            CompletionTokenKind::Literal
                | CompletionTokenKind::Comment
                | CompletionTokenKind::QuotedIdentifier
        );
        // Strictly inside the token: a cursor at either edge still completes.
        if suppress_inside
            && request.cursor_offset > token.span.start
            && request.cursor_offset < token.span.end
        {
            return CompletionItemsResult {
                clause: context.clause,
                token: context.token,
                should_show: false,
                items: Vec::new(),
                error: None,
            };
        }
    }

    let should_show = should_show_for_cursor(&request.sql, request.cursor_offset, &token_value);
    if !should_show {
        return CompletionItemsResult {
            clause: context.clause,
            token: context.token,
            should_show,
            items: Vec::new(),
            error: None,
        };
    }

    // SchemaRegistry::new returns (registry, issues). Issues are intentionally discarded
    // because completion should work even with schema validation warnings.
    let (registry, _schema_issues) = SchemaRegistry::new(request.schema.as_ref(), request.dialect);
    // A qualifier is the identifier before a dot at the cursor (e.g. "u" in "u.").
    let qualifier = extract_qualifier(&request.sql, request.cursor_offset);
    let qualifier_resolution = qualifier.as_ref().and_then(|value| {
        resolve_qualifier(
            value,
            &context.tables_in_scope,
            request.schema.as_ref(),
            &registry,
        )
    });
    // Any resolved qualifier means only column (or schema-table) candidates apply.
    let restrict_to_columns = qualifier_resolution.is_some();

    let mut items = Vec::new();
    let mut seen = std::collections::HashSet::new();

    // Deduplicate on (category, label, insert_text); first pushed wins.
    let mut push_item = |item: CompletionItem| {
        let key = format!("{:?}:{}:{}", item.category, item.label, item.insert_text);
        if seen.insert(key) {
            items.push(item);
        }
    };

    // Tokenize once for GROUP BY detection and type context inference
    let tokens_opt = tokenize_sql(&request.sql, request.dialect).ok();
    let statement_tokens_opt = tokens_opt
        .as_ref()
        .map(|tokens| token_list_for_statement(tokens, &context.statement_span));

    if !restrict_to_columns {
        // Add smart function completions with context-aware scoring before keyword hints so they
        // retain signature metadata and clause-specific scoring.
        let group_by_present = statement_tokens_opt
            .as_ref()
            .map(|tokens| has_group_by(tokens))
            .unwrap_or(false);
        let in_window_context = if context.clause == CompletionClause::Window {
            true
        } else {
            statement_tokens_opt
                .as_ref()
                .map(|tokens| in_over_clause(tokens, request.cursor_offset))
                .unwrap_or(false)
        };

        // Only word-like tokens act as a function-name prefix filter.
        let function_prefix = context.token.as_ref().and_then(|token| match token.kind {
            CompletionTokenKind::Identifier
            | CompletionTokenKind::Keyword
            | CompletionTokenKind::QuotedIdentifier => {
                let trimmed = token.value.trim();
                if trimmed.is_empty() {
                    None
                } else {
                    Some(trimmed.to_string())
                }
            }
            _ => None,
        });

        let func_ctx = FunctionCompletionContext {
            clause: context.clause,
            has_group_by: group_by_present,
            in_window_context,
            prefix: function_prefix,
        };

        for item in get_function_completions(&func_ctx) {
            push_item(item);
        }

        for item in items_from_keyword_set(&context.keyword_hints.clause, true) {
            push_item(item);
        }
        for item in items_from_keyword_set(&context.keyword_hints.global, false) {
            push_item(item);
        }
    }

    // Infer type context for WHERE/HAVING/ON clauses
    // This is used for type-aware column scoring (reuses tokens from above)
    let type_context = if matches!(
        context.clause,
        CompletionClause::Where | CompletionClause::Having | CompletionClause::On
    ) {
        statement_tokens_opt.as_ref().and_then(|tokens| {
            infer_type_context(
                tokens,
                request.cursor_offset,
                &request.sql,
                &registry,
                &context.tables_in_scope,
            )
        })
    } else {
        None
    };

    // Fall back to all schema columns when nothing is in scope in a SELECT
    // (e.g. "SELECT e" before any FROM clause has been typed).
    let mut columns = context.columns_in_scope.clone();
    if columns.is_empty() && context.clause == CompletionClause::Select {
        if let Some(schema) = request.schema.as_ref() {
            columns = build_columns_from_schema(schema, &registry);
        }
    }

    // Try AST-based enrichment for CTE and subquery columns
    let mut tables_enriched = context.tables_in_scope.clone();
    let parse_result =
        try_parse_for_completion(&request.sql, request.cursor_offset, request.dialect);
    if let Some(ref result) = parse_result {
        let ast_ctx = extract_ast_context(&result.statements);
        // Enrich tables with CTE definitions
        enrich_tables_from_ast(&mut tables_enriched, &ast_ctx);
        // Enrich columns with CTE and subquery columns
        enrich_columns_from_ast(&mut columns, &tables_enriched, &ast_ctx);
    }

    // Extract lateral aliases for dialects that support them (e.g., DuckDB, BigQuery, Snowflake)
    // Lateral aliases are only available in SELECT clause, without a table qualifier
    let should_add_lateral_aliases = context.clause == CompletionClause::Select
        && request.dialect.lateral_column_alias()
        && !restrict_to_columns;

    if should_add_lateral_aliases {
        if let Some(ref result) = parse_result {
            for alias in extract_lateral_aliases(&result.statements, &request.sql) {
                // Only include aliases within the current statement and before cursor
                let statement_span = context.statement_span;
                if alias.definition_end >= request.cursor_offset
                    || statement_span.end <= statement_span.start
                {
                    continue;
                }
                if alias.definition_end <= statement_span.start
                    || alias.definition_end > statement_span.end
                {
                    continue;
                }
                // Only include aliases from the SELECT projection that contains the cursor
                // This prevents CTE aliases from leaking into outer SELECT scopes
                if request.cursor_offset < alias.projection_start
                    || request.cursor_offset > alias.projection_end
                {
                    continue;
                }
                // Avoid duplicating if the alias name matches an existing column
                let already_exists = columns
                    .iter()
                    .any(|c| c.name.eq_ignore_ascii_case(&alias.name));
                if !already_exists {
                    columns.push(CompletionColumn {
                        name: alias.name,
                        // Shown as the item's detail text in the UI.
                        data_type: Some("lateral alias".to_string()),
                        table: None,
                        canonical_table: None,
                        is_ambiguous: false,
                    });
                }
            }
        }
    }

    // Narrow the column set according to what the qualifier resolved to.
    if let Some(resolution) = qualifier_resolution.as_ref() {
        match resolution.target {
            QualifierTarget::ColumnLabel => {
                // "alias." or "table.": keep only columns labeled with that table.
                if let Some(label) = resolution.label.as_ref() {
                    let normalized = registry.normalize_identifier(label);
                    columns.retain(|column| {
                        column
                            .table
                            .as_ref()
                            .map(|table| registry.normalize_identifier(table) == normalized)
                            .unwrap_or(false)
                    });
                }
            }
            QualifierTarget::SchemaTable => {
                // "schema.table.": rebuild columns directly from the schema table.
                columns = request
                    .schema
                    .as_ref()
                    .map(|schema| {
                        build_columns_for_table(
                            schema,
                            &registry,
                            resolution.schema.as_deref(),
                            resolution.table.as_deref().unwrap_or_default(),
                        )
                    })
                    .unwrap_or_default();
            }
            QualifierTarget::SchemaOnly => {
                // "schema.": no columns; schema tables are offered below.
                columns.clear();
            }
        }
    }

    let schema_has_columns = request
        .schema
        .as_ref()
        .map(|schema| schema.tables.iter().any(|table| !table.columns.is_empty()))
        .unwrap_or(false);
    let schema_provided = request.schema.is_some();

    // Cache emptiness check before consuming columns to avoid clone during iteration
    let has_columns = !columns.is_empty();

    if should_suppress_select_completions(
        context.clause,
        qualifier_resolution.is_some(),
        !has_columns,
        schema_provided,
        schema_has_columns,
    ) {
        return CompletionItemsResult {
            clause: context.clause,
            token: context.token,
            should_show: false,
            items: Vec::new(),
            error: None,
        };
    }

    // Use into_iter() to take ownership of columns, avoiding clones where possible
    for column in columns {
        let (label, insert_text) = if restrict_to_columns {
            // Both label and insert_text are the column name
            let name = column.name;
            (name.clone(), name)
        } else if column.is_ambiguous {
            // Ambiguous names are disambiguated as "table.column" when possible.
            if let Some(table) = &column.table {
                let label = format!("{table}.{}", column.name);
                let insert_text = label.clone();
                (label, insert_text)
            } else {
                let name = column.name;
                (name.clone(), name)
            }
        } else {
            let name = column.name;
            (name.clone(), name)
        };
        push_item(CompletionItem {
            label,
            insert_text,
            kind: CompletionItemKind::Column,
            category: CompletionItemCategory::Column,
            score: 0,
            clause_specific: true,
            detail: column.data_type,
        });
    }

    let schema_tables_only = qualifier_resolution
        .as_ref()
        .map(|resolution| resolution.target == QualifierTarget::SchemaOnly)
        .unwrap_or(false);

    // For a bare "schema." qualifier, offer the tables that live in that schema.
    if schema_tables_only {
        if let Some(schema_name) = qualifier_resolution
            .as_ref()
            .and_then(|resolution| resolution.schema.as_deref())
        {
            if let Some(schema) = request.schema.as_ref() {
                for (label, insert_text) in
                    schema_tables_for_qualifier(schema, &registry, schema_name)
                {
                    push_item(CompletionItem {
                        label,
                        insert_text,
                        kind: CompletionItemKind::SchemaTable,
                        category: CompletionItemCategory::SchemaTable,
                        score: 0,
                        clause_specific: false,
                        detail: None,
                    });
                }
            }
        }
    }

    // Hide table suggestions when a qualifier applies, or in a SELECT list
    // that already has column candidates.
    let suppress_tables = restrict_to_columns
        || schema_tables_only
        || (context.clause == CompletionClause::Select && has_columns);

    if !suppress_tables {
        // Tables already in scope: label shows "alias (name)" when aliased.
        for table in &tables_enriched {
            let label = table
                .alias
                .as_ref()
                .map(|alias| format!("{alias} ({})", table.name))
                .unwrap_or_else(|| table.name.clone());
            let insert_text = table.alias.clone().unwrap_or_else(|| table.name.clone());
            push_item(CompletionItem {
                label,
                insert_text,
                kind: CompletionItemKind::Table,
                category: CompletionItemCategory::Table,
                score: 0,
                clause_specific: true,
                detail: if table.canonical.is_empty() {
                    None
                } else {
                    Some(table.canonical.clone())
                },
            });
        }

        // All schema tables, labeled "schema.name" when a schema is known.
        if let Some(schema) = &request.schema {
            for table in &schema.tables {
                let label = match &table.schema {
                    Some(schema_name) => format!("{schema_name}.{}", table.name),
                    None => table.name.clone(),
                };
                let insert_text = label.clone();
                push_item(CompletionItem {
                    label,
                    insert_text,
                    kind: CompletionItemKind::SchemaTable,
                    category: CompletionItemCategory::SchemaTable,
                    score: 0,
                    clause_specific: false,
                    detail: None,
                });
            }
        }
    }

    // Scoring pass: combine any precomputed score with category base, prefix
    // match, clause-specific bonus, type compatibility, and special-case
    // adjustments (e.g. boosting FROM after "SELECT … f").
    for item in items.iter_mut() {
        let precomputed_score = item.score;
        let category_base = category_score(context.clause, item.category);
        let prefix = prefix_score(&item.label, &token_value);
        // Columns labeled "table.column" also get scored on the bare column name.
        let column_prefix = if item.category == CompletionItemCategory::Column {
            let column_name = column_name_from_label(&item.label);
            let column_score = prefix_score(column_name, &token_value);
            if column_score > 0 {
                column_score.saturating_add(SCORE_COLUMN_NAME_MATCH_BONUS)
            } else {
                0
            }
        } else {
            0
        };
        let clause_score = if item.clause_specific {
            SCORE_CLAUSE_SPECIFIC_BONUS
        } else {
            0
        };

        // Type compatibility scoring for columns in comparison contexts.
        //
        // Design note: For columns, `item.detail` contains the SQL data type (e.g., "INTEGER").
        // This coupling is intentional - the detail field displays type info in the UI, and we
        // reuse it for type-aware scoring. If `detail` format changes for columns, update
        // `type_compatibility_score` accordingly.
        let type_score = if item.category == CompletionItemCategory::Column {
            if let Some(ref ctx) = type_context {
                type_compatibility_score(item.detail.as_deref(), ctx)
            } else {
                0
            }
        } else {
            0
        };

        let mut special = 0;
        if context.clause == CompletionClause::Select && token_value.starts_with('f') {
            let label_lower = item.label.to_lowercase();
            if item.category == CompletionItemCategory::Keyword && label_lower == "from" {
                special = SCORE_FROM_KEYWORD_BOOST;
            } else if item.category == CompletionItemCategory::Keyword {
                special = SCORE_OTHER_KEYWORD_PENALTY;
            } else if item.kind == CompletionItemKind::Function && label_lower.starts_with("from_")
            {
                special = SCORE_FROM_FUNCTION_PENALTY;
            } else if item.kind == CompletionItemKind::Function && label_lower.starts_with('f') {
                special = SCORE_F_FUNCTION_PENALTY;
            }
        }
        let prefix_score = prefix.max(column_prefix);
        // Use saturating arithmetic to prevent overflow with extreme inputs
        item.score = precomputed_score
            .saturating_add(category_base)
            .saturating_add(prefix_score)
            .saturating_add(clause_score)
            .saturating_add(type_score)
            .saturating_add(special);
    }

    // Highest score first; ties broken alphabetically, case-insensitively.
    items.sort_by(|a, b| {
        b.score
            .cmp(&a.score)
            .then_with(|| a.label.to_lowercase().cmp(&b.label.to_lowercase()))
    });

    CompletionItemsResult {
        clause: context.clause,
        token: context.token,
        should_show,
        items,
        error: None,
    }
}
2191
2192#[cfg(test)]
2193mod tests {
2194    use super::*;
2195    use crate::types::{
2196        ColumnSchema, CompletionClause, CompletionItemCategory, CompletionRequest, Dialect,
2197        SchemaMetadata, SchemaTable,
2198    };
2199
2200    #[test]
2201    fn test_completion_clause_detection() {
2202        let sql = "SELECT * FROM users WHERE ";
2203        let request = CompletionRequest {
2204            sql: sql.to_string(),
2205            dialect: Dialect::Duckdb,
2206            // Cursor at end of string (after trailing space)
2207            cursor_offset: sql.len(),
2208            schema: None,
2209        };
2210
2211        let context = completion_context(&request);
2212        assert_eq!(context.clause, CompletionClause::Where);
2213    }
2214
2215    #[test]
2216    fn test_completion_tables_and_columns() {
2217        let schema = SchemaMetadata {
2218            default_catalog: None,
2219            default_schema: Some("public".to_string()),
2220            search_path: None,
2221            case_sensitivity: None,
2222            allow_implied: true,
2223            tables: vec![
2224                SchemaTable {
2225                    catalog: None,
2226                    schema: Some("public".to_string()),
2227                    name: "users".to_string(),
2228                    columns: vec![
2229                        ColumnSchema {
2230                            name: "id".to_string(),
2231                            data_type: Some("integer".to_string()),
2232                            is_primary_key: None,
2233                            foreign_key: None,
2234                        },
2235                        ColumnSchema {
2236                            name: "name".to_string(),
2237                            data_type: Some("varchar".to_string()),
2238                            is_primary_key: None,
2239                            foreign_key: None,
2240                        },
2241                    ],
2242                },
2243                SchemaTable {
2244                    catalog: None,
2245                    schema: Some("public".to_string()),
2246                    name: "orders".to_string(),
2247                    columns: vec![
2248                        ColumnSchema {
2249                            name: "id".to_string(),
2250                            data_type: Some("integer".to_string()),
2251                            is_primary_key: None,
2252                            foreign_key: None,
2253                        },
2254                        ColumnSchema {
2255                            name: "user_id".to_string(),
2256                            data_type: Some("integer".to_string()),
2257                            is_primary_key: None,
2258                            foreign_key: None,
2259                        },
2260                    ],
2261                },
2262            ],
2263        };
2264
2265        let sql = "SELECT u. FROM users u JOIN orders o ON u.id = o.user_id";
2266        let cursor_offset = sql.find("u.").unwrap() + 2;
2267
2268        let request = CompletionRequest {
2269            sql: sql.to_string(),
2270            dialect: Dialect::Duckdb,
2271            cursor_offset,
2272            schema: Some(schema),
2273        };
2274
2275        let context = completion_context(&request);
2276        assert_eq!(context.tables_in_scope.len(), 2);
2277        assert!(context
2278            .columns_in_scope
2279            .iter()
2280            .any(|col| col.name == "name"));
2281        assert!(context
2282            .columns_in_scope
2283            .iter()
2284            .any(|col| col.name == "user_id"));
2285        assert!(context
2286            .columns_in_scope
2287            .iter()
2288            .any(|col| col.name == "id" && col.is_ambiguous));
2289    }
2290
2291    #[test]
2292    fn test_completion_items_respects_table_qualifier() {
2293        let schema = SchemaMetadata {
2294            default_catalog: None,
2295            default_schema: Some("public".to_string()),
2296            search_path: None,
2297            case_sensitivity: None,
2298            allow_implied: true,
2299            tables: vec![
2300                SchemaTable {
2301                    catalog: None,
2302                    schema: Some("public".to_string()),
2303                    name: "users".to_string(),
2304                    columns: vec![
2305                        ColumnSchema {
2306                            name: "id".to_string(),
2307                            data_type: Some("integer".to_string()),
2308                            is_primary_key: None,
2309                            foreign_key: None,
2310                        },
2311                        ColumnSchema {
2312                            name: "name".to_string(),
2313                            data_type: Some("varchar".to_string()),
2314                            is_primary_key: None,
2315                            foreign_key: None,
2316                        },
2317                    ],
2318                },
2319                SchemaTable {
2320                    catalog: None,
2321                    schema: Some("public".to_string()),
2322                    name: "orders".to_string(),
2323                    columns: vec![ColumnSchema {
2324                        name: "total".to_string(),
2325                        data_type: Some("integer".to_string()),
2326                        is_primary_key: None,
2327                        foreign_key: None,
2328                    }],
2329                },
2330            ],
2331        };
2332
2333        let sql = "SELECT u. FROM users u";
2334        let cursor_offset = sql.find("u.").unwrap() + 2;
2335
2336        let request = CompletionRequest {
2337            sql: sql.to_string(),
2338            dialect: Dialect::Duckdb,
2339            cursor_offset,
2340            schema: Some(schema),
2341        };
2342
2343        let result = completion_items(&request);
2344        assert!(result.should_show);
2345        assert!(result
2346            .items
2347            .iter()
2348            .all(|item| item.category == CompletionItemCategory::Column));
2349        assert!(result.items.iter().any(|item| item.label == "id"));
2350        assert!(!result.items.iter().any(|item| item.label == "total"));
2351    }
2352
2353    #[test]
2354    fn test_completion_items_select_prefers_columns_over_tables() {
2355        let schema = SchemaMetadata {
2356            default_catalog: None,
2357            default_schema: Some("public".to_string()),
2358            search_path: None,
2359            case_sensitivity: None,
2360            allow_implied: true,
2361            tables: vec![SchemaTable {
2362                catalog: None,
2363                schema: Some("public".to_string()),
2364                name: "users".to_string(),
2365                columns: vec![ColumnSchema {
2366                    name: "email".to_string(),
2367                    data_type: Some("varchar".to_string()),
2368                    is_primary_key: None,
2369                    foreign_key: None,
2370                }],
2371            }],
2372        };
2373
2374        let sql = "SELECT e";
2375        let cursor_offset = sql.len();
2376
2377        let request = CompletionRequest {
2378            sql: sql.to_string(),
2379            dialect: Dialect::Duckdb,
2380            cursor_offset,
2381            schema: Some(schema),
2382        };
2383
2384        let result = completion_items(&request);
2385        assert!(result.should_show);
2386        assert!(result
2387            .items
2388            .iter()
2389            .any(|item| item.category == CompletionItemCategory::Column));
2390        assert!(!result
2391            .items
2392            .iter()
2393            .any(|item| item.category == CompletionItemCategory::Table));
2394        assert!(!result
2395            .items
2396            .iter()
2397            .any(|item| item.category == CompletionItemCategory::SchemaTable));
2398    }
2399
2400    // Unit tests for string helper functions
2401
2402    #[test]
2403    fn test_extract_last_identifier_simple() {
2404        assert_eq!(extract_last_identifier("users"), Some("users".to_string()));
2405        assert_eq!(
2406            extract_last_identifier("foo_bar"),
2407            Some("foo_bar".to_string())
2408        );
2409        assert_eq!(
2410            extract_last_identifier("table123"),
2411            Some("table123".to_string())
2412        );
2413    }
2414
2415    #[test]
2416    fn test_extract_last_identifier_with_spaces() {
2417        assert_eq!(
2418            extract_last_identifier("SELECT users"),
2419            Some("users".to_string())
2420        );
2421        assert_eq!(extract_last_identifier("users "), Some("users".to_string()));
2422        assert_eq!(
2423            extract_last_identifier("  users  "),
2424            Some("users".to_string())
2425        );
2426    }
2427
2428    #[test]
2429    fn test_extract_last_identifier_quoted() {
2430        assert_eq!(
2431            extract_last_identifier("\"MyTable\""),
2432            Some("MyTable".to_string())
2433        );
2434        assert_eq!(
2435            extract_last_identifier("SELECT \"My Table\""),
2436            Some("My Table".to_string())
2437        );
2438        assert_eq!(
2439            extract_last_identifier("\"schema\".\"table\""),
2440            Some("table".to_string())
2441        );
2442    }
2443
2444    #[test]
2445    fn test_extract_last_identifier_empty() {
2446        assert_eq!(extract_last_identifier(""), None);
2447        assert_eq!(extract_last_identifier("   "), None);
2448        // Note: "SELECT " extracts "SELECT" because the function doesn't distinguish keywords
2449        assert_eq!(
2450            extract_last_identifier("SELECT "),
2451            Some("SELECT".to_string())
2452        );
2453        // Only punctuation/operators return None
2454        assert_eq!(extract_last_identifier("("), None);
2455        assert_eq!(extract_last_identifier(", "), None);
2456    }
2457
2458    #[test]
2459    fn test_extract_qualifier_with_trailing_dot() {
2460        assert_eq!(extract_qualifier("users.", 6), Some("users".to_string()));
2461        assert_eq!(extract_qualifier("SELECT u.", 9), Some("u".to_string()));
2462        assert_eq!(
2463            extract_qualifier("schema.table.", 13),
2464            Some("table".to_string())
2465        );
2466    }
2467
2468    #[test]
2469    fn test_extract_qualifier_mid_token() {
2470        assert_eq!(
2471            extract_qualifier("users.name", 10),
2472            Some("users".to_string())
2473        );
2474        assert_eq!(extract_qualifier("SELECT u.id", 11), Some("u".to_string()));
2475    }
2476
2477    #[test]
2478    fn test_extract_qualifier_no_qualifier() {
2479        assert_eq!(extract_qualifier("SELECT", 6), None);
2480        assert_eq!(extract_qualifier("users", 5), None);
2481        assert_eq!(extract_qualifier("", 0), None);
2482    }
2483
2484    #[test]
2485    fn test_extract_qualifier_cursor_at_start() {
2486        assert_eq!(extract_qualifier("users.name", 0), None);
2487    }
2488
2489    #[test]
2490    fn test_extract_qualifier_cursor_out_of_bounds() {
2491        assert_eq!(extract_qualifier("users", 100), None);
2492    }
2493
2494    #[test]
2495    fn test_extract_qualifier_utf8_boundary() {
2496        // Multi-byte UTF-8 character (emoji is 4 bytes)
2497        let sql = "SELECT 🎉.";
2498        // Cursor in middle of emoji (invalid boundary) should return None
2499        assert_eq!(extract_qualifier(sql, 8), None); // Middle of emoji
2500                                                     // Cursor after emoji + dot should work
2501        assert_eq!(extract_qualifier(sql, sql.len()), None); // 🎉 is not identifier char
2502    }
2503
2504    #[test]
2505    fn test_extract_qualifier_quoted_identifier() {
2506        assert_eq!(
2507            extract_qualifier("\"My Schema\".", 12),
2508            Some("My Schema".to_string())
2509        );
2510    }
2511
2512    // Unit tests for resolve_qualifier
2513
2514    #[test]
2515    fn test_resolve_qualifier_alias_match() {
2516        let tables = vec![CompletionTable {
2517            name: "users".to_string(),
2518            canonical: "public.users".to_string(),
2519            alias: Some("u".to_string()),
2520            matched_schema: true,
2521        }];
2522        let (registry, _) = SchemaRegistry::new(None, Dialect::Duckdb);
2523
2524        let result = resolve_qualifier("u", &tables, None, &registry);
2525        assert!(result.is_some());
2526        let resolution = result.unwrap();
2527        assert_eq!(resolution.target, QualifierTarget::ColumnLabel);
2528        assert_eq!(resolution.label, Some("u".to_string()));
2529    }
2530
2531    #[test]
2532    fn test_resolve_qualifier_table_name_match() {
2533        // When table is in tables_in_scope (without alias), qualifier matches table name
2534        // Note: Schema metadata is required for table name matching (vs just alias matching)
2535        let schema = SchemaMetadata {
2536            default_catalog: None,
2537            default_schema: Some("public".to_string()),
2538            search_path: None,
2539            case_sensitivity: None,
2540            allow_implied: true,
2541            tables: vec![SchemaTable {
2542                catalog: None,
2543                schema: Some("public".to_string()),
2544                name: "users".to_string(),
2545                columns: vec![],
2546            }],
2547        };
2548        let tables = vec![CompletionTable {
2549            name: "users".to_string(),
2550            canonical: "public.users".to_string(),
2551            alias: None,
2552            matched_schema: true,
2553        }];
2554        let (registry, _) = SchemaRegistry::new(Some(&schema), Dialect::Duckdb);
2555
2556        let result = resolve_qualifier("users", &tables, Some(&schema), &registry);
2557        assert!(
2558            result.is_some(),
2559            "Should match table name in tables_in_scope"
2560        );
2561        let resolution = result.unwrap();
2562        assert_eq!(resolution.target, QualifierTarget::ColumnLabel);
2563        // When no alias, label is the table name itself
2564        assert_eq!(resolution.label, Some("users".to_string()));
2565    }
2566
2567    #[test]
2568    fn test_resolve_qualifier_schema_only() {
2569        let schema = SchemaMetadata {
2570            default_catalog: None,
2571            default_schema: None,
2572            search_path: None,
2573            case_sensitivity: None,
2574            allow_implied: true,
2575            tables: vec![SchemaTable {
2576                catalog: None,
2577                schema: Some("myschema".to_string()),
2578                name: "mytable".to_string(),
2579                columns: vec![],
2580            }],
2581        };
2582        let (registry, _) = SchemaRegistry::new(Some(&schema), Dialect::Duckdb);
2583
2584        let result = resolve_qualifier("myschema", &[], Some(&schema), &registry);
2585        assert!(result.is_some());
2586        let resolution = result.unwrap();
2587        assert_eq!(resolution.target, QualifierTarget::SchemaOnly);
2588        assert_eq!(resolution.schema, Some("myschema".to_string()));
2589    }
2590
2591    #[test]
2592    fn test_resolve_qualifier_schema_table() {
2593        let schema = SchemaMetadata {
2594            default_catalog: None,
2595            default_schema: None,
2596            search_path: None,
2597            case_sensitivity: None,
2598            allow_implied: true,
2599            tables: vec![SchemaTable {
2600                catalog: None,
2601                schema: Some("public".to_string()),
2602                name: "users".to_string(),
2603                columns: vec![ColumnSchema {
2604                    name: "id".to_string(),
2605                    data_type: Some("integer".to_string()),
2606                    is_primary_key: None,
2607                    foreign_key: None,
2608                }],
2609            }],
2610        };
2611        let (registry, _) = SchemaRegistry::new(Some(&schema), Dialect::Duckdb);
2612
2613        // When qualifier matches a table name in schema (but not in tables_in_scope)
2614        let result = resolve_qualifier("users", &[], Some(&schema), &registry);
2615        assert!(result.is_some());
2616        let resolution = result.unwrap();
2617        assert_eq!(resolution.target, QualifierTarget::SchemaTable);
2618        assert_eq!(resolution.table, Some("users".to_string()));
2619    }
2620
2621    #[test]
2622    fn test_resolve_qualifier_no_match() {
2623        let (registry, _) = SchemaRegistry::new(None, Dialect::Duckdb);
2624        let result = resolve_qualifier("nonexistent", &[], None, &registry);
2625        assert!(result.is_none());
2626    }
2627
2628    #[test]
2629    fn test_resolve_qualifier_case_insensitive() {
2630        let tables = vec![CompletionTable {
2631            name: "Users".to_string(),
2632            canonical: "public.users".to_string(),
2633            alias: Some("U".to_string()),
2634            matched_schema: true,
2635        }];
2636        let (registry, _) = SchemaRegistry::new(None, Dialect::Duckdb);
2637
2638        // Should match case-insensitively
2639        let result = resolve_qualifier("u", &tables, None, &registry);
2640        assert!(result.is_some());
2641        assert_eq!(result.unwrap().target, QualifierTarget::ColumnLabel);
2642    }
2643
2644    // Test for column_name_from_label
2645
2646    #[test]
2647    fn test_column_name_from_label() {
2648        assert_eq!(column_name_from_label("name"), "name");
2649        assert_eq!(column_name_from_label("users.name"), "name");
2650        assert_eq!(column_name_from_label("public.users.name"), "name");
2651    }
2652
2653    // Tests for hybrid AST-based completion enrichment
2654
2655    #[test]
2656    fn test_cte_column_completion() {
2657        // Test that CTE columns appear in completion
2658        let sql = "WITH cte AS (SELECT id, name FROM users) SELECT cte. FROM cte";
2659        let cursor_offset = sql.find("cte.").unwrap() + 4; // Position after "cte."
2660
2661        let request = CompletionRequest {
2662            sql: sql.to_string(),
2663            dialect: Dialect::Generic,
2664            cursor_offset,
2665            schema: None,
2666        };
2667
2668        let result = completion_items(&request);
2669        assert!(result.should_show, "Should show completions after 'cte.'");
2670
2671        // Check that CTE columns are in the completion items
2672        let column_names: Vec<&str> = result
2673            .items
2674            .iter()
2675            .filter(|item| item.category == CompletionItemCategory::Column)
2676            .map(|item| item.label.as_str())
2677            .collect();
2678
2679        assert!(
2680            column_names.contains(&"id"),
2681            "Should have 'id' column from CTE. Columns found: {:?}",
2682            column_names
2683        );
2684        assert!(
2685            column_names.contains(&"name"),
2686            "Should have 'name' column from CTE. Columns found: {:?}",
2687            column_names
2688        );
2689    }
2690
2691    #[test]
2692    fn test_cte_with_declared_columns() {
2693        // Test CTE with explicit column declaration: WITH cte(a, b) AS (...)
2694        let sql = "WITH cte(x, y) AS (SELECT id, name FROM users) SELECT cte. FROM cte";
2695        let cursor_offset = sql.find("cte.").unwrap() + 4;
2696
2697        let request = CompletionRequest {
2698            sql: sql.to_string(),
2699            dialect: Dialect::Generic,
2700            cursor_offset,
2701            schema: None,
2702        };
2703
2704        let result = completion_items(&request);
2705        assert!(result.should_show);
2706
2707        let column_names: Vec<&str> = result
2708            .items
2709            .iter()
2710            .filter(|item| item.category == CompletionItemCategory::Column)
2711            .map(|item| item.label.as_str())
2712            .collect();
2713
2714        // Should use declared names (x, y) not projected names (id, name)
2715        assert!(
2716            column_names.contains(&"x"),
2717            "Should have declared column 'x'. Columns found: {:?}",
2718            column_names
2719        );
2720        assert!(
2721            column_names.contains(&"y"),
2722            "Should have declared column 'y'. Columns found: {:?}",
2723            column_names
2724        );
2725    }
2726
2727    #[test]
2728    fn test_subquery_alias_column_completion() {
2729        // Test that subquery alias columns appear in completion
2730        // Note: The cursor must be AFTER the FROM clause for AST parsing to include the subquery
2731        let sql = "SELECT * FROM (SELECT a, b FROM t) AS sub WHERE sub.";
2732        let cursor_offset = sql.len(); // Position at the end after "sub."
2733
2734        let request = CompletionRequest {
2735            sql: sql.to_string(),
2736            dialect: Dialect::Generic,
2737            cursor_offset,
2738            schema: None,
2739        };
2740
2741        let result = completion_items(&request);
2742        assert!(result.should_show, "Should show completions after 'sub.'");
2743
2744        let column_names: Vec<&str> = result
2745            .items
2746            .iter()
2747            .filter(|item| item.category == CompletionItemCategory::Column)
2748            .map(|item| item.label.as_str())
2749            .collect();
2750
2751        assert!(
2752            column_names.contains(&"a"),
2753            "Should have 'a' column from subquery. Columns found: {:?}",
2754            column_names
2755        );
2756        assert!(
2757            column_names.contains(&"b"),
2758            "Should have 'b' column from subquery. Columns found: {:?}",
2759            column_names
2760        );
2761    }
2762
2763    #[test]
2764    fn test_recursive_cte_column_completion() {
2765        // Test that recursive CTE base case columns appear in completion
2766        let sql = r#"
2767            WITH RECURSIVE cte AS (
2768                SELECT 1 AS n
2769                UNION ALL
2770                SELECT n + 1 FROM cte WHERE n < 10
2771            )
2772            SELECT cte. FROM cte
2773        "#;
2774        let cursor_offset = sql.find("cte.").unwrap() + 4;
2775
2776        let request = CompletionRequest {
2777            sql: sql.to_string(),
2778            dialect: Dialect::Generic,
2779            cursor_offset,
2780            schema: None,
2781        };
2782
2783        let result = completion_items(&request);
2784        assert!(result.should_show);
2785
2786        let column_names: Vec<&str> = result
2787            .items
2788            .iter()
2789            .filter(|item| item.category == CompletionItemCategory::Column)
2790            .map(|item| item.label.as_str())
2791            .collect();
2792
2793        assert!(
2794            column_names.contains(&"n"),
2795            "Should have 'n' column from recursive CTE base case. Columns found: {:?}",
2796            column_names
2797        );
2798    }
2799
2800    #[test]
2801    fn test_multiple_ctes_column_completion() {
2802        // Test completion with multiple CTEs
2803        let sql = r#"
2804            WITH
2805                users_cte AS (SELECT id, name FROM users),
2806                orders_cte AS (SELECT order_id, user_id FROM orders)
2807            SELECT users_cte. FROM users_cte, orders_cte
2808        "#;
2809        let cursor_offset = sql.find("users_cte.").unwrap() + 10;
2810
2811        let request = CompletionRequest {
2812            sql: sql.to_string(),
2813            dialect: Dialect::Generic,
2814            cursor_offset,
2815            schema: None,
2816        };
2817
2818        let result = completion_items(&request);
2819        assert!(result.should_show);
2820
2821        let column_names: Vec<&str> = result
2822            .items
2823            .iter()
2824            .filter(|item| item.category == CompletionItemCategory::Column)
2825            .map(|item| item.label.as_str())
2826            .collect();
2827
2828        // Should have columns from users_cte (the qualified table)
2829        assert!(
2830            column_names.contains(&"id"),
2831            "Should have 'id' column from users_cte. Columns found: {:?}",
2832            column_names
2833        );
2834        assert!(
2835            column_names.contains(&"name"),
2836            "Should have 'name' column from users_cte. Columns found: {:?}",
2837            column_names
2838        );
2839    }
2840
2841    #[test]
2842    fn test_type_context_inference() {
2843        // Direct test of type context inference
2844        let schema = SchemaMetadata {
2845            default_catalog: None,
2846            default_schema: Some("public".to_string()),
2847            search_path: None,
2848            case_sensitivity: None,
2849            allow_implied: true,
2850            tables: vec![SchemaTable {
2851                catalog: None,
2852                schema: Some("public".to_string()),
2853                name: "users".to_string(),
2854                columns: vec![
2855                    ColumnSchema {
2856                        name: "age".to_string(),
2857                        data_type: Some("integer".to_string()),
2858                        is_primary_key: None,
2859                        foreign_key: None,
2860                    },
2861                    ColumnSchema {
2862                        name: "name".to_string(),
2863                        data_type: Some("varchar".to_string()),
2864                        is_primary_key: None,
2865                        foreign_key: None,
2866                    },
2867                ],
2868            }],
2869        };
2870
2871        let sql = "SELECT * FROM users WHERE age > ";
2872        let cursor_offset = sql.len();
2873
2874        // Tokenize
2875        let tokens = tokenize_sql(sql, Dialect::Duckdb).expect("tokenization should succeed");
2876
2877        // Create registry and completion context
2878        let (registry, _) = SchemaRegistry::new(Some(&schema), Dialect::Duckdb);
2879
2880        // Get completion context to have tables with canonical names
2881        let request = CompletionRequest {
2882            sql: sql.to_string(),
2883            dialect: Dialect::Duckdb,
2884            cursor_offset,
2885            schema: Some(schema.clone()),
2886        };
2887        let ctx = completion_context(&request);
2888
2889        // Test type context inference
2890        let type_ctx =
2891            infer_type_context(&tokens, cursor_offset, sql, &registry, &ctx.tables_in_scope);
2892
2893        assert!(
2894            type_ctx.is_some(),
2895            "Should infer type context from 'age > '. Tables in scope: {:?}",
2896            ctx.tables_in_scope
2897                .iter()
2898                .map(|t| format!("{}(canonical:{})", t.name, t.canonical))
2899                .collect::<Vec<_>>()
2900        );
2901
2902        let type_ctx = type_ctx.unwrap();
2903        assert_eq!(
2904            type_ctx.expected_type,
2905            CanonicalType::Integer,
2906            "Expected type should be Integer for 'age' column"
2907        );
2908    }
2909
2910    #[test]
2911    fn test_type_aware_column_completion_in_where() {
2912        // Test that type-compatible columns score higher in comparison contexts
2913        let schema = SchemaMetadata {
2914            default_catalog: None,
2915            default_schema: Some("public".to_string()),
2916            search_path: None,
2917            case_sensitivity: None,
2918            allow_implied: true,
2919            tables: vec![SchemaTable {
2920                catalog: None,
2921                schema: Some("public".to_string()),
2922                name: "users".to_string(),
2923                columns: vec![
2924                    ColumnSchema {
2925                        name: "age".to_string(),
2926                        data_type: Some("integer".to_string()),
2927                        is_primary_key: None,
2928                        foreign_key: None,
2929                    },
2930                    ColumnSchema {
2931                        name: "created_at".to_string(),
2932                        data_type: Some("timestamp".to_string()),
2933                        is_primary_key: None,
2934                        foreign_key: None,
2935                    },
2936                    ColumnSchema {
2937                        name: "name".to_string(),
2938                        data_type: Some("varchar".to_string()),
2939                        is_primary_key: None,
2940                        foreign_key: None,
2941                    },
2942                    ColumnSchema {
2943                        name: "score".to_string(),
2944                        data_type: Some("integer".to_string()),
2945                        is_primary_key: None,
2946                        foreign_key: None,
2947                    },
2948                ],
2949            }],
2950        };
2951
2952        // Cursor after "age > " - should boost integer-compatible columns
2953        let sql = "SELECT * FROM users WHERE age > ";
2954        let cursor_offset = sql.len();
2955
2956        let request = CompletionRequest {
2957            sql: sql.to_string(),
2958            dialect: Dialect::Duckdb,
2959            cursor_offset,
2960            schema: Some(schema),
2961        };
2962
2963        let result = completion_items(&request);
2964        assert!(result.should_show);
2965
2966        // Find column completions
2967        let columns: Vec<_> = result
2968            .items
2969            .iter()
2970            .filter(|item| item.category == CompletionItemCategory::Column)
2971            .collect();
2972
2973        // age and score (both integers) should score higher than name (varchar)
2974        let age_item = columns.iter().find(|c| c.label == "age");
2975        let score_item = columns.iter().find(|c| c.label == "score");
2976        let name_item = columns.iter().find(|c| c.label == "name");
2977
2978        assert!(age_item.is_some(), "age column should be in completions");
2979        assert!(
2980            score_item.is_some(),
2981            "score column should be in completions"
2982        );
2983        assert!(name_item.is_some(), "name column should be in completions");
2984
2985        // Integer columns should score higher than varchar in "age > " context
2986        let age_score = age_item.unwrap().score;
2987        let score_score = score_item.unwrap().score;
2988        let name_score = name_item.unwrap().score;
2989
2990        assert!(
2991            age_score > name_score,
2992            "Integer column 'age' (score: {}) should rank higher than varchar 'name' (score: {}) in integer comparison context",
2993            age_score,
2994            name_score
2995        );
2996        assert!(
2997            score_score > name_score,
2998            "Integer column 'score' (score: {}) should rank higher than varchar 'name' (score: {}) in integer comparison context",
2999            score_score,
3000            name_score
3001        );
3002    }
3003
3004    #[test]
3005    fn test_type_context_with_parentheses() {
3006        // Test that parentheses around identifier are handled: WHERE (age) > |
3007        let schema = SchemaMetadata {
3008            default_catalog: None,
3009            default_schema: Some("public".to_string()),
3010            search_path: None,
3011            case_sensitivity: None,
3012            allow_implied: true,
3013            tables: vec![SchemaTable {
3014                catalog: None,
3015                schema: Some("public".to_string()),
3016                name: "users".to_string(),
3017                columns: vec![ColumnSchema {
3018                    name: "age".to_string(),
3019                    data_type: Some("integer".to_string()),
3020                    is_primary_key: None,
3021                    foreign_key: None,
3022                }],
3023            }],
3024        };
3025
3026        let sql = "SELECT * FROM users WHERE (age) > ";
3027        let cursor_offset = sql.len();
3028
3029        let tokens = tokenize_sql(sql, Dialect::Duckdb).expect("tokenization should succeed");
3030        let (registry, _) = SchemaRegistry::new(Some(&schema), Dialect::Duckdb);
3031        let request = CompletionRequest {
3032            sql: sql.to_string(),
3033            dialect: Dialect::Duckdb,
3034            cursor_offset,
3035            schema: Some(schema),
3036        };
3037        let ctx = completion_context(&request);
3038
3039        let type_ctx =
3040            infer_type_context(&tokens, cursor_offset, sql, &registry, &ctx.tables_in_scope);
3041
3042        assert!(
3043            type_ctx.is_some(),
3044            "Should infer type context from '(age) > '"
3045        );
3046        assert_eq!(type_ctx.unwrap().expected_type, CanonicalType::Integer);
3047    }
3048
3049    #[test]
3050    fn test_type_context_with_nested_parentheses() {
3051        // Test nested parens: WHERE ((age)) > |
3052        let schema = SchemaMetadata {
3053            default_catalog: None,
3054            default_schema: Some("public".to_string()),
3055            search_path: None,
3056            case_sensitivity: None,
3057            allow_implied: true,
3058            tables: vec![SchemaTable {
3059                catalog: None,
3060                schema: Some("public".to_string()),
3061                name: "users".to_string(),
3062                columns: vec![ColumnSchema {
3063                    name: "age".to_string(),
3064                    data_type: Some("integer".to_string()),
3065                    is_primary_key: None,
3066                    foreign_key: None,
3067                }],
3068            }],
3069        };
3070
3071        let sql = "SELECT * FROM users WHERE ((age)) > ";
3072        let cursor_offset = sql.len();
3073
3074        let tokens = tokenize_sql(sql, Dialect::Duckdb).expect("tokenization should succeed");
3075        let (registry, _) = SchemaRegistry::new(Some(&schema), Dialect::Duckdb);
3076        let request = CompletionRequest {
3077            sql: sql.to_string(),
3078            dialect: Dialect::Duckdb,
3079            cursor_offset,
3080            schema: Some(schema),
3081        };
3082        let ctx = completion_context(&request);
3083
3084        let type_ctx =
3085            infer_type_context(&tokens, cursor_offset, sql, &registry, &ctx.tables_in_scope);
3086
3087        assert!(
3088            type_ctx.is_some(),
3089            "Should infer type context from '((age)) > '"
3090        );
3091        assert_eq!(type_ctx.unwrap().expected_type, CanonicalType::Integer);
3092    }
3093
3094    #[test]
3095    fn test_type_context_after_and_returns_none() {
3096        // After AND/OR, we're in a new expression - should return None
3097        let schema = SchemaMetadata {
3098            default_catalog: None,
3099            default_schema: Some("public".to_string()),
3100            search_path: None,
3101            case_sensitivity: None,
3102            allow_implied: true,
3103            tables: vec![SchemaTable {
3104                catalog: None,
3105                schema: Some("public".to_string()),
3106                name: "users".to_string(),
3107                columns: vec![ColumnSchema {
3108                    name: "age".to_string(),
3109                    data_type: Some("integer".to_string()),
3110                    is_primary_key: None,
3111                    foreign_key: None,
3112                }],
3113            }],
3114        };
3115
3116        let sql = "SELECT * FROM users WHERE age > 10 AND ";
3117        let cursor_offset = sql.len();
3118
3119        let tokens = tokenize_sql(sql, Dialect::Duckdb).expect("tokenization should succeed");
3120        let (registry, _) = SchemaRegistry::new(Some(&schema), Dialect::Duckdb);
3121        let request = CompletionRequest {
3122            sql: sql.to_string(),
3123            dialect: Dialect::Duckdb,
3124            cursor_offset,
3125            schema: Some(schema),
3126        };
3127        let ctx = completion_context(&request);
3128
3129        let type_ctx =
3130            infer_type_context(&tokens, cursor_offset, sql, &registry, &ctx.tables_in_scope);
3131
3132        assert!(
3133            type_ctx.is_none(),
3134            "Should return None after AND (new expression context)"
3135        );
3136    }
3137
3138    #[test]
3139    fn test_type_context_after_or_returns_none() {
3140        // After OR, we're in a new expression - should return None
3141        let schema = SchemaMetadata {
3142            default_catalog: None,
3143            default_schema: Some("public".to_string()),
3144            search_path: None,
3145            case_sensitivity: None,
3146            allow_implied: true,
3147            tables: vec![SchemaTable {
3148                catalog: None,
3149                schema: Some("public".to_string()),
3150                name: "users".to_string(),
3151                columns: vec![ColumnSchema {
3152                    name: "age".to_string(),
3153                    data_type: Some("integer".to_string()),
3154                    is_primary_key: None,
3155                    foreign_key: None,
3156                }],
3157            }],
3158        };
3159
3160        let sql = "SELECT * FROM users WHERE age > 10 OR ";
3161        let cursor_offset = sql.len();
3162
3163        let tokens = tokenize_sql(sql, Dialect::Duckdb).expect("tokenization should succeed");
3164        let (registry, _) = SchemaRegistry::new(Some(&schema), Dialect::Duckdb);
3165        let request = CompletionRequest {
3166            sql: sql.to_string(),
3167            dialect: Dialect::Duckdb,
3168            cursor_offset,
3169            schema: Some(schema),
3170        };
3171        let ctx = completion_context(&request);
3172
3173        let type_ctx =
3174            infer_type_context(&tokens, cursor_offset, sql, &registry, &ctx.tables_in_scope);
3175
3176        assert!(
3177            type_ctx.is_none(),
3178            "Should return None after OR (new expression context)"
3179        );
3180    }
3181
3182    // Lateral column alias completion tests
3183
3184    #[test]
3185    fn test_lateral_alias_completion_duckdb() {
3186        let sql = "SELECT price * qty AS total, ";
3187        let cursor_offset = sql.len();
3188
3189        let request = CompletionRequest {
3190            sql: sql.to_string(),
3191            dialect: Dialect::Duckdb,
3192            cursor_offset,
3193            schema: None,
3194        };
3195
3196        let result = completion_items(&request);
3197
3198        // 'total' should be available as a lateral alias
3199        let total_item = result
3200            .items
3201            .iter()
3202            .find(|i| i.label == "total" && i.detail == Some("lateral alias".to_string()));
3203        assert!(
3204            total_item.is_some(),
3205            "Lateral alias 'total' should be in completions for DuckDB"
3206        );
3207    }
3208
3209    #[test]
3210    fn test_lateral_alias_not_available_postgres() {
3211        let sql = "SELECT price * qty AS total, ";
3212        let cursor_offset = sql.len();
3213
3214        let request = CompletionRequest {
3215            sql: sql.to_string(),
3216            dialect: Dialect::Postgres,
3217            cursor_offset,
3218            schema: None,
3219        };
3220
3221        let result = completion_items(&request);
3222
3223        // 'total' should NOT be available as a lateral alias in PostgreSQL
3224        let total_item = result
3225            .items
3226            .iter()
3227            .find(|i| i.label == "total" && i.detail == Some("lateral alias".to_string()));
3228        assert!(
3229            total_item.is_none(),
3230            "Lateral alias should not appear for PostgreSQL"
3231        );
3232    }
3233
3234    #[test]
3235    fn test_lateral_alias_position_aware() {
3236        // Cursor is within the SELECT but before the alias definition ends
3237        let sql = "SELECT a + b AS total FROM t";
3238        let cursor_offset = 9; // After "SELECT a "
3239
3240        let request = CompletionRequest {
3241            sql: sql.to_string(),
3242            dialect: Dialect::Duckdb,
3243            cursor_offset,
3244            schema: None,
3245        };
3246
3247        let result = completion_items(&request);
3248
3249        // 'total' should NOT be available - cursor is before alias definition
3250        let total_item = result
3251            .items
3252            .iter()
3253            .find(|i| i.label == "total" && i.detail == Some("lateral alias".to_string()));
3254        assert!(
3255            total_item.is_none(),
3256            "Alias defined after cursor should not appear"
3257        );
3258    }
3259
3260    #[test]
3261    fn test_multiple_lateral_aliases() {
3262        let sql = "SELECT a AS x, b AS y, ";
3263        let cursor_offset = sql.len();
3264
3265        let request = CompletionRequest {
3266            sql: sql.to_string(),
3267            dialect: Dialect::Duckdb,
3268            cursor_offset,
3269            schema: None,
3270        };
3271
3272        let result = completion_items(&request);
3273
3274        // Both 'x' and 'y' should be available
3275        let x_item = result
3276            .items
3277            .iter()
3278            .find(|i| i.label == "x" && i.detail == Some("lateral alias".to_string()));
3279        let y_item = result
3280            .items
3281            .iter()
3282            .find(|i| i.label == "y" && i.detail == Some("lateral alias".to_string()));
3283        assert!(
3284            x_item.is_some(),
3285            "Lateral alias 'x' should be in completions"
3286        );
3287        assert!(
3288            y_item.is_some(),
3289            "Lateral alias 'y' should be in completions"
3290        );
3291    }
3292
3293    #[test]
3294    fn test_lateral_alias_quoted() {
3295        let sql = r#"SELECT a AS "My Total", "#;
3296        let cursor_offset = sql.len();
3297
3298        let request = CompletionRequest {
3299            sql: sql.to_string(),
3300            dialect: Dialect::Duckdb,
3301            cursor_offset,
3302            schema: None,
3303        };
3304
3305        let result = completion_items(&request);
3306
3307        // Quoted alias should be available
3308        let alias_item = result
3309            .items
3310            .iter()
3311            .find(|i| i.label == "My Total" && i.detail == Some("lateral alias".to_string()));
3312        assert!(
3313            alias_item.is_some(),
3314            "Quoted lateral alias should be in completions"
3315        );
3316    }
3317
3318    #[test]
3319    fn test_lateral_alias_bigquery_dialect() {
3320        // BigQuery also supports lateral aliases
3321        let sql = "SELECT price AS p, p * 0.1 AS ";
3322        let cursor_offset = sql.len();
3323
3324        let request = CompletionRequest {
3325            sql: sql.to_string(),
3326            dialect: Dialect::Bigquery,
3327            cursor_offset,
3328            schema: None,
3329        };
3330
3331        let result = completion_items(&request);
3332
3333        // 'p' should be available as a lateral alias
3334        let p_item = result
3335            .items
3336            .iter()
3337            .find(|i| i.label == "p" && i.detail == Some("lateral alias".to_string()));
3338        assert!(
3339            p_item.is_some(),
3340            "Lateral alias 'p' should be in completions for BigQuery"
3341        );
3342    }
3343
3344    #[test]
3345    fn test_lateral_alias_snowflake_dialect() {
3346        // Snowflake also supports lateral aliases
3347        let sql = "SELECT amount AS amt, ";
3348        let cursor_offset = sql.len();
3349
3350        let request = CompletionRequest {
3351            sql: sql.to_string(),
3352            dialect: Dialect::Snowflake,
3353            cursor_offset,
3354            schema: None,
3355        };
3356
3357        let result = completion_items(&request);
3358
3359        // 'amt' should be available as a lateral alias
3360        let amt_item = result
3361            .items
3362            .iter()
3363            .find(|i| i.label == "amt" && i.detail == Some("lateral alias".to_string()));
3364        assert!(
3365            amt_item.is_some(),
3366            "Lateral alias 'amt' should be in completions for Snowflake"
3367        );
3368    }
3369
3370    #[test]
3371    fn test_lateral_alias_not_in_from_clause() {
3372        // Lateral aliases should not appear when cursor is in FROM clause
3373        let sql = "SELECT a AS x FROM ";
3374        let cursor_offset = sql.len();
3375
3376        let request = CompletionRequest {
3377            sql: sql.to_string(),
3378            dialect: Dialect::Duckdb,
3379            cursor_offset,
3380            schema: None,
3381        };
3382
3383        let result = completion_items(&request);
3384
3385        // 'x' should NOT be available in FROM clause context
3386        let x_item = result
3387            .items
3388            .iter()
3389            .find(|i| i.label == "x" && i.detail == Some("lateral alias".to_string()));
3390        assert!(
3391            x_item.is_none(),
3392            "Lateral alias should not appear in FROM clause"
3393        );
3394    }
3395
3396    #[test]
3397    fn test_lateral_alias_not_with_qualifier() {
3398        // Lateral aliases should not appear when there's a table qualifier (e.g., "t.")
3399        let sql = "SELECT a AS x, t.";
3400        let cursor_offset = sql.len();
3401
3402        let schema = SchemaMetadata {
3403            default_catalog: None,
3404            default_schema: None,
3405            search_path: None,
3406            case_sensitivity: None,
3407            allow_implied: true,
3408            tables: vec![SchemaTable {
3409                catalog: None,
3410                schema: None,
3411                name: "t".to_string(),
3412                columns: vec![ColumnSchema {
3413                    name: "col1".to_string(),
3414                    data_type: Some("integer".to_string()),
3415                    is_primary_key: None,
3416                    foreign_key: None,
3417                }],
3418            }],
3419        };
3420
3421        let request = CompletionRequest {
3422            sql: sql.to_string(),
3423            dialect: Dialect::Duckdb,
3424            cursor_offset,
3425            schema: Some(schema),
3426        };
3427
3428        let result = completion_items(&request);
3429
3430        // When there's a qualifier, we should only show columns from that table
3431        // Lateral aliases should not appear (they don't have a table qualifier)
3432        let x_item = result
3433            .items
3434            .iter()
3435            .find(|i| i.label == "x" && i.detail == Some("lateral alias".to_string()));
3436        assert!(
3437            x_item.is_none(),
3438            "Lateral alias should not appear when using table qualifier"
3439        );
3440    }
3441
3442    #[test]
3443    fn test_should_show_for_cursor_utf8_boundary() {
3444        // Multi-byte UTF-8 character (emoji is 4 bytes)
3445        let sql = "SELECT 🎉 FROM";
3446        // Emoji starts at byte 7, cursor at byte 8 is mid-character
3447        let mid_emoji_offset = 8;
3448
3449        // Should not panic, should return false for invalid boundary
3450        assert!(!should_show_for_cursor(sql, mid_emoji_offset, ""));
3451    }
3452
3453    #[test]
3454    fn test_should_show_for_cursor_valid_positions() {
3455        // Test various valid cursor positions
3456        let sql = "SELECT . FROM";
3457        assert!(should_show_for_cursor(sql, 8, "")); // After dot
3458        assert!(!should_show_for_cursor(sql, 0, "")); // At start (no prev char)
3459        assert!(should_show_for_cursor(sql, 7, "")); // After space
3460    }
3461
3462    #[test]
3463    fn test_should_show_for_cursor_out_of_bounds() {
3464        let sql = "SELECT";
3465        assert!(!should_show_for_cursor(sql, 100, "")); // Way out of bounds
3466        assert!(!should_show_for_cursor(sql, sql.len() + 1, "")); // Just past end
3467    }
3468}