Skip to main content

flowscope_core/linter/rules/
rf_002.rs

1//! LINT_RF_002: References qualification.
2//!
3//! In multi-table queries, require qualified column references.
4
5use std::collections::HashSet;
6
7use regex::Regex;
8
9use crate::linter::config::LintConfig;
10use crate::linter::rule::{LintContext, LintRule};
11use crate::parser::parse_sql_with_dialect;
12use crate::types::{issue_codes, Dialect, Issue};
13use sqlparser::ast::{
14    Expr, Function, FunctionArg, FunctionArgExpr, FunctionArguments, Ident, ObjectNamePart,
15    OrderByKind, Query, Select, SelectItem, SelectItemQualifiedWildcardKind, SetExpr, Statement,
16    TableFactor, TableWithJoins,
17};
18
19use super::semantic_helpers::{
20    join_on_expr, select_projection_alias_set, visit_select_expressions,
21};
22
23pub struct ReferencesQualification {
24    force_enable: bool,
25    ignore_words: HashSet<String>,
26    ignore_words_regex: Option<Regex>,
27    subqueries_ignore_external_references: bool,
28}
29
30impl ReferencesQualification {
31    pub fn from_config(config: &LintConfig) -> Self {
32        let mut ignore_words: HashSet<String> = HashSet::new();
33        if let Some(values) =
34            config.rule_option_string_list(issue_codes::LINT_RF_002, "ignore_words")
35        {
36            for value in values {
37                ignore_words.extend(split_ignore_words_csv(&value));
38            }
39        }
40
41        if let Some(csv) = config.rule_option_str(issue_codes::LINT_RF_002, "ignore_words") {
42            ignore_words.extend(split_ignore_words_csv(csv));
43        }
44
45        let ignore_words_regex = config
46            .rule_option_str(issue_codes::LINT_RF_002, "ignore_words_regex")
47            .and_then(|pattern| Regex::new(pattern).ok());
48
49        Self {
50            force_enable: config
51                .rule_option_bool(issue_codes::LINT_RF_002, "force_enable")
52                .unwrap_or(true),
53            ignore_words,
54            ignore_words_regex,
55            subqueries_ignore_external_references: config
56                .rule_option_bool(
57                    issue_codes::LINT_RF_002,
58                    "subqueries_ignore_external_references",
59                )
60                .unwrap_or(false),
61        }
62    }
63}
64
65impl Default for ReferencesQualification {
66    fn default() -> Self {
67        Self {
68            force_enable: true,
69            ignore_words: HashSet::new(),
70            ignore_words_regex: None,
71            subqueries_ignore_external_references: false,
72        }
73    }
74}
75
76impl LintRule for ReferencesQualification {
77    fn code(&self) -> &'static str {
78        issue_codes::LINT_RF_002
79    }
80
81    fn name(&self) -> &'static str {
82        "References qualification"
83    }
84
85    fn description(&self) -> &'static str {
86        "References should be qualified if select has more than one referenced table/view."
87    }
88
89    fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
90        if !self.force_enable {
91            return Vec::new();
92        }
93
94        let declared_variables =
95            declared_variables_before_statement(ctx.sql, ctx.dialect(), ctx.statement_index);
96        let context = QualificationContext {
97            dialect: ctx.dialect(),
98            ignore_words: &self.ignore_words,
99            ignore_words_regex: self.ignore_words_regex.as_ref(),
100            declared_variables: &declared_variables,
101            subqueries_ignore_external_references: self.subqueries_ignore_external_references,
102        };
103
104        let unqualified_count = violations_in_statement(statement, &context, 0);
105
106        (0..unqualified_count)
107            .map(|_| {
108                Issue::warning(
109                    issue_codes::LINT_RF_002,
110                    "Use qualified references in multi-table queries.",
111                )
112                .with_statement(ctx.statement_index)
113            })
114            .collect()
115    }
116}
117
118struct QualificationContext<'a> {
119    dialect: Dialect,
120    ignore_words: &'a HashSet<String>,
121    ignore_words_regex: Option<&'a Regex>,
122    declared_variables: &'a HashSet<String>,
123    subqueries_ignore_external_references: bool,
124}
125
126#[derive(Default)]
127struct SourceInfo {
128    normal_source_count: usize,
129    value_table_aliases: HashSet<String>,
130}
131
132fn split_ignore_words_csv(raw: &str) -> impl Iterator<Item = String> + '_ {
133    raw.split(',')
134        .map(str::trim)
135        .filter(|word| !word.is_empty())
136        .map(|word| word.to_ascii_uppercase())
137}
138
139fn declared_variables_before_statement(
140    sql: &str,
141    dialect: Dialect,
142    statement_index: usize,
143) -> HashSet<String> {
144    if !matches!(dialect, Dialect::Bigquery) {
145        return HashSet::new();
146    }
147
148    let Ok(statements) = parse_sql_with_dialect(sql, dialect) else {
149        return HashSet::new();
150    };
151
152    let mut names = HashSet::new();
153    for statement in statements.into_iter().take(statement_index) {
154        if let Statement::Declare { stmts } = statement {
155            for declare in stmts {
156                for name in declare.names {
157                    names.insert(name.value.to_ascii_uppercase());
158                }
159            }
160        }
161    }
162
163    names
164}
165
166fn violations_in_statement(
167    statement: &Statement,
168    ctx: &QualificationContext,
169    external_sources: usize,
170) -> usize {
171    match statement {
172        Statement::Query(query) => violations_in_query(query, ctx, external_sources),
173        Statement::Insert(insert) => insert
174            .source
175            .as_ref()
176            .map_or(0, |query| violations_in_query(query, ctx, external_sources)),
177        Statement::CreateView { query, .. } => violations_in_query(query, ctx, external_sources),
178        Statement::CreateTable(create) => create
179            .query
180            .as_ref()
181            .map_or(0, |query| violations_in_query(query, ctx, external_sources)),
182        _ => 0,
183    }
184}
185
186fn violations_in_query(
187    query: &Query,
188    ctx: &QualificationContext,
189    external_sources: usize,
190) -> usize {
191    let mut count = 0usize;
192
193    if let Some(with) = &query.with {
194        for cte in &with.cte_tables {
195            // CTE bodies do not inherit parent query scope.
196            count += violations_in_query(&cte.query, ctx, 0);
197        }
198    }
199
200    count += violations_in_set_expr(&query.body, ctx, external_sources);
201
202    if let Some(order_by) = &query.order_by {
203        if let OrderByKind::Expressions(order_exprs) = &order_by.kind {
204            let child_external = child_external_sources(external_sources, ctx);
205            for order_expr in order_exprs {
206                count += nested_subquery_violations_in_expr(&order_expr.expr, ctx, child_external);
207            }
208        }
209    }
210
211    count
212}
213
214fn violations_in_set_expr(
215    set_expr: &SetExpr,
216    ctx: &QualificationContext,
217    external_sources: usize,
218) -> usize {
219    match set_expr {
220        SetExpr::Select(select) => violations_in_select(select, ctx, external_sources),
221        SetExpr::Query(query) => violations_in_query(query, ctx, external_sources),
222        SetExpr::SetOperation { left, right, .. } => {
223            violations_in_set_expr(left, ctx, external_sources)
224                + violations_in_set_expr(right, ctx, external_sources)
225        }
226        SetExpr::Insert(statement)
227        | SetExpr::Update(statement)
228        | SetExpr::Delete(statement)
229        | SetExpr::Merge(statement) => violations_in_statement(statement, ctx, external_sources),
230        _ => 0,
231    }
232}
233
234fn violations_in_select(
235    select: &Select,
236    ctx: &QualificationContext,
237    external_sources: usize,
238) -> usize {
239    let source_info = collect_source_info(select, ctx.dialect);
240
241    let effective_external = if ctx.subqueries_ignore_external_references {
242        0
243    } else {
244        external_sources
245    };
246    let effective_source_count = source_info.normal_source_count + effective_external;
247
248    let aliases = select_projection_alias_set(select);
249
250    let mut count = 0usize;
251    if effective_source_count > 1 {
252        count += unqualified_references_in_select_scope(
253            select,
254            &aliases,
255            &source_info.value_table_aliases,
256            ctx,
257        );
258    }
259
260    let child_external = child_external_sources(effective_source_count, ctx);
261    count += nested_subquery_violations_in_select(select, ctx, child_external);
262
263    count
264}
265
266fn child_external_sources(current_effective_sources: usize, ctx: &QualificationContext) -> usize {
267    if ctx.subqueries_ignore_external_references {
268        0
269    } else {
270        current_effective_sources
271    }
272}
273
274fn collect_source_info(select: &Select, dialect: Dialect) -> SourceInfo {
275    let mut info = SourceInfo::default();
276
277    for from_item in &select.from {
278        collect_source_info_from_table_factor(&from_item.relation, dialect, &mut info);
279        for join in &from_item.joins {
280            collect_source_info_from_table_factor(&join.relation, dialect, &mut info);
281        }
282    }
283
284    info
285}
286
287fn collect_source_info_from_table_factor(
288    table_factor: &TableFactor,
289    dialect: Dialect,
290    info: &mut SourceInfo,
291) {
292    if is_value_table_function(table_factor, dialect) {
293        if let Some(alias) = table_factor_value_table_alias(table_factor) {
294            info.value_table_aliases.insert(alias);
295        }
296        if let TableFactor::UNNEST {
297            with_offset_alias: Some(alias),
298            ..
299        } = table_factor
300        {
301            info.value_table_aliases
302                .insert(alias.value.to_ascii_uppercase());
303        }
304        return;
305    }
306
307    match table_factor {
308        TableFactor::NestedJoin {
309            table_with_joins, ..
310        } => {
311            collect_source_info_from_table_factor(&table_with_joins.relation, dialect, info);
312            for join in &table_with_joins.joins {
313                collect_source_info_from_table_factor(&join.relation, dialect, info);
314            }
315        }
316        TableFactor::Pivot { table, .. }
317        | TableFactor::Unpivot { table, .. }
318        | TableFactor::MatchRecognize { table, .. } => {
319            collect_source_info_from_table_factor(table, dialect, info);
320        }
321        _ => {
322            info.normal_source_count += 1;
323        }
324    }
325}
326
327fn table_factor_value_table_alias(table_factor: &TableFactor) -> Option<String> {
328    let alias = match table_factor {
329        TableFactor::Table { alias, .. }
330        | TableFactor::Derived { alias, .. }
331        | TableFactor::TableFunction { alias, .. }
332        | TableFactor::Function { alias, .. }
333        | TableFactor::UNNEST { alias, .. }
334        | TableFactor::JsonTable { alias, .. }
335        | TableFactor::OpenJsonTable { alias, .. }
336        | TableFactor::NestedJoin { alias, .. }
337        | TableFactor::Pivot { alias, .. }
338        | TableFactor::Unpivot { alias, .. }
339        | TableFactor::MatchRecognize { alias, .. }
340        | TableFactor::XmlTable { alias, .. }
341        | TableFactor::SemanticView { alias, .. } => alias.as_ref(),
342    }?;
343
344    Some(alias.name.value.to_ascii_uppercase())
345}
346
347fn is_value_table_function(table_factor: &TableFactor, dialect: Dialect) -> bool {
348    // UNNEST produces a value table whose alias names a column, not a table.
349    // BigQuery parses UNNEST as a dedicated TableFactor; PostgreSQL also supports
350    // UNNEST in FROM and sqlparser emits the same AST node.
351    matches!(dialect, Dialect::Bigquery | Dialect::Postgres)
352        && matches!(table_factor, TableFactor::UNNEST { .. })
353}
354
355fn unqualified_references_in_select_scope(
356    select: &Select,
357    aliases: &HashSet<String>,
358    value_table_aliases: &HashSet<String>,
359    ctx: &QualificationContext,
360) -> usize {
361    let projection_unqualified_full =
362        projection_unqualified_count_with_aliases(select, aliases, value_table_aliases, ctx);
363    let projection_unqualified_sequential =
364        projection_unqualified_count_sequential(select, value_table_aliases, ctx);
365
366    let mut unqualified_in_select = 0usize;
367    visit_select_expressions(select, &mut |expr| {
368        unqualified_in_select += count_unqualified_references_in_expr_no_subqueries(
369            expr,
370            aliases,
371            value_table_aliases,
372            ctx,
373            &HashSet::new(),
374        );
375    });
376
377    unqualified_in_select.saturating_sub(projection_unqualified_full)
378        + projection_unqualified_sequential
379}
380
381fn projection_unqualified_count_with_aliases(
382    select: &Select,
383    aliases: &HashSet<String>,
384    value_table_aliases: &HashSet<String>,
385    ctx: &QualificationContext,
386) -> usize {
387    select
388        .projection
389        .iter()
390        .map(|item| match item {
391            SelectItem::UnnamedExpr(expr) | SelectItem::ExprWithAlias { expr, .. } => {
392                count_unqualified_references_in_expr_no_subqueries(
393                    expr,
394                    aliases,
395                    value_table_aliases,
396                    ctx,
397                    &HashSet::new(),
398                )
399            }
400            SelectItem::QualifiedWildcard(SelectItemQualifiedWildcardKind::Expr(expr), _) => {
401                count_unqualified_references_in_expr_no_subqueries(
402                    expr,
403                    aliases,
404                    value_table_aliases,
405                    ctx,
406                    &HashSet::new(),
407                )
408            }
409            SelectItem::QualifiedWildcard(_, _) => 0,
410            _ => 0,
411        })
412        .sum()
413}
414
415fn projection_unqualified_count_sequential(
416    select: &Select,
417    value_table_aliases: &HashSet<String>,
418    ctx: &QualificationContext,
419) -> usize {
420    let mut aliases_before = HashSet::new();
421    let mut unqualified = 0usize;
422
423    for item in &select.projection {
424        match item {
425            SelectItem::UnnamedExpr(expr) => {
426                unqualified += count_unqualified_references_in_expr_no_subqueries(
427                    expr,
428                    &aliases_before,
429                    value_table_aliases,
430                    ctx,
431                    &HashSet::new(),
432                );
433            }
434            SelectItem::ExprWithAlias { expr, alias } => {
435                unqualified += count_unqualified_references_in_expr_no_subqueries(
436                    expr,
437                    &aliases_before,
438                    value_table_aliases,
439                    ctx,
440                    &HashSet::new(),
441                );
442                aliases_before.insert(alias.value.to_ascii_uppercase());
443            }
444            _ => {}
445        }
446    }
447
448    unqualified
449}
450
451fn count_unqualified_references_in_expr_no_subqueries(
452    expr: &Expr,
453    aliases: &HashSet<String>,
454    value_table_aliases: &HashSet<String>,
455    ctx: &QualificationContext,
456    lambda_params: &HashSet<String>,
457) -> usize {
458    match expr {
459        Expr::Identifier(identifier) => identifier_is_unqualified_reference(
460            identifier,
461            aliases,
462            value_table_aliases,
463            ctx,
464            lambda_params,
465        )
466        .into(),
467        Expr::CompoundIdentifier(_) => 0,
468        Expr::CompoundFieldAccess { root, .. } => {
469            count_unqualified_references_in_expr_no_subqueries(
470                root,
471                aliases,
472                value_table_aliases,
473                ctx,
474                lambda_params,
475            )
476        }
477        Expr::JsonAccess { value, .. } => count_unqualified_references_in_expr_no_subqueries(
478            value,
479            aliases,
480            value_table_aliases,
481            ctx,
482            lambda_params,
483        ),
484        Expr::BinaryOp { left, right, .. }
485        | Expr::AnyOp { left, right, .. }
486        | Expr::AllOp { left, right, .. }
487        | Expr::IsDistinctFrom(left, right)
488        | Expr::IsNotDistinctFrom(left, right) => {
489            count_unqualified_references_in_expr_no_subqueries(
490                left,
491                aliases,
492                value_table_aliases,
493                ctx,
494                lambda_params,
495            ) + count_unqualified_references_in_expr_no_subqueries(
496                right,
497                aliases,
498                value_table_aliases,
499                ctx,
500                lambda_params,
501            )
502        }
503        Expr::UnaryOp { expr: inner, .. }
504        | Expr::Nested(inner)
505        | Expr::IsNull(inner)
506        | Expr::IsNotNull(inner)
507        | Expr::IsTrue(inner)
508        | Expr::IsNotTrue(inner)
509        | Expr::IsFalse(inner)
510        | Expr::IsNotFalse(inner)
511        | Expr::IsUnknown(inner)
512        | Expr::IsNotUnknown(inner)
513        | Expr::Cast { expr: inner, .. }
514        | Expr::AtTimeZone {
515            timestamp: inner, ..
516        }
517        | Expr::Extract { expr: inner, .. }
518        | Expr::Ceil { expr: inner, .. }
519        | Expr::Floor { expr: inner, .. }
520        | Expr::Position { expr: inner, .. }
521        | Expr::Substring { expr: inner, .. }
522        | Expr::Trim { expr: inner, .. } => count_unqualified_references_in_expr_no_subqueries(
523            inner,
524            aliases,
525            value_table_aliases,
526            ctx,
527            lambda_params,
528        ),
529        Expr::InList { expr, list, .. } => {
530            count_unqualified_references_in_expr_no_subqueries(
531                expr,
532                aliases,
533                value_table_aliases,
534                ctx,
535                lambda_params,
536            ) + list
537                .iter()
538                .map(|item| {
539                    count_unqualified_references_in_expr_no_subqueries(
540                        item,
541                        aliases,
542                        value_table_aliases,
543                        ctx,
544                        lambda_params,
545                    )
546                })
547                .sum::<usize>()
548        }
549        Expr::InSubquery { expr, .. } => count_unqualified_references_in_expr_no_subqueries(
550            expr,
551            aliases,
552            value_table_aliases,
553            ctx,
554            lambda_params,
555        ),
556        Expr::InUnnest {
557            expr, array_expr, ..
558        } => {
559            count_unqualified_references_in_expr_no_subqueries(
560                expr,
561                aliases,
562                value_table_aliases,
563                ctx,
564                lambda_params,
565            ) + count_unqualified_references_in_expr_no_subqueries(
566                array_expr,
567                aliases,
568                value_table_aliases,
569                ctx,
570                lambda_params,
571            )
572        }
573        Expr::Between {
574            expr, low, high, ..
575        } => {
576            count_unqualified_references_in_expr_no_subqueries(
577                expr,
578                aliases,
579                value_table_aliases,
580                ctx,
581                lambda_params,
582            ) + count_unqualified_references_in_expr_no_subqueries(
583                low,
584                aliases,
585                value_table_aliases,
586                ctx,
587                lambda_params,
588            ) + count_unqualified_references_in_expr_no_subqueries(
589                high,
590                aliases,
591                value_table_aliases,
592                ctx,
593                lambda_params,
594            )
595        }
596        Expr::Case {
597            operand,
598            conditions,
599            else_result,
600            ..
601        } => {
602            let mut count = 0usize;
603            if let Some(operand) = operand {
604                count += count_unqualified_references_in_expr_no_subqueries(
605                    operand,
606                    aliases,
607                    value_table_aliases,
608                    ctx,
609                    lambda_params,
610                );
611            }
612            for when in conditions {
613                count += count_unqualified_references_in_expr_no_subqueries(
614                    &when.condition,
615                    aliases,
616                    value_table_aliases,
617                    ctx,
618                    lambda_params,
619                );
620                count += count_unqualified_references_in_expr_no_subqueries(
621                    &when.result,
622                    aliases,
623                    value_table_aliases,
624                    ctx,
625                    lambda_params,
626                );
627            }
628            if let Some(otherwise) = else_result {
629                count += count_unqualified_references_in_expr_no_subqueries(
630                    otherwise,
631                    aliases,
632                    value_table_aliases,
633                    ctx,
634                    lambda_params,
635                );
636            }
637            count
638        }
639        Expr::Function(function) => count_unqualified_references_in_function_no_subqueries(
640            function,
641            aliases,
642            value_table_aliases,
643            ctx,
644            lambda_params,
645        ),
646        Expr::Lambda(lambda) => {
647            let mut next_lambda_params = lambda_params.clone();
648            for param in &lambda.params {
649                next_lambda_params.insert(param.value.to_ascii_uppercase());
650            }
651            count_unqualified_references_in_expr_no_subqueries(
652                &lambda.body,
653                aliases,
654                value_table_aliases,
655                ctx,
656                &next_lambda_params,
657            )
658        }
659        Expr::Subquery(_) | Expr::Exists { .. } => 0,
660        _ => 0,
661    }
662}
663
664fn count_unqualified_references_in_function_no_subqueries(
665    function: &Function,
666    aliases: &HashSet<String>,
667    value_table_aliases: &HashSet<String>,
668    ctx: &QualificationContext,
669    lambda_params: &HashSet<String>,
670) -> usize {
671    let mut count = 0usize;
672
673    if let FunctionArguments::List(arguments) = &function.args {
674        for (index, arg) in arguments.args.iter().enumerate() {
675            count += match arg {
676                FunctionArg::Unnamed(FunctionArgExpr::Expr(expr))
677                | FunctionArg::Named {
678                    arg: FunctionArgExpr::Expr(expr),
679                    ..
680                }
681                | FunctionArg::ExprNamed {
682                    arg: FunctionArgExpr::Expr(expr),
683                    ..
684                } => {
685                    if should_skip_identifier_reference_for_function_arg(function, index, expr) {
686                        0
687                    } else {
688                        count_unqualified_references_in_expr_no_subqueries(
689                            expr,
690                            aliases,
691                            value_table_aliases,
692                            ctx,
693                            lambda_params,
694                        )
695                    }
696                }
697                _ => 0,
698            };
699        }
700    }
701
702    if let Some(filter) = &function.filter {
703        count += count_unqualified_references_in_expr_no_subqueries(
704            filter,
705            aliases,
706            value_table_aliases,
707            ctx,
708            lambda_params,
709        );
710    }
711
712    for order_expr in &function.within_group {
713        count += count_unqualified_references_in_expr_no_subqueries(
714            &order_expr.expr,
715            aliases,
716            value_table_aliases,
717            ctx,
718            lambda_params,
719        );
720    }
721
722    if let Some(sqlparser::ast::WindowType::WindowSpec(spec)) = &function.over {
723        for expr in &spec.partition_by {
724            count += count_unqualified_references_in_expr_no_subqueries(
725                expr,
726                aliases,
727                value_table_aliases,
728                ctx,
729                lambda_params,
730            );
731        }
732        for order_expr in &spec.order_by {
733            count += count_unqualified_references_in_expr_no_subqueries(
734                &order_expr.expr,
735                aliases,
736                value_table_aliases,
737                ctx,
738                lambda_params,
739            );
740        }
741    }
742
743    count
744}
745
746fn nested_subquery_violations_in_select(
747    select: &Select,
748    ctx: &QualificationContext,
749    child_external_sources: usize,
750) -> usize {
751    let mut count = 0usize;
752
753    visit_select_expressions(select, &mut |expr| {
754        count += nested_subquery_violations_in_expr(expr, ctx, child_external_sources);
755    });
756
757    for from_item in &select.from {
758        count += nested_subquery_violations_in_table_factor(
759            &from_item.relation,
760            ctx,
761            child_external_sources,
762        );
763        for join in &from_item.joins {
764            count += nested_subquery_violations_in_table_factor(
765                &join.relation,
766                ctx,
767                child_external_sources,
768            );
769            if let Some(on_expr) = join_on_expr(&join.join_operator) {
770                count += nested_subquery_violations_in_expr(on_expr, ctx, child_external_sources);
771            }
772        }
773    }
774
775    count
776}
777
778fn nested_subquery_violations_in_expr(
779    expr: &Expr,
780    ctx: &QualificationContext,
781    child_external_sources: usize,
782) -> usize {
783    match expr {
784        Expr::InSubquery { expr, subquery, .. } => {
785            nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
786                + violations_in_query(subquery, ctx, child_external_sources)
787        }
788        Expr::Subquery(subquery) | Expr::Exists { subquery, .. } => {
789            violations_in_query(subquery, ctx, child_external_sources)
790        }
791        Expr::BinaryOp { left, right, .. }
792        | Expr::AnyOp { left, right, .. }
793        | Expr::AllOp { left, right, .. }
794        | Expr::IsDistinctFrom(left, right)
795        | Expr::IsNotDistinctFrom(left, right) => {
796            nested_subquery_violations_in_expr(left, ctx, child_external_sources)
797                + nested_subquery_violations_in_expr(right, ctx, child_external_sources)
798        }
799        Expr::UnaryOp { expr: inner, .. }
800        | Expr::Nested(inner)
801        | Expr::IsNull(inner)
802        | Expr::IsNotNull(inner)
803        | Expr::IsTrue(inner)
804        | Expr::IsNotTrue(inner)
805        | Expr::IsFalse(inner)
806        | Expr::IsNotFalse(inner)
807        | Expr::IsUnknown(inner)
808        | Expr::IsNotUnknown(inner)
809        | Expr::Cast { expr: inner, .. }
810        | Expr::AtTimeZone {
811            timestamp: inner, ..
812        }
813        | Expr::Extract { expr: inner, .. }
814        | Expr::Ceil { expr: inner, .. }
815        | Expr::Floor { expr: inner, .. }
816        | Expr::Position { expr: inner, .. }
817        | Expr::Substring { expr: inner, .. }
818        | Expr::Trim { expr: inner, .. }
819        | Expr::JsonAccess { value: inner, .. }
820        | Expr::CompoundFieldAccess { root: inner, .. } => {
821            nested_subquery_violations_in_expr(inner, ctx, child_external_sources)
822        }
823        Expr::InList { expr, list, .. } => {
824            nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
825                + list
826                    .iter()
827                    .map(|item| {
828                        nested_subquery_violations_in_expr(item, ctx, child_external_sources)
829                    })
830                    .sum::<usize>()
831        }
832        Expr::InUnnest {
833            expr, array_expr, ..
834        } => {
835            nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
836                + nested_subquery_violations_in_expr(array_expr, ctx, child_external_sources)
837        }
838        Expr::Between {
839            expr, low, high, ..
840        } => {
841            nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
842                + nested_subquery_violations_in_expr(low, ctx, child_external_sources)
843                + nested_subquery_violations_in_expr(high, ctx, child_external_sources)
844        }
845        Expr::Case {
846            operand,
847            conditions,
848            else_result,
849            ..
850        } => {
851            let mut count = operand.as_ref().map_or(0, |expr| {
852                nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
853            });
854            for when in conditions {
855                count += nested_subquery_violations_in_expr(
856                    &when.condition,
857                    ctx,
858                    child_external_sources,
859                );
860                count +=
861                    nested_subquery_violations_in_expr(&when.result, ctx, child_external_sources);
862            }
863            if let Some(otherwise) = else_result {
864                count += nested_subquery_violations_in_expr(otherwise, ctx, child_external_sources);
865            }
866            count
867        }
868        Expr::Function(function) => {
869            let mut count = 0usize;
870            if let FunctionArguments::List(arguments) = &function.args {
871                for arg in &arguments.args {
872                    count += match arg {
873                        FunctionArg::Unnamed(FunctionArgExpr::Expr(expr))
874                        | FunctionArg::Named {
875                            arg: FunctionArgExpr::Expr(expr),
876                            ..
877                        }
878                        | FunctionArg::ExprNamed {
879                            arg: FunctionArgExpr::Expr(expr),
880                            ..
881                        } => nested_subquery_violations_in_expr(expr, ctx, child_external_sources),
882                        _ => 0,
883                    };
884                }
885            }
886            if let Some(filter) = &function.filter {
887                count += nested_subquery_violations_in_expr(filter, ctx, child_external_sources);
888            }
889            for order_expr in &function.within_group {
890                count += nested_subquery_violations_in_expr(
891                    &order_expr.expr,
892                    ctx,
893                    child_external_sources,
894                );
895            }
896            if let Some(sqlparser::ast::WindowType::WindowSpec(spec)) = &function.over {
897                for expr in &spec.partition_by {
898                    count += nested_subquery_violations_in_expr(expr, ctx, child_external_sources);
899                }
900                for order_expr in &spec.order_by {
901                    count += nested_subquery_violations_in_expr(
902                        &order_expr.expr,
903                        ctx,
904                        child_external_sources,
905                    );
906                }
907            }
908            count
909        }
910        Expr::Lambda(lambda) => {
911            nested_subquery_violations_in_expr(&lambda.body, ctx, child_external_sources)
912        }
913        _ => 0,
914    }
915}
916
917fn nested_subquery_violations_in_table_factor(
918    table_factor: &TableFactor,
919    ctx: &QualificationContext,
920    child_external_sources: usize,
921) -> usize {
922    match table_factor {
923        TableFactor::Derived {
924            lateral, subquery, ..
925        } => {
926            let external = if *lateral { child_external_sources } else { 0 };
927            violations_in_query(subquery, ctx, external)
928        }
929        TableFactor::TableFunction { expr, .. } => {
930            nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
931        }
932        TableFactor::Function { args, .. } => args
933            .iter()
934            .map(|arg| match arg {
935                FunctionArg::Unnamed(FunctionArgExpr::Expr(expr))
936                | FunctionArg::Named {
937                    arg: FunctionArgExpr::Expr(expr),
938                    ..
939                }
940                | FunctionArg::ExprNamed {
941                    arg: FunctionArgExpr::Expr(expr),
942                    ..
943                } => nested_subquery_violations_in_expr(expr, ctx, child_external_sources),
944                _ => 0,
945            })
946            .sum(),
947        TableFactor::UNNEST { array_exprs, .. } => array_exprs
948            .iter()
949            .map(|expr| nested_subquery_violations_in_expr(expr, ctx, child_external_sources))
950            .sum(),
951        TableFactor::JsonTable { json_expr, .. } | TableFactor::OpenJsonTable { json_expr, .. } => {
952            nested_subquery_violations_in_expr(json_expr, ctx, child_external_sources)
953        }
954        TableFactor::NestedJoin {
955            table_with_joins, ..
956        } => nested_subquery_violations_in_table_with_joins(
957            table_with_joins,
958            ctx,
959            child_external_sources,
960        ),
961        TableFactor::Pivot {
962            table,
963            aggregate_functions,
964            value_column,
965            default_on_null,
966            ..
967        } => {
968            let mut count =
969                nested_subquery_violations_in_table_factor(table, ctx, child_external_sources);
970            for expr_with_alias in aggregate_functions {
971                count += nested_subquery_violations_in_expr(
972                    &expr_with_alias.expr,
973                    ctx,
974                    child_external_sources,
975                );
976            }
977            for expr in value_column {
978                count += nested_subquery_violations_in_expr(expr, ctx, child_external_sources);
979            }
980            if let Some(expr) = default_on_null {
981                count += nested_subquery_violations_in_expr(expr, ctx, child_external_sources);
982            }
983            count
984        }
985        TableFactor::Unpivot {
986            table,
987            value,
988            columns,
989            ..
990        } => {
991            let mut count =
992                nested_subquery_violations_in_table_factor(table, ctx, child_external_sources);
993            count += nested_subquery_violations_in_expr(value, ctx, child_external_sources);
994            for expr_with_alias in columns {
995                count += nested_subquery_violations_in_expr(
996                    &expr_with_alias.expr,
997                    ctx,
998                    child_external_sources,
999                );
1000            }
1001            count
1002        }
1003        TableFactor::MatchRecognize {
1004            table,
1005            partition_by,
1006            order_by,
1007            measures,
1008            symbols,
1009            ..
1010        } => {
1011            let mut count =
1012                nested_subquery_violations_in_table_factor(table, ctx, child_external_sources);
1013            for expr in partition_by {
1014                count += nested_subquery_violations_in_expr(expr, ctx, child_external_sources);
1015            }
1016            for order in order_by {
1017                count +=
1018                    nested_subquery_violations_in_expr(&order.expr, ctx, child_external_sources);
1019            }
1020            for measure in measures {
1021                count +=
1022                    nested_subquery_violations_in_expr(&measure.expr, ctx, child_external_sources);
1023            }
1024            for symbol in symbols {
1025                count += nested_subquery_violations_in_expr(
1026                    &symbol.definition,
1027                    ctx,
1028                    child_external_sources,
1029                );
1030            }
1031            count
1032        }
1033        TableFactor::XmlTable { row_expression, .. } => {
1034            nested_subquery_violations_in_expr(row_expression, ctx, child_external_sources)
1035        }
1036        _ => 0,
1037    }
1038}
1039
1040fn nested_subquery_violations_in_table_with_joins(
1041    table_with_joins: &TableWithJoins,
1042    ctx: &QualificationContext,
1043    child_external_sources: usize,
1044) -> usize {
1045    let mut count = nested_subquery_violations_in_table_factor(
1046        &table_with_joins.relation,
1047        ctx,
1048        child_external_sources,
1049    );
1050
1051    for join in &table_with_joins.joins {
1052        count +=
1053            nested_subquery_violations_in_table_factor(&join.relation, ctx, child_external_sources);
1054        if let Some(on_expr) = join_on_expr(&join.join_operator) {
1055            count += nested_subquery_violations_in_expr(on_expr, ctx, child_external_sources);
1056        }
1057    }
1058
1059    count
1060}
1061
1062fn identifier_is_unqualified_reference(
1063    identifier: &Ident,
1064    aliases: &HashSet<String>,
1065    value_table_aliases: &HashSet<String>,
1066    ctx: &QualificationContext,
1067    lambda_params: &HashSet<String>,
1068) -> bool {
1069    let name = identifier.value.as_str();
1070    let normalized = name.to_ascii_uppercase();
1071
1072    if aliases.contains(&normalized)
1073        || value_table_aliases.contains(&normalized)
1074        || lambda_params.contains(&normalized)
1075        || ctx.declared_variables.contains(&normalized)
1076    {
1077        return false;
1078    }
1079
1080    if name.starts_with('@') {
1081        return false;
1082    }
1083
1084    if ctx.ignore_words.contains(&normalized) {
1085        return false;
1086    }
1087
1088    if let Some(regex) = ctx.ignore_words_regex {
1089        if regex.is_match(name) {
1090            return false;
1091        }
1092    }
1093
1094    true
1095}
1096
1097fn should_skip_identifier_reference_for_function_arg(
1098    function: &Function,
1099    arg_index: usize,
1100    expr: &Expr,
1101) -> bool {
1102    let Expr::Identifier(ident) = expr else {
1103        return false;
1104    };
1105    if ident.quote_style.is_some() || !is_date_part_identifier(&ident.value) {
1106        return false;
1107    }
1108
1109    let Some(function_name) = function_name_upper(function) else {
1110        return false;
1111    };
1112    if !is_datepart_function_name(&function_name) {
1113        return false;
1114    }
1115
1116    // Dialect-specific datepart-position differences exist; to avoid false
1117    // positives across common dialects we allow datepart keywords in the first
1118    // two argument positions of known datepart functions.
1119    arg_index <= 1
1120}
1121
1122fn function_name_upper(function: &Function) -> Option<String> {
1123    function
1124        .name
1125        .0
1126        .last()
1127        .and_then(ObjectNamePart::as_ident)
1128        .map(|ident| ident.value.to_ascii_uppercase())
1129}
1130
1131fn is_datepart_function_name(name: &str) -> bool {
1132    matches!(
1133        name,
1134        "DATEDIFF"
1135            | "DATE_DIFF"
1136            | "DATEADD"
1137            | "DATE_ADD"
1138            | "DATE_PART"
1139            | "DATETIME_TRUNC"
1140            | "TIME_TRUNC"
1141            | "TIMESTAMP_TRUNC"
1142            | "TIMESTAMP_DIFF"
1143            | "TIMESTAMPDIFF"
1144    )
1145}
1146
1147fn is_date_part_identifier(value: &str) -> bool {
1148    matches!(
1149        value.to_ascii_uppercase().as_str(),
1150        "YEAR"
1151            | "QUARTER"
1152            | "MONTH"
1153            | "WEEK"
1154            | "DAY"
1155            | "DOW"
1156            | "DOY"
1157            | "HOUR"
1158            | "MINUTE"
1159            | "SECOND"
1160            | "MILLISECOND"
1161            | "MICROSECOND"
1162            | "NANOSECOND"
1163    )
1164}
1165
1166#[cfg(test)]
1167mod tests {
1168    use super::*;
1169    use crate::linter::rule::with_active_dialect;
1170    use crate::parser::{parse_sql, parse_sql_with_dialect};
1171
1172    fn run(sql: &str) -> Vec<Issue> {
1173        let statements = parse_sql(sql).expect("parse");
1174        let rule = ReferencesQualification::default();
1175        statements
1176            .iter()
1177            .enumerate()
1178            .flat_map(|(index, statement)| {
1179                rule.check(
1180                    statement,
1181                    &LintContext {
1182                        sql,
1183                        statement_range: 0..sql.len(),
1184                        statement_index: index,
1185                    },
1186                )
1187            })
1188            .collect()
1189    }
1190
1191    fn run_in_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
1192        let statements = parse_sql_with_dialect(sql, dialect).expect("parse");
1193        let rule = ReferencesQualification::default();
1194        let mut issues = Vec::new();
1195        with_active_dialect(dialect, || {
1196            for (index, statement) in statements.iter().enumerate() {
1197                issues.extend(rule.check(
1198                    statement,
1199                    &LintContext {
1200                        sql,
1201                        statement_range: 0..sql.len(),
1202                        statement_index: index,
1203                    },
1204                ));
1205            }
1206        });
1207        issues
1208    }
1209
1210    fn run_with_config(sql: &str, dialect: Dialect, config_json: serde_json::Value) -> Vec<Issue> {
1211        let statements = parse_sql_with_dialect(sql, dialect).expect("parse");
1212        let config = LintConfig {
1213            enabled: true,
1214            disabled_rules: vec![],
1215            rule_configs: std::collections::BTreeMap::from([(
1216                "references.qualification".to_string(),
1217                config_json,
1218            )]),
1219        };
1220        let rule = ReferencesQualification::from_config(&config);
1221        let mut issues = Vec::new();
1222        with_active_dialect(dialect, || {
1223            for (index, statement) in statements.iter().enumerate() {
1224                issues.extend(rule.check(
1225                    statement,
1226                    &LintContext {
1227                        sql,
1228                        statement_range: 0..sql.len(),
1229                        statement_index: index,
1230                    },
1231                ));
1232            }
1233        });
1234        issues
1235    }
1236
1237    // --- Edge cases adopted from sqlfluff RF02 ---
1238
1239    #[test]
1240    fn allows_fully_qualified_multi_table_query() {
1241        let issues = run("SELECT foo.a, vee.b FROM foo LEFT JOIN vee ON vee.a = foo.a");
1242        assert!(issues.is_empty());
1243    }
1244
1245    #[test]
1246    fn flags_unqualified_multi_table_query() {
1247        let issues = run("SELECT a, b FROM foo LEFT JOIN vee ON vee.a = foo.a");
1248        assert!(!issues.is_empty());
1249        assert!(issues
1250            .iter()
1251            .all(|issue| issue.code == issue_codes::LINT_RF_002));
1252    }
1253
1254    #[test]
1255    fn allows_qualified_multi_table_query_inside_subquery() {
1256        let issues =
1257            run("SELECT a FROM (SELECT foo.a, vee.b FROM foo LEFT JOIN vee ON vee.a = foo.a)");
1258        assert!(issues.is_empty());
1259    }
1260
1261    #[test]
1262    fn flags_unqualified_multi_table_query_inside_subquery() {
1263        let issues = run("SELECT a FROM (SELECT a, b FROM foo LEFT JOIN vee ON vee.a = foo.a)");
1264        assert!(!issues.is_empty());
1265    }
1266
1267    #[test]
1268    fn force_enable_false_disables_rule() {
1269        let config = LintConfig {
1270            enabled: true,
1271            disabled_rules: vec![],
1272            rule_configs: std::collections::BTreeMap::from([(
1273                "LINT_RF_002".to_string(),
1274                serde_json::json!({"force_enable": false}),
1275            )]),
1276        };
1277        let rule = ReferencesQualification::from_config(&config);
1278        let sql = "SELECT a, b FROM foo LEFT JOIN vee ON vee.a = foo.a";
1279        let statements = parse_sql(sql).expect("parse");
1280        let issues = rule.check(
1281            &statements[0],
1282            &LintContext {
1283                sql,
1284                statement_range: 0..sql.len(),
1285                statement_index: 0,
1286            },
1287        );
1288        assert!(issues.is_empty());
1289    }
1290
1291    #[test]
1292    fn flags_projection_self_alias_in_multi_source_query() {
1293        let issues = run("SELECT foo AS foo FROM a LEFT JOIN b ON a.id = b.id");
1294        assert!(!issues.is_empty());
1295        assert!(issues
1296            .iter()
1297            .all(|issue| issue.code == issue_codes::LINT_RF_002));
1298    }
1299
1300    #[test]
1301    fn allows_later_projection_reference_to_previous_alias() {
1302        let issues = run("SELECT a.bar AS baz, baz FROM a LEFT JOIN b ON a.id = b.id");
1303        assert!(issues.is_empty());
1304    }
1305
1306    #[test]
1307    fn allows_bigquery_value_table_function_in_source_count() {
1308        let sql = "select unqualified_reference_from_table_a, _t_start from a left join unnest(generate_timestamp_array('2020-01-01','2020-01-30', interval 1 day)) as _t_start on true";
1309        let issues = run_in_dialect(sql, Dialect::Bigquery);
1310        assert!(issues.is_empty(), "{issues:?}");
1311    }
1312
1313    #[test]
1314    fn allows_bigquery_unqualified_value_table_alias_with_other_tables() {
1315        let sql = "select a.*, b.*, _t_start from a left join b on true left join unnest(generate_timestamp_array('2020-01-01','2020-01-30', interval 1 day)) as _t_start on true";
1316        let issues = run_in_dialect(sql, Dialect::Bigquery);
1317        assert!(issues.is_empty(), "{issues:?}");
1318    }
1319
1320    #[test]
1321    fn allows_mysql_session_variable_reference() {
1322        let sql = "SET @someVar = 1; SELECT Table1.Col1, Table2.Col2 FROM Table1 LEFT JOIN Table2 ON Table1.Join1 = Table2.Join1 WHERE Table1.FilterCol = @someVar;";
1323        let issues = run_in_dialect(sql, Dialect::Mysql);
1324        assert!(issues.is_empty(), "{issues:?}");
1325    }
1326
1327    #[test]
1328    fn flags_snowflake_table_plus_flatten_unqualified_value_reference() {
1329        let sql = "SELECT r.rec:foo::string AS foo, value:bar::string AS bar FROM foo.bar AS r, LATERAL FLATTEN(input => r.rec:result) AS x";
1330        let issues = run_in_dialect(sql, Dialect::Snowflake);
1331        assert!(!issues.is_empty(), "{issues:?}");
1332    }
1333
1334    #[test]
1335    fn ignore_words_config_skips_named_unqualified_refs() {
1336        let sql = "SELECT test1, test2 FROM t_table1 LEFT JOIN t_table_2 ON TRUE";
1337        let issues = run_with_config(
1338            sql,
1339            Dialect::Generic,
1340            serde_json::json!({"ignore_words":"test1,test2"}),
1341        );
1342        assert!(issues.is_empty(), "{issues:?}");
1343    }
1344
1345    #[test]
1346    fn ignore_words_regex_config_skips_named_unqualified_refs() {
1347        let sql = "SELECT _test1, _test2 FROM t_table1 LEFT JOIN t_table_2 ON TRUE";
1348        let issues = run_with_config(
1349            sql,
1350            Dialect::Generic,
1351            serde_json::json!({"ignore_words_regex":"^_"}),
1352        );
1353        assert!(issues.is_empty(), "{issues:?}");
1354    }
1355
1356    #[test]
1357    fn declared_bigquery_variables_are_exempt() {
1358        let sql = "DECLARE run_time TIMESTAMP DEFAULT '2020-01-01 00:00:00'; SELECT table_a.age FROM table_a INNER JOIN table_b ON table_a.id = table_b.id WHERE table_a.start_date <= run_time;";
1359        let issues = run_in_dialect(sql, Dialect::Bigquery);
1360        assert!(issues.is_empty(), "{issues:?}");
1361    }
1362
1363    #[test]
1364    fn flags_unqualified_subquery_reference_when_outer_scope_exists() {
1365        let sql = "SELECT a FROM foo WHERE a IN (SELECT a FROM bar)";
1366        let issues = run(sql);
1367        assert!(!issues.is_empty(), "{issues:?}");
1368    }
1369
1370    #[test]
1371    fn subqueries_ignore_external_references_allows_unqualified_subquery_reference() {
1372        let sql = "SELECT a FROM foo WHERE a IN (SELECT a FROM bar)";
1373        let issues = run_with_config(
1374            sql,
1375            Dialect::Generic,
1376            serde_json::json!({"subqueries_ignore_external_references": true}),
1377        );
1378        assert!(issues.is_empty(), "{issues:?}");
1379    }
1380
1381    #[test]
1382    fn flags_scalar_subquery_unqualified_reference() {
1383        let sql = "SELECT (SELECT max(id) FROM foo2) AS f1 FROM bar";
1384        let issues = run(sql);
1385        assert!(!issues.is_empty(), "{issues:?}");
1386    }
1387
1388    #[test]
1389    fn flags_exists_subquery_unqualified_reference() {
1390        let sql = "SELECT id FROM bar WHERE EXISTS (SELECT 1 FROM foo2 WHERE bar.id = id)";
1391        let issues = run(sql);
1392        assert!(!issues.is_empty(), "{issues:?}");
1393    }
1394
1395    #[test]
1396    fn flags_nested_correlated_subquery_inside_from_clause() {
1397        let sql = "SELECT a.id AS a_id, b.id AS b_id FROM (SELECT id FROM foo WHERE id IN (SELECT id FROM baz)) AS a INNER JOIN bar AS b ON a.id = b.id";
1398        let issues = run(sql);
1399        assert!(!issues.is_empty(), "{issues:?}");
1400    }
1401}