Skip to main content

flowscope_core/linter/rules/
rf_002.rs

1//! LINT_RF_002: References qualification.
2//!
3//! In multi-table queries, require qualified column references.
4
5use std::collections::HashSet;
6
7use regex::Regex;
8
9use crate::linter::config::LintConfig;
10use crate::linter::rule::{LintContext, LintRule};
11use crate::parser::parse_sql_with_dialect;
12use crate::types::{issue_codes, Dialect, Issue};
13use sqlparser::ast::{
14    CreateView, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArguments, Ident,
15    ObjectNamePart, OrderByKind, Query, Select, SelectItem, SelectItemQualifiedWildcardKind,
16    SetExpr, Statement, TableFactor, TableWithJoins,
17};
18
19use super::semantic_helpers::{
20    join_on_expr, select_projection_alias_set, visit_select_expressions,
21};
22
23pub struct ReferencesQualification {
24    force_enable: bool,
25    ignore_words: HashSet<String>,
26    ignore_words_regex: Option<Regex>,
27    subqueries_ignore_external_references: bool,
28}
29
30impl ReferencesQualification {
31    pub fn from_config(config: &LintConfig) -> Self {
32        let mut ignore_words: HashSet<String> = HashSet::new();
33        if let Some(values) =
34            config.rule_option_string_list(issue_codes::LINT_RF_002, "ignore_words")
35        {
36            for value in values {
37                ignore_words.extend(split_ignore_words_csv(&value));
38            }
39        }
40
41        if let Some(csv) = config.rule_option_str(issue_codes::LINT_RF_002, "ignore_words") {
42            ignore_words.extend(split_ignore_words_csv(csv));
43        }
44
45        let ignore_words_regex = config
46            .rule_option_str(issue_codes::LINT_RF_002, "ignore_words_regex")
47            .and_then(|pattern| Regex::new(pattern).ok());
48
49        Self {
50            force_enable: config
51                .rule_option_bool(issue_codes::LINT_RF_002, "force_enable")
52                .unwrap_or(true),
53            ignore_words,
54            ignore_words_regex,
55            subqueries_ignore_external_references: config
56                .rule_option_bool(
57                    issue_codes::LINT_RF_002,
58                    "subqueries_ignore_external_references",
59                )
60                .unwrap_or(false),
61        }
62    }
63}
64
65impl Default for ReferencesQualification {
66    fn default() -> Self {
67        Self {
68            force_enable: true,
69            ignore_words: HashSet::new(),
70            ignore_words_regex: None,
71            subqueries_ignore_external_references: false,
72        }
73    }
74}
75
76impl LintRule for ReferencesQualification {
77    fn code(&self) -> &'static str {
78        issue_codes::LINT_RF_002
79    }
80
81    fn name(&self) -> &'static str {
82        "References qualification"
83    }
84
85    fn description(&self) -> &'static str {
86        "References should be qualified if select has more than one referenced table/view."
87    }
88
89    fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
90        if !self.force_enable {
91            return Vec::new();
92        }
93
94        let declared_variables =
95            declared_variables_before_statement(ctx.sql, ctx.dialect(), ctx.statement_index);
96        let context = QualificationContext {
97            dialect: ctx.dialect(),
98            ignore_words: &self.ignore_words,
99            ignore_words_regex: self.ignore_words_regex.as_ref(),
100            declared_variables: &declared_variables,
101            subqueries_ignore_external_references: self.subqueries_ignore_external_references,
102        };
103
104        let unqualified_count = violations_in_statement(statement, &context, 0);
105
106        (0..unqualified_count)
107            .map(|_| {
108                Issue::warning(
109                    issue_codes::LINT_RF_002,
110                    "Use qualified references in multi-table queries.",
111                )
112                .with_statement(ctx.statement_index)
113            })
114            .collect()
115    }
116}
117
118struct QualificationContext<'a> {
119    dialect: Dialect,
120    ignore_words: &'a HashSet<String>,
121    ignore_words_regex: Option<&'a Regex>,
122    declared_variables: &'a HashSet<String>,
123    subqueries_ignore_external_references: bool,
124}
125
126#[derive(Default)]
127struct SourceInfo {
128    normal_source_count: usize,
129    value_table_aliases: HashSet<String>,
130}
131
132fn split_ignore_words_csv(raw: &str) -> impl Iterator<Item = String> + '_ {
133    raw.split(',')
134        .map(str::trim)
135        .filter(|word| !word.is_empty())
136        .map(|word| word.to_ascii_uppercase())
137}
138
139fn declared_variables_before_statement(
140    sql: &str,
141    dialect: Dialect,
142    statement_index: usize,
143) -> HashSet<String> {
144    if !matches!(dialect, Dialect::Bigquery) {
145        return HashSet::new();
146    }
147
148    let Ok(statements) = parse_sql_with_dialect(sql, dialect) else {
149        return HashSet::new();
150    };
151
152    let mut names = HashSet::new();
153    for statement in statements.into_iter().take(statement_index) {
154        if let Statement::Declare { stmts } = statement {
155            for declare in stmts {
156                for name in declare.names {
157                    names.insert(name.value.to_ascii_uppercase());
158                }
159            }
160        }
161    }
162
163    names
164}
165
166fn violations_in_statement(
167    statement: &Statement,
168    ctx: &QualificationContext,
169    external_sources: usize,
170) -> usize {
171    match statement {
172        Statement::Query(query) => violations_in_query(query, ctx, external_sources),
173        Statement::Insert(insert) => insert
174            .source
175            .as_ref()
176            .map_or(0, |query| violations_in_query(query, ctx, external_sources)),
177        Statement::CreateView(CreateView { query, .. }) => {
178            violations_in_query(query, ctx, external_sources)
179        }
180        Statement::CreateTable(create) => create
181            .query
182            .as_ref()
183            .map_or(0, |query| violations_in_query(query, ctx, external_sources)),
184        _ => 0,
185    }
186}
187
188fn violations_in_query(
189    query: &Query,
190    ctx: &QualificationContext,
191    external_sources: usize,
192) -> usize {
193    let mut count = 0usize;
194
195    if let Some(with) = &query.with {
196        for cte in &with.cte_tables {
197            // CTE bodies do not inherit parent query scope.
198            count += violations_in_query(&cte.query, ctx, 0);
199        }
200    }
201
202    count += violations_in_set_expr(&query.body, ctx, external_sources);
203
204    if let Some(order_by) = &query.order_by {
205        if let OrderByKind::Expressions(order_exprs) = &order_by.kind {
206            let child_external = child_external_sources(external_sources, ctx);
207            for order_expr in order_exprs {
208                count += nested_subquery_violations_in_expr(&order_expr.expr, ctx, child_external);
209            }
210        }
211    }
212
213    count
214}
215
216fn violations_in_set_expr(
217    set_expr: &SetExpr,
218    ctx: &QualificationContext,
219    external_sources: usize,
220) -> usize {
221    match set_expr {
222        SetExpr::Select(select) => violations_in_select(select, ctx, external_sources),
223        SetExpr::Query(query) => violations_in_query(query, ctx, external_sources),
224        SetExpr::SetOperation { left, right, .. } => {
225            violations_in_set_expr(left, ctx, external_sources)
226                + violations_in_set_expr(right, ctx, external_sources)
227        }
228        SetExpr::Insert(statement)
229        | SetExpr::Update(statement)
230        | SetExpr::Delete(statement)
231        | SetExpr::Merge(statement) => violations_in_statement(statement, ctx, external_sources),
232        _ => 0,
233    }
234}
235
236fn violations_in_select(
237    select: &Select,
238    ctx: &QualificationContext,
239    external_sources: usize,
240) -> usize {
241    let source_info = collect_source_info(select, ctx.dialect);
242
243    let effective_external = if ctx.subqueries_ignore_external_references {
244        0
245    } else {
246        external_sources
247    };
248    let effective_source_count = source_info.normal_source_count + effective_external;
249
250    let aliases = select_projection_alias_set(select);
251
252    let mut count = 0usize;
253    if effective_source_count > 1 {
254        count += unqualified_references_in_select_scope(
255            select,
256            &aliases,
257            &source_info.value_table_aliases,
258            ctx,
259        );
260    }
261
262    let child_external = child_external_sources(effective_source_count, ctx);
263    count += nested_subquery_violations_in_select(select, ctx, child_external);
264
265    count
266}
267
268fn child_external_sources(current_effective_sources: usize, ctx: &QualificationContext) -> usize {
269    if ctx.subqueries_ignore_external_references {
270        0
271    } else {
272        current_effective_sources
273    }
274}
275
276fn collect_source_info(select: &Select, dialect: Dialect) -> SourceInfo {
277    let mut info = SourceInfo::default();
278
279    for from_item in &select.from {
280        collect_source_info_from_table_factor(&from_item.relation, dialect, &mut info);
281        for join in &from_item.joins {
282            collect_source_info_from_table_factor(&join.relation, dialect, &mut info);
283        }
284    }
285
286    info
287}
288
289fn collect_source_info_from_table_factor(
290    table_factor: &TableFactor,
291    dialect: Dialect,
292    info: &mut SourceInfo,
293) {
294    if is_value_table_function(table_factor, dialect) {
295        if let Some(alias) = table_factor_value_table_alias(table_factor) {
296            info.value_table_aliases.insert(alias);
297        }
298        if let TableFactor::UNNEST {
299            with_offset_alias: Some(alias),
300            ..
301        } = table_factor
302        {
303            info.value_table_aliases
304                .insert(alias.value.to_ascii_uppercase());
305        }
306        return;
307    }
308
309    match table_factor {
310        TableFactor::NestedJoin {
311            table_with_joins, ..
312        } => {
313            collect_source_info_from_table_factor(&table_with_joins.relation, dialect, info);
314            for join in &table_with_joins.joins {
315                collect_source_info_from_table_factor(&join.relation, dialect, info);
316            }
317        }
318        TableFactor::Pivot { table, .. }
319        | TableFactor::Unpivot { table, .. }
320        | TableFactor::MatchRecognize { table, .. } => {
321            collect_source_info_from_table_factor(table, dialect, info);
322        }
323        _ => {
324            info.normal_source_count += 1;
325        }
326    }
327}
328
329fn table_factor_value_table_alias(table_factor: &TableFactor) -> Option<String> {
330    let alias = match table_factor {
331        TableFactor::Table { alias, .. }
332        | TableFactor::Derived { alias, .. }
333        | TableFactor::TableFunction { alias, .. }
334        | TableFactor::Function { alias, .. }
335        | TableFactor::UNNEST { alias, .. }
336        | TableFactor::JsonTable { alias, .. }
337        | TableFactor::OpenJsonTable { alias, .. }
338        | TableFactor::NestedJoin { alias, .. }
339        | TableFactor::Pivot { alias, .. }
340        | TableFactor::Unpivot { alias, .. }
341        | TableFactor::MatchRecognize { alias, .. }
342        | TableFactor::XmlTable { alias, .. }
343        | TableFactor::SemanticView { alias, .. } => alias.as_ref(),
344    }?;
345
346    Some(alias.name.value.to_ascii_uppercase())
347}
348
349fn is_value_table_function(table_factor: &TableFactor, dialect: Dialect) -> bool {
350    // UNNEST produces a value table whose alias names a column, not a table.
351    // BigQuery parses UNNEST as a dedicated TableFactor; PostgreSQL also supports
352    // UNNEST in FROM and sqlparser emits the same AST node.
353    matches!(dialect, Dialect::Bigquery | Dialect::Postgres)
354        && matches!(table_factor, TableFactor::UNNEST { .. })
355}
356
357fn unqualified_references_in_select_scope(
358    select: &Select,
359    aliases: &HashSet<String>,
360    value_table_aliases: &HashSet<String>,
361    ctx: &QualificationContext,
362) -> usize {
363    let projection_unqualified_full =
364        projection_unqualified_count_with_aliases(select, aliases, value_table_aliases, ctx);
365    let projection_unqualified_sequential =
366        projection_unqualified_count_sequential(select, value_table_aliases, ctx);
367
368    let mut unqualified_in_select = 0usize;
369    visit_select_expressions(select, &mut |expr| {
370        unqualified_in_select += count_unqualified_references_in_expr_no_subqueries(
371            expr,
372            aliases,
373            value_table_aliases,
374            ctx,
375            &HashSet::new(),
376        );
377    });
378
379    unqualified_in_select.saturating_sub(projection_unqualified_full)
380        + projection_unqualified_sequential
381}
382
383fn projection_unqualified_count_with_aliases(
384    select: &Select,
385    aliases: &HashSet<String>,
386    value_table_aliases: &HashSet<String>,
387    ctx: &QualificationContext,
388) -> usize {
389    select
390        .projection
391        .iter()
392        .map(|item| match item {
393            SelectItem::UnnamedExpr(expr) | SelectItem::ExprWithAlias { expr, .. } => {
394                count_unqualified_references_in_expr_no_subqueries(
395                    expr,
396                    aliases,
397                    value_table_aliases,
398                    ctx,
399                    &HashSet::new(),
400                )
401            }
402            SelectItem::QualifiedWildcard(SelectItemQualifiedWildcardKind::Expr(expr), _) => {
403                count_unqualified_references_in_expr_no_subqueries(
404                    expr,
405                    aliases,
406                    value_table_aliases,
407                    ctx,
408                    &HashSet::new(),
409                )
410            }
411            SelectItem::QualifiedWildcard(_, _) => 0,
412            _ => 0,
413        })
414        .sum()
415}
416
417fn projection_unqualified_count_sequential(
418    select: &Select,
419    value_table_aliases: &HashSet<String>,
420    ctx: &QualificationContext,
421) -> usize {
422    let mut aliases_before = HashSet::new();
423    let mut unqualified = 0usize;
424
425    for item in &select.projection {
426        match item {
427            SelectItem::UnnamedExpr(expr) => {
428                unqualified += count_unqualified_references_in_expr_no_subqueries(
429                    expr,
430                    &aliases_before,
431                    value_table_aliases,
432                    ctx,
433                    &HashSet::new(),
434                );
435            }
436            SelectItem::ExprWithAlias { expr, alias } => {
437                unqualified += count_unqualified_references_in_expr_no_subqueries(
438                    expr,
439                    &aliases_before,
440                    value_table_aliases,
441                    ctx,
442                    &HashSet::new(),
443                );
444                aliases_before.insert(alias.value.to_ascii_uppercase());
445            }
446            _ => {}
447        }
448    }
449
450    unqualified
451}
452
453fn count_unqualified_references_in_expr_no_subqueries(
454    expr: &Expr,
455    aliases: &HashSet<String>,
456    value_table_aliases: &HashSet<String>,
457    ctx: &QualificationContext,
458    lambda_params: &HashSet<String>,
459) -> usize {
460    match expr {
461        Expr::Identifier(identifier) => identifier_is_unqualified_reference(
462            identifier,
463            aliases,
464            value_table_aliases,
465            ctx,
466            lambda_params,
467        )
468        .into(),
469        Expr::CompoundIdentifier(_) => 0,
470        Expr::CompoundFieldAccess { root, .. } => {
471            count_unqualified_references_in_expr_no_subqueries(
472                root,
473                aliases,
474                value_table_aliases,
475                ctx,
476                lambda_params,
477            )
478        }
479        Expr::JsonAccess { value, .. } => count_unqualified_references_in_expr_no_subqueries(
480            value,
481            aliases,
482            value_table_aliases,
483            ctx,
484            lambda_params,
485        ),
486        Expr::BinaryOp { left, right, .. }
487        | Expr::AnyOp { left, right, .. }
488        | Expr::AllOp { left, right, .. }
489        | Expr::IsDistinctFrom(left, right)
490        | Expr::IsNotDistinctFrom(left, right) => {
491            count_unqualified_references_in_expr_no_subqueries(
492                left,
493                aliases,
494                value_table_aliases,
495                ctx,
496                lambda_params,
497            ) + count_unqualified_references_in_expr_no_subqueries(
498                right,
499                aliases,
500                value_table_aliases,
501                ctx,
502                lambda_params,
503            )
504        }
505        Expr::UnaryOp { expr: inner, .. }
506        | Expr::Nested(inner)
507        | Expr::IsNull(inner)
508        | Expr::IsNotNull(inner)
509        | Expr::IsTrue(inner)
510        | Expr::IsNotTrue(inner)
511        | Expr::IsFalse(inner)
512        | Expr::IsNotFalse(inner)
513        | Expr::IsUnknown(inner)
514        | Expr::IsNotUnknown(inner)
515        | Expr::Cast { expr: inner, .. }
516        | Expr::AtTimeZone {
517            timestamp: inner, ..
518        }
519        | Expr::Extract { expr: inner, .. }
520        | Expr::Ceil { expr: inner, .. }
521        | Expr::Floor { expr: inner, .. }
522        | Expr::Position { expr: inner, .. }
523        | Expr::Substring { expr: inner, .. }
524        | Expr::Trim { expr: inner, .. } => count_unqualified_references_in_expr_no_subqueries(
525            inner,
526            aliases,
527            value_table_aliases,
528            ctx,
529            lambda_params,
530        ),
531        Expr::InList { expr, list, .. } => {
532            count_unqualified_references_in_expr_no_subqueries(
533                expr,
534                aliases,
535                value_table_aliases,
536                ctx,
537                lambda_params,
538            ) + list
539                .iter()
540                .map(|item| {
541                    count_unqualified_references_in_expr_no_subqueries(
542                        item,
543                        aliases,
544                        value_table_aliases,
545                        ctx,
546                        lambda_params,
547                    )
548                })
549                .sum::<usize>()
550        }
551        Expr::InSubquery { expr, .. } => count_unqualified_references_in_expr_no_subqueries(
552            expr,
553            aliases,
554            value_table_aliases,
555            ctx,
556            lambda_params,
557        ),
558        Expr::InUnnest {
559            expr, array_expr, ..
560        } => {
561            count_unqualified_references_in_expr_no_subqueries(
562                expr,
563                aliases,
564                value_table_aliases,
565                ctx,
566                lambda_params,
567            ) + count_unqualified_references_in_expr_no_subqueries(
568                array_expr,
569                aliases,
570                value_table_aliases,
571                ctx,
572                lambda_params,
573            )
574        }
575        Expr::Between {
576            expr, low, high, ..
577        } => {
578            count_unqualified_references_in_expr_no_subqueries(
579                expr,
580                aliases,
581                value_table_aliases,
582                ctx,
583                lambda_params,
584            ) + count_unqualified_references_in_expr_no_subqueries(
585                low,
586                aliases,
587                value_table_aliases,
588                ctx,
589                lambda_params,
590            ) + count_unqualified_references_in_expr_no_subqueries(
591                high,
592                aliases,
593                value_table_aliases,
594                ctx,
595                lambda_params,
596            )
597        }
598        Expr::Case {
599            operand,
600            conditions,
601            else_result,
602            ..
603        } => {
604            let mut count = 0usize;
605            if let Some(operand) = operand {
606                count += count_unqualified_references_in_expr_no_subqueries(
607                    operand,
608                    aliases,
609                    value_table_aliases,
610                    ctx,
611                    lambda_params,
612                );
613            }
614            for when in conditions {
615                count += count_unqualified_references_in_expr_no_subqueries(
616                    &when.condition,
617                    aliases,
618                    value_table_aliases,
619                    ctx,
620                    lambda_params,
621                );
622                count += count_unqualified_references_in_expr_no_subqueries(
623                    &when.result,
624                    aliases,
625                    value_table_aliases,
626                    ctx,
627                    lambda_params,
628                );
629            }
630            if let Some(otherwise) = else_result {
631                count += count_unqualified_references_in_expr_no_subqueries(
632                    otherwise,
633                    aliases,
634                    value_table_aliases,
635                    ctx,
636                    lambda_params,
637                );
638            }
639            count
640        }
641        Expr::Function(function) => count_unqualified_references_in_function_no_subqueries(
642            function,
643            aliases,
644            value_table_aliases,
645            ctx,
646            lambda_params,
647        ),
648        Expr::Lambda(lambda) => {
649            let mut next_lambda_params = lambda_params.clone();
650            for param in &lambda.params {
651                next_lambda_params.insert(param.value.to_ascii_uppercase());
652            }
653            count_unqualified_references_in_expr_no_subqueries(
654                &lambda.body,
655                aliases,
656                value_table_aliases,
657                ctx,
658                &next_lambda_params,
659            )
660        }
661        Expr::Subquery(_) | Expr::Exists { .. } => 0,
662        _ => 0,
663    }
664}
665
666fn count_unqualified_references_in_function_no_subqueries(
667    function: &Function,
668    aliases: &HashSet<String>,
669    value_table_aliases: &HashSet<String>,
670    ctx: &QualificationContext,
671    lambda_params: &HashSet<String>,
672) -> usize {
673    let mut count = 0usize;
674
675    if let FunctionArguments::List(arguments) = &function.args {
676        for (index, arg) in arguments.args.iter().enumerate() {
677            count += match arg {
678                FunctionArg::Unnamed(FunctionArgExpr::Expr(expr))
679                | FunctionArg::Named {
680                    arg: FunctionArgExpr::Expr(expr),
681                    ..
682                }
683                | FunctionArg::ExprNamed {
684                    arg: FunctionArgExpr::Expr(expr),
685                    ..
686                } => {
687                    if should_skip_identifier_reference_for_function_arg(function, index, expr) {
688                        0
689                    } else {
690                        count_unqualified_references_in_expr_no_subqueries(
691                            expr,
692                            aliases,
693                            value_table_aliases,
694                            ctx,
695                            lambda_params,
696                        )
697                    }
698                }
699                _ => 0,
700            };
701        }
702    }
703
704    if let Some(filter) = &function.filter {
705        count += count_unqualified_references_in_expr_no_subqueries(
706            filter,
707            aliases,
708            value_table_aliases,
709            ctx,
710            lambda_params,
711        );
712    }
713
714    for order_expr in &function.within_group {
715        count += count_unqualified_references_in_expr_no_subqueries(
716            &order_expr.expr,
717            aliases,
718            value_table_aliases,
719            ctx,
720            lambda_params,
721        );
722    }
723
724    if let Some(sqlparser::ast::WindowType::WindowSpec(spec)) = &function.over {
725        for expr in &spec.partition_by {
726            count += count_unqualified_references_in_expr_no_subqueries(
727                expr,
728                aliases,
729                value_table_aliases,
730                ctx,
731                lambda_params,
732            );
733        }
734        for order_expr in &spec.order_by {
735            count += count_unqualified_references_in_expr_no_subqueries(
736                &order_expr.expr,
737                aliases,
738                value_table_aliases,
739                ctx,
740                lambda_params,
741            );
742        }
743    }
744
745    count
746}
747
748fn nested_subquery_violations_in_select(
749    select: &Select,
750    ctx: &QualificationContext,
751    child_external_sources: usize,
752) -> usize {
753    let mut count = 0usize;
754
755    visit_select_expressions(select, &mut |expr| {
756        count += nested_subquery_violations_in_expr(expr, ctx, child_external_sources);
757    });
758
759    for from_item in &select.from {
760        count += nested_subquery_violations_in_table_factor(
761            &from_item.relation,
762            ctx,
763            child_external_sources,
764        );
765        for join in &from_item.joins {
766            count += nested_subquery_violations_in_table_factor(
767                &join.relation,
768                ctx,
769                child_external_sources,
770            );
771            if let Some(on_expr) = join_on_expr(&join.join_operator) {
772                count += nested_subquery_violations_in_expr(on_expr, ctx, child_external_sources);
773            }
774        }
775    }
776
777    count
778}
779
780fn nested_subquery_violations_in_expr(
781    expr: &Expr,
782    ctx: &QualificationContext,
783    child_external_sources: usize,
784) -> usize {
785    match expr {
786        Expr::InSubquery { expr, subquery, .. } => {
787            nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
788                + violations_in_query(subquery, ctx, child_external_sources)
789        }
790        Expr::Subquery(subquery) | Expr::Exists { subquery, .. } => {
791            violations_in_query(subquery, ctx, child_external_sources)
792        }
793        Expr::BinaryOp { left, right, .. }
794        | Expr::AnyOp { left, right, .. }
795        | Expr::AllOp { left, right, .. }
796        | Expr::IsDistinctFrom(left, right)
797        | Expr::IsNotDistinctFrom(left, right) => {
798            nested_subquery_violations_in_expr(left, ctx, child_external_sources)
799                + nested_subquery_violations_in_expr(right, ctx, child_external_sources)
800        }
801        Expr::UnaryOp { expr: inner, .. }
802        | Expr::Nested(inner)
803        | Expr::IsNull(inner)
804        | Expr::IsNotNull(inner)
805        | Expr::IsTrue(inner)
806        | Expr::IsNotTrue(inner)
807        | Expr::IsFalse(inner)
808        | Expr::IsNotFalse(inner)
809        | Expr::IsUnknown(inner)
810        | Expr::IsNotUnknown(inner)
811        | Expr::Cast { expr: inner, .. }
812        | Expr::AtTimeZone {
813            timestamp: inner, ..
814        }
815        | Expr::Extract { expr: inner, .. }
816        | Expr::Ceil { expr: inner, .. }
817        | Expr::Floor { expr: inner, .. }
818        | Expr::Position { expr: inner, .. }
819        | Expr::Substring { expr: inner, .. }
820        | Expr::Trim { expr: inner, .. }
821        | Expr::JsonAccess { value: inner, .. }
822        | Expr::CompoundFieldAccess { root: inner, .. } => {
823            nested_subquery_violations_in_expr(inner, ctx, child_external_sources)
824        }
825        Expr::InList { expr, list, .. } => {
826            nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
827                + list
828                    .iter()
829                    .map(|item| {
830                        nested_subquery_violations_in_expr(item, ctx, child_external_sources)
831                    })
832                    .sum::<usize>()
833        }
834        Expr::InUnnest {
835            expr, array_expr, ..
836        } => {
837            nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
838                + nested_subquery_violations_in_expr(array_expr, ctx, child_external_sources)
839        }
840        Expr::Between {
841            expr, low, high, ..
842        } => {
843            nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
844                + nested_subquery_violations_in_expr(low, ctx, child_external_sources)
845                + nested_subquery_violations_in_expr(high, ctx, child_external_sources)
846        }
847        Expr::Case {
848            operand,
849            conditions,
850            else_result,
851            ..
852        } => {
853            let mut count = operand.as_ref().map_or(0, |expr| {
854                nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
855            });
856            for when in conditions {
857                count += nested_subquery_violations_in_expr(
858                    &when.condition,
859                    ctx,
860                    child_external_sources,
861                );
862                count +=
863                    nested_subquery_violations_in_expr(&when.result, ctx, child_external_sources);
864            }
865            if let Some(otherwise) = else_result {
866                count += nested_subquery_violations_in_expr(otherwise, ctx, child_external_sources);
867            }
868            count
869        }
870        Expr::Function(function) => {
871            let mut count = 0usize;
872            if let FunctionArguments::List(arguments) = &function.args {
873                for arg in &arguments.args {
874                    count += match arg {
875                        FunctionArg::Unnamed(FunctionArgExpr::Expr(expr))
876                        | FunctionArg::Named {
877                            arg: FunctionArgExpr::Expr(expr),
878                            ..
879                        }
880                        | FunctionArg::ExprNamed {
881                            arg: FunctionArgExpr::Expr(expr),
882                            ..
883                        } => nested_subquery_violations_in_expr(expr, ctx, child_external_sources),
884                        _ => 0,
885                    };
886                }
887            }
888            if let Some(filter) = &function.filter {
889                count += nested_subquery_violations_in_expr(filter, ctx, child_external_sources);
890            }
891            for order_expr in &function.within_group {
892                count += nested_subquery_violations_in_expr(
893                    &order_expr.expr,
894                    ctx,
895                    child_external_sources,
896                );
897            }
898            if let Some(sqlparser::ast::WindowType::WindowSpec(spec)) = &function.over {
899                for expr in &spec.partition_by {
900                    count += nested_subquery_violations_in_expr(expr, ctx, child_external_sources);
901                }
902                for order_expr in &spec.order_by {
903                    count += nested_subquery_violations_in_expr(
904                        &order_expr.expr,
905                        ctx,
906                        child_external_sources,
907                    );
908                }
909            }
910            count
911        }
912        Expr::Lambda(lambda) => {
913            nested_subquery_violations_in_expr(&lambda.body, ctx, child_external_sources)
914        }
915        _ => 0,
916    }
917}
918
919fn nested_subquery_violations_in_table_factor(
920    table_factor: &TableFactor,
921    ctx: &QualificationContext,
922    child_external_sources: usize,
923) -> usize {
924    match table_factor {
925        TableFactor::Derived {
926            lateral, subquery, ..
927        } => {
928            let external = if *lateral { child_external_sources } else { 0 };
929            violations_in_query(subquery, ctx, external)
930        }
931        TableFactor::TableFunction { expr, .. } => {
932            nested_subquery_violations_in_expr(expr, ctx, child_external_sources)
933        }
934        TableFactor::Function { args, .. } => args
935            .iter()
936            .map(|arg| match arg {
937                FunctionArg::Unnamed(FunctionArgExpr::Expr(expr))
938                | FunctionArg::Named {
939                    arg: FunctionArgExpr::Expr(expr),
940                    ..
941                }
942                | FunctionArg::ExprNamed {
943                    arg: FunctionArgExpr::Expr(expr),
944                    ..
945                } => nested_subquery_violations_in_expr(expr, ctx, child_external_sources),
946                _ => 0,
947            })
948            .sum(),
949        TableFactor::UNNEST { array_exprs, .. } => array_exprs
950            .iter()
951            .map(|expr| nested_subquery_violations_in_expr(expr, ctx, child_external_sources))
952            .sum(),
953        TableFactor::JsonTable { json_expr, .. } | TableFactor::OpenJsonTable { json_expr, .. } => {
954            nested_subquery_violations_in_expr(json_expr, ctx, child_external_sources)
955        }
956        TableFactor::NestedJoin {
957            table_with_joins, ..
958        } => nested_subquery_violations_in_table_with_joins(
959            table_with_joins,
960            ctx,
961            child_external_sources,
962        ),
963        TableFactor::Pivot {
964            table,
965            aggregate_functions,
966            value_column,
967            default_on_null,
968            ..
969        } => {
970            let mut count =
971                nested_subquery_violations_in_table_factor(table, ctx, child_external_sources);
972            for expr_with_alias in aggregate_functions {
973                count += nested_subquery_violations_in_expr(
974                    &expr_with_alias.expr,
975                    ctx,
976                    child_external_sources,
977                );
978            }
979            for expr in value_column {
980                count += nested_subquery_violations_in_expr(expr, ctx, child_external_sources);
981            }
982            if let Some(expr) = default_on_null {
983                count += nested_subquery_violations_in_expr(expr, ctx, child_external_sources);
984            }
985            count
986        }
987        TableFactor::Unpivot {
988            table,
989            value,
990            columns,
991            ..
992        } => {
993            let mut count =
994                nested_subquery_violations_in_table_factor(table, ctx, child_external_sources);
995            count += nested_subquery_violations_in_expr(value, ctx, child_external_sources);
996            for expr_with_alias in columns {
997                count += nested_subquery_violations_in_expr(
998                    &expr_with_alias.expr,
999                    ctx,
1000                    child_external_sources,
1001                );
1002            }
1003            count
1004        }
1005        TableFactor::MatchRecognize {
1006            table,
1007            partition_by,
1008            order_by,
1009            measures,
1010            symbols,
1011            ..
1012        } => {
1013            let mut count =
1014                nested_subquery_violations_in_table_factor(table, ctx, child_external_sources);
1015            for expr in partition_by {
1016                count += nested_subquery_violations_in_expr(expr, ctx, child_external_sources);
1017            }
1018            for order in order_by {
1019                count +=
1020                    nested_subquery_violations_in_expr(&order.expr, ctx, child_external_sources);
1021            }
1022            for measure in measures {
1023                count +=
1024                    nested_subquery_violations_in_expr(&measure.expr, ctx, child_external_sources);
1025            }
1026            for symbol in symbols {
1027                count += nested_subquery_violations_in_expr(
1028                    &symbol.definition,
1029                    ctx,
1030                    child_external_sources,
1031                );
1032            }
1033            count
1034        }
1035        TableFactor::XmlTable { row_expression, .. } => {
1036            nested_subquery_violations_in_expr(row_expression, ctx, child_external_sources)
1037        }
1038        _ => 0,
1039    }
1040}
1041
1042fn nested_subquery_violations_in_table_with_joins(
1043    table_with_joins: &TableWithJoins,
1044    ctx: &QualificationContext,
1045    child_external_sources: usize,
1046) -> usize {
1047    let mut count = nested_subquery_violations_in_table_factor(
1048        &table_with_joins.relation,
1049        ctx,
1050        child_external_sources,
1051    );
1052
1053    for join in &table_with_joins.joins {
1054        count +=
1055            nested_subquery_violations_in_table_factor(&join.relation, ctx, child_external_sources);
1056        if let Some(on_expr) = join_on_expr(&join.join_operator) {
1057            count += nested_subquery_violations_in_expr(on_expr, ctx, child_external_sources);
1058        }
1059    }
1060
1061    count
1062}
1063
1064fn identifier_is_unqualified_reference(
1065    identifier: &Ident,
1066    aliases: &HashSet<String>,
1067    value_table_aliases: &HashSet<String>,
1068    ctx: &QualificationContext,
1069    lambda_params: &HashSet<String>,
1070) -> bool {
1071    let name = identifier.value.as_str();
1072    let normalized = name.to_ascii_uppercase();
1073
1074    if aliases.contains(&normalized)
1075        || value_table_aliases.contains(&normalized)
1076        || lambda_params.contains(&normalized)
1077        || ctx.declared_variables.contains(&normalized)
1078    {
1079        return false;
1080    }
1081
1082    if name.starts_with('@') {
1083        return false;
1084    }
1085
1086    if ctx.ignore_words.contains(&normalized) {
1087        return false;
1088    }
1089
1090    if let Some(regex) = ctx.ignore_words_regex {
1091        if regex.is_match(name) {
1092            return false;
1093        }
1094    }
1095
1096    true
1097}
1098
1099fn should_skip_identifier_reference_for_function_arg(
1100    function: &Function,
1101    arg_index: usize,
1102    expr: &Expr,
1103) -> bool {
1104    let Expr::Identifier(ident) = expr else {
1105        return false;
1106    };
1107    if ident.quote_style.is_some() || !is_date_part_identifier(&ident.value) {
1108        return false;
1109    }
1110
1111    let Some(function_name) = function_name_upper(function) else {
1112        return false;
1113    };
1114    if !is_datepart_function_name(&function_name) {
1115        return false;
1116    }
1117
1118    // Dialect-specific datepart-position differences exist; to avoid false
1119    // positives across common dialects we allow datepart keywords in the first
1120    // two argument positions of known datepart functions.
1121    arg_index <= 1
1122}
1123
1124fn function_name_upper(function: &Function) -> Option<String> {
1125    function
1126        .name
1127        .0
1128        .last()
1129        .and_then(ObjectNamePart::as_ident)
1130        .map(|ident| ident.value.to_ascii_uppercase())
1131}
1132
1133fn is_datepart_function_name(name: &str) -> bool {
1134    matches!(
1135        name,
1136        "DATEDIFF"
1137            | "DATE_DIFF"
1138            | "DATEADD"
1139            | "DATE_ADD"
1140            | "DATE_PART"
1141            | "DATETIME_TRUNC"
1142            | "TIME_TRUNC"
1143            | "TIMESTAMP_TRUNC"
1144            | "TIMESTAMP_DIFF"
1145            | "TIMESTAMPDIFF"
1146    )
1147}
1148
1149fn is_date_part_identifier(value: &str) -> bool {
1150    matches!(
1151        value.to_ascii_uppercase().as_str(),
1152        "YEAR"
1153            | "QUARTER"
1154            | "MONTH"
1155            | "WEEK"
1156            | "DAY"
1157            | "DOW"
1158            | "DOY"
1159            | "HOUR"
1160            | "MINUTE"
1161            | "SECOND"
1162            | "MILLISECOND"
1163            | "MICROSECOND"
1164            | "NANOSECOND"
1165    )
1166}
1167
1168#[cfg(test)]
1169mod tests {
1170    use super::*;
1171    use crate::linter::rule::with_active_dialect;
1172    use crate::parser::{parse_sql, parse_sql_with_dialect};
1173
1174    fn run(sql: &str) -> Vec<Issue> {
1175        let statements = parse_sql(sql).expect("parse");
1176        let rule = ReferencesQualification::default();
1177        statements
1178            .iter()
1179            .enumerate()
1180            .flat_map(|(index, statement)| {
1181                rule.check(
1182                    statement,
1183                    &LintContext {
1184                        sql,
1185                        statement_range: 0..sql.len(),
1186                        statement_index: index,
1187                    },
1188                )
1189            })
1190            .collect()
1191    }
1192
1193    fn run_in_dialect(sql: &str, dialect: Dialect) -> Vec<Issue> {
1194        let statements = parse_sql_with_dialect(sql, dialect).expect("parse");
1195        let rule = ReferencesQualification::default();
1196        let mut issues = Vec::new();
1197        with_active_dialect(dialect, || {
1198            for (index, statement) in statements.iter().enumerate() {
1199                issues.extend(rule.check(
1200                    statement,
1201                    &LintContext {
1202                        sql,
1203                        statement_range: 0..sql.len(),
1204                        statement_index: index,
1205                    },
1206                ));
1207            }
1208        });
1209        issues
1210    }
1211
1212    fn run_with_config(sql: &str, dialect: Dialect, config_json: serde_json::Value) -> Vec<Issue> {
1213        let statements = parse_sql_with_dialect(sql, dialect).expect("parse");
1214        let config = LintConfig {
1215            enabled: true,
1216            disabled_rules: vec![],
1217            rule_configs: std::collections::BTreeMap::from([(
1218                "references.qualification".to_string(),
1219                config_json,
1220            )]),
1221        };
1222        let rule = ReferencesQualification::from_config(&config);
1223        let mut issues = Vec::new();
1224        with_active_dialect(dialect, || {
1225            for (index, statement) in statements.iter().enumerate() {
1226                issues.extend(rule.check(
1227                    statement,
1228                    &LintContext {
1229                        sql,
1230                        statement_range: 0..sql.len(),
1231                        statement_index: index,
1232                    },
1233                ));
1234            }
1235        });
1236        issues
1237    }
1238
1239    // --- Edge cases adopted from sqlfluff RF02 ---
1240
1241    #[test]
1242    fn allows_fully_qualified_multi_table_query() {
1243        let issues = run("SELECT foo.a, vee.b FROM foo LEFT JOIN vee ON vee.a = foo.a");
1244        assert!(issues.is_empty());
1245    }
1246
1247    #[test]
1248    fn flags_unqualified_multi_table_query() {
1249        let issues = run("SELECT a, b FROM foo LEFT JOIN vee ON vee.a = foo.a");
1250        assert!(!issues.is_empty());
1251        assert!(issues
1252            .iter()
1253            .all(|issue| issue.code == issue_codes::LINT_RF_002));
1254    }
1255
1256    #[test]
1257    fn allows_qualified_multi_table_query_inside_subquery() {
1258        let issues =
1259            run("SELECT a FROM (SELECT foo.a, vee.b FROM foo LEFT JOIN vee ON vee.a = foo.a)");
1260        assert!(issues.is_empty());
1261    }
1262
1263    #[test]
1264    fn flags_unqualified_multi_table_query_inside_subquery() {
1265        let issues = run("SELECT a FROM (SELECT a, b FROM foo LEFT JOIN vee ON vee.a = foo.a)");
1266        assert!(!issues.is_empty());
1267    }
1268
1269    #[test]
1270    fn force_enable_false_disables_rule() {
1271        let config = LintConfig {
1272            enabled: true,
1273            disabled_rules: vec![],
1274            rule_configs: std::collections::BTreeMap::from([(
1275                "LINT_RF_002".to_string(),
1276                serde_json::json!({"force_enable": false}),
1277            )]),
1278        };
1279        let rule = ReferencesQualification::from_config(&config);
1280        let sql = "SELECT a, b FROM foo LEFT JOIN vee ON vee.a = foo.a";
1281        let statements = parse_sql(sql).expect("parse");
1282        let issues = rule.check(
1283            &statements[0],
1284            &LintContext {
1285                sql,
1286                statement_range: 0..sql.len(),
1287                statement_index: 0,
1288            },
1289        );
1290        assert!(issues.is_empty());
1291    }
1292
1293    #[test]
1294    fn flags_projection_self_alias_in_multi_source_query() {
1295        let issues = run("SELECT foo AS foo FROM a LEFT JOIN b ON a.id = b.id");
1296        assert!(!issues.is_empty());
1297        assert!(issues
1298            .iter()
1299            .all(|issue| issue.code == issue_codes::LINT_RF_002));
1300    }
1301
1302    #[test]
1303    fn allows_later_projection_reference_to_previous_alias() {
1304        let issues = run("SELECT a.bar AS baz, baz FROM a LEFT JOIN b ON a.id = b.id");
1305        assert!(issues.is_empty());
1306    }
1307
1308    #[test]
1309    fn allows_bigquery_value_table_function_in_source_count() {
1310        let sql = "select unqualified_reference_from_table_a, _t_start from a left join unnest(generate_timestamp_array('2020-01-01','2020-01-30', interval 1 day)) as _t_start on true";
1311        let issues = run_in_dialect(sql, Dialect::Bigquery);
1312        assert!(issues.is_empty(), "{issues:?}");
1313    }
1314
1315    #[test]
1316    fn allows_bigquery_unqualified_value_table_alias_with_other_tables() {
1317        let sql = "select a.*, b.*, _t_start from a left join b on true left join unnest(generate_timestamp_array('2020-01-01','2020-01-30', interval 1 day)) as _t_start on true";
1318        let issues = run_in_dialect(sql, Dialect::Bigquery);
1319        assert!(issues.is_empty(), "{issues:?}");
1320    }
1321
1322    #[test]
1323    fn allows_mysql_session_variable_reference() {
1324        let sql = "SET @someVar = 1; SELECT Table1.Col1, Table2.Col2 FROM Table1 LEFT JOIN Table2 ON Table1.Join1 = Table2.Join1 WHERE Table1.FilterCol = @someVar;";
1325        let issues = run_in_dialect(sql, Dialect::Mysql);
1326        assert!(issues.is_empty(), "{issues:?}");
1327    }
1328
1329    #[test]
1330    fn flags_snowflake_table_plus_flatten_unqualified_value_reference() {
1331        let sql = "SELECT r.rec:foo::string AS foo, value:bar::string AS bar FROM foo.bar AS r, LATERAL FLATTEN(input => r.rec:result) AS x";
1332        let issues = run_in_dialect(sql, Dialect::Snowflake);
1333        assert!(!issues.is_empty(), "{issues:?}");
1334    }
1335
1336    #[test]
1337    fn ignore_words_config_skips_named_unqualified_refs() {
1338        let sql = "SELECT test1, test2 FROM t_table1 LEFT JOIN t_table_2 ON TRUE";
1339        let issues = run_with_config(
1340            sql,
1341            Dialect::Generic,
1342            serde_json::json!({"ignore_words":"test1,test2"}),
1343        );
1344        assert!(issues.is_empty(), "{issues:?}");
1345    }
1346
1347    #[test]
1348    fn ignore_words_regex_config_skips_named_unqualified_refs() {
1349        let sql = "SELECT _test1, _test2 FROM t_table1 LEFT JOIN t_table_2 ON TRUE";
1350        let issues = run_with_config(
1351            sql,
1352            Dialect::Generic,
1353            serde_json::json!({"ignore_words_regex":"^_"}),
1354        );
1355        assert!(issues.is_empty(), "{issues:?}");
1356    }
1357
1358    #[test]
1359    fn declared_bigquery_variables_are_exempt() {
1360        let sql = "DECLARE run_time TIMESTAMP DEFAULT '2020-01-01 00:00:00'; SELECT table_a.age FROM table_a INNER JOIN table_b ON table_a.id = table_b.id WHERE table_a.start_date <= run_time;";
1361        let issues = run_in_dialect(sql, Dialect::Bigquery);
1362        assert!(issues.is_empty(), "{issues:?}");
1363    }
1364
1365    #[test]
1366    fn flags_unqualified_subquery_reference_when_outer_scope_exists() {
1367        let sql = "SELECT a FROM foo WHERE a IN (SELECT a FROM bar)";
1368        let issues = run(sql);
1369        assert!(!issues.is_empty(), "{issues:?}");
1370    }
1371
1372    #[test]
1373    fn subqueries_ignore_external_references_allows_unqualified_subquery_reference() {
1374        let sql = "SELECT a FROM foo WHERE a IN (SELECT a FROM bar)";
1375        let issues = run_with_config(
1376            sql,
1377            Dialect::Generic,
1378            serde_json::json!({"subqueries_ignore_external_references": true}),
1379        );
1380        assert!(issues.is_empty(), "{issues:?}");
1381    }
1382
1383    #[test]
1384    fn flags_scalar_subquery_unqualified_reference() {
1385        let sql = "SELECT (SELECT max(id) FROM foo2) AS f1 FROM bar";
1386        let issues = run(sql);
1387        assert!(!issues.is_empty(), "{issues:?}");
1388    }
1389
1390    #[test]
1391    fn flags_exists_subquery_unqualified_reference() {
1392        let sql = "SELECT id FROM bar WHERE EXISTS (SELECT 1 FROM foo2 WHERE bar.id = id)";
1393        let issues = run(sql);
1394        assert!(!issues.is_empty(), "{issues:?}");
1395    }
1396
1397    #[test]
1398    fn flags_nested_correlated_subquery_inside_from_clause() {
1399        let sql = "SELECT a.id AS a_id, b.id AS b_id FROM (SELECT id FROM foo WHERE id IN (SELECT id FROM baz)) AS a INNER JOIN bar AS b ON a.id = b.id";
1400        let issues = run(sql);
1401        assert!(!issues.is_empty(), "{issues:?}");
1402    }
1403}