Skip to main content

sqrust_rules/structure/
order_by_in_subquery.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2use sqlparser::ast::{Expr, Query, Select, SelectItem, SetExpr, Statement, TableFactor};
3
4use crate::capitalisation::{is_word_char, SkipMap};
5
6pub struct OrderByInSubquery;
7
8impl Rule for OrderByInSubquery {
9    fn name(&self) -> &'static str {
10        "OrderByInSubquery"
11    }
12
13    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
14        if !ctx.parse_errors.is_empty() {
15            return Vec::new();
16        }
17
18        let mut diags = Vec::new();
19
20        for stmt in &ctx.statements {
21            if let Statement::Query(query) = stmt {
22                // Walk the top-level query, but exempt it from the check.
23                // CTEs inside WITH are inner queries → checked.
24                // The body of the top-level query is walked for subqueries,
25                // but the top-level query itself (query.order_by) is not flagged.
26                check_top_level_query(query, ctx, &mut diags);
27            }
28        }
29
30        diags
31    }
32}
33
34// ── AST walking ───────────────────────────────────────────────────────────────
35
36/// Walk the top-level query:
37/// - CTEs are inner queries → pass to `check_inner_query`
38/// - The body is walked for nested subqueries
39fn check_top_level_query(query: &Query, ctx: &FileContext, diags: &mut Vec<Diagnostic>) {
40    // Walk CTEs — each CTE body is an inner query.
41    if let Some(with) = &query.with {
42        for cte in &with.cte_tables {
43            check_inner_query(&cte.query, ctx, diags);
44        }
45    }
46
47    // Walk the body for any nested subqueries (not the top-level body itself).
48    check_set_expr_for_subqueries(&query.body, ctx, diags);
49}
50
51/// Check an inner (subquery) Query: flag if it has ORDER BY without LIMIT/OFFSET,
52/// then recurse into its own subqueries.
53fn check_inner_query(query: &Query, ctx: &FileContext, diags: &mut Vec<Diagnostic>) {
54    // Flag if ORDER BY present and no LIMIT and no OFFSET.
55    if let Some(order_by) = &query.order_by {
56        if !order_by.exprs.is_empty() && query.limit.is_none() && query.offset.is_none() {
57            let (line, col) = find_keyword_pos(&ctx.source, "ORDER BY");
58            diags.push(Diagnostic {
59                rule: "OrderByInSubquery",
60                message: "ORDER BY in subquery without LIMIT has no effect on the final result"
61                    .to_string(),
62                line,
63                col,
64            });
65        }
66    }
67
68    // Recurse into CTEs of this inner query.
69    if let Some(with) = &query.with {
70        for cte in &with.cte_tables {
71            check_inner_query(&cte.query, ctx, diags);
72        }
73    }
74
75    // Recurse into the body for further nested subqueries.
76    check_set_expr_for_subqueries(&query.body, ctx, diags);
77}
78
79/// Walk a SetExpr looking for subqueries inside it (not the SetExpr itself).
80fn check_set_expr_for_subqueries(expr: &SetExpr, ctx: &FileContext, diags: &mut Vec<Diagnostic>) {
81    match expr {
82        SetExpr::Select(sel) => check_select_for_subqueries(sel, ctx, diags),
83        SetExpr::Query(inner) => check_inner_query(inner, ctx, diags),
84        SetExpr::SetOperation { left, right, .. } => {
85            check_set_expr_for_subqueries(left, ctx, diags);
86            check_set_expr_for_subqueries(right, ctx, diags);
87        }
88        _ => {}
89    }
90}
91
92/// Walk a SELECT clause looking for subqueries in FROM, WHERE, and projections.
93fn check_select_for_subqueries(sel: &Select, ctx: &FileContext, diags: &mut Vec<Diagnostic>) {
94    for twj in &sel.from {
95        // Derived table (subquery in FROM).
96        check_table_factor(&twj.relation, ctx, diags);
97        for join in &twj.joins {
98            check_table_factor(&join.relation, ctx, diags);
99        }
100    }
101
102    // WHERE clause.
103    if let Some(selection) = &sel.selection {
104        check_expr(selection, ctx, diags);
105    }
106
107    // SELECT list.
108    for item in &sel.projection {
109        if let SelectItem::UnnamedExpr(e) | SelectItem::ExprWithAlias { expr: e, .. } = item {
110            check_expr(e, ctx, diags);
111        }
112    }
113}
114
115fn check_table_factor(tf: &TableFactor, ctx: &FileContext, diags: &mut Vec<Diagnostic>) {
116    if let TableFactor::Derived { subquery, .. } = tf {
117        check_inner_query(subquery, ctx, diags);
118    }
119}
120
121fn check_expr(expr: &Expr, ctx: &FileContext, diags: &mut Vec<Diagnostic>) {
122    match expr {
123        Expr::Subquery(q) => check_inner_query(q, ctx, diags),
124        Expr::InSubquery { subquery, .. } => check_inner_query(subquery, ctx, diags),
125        Expr::Exists { subquery, .. } => check_inner_query(subquery, ctx, diags),
126        Expr::BinaryOp { left, right, .. } => {
127            check_expr(left, ctx, diags);
128            check_expr(right, ctx, diags);
129        }
130        _ => {}
131    }
132}
133
134// ── keyword position helper ───────────────────────────────────────────────────
135
136/// Find the first occurrence of a two-word keyword like "ORDER BY"
137/// (case-insensitive, word-boundary on the first word) in `source`, outside
138/// strings and comments.
139///
140/// Returns a 1-indexed (line, col) pair. Falls back to (1, 1) if not found.
141fn find_keyword_pos(source: &str, keyword: &str) -> (usize, usize) {
142    let bytes = source.as_bytes();
143    let len = bytes.len();
144    let skip_map = SkipMap::build(source);
145    let kw_upper: Vec<u8> = keyword.bytes().map(|b| b.to_ascii_uppercase()).collect();
146    let kw_len = kw_upper.len();
147
148    let mut i = 0;
149    while i + kw_len <= len {
150        if !skip_map.is_code(i) {
151            i += 1;
152            continue;
153        }
154
155        // Word boundary before first character.
156        let before_ok = i == 0 || !is_word_char(bytes[i - 1]);
157        if !before_ok {
158            i += 1;
159            continue;
160        }
161
162        // Case-insensitive multi-character match (including internal space).
163        let matches = bytes[i..i + kw_len]
164            .iter()
165            .zip(kw_upper.iter())
166            .all(|(a, b)| {
167                // The space character in "ORDER BY" is not a word char, just match exactly.
168                if *b == b' ' {
169                    *a == b' ' || *a == b'\t'
170                } else {
171                    a.eq_ignore_ascii_case(b)
172                }
173            });
174
175        if matches {
176            let after = i + kw_len;
177            let after_ok = after >= len || !is_word_char(bytes[after]);
178            // All non-space bytes must be real code.
179            let all_code = (i..i + kw_len).all(|k| kw_upper[k - i] == b' ' || skip_map.is_code(k));
180
181            if after_ok && all_code {
182                return line_col(source, i);
183            }
184        }
185
186        i += 1;
187    }
188
189    (1, 1)
190}
191
192/// Converts a byte offset in `source` to a 1-indexed (line, col) pair.
193fn line_col(source: &str, offset: usize) -> (usize, usize) {
194    let before = &source[..offset];
195    let line = before.chars().filter(|&c| c == '\n').count() + 1;
196    let col = before.rfind('\n').map(|p| offset - p - 1).unwrap_or(offset) + 1;
197    (line, col)
198}