Skip to main content

sqrust_rules/structure/
limit_without_order_by.rs

1use sqrust_core::{Diagnostic, FileContext, Rule};
2use sqlparser::ast::{Expr, Query, Select, SelectItem, SetExpr, Statement, TableFactor};
3
4pub struct LimitWithoutOrderBy;
5
6impl Rule for LimitWithoutOrderBy {
7    fn name(&self) -> &'static str {
8        "Structure/LimitWithoutOrderBy"
9    }
10
11    fn check(&self, ctx: &FileContext) -> Vec<Diagnostic> {
12        if !ctx.parse_errors.is_empty() {
13            return Vec::new();
14        }
15
16        let mut diags = Vec::new();
17
18        for stmt in &ctx.statements {
19            collect_from_statement(stmt, ctx, &mut diags);
20        }
21
22        diags
23    }
24}
25
26/// Recursively collect violations from a single top-level Statement.
27fn collect_from_statement(stmt: &Statement, ctx: &FileContext, diags: &mut Vec<Diagnostic>) {
28    if let Statement::Query(query) = stmt {
29        collect_from_query(query, ctx, diags);
30    }
31}
32
33/// Recursively walk a Query, checking for LIMIT without ORDER BY.
34fn collect_from_query(query: &Query, ctx: &FileContext, diags: &mut Vec<Diagnostic>) {
35    // Check CTEs.
36    if let Some(with) = &query.with {
37        for cte in &with.cte_tables {
38            collect_from_query(&cte.query, ctx, diags);
39        }
40    }
41
42    // Check this query's own LIMIT / ORDER BY.
43    let has_limit = query.limit.is_some() || query.fetch.is_some();
44    let has_order_by = query
45        .order_by
46        .as_ref()
47        .map(|ob| !ob.exprs.is_empty())
48        .unwrap_or(false);
49
50    if has_limit && !has_order_by {
51        // Locate the LIMIT keyword in the source text for an accurate position.
52        let (line, col) = find_keyword_pos(&ctx.source, "LIMIT");
53        diags.push(Diagnostic {
54            rule: "Structure/LimitWithoutOrderBy",
55            message: "LIMIT without ORDER BY produces non-deterministic results".to_string(),
56            line,
57            col,
58        });
59    }
60
61    // Recurse into the body (handles subqueries in FROM clauses, nested set
62    // operations, etc.).
63    collect_from_set_expr(&query.body, ctx, diags);
64}
65
66/// Recurse into a SetExpr, looking for nested Queries.
67fn collect_from_set_expr(expr: &SetExpr, ctx: &FileContext, diags: &mut Vec<Diagnostic>) {
68    match expr {
69        SetExpr::Select(select) => {
70            collect_from_select(select, ctx, diags);
71        }
72        SetExpr::Query(inner) => {
73            collect_from_query(inner, ctx, diags);
74        }
75        SetExpr::SetOperation { left, right, .. } => {
76            collect_from_set_expr(left, ctx, diags);
77            collect_from_set_expr(right, ctx, diags);
78        }
79        _ => {}
80    }
81}
82
83/// Recurse into a SELECT, checking FROM clause subqueries and WHERE
84/// expressions.
85fn collect_from_select(select: &Select, ctx: &FileContext, diags: &mut Vec<Diagnostic>) {
86    // FROM clause — check Derived subqueries.
87    for table_with_joins in &select.from {
88        collect_from_table_factor(&table_with_joins.relation, ctx, diags);
89        for join in &table_with_joins.joins {
90            collect_from_table_factor(&join.relation, ctx, diags);
91        }
92    }
93
94    // WHERE clause — check subquery expressions.
95    if let Some(selection) = &select.selection {
96        collect_from_expr(selection, ctx, diags);
97    }
98
99    // SELECT projection — check scalar subqueries.
100    for item in &select.projection {
101        if let SelectItem::UnnamedExpr(e) | SelectItem::ExprWithAlias { expr: e, .. } = item {
102            collect_from_expr(e, ctx, diags);
103        }
104    }
105}
106
107/// Recurse into a TableFactor looking for Derived (subquery) tables.
108fn collect_from_table_factor(
109    factor: &TableFactor,
110    ctx: &FileContext,
111    diags: &mut Vec<Diagnostic>,
112) {
113    if let TableFactor::Derived { subquery, .. } = factor {
114        collect_from_query(subquery, ctx, diags);
115    }
116}
117
118/// Recurse into an expression looking for subqueries.
119fn collect_from_expr(expr: &Expr, ctx: &FileContext, diags: &mut Vec<Diagnostic>) {
120    match expr {
121        Expr::Subquery(q) => collect_from_query(q, ctx, diags),
122        Expr::InSubquery { subquery, .. } => collect_from_query(subquery, ctx, diags),
123        Expr::Exists { subquery, .. } => collect_from_query(subquery, ctx, diags),
124        Expr::BinaryOp { left, right, .. } => {
125            collect_from_expr(left, ctx, diags);
126            collect_from_expr(right, ctx, diags);
127        }
128        _ => {}
129    }
130}
131
132/// Finds the first occurrence of `keyword` (case-insensitive, word-boundary)
133/// in `source` and returns a 1-indexed (line, col) pair.
134/// Falls back to (1, 1) if not found.
135fn find_keyword_pos(source: &str, keyword: &str) -> (usize, usize) {
136    let upper = source.to_uppercase();
137    let kw_upper = keyword.to_uppercase();
138    let kw_len = kw_upper.len();
139    let bytes = upper.as_bytes();
140    let len = bytes.len();
141    let kw_bytes = kw_upper.as_bytes();
142
143    let mut pos = 0;
144    while pos + kw_len <= len {
145        if let Some(rel) = upper[pos..].find(kw_upper.as_str()) {
146            let abs = pos + rel;
147
148            // Word boundary check.
149            let before_ok = abs == 0 || {
150                let b = bytes[abs - 1];
151                !b.is_ascii_alphanumeric() && b != b'_'
152            };
153            let after = abs + kw_len;
154            let after_ok = after >= len || {
155                let b = bytes[after];
156                !b.is_ascii_alphanumeric() && b != b'_'
157            };
158
159            // Suppress matches inside strings/comments by checking the
160            // original source character.  We use a simple heuristic: if the
161            // original source byte at `abs` is inside a single-quoted string
162            // the first char before it would be a `'`.  A full SkipMap is
163            // overkill here since we only need the position, not precision —
164            // the AST already confirmed a real LIMIT exists.
165            if before_ok && after_ok {
166                let _ = kw_bytes; // reference to silence unused-variable warning
167                return line_col(source, abs);
168            }
169
170            pos = abs + 1;
171        } else {
172            break;
173        }
174    }
175
176    (1, 1)
177}
178
179/// Converts a byte offset in `source` to a 1-indexed (line, col) pair.
180fn line_col(source: &str, offset: usize) -> (usize, usize) {
181    let before = &source[..offset];
182    let line = before.chars().filter(|&c| c == '\n').count() + 1;
183    let col = before.rfind('\n').map(|p| offset - p - 1).unwrap_or(offset) + 1;
184    (line, col)
185}