1use crate::linter::rule::{LintContext, LintRule};
12use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
13use sqlparser::ast::{
14 CreateView, Expr, GroupByExpr, Query, Select, SelectItem, SetExpr, Statement, TableFactor,
15 Update,
16};
17use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
18use std::collections::{HashMap, HashSet};
19
20pub struct StructureColumnOrder;
21
22impl LintRule for StructureColumnOrder {
23 fn code(&self) -> &'static str {
24 issue_codes::LINT_ST_006
25 }
26
27 fn name(&self) -> &'static str {
28 "Structure column order"
29 }
30
31 fn description(&self) -> &'static str {
32 "Select wildcards then simple targets before calculations and aggregates."
33 }
34
35 fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
36 let mut violations_info: Vec<ViolationInfo> = Vec::new();
37 visit_order_safe_selects(statement, &mut |select| {
38 if let Some(info) = check_select_order(select) {
39 violations_info.push(info);
40 }
41 });
42
43 let resolved = st006_resolve_violations(ctx, &violations_info);
44
45 resolved
49 .into_iter()
50 .filter_map(|r| {
51 let span = r.span?;
52 let mut issue = Issue::info(
53 issue_codes::LINT_ST_006,
54 "Prefer simple columns before complex expressions in SELECT.",
55 )
56 .with_statement(ctx.statement_index)
57 .with_span(span);
58 if let Some(edits) = r.edits {
59 issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
60 }
61 Some(issue)
62 })
63 .collect()
64 }
65}
66
67fn item_band(item: &SelectItem) -> u8 {
76 match item {
77 SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => 0,
78 SelectItem::UnnamedExpr(expr) => expr_band(expr),
79 SelectItem::ExprWithAlias { expr, .. } => expr_band(expr),
80 }
81}
82
83fn expr_band(expr: &Expr) -> u8 {
84 match expr {
85 Expr::Identifier(_) | Expr::CompoundIdentifier(_) => 1,
87 Expr::Value(_) => 1,
89 Expr::Cast { .. } => 1,
93 Expr::Function(f) if is_cast_function(f) => 1,
95 Expr::Nested(inner) => expr_band(inner),
97 _ => 2,
99 }
100}
101
102fn is_cast_function(f: &sqlparser::ast::Function) -> bool {
104 use sqlparser::ast::ObjectNamePart;
105 f.name
106 .0
107 .last()
108 .and_then(ObjectNamePart::as_ident)
109 .is_some_and(|ident| ident.value.eq_ignore_ascii_case("CAST"))
110}
111
112struct ViolationInfo {
117 bands: Vec<u8>,
119 has_implicit_refs: bool,
121 first_item_hint: String,
124}
125
126fn check_select_order(select: &Select) -> Option<ViolationInfo> {
129 if select.projection.len() < 2 {
130 return None;
131 }
132
133 let bands: Vec<u8> = select.projection.iter().map(item_band).collect();
134
135 let mut max_band = 0u8;
137 let mut violated = false;
138 for &band in &bands {
139 if band < max_band {
140 violated = true;
141 break;
142 }
143 max_band = max_band.max(band);
144 }
145
146 if !violated {
147 return None;
148 }
149
150 let has_implicit_refs = has_implicit_column_references(select);
151
152 Some(ViolationInfo {
153 bands,
154 has_implicit_refs,
155 first_item_hint: select
156 .projection
157 .first()
158 .map(std::string::ToString::to_string)
159 .unwrap_or_default(),
160 })
161}
162
163fn has_implicit_column_references(select: &Select) -> bool {
166 if let GroupByExpr::Expressions(exprs, _) = &select.group_by {
167 for expr in exprs {
168 if matches!(expr, Expr::Value(v) if matches!(v.value, sqlparser::ast::Value::Number(_, _)))
169 {
170 return true;
171 }
172 }
173 }
174
175 for sort in &select.sort_by {
176 if matches!(&sort.expr, Expr::Value(v) if matches!(v.value, sqlparser::ast::Value::Number(_, _)))
177 {
178 return true;
179 }
180 }
181
182 false
183}
184
185fn visit_order_safe_selects<F: FnMut(&Select)>(statement: &Statement, visitor: &mut F) {
192 match statement {
193 Statement::Query(query) => visit_query_selects(query, visitor, false),
194 Statement::Insert(_) => {}
196 Statement::Merge(..) => {}
198 Statement::CreateTable(create) => {
200 if create.query.is_some() {
201 }
203 }
205 Statement::CreateView(CreateView { query, .. }) => {
206 visit_query_selects(query, visitor, false);
207 }
208 Statement::Update(Update {
209 table,
210 from,
211 selection,
212 ..
213 }) => {
214 visit_table_factor_selects(&table.relation, visitor);
216 for join in &table.joins {
217 visit_table_factor_selects(&join.relation, visitor);
218 }
219 if let Some(from) = from {
220 match from {
221 sqlparser::ast::UpdateTableFromKind::BeforeSet(tables)
222 | sqlparser::ast::UpdateTableFromKind::AfterSet(tables) => {
223 for t in tables {
224 visit_table_factor_selects(&t.relation, visitor);
225 for j in &t.joins {
226 visit_table_factor_selects(&j.relation, visitor);
227 }
228 }
229 }
230 }
231 }
232 if let Some(sel) = selection {
233 visit_expr_selects(sel, visitor);
234 }
235 }
236 _ => {}
237 }
238}
239
240fn visit_query_selects<F: FnMut(&Select)>(query: &Query, visitor: &mut F, in_set_operation: bool) {
241 let order_sensitive_ctes = order_sensitive_cte_names_for_query(query);
242
243 if let Some(with) = &query.with {
248 for cte in &with.cte_tables {
249 let cte_name = cte.alias.name.value.to_ascii_uppercase();
250 let cte_order_matters =
251 in_set_operation || order_sensitive_ctes.contains(cte_name.as_str());
252 visit_query_selects(&cte.query, visitor, cte_order_matters);
253 }
254 }
255
256 visit_set_expr_selects(&query.body, visitor, in_set_operation);
257}
258
259fn visit_set_expr_selects<F: FnMut(&Select)>(
260 set_expr: &SetExpr,
261 visitor: &mut F,
262 in_set_operation: bool,
263) {
264 match set_expr {
265 SetExpr::Select(select) => {
266 if in_set_operation {
267 return;
271 }
272 visitor(select);
273 for table in &select.from {
275 visit_table_factor_selects(&table.relation, visitor);
276 for join in &table.joins {
277 visit_table_factor_selects(&join.relation, visitor);
278 }
279 }
280 if let Some(sel) = &select.selection {
282 visit_expr_selects(sel, visitor);
283 }
284 }
285 SetExpr::Query(query) => visit_query_selects(query, visitor, in_set_operation),
286 SetExpr::SetOperation { left, right, .. } => {
287 visit_set_expr_selects(left, visitor, true);
289 visit_set_expr_selects(right, visitor, true);
290 }
291 _ => {}
292 }
293}
294
295fn visit_table_factor_selects<F: FnMut(&Select)>(table_factor: &TableFactor, visitor: &mut F) {
296 match table_factor {
297 TableFactor::Derived { subquery, .. } => {
298 visit_query_selects(subquery, visitor, false);
299 }
300 TableFactor::NestedJoin {
301 table_with_joins, ..
302 } => {
303 visit_table_factor_selects(&table_with_joins.relation, visitor);
304 for join in &table_with_joins.joins {
305 visit_table_factor_selects(&join.relation, visitor);
306 }
307 }
308 _ => {}
309 }
310}
311
312fn visit_expr_selects<F: FnMut(&Select)>(expr: &Expr, visitor: &mut F) {
313 match expr {
314 Expr::Subquery(query)
315 | Expr::Exists {
316 subquery: query, ..
317 } => visit_query_selects(query, visitor, false),
318 Expr::InSubquery {
319 expr: inner,
320 subquery,
321 ..
322 } => {
323 visit_expr_selects(inner, visitor);
324 visit_query_selects(subquery, visitor, false);
325 }
326 Expr::BinaryOp { left, right, .. } => {
327 visit_expr_selects(left, visitor);
328 visit_expr_selects(right, visitor);
329 }
330 Expr::UnaryOp { expr: inner, .. }
331 | Expr::Nested(inner)
332 | Expr::IsNull(inner)
333 | Expr::IsNotNull(inner)
334 | Expr::Cast { expr: inner, .. } => visit_expr_selects(inner, visitor),
335 Expr::Case {
336 operand,
337 conditions,
338 else_result,
339 ..
340 } => {
341 if let Some(op) = operand {
342 visit_expr_selects(op, visitor);
343 }
344 for when in conditions {
345 visit_expr_selects(&when.condition, visitor);
346 visit_expr_selects(&when.result, visitor);
347 }
348 if let Some(e) = else_result {
349 visit_expr_selects(e, visitor);
350 }
351 }
352 _ => {}
353 }
354}
355
356fn order_sensitive_cte_names_for_query(query: &Query) -> HashSet<String> {
357 let Some(with) = &query.with else {
358 return HashSet::new();
359 };
360
361 let cte_names: HashSet<String> = with
362 .cte_tables
363 .iter()
364 .map(|cte| cte.alias.name.value.to_ascii_uppercase())
365 .collect();
366 if cte_names.is_empty() {
367 return HashSet::new();
368 }
369
370 let mut deps_by_cte: HashMap<String, HashSet<String>> = HashMap::new();
372 for cte in &with.cte_tables {
373 let mut deps = HashSet::new();
374 collect_cte_references_in_set_expr(&cte.query.body, &cte_names, &mut deps);
375 deps_by_cte.insert(cte.alias.name.value.to_ascii_uppercase(), deps);
376 }
377
378 let mut sensitive = HashSet::new();
381 if matches!(query.body.as_ref(), SetExpr::SetOperation { .. })
382 && set_expr_has_wildcard_select(&query.body)
383 {
384 collect_cte_references_in_set_expr(&query.body, &cte_names, &mut sensitive);
385 }
386 for cte in &with.cte_tables {
387 if matches!(cte.query.body.as_ref(), SetExpr::SetOperation { .. })
388 && set_expr_has_wildcard_select(&cte.query.body)
389 {
390 collect_cte_references_in_set_expr(&cte.query.body, &cte_names, &mut sensitive);
391 }
392 }
393
394 let mut stack: Vec<String> = sensitive.iter().cloned().collect();
396 while let Some(current) = stack.pop() {
397 let Some(deps) = deps_by_cte.get(¤t) else {
398 continue;
399 };
400 for dep in deps {
401 if sensitive.insert(dep.clone()) {
402 stack.push(dep.clone());
403 }
404 }
405 }
406
407 sensitive
408}
409
410fn collect_cte_references_in_set_expr(
411 set_expr: &SetExpr,
412 cte_names: &HashSet<String>,
413 out: &mut HashSet<String>,
414) {
415 match set_expr {
416 SetExpr::Select(select) => collect_cte_references_in_select(select, cte_names, out),
417 SetExpr::SetOperation { left, right, .. } => {
418 collect_cte_references_in_set_expr(left, cte_names, out);
419 collect_cte_references_in_set_expr(right, cte_names, out);
420 }
421 SetExpr::Query(query) => collect_cte_references_in_set_expr(&query.body, cte_names, out),
422 _ => {}
423 }
424}
425
426fn collect_cte_references_in_select(
427 select: &Select,
428 cte_names: &HashSet<String>,
429 out: &mut HashSet<String>,
430) {
431 for table in &select.from {
432 collect_cte_references_in_table_factor(&table.relation, cte_names, out);
433 for join in &table.joins {
434 collect_cte_references_in_table_factor(&join.relation, cte_names, out);
435 }
436 }
437}
438
439fn collect_cte_references_in_table_factor(
440 table_factor: &TableFactor,
441 cte_names: &HashSet<String>,
442 out: &mut HashSet<String>,
443) {
444 match table_factor {
445 TableFactor::Table { name, .. } => {
446 if let Some(ident) = name.0.last().and_then(|part| part.as_ident()) {
447 let upper = ident.value.to_ascii_uppercase();
448 if cte_names.contains(upper.as_str()) {
449 out.insert(upper);
450 }
451 }
452 }
453 TableFactor::Derived { subquery, .. } => {
454 collect_cte_references_in_set_expr(&subquery.body, cte_names, out);
455 }
456 TableFactor::NestedJoin {
457 table_with_joins, ..
458 } => {
459 collect_cte_references_in_table_factor(&table_with_joins.relation, cte_names, out);
460 for join in &table_with_joins.joins {
461 collect_cte_references_in_table_factor(&join.relation, cte_names, out);
462 }
463 }
464 TableFactor::Pivot { table, .. }
465 | TableFactor::Unpivot { table, .. }
466 | TableFactor::MatchRecognize { table, .. } => {
467 collect_cte_references_in_table_factor(table, cte_names, out);
468 }
469 _ => {}
470 }
471}
472
473fn set_expr_has_wildcard_select(set_expr: &SetExpr) -> bool {
477 match set_expr {
478 SetExpr::Select(select) => select.projection.iter().any(|item| {
479 matches!(
480 item,
481 SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _)
482 )
483 }),
484 SetExpr::SetOperation { left, right, .. } => {
485 set_expr_has_wildcard_select(left) || set_expr_has_wildcard_select(right)
486 }
487 SetExpr::Query(query) => set_expr_has_wildcard_select(&query.body),
488 _ => false,
489 }
490}
491
492#[derive(Clone, Debug)]
497struct PositionedToken {
498 token: Token,
499 start: usize,
500 end: usize,
501}
502
503#[derive(Clone, Debug)]
504struct SelectProjectionSegment {
505 items: Vec<SelectProjectionItem>,
506}
507
508#[derive(Clone, Debug)]
509struct SelectProjectionItem {
510 core_span: Span,
511 leading_span: Span,
512 trailing_span: Span,
513}
514
515#[derive(Clone, Debug)]
516struct St006AutofixCandidate {
517 span: Span,
518 edits: Vec<IssuePatchEdit>,
519}
520
521struct ResolvedViolation {
525 span: Option<Span>,
526 edits: Option<Vec<IssuePatchEdit>>,
527}
528
529fn st006_resolve_violations(
531 ctx: &LintContext,
532 violations: &[ViolationInfo],
533) -> Vec<ResolvedViolation> {
534 let candidates = st006_autofix_candidates_for_context(ctx, violations);
535
536 if candidates.len() == violations.len() {
538 return candidates
539 .into_iter()
540 .map(|c| ResolvedViolation {
541 span: Some(c.span),
542 edits: if c.edits.is_empty() {
543 None
544 } else {
545 Some(c.edits)
546 },
547 })
548 .collect();
549 }
550
551 let spans = st006_violation_spans(ctx, violations);
554 if spans.len() == violations.len() {
555 return spans
556 .into_iter()
557 .map(|span| ResolvedViolation {
558 span: Some(span),
559 edits: None,
560 })
561 .collect();
562 }
563
564 violations
566 .iter()
567 .map(|_| ResolvedViolation {
568 span: None,
569 edits: None,
570 })
571 .collect()
572}
573
574fn positioned_tokens_for_context(ctx: &LintContext) -> Vec<PositionedToken> {
575 let from_document_tokens = ctx.with_document_tokens(|tokens| {
576 if tokens.is_empty() {
577 return None;
578 }
579
580 let mut positioned = Vec::new();
581 for token in tokens {
582 let (start, end) = token_with_span_offsets(ctx.sql, token)?;
583 if start < ctx.statement_range.start || end > ctx.statement_range.end {
584 continue;
585 }
586 positioned.push(PositionedToken {
587 token: token.token.clone(),
588 start,
589 end,
590 });
591 }
592 Some(positioned)
593 });
594
595 if let Some(tokens) = from_document_tokens {
596 tokens
597 } else {
598 let Some(tokens) = tokenize_with_spans(ctx.statement_sql(), ctx.dialect()) else {
599 return Vec::new();
600 };
601
602 let mut positioned = Vec::new();
603 for token in &tokens {
604 let Some((start, end)) = token_with_span_offsets(ctx.statement_sql(), token) else {
605 continue;
606 };
607 positioned.push(PositionedToken {
608 token: token.token.clone(),
609 start: ctx.statement_range.start + start,
610 end: ctx.statement_range.start + end,
611 });
612 }
613 positioned
614 }
615}
616
617fn st006_autofix_candidates_for_context(
618 ctx: &LintContext,
619 violations: &[ViolationInfo],
620) -> Vec<St006AutofixCandidate> {
621 let tokens = positioned_tokens_for_context(ctx);
622 let segments = select_projection_segments(&tokens);
623
624 if segments.len() < violations.len() {
629 return Vec::new();
630 }
631
632 let mut candidates = Vec::new();
636 let mut violation_idx = 0;
637 for segment in &segments {
638 if violation_idx >= violations.len() {
639 break;
640 }
641 let violation = &violations[violation_idx];
642 if segment.items.len() != violation.bands.len() {
643 continue;
644 }
645 if !segment_first_item_matches(ctx.sql, segment, &violation.first_item_hint) {
646 continue;
647 }
648
649 if violation.has_implicit_refs {
651 violation_idx += 1;
652 continue;
653 }
654
655 if let Some(candidate) =
656 projection_reorder_candidate_by_band(ctx.sql, &tokens, segment, &violation.bands)
657 {
658 candidates.push(candidate);
659 }
660 violation_idx += 1;
661 }
662
663 candidates
664}
665
666fn st006_violation_spans(ctx: &LintContext, violations: &[ViolationInfo]) -> Vec<Span> {
669 let tokens = positioned_tokens_for_context(ctx);
670 let segments = select_projection_segments(&tokens);
671
672 if segments.len() < violations.len() {
673 return Vec::new();
674 }
675
676 let mut spans = Vec::new();
677 let mut violation_idx = 0;
678 for segment in &segments {
679 if violation_idx >= violations.len() {
680 break;
681 }
682 let violation = &violations[violation_idx];
683 if segment.items.len() != violation.bands.len() {
684 continue;
685 }
686 if !segment_first_item_matches(ctx.sql, segment, &violation.first_item_hint) {
687 continue;
688 }
689
690 if let Some(first) = segment.items.first() {
692 spans.push(first.core_span);
693 }
694 violation_idx += 1;
695 }
696
697 spans
698}
699
700fn select_projection_segments(tokens: &[PositionedToken]) -> Vec<SelectProjectionSegment> {
701 let significant_positions: Vec<usize> = tokens
702 .iter()
703 .enumerate()
704 .filter_map(|(index, token)| (!is_trivia(&token.token)).then_some(index))
705 .collect();
706 if significant_positions.is_empty() {
707 return Vec::new();
708 }
709
710 let mut depths = vec![0usize; significant_positions.len()];
711 let mut depth = 0usize;
712 for (position, token_index) in significant_positions.iter().copied().enumerate() {
713 depths[position] = depth;
714 match tokens[token_index].token {
715 Token::LParen => depth += 1,
716 Token::RParen => depth = depth.saturating_sub(1),
717 _ => {}
718 }
719 }
720
721 let mut segments = Vec::new();
722 for position in 0..significant_positions.len() {
723 let token = &tokens[significant_positions[position]].token;
724 if !token_word_equals(token, "SELECT") {
725 continue;
726 }
727
728 let base_depth = depths[position];
729 let Some(projection_start) = projection_start_after_select(
730 tokens,
731 &significant_positions,
732 &depths,
733 position + 1,
734 base_depth,
735 ) else {
736 continue;
737 };
738 let Some(from_position) = from_position_for_select(
739 tokens,
740 &significant_positions,
741 &depths,
742 projection_start,
743 base_depth,
744 ) else {
745 continue;
746 };
747 if from_position <= projection_start {
748 continue;
749 }
750
751 let items = projection_items(
752 tokens,
753 &significant_positions,
754 &depths,
755 projection_start,
756 from_position,
757 base_depth,
758 );
759 if items.is_empty() {
760 continue;
761 }
762
763 segments.push(SelectProjectionSegment { items });
764 }
765
766 segments
767}
768
769fn projection_start_after_select(
770 tokens: &[PositionedToken],
771 significant_positions: &[usize],
772 depths: &[usize],
773 mut position: usize,
774 base_depth: usize,
775) -> Option<usize> {
776 while position < significant_positions.len() {
777 if depths[position] != base_depth {
778 return Some(position);
779 }
780
781 let token = &tokens[significant_positions[position]].token;
782 if token_word_equals(token, "DISTINCT")
783 || token_word_equals(token, "ALL")
784 || token_word_equals(token, "DISTINCTROW")
785 {
786 position += 1;
787 continue;
788 }
789 return Some(position);
790 }
791
792 None
793}
794
795fn from_position_for_select(
796 tokens: &[PositionedToken],
797 significant_positions: &[usize],
798 depths: &[usize],
799 start_position: usize,
800 base_depth: usize,
801) -> Option<usize> {
802 (start_position..significant_positions.len()).find(|&position| {
803 depths[position] == base_depth
804 && token_word_equals(&tokens[significant_positions[position]].token, "FROM")
805 })
806}
807
808fn projection_items(
809 tokens: &[PositionedToken],
810 significant_positions: &[usize],
811 depths: &[usize],
812 start_position: usize,
813 from_position: usize,
814 base_depth: usize,
815) -> Vec<SelectProjectionItem> {
816 if start_position >= from_position {
817 return Vec::new();
818 }
819
820 let mut core_items: Vec<(usize, usize, Option<usize>)> = Vec::new();
821 let mut item_start = start_position;
822
823 for position in start_position..from_position {
824 let token = &tokens[significant_positions[position]].token;
825 if depths[position] == base_depth && matches!(token, Token::Comma) {
826 if item_start < position {
827 core_items.push((item_start, position - 1, Some(position)));
828 }
829 item_start = position + 1;
830 }
831 }
832
833 if item_start < from_position {
834 core_items.push((item_start, from_position - 1, None));
835 }
836
837 if core_items.is_empty() {
838 return Vec::new();
839 }
840
841 let mut items = Vec::with_capacity(core_items.len());
842 let mut previous_comma_end = 0usize;
843 for (index, (core_start_position, core_end_position, comma_position)) in
844 core_items.iter().copied().enumerate()
845 {
846 let Some(core_span) = span_from_positions(
847 tokens,
848 significant_positions,
849 core_start_position,
850 core_end_position,
851 ) else {
852 return Vec::new();
853 };
854
855 let leading_start = if index == 0 {
856 core_span.start
857 } else {
858 previous_comma_end
859 };
860 let leading_end = core_span.start;
861
862 let trailing_end = if let Some(comma_position) = comma_position {
863 tokens[significant_positions[comma_position]].start
864 } else {
865 core_span.end
866 };
867 if trailing_end < core_span.end {
868 return Vec::new();
869 }
870
871 let leading_span = Span::new(leading_start, leading_end);
872 let trailing_span = Span::new(core_span.end, trailing_end);
873 items.push(SelectProjectionItem {
874 core_span,
875 leading_span,
876 trailing_span,
877 });
878
879 if let Some(comma_position) = comma_position {
880 previous_comma_end = tokens[significant_positions[comma_position]].end;
881 }
882 }
883
884 items
885}
886
887fn span_from_positions(
888 tokens: &[PositionedToken],
889 significant_positions: &[usize],
890 start_position: usize,
891 end_position: usize,
892) -> Option<Span> {
893 if end_position < start_position {
894 return None;
895 }
896
897 let start = tokens[*significant_positions.get(start_position)?].start;
898 let end = tokens[*significant_positions.get(end_position)?].end;
899 (start < end).then_some(Span::new(start, end))
900}
901
902fn projection_reorder_candidate_by_band(
904 sql: &str,
905 tokens: &[PositionedToken],
906 segment: &SelectProjectionSegment,
907 bands: &[u8],
908) -> Option<St006AutofixCandidate> {
909 if segment.items.len() != bands.len() {
910 return None;
911 }
912
913 let replace_span = Span::new(
914 segment.items.first()?.core_span.start,
915 segment.items.last()?.core_span.end,
916 );
917 if replace_span.start >= replace_span.end || replace_span.end > sql.len() {
918 return None;
919 }
920
921 let mut normalized_items = Vec::with_capacity(segment.items.len());
922 for item in &segment.items {
923 let core_span = item.core_span;
924 if core_span.start >= core_span.end || core_span.end > sql.len() {
925 return None;
926 }
927 if trailing_span_has_inline_comment(tokens, sql, core_span, item.trailing_span) {
928 return None;
929 }
930 let text = sql[core_span.start..core_span.end].trim();
931 let normalized = normalize_projection_item_text(text)?;
932
933 let leading = if item.leading_span.start < item.leading_span.end
934 && item.leading_span.end <= sql.len()
935 {
936 &sql[item.leading_span.start..item.leading_span.end]
937 } else {
938 ""
939 };
940 let trailing = if item.trailing_span.start < item.trailing_span.end
941 && item.trailing_span.end <= sql.len()
942 {
943 &sql[item.trailing_span.start..item.trailing_span.end]
944 } else {
945 ""
946 };
947 normalized_items.push((normalized, leading, trailing));
948 }
949
950 let mut indexed: Vec<(usize, u8)> = normalized_items
952 .iter()
953 .enumerate()
954 .zip(bands.iter())
955 .map(|((i, _), &band)| (i, band))
956 .collect();
957 indexed.sort_by_key(|&(i, band)| (band, i));
958
959 let original_segment = &sql[replace_span.start..replace_span.end];
960 let replacement = if original_segment.contains('\n') || original_segment.contains('\r') {
961 let indent = indent_prefix_for_offset(sql, segment.items.first()?.core_span.start);
962 let default_separator = format!(",\n{indent}");
963 let mut rewritten = String::new();
964 for (position, &(item_index, _)) in indexed.iter().enumerate() {
965 let (core_text, leading, trailing) = &normalized_items[item_index];
966 if position > 0 {
967 if leading.is_empty() {
968 rewritten.push_str(&default_separator);
969 } else {
970 rewritten.push(',');
971 rewritten.push_str(leading);
972 }
973 }
974 rewritten.push_str(core_text);
975 rewritten.push_str(trailing);
976 }
977 rewritten
978 } else {
979 indexed
980 .iter()
981 .map(|(item_index, _)| normalized_items[*item_index].0.as_str())
982 .collect::<Vec<_>>()
983 .join(", ")
984 };
985 if replacement.is_empty() || replacement == original_segment {
986 return None;
987 }
988
989 Some(St006AutofixCandidate {
990 span: replace_span,
991 edits: vec![IssuePatchEdit::new(replace_span, replacement)],
992 })
993}
994
995fn trailing_span_has_inline_comment(
996 tokens: &[PositionedToken],
997 sql: &str,
998 core_span: Span,
999 trailing_span: Span,
1000) -> bool {
1001 if trailing_span.start >= trailing_span.end {
1002 return false;
1003 }
1004 tokens.iter().any(|token| {
1005 if token.start < trailing_span.start
1006 || token.end > trailing_span.end
1007 || !is_comment(&token.token)
1008 {
1009 return false;
1010 }
1011 if token.start <= core_span.end || token.start > sql.len() || core_span.end > sql.len() {
1012 return true;
1013 }
1014 let between = &sql[core_span.end..token.start];
1015 !between.contains('\n') && !between.contains('\r')
1016 })
1017}
1018
1019fn normalize_projection_item_text(text: &str) -> Option<String> {
1020 let trimmed = text.trim();
1021 if trimmed.is_empty() {
1022 return None;
1023 }
1024 Some(trimmed.to_string())
1025}
1026
1027fn segment_first_item_matches(sql: &str, segment: &SelectProjectionSegment, hint: &str) -> bool {
1028 let Some(first_item) = segment.items.first() else {
1029 return false;
1030 };
1031 let span = first_item.core_span;
1032 if span.start >= span.end || span.end > sql.len() {
1033 return false;
1034 }
1035 normalize_item_hint(&sql[span.start..span.end]) == normalize_item_hint(hint)
1036}
1037
1038fn normalize_item_hint(text: &str) -> String {
1039 text.chars()
1040 .filter(|ch| !ch.is_whitespace())
1041 .flat_map(|ch| ch.to_uppercase())
1042 .collect()
1043}
1044
1045fn indent_prefix_for_offset(sql: &str, offset: usize) -> String {
1046 let start = sql[..offset].rfind('\n').map_or(0, |idx| idx + 1);
1047 sql[start..offset]
1048 .chars()
1049 .take_while(|ch| ch.is_whitespace())
1050 .collect()
1051}
1052
1053fn tokenize_with_spans(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
1058 let dialect = dialect.to_sqlparser_dialect();
1059 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
1060 tokenizer.tokenize_with_location().ok()
1061}
1062
1063fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
1064 let start = line_col_to_offset(
1065 sql,
1066 token.span.start.line as usize,
1067 token.span.start.column as usize,
1068 )?;
1069 let end = line_col_to_offset(
1070 sql,
1071 token.span.end.line as usize,
1072 token.span.end.column as usize,
1073 )?;
1074 Some((start, end))
1075}
1076
1077fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
1078 if line == 0 || column == 0 {
1079 return None;
1080 }
1081
1082 let mut current_line = 1usize;
1083 let mut current_col = 1usize;
1084 for (offset, ch) in sql.char_indices() {
1085 if current_line == line && current_col == column {
1086 return Some(offset);
1087 }
1088 if ch == '\n' {
1089 current_line += 1;
1090 current_col = 1;
1091 } else {
1092 current_col += 1;
1093 }
1094 }
1095 if current_line == line && current_col == column {
1096 return Some(sql.len());
1097 }
1098 None
1099}
1100
1101fn token_word_equals(token: &Token, expected_upper: &str) -> bool {
1102 matches!(token, Token::Word(word) if word.value.eq_ignore_ascii_case(expected_upper))
1103}
1104
1105fn is_trivia(token: &Token) -> bool {
1106 matches!(
1107 token,
1108 Token::Whitespace(
1109 Whitespace::Space
1110 | Whitespace::Newline
1111 | Whitespace::Tab
1112 | Whitespace::SingleLineComment { .. }
1113 | Whitespace::MultiLineComment(_)
1114 )
1115 )
1116}
1117
1118fn is_comment(token: &Token) -> bool {
1119 matches!(
1120 token,
1121 Token::Whitespace(Whitespace::SingleLineComment { .. } | Whitespace::MultiLineComment(_))
1122 )
1123}
1124
1125#[cfg(test)]
1126mod tests {
1127 use super::*;
1128 use crate::parser::parse_sql;
1129 use crate::types::IssueAutofixApplicability;
1130
1131 fn run(sql: &str) -> Vec<Issue> {
1132 let statements = parse_sql(sql).expect("parse");
1133 let rule = StructureColumnOrder;
1134 statements
1135 .iter()
1136 .enumerate()
1137 .flat_map(|(index, statement)| {
1138 rule.check(
1139 statement,
1140 &LintContext {
1141 sql,
1142 statement_range: 0..sql.len(),
1143 statement_index: index,
1144 },
1145 )
1146 })
1147 .collect()
1148 }
1149
1150 fn apply_edits(sql: &str, edits: &[IssuePatchEdit]) -> String {
1151 let mut output = sql.to_string();
1152 let mut ordered = edits.iter().collect::<Vec<_>>();
1153 ordered.sort_by_key(|edit| edit.span.start);
1154
1155 for edit in ordered.into_iter().rev() {
1156 output.replace_range(edit.span.start..edit.span.end, &edit.replacement);
1157 }
1158
1159 output
1160 }
1161
1162 #[test]
1165 fn pass_select_statement_order() {
1166 let issues = run("SELECT a, cast(b as int) as b, c FROM x");
1168 assert!(issues.is_empty());
1169 }
1170
1171 #[test]
1172 fn pass_union_statements_ignored() {
1173 let sql = "SELECT a + b as c, d FROM table_a UNION ALL SELECT c, d FROM table_b";
1174 let issues = run(sql);
1175 assert!(issues.is_empty());
1176 }
1177
1178 #[test]
1179 fn pass_insert_statements_ignored() {
1180 let sql = "\
1181INSERT INTO example_schema.example_table
1182(id, example_column, rank_asc, rank_desc)
1183SELECT
1184 id,
1185 CASE WHEN col_a IN('a', 'b', 'c') THEN col_a END AS example_column,
1186 rank_asc,
1187 rank_desc
1188FROM another_schema.another_table";
1189 let issues = run(sql);
1190 assert!(issues.is_empty());
1191 }
1192
1193 #[test]
1194 fn pass_insert_statement_with_cte_ignored() {
1195 let sql = "\
1196INSERT INTO my_table
1197WITH my_cte AS (SELECT * FROM t1)
1198SELECT MAX(field1), field2
1199FROM t1";
1200 let issues = run(sql);
1201 assert!(issues.is_empty());
1202 }
1203
1204 #[test]
1205 fn with_cte_insert_into_still_checks_cte() {
1206 let sql = "\
1211WITH my_cte AS (
1212 SELECT MAX(field1) AS mx, field2 FROM t1
1213)
1214INSERT INTO my_table (col1, col2)
1215SELECT mx, field2 FROM my_cte";
1216 let issues = run(sql);
1217 assert_eq!(issues.len(), 1);
1219 }
1220
1221 #[test]
1222 fn with_cte_insert_into_no_violation_when_ordered() {
1223 let sql = "\
1225WITH my_cte AS (
1226 SELECT field2, MAX(field1) AS mx FROM t1
1227)
1228INSERT INTO my_table (col1, col2)
1229SELECT mx, field2 FROM my_cte";
1230 let issues = run(sql);
1231 assert!(issues.is_empty());
1232 }
1233
1234 #[test]
1235 fn pass_merge_statements_ignored() {
1236 let sql = "\
1237MERGE INTO t
1238USING
1239(
1240 SELECT
1241 DATE_TRUNC('DAY', end_time) AS time_day,
1242 b
1243 FROM u
1244) AS u ON (a = b)
1245WHEN MATCHED THEN
1246UPDATE SET a = b
1247WHEN NOT MATCHED THEN
1248INSERT (b) VALUES (c)";
1249 let issues = run(sql);
1250 assert!(issues.is_empty());
1251 }
1252
1253 #[test]
1254 fn pass_merge_statement_with_cte_ignored() {
1255 let sql = "\
1256MERGE INTO t
1257USING
1258(
1259 WITH my_cte AS (SELECT * FROM t1)
1260 SELECT MAX(field1), field2
1261 FROM t1
1262) AS u ON (a = b)
1263WHEN MATCHED THEN
1264UPDATE SET a = b
1265WHEN NOT MATCHED THEN
1266INSERT (b) VALUES (c)";
1267 let issues = run(sql);
1268 assert!(issues.is_empty());
1269 }
1270
1271 #[test]
1272 fn pass_create_table_as_select_with_cte_ignored() {
1273 let sql = "\
1274CREATE TABLE new_table AS (
1275 WITH my_cte AS (SELECT * FROM t1)
1276 SELECT MAX(field1), field2
1277 FROM t1
1278)";
1279 let issues = run(sql);
1280 assert!(issues.is_empty());
1281 }
1282
1283 #[test]
1284 fn pass_cte_used_in_set() {
1285 let sql = "\
1286WITH T1 AS (
1287 SELECT
1288 'a'::varchar AS A,
1289 1::bigint AS B
1290),
1291T2 AS (
1292 SELECT
1293 CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A,
1294 COL AS B
1295 FROM T
1296)
1297SELECT * FROM T1
1298UNION ALL
1299SELECT * FROM T2";
1300 let issues = run(sql);
1301 assert!(issues.is_empty());
1302 }
1303
1304 #[test]
1305 fn fail_cte_used_in_set_with_explicit_columns() {
1306 let sql = "\
1309WITH T1 AS (
1310 SELECT
1311 'a'::varchar AS A,
1312 1::bigint AS B
1313),
1314T2 AS (
1315 SELECT
1316 CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A,
1317 COL AS B
1318 FROM T
1319)
1320SELECT A, B FROM T1
1321UNION ALL
1322SELECT A, B FROM T2";
1323 let issues = run(sql);
1324 assert_eq!(issues.len(), 1);
1325 assert_eq!(issues[0].code, issue_codes::LINT_ST_006);
1326 }
1327
1328 #[test]
1329 fn pass_transitive_cte_dependency_into_wildcard_set_operation() {
1330 let sql = "\
1333WITH base_a AS (
1334 SELECT MAX(a) AS mx, b FROM t
1335),
1336base_b AS (
1337 SELECT MAX(c) AS mx, d FROM t2
1338),
1339combined AS (
1340 SELECT * FROM base_a
1341 UNION ALL
1342 SELECT * FROM base_b
1343)
1344SELECT mx, b FROM combined";
1345 let issues = run(sql);
1346 assert!(issues.is_empty());
1347 }
1348
1349 #[test]
1350 fn pass_subquery_used_in_set() {
1351 let sql = "\
1352SELECT * FROM (SELECT 'a'::varchar AS A, 1::bigint AS B)
1353UNION ALL
1354SELECT * FROM (SELECT CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A, COL AS B FROM T)";
1355 let issues = run(sql);
1356 assert!(issues.is_empty());
1357 }
1358
1359 #[test]
1362 fn fail_select_statement_order_1() {
1363 let sql = "SELECT a, row_number() over (partition by id order by date) as y, b FROM x";
1365 let issues = run(sql);
1366 assert_eq!(issues.len(), 1);
1367 assert_eq!(issues[0].code, issue_codes::LINT_ST_006);
1368 }
1369
1370 #[test]
1371 fn fail_select_statement_order_2() {
1372 let sql = "SELECT row_number() over (partition by id order by date) as y, *, cast(b as int) as b_int FROM x";
1374 let issues = run(sql);
1375 assert_eq!(issues.len(), 1);
1376 }
1377
1378 #[test]
1379 fn fail_select_statement_order_3() {
1380 let sql = "SELECT row_number() over (partition by id order by date) as y, cast(b as int) as b_int, * FROM x";
1382 let issues = run(sql);
1383 assert_eq!(issues.len(), 1);
1384 }
1385
1386 #[test]
1387 fn fail_select_statement_order_4() {
1388 let sql = "SELECT row_number() over (partition by id order by date) as y, b::int, * FROM x";
1390 let issues = run(sql);
1391 assert_eq!(issues.len(), 1);
1392 }
1393
1394 #[test]
1395 fn fail_select_statement_order_5() {
1396 let sql = "SELECT row_number() over (partition by id order by date) as y, *, 2::int + 4 as sum, cast(b) as c FROM x";
1398 let issues = run(sql);
1399 assert_eq!(issues.len(), 1);
1400 }
1401
1402 #[test]
1405 fn autofix_reorder_simple_before_complex() {
1406 let sql = "SELECT a + 1, a FROM t";
1407 let issues = run(sql);
1408 assert_eq!(issues.len(), 1);
1409
1410 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1411 assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
1412 let fixed = apply_edits(sql, &autofix.edits);
1413 assert_eq!(fixed, "SELECT a, a + 1 FROM t");
1414 }
1415
1416 #[test]
1417 fn autofix_reorder_wildcard_first() {
1418 let sql = "SELECT row_number() over (partition by id order by date) as y, *, cast(b as int) as b_int FROM x";
1419 let issues = run(sql);
1420 assert_eq!(issues.len(), 1);
1421
1422 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1423 let fixed = apply_edits(sql, &autofix.edits);
1424 assert_eq!(fixed, "SELECT *, cast(b as int) as b_int, row_number() over (partition by id order by date) as y FROM x");
1425 }
1426
1427 #[test]
1428 fn autofix_reorder_with_casts() {
1429 let sql = "SELECT row_number() over (partition by id order by date) as y, b::int, * FROM x";
1430 let issues = run(sql);
1431 assert_eq!(issues.len(), 1);
1432
1433 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1434 let fixed = apply_edits(sql, &autofix.edits);
1435 assert_eq!(
1436 fixed,
1437 "SELECT *, b::int, row_number() over (partition by id order by date) as y FROM x"
1438 );
1439 }
1440
1441 #[test]
1442 fn autofix_fail_order_5_complex() {
1443 let sql = "SELECT row_number() over (partition by id order by date) as y, *, 2::int + 4 as sum, cast(b) as c FROM x";
1446 let issues = run(sql);
1447 assert_eq!(issues.len(), 1);
1448
1449 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1450 let fixed = apply_edits(sql, &autofix.edits);
1451 assert_eq!(fixed, "SELECT *, cast(b) as c, row_number() over (partition by id order by date) as y, 2::int + 4 as sum FROM x");
1452 }
1453
1454 #[test]
1455 fn no_autofix_with_implicit_column_references() {
1456 let sql =
1457 "SELECT DATE_TRUNC('DAY', end_time) AS time_day, b_field FROM table_name GROUP BY 1, 2";
1458 let issues = run(sql);
1459 assert_eq!(issues.len(), 1);
1460 assert!(
1461 issues[0].autofix.is_none(),
1462 "should not autofix when implicit column references exist"
1463 );
1464 }
1465
1466 #[test]
1467 fn autofix_explicit_column_references() {
1468 let sql = "SELECT DATE_TRUNC('DAY', end_time) AS time_day, b_field FROM table_name GROUP BY time_day, b_field";
1469 let issues = run(sql);
1470 assert_eq!(issues.len(), 1);
1471
1472 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1473 let fixed = apply_edits(sql, &autofix.edits);
1474 assert_eq!(fixed, "SELECT b_field, DATE_TRUNC('DAY', end_time) AS time_day FROM table_name GROUP BY time_day, b_field");
1475 }
1476
1477 #[test]
1478 fn autofix_reorders_multiline_targets_without_quotes() {
1479 let sql = "SELECT\n SUM(a) AS total,\n a\nFROM t";
1480 let issues = run(sql);
1481 assert_eq!(issues.len(), 1);
1482
1483 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1484 let fixed = apply_edits(sql, &autofix.edits);
1485 assert!(
1486 fixed.contains("a,\n SUM(a) AS total"),
1487 "expected reordered multiline projection, got: {fixed}"
1488 );
1489
1490 parse_sql(&fixed).expect("fixed SQL should remain parseable");
1491 }
1492
1493 #[test]
1494 fn autofix_reorders_multiline_targets_with_inter_item_comment() {
1495 let sql = "SELECT\n -- total usage for period\n SUM(a) AS total,\n a\nFROM t";
1496 let issues = run(sql);
1497 assert_eq!(issues.len(), 1);
1498
1499 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1500 let fixed = apply_edits(sql, &autofix.edits);
1501 assert!(
1502 fixed.contains("-- total usage for period"),
1503 "expected inter-item comment to be preserved, got: {fixed}"
1504 );
1505 assert!(
1506 fixed.contains("a,\n SUM(a) AS total"),
1507 "expected reordered projection, got: {fixed}"
1508 );
1509
1510 parse_sql(&fixed).expect("fixed SQL should remain parseable");
1511 }
1512
1513 #[test]
1514 fn autofix_reorders_trailing_simple_column_after_subquery_expressions() {
1515 let sql = "SELECT\n a.table_full_name AS table_a,\n b.table_full_name AS table_b,\n (\n SELECT count(*)\n FROM unnest(a.columns) AS ac\n WHERE ac = ANY(b.columns)\n ) AS intersection_size,\n a.column_count + b.column_count - (\n SELECT count(*)\n FROM unnest(a.columns) AS ac\n WHERE ac = ANY(b.columns)\n ) AS union_size,\n a.connector_id\nFROM table_columns AS a\nINNER JOIN table_columns AS b\n ON a.connector_id = b.connector_id";
1516 let issues = run(sql);
1517 assert_eq!(issues.len(), 1);
1518 let autofix = issues[0]
1519 .autofix
1520 .as_ref()
1521 .expect("expected ST006 autofix metadata");
1522 let fixed = apply_edits(sql, &autofix.edits);
1523 assert!(
1524 fixed.contains("a.connector_id,\n (\n SELECT count(*)"),
1525 "expected simple trailing column to move before complex expressions, got: {fixed}"
1526 );
1527 parse_sql(&fixed).expect("fixed SQL should remain parseable");
1528 }
1529
1530 #[test]
1531 fn fail_cte_used_in_select_not_set() {
1532 let sql = "\
1534WITH T2 AS (
1535 SELECT
1536 CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A,
1537 COL AS B
1538 FROM T
1539)
1540SELECT * FROM T2";
1541 let issues = run(sql);
1542 assert_eq!(issues.len(), 1);
1543 }
1544
1545 #[test]
1546 fn comment_in_projection_blocks_safe_autofix_metadata() {
1547 let sql = "SELECT a + 1 /*keep*/, a FROM t";
1548 let issues = run(sql);
1549 assert_eq!(issues.len(), 1);
1550 assert!(
1551 issues[0].autofix.is_none(),
1552 "comment-bearing projection should not receive ST006 safe patch metadata"
1553 );
1554 }
1555
1556 #[test]
1559 fn does_not_flag_when_simple_target_starts_projection() {
1560 let issues = run("SELECT a, a + 1 FROM t");
1561 assert!(issues.is_empty());
1562 }
1563
1564 #[test]
1565 fn flags_simple_target_after_complex() {
1566 let issues = run("SELECT a, a + 1, b FROM t");
1568 assert_eq!(issues.len(), 1);
1569 }
1570
1571 #[test]
1572 fn does_not_flag_when_alias_wraps_simple_identifier() {
1573 let issues = run("SELECT a AS first_a, b FROM t");
1574 assert!(issues.is_empty());
1575 }
1576
1577 #[test]
1578 fn flags_in_nested_select_scopes() {
1579 let issues = run("SELECT * FROM (SELECT a + 1, a FROM t) AS sub");
1580 assert_eq!(issues.len(), 1);
1581 }
1582}