1use crate::linter::rule::{LintContext, LintRule};
12use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
13use sqlparser::ast::{
14 Expr, GroupByExpr, Query, Select, SelectItem, SetExpr, Statement, TableFactor,
15};
16use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
17use std::collections::{HashMap, HashSet};
18
19pub struct StructureColumnOrder;
20
21impl LintRule for StructureColumnOrder {
22 fn code(&self) -> &'static str {
23 issue_codes::LINT_ST_006
24 }
25
26 fn name(&self) -> &'static str {
27 "Structure column order"
28 }
29
30 fn description(&self) -> &'static str {
31 "Select wildcards then simple targets before calculations and aggregates."
32 }
33
34 fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
35 let mut violations_info: Vec<ViolationInfo> = Vec::new();
36 visit_order_safe_selects(statement, &mut |select| {
37 if let Some(info) = check_select_order(select) {
38 violations_info.push(info);
39 }
40 });
41
42 let resolved = st006_resolve_violations(ctx, &violations_info);
43
44 resolved
48 .into_iter()
49 .filter_map(|r| {
50 let span = r.span?;
51 let mut issue = Issue::info(
52 issue_codes::LINT_ST_006,
53 "Prefer simple columns before complex expressions in SELECT.",
54 )
55 .with_statement(ctx.statement_index)
56 .with_span(span);
57 if let Some(edits) = r.edits {
58 issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
59 }
60 Some(issue)
61 })
62 .collect()
63 }
64}
65
66fn item_band(item: &SelectItem) -> u8 {
75 match item {
76 SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => 0,
77 SelectItem::UnnamedExpr(expr) => expr_band(expr),
78 SelectItem::ExprWithAlias { expr, .. } => expr_band(expr),
79 }
80}
81
82fn expr_band(expr: &Expr) -> u8 {
83 match expr {
84 Expr::Identifier(_) | Expr::CompoundIdentifier(_) => 1,
86 Expr::Value(_) => 1,
88 Expr::Cast { .. } => 1,
92 Expr::Function(f) if is_cast_function(f) => 1,
94 Expr::Nested(inner) => expr_band(inner),
96 _ => 2,
98 }
99}
100
101fn is_cast_function(f: &sqlparser::ast::Function) -> bool {
103 use sqlparser::ast::ObjectNamePart;
104 f.name
105 .0
106 .last()
107 .and_then(ObjectNamePart::as_ident)
108 .is_some_and(|ident| ident.value.eq_ignore_ascii_case("CAST"))
109}
110
111struct ViolationInfo {
116 bands: Vec<u8>,
118 has_implicit_refs: bool,
120 first_item_hint: String,
123}
124
125fn check_select_order(select: &Select) -> Option<ViolationInfo> {
128 if select.projection.len() < 2 {
129 return None;
130 }
131
132 let bands: Vec<u8> = select.projection.iter().map(item_band).collect();
133
134 let mut max_band = 0u8;
136 let mut violated = false;
137 for &band in &bands {
138 if band < max_band {
139 violated = true;
140 break;
141 }
142 max_band = max_band.max(band);
143 }
144
145 if !violated {
146 return None;
147 }
148
149 let has_implicit_refs = has_implicit_column_references(select);
150
151 Some(ViolationInfo {
152 bands,
153 has_implicit_refs,
154 first_item_hint: select
155 .projection
156 .first()
157 .map(std::string::ToString::to_string)
158 .unwrap_or_default(),
159 })
160}
161
162fn has_implicit_column_references(select: &Select) -> bool {
165 if let GroupByExpr::Expressions(exprs, _) = &select.group_by {
166 for expr in exprs {
167 if matches!(expr, Expr::Value(v) if matches!(v.value, sqlparser::ast::Value::Number(_, _)))
168 {
169 return true;
170 }
171 }
172 }
173
174 for sort in &select.sort_by {
175 if matches!(&sort.expr, Expr::Value(v) if matches!(v.value, sqlparser::ast::Value::Number(_, _)))
176 {
177 return true;
178 }
179 }
180
181 false
182}
183
184fn visit_order_safe_selects<F: FnMut(&Select)>(statement: &Statement, visitor: &mut F) {
191 match statement {
192 Statement::Query(query) => visit_query_selects(query, visitor, false),
193 Statement::Insert(_) => {}
195 Statement::Merge { .. } => {}
197 Statement::CreateTable(create) => {
199 if create.query.is_some() {
200 }
202 }
204 Statement::CreateView { query, .. } => {
205 visit_query_selects(query, visitor, false);
206 }
207 Statement::Update {
208 table,
209 from,
210 selection,
211 ..
212 } => {
213 visit_table_factor_selects(&table.relation, visitor);
215 for join in &table.joins {
216 visit_table_factor_selects(&join.relation, visitor);
217 }
218 if let Some(from) = from {
219 match from {
220 sqlparser::ast::UpdateTableFromKind::BeforeSet(tables)
221 | sqlparser::ast::UpdateTableFromKind::AfterSet(tables) => {
222 for t in tables {
223 visit_table_factor_selects(&t.relation, visitor);
224 for j in &t.joins {
225 visit_table_factor_selects(&j.relation, visitor);
226 }
227 }
228 }
229 }
230 }
231 if let Some(sel) = selection {
232 visit_expr_selects(sel, visitor);
233 }
234 }
235 _ => {}
236 }
237}
238
239fn visit_query_selects<F: FnMut(&Select)>(query: &Query, visitor: &mut F, in_set_operation: bool) {
240 let order_sensitive_ctes = order_sensitive_cte_names_for_query(query);
241
242 if let Some(with) = &query.with {
247 for cte in &with.cte_tables {
248 let cte_name = cte.alias.name.value.to_ascii_uppercase();
249 let cte_order_matters =
250 in_set_operation || order_sensitive_ctes.contains(cte_name.as_str());
251 visit_query_selects(&cte.query, visitor, cte_order_matters);
252 }
253 }
254
255 visit_set_expr_selects(&query.body, visitor, in_set_operation);
256}
257
258fn visit_set_expr_selects<F: FnMut(&Select)>(
259 set_expr: &SetExpr,
260 visitor: &mut F,
261 in_set_operation: bool,
262) {
263 match set_expr {
264 SetExpr::Select(select) => {
265 if in_set_operation {
266 return;
270 }
271 visitor(select);
272 for table in &select.from {
274 visit_table_factor_selects(&table.relation, visitor);
275 for join in &table.joins {
276 visit_table_factor_selects(&join.relation, visitor);
277 }
278 }
279 if let Some(sel) = &select.selection {
281 visit_expr_selects(sel, visitor);
282 }
283 }
284 SetExpr::Query(query) => visit_query_selects(query, visitor, in_set_operation),
285 SetExpr::SetOperation { left, right, .. } => {
286 visit_set_expr_selects(left, visitor, true);
288 visit_set_expr_selects(right, visitor, true);
289 }
290 _ => {}
291 }
292}
293
294fn visit_table_factor_selects<F: FnMut(&Select)>(table_factor: &TableFactor, visitor: &mut F) {
295 match table_factor {
296 TableFactor::Derived { subquery, .. } => {
297 visit_query_selects(subquery, visitor, false);
298 }
299 TableFactor::NestedJoin {
300 table_with_joins, ..
301 } => {
302 visit_table_factor_selects(&table_with_joins.relation, visitor);
303 for join in &table_with_joins.joins {
304 visit_table_factor_selects(&join.relation, visitor);
305 }
306 }
307 _ => {}
308 }
309}
310
311fn visit_expr_selects<F: FnMut(&Select)>(expr: &Expr, visitor: &mut F) {
312 match expr {
313 Expr::Subquery(query)
314 | Expr::Exists {
315 subquery: query, ..
316 } => visit_query_selects(query, visitor, false),
317 Expr::InSubquery {
318 expr: inner,
319 subquery,
320 ..
321 } => {
322 visit_expr_selects(inner, visitor);
323 visit_query_selects(subquery, visitor, false);
324 }
325 Expr::BinaryOp { left, right, .. } => {
326 visit_expr_selects(left, visitor);
327 visit_expr_selects(right, visitor);
328 }
329 Expr::UnaryOp { expr: inner, .. }
330 | Expr::Nested(inner)
331 | Expr::IsNull(inner)
332 | Expr::IsNotNull(inner)
333 | Expr::Cast { expr: inner, .. } => visit_expr_selects(inner, visitor),
334 Expr::Case {
335 operand,
336 conditions,
337 else_result,
338 ..
339 } => {
340 if let Some(op) = operand {
341 visit_expr_selects(op, visitor);
342 }
343 for when in conditions {
344 visit_expr_selects(&when.condition, visitor);
345 visit_expr_selects(&when.result, visitor);
346 }
347 if let Some(e) = else_result {
348 visit_expr_selects(e, visitor);
349 }
350 }
351 _ => {}
352 }
353}
354
355fn order_sensitive_cte_names_for_query(query: &Query) -> HashSet<String> {
356 let Some(with) = &query.with else {
357 return HashSet::new();
358 };
359
360 let cte_names: HashSet<String> = with
361 .cte_tables
362 .iter()
363 .map(|cte| cte.alias.name.value.to_ascii_uppercase())
364 .collect();
365 if cte_names.is_empty() {
366 return HashSet::new();
367 }
368
369 let mut deps_by_cte: HashMap<String, HashSet<String>> = HashMap::new();
371 for cte in &with.cte_tables {
372 let mut deps = HashSet::new();
373 collect_cte_references_in_set_expr(&cte.query.body, &cte_names, &mut deps);
374 deps_by_cte.insert(cte.alias.name.value.to_ascii_uppercase(), deps);
375 }
376
377 let mut sensitive = HashSet::new();
380 if matches!(query.body.as_ref(), SetExpr::SetOperation { .. })
381 && set_expr_has_wildcard_select(&query.body)
382 {
383 collect_cte_references_in_set_expr(&query.body, &cte_names, &mut sensitive);
384 }
385 for cte in &with.cte_tables {
386 if matches!(cte.query.body.as_ref(), SetExpr::SetOperation { .. })
387 && set_expr_has_wildcard_select(&cte.query.body)
388 {
389 collect_cte_references_in_set_expr(&cte.query.body, &cte_names, &mut sensitive);
390 }
391 }
392
393 let mut stack: Vec<String> = sensitive.iter().cloned().collect();
395 while let Some(current) = stack.pop() {
396 let Some(deps) = deps_by_cte.get(¤t) else {
397 continue;
398 };
399 for dep in deps {
400 if sensitive.insert(dep.clone()) {
401 stack.push(dep.clone());
402 }
403 }
404 }
405
406 sensitive
407}
408
409fn collect_cte_references_in_set_expr(
410 set_expr: &SetExpr,
411 cte_names: &HashSet<String>,
412 out: &mut HashSet<String>,
413) {
414 match set_expr {
415 SetExpr::Select(select) => collect_cte_references_in_select(select, cte_names, out),
416 SetExpr::SetOperation { left, right, .. } => {
417 collect_cte_references_in_set_expr(left, cte_names, out);
418 collect_cte_references_in_set_expr(right, cte_names, out);
419 }
420 SetExpr::Query(query) => collect_cte_references_in_set_expr(&query.body, cte_names, out),
421 _ => {}
422 }
423}
424
425fn collect_cte_references_in_select(
426 select: &Select,
427 cte_names: &HashSet<String>,
428 out: &mut HashSet<String>,
429) {
430 for table in &select.from {
431 collect_cte_references_in_table_factor(&table.relation, cte_names, out);
432 for join in &table.joins {
433 collect_cte_references_in_table_factor(&join.relation, cte_names, out);
434 }
435 }
436}
437
438fn collect_cte_references_in_table_factor(
439 table_factor: &TableFactor,
440 cte_names: &HashSet<String>,
441 out: &mut HashSet<String>,
442) {
443 match table_factor {
444 TableFactor::Table { name, .. } => {
445 if let Some(ident) = name.0.last().and_then(|part| part.as_ident()) {
446 let upper = ident.value.to_ascii_uppercase();
447 if cte_names.contains(upper.as_str()) {
448 out.insert(upper);
449 }
450 }
451 }
452 TableFactor::Derived { subquery, .. } => {
453 collect_cte_references_in_set_expr(&subquery.body, cte_names, out);
454 }
455 TableFactor::NestedJoin {
456 table_with_joins, ..
457 } => {
458 collect_cte_references_in_table_factor(&table_with_joins.relation, cte_names, out);
459 for join in &table_with_joins.joins {
460 collect_cte_references_in_table_factor(&join.relation, cte_names, out);
461 }
462 }
463 TableFactor::Pivot { table, .. }
464 | TableFactor::Unpivot { table, .. }
465 | TableFactor::MatchRecognize { table, .. } => {
466 collect_cte_references_in_table_factor(table, cte_names, out);
467 }
468 _ => {}
469 }
470}
471
472fn set_expr_has_wildcard_select(set_expr: &SetExpr) -> bool {
476 match set_expr {
477 SetExpr::Select(select) => select.projection.iter().any(|item| {
478 matches!(
479 item,
480 SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _)
481 )
482 }),
483 SetExpr::SetOperation { left, right, .. } => {
484 set_expr_has_wildcard_select(left) || set_expr_has_wildcard_select(right)
485 }
486 SetExpr::Query(query) => set_expr_has_wildcard_select(&query.body),
487 _ => false,
488 }
489}
490
491#[derive(Clone, Debug)]
496struct PositionedToken {
497 token: Token,
498 start: usize,
499 end: usize,
500}
501
502#[derive(Clone, Debug)]
503struct SelectProjectionSegment {
504 items: Vec<SelectProjectionItem>,
505}
506
507#[derive(Clone, Debug)]
508struct SelectProjectionItem {
509 core_span: Span,
510 leading_span: Span,
511 trailing_span: Span,
512}
513
514#[derive(Clone, Debug)]
515struct St006AutofixCandidate {
516 span: Span,
517 edits: Vec<IssuePatchEdit>,
518}
519
520struct ResolvedViolation {
524 span: Option<Span>,
525 edits: Option<Vec<IssuePatchEdit>>,
526}
527
528fn st006_resolve_violations(
530 ctx: &LintContext,
531 violations: &[ViolationInfo],
532) -> Vec<ResolvedViolation> {
533 let candidates = st006_autofix_candidates_for_context(ctx, violations);
534
535 if candidates.len() == violations.len() {
537 return candidates
538 .into_iter()
539 .map(|c| ResolvedViolation {
540 span: Some(c.span),
541 edits: if c.edits.is_empty() {
542 None
543 } else {
544 Some(c.edits)
545 },
546 })
547 .collect();
548 }
549
550 let spans = st006_violation_spans(ctx, violations);
553 if spans.len() == violations.len() {
554 return spans
555 .into_iter()
556 .map(|span| ResolvedViolation {
557 span: Some(span),
558 edits: None,
559 })
560 .collect();
561 }
562
563 violations
565 .iter()
566 .map(|_| ResolvedViolation {
567 span: None,
568 edits: None,
569 })
570 .collect()
571}
572
573fn positioned_tokens_for_context(ctx: &LintContext) -> Vec<PositionedToken> {
574 let from_document_tokens = ctx.with_document_tokens(|tokens| {
575 if tokens.is_empty() {
576 return None;
577 }
578
579 let mut positioned = Vec::new();
580 for token in tokens {
581 let (start, end) = token_with_span_offsets(ctx.sql, token)?;
582 if start < ctx.statement_range.start || end > ctx.statement_range.end {
583 continue;
584 }
585 positioned.push(PositionedToken {
586 token: token.token.clone(),
587 start,
588 end,
589 });
590 }
591 Some(positioned)
592 });
593
594 if let Some(tokens) = from_document_tokens {
595 tokens
596 } else {
597 let Some(tokens) = tokenize_with_spans(ctx.statement_sql(), ctx.dialect()) else {
598 return Vec::new();
599 };
600
601 let mut positioned = Vec::new();
602 for token in &tokens {
603 let Some((start, end)) = token_with_span_offsets(ctx.statement_sql(), token) else {
604 continue;
605 };
606 positioned.push(PositionedToken {
607 token: token.token.clone(),
608 start: ctx.statement_range.start + start,
609 end: ctx.statement_range.start + end,
610 });
611 }
612 positioned
613 }
614}
615
616fn st006_autofix_candidates_for_context(
617 ctx: &LintContext,
618 violations: &[ViolationInfo],
619) -> Vec<St006AutofixCandidate> {
620 let tokens = positioned_tokens_for_context(ctx);
621 let segments = select_projection_segments(&tokens);
622
623 if segments.len() < violations.len() {
628 return Vec::new();
629 }
630
631 let mut candidates = Vec::new();
635 let mut violation_idx = 0;
636 for segment in &segments {
637 if violation_idx >= violations.len() {
638 break;
639 }
640 let violation = &violations[violation_idx];
641 if segment.items.len() != violation.bands.len() {
642 continue;
643 }
644 if !segment_first_item_matches(ctx.sql, segment, &violation.first_item_hint) {
645 continue;
646 }
647
648 if violation.has_implicit_refs {
650 violation_idx += 1;
651 continue;
652 }
653
654 if let Some(candidate) =
655 projection_reorder_candidate_by_band(ctx.sql, &tokens, segment, &violation.bands)
656 {
657 candidates.push(candidate);
658 }
659 violation_idx += 1;
660 }
661
662 candidates
663}
664
665fn st006_violation_spans(ctx: &LintContext, violations: &[ViolationInfo]) -> Vec<Span> {
668 let tokens = positioned_tokens_for_context(ctx);
669 let segments = select_projection_segments(&tokens);
670
671 if segments.len() < violations.len() {
672 return Vec::new();
673 }
674
675 let mut spans = Vec::new();
676 let mut violation_idx = 0;
677 for segment in &segments {
678 if violation_idx >= violations.len() {
679 break;
680 }
681 let violation = &violations[violation_idx];
682 if segment.items.len() != violation.bands.len() {
683 continue;
684 }
685 if !segment_first_item_matches(ctx.sql, segment, &violation.first_item_hint) {
686 continue;
687 }
688
689 if let Some(first) = segment.items.first() {
691 spans.push(first.core_span);
692 }
693 violation_idx += 1;
694 }
695
696 spans
697}
698
699fn select_projection_segments(tokens: &[PositionedToken]) -> Vec<SelectProjectionSegment> {
700 let significant_positions: Vec<usize> = tokens
701 .iter()
702 .enumerate()
703 .filter_map(|(index, token)| (!is_trivia(&token.token)).then_some(index))
704 .collect();
705 if significant_positions.is_empty() {
706 return Vec::new();
707 }
708
709 let mut depths = vec![0usize; significant_positions.len()];
710 let mut depth = 0usize;
711 for (position, token_index) in significant_positions.iter().copied().enumerate() {
712 depths[position] = depth;
713 match tokens[token_index].token {
714 Token::LParen => depth += 1,
715 Token::RParen => depth = depth.saturating_sub(1),
716 _ => {}
717 }
718 }
719
720 let mut segments = Vec::new();
721 for position in 0..significant_positions.len() {
722 let token = &tokens[significant_positions[position]].token;
723 if !token_word_equals(token, "SELECT") {
724 continue;
725 }
726
727 let base_depth = depths[position];
728 let Some(projection_start) = projection_start_after_select(
729 tokens,
730 &significant_positions,
731 &depths,
732 position + 1,
733 base_depth,
734 ) else {
735 continue;
736 };
737 let Some(from_position) = from_position_for_select(
738 tokens,
739 &significant_positions,
740 &depths,
741 projection_start,
742 base_depth,
743 ) else {
744 continue;
745 };
746 if from_position <= projection_start {
747 continue;
748 }
749
750 let items = projection_items(
751 tokens,
752 &significant_positions,
753 &depths,
754 projection_start,
755 from_position,
756 base_depth,
757 );
758 if items.is_empty() {
759 continue;
760 }
761
762 segments.push(SelectProjectionSegment { items });
763 }
764
765 segments
766}
767
768fn projection_start_after_select(
769 tokens: &[PositionedToken],
770 significant_positions: &[usize],
771 depths: &[usize],
772 mut position: usize,
773 base_depth: usize,
774) -> Option<usize> {
775 while position < significant_positions.len() {
776 if depths[position] != base_depth {
777 return Some(position);
778 }
779
780 let token = &tokens[significant_positions[position]].token;
781 if token_word_equals(token, "DISTINCT")
782 || token_word_equals(token, "ALL")
783 || token_word_equals(token, "DISTINCTROW")
784 {
785 position += 1;
786 continue;
787 }
788 return Some(position);
789 }
790
791 None
792}
793
794fn from_position_for_select(
795 tokens: &[PositionedToken],
796 significant_positions: &[usize],
797 depths: &[usize],
798 start_position: usize,
799 base_depth: usize,
800) -> Option<usize> {
801 (start_position..significant_positions.len()).find(|&position| {
802 depths[position] == base_depth
803 && token_word_equals(&tokens[significant_positions[position]].token, "FROM")
804 })
805}
806
807fn projection_items(
808 tokens: &[PositionedToken],
809 significant_positions: &[usize],
810 depths: &[usize],
811 start_position: usize,
812 from_position: usize,
813 base_depth: usize,
814) -> Vec<SelectProjectionItem> {
815 if start_position >= from_position {
816 return Vec::new();
817 }
818
819 let mut core_items: Vec<(usize, usize, Option<usize>)> = Vec::new();
820 let mut item_start = start_position;
821
822 for position in start_position..from_position {
823 let token = &tokens[significant_positions[position]].token;
824 if depths[position] == base_depth && matches!(token, Token::Comma) {
825 if item_start < position {
826 core_items.push((item_start, position - 1, Some(position)));
827 }
828 item_start = position + 1;
829 }
830 }
831
832 if item_start < from_position {
833 core_items.push((item_start, from_position - 1, None));
834 }
835
836 if core_items.is_empty() {
837 return Vec::new();
838 }
839
840 let mut items = Vec::with_capacity(core_items.len());
841 let mut previous_comma_end = 0usize;
842 for (index, (core_start_position, core_end_position, comma_position)) in
843 core_items.iter().copied().enumerate()
844 {
845 let Some(core_span) = span_from_positions(
846 tokens,
847 significant_positions,
848 core_start_position,
849 core_end_position,
850 ) else {
851 return Vec::new();
852 };
853
854 let leading_start = if index == 0 {
855 core_span.start
856 } else {
857 previous_comma_end
858 };
859 let leading_end = core_span.start;
860
861 let trailing_end = if let Some(comma_position) = comma_position {
862 tokens[significant_positions[comma_position]].start
863 } else {
864 core_span.end
865 };
866 if trailing_end < core_span.end {
867 return Vec::new();
868 }
869
870 let leading_span = Span::new(leading_start, leading_end);
871 let trailing_span = Span::new(core_span.end, trailing_end);
872 items.push(SelectProjectionItem {
873 core_span,
874 leading_span,
875 trailing_span,
876 });
877
878 if let Some(comma_position) = comma_position {
879 previous_comma_end = tokens[significant_positions[comma_position]].end;
880 }
881 }
882
883 items
884}
885
886fn span_from_positions(
887 tokens: &[PositionedToken],
888 significant_positions: &[usize],
889 start_position: usize,
890 end_position: usize,
891) -> Option<Span> {
892 if end_position < start_position {
893 return None;
894 }
895
896 let start = tokens[*significant_positions.get(start_position)?].start;
897 let end = tokens[*significant_positions.get(end_position)?].end;
898 (start < end).then_some(Span::new(start, end))
899}
900
901fn projection_reorder_candidate_by_band(
903 sql: &str,
904 tokens: &[PositionedToken],
905 segment: &SelectProjectionSegment,
906 bands: &[u8],
907) -> Option<St006AutofixCandidate> {
908 if segment.items.len() != bands.len() {
909 return None;
910 }
911
912 let replace_span = Span::new(
913 segment.items.first()?.core_span.start,
914 segment.items.last()?.core_span.end,
915 );
916 if replace_span.start >= replace_span.end || replace_span.end > sql.len() {
917 return None;
918 }
919
920 let mut normalized_items = Vec::with_capacity(segment.items.len());
921 for item in &segment.items {
922 let core_span = item.core_span;
923 if core_span.start >= core_span.end || core_span.end > sql.len() {
924 return None;
925 }
926 if trailing_span_has_inline_comment(tokens, sql, core_span, item.trailing_span) {
927 return None;
928 }
929 let text = sql[core_span.start..core_span.end].trim();
930 let normalized = normalize_projection_item_text(text)?;
931
932 let leading = if item.leading_span.start < item.leading_span.end
933 && item.leading_span.end <= sql.len()
934 {
935 &sql[item.leading_span.start..item.leading_span.end]
936 } else {
937 ""
938 };
939 let trailing = if item.trailing_span.start < item.trailing_span.end
940 && item.trailing_span.end <= sql.len()
941 {
942 &sql[item.trailing_span.start..item.trailing_span.end]
943 } else {
944 ""
945 };
946 normalized_items.push((normalized, leading, trailing));
947 }
948
949 let mut indexed: Vec<(usize, u8)> = normalized_items
951 .iter()
952 .enumerate()
953 .zip(bands.iter())
954 .map(|((i, _), &band)| (i, band))
955 .collect();
956 indexed.sort_by_key(|&(i, band)| (band, i));
957
958 let original_segment = &sql[replace_span.start..replace_span.end];
959 let replacement = if original_segment.contains('\n') || original_segment.contains('\r') {
960 let indent = indent_prefix_for_offset(sql, segment.items.first()?.core_span.start);
961 let default_separator = format!(",\n{indent}");
962 let mut rewritten = String::new();
963 for (position, &(item_index, _)) in indexed.iter().enumerate() {
964 let (core_text, leading, trailing) = &normalized_items[item_index];
965 if position > 0 {
966 if leading.is_empty() {
967 rewritten.push_str(&default_separator);
968 } else {
969 rewritten.push(',');
970 rewritten.push_str(leading);
971 }
972 }
973 rewritten.push_str(core_text);
974 rewritten.push_str(trailing);
975 }
976 rewritten
977 } else {
978 indexed
979 .iter()
980 .map(|(item_index, _)| normalized_items[*item_index].0.as_str())
981 .collect::<Vec<_>>()
982 .join(", ")
983 };
984 if replacement.is_empty() || replacement == original_segment {
985 return None;
986 }
987
988 Some(St006AutofixCandidate {
989 span: replace_span,
990 edits: vec![IssuePatchEdit::new(replace_span, replacement)],
991 })
992}
993
994fn trailing_span_has_inline_comment(
995 tokens: &[PositionedToken],
996 sql: &str,
997 core_span: Span,
998 trailing_span: Span,
999) -> bool {
1000 if trailing_span.start >= trailing_span.end {
1001 return false;
1002 }
1003 tokens.iter().any(|token| {
1004 if token.start < trailing_span.start
1005 || token.end > trailing_span.end
1006 || !is_comment(&token.token)
1007 {
1008 return false;
1009 }
1010 if token.start <= core_span.end || token.start > sql.len() || core_span.end > sql.len() {
1011 return true;
1012 }
1013 let between = &sql[core_span.end..token.start];
1014 !between.contains('\n') && !between.contains('\r')
1015 })
1016}
1017
1018fn normalize_projection_item_text(text: &str) -> Option<String> {
1019 let trimmed = text.trim();
1020 if trimmed.is_empty() {
1021 return None;
1022 }
1023 Some(trimmed.to_string())
1024}
1025
1026fn segment_first_item_matches(sql: &str, segment: &SelectProjectionSegment, hint: &str) -> bool {
1027 let Some(first_item) = segment.items.first() else {
1028 return false;
1029 };
1030 let span = first_item.core_span;
1031 if span.start >= span.end || span.end > sql.len() {
1032 return false;
1033 }
1034 normalize_item_hint(&sql[span.start..span.end]) == normalize_item_hint(hint)
1035}
1036
1037fn normalize_item_hint(text: &str) -> String {
1038 text.chars()
1039 .filter(|ch| !ch.is_whitespace())
1040 .flat_map(|ch| ch.to_uppercase())
1041 .collect()
1042}
1043
1044fn indent_prefix_for_offset(sql: &str, offset: usize) -> String {
1045 let start = sql[..offset].rfind('\n').map_or(0, |idx| idx + 1);
1046 sql[start..offset]
1047 .chars()
1048 .take_while(|ch| ch.is_whitespace())
1049 .collect()
1050}
1051
1052fn tokenize_with_spans(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
1057 let dialect = dialect.to_sqlparser_dialect();
1058 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
1059 tokenizer.tokenize_with_location().ok()
1060}
1061
1062fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
1063 let start = line_col_to_offset(
1064 sql,
1065 token.span.start.line as usize,
1066 token.span.start.column as usize,
1067 )?;
1068 let end = line_col_to_offset(
1069 sql,
1070 token.span.end.line as usize,
1071 token.span.end.column as usize,
1072 )?;
1073 Some((start, end))
1074}
1075
1076fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
1077 if line == 0 || column == 0 {
1078 return None;
1079 }
1080
1081 let mut current_line = 1usize;
1082 let mut current_col = 1usize;
1083 for (offset, ch) in sql.char_indices() {
1084 if current_line == line && current_col == column {
1085 return Some(offset);
1086 }
1087 if ch == '\n' {
1088 current_line += 1;
1089 current_col = 1;
1090 } else {
1091 current_col += 1;
1092 }
1093 }
1094 if current_line == line && current_col == column {
1095 return Some(sql.len());
1096 }
1097 None
1098}
1099
1100fn token_word_equals(token: &Token, expected_upper: &str) -> bool {
1101 matches!(token, Token::Word(word) if word.value.eq_ignore_ascii_case(expected_upper))
1102}
1103
1104fn is_trivia(token: &Token) -> bool {
1105 matches!(
1106 token,
1107 Token::Whitespace(
1108 Whitespace::Space
1109 | Whitespace::Newline
1110 | Whitespace::Tab
1111 | Whitespace::SingleLineComment { .. }
1112 | Whitespace::MultiLineComment(_)
1113 )
1114 )
1115}
1116
1117fn is_comment(token: &Token) -> bool {
1118 matches!(
1119 token,
1120 Token::Whitespace(Whitespace::SingleLineComment { .. } | Whitespace::MultiLineComment(_))
1121 )
1122}
1123
1124#[cfg(test)]
1125mod tests {
1126 use super::*;
1127 use crate::parser::parse_sql;
1128 use crate::types::IssueAutofixApplicability;
1129
1130 fn run(sql: &str) -> Vec<Issue> {
1131 let statements = parse_sql(sql).expect("parse");
1132 let rule = StructureColumnOrder;
1133 statements
1134 .iter()
1135 .enumerate()
1136 .flat_map(|(index, statement)| {
1137 rule.check(
1138 statement,
1139 &LintContext {
1140 sql,
1141 statement_range: 0..sql.len(),
1142 statement_index: index,
1143 },
1144 )
1145 })
1146 .collect()
1147 }
1148
1149 fn apply_edits(sql: &str, edits: &[IssuePatchEdit]) -> String {
1150 let mut output = sql.to_string();
1151 let mut ordered = edits.iter().collect::<Vec<_>>();
1152 ordered.sort_by_key(|edit| edit.span.start);
1153
1154 for edit in ordered.into_iter().rev() {
1155 output.replace_range(edit.span.start..edit.span.end, &edit.replacement);
1156 }
1157
1158 output
1159 }
1160
1161 #[test]
1164 fn pass_select_statement_order() {
1165 let issues = run("SELECT a, cast(b as int) as b, c FROM x");
1167 assert!(issues.is_empty());
1168 }
1169
1170 #[test]
1171 fn pass_union_statements_ignored() {
1172 let sql = "SELECT a + b as c, d FROM table_a UNION ALL SELECT c, d FROM table_b";
1173 let issues = run(sql);
1174 assert!(issues.is_empty());
1175 }
1176
1177 #[test]
1178 fn pass_insert_statements_ignored() {
1179 let sql = "\
1180INSERT INTO example_schema.example_table
1181(id, example_column, rank_asc, rank_desc)
1182SELECT
1183 id,
1184 CASE WHEN col_a IN('a', 'b', 'c') THEN col_a END AS example_column,
1185 rank_asc,
1186 rank_desc
1187FROM another_schema.another_table";
1188 let issues = run(sql);
1189 assert!(issues.is_empty());
1190 }
1191
1192 #[test]
1193 fn pass_insert_statement_with_cte_ignored() {
1194 let sql = "\
1195INSERT INTO my_table
1196WITH my_cte AS (SELECT * FROM t1)
1197SELECT MAX(field1), field2
1198FROM t1";
1199 let issues = run(sql);
1200 assert!(issues.is_empty());
1201 }
1202
1203 #[test]
1204 fn with_cte_insert_into_still_checks_cte() {
1205 let sql = "\
1210WITH my_cte AS (
1211 SELECT MAX(field1) AS mx, field2 FROM t1
1212)
1213INSERT INTO my_table (col1, col2)
1214SELECT mx, field2 FROM my_cte";
1215 let issues = run(sql);
1216 assert_eq!(issues.len(), 1);
1218 }
1219
1220 #[test]
1221 fn with_cte_insert_into_no_violation_when_ordered() {
1222 let sql = "\
1224WITH my_cte AS (
1225 SELECT field2, MAX(field1) AS mx FROM t1
1226)
1227INSERT INTO my_table (col1, col2)
1228SELECT mx, field2 FROM my_cte";
1229 let issues = run(sql);
1230 assert!(issues.is_empty());
1231 }
1232
1233 #[test]
1234 fn pass_merge_statements_ignored() {
1235 let sql = "\
1236MERGE INTO t
1237USING
1238(
1239 SELECT
1240 DATE_TRUNC('DAY', end_time) AS time_day,
1241 b
1242 FROM u
1243) AS u ON (a = b)
1244WHEN MATCHED THEN
1245UPDATE SET a = b
1246WHEN NOT MATCHED THEN
1247INSERT (b) VALUES (c)";
1248 let issues = run(sql);
1249 assert!(issues.is_empty());
1250 }
1251
1252 #[test]
1253 fn pass_merge_statement_with_cte_ignored() {
1254 let sql = "\
1255MERGE INTO t
1256USING
1257(
1258 WITH my_cte AS (SELECT * FROM t1)
1259 SELECT MAX(field1), field2
1260 FROM t1
1261) AS u ON (a = b)
1262WHEN MATCHED THEN
1263UPDATE SET a = b
1264WHEN NOT MATCHED THEN
1265INSERT (b) VALUES (c)";
1266 let issues = run(sql);
1267 assert!(issues.is_empty());
1268 }
1269
1270 #[test]
1271 fn pass_create_table_as_select_with_cte_ignored() {
1272 let sql = "\
1273CREATE TABLE new_table AS (
1274 WITH my_cte AS (SELECT * FROM t1)
1275 SELECT MAX(field1), field2
1276 FROM t1
1277)";
1278 let issues = run(sql);
1279 assert!(issues.is_empty());
1280 }
1281
1282 #[test]
1283 fn pass_cte_used_in_set() {
1284 let sql = "\
1285WITH T1 AS (
1286 SELECT
1287 'a'::varchar AS A,
1288 1::bigint AS B
1289),
1290T2 AS (
1291 SELECT
1292 CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A,
1293 COL AS B
1294 FROM T
1295)
1296SELECT * FROM T1
1297UNION ALL
1298SELECT * FROM T2";
1299 let issues = run(sql);
1300 assert!(issues.is_empty());
1301 }
1302
1303 #[test]
1304 fn fail_cte_used_in_set_with_explicit_columns() {
1305 let sql = "\
1308WITH T1 AS (
1309 SELECT
1310 'a'::varchar AS A,
1311 1::bigint AS B
1312),
1313T2 AS (
1314 SELECT
1315 CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A,
1316 COL AS B
1317 FROM T
1318)
1319SELECT A, B FROM T1
1320UNION ALL
1321SELECT A, B FROM T2";
1322 let issues = run(sql);
1323 assert_eq!(issues.len(), 1);
1324 assert_eq!(issues[0].code, issue_codes::LINT_ST_006);
1325 }
1326
1327 #[test]
1328 fn pass_transitive_cte_dependency_into_wildcard_set_operation() {
1329 let sql = "\
1332WITH base_a AS (
1333 SELECT MAX(a) AS mx, b FROM t
1334),
1335base_b AS (
1336 SELECT MAX(c) AS mx, d FROM t2
1337),
1338combined AS (
1339 SELECT * FROM base_a
1340 UNION ALL
1341 SELECT * FROM base_b
1342)
1343SELECT mx, b FROM combined";
1344 let issues = run(sql);
1345 assert!(issues.is_empty());
1346 }
1347
1348 #[test]
1349 fn pass_subquery_used_in_set() {
1350 let sql = "\
1351SELECT * FROM (SELECT 'a'::varchar AS A, 1::bigint AS B)
1352UNION ALL
1353SELECT * FROM (SELECT CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A, COL AS B FROM T)";
1354 let issues = run(sql);
1355 assert!(issues.is_empty());
1356 }
1357
1358 #[test]
1361 fn fail_select_statement_order_1() {
1362 let sql = "SELECT a, row_number() over (partition by id order by date) as y, b FROM x";
1364 let issues = run(sql);
1365 assert_eq!(issues.len(), 1);
1366 assert_eq!(issues[0].code, issue_codes::LINT_ST_006);
1367 }
1368
1369 #[test]
1370 fn fail_select_statement_order_2() {
1371 let sql = "SELECT row_number() over (partition by id order by date) as y, *, cast(b as int) as b_int FROM x";
1373 let issues = run(sql);
1374 assert_eq!(issues.len(), 1);
1375 }
1376
1377 #[test]
1378 fn fail_select_statement_order_3() {
1379 let sql = "SELECT row_number() over (partition by id order by date) as y, cast(b as int) as b_int, * FROM x";
1381 let issues = run(sql);
1382 assert_eq!(issues.len(), 1);
1383 }
1384
1385 #[test]
1386 fn fail_select_statement_order_4() {
1387 let sql = "SELECT row_number() over (partition by id order by date) as y, b::int, * FROM x";
1389 let issues = run(sql);
1390 assert_eq!(issues.len(), 1);
1391 }
1392
1393 #[test]
1394 fn fail_select_statement_order_5() {
1395 let sql = "SELECT row_number() over (partition by id order by date) as y, *, 2::int + 4 as sum, cast(b) as c FROM x";
1397 let issues = run(sql);
1398 assert_eq!(issues.len(), 1);
1399 }
1400
1401 #[test]
1404 fn autofix_reorder_simple_before_complex() {
1405 let sql = "SELECT a + 1, a FROM t";
1406 let issues = run(sql);
1407 assert_eq!(issues.len(), 1);
1408
1409 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1410 assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
1411 let fixed = apply_edits(sql, &autofix.edits);
1412 assert_eq!(fixed, "SELECT a, a + 1 FROM t");
1413 }
1414
1415 #[test]
1416 fn autofix_reorder_wildcard_first() {
1417 let sql = "SELECT row_number() over (partition by id order by date) as y, *, cast(b as int) as b_int FROM x";
1418 let issues = run(sql);
1419 assert_eq!(issues.len(), 1);
1420
1421 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1422 let fixed = apply_edits(sql, &autofix.edits);
1423 assert_eq!(fixed, "SELECT *, cast(b as int) as b_int, row_number() over (partition by id order by date) as y FROM x");
1424 }
1425
1426 #[test]
1427 fn autofix_reorder_with_casts() {
1428 let sql = "SELECT row_number() over (partition by id order by date) as y, b::int, * FROM x";
1429 let issues = run(sql);
1430 assert_eq!(issues.len(), 1);
1431
1432 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1433 let fixed = apply_edits(sql, &autofix.edits);
1434 assert_eq!(
1435 fixed,
1436 "SELECT *, b::int, row_number() over (partition by id order by date) as y FROM x"
1437 );
1438 }
1439
1440 #[test]
1441 fn autofix_fail_order_5_complex() {
1442 let sql = "SELECT row_number() over (partition by id order by date) as y, *, 2::int + 4 as sum, cast(b) as c FROM x";
1445 let issues = run(sql);
1446 assert_eq!(issues.len(), 1);
1447
1448 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1449 let fixed = apply_edits(sql, &autofix.edits);
1450 assert_eq!(fixed, "SELECT *, cast(b) as c, row_number() over (partition by id order by date) as y, 2::int + 4 as sum FROM x");
1451 }
1452
1453 #[test]
1454 fn no_autofix_with_implicit_column_references() {
1455 let sql =
1456 "SELECT DATE_TRUNC('DAY', end_time) AS time_day, b_field FROM table_name GROUP BY 1, 2";
1457 let issues = run(sql);
1458 assert_eq!(issues.len(), 1);
1459 assert!(
1460 issues[0].autofix.is_none(),
1461 "should not autofix when implicit column references exist"
1462 );
1463 }
1464
1465 #[test]
1466 fn autofix_explicit_column_references() {
1467 let sql = "SELECT DATE_TRUNC('DAY', end_time) AS time_day, b_field FROM table_name GROUP BY time_day, b_field";
1468 let issues = run(sql);
1469 assert_eq!(issues.len(), 1);
1470
1471 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1472 let fixed = apply_edits(sql, &autofix.edits);
1473 assert_eq!(fixed, "SELECT b_field, DATE_TRUNC('DAY', end_time) AS time_day FROM table_name GROUP BY time_day, b_field");
1474 }
1475
1476 #[test]
1477 fn autofix_reorders_multiline_targets_without_quotes() {
1478 let sql = "SELECT\n SUM(a) AS total,\n a\nFROM t";
1479 let issues = run(sql);
1480 assert_eq!(issues.len(), 1);
1481
1482 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1483 let fixed = apply_edits(sql, &autofix.edits);
1484 assert!(
1485 fixed.contains("a,\n SUM(a) AS total"),
1486 "expected reordered multiline projection, got: {fixed}"
1487 );
1488
1489 parse_sql(&fixed).expect("fixed SQL should remain parseable");
1490 }
1491
1492 #[test]
1493 fn autofix_reorders_multiline_targets_with_inter_item_comment() {
1494 let sql = "SELECT\n -- total usage for period\n SUM(a) AS total,\n a\nFROM t";
1495 let issues = run(sql);
1496 assert_eq!(issues.len(), 1);
1497
1498 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1499 let fixed = apply_edits(sql, &autofix.edits);
1500 assert!(
1501 fixed.contains("-- total usage for period"),
1502 "expected inter-item comment to be preserved, got: {fixed}"
1503 );
1504 assert!(
1505 fixed.contains("a,\n SUM(a) AS total"),
1506 "expected reordered projection, got: {fixed}"
1507 );
1508
1509 parse_sql(&fixed).expect("fixed SQL should remain parseable");
1510 }
1511
1512 #[test]
1513 fn autofix_reorders_trailing_simple_column_after_subquery_expressions() {
1514 let sql = "SELECT\n a.table_full_name AS table_a,\n b.table_full_name AS table_b,\n (\n SELECT count(*)\n FROM unnest(a.columns) AS ac\n WHERE ac = ANY(b.columns)\n ) AS intersection_size,\n a.column_count + b.column_count - (\n SELECT count(*)\n FROM unnest(a.columns) AS ac\n WHERE ac = ANY(b.columns)\n ) AS union_size,\n a.connector_id\nFROM table_columns AS a\nINNER JOIN table_columns AS b\n ON a.connector_id = b.connector_id";
1515 let issues = run(sql);
1516 assert_eq!(issues.len(), 1);
1517 let autofix = issues[0]
1518 .autofix
1519 .as_ref()
1520 .expect("expected ST006 autofix metadata");
1521 let fixed = apply_edits(sql, &autofix.edits);
1522 assert!(
1523 fixed.contains("a.connector_id,\n (\n SELECT count(*)"),
1524 "expected simple trailing column to move before complex expressions, got: {fixed}"
1525 );
1526 parse_sql(&fixed).expect("fixed SQL should remain parseable");
1527 }
1528
1529 #[test]
1530 fn fail_cte_used_in_select_not_set() {
1531 let sql = "\
1533WITH T2 AS (
1534 SELECT
1535 CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A,
1536 COL AS B
1537 FROM T
1538)
1539SELECT * FROM T2";
1540 let issues = run(sql);
1541 assert_eq!(issues.len(), 1);
1542 }
1543
1544 #[test]
1545 fn comment_in_projection_blocks_safe_autofix_metadata() {
1546 let sql = "SELECT a + 1 /*keep*/, a FROM t";
1547 let issues = run(sql);
1548 assert_eq!(issues.len(), 1);
1549 assert!(
1550 issues[0].autofix.is_none(),
1551 "comment-bearing projection should not receive ST006 safe patch metadata"
1552 );
1553 }
1554
1555 #[test]
1558 fn does_not_flag_when_simple_target_starts_projection() {
1559 let issues = run("SELECT a, a + 1 FROM t");
1560 assert!(issues.is_empty());
1561 }
1562
1563 #[test]
1564 fn flags_simple_target_after_complex() {
1565 let issues = run("SELECT a, a + 1, b FROM t");
1567 assert_eq!(issues.len(), 1);
1568 }
1569
1570 #[test]
1571 fn does_not_flag_when_alias_wraps_simple_identifier() {
1572 let issues = run("SELECT a AS first_a, b FROM t");
1573 assert!(issues.is_empty());
1574 }
1575
1576 #[test]
1577 fn flags_in_nested_select_scopes() {
1578 let issues = run("SELECT * FROM (SELECT a + 1, a FROM t) AS sub");
1579 assert_eq!(issues.len(), 1);
1580 }
1581}