1use crate::linter::rule::{LintContext, LintRule};
12use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
13use sqlparser::ast::{
14 CreateView, Expr, GroupByExpr, Query, Select, SelectItem, SetExpr, Statement, TableFactor,
15 Update,
16};
17use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
18use std::collections::{HashMap, HashSet};
19
20pub struct StructureColumnOrder;
21
22impl LintRule for StructureColumnOrder {
23 fn code(&self) -> &'static str {
24 issue_codes::LINT_ST_006
25 }
26
27 fn name(&self) -> &'static str {
28 "Structure column order"
29 }
30
31 fn description(&self) -> &'static str {
32 "Select wildcards then simple targets before calculations and aggregates."
33 }
34
35 fn check(&self, statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
36 let mut violations_info: Vec<ViolationInfo> = Vec::new();
37 visit_order_safe_selects(statement, &mut |select| {
38 if let Some(info) = check_select_order(select) {
39 violations_info.push(info);
40 }
41 });
42
43 let resolved = st006_resolve_violations(ctx, &violations_info);
44
45 resolved
49 .into_iter()
50 .filter_map(|r| {
51 let span = r.span?;
52 let mut issue = Issue::info(
53 issue_codes::LINT_ST_006,
54 "Prefer simple columns before complex expressions in SELECT.",
55 )
56 .with_statement(ctx.statement_index)
57 .with_span(span);
58 if let Some(edits) = r.edits {
59 issue = issue.with_autofix_edits(IssueAutofixApplicability::Safe, edits);
60 }
61 Some(issue)
62 })
63 .collect()
64 }
65}
66
67fn item_band(item: &SelectItem) -> u8 {
76 match item {
77 SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _) => 0,
78 SelectItem::UnnamedExpr(expr) => expr_band(expr),
79 SelectItem::ExprWithAlias { expr, .. } => expr_band(expr),
80 }
81}
82
83fn expr_band(expr: &Expr) -> u8 {
84 match expr {
85 Expr::Identifier(_) | Expr::CompoundIdentifier(_) => 1,
87 Expr::Value(_) => 1,
89 Expr::Cast { .. } => 1,
93 Expr::Function(f) if is_cast_function(f) => 1,
95 Expr::Nested(inner) => expr_band(inner),
97 _ => 2,
99 }
100}
101
102fn is_cast_function(f: &sqlparser::ast::Function) -> bool {
104 use sqlparser::ast::ObjectNamePart;
105 f.name
106 .0
107 .last()
108 .and_then(ObjectNamePart::as_ident)
109 .is_some_and(|ident| ident.value.eq_ignore_ascii_case("CAST"))
110}
111
112struct ViolationInfo {
117 bands: Vec<u8>,
119 has_implicit_refs: bool,
121 first_item_hint: String,
124}
125
126fn check_select_order(select: &Select) -> Option<ViolationInfo> {
129 if select.projection.len() < 2 {
130 return None;
131 }
132
133 let bands: Vec<u8> = select.projection.iter().map(item_band).collect();
134
135 let mut max_band = 0u8;
137 let mut violated = false;
138 for &band in &bands {
139 if band < max_band {
140 violated = true;
141 break;
142 }
143 max_band = max_band.max(band);
144 }
145
146 if !violated {
147 return None;
148 }
149
150 let has_implicit_refs = has_implicit_column_references(select);
151
152 Some(ViolationInfo {
153 bands,
154 has_implicit_refs,
155 first_item_hint: select
156 .projection
157 .first()
158 .map(std::string::ToString::to_string)
159 .unwrap_or_default(),
160 })
161}
162
163fn has_implicit_column_references(select: &Select) -> bool {
166 if let GroupByExpr::Expressions(exprs, _) = &select.group_by {
167 for expr in exprs {
168 if matches!(expr, Expr::Value(v) if matches!(v.value, sqlparser::ast::Value::Number(_, _)))
169 {
170 return true;
171 }
172 }
173 }
174
175 for sort in &select.sort_by {
176 if matches!(&sort.expr, Expr::Value(v) if matches!(v.value, sqlparser::ast::Value::Number(_, _)))
177 {
178 return true;
179 }
180 }
181
182 false
183}
184
185fn visit_order_safe_selects<F: FnMut(&Select)>(statement: &Statement, visitor: &mut F) {
192 match statement {
193 Statement::Query(query) => visit_query_selects(query, visitor, false),
194 Statement::Insert(_) => {}
196 Statement::Merge(..) => {}
198 Statement::CreateTable(create) if create.query.is_some() => {
200 }
202 Statement::CreateView(CreateView { query, .. }) => {
204 visit_query_selects(query, visitor, false);
205 }
206 Statement::Update(Update {
207 table,
208 from,
209 selection,
210 ..
211 }) => {
212 visit_table_factor_selects(&table.relation, visitor);
214 for join in &table.joins {
215 visit_table_factor_selects(&join.relation, visitor);
216 }
217 if let Some(from) = from {
218 match from {
219 sqlparser::ast::UpdateTableFromKind::BeforeSet(tables)
220 | sqlparser::ast::UpdateTableFromKind::AfterSet(tables) => {
221 for t in tables {
222 visit_table_factor_selects(&t.relation, visitor);
223 for j in &t.joins {
224 visit_table_factor_selects(&j.relation, visitor);
225 }
226 }
227 }
228 }
229 }
230 if let Some(sel) = selection {
231 visit_expr_selects(sel, visitor);
232 }
233 }
234 _ => {}
235 }
236}
237
238fn visit_query_selects<F: FnMut(&Select)>(query: &Query, visitor: &mut F, in_set_operation: bool) {
239 let order_sensitive_ctes = order_sensitive_cte_names_for_query(query);
240
241 if let Some(with) = &query.with {
246 for cte in &with.cte_tables {
247 let cte_name = cte.alias.name.value.to_ascii_uppercase();
248 let cte_order_matters =
249 in_set_operation || order_sensitive_ctes.contains(cte_name.as_str());
250 visit_query_selects(&cte.query, visitor, cte_order_matters);
251 }
252 }
253
254 visit_set_expr_selects(&query.body, visitor, in_set_operation);
255}
256
257fn visit_set_expr_selects<F: FnMut(&Select)>(
258 set_expr: &SetExpr,
259 visitor: &mut F,
260 in_set_operation: bool,
261) {
262 match set_expr {
263 SetExpr::Select(select) => {
264 if in_set_operation {
265 return;
269 }
270 visitor(select);
271 for table in &select.from {
273 visit_table_factor_selects(&table.relation, visitor);
274 for join in &table.joins {
275 visit_table_factor_selects(&join.relation, visitor);
276 }
277 }
278 if let Some(sel) = &select.selection {
280 visit_expr_selects(sel, visitor);
281 }
282 }
283 SetExpr::Query(query) => visit_query_selects(query, visitor, in_set_operation),
284 SetExpr::SetOperation { left, right, .. } => {
285 visit_set_expr_selects(left, visitor, true);
287 visit_set_expr_selects(right, visitor, true);
288 }
289 _ => {}
290 }
291}
292
293fn visit_table_factor_selects<F: FnMut(&Select)>(table_factor: &TableFactor, visitor: &mut F) {
294 match table_factor {
295 TableFactor::Derived { subquery, .. } => {
296 visit_query_selects(subquery, visitor, false);
297 }
298 TableFactor::NestedJoin {
299 table_with_joins, ..
300 } => {
301 visit_table_factor_selects(&table_with_joins.relation, visitor);
302 for join in &table_with_joins.joins {
303 visit_table_factor_selects(&join.relation, visitor);
304 }
305 }
306 _ => {}
307 }
308}
309
310fn visit_expr_selects<F: FnMut(&Select)>(expr: &Expr, visitor: &mut F) {
311 match expr {
312 Expr::Subquery(query)
313 | Expr::Exists {
314 subquery: query, ..
315 } => visit_query_selects(query, visitor, false),
316 Expr::InSubquery {
317 expr: inner,
318 subquery,
319 ..
320 } => {
321 visit_expr_selects(inner, visitor);
322 visit_query_selects(subquery, visitor, false);
323 }
324 Expr::BinaryOp { left, right, .. } => {
325 visit_expr_selects(left, visitor);
326 visit_expr_selects(right, visitor);
327 }
328 Expr::UnaryOp { expr: inner, .. }
329 | Expr::Nested(inner)
330 | Expr::IsNull(inner)
331 | Expr::IsNotNull(inner)
332 | Expr::Cast { expr: inner, .. } => visit_expr_selects(inner, visitor),
333 Expr::Case {
334 operand,
335 conditions,
336 else_result,
337 ..
338 } => {
339 if let Some(op) = operand {
340 visit_expr_selects(op, visitor);
341 }
342 for when in conditions {
343 visit_expr_selects(&when.condition, visitor);
344 visit_expr_selects(&when.result, visitor);
345 }
346 if let Some(e) = else_result {
347 visit_expr_selects(e, visitor);
348 }
349 }
350 _ => {}
351 }
352}
353
354fn order_sensitive_cte_names_for_query(query: &Query) -> HashSet<String> {
355 let Some(with) = &query.with else {
356 return HashSet::new();
357 };
358
359 let cte_names: HashSet<String> = with
360 .cte_tables
361 .iter()
362 .map(|cte| cte.alias.name.value.to_ascii_uppercase())
363 .collect();
364 if cte_names.is_empty() {
365 return HashSet::new();
366 }
367
368 let mut deps_by_cte: HashMap<String, HashSet<String>> = HashMap::new();
370 for cte in &with.cte_tables {
371 let mut deps = HashSet::new();
372 collect_cte_references_in_set_expr(&cte.query.body, &cte_names, &mut deps);
373 deps_by_cte.insert(cte.alias.name.value.to_ascii_uppercase(), deps);
374 }
375
376 let mut sensitive = HashSet::new();
379 if matches!(query.body.as_ref(), SetExpr::SetOperation { .. })
380 && set_expr_has_wildcard_select(&query.body)
381 {
382 collect_cte_references_in_set_expr(&query.body, &cte_names, &mut sensitive);
383 }
384 for cte in &with.cte_tables {
385 if matches!(cte.query.body.as_ref(), SetExpr::SetOperation { .. })
386 && set_expr_has_wildcard_select(&cte.query.body)
387 {
388 collect_cte_references_in_set_expr(&cte.query.body, &cte_names, &mut sensitive);
389 }
390 }
391
392 let mut stack: Vec<String> = sensitive.iter().cloned().collect();
394 while let Some(current) = stack.pop() {
395 let Some(deps) = deps_by_cte.get(¤t) else {
396 continue;
397 };
398 for dep in deps {
399 if sensitive.insert(dep.clone()) {
400 stack.push(dep.clone());
401 }
402 }
403 }
404
405 sensitive
406}
407
408fn collect_cte_references_in_set_expr(
409 set_expr: &SetExpr,
410 cte_names: &HashSet<String>,
411 out: &mut HashSet<String>,
412) {
413 match set_expr {
414 SetExpr::Select(select) => collect_cte_references_in_select(select, cte_names, out),
415 SetExpr::SetOperation { left, right, .. } => {
416 collect_cte_references_in_set_expr(left, cte_names, out);
417 collect_cte_references_in_set_expr(right, cte_names, out);
418 }
419 SetExpr::Query(query) => collect_cte_references_in_set_expr(&query.body, cte_names, out),
420 _ => {}
421 }
422}
423
424fn collect_cte_references_in_select(
425 select: &Select,
426 cte_names: &HashSet<String>,
427 out: &mut HashSet<String>,
428) {
429 for table in &select.from {
430 collect_cte_references_in_table_factor(&table.relation, cte_names, out);
431 for join in &table.joins {
432 collect_cte_references_in_table_factor(&join.relation, cte_names, out);
433 }
434 }
435}
436
437fn collect_cte_references_in_table_factor(
438 table_factor: &TableFactor,
439 cte_names: &HashSet<String>,
440 out: &mut HashSet<String>,
441) {
442 match table_factor {
443 TableFactor::Table { name, .. } => {
444 if let Some(ident) = name.0.last().and_then(|part| part.as_ident()) {
445 let upper = ident.value.to_ascii_uppercase();
446 if cte_names.contains(upper.as_str()) {
447 out.insert(upper);
448 }
449 }
450 }
451 TableFactor::Derived { subquery, .. } => {
452 collect_cte_references_in_set_expr(&subquery.body, cte_names, out);
453 }
454 TableFactor::NestedJoin {
455 table_with_joins, ..
456 } => {
457 collect_cte_references_in_table_factor(&table_with_joins.relation, cte_names, out);
458 for join in &table_with_joins.joins {
459 collect_cte_references_in_table_factor(&join.relation, cte_names, out);
460 }
461 }
462 TableFactor::Pivot { table, .. }
463 | TableFactor::Unpivot { table, .. }
464 | TableFactor::MatchRecognize { table, .. } => {
465 collect_cte_references_in_table_factor(table, cte_names, out);
466 }
467 _ => {}
468 }
469}
470
471fn set_expr_has_wildcard_select(set_expr: &SetExpr) -> bool {
475 match set_expr {
476 SetExpr::Select(select) => select.projection.iter().any(|item| {
477 matches!(
478 item,
479 SelectItem::Wildcard(_) | SelectItem::QualifiedWildcard(_, _)
480 )
481 }),
482 SetExpr::SetOperation { left, right, .. } => {
483 set_expr_has_wildcard_select(left) || set_expr_has_wildcard_select(right)
484 }
485 SetExpr::Query(query) => set_expr_has_wildcard_select(&query.body),
486 _ => false,
487 }
488}
489
490#[derive(Clone, Debug)]
495struct PositionedToken {
496 token: Token,
497 start: usize,
498 end: usize,
499}
500
501#[derive(Clone, Debug)]
502struct SelectProjectionSegment {
503 items: Vec<SelectProjectionItem>,
504}
505
506#[derive(Clone, Debug)]
507struct SelectProjectionItem {
508 core_span: Span,
509 leading_span: Span,
510 trailing_span: Span,
511}
512
513#[derive(Clone, Debug)]
514struct St006AutofixCandidate {
515 span: Span,
516 edits: Vec<IssuePatchEdit>,
517}
518
519struct ResolvedViolation {
523 span: Option<Span>,
524 edits: Option<Vec<IssuePatchEdit>>,
525}
526
527fn st006_resolve_violations(
529 ctx: &LintContext,
530 violations: &[ViolationInfo],
531) -> Vec<ResolvedViolation> {
532 let candidates = st006_autofix_candidates_for_context(ctx, violations);
533
534 if candidates.len() == violations.len() {
536 return candidates
537 .into_iter()
538 .map(|c| ResolvedViolation {
539 span: Some(c.span),
540 edits: if c.edits.is_empty() {
541 None
542 } else {
543 Some(c.edits)
544 },
545 })
546 .collect();
547 }
548
549 let spans = st006_violation_spans(ctx, violations);
552 if spans.len() == violations.len() {
553 return spans
554 .into_iter()
555 .map(|span| ResolvedViolation {
556 span: Some(span),
557 edits: None,
558 })
559 .collect();
560 }
561
562 violations
564 .iter()
565 .map(|_| ResolvedViolation {
566 span: None,
567 edits: None,
568 })
569 .collect()
570}
571
572fn positioned_tokens_for_context(ctx: &LintContext) -> Vec<PositionedToken> {
573 let from_document_tokens = ctx.with_document_tokens(|tokens| {
574 if tokens.is_empty() {
575 return None;
576 }
577
578 let mut positioned = Vec::new();
579 for token in tokens {
580 let (start, end) = token_with_span_offsets(ctx.sql, token)?;
581 if start < ctx.statement_range.start || end > ctx.statement_range.end {
582 continue;
583 }
584 positioned.push(PositionedToken {
585 token: token.token.clone(),
586 start,
587 end,
588 });
589 }
590 Some(positioned)
591 });
592
593 if let Some(tokens) = from_document_tokens {
594 tokens
595 } else {
596 let Some(tokens) = tokenize_with_spans(ctx.statement_sql(), ctx.dialect()) else {
597 return Vec::new();
598 };
599
600 let mut positioned = Vec::new();
601 for token in &tokens {
602 let Some((start, end)) = token_with_span_offsets(ctx.statement_sql(), token) else {
603 continue;
604 };
605 positioned.push(PositionedToken {
606 token: token.token.clone(),
607 start: ctx.statement_range.start + start,
608 end: ctx.statement_range.start + end,
609 });
610 }
611 positioned
612 }
613}
614
615fn st006_autofix_candidates_for_context(
616 ctx: &LintContext,
617 violations: &[ViolationInfo],
618) -> Vec<St006AutofixCandidate> {
619 let tokens = positioned_tokens_for_context(ctx);
620 let segments = select_projection_segments(&tokens);
621
622 if segments.len() < violations.len() {
627 return Vec::new();
628 }
629
630 let mut candidates = Vec::new();
634 let mut violation_idx = 0;
635 for segment in &segments {
636 if violation_idx >= violations.len() {
637 break;
638 }
639 let violation = &violations[violation_idx];
640 if segment.items.len() != violation.bands.len() {
641 continue;
642 }
643 if !segment_first_item_matches(ctx.sql, segment, &violation.first_item_hint) {
644 continue;
645 }
646
647 if violation.has_implicit_refs {
649 violation_idx += 1;
650 continue;
651 }
652
653 if let Some(candidate) =
654 projection_reorder_candidate_by_band(ctx.sql, &tokens, segment, &violation.bands)
655 {
656 candidates.push(candidate);
657 }
658 violation_idx += 1;
659 }
660
661 candidates
662}
663
664fn st006_violation_spans(ctx: &LintContext, violations: &[ViolationInfo]) -> Vec<Span> {
667 let tokens = positioned_tokens_for_context(ctx);
668 let segments = select_projection_segments(&tokens);
669
670 if segments.len() < violations.len() {
671 return Vec::new();
672 }
673
674 let mut spans = Vec::new();
675 let mut violation_idx = 0;
676 for segment in &segments {
677 if violation_idx >= violations.len() {
678 break;
679 }
680 let violation = &violations[violation_idx];
681 if segment.items.len() != violation.bands.len() {
682 continue;
683 }
684 if !segment_first_item_matches(ctx.sql, segment, &violation.first_item_hint) {
685 continue;
686 }
687
688 if let Some(first) = segment.items.first() {
690 spans.push(first.core_span);
691 }
692 violation_idx += 1;
693 }
694
695 spans
696}
697
698fn select_projection_segments(tokens: &[PositionedToken]) -> Vec<SelectProjectionSegment> {
699 let significant_positions: Vec<usize> = tokens
700 .iter()
701 .enumerate()
702 .filter_map(|(index, token)| (!is_trivia(&token.token)).then_some(index))
703 .collect();
704 if significant_positions.is_empty() {
705 return Vec::new();
706 }
707
708 let mut depths = vec![0usize; significant_positions.len()];
709 let mut depth = 0usize;
710 for (position, token_index) in significant_positions.iter().copied().enumerate() {
711 depths[position] = depth;
712 match tokens[token_index].token {
713 Token::LParen => depth += 1,
714 Token::RParen => depth = depth.saturating_sub(1),
715 _ => {}
716 }
717 }
718
719 let mut segments = Vec::new();
720 for position in 0..significant_positions.len() {
721 let token = &tokens[significant_positions[position]].token;
722 if !token_word_equals(token, "SELECT") {
723 continue;
724 }
725
726 let base_depth = depths[position];
727 let Some(projection_start) = projection_start_after_select(
728 tokens,
729 &significant_positions,
730 &depths,
731 position + 1,
732 base_depth,
733 ) else {
734 continue;
735 };
736 let Some(from_position) = from_position_for_select(
737 tokens,
738 &significant_positions,
739 &depths,
740 projection_start,
741 base_depth,
742 ) else {
743 continue;
744 };
745 if from_position <= projection_start {
746 continue;
747 }
748
749 let items = projection_items(
750 tokens,
751 &significant_positions,
752 &depths,
753 projection_start,
754 from_position,
755 base_depth,
756 );
757 if items.is_empty() {
758 continue;
759 }
760
761 segments.push(SelectProjectionSegment { items });
762 }
763
764 segments
765}
766
767fn projection_start_after_select(
768 tokens: &[PositionedToken],
769 significant_positions: &[usize],
770 depths: &[usize],
771 mut position: usize,
772 base_depth: usize,
773) -> Option<usize> {
774 while position < significant_positions.len() {
775 if depths[position] != base_depth {
776 return Some(position);
777 }
778
779 let token = &tokens[significant_positions[position]].token;
780 if token_word_equals(token, "DISTINCT")
781 || token_word_equals(token, "ALL")
782 || token_word_equals(token, "DISTINCTROW")
783 {
784 position += 1;
785 continue;
786 }
787 return Some(position);
788 }
789
790 None
791}
792
793fn from_position_for_select(
794 tokens: &[PositionedToken],
795 significant_positions: &[usize],
796 depths: &[usize],
797 start_position: usize,
798 base_depth: usize,
799) -> Option<usize> {
800 (start_position..significant_positions.len()).find(|&position| {
801 depths[position] == base_depth
802 && token_word_equals(&tokens[significant_positions[position]].token, "FROM")
803 })
804}
805
806fn projection_items(
807 tokens: &[PositionedToken],
808 significant_positions: &[usize],
809 depths: &[usize],
810 start_position: usize,
811 from_position: usize,
812 base_depth: usize,
813) -> Vec<SelectProjectionItem> {
814 if start_position >= from_position {
815 return Vec::new();
816 }
817
818 let mut core_items: Vec<(usize, usize, Option<usize>)> = Vec::new();
819 let mut item_start = start_position;
820
821 for position in start_position..from_position {
822 let token = &tokens[significant_positions[position]].token;
823 if depths[position] == base_depth && matches!(token, Token::Comma) {
824 if item_start < position {
825 core_items.push((item_start, position - 1, Some(position)));
826 }
827 item_start = position + 1;
828 }
829 }
830
831 if item_start < from_position {
832 core_items.push((item_start, from_position - 1, None));
833 }
834
835 if core_items.is_empty() {
836 return Vec::new();
837 }
838
839 let mut items = Vec::with_capacity(core_items.len());
840 let mut previous_comma_end = 0usize;
841 for (index, (core_start_position, core_end_position, comma_position)) in
842 core_items.iter().copied().enumerate()
843 {
844 let Some(core_span) = span_from_positions(
845 tokens,
846 significant_positions,
847 core_start_position,
848 core_end_position,
849 ) else {
850 return Vec::new();
851 };
852
853 let leading_start = if index == 0 {
854 core_span.start
855 } else {
856 previous_comma_end
857 };
858 let leading_end = core_span.start;
859
860 let trailing_end = if let Some(comma_position) = comma_position {
861 tokens[significant_positions[comma_position]].start
862 } else {
863 core_span.end
864 };
865 if trailing_end < core_span.end {
866 return Vec::new();
867 }
868
869 let leading_span = Span::new(leading_start, leading_end);
870 let trailing_span = Span::new(core_span.end, trailing_end);
871 items.push(SelectProjectionItem {
872 core_span,
873 leading_span,
874 trailing_span,
875 });
876
877 if let Some(comma_position) = comma_position {
878 previous_comma_end = tokens[significant_positions[comma_position]].end;
879 }
880 }
881
882 items
883}
884
885fn span_from_positions(
886 tokens: &[PositionedToken],
887 significant_positions: &[usize],
888 start_position: usize,
889 end_position: usize,
890) -> Option<Span> {
891 if end_position < start_position {
892 return None;
893 }
894
895 let start = tokens[*significant_positions.get(start_position)?].start;
896 let end = tokens[*significant_positions.get(end_position)?].end;
897 (start < end).then_some(Span::new(start, end))
898}
899
900fn projection_reorder_candidate_by_band(
902 sql: &str,
903 tokens: &[PositionedToken],
904 segment: &SelectProjectionSegment,
905 bands: &[u8],
906) -> Option<St006AutofixCandidate> {
907 if segment.items.len() != bands.len() {
908 return None;
909 }
910
911 let replace_span = Span::new(
912 segment.items.first()?.core_span.start,
913 segment.items.last()?.core_span.end,
914 );
915 if replace_span.start >= replace_span.end || replace_span.end > sql.len() {
916 return None;
917 }
918
919 let mut normalized_items = Vec::with_capacity(segment.items.len());
920 for item in &segment.items {
921 let core_span = item.core_span;
922 if core_span.start >= core_span.end || core_span.end > sql.len() {
923 return None;
924 }
925 if trailing_span_has_inline_comment(tokens, sql, core_span, item.trailing_span) {
926 return None;
927 }
928 let text = sql[core_span.start..core_span.end].trim();
929 let normalized = normalize_projection_item_text(text)?;
930
931 let leading = if item.leading_span.start < item.leading_span.end
932 && item.leading_span.end <= sql.len()
933 {
934 &sql[item.leading_span.start..item.leading_span.end]
935 } else {
936 ""
937 };
938 let trailing = if item.trailing_span.start < item.trailing_span.end
939 && item.trailing_span.end <= sql.len()
940 {
941 &sql[item.trailing_span.start..item.trailing_span.end]
942 } else {
943 ""
944 };
945 normalized_items.push((normalized, leading, trailing));
946 }
947
948 let mut indexed: Vec<(usize, u8)> = normalized_items
950 .iter()
951 .enumerate()
952 .zip(bands.iter())
953 .map(|((i, _), &band)| (i, band))
954 .collect();
955 indexed.sort_by_key(|&(i, band)| (band, i));
956
957 let original_segment = &sql[replace_span.start..replace_span.end];
958 let replacement = if original_segment.contains('\n') || original_segment.contains('\r') {
959 let indent = indent_prefix_for_offset(sql, segment.items.first()?.core_span.start);
960 let default_separator = format!(",\n{indent}");
961 let mut rewritten = String::new();
962 for (position, &(item_index, _)) in indexed.iter().enumerate() {
963 let (core_text, leading, trailing) = &normalized_items[item_index];
964 if position > 0 {
965 if leading.is_empty() {
966 rewritten.push_str(&default_separator);
967 } else {
968 rewritten.push(',');
969 rewritten.push_str(leading);
970 }
971 }
972 rewritten.push_str(core_text);
973 rewritten.push_str(trailing);
974 }
975 rewritten
976 } else {
977 indexed
978 .iter()
979 .map(|(item_index, _)| normalized_items[*item_index].0.as_str())
980 .collect::<Vec<_>>()
981 .join(", ")
982 };
983 if replacement.is_empty() || replacement == original_segment {
984 return None;
985 }
986
987 Some(St006AutofixCandidate {
988 span: replace_span,
989 edits: vec![IssuePatchEdit::new(replace_span, replacement)],
990 })
991}
992
993fn trailing_span_has_inline_comment(
994 tokens: &[PositionedToken],
995 sql: &str,
996 core_span: Span,
997 trailing_span: Span,
998) -> bool {
999 if trailing_span.start >= trailing_span.end {
1000 return false;
1001 }
1002 tokens.iter().any(|token| {
1003 if token.start < trailing_span.start
1004 || token.end > trailing_span.end
1005 || !is_comment(&token.token)
1006 {
1007 return false;
1008 }
1009 if token.start <= core_span.end || token.start > sql.len() || core_span.end > sql.len() {
1010 return true;
1011 }
1012 let between = &sql[core_span.end..token.start];
1013 !between.contains('\n') && !between.contains('\r')
1014 })
1015}
1016
1017fn normalize_projection_item_text(text: &str) -> Option<String> {
1018 let trimmed = text.trim();
1019 if trimmed.is_empty() {
1020 return None;
1021 }
1022 Some(trimmed.to_string())
1023}
1024
1025fn segment_first_item_matches(sql: &str, segment: &SelectProjectionSegment, hint: &str) -> bool {
1026 let Some(first_item) = segment.items.first() else {
1027 return false;
1028 };
1029 let span = first_item.core_span;
1030 if span.start >= span.end || span.end > sql.len() {
1031 return false;
1032 }
1033 normalize_item_hint(&sql[span.start..span.end]) == normalize_item_hint(hint)
1034}
1035
1036fn normalize_item_hint(text: &str) -> String {
1037 text.chars()
1038 .filter(|ch| !ch.is_whitespace())
1039 .flat_map(|ch| ch.to_uppercase())
1040 .collect()
1041}
1042
1043fn indent_prefix_for_offset(sql: &str, offset: usize) -> String {
1044 let start = sql[..offset].rfind('\n').map_or(0, |idx| idx + 1);
1045 sql[start..offset]
1046 .chars()
1047 .take_while(|ch| ch.is_whitespace())
1048 .collect()
1049}
1050
1051fn tokenize_with_spans(sql: &str, dialect: Dialect) -> Option<Vec<TokenWithSpan>> {
1056 let dialect = dialect.to_sqlparser_dialect();
1057 let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
1058 tokenizer.tokenize_with_location().ok()
1059}
1060
1061fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
1062 let start = line_col_to_offset(
1063 sql,
1064 token.span.start.line as usize,
1065 token.span.start.column as usize,
1066 )?;
1067 let end = line_col_to_offset(
1068 sql,
1069 token.span.end.line as usize,
1070 token.span.end.column as usize,
1071 )?;
1072 Some((start, end))
1073}
1074
1075fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
1076 if line == 0 || column == 0 {
1077 return None;
1078 }
1079
1080 let mut current_line = 1usize;
1081 let mut current_col = 1usize;
1082 for (offset, ch) in sql.char_indices() {
1083 if current_line == line && current_col == column {
1084 return Some(offset);
1085 }
1086 if ch == '\n' {
1087 current_line += 1;
1088 current_col = 1;
1089 } else {
1090 current_col += 1;
1091 }
1092 }
1093 if current_line == line && current_col == column {
1094 return Some(sql.len());
1095 }
1096 None
1097}
1098
1099fn token_word_equals(token: &Token, expected_upper: &str) -> bool {
1100 matches!(token, Token::Word(word) if word.value.eq_ignore_ascii_case(expected_upper))
1101}
1102
1103fn is_trivia(token: &Token) -> bool {
1104 matches!(
1105 token,
1106 Token::Whitespace(
1107 Whitespace::Space
1108 | Whitespace::Newline
1109 | Whitespace::Tab
1110 | Whitespace::SingleLineComment { .. }
1111 | Whitespace::MultiLineComment(_)
1112 )
1113 )
1114}
1115
1116fn is_comment(token: &Token) -> bool {
1117 matches!(
1118 token,
1119 Token::Whitespace(Whitespace::SingleLineComment { .. } | Whitespace::MultiLineComment(_))
1120 )
1121}
1122
1123#[cfg(test)]
1124mod tests {
1125 use super::*;
1126 use crate::parser::parse_sql;
1127 use crate::types::IssueAutofixApplicability;
1128
1129 fn run(sql: &str) -> Vec<Issue> {
1130 let statements = parse_sql(sql).expect("parse");
1131 let rule = StructureColumnOrder;
1132 statements
1133 .iter()
1134 .enumerate()
1135 .flat_map(|(index, statement)| {
1136 rule.check(
1137 statement,
1138 &LintContext {
1139 sql,
1140 statement_range: 0..sql.len(),
1141 statement_index: index,
1142 },
1143 )
1144 })
1145 .collect()
1146 }
1147
1148 fn apply_edits(sql: &str, edits: &[IssuePatchEdit]) -> String {
1149 let mut output = sql.to_string();
1150 let mut ordered = edits.iter().collect::<Vec<_>>();
1151 ordered.sort_by_key(|edit| edit.span.start);
1152
1153 for edit in ordered.into_iter().rev() {
1154 output.replace_range(edit.span.start..edit.span.end, &edit.replacement);
1155 }
1156
1157 output
1158 }
1159
1160 #[test]
1163 fn pass_select_statement_order() {
1164 let issues = run("SELECT a, cast(b as int) as b, c FROM x");
1166 assert!(issues.is_empty());
1167 }
1168
1169 #[test]
1170 fn pass_union_statements_ignored() {
1171 let sql = "SELECT a + b as c, d FROM table_a UNION ALL SELECT c, d FROM table_b";
1172 let issues = run(sql);
1173 assert!(issues.is_empty());
1174 }
1175
1176 #[test]
1177 fn pass_insert_statements_ignored() {
1178 let sql = "\
1179INSERT INTO example_schema.example_table
1180(id, example_column, rank_asc, rank_desc)
1181SELECT
1182 id,
1183 CASE WHEN col_a IN('a', 'b', 'c') THEN col_a END AS example_column,
1184 rank_asc,
1185 rank_desc
1186FROM another_schema.another_table";
1187 let issues = run(sql);
1188 assert!(issues.is_empty());
1189 }
1190
1191 #[test]
1192 fn pass_insert_statement_with_cte_ignored() {
1193 let sql = "\
1194INSERT INTO my_table
1195WITH my_cte AS (SELECT * FROM t1)
1196SELECT MAX(field1), field2
1197FROM t1";
1198 let issues = run(sql);
1199 assert!(issues.is_empty());
1200 }
1201
1202 #[test]
1203 fn with_cte_insert_into_still_checks_cte() {
1204 let sql = "\
1209WITH my_cte AS (
1210 SELECT MAX(field1) AS mx, field2 FROM t1
1211)
1212INSERT INTO my_table (col1, col2)
1213SELECT mx, field2 FROM my_cte";
1214 let issues = run(sql);
1215 assert_eq!(issues.len(), 1);
1217 }
1218
1219 #[test]
1220 fn with_cte_insert_into_no_violation_when_ordered() {
1221 let sql = "\
1223WITH my_cte AS (
1224 SELECT field2, MAX(field1) AS mx FROM t1
1225)
1226INSERT INTO my_table (col1, col2)
1227SELECT mx, field2 FROM my_cte";
1228 let issues = run(sql);
1229 assert!(issues.is_empty());
1230 }
1231
1232 #[test]
1233 fn pass_merge_statements_ignored() {
1234 let sql = "\
1235MERGE INTO t
1236USING
1237(
1238 SELECT
1239 DATE_TRUNC('DAY', end_time) AS time_day,
1240 b
1241 FROM u
1242) AS u ON (a = b)
1243WHEN MATCHED THEN
1244UPDATE SET a = b
1245WHEN NOT MATCHED THEN
1246INSERT (b) VALUES (c)";
1247 let issues = run(sql);
1248 assert!(issues.is_empty());
1249 }
1250
1251 #[test]
1252 fn pass_merge_statement_with_cte_ignored() {
1253 let sql = "\
1254MERGE INTO t
1255USING
1256(
1257 WITH my_cte AS (SELECT * FROM t1)
1258 SELECT MAX(field1), field2
1259 FROM t1
1260) AS u ON (a = b)
1261WHEN MATCHED THEN
1262UPDATE SET a = b
1263WHEN NOT MATCHED THEN
1264INSERT (b) VALUES (c)";
1265 let issues = run(sql);
1266 assert!(issues.is_empty());
1267 }
1268
1269 #[test]
1270 fn pass_create_table_as_select_with_cte_ignored() {
1271 let sql = "\
1272CREATE TABLE new_table AS (
1273 WITH my_cte AS (SELECT * FROM t1)
1274 SELECT MAX(field1), field2
1275 FROM t1
1276)";
1277 let issues = run(sql);
1278 assert!(issues.is_empty());
1279 }
1280
1281 #[test]
1282 fn pass_cte_used_in_set() {
1283 let sql = "\
1284WITH T1 AS (
1285 SELECT
1286 'a'::varchar AS A,
1287 1::bigint AS B
1288),
1289T2 AS (
1290 SELECT
1291 CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A,
1292 COL AS B
1293 FROM T
1294)
1295SELECT * FROM T1
1296UNION ALL
1297SELECT * FROM T2";
1298 let issues = run(sql);
1299 assert!(issues.is_empty());
1300 }
1301
1302 #[test]
1303 fn fail_cte_used_in_set_with_explicit_columns() {
1304 let sql = "\
1307WITH T1 AS (
1308 SELECT
1309 'a'::varchar AS A,
1310 1::bigint AS B
1311),
1312T2 AS (
1313 SELECT
1314 CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A,
1315 COL AS B
1316 FROM T
1317)
1318SELECT A, B FROM T1
1319UNION ALL
1320SELECT A, B FROM T2";
1321 let issues = run(sql);
1322 assert_eq!(issues.len(), 1);
1323 assert_eq!(issues[0].code, issue_codes::LINT_ST_006);
1324 }
1325
1326 #[test]
1327 fn pass_transitive_cte_dependency_into_wildcard_set_operation() {
1328 let sql = "\
1331WITH base_a AS (
1332 SELECT MAX(a) AS mx, b FROM t
1333),
1334base_b AS (
1335 SELECT MAX(c) AS mx, d FROM t2
1336),
1337combined AS (
1338 SELECT * FROM base_a
1339 UNION ALL
1340 SELECT * FROM base_b
1341)
1342SELECT mx, b FROM combined";
1343 let issues = run(sql);
1344 assert!(issues.is_empty());
1345 }
1346
1347 #[test]
1348 fn pass_subquery_used_in_set() {
1349 let sql = "\
1350SELECT * FROM (SELECT 'a'::varchar AS A, 1::bigint AS B)
1351UNION ALL
1352SELECT * FROM (SELECT CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A, COL AS B FROM T)";
1353 let issues = run(sql);
1354 assert!(issues.is_empty());
1355 }
1356
1357 #[test]
1360 fn fail_select_statement_order_1() {
1361 let sql = "SELECT a, row_number() over (partition by id order by date) as y, b FROM x";
1363 let issues = run(sql);
1364 assert_eq!(issues.len(), 1);
1365 assert_eq!(issues[0].code, issue_codes::LINT_ST_006);
1366 }
1367
1368 #[test]
1369 fn fail_select_statement_order_2() {
1370 let sql = "SELECT row_number() over (partition by id order by date) as y, *, cast(b as int) as b_int FROM x";
1372 let issues = run(sql);
1373 assert_eq!(issues.len(), 1);
1374 }
1375
1376 #[test]
1377 fn fail_select_statement_order_3() {
1378 let sql = "SELECT row_number() over (partition by id order by date) as y, cast(b as int) as b_int, * FROM x";
1380 let issues = run(sql);
1381 assert_eq!(issues.len(), 1);
1382 }
1383
1384 #[test]
1385 fn fail_select_statement_order_4() {
1386 let sql = "SELECT row_number() over (partition by id order by date) as y, b::int, * FROM x";
1388 let issues = run(sql);
1389 assert_eq!(issues.len(), 1);
1390 }
1391
1392 #[test]
1393 fn fail_select_statement_order_5() {
1394 let sql = "SELECT row_number() over (partition by id order by date) as y, *, 2::int + 4 as sum, cast(b) as c FROM x";
1396 let issues = run(sql);
1397 assert_eq!(issues.len(), 1);
1398 }
1399
1400 #[test]
1403 fn autofix_reorder_simple_before_complex() {
1404 let sql = "SELECT a + 1, a FROM t";
1405 let issues = run(sql);
1406 assert_eq!(issues.len(), 1);
1407
1408 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1409 assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);
1410 let fixed = apply_edits(sql, &autofix.edits);
1411 assert_eq!(fixed, "SELECT a, a + 1 FROM t");
1412 }
1413
1414 #[test]
1415 fn autofix_reorder_wildcard_first() {
1416 let sql = "SELECT row_number() over (partition by id order by date) as y, *, cast(b as int) as b_int FROM x";
1417 let issues = run(sql);
1418 assert_eq!(issues.len(), 1);
1419
1420 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1421 let fixed = apply_edits(sql, &autofix.edits);
1422 assert_eq!(fixed, "SELECT *, cast(b as int) as b_int, row_number() over (partition by id order by date) as y FROM x");
1423 }
1424
1425 #[test]
1426 fn autofix_reorder_with_casts() {
1427 let sql = "SELECT row_number() over (partition by id order by date) as y, b::int, * FROM x";
1428 let issues = run(sql);
1429 assert_eq!(issues.len(), 1);
1430
1431 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1432 let fixed = apply_edits(sql, &autofix.edits);
1433 assert_eq!(
1434 fixed,
1435 "SELECT *, b::int, row_number() over (partition by id order by date) as y FROM x"
1436 );
1437 }
1438
1439 #[test]
1440 fn autofix_fail_order_5_complex() {
1441 let sql = "SELECT row_number() over (partition by id order by date) as y, *, 2::int + 4 as sum, cast(b) as c FROM x";
1444 let issues = run(sql);
1445 assert_eq!(issues.len(), 1);
1446
1447 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1448 let fixed = apply_edits(sql, &autofix.edits);
1449 assert_eq!(fixed, "SELECT *, cast(b) as c, row_number() over (partition by id order by date) as y, 2::int + 4 as sum FROM x");
1450 }
1451
1452 #[test]
1453 fn no_autofix_with_implicit_column_references() {
1454 let sql =
1455 "SELECT DATE_TRUNC('DAY', end_time) AS time_day, b_field FROM table_name GROUP BY 1, 2";
1456 let issues = run(sql);
1457 assert_eq!(issues.len(), 1);
1458 assert!(
1459 issues[0].autofix.is_none(),
1460 "should not autofix when implicit column references exist"
1461 );
1462 }
1463
1464 #[test]
1465 fn autofix_explicit_column_references() {
1466 let sql = "SELECT DATE_TRUNC('DAY', end_time) AS time_day, b_field FROM table_name GROUP BY time_day, b_field";
1467 let issues = run(sql);
1468 assert_eq!(issues.len(), 1);
1469
1470 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1471 let fixed = apply_edits(sql, &autofix.edits);
1472 assert_eq!(fixed, "SELECT b_field, DATE_TRUNC('DAY', end_time) AS time_day FROM table_name GROUP BY time_day, b_field");
1473 }
1474
1475 #[test]
1476 fn autofix_reorders_multiline_targets_without_quotes() {
1477 let sql = "SELECT\n SUM(a) AS total,\n a\nFROM t";
1478 let issues = run(sql);
1479 assert_eq!(issues.len(), 1);
1480
1481 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1482 let fixed = apply_edits(sql, &autofix.edits);
1483 assert!(
1484 fixed.contains("a,\n SUM(a) AS total"),
1485 "expected reordered multiline projection, got: {fixed}"
1486 );
1487
1488 parse_sql(&fixed).expect("fixed SQL should remain parseable");
1489 }
1490
1491 #[test]
1492 fn autofix_reorders_multiline_targets_with_inter_item_comment() {
1493 let sql = "SELECT\n -- total usage for period\n SUM(a) AS total,\n a\nFROM t";
1494 let issues = run(sql);
1495 assert_eq!(issues.len(), 1);
1496
1497 let autofix = issues[0].autofix.as_ref().expect("expected ST006 autofix");
1498 let fixed = apply_edits(sql, &autofix.edits);
1499 assert!(
1500 fixed.contains("-- total usage for period"),
1501 "expected inter-item comment to be preserved, got: {fixed}"
1502 );
1503 assert!(
1504 fixed.contains("a,\n SUM(a) AS total"),
1505 "expected reordered projection, got: {fixed}"
1506 );
1507
1508 parse_sql(&fixed).expect("fixed SQL should remain parseable");
1509 }
1510
1511 #[test]
1512 fn autofix_reorders_trailing_simple_column_after_subquery_expressions() {
1513 let sql = "SELECT\n a.table_full_name AS table_a,\n b.table_full_name AS table_b,\n (\n SELECT count(*)\n FROM unnest(a.columns) AS ac\n WHERE ac = ANY(b.columns)\n ) AS intersection_size,\n a.column_count + b.column_count - (\n SELECT count(*)\n FROM unnest(a.columns) AS ac\n WHERE ac = ANY(b.columns)\n ) AS union_size,\n a.connector_id\nFROM table_columns AS a\nINNER JOIN table_columns AS b\n ON a.connector_id = b.connector_id";
1514 let issues = run(sql);
1515 assert_eq!(issues.len(), 1);
1516 let autofix = issues[0]
1517 .autofix
1518 .as_ref()
1519 .expect("expected ST006 autofix metadata");
1520 let fixed = apply_edits(sql, &autofix.edits);
1521 assert!(
1522 fixed.contains("a.connector_id,\n (\n SELECT count(*)"),
1523 "expected simple trailing column to move before complex expressions, got: {fixed}"
1524 );
1525 parse_sql(&fixed).expect("fixed SQL should remain parseable");
1526 }
1527
1528 #[test]
1529 fn fail_cte_used_in_select_not_set() {
1530 let sql = "\
1532WITH T2 AS (
1533 SELECT
1534 CASE WHEN COL > 1 THEN 'x' ELSE 'y' END AS A,
1535 COL AS B
1536 FROM T
1537)
1538SELECT * FROM T2";
1539 let issues = run(sql);
1540 assert_eq!(issues.len(), 1);
1541 }
1542
1543 #[test]
1544 fn comment_in_projection_blocks_safe_autofix_metadata() {
1545 let sql = "SELECT a + 1 /*keep*/, a FROM t";
1546 let issues = run(sql);
1547 assert_eq!(issues.len(), 1);
1548 assert!(
1549 issues[0].autofix.is_none(),
1550 "comment-bearing projection should not receive ST006 safe patch metadata"
1551 );
1552 }
1553
1554 #[test]
1557 fn does_not_flag_when_simple_target_starts_projection() {
1558 let issues = run("SELECT a, a + 1 FROM t");
1559 assert!(issues.is_empty());
1560 }
1561
1562 #[test]
1563 fn flags_simple_target_after_complex() {
1564 let issues = run("SELECT a, a + 1, b FROM t");
1566 assert_eq!(issues.len(), 1);
1567 }
1568
1569 #[test]
1570 fn does_not_flag_when_alias_wraps_simple_identifier() {
1571 let issues = run("SELECT a AS first_a, b FROM t");
1572 assert!(issues.is_empty());
1573 }
1574
1575 #[test]
1576 fn flags_in_nested_select_scopes() {
1577 let issues = run("SELECT * FROM (SELECT a + 1, a FROM t) AS sub");
1578 assert_eq!(issues.len(), 1);
1579 }
1580}