sqruff_lib/rules/convention/
cv06.rs

1use ahash::{AHashMap, AHashSet};
2use itertools::Itertools;
3use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
4use sqruff_lib_core::lint_fix::LintFix;
5use sqruff_lib_core::parser::segments::{ErasedSegment, SegmentBuilder, Tables};
6use sqruff_lib_core::utils::functional::segments::Segments;
7
8use crate::core::config::Value;
9use crate::core::rules::context::RuleContext;
10use crate::core::rules::crawlers::{Crawler, RootOnlyCrawler};
11use crate::core::rules::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
12
/// Lint rule CV06 (`convention.terminator`): checks how statements are
/// terminated with a semi-colon.
#[derive(Default, Clone, Debug)]
pub struct RuleCV06 {
    /// When `true`, the semi-colon of a statement that spans several lines is
    /// expected on its own new line rather than directly after the statement.
    multiline_newline: bool,
    /// When `true`, a missing semi-colon after the last statement of the file
    /// is reported (and fixed).
    require_final_semicolon: bool,
}
18
19impl Rule for RuleCV06 {
20    fn load_from_config(&self, config: &AHashMap<String, Value>) -> Result<ErasedRule, String> {
21        let multiline_newline = config["multiline_newline"].as_bool().unwrap();
22        let require_final_semicolon = config["require_final_semicolon"].as_bool().unwrap();
23        Ok(Self {
24            multiline_newline,
25            require_final_semicolon,
26        }
27        .erased())
28    }
29
30    fn name(&self) -> &'static str {
31        "convention.terminator"
32    }
33
34    fn description(&self) -> &'static str {
35        "Statements must end with a semi-colon."
36    }
37
38    fn long_description(&self) -> &'static str {
39        r"
40**Anti-pattern**
41
42A statement is not immediately terminated with a semi-colon. The `•` represents space.
43
44```sql
45SELECT
46    a
47FROM foo
48
49;
50
51SELECT
52    b
53FROM bar••;
54```
55
56**Best practice**
57
58Immediately terminate the statement with a semi-colon.
59
60```sql
61SELECT
62    a
63FROM foo;
64```"
65    }
66
67    fn groups(&self) -> &'static [RuleGroups] {
68        &[RuleGroups::All, RuleGroups::Convention]
69    }
70
71    fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
72        debug_assert!(context.segment.is_type(SyntaxKind::File));
73
74        let mut results = vec![];
75        for (idx, segment) in context.segment.segments().iter().enumerate() {
76            let mut res = None;
77            if segment.is_type(SyntaxKind::StatementTerminator) {
78                // First we can simply handle the case of existing semi-colon alignment.
79                // If it's a terminator then we know it's raw.
80
81                res =
82                    self.handle_semicolon(context.tables, segment.clone(), context.segment.clone());
83            } else if self.require_final_semicolon && idx == context.segment.segments().len() - 1 {
84                // Otherwise, handle the end of the file separately.
85                res = self.ensure_final_semicolon(context.tables, context.segment.clone());
86            }
87            if let Some(res) = res {
88                results.push(res);
89            }
90        }
91        results
92    }
93
94    fn is_fix_compatible(&self) -> bool {
95        true
96    }
97
98    fn crawl_behaviour(&self) -> Crawler {
99        RootOnlyCrawler.into()
100    }
101}
102
103impl RuleCV06 {
104    // Adjust anchor_segment to not move trailing inline comment.
105    //
106    // We don't want to move inline comments that are on the same line
107    // as the preceding code segment as they could contain noqa instructions.
108    fn handle_trailing_inline_comments(
109        parent_segment: ErasedSegment,
110        anchor_segment: ErasedSegment,
111    ) -> ErasedSegment {
112        // See if we have a trailing inline comment on the same line as the preceding
113        // segment.
114        for comment_segment in parent_segment
115            .recursive_crawl(
116                const {
117                    &SyntaxSet::new(&[
118                        SyntaxKind::Comment,
119                        SyntaxKind::InlineComment,
120                        SyntaxKind::BlockComment,
121                    ])
122                },
123                true,
124                &SyntaxSet::EMPTY,
125                false,
126            )
127            .iter()
128        {
129            assert!(comment_segment.get_position_marker().is_some());
130            assert!(anchor_segment.get_position_marker().is_some());
131            if comment_segment
132                .get_position_marker()
133                .unwrap()
134                .working_line_no
135                == anchor_segment
136                    .get_position_marker()
137                    .unwrap()
138                    .working_line_no
139                && !comment_segment.is_type(SyntaxKind::BlockComment)
140            {
141                return comment_segment.clone();
142            }
143        }
144        anchor_segment
145    }
146
147    fn is_one_line_statement(parent_segment: ErasedSegment, segment: ErasedSegment) -> bool {
148        let statement_segment = parent_segment
149            .path_to(&segment)
150            .iter()
151            .filter(|&it| it.segment.is_type(SyntaxKind::Statement))
152            .map(|it| it.segment.clone())
153            .next();
154
155        match statement_segment {
156            None => false,
157            Some(statement_segment) => statement_segment
158                .recursive_crawl(
159                    const { &SyntaxSet::new(&[SyntaxKind::Newline]) },
160                    true,
161                    &SyntaxSet::EMPTY,
162                    true,
163                )
164                .is_empty(),
165        }
166    }
167
168    fn handle_semicolon(
169        &self,
170        tables: &Tables,
171        target_segment: ErasedSegment,
172        parent_segment: ErasedSegment,
173    ) -> Option<LintResult> {
174        let info = Self::get_segment_move_context(target_segment.clone(), parent_segment.clone());
175        let semicolon_newline = if !info.is_one_line {
176            self.multiline_newline
177        } else {
178            false
179        };
180
181        if !semicolon_newline {
182            self.handle_semicolon_same_line(tables, target_segment, parent_segment, info)
183        } else {
184            self.handle_semicolon_newline(tables, target_segment, parent_segment, info)
185        }
186    }
187
188    fn handle_semicolon_same_line(
189        &self,
190        tables: &Tables,
191        target_segment: ErasedSegment,
192        parent_segment: ErasedSegment,
193        info: SegmentMoveContext,
194    ) -> Option<LintResult> {
195        if info.before_segment.is_empty() {
196            return None;
197        }
198
199        // If preceding segments are found then delete the old
200        // semicolon and its preceding whitespace and then insert
201        // the semicolon in the correct location.
202        let fixes = self.create_semicolon_and_delete_whitespace(
203            target_segment,
204            parent_segment,
205            info.anchor_segment.clone(),
206            info.whitespace_deletions,
207            vec![
208                SegmentBuilder::token(tables.next_id(), ";", SyntaxKind::StatementTerminator)
209                    .finish(),
210            ],
211        );
212
213        Some(LintResult::new(
214            Some(info.anchor_segment),
215            fixes,
216            None,
217            None,
218        ))
219    }
220
221    /// Adjust segments to not move preceding inline comments.
222    ///
223    /// We don't want to move inline comments that are on the same line
224    /// as the preceding code segment as they could contain noqa instructions.
225    fn handle_preceding_inline_comments(
226        before_segment: Segments,
227        anchor_segment: ErasedSegment,
228    ) -> (Segments, ErasedSegment) {
229        // See if we have a preceding inline comment on the same line as the preceding
230        // segment.
231
232        let same_line_comment = before_segment.iter().find(|s| {
233            s.is_comment()
234                && !s.is_type(SyntaxKind::BlockComment)
235                && s.get_position_marker().is_some()
236                && s.get_position_marker().unwrap().working_loc().0
237                    == anchor_segment
238                        .get_raw_segments()
239                        .last()
240                        .unwrap()
241                        .get_position_marker()
242                        .unwrap()
243                        .working_loc()
244                        .0
245        });
246
247        // If so then make that our new anchor segment and adjust
248        // before_segment accordingly.
249        if let Some(same_line_comment) = same_line_comment {
250            let anchor_segment = same_line_comment.clone();
251            let before_segment = before_segment
252                .iter()
253                .take_while(|s| *s != same_line_comment)
254                .cloned()
255                .collect();
256            let before_segment = Segments::from_vec(before_segment, None);
257            (before_segment, anchor_segment)
258        } else {
259            (before_segment, anchor_segment)
260        }
261    }
262
263    fn handle_semicolon_newline(
264        &self,
265        tables: &Tables,
266        target_segment: ErasedSegment,
267        parent_segment: ErasedSegment,
268        info: SegmentMoveContext,
269    ) -> Option<LintResult> {
270        // Adjust before_segment and anchor_segment for preceding inline
271        // comments. Inline comments can contain noqa logic so we need to add the
272        // newline after the inline comment.
273        let (before_segment, anchor_segment) = Self::handle_preceding_inline_comments(
274            info.before_segment.clone(),
275            info.anchor_segment.clone(),
276        );
277
278        if before_segment.len() == 1
279            && before_segment
280                .all_match(|segment: &ErasedSegment| segment.is_type(SyntaxKind::Newline))
281        {
282            return None;
283        }
284
285        // If preceding segment is not a single newline then delete the old
286        // semicolon/preceding whitespace and then insert the
287        // semicolon in the correct location.
288        let anchor_segment =
289            Self::handle_trailing_inline_comments(parent_segment.clone(), anchor_segment.clone());
290        let fixes = if anchor_segment == target_segment {
291            vec![LintFix::replace(
292                anchor_segment.clone(),
293                vec![
294                    SegmentBuilder::whitespace(tables.next_id(), "\n"),
295                    SegmentBuilder::token(tables.next_id(), ";", SyntaxKind::StatementTerminator)
296                        .finish(),
297                ],
298                None,
299            )]
300        } else {
301            self.create_semicolon_and_delete_whitespace(
302                target_segment,
303                parent_segment,
304                anchor_segment.clone(),
305                info.whitespace_deletions.clone(),
306                vec![
307                    SegmentBuilder::newline(tables.next_id(), "\n"),
308                    SegmentBuilder::token(tables.next_id(), ";", SyntaxKind::StatementTerminator)
309                        .finish(),
310                ],
311            )
312        };
313
314        Some(LintResult::new(Some(anchor_segment), fixes, None, None))
315    }
316
317    fn create_semicolon_and_delete_whitespace(
318        &self,
319        target_segment: ErasedSegment,
320        parent_segment: ErasedSegment,
321        anchor_segment: ErasedSegment,
322        mut whitespace_deletions: Segments,
323        create_segments: Vec<ErasedSegment>,
324    ) -> Vec<LintFix> {
325        let anchor_segment = choose_anchor_segment(
326            &parent_segment,
327            EditType::CreateAfter,
328            &anchor_segment,
329            true,
330        );
331
332        let mut lintfix_fn: fn(
333            ErasedSegment,
334            Vec<ErasedSegment>,
335            Option<Vec<ErasedSegment>>,
336        ) -> LintFix = LintFix::create_after;
337        if AHashSet::from_iter(whitespace_deletions.base.clone()).contains(&anchor_segment) {
338            lintfix_fn = LintFix::replace;
339            whitespace_deletions =
340                whitespace_deletions.filter(|it: &ErasedSegment| it.id() != anchor_segment.id());
341        }
342
343        let mut fixes = vec![
344            lintfix_fn(anchor_segment, create_segments, None),
345            LintFix::delete(target_segment),
346        ];
347        fixes.extend(whitespace_deletions.into_iter().map(LintFix::delete));
348        fixes
349    }
350
351    fn ensure_final_semicolon(
352        &self,
353        tables: &Tables,
354        parent_segment: ErasedSegment,
355    ) -> Option<LintResult> {
356        // Iterate backwards over complete stack to find
357        // if the final semicolon is already present.
358        let mut anchor_segment = parent_segment.segments().last().cloned();
359        let trigger_segment = parent_segment.segments().last().cloned();
360        let mut semi_colon_exist_flag = false;
361        let mut is_one_line = false;
362        let mut before_segment = vec![];
363
364        let mut found_code = false;
365        for segment in parent_segment.segments().iter().rev() {
366            anchor_segment = Some(segment.clone());
367            if segment.is_type(SyntaxKind::StatementTerminator) {
368                semi_colon_exist_flag = true;
369            } else if segment.is_code() {
370                is_one_line = Self::is_one_line_statement(parent_segment.clone(), segment.clone());
371                found_code = true;
372                break;
373            } else if !segment.is_meta() {
374                before_segment.push(segment.clone());
375            }
376        }
377
378        if !found_code {
379            return None;
380        }
381
382        let semicolon_newline = if is_one_line {
383            false
384        } else {
385            self.multiline_newline
386        };
387        if !semi_colon_exist_flag {
388            // Create the final semicolon if it does not yet exist.
389
390            // Semicolon on same line.
391            return if !semicolon_newline {
392                let fixes = vec![LintFix::create_after(
393                    anchor_segment.unwrap().clone(),
394                    vec![
395                        SegmentBuilder::token(
396                            tables.next_id(),
397                            ";",
398                            SyntaxKind::StatementTerminator,
399                        )
400                        .finish(),
401                    ],
402                    None,
403                )];
404                Some(LintResult::new(
405                    Some(trigger_segment.unwrap().clone()),
406                    fixes,
407                    None,
408                    None,
409                ))
410            } else {
411                // Semi-colon on new line.
412                // Adjust before_segment and anchor_segment for inline
413                // comments.
414                let (_before_segment, anchor_segment) = Self::handle_preceding_inline_comments(
415                    Segments::from_vec(before_segment, None),
416                    anchor_segment.unwrap().clone(),
417                );
418                let fixes = vec![LintFix::create_after(
419                    anchor_segment.clone(),
420                    vec![
421                        SegmentBuilder::newline(tables.next_id(), "\n"),
422                        SegmentBuilder::token(
423                            tables.next_id(),
424                            ";",
425                            SyntaxKind::StatementTerminator,
426                        )
427                        .finish(),
428                    ],
429                    None,
430                )];
431
432                Some(LintResult::new(
433                    Some(trigger_segment.unwrap().clone()),
434                    fixes,
435                    None,
436                    None,
437                ))
438            };
439        }
440        None
441    }
442
443    fn get_segment_move_context(
444        target_segment: ErasedSegment,
445        parent_segment: ErasedSegment,
446    ) -> SegmentMoveContext {
447        // Locate the segment to be moved (i.e. context.segment) and search back
448        // over the raw stack to find the end of the preceding statement.
449
450        let reversed_raw_stack =
451            Segments::from_vec(parent_segment.get_raw_segments(), None).reversed();
452
453        let before_code = reversed_raw_stack
454            .after(&target_segment)
455            .take_while(|s| !s.is_code());
456        let before_segment = before_code.filter(|segment: &ErasedSegment| !segment.is_meta());
457
458        // We're selecting from the raw stack, so we know that before_code is made of
459        // raw elements.
460        let anchor_segment = if !before_code.is_empty() {
461            before_code.last().unwrap().clone()
462        } else {
463            target_segment.clone()
464        };
465
466        let first_code = reversed_raw_stack
467            .after(&target_segment)
468            .filter(|s: &ErasedSegment| s.is_code())
469            .first()
470            .cloned();
471
472        let is_one_line = first_code
473            .is_some_and(|segment| Self::is_one_line_statement(parent_segment, segment.clone()));
474
475        // We can tidy up any whitespace between the segment and the preceding
476        // code/comment segment. Don't mess with the comment spacing/placement.
477        let whitespace_deletions = before_segment.take_while(|segment| segment.is_whitespace());
478        SegmentMoveContext {
479            anchor_segment,
480            is_one_line,
481            before_segment,
482            whitespace_deletions,
483        }
484    }
485}
486
/// Information collected by `RuleCV06::get_segment_move_context` about the
/// surroundings of a semi-colon that may have to be moved.
struct SegmentMoveContext {
    /// Segment used as the insertion point: the non-code raw segment closest
    /// to the preceding code, or the semi-colon itself when nothing separates
    /// it from that code.
    anchor_segment: ErasedSegment,
    /// Whether the statement holding the nearest preceding code segment
    /// contains no newline.
    is_one_line: bool,
    /// Non-code, non-meta raw segments found between the preceding code and
    /// the semi-colon, ordered from the semi-colon backwards.
    before_segment: Segments,
    /// Leading run of whitespace segments of `before_segment`, i.e. the
    /// whitespace directly in front of the semi-colon.
    whitespace_deletions: Segments,
}
493
/// Kind of edit an anchor segment is being chosen for in
/// `choose_anchor_segment`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum EditType {
    /// New segments are to be created after the anchor.
    CreateAfter,
}
498
499fn choose_anchor_segment(
500    root_segment: &ErasedSegment,
501    edit_type: EditType,
502    segment: &ErasedSegment,
503    filter_meta: bool,
504) -> ErasedSegment {
505    if !matches!(edit_type, EditType::CreateAfter) {
506        return segment.clone();
507    }
508
509    let mut anchor = segment.clone();
510    let mut child = segment.clone();
511
512    let mut path = root_segment
513        .path_to(segment)
514        .into_iter()
515        .map(|it| it.segment)
516        .collect_vec();
517    path.reverse();
518
519    for seg in path {
520        if seg.can_start_end_non_code() {
521            break;
522        }
523
524        let mut children_lists = Vec::new();
525        if filter_meta {
526            children_lists.push(
527                seg.segments()
528                    .iter()
529                    .filter(|child| !child.is_meta())
530                    .cloned()
531                    .collect_vec(),
532            );
533        }
534        children_lists.push(seg.segments().to_vec());
535        for children in children_lists {
536            match edit_type {
537                EditType::CreateAfter if children.last().unwrap().id() == child.id() => {
538                    anchor = seg.clone();
539                    child = seg;
540                    break;
541                }
542                _ => {}
543            }
544        }
545    }
546
547    anchor
548}