Skip to main content

sqruff_lib/rules/convention/
cv06.rs

1use hashbrown::{HashMap, HashSet};
2use itertools::Itertools;
3use sqruff_lib_core::dialects::syntax::{SyntaxKind, SyntaxSet};
4use sqruff_lib_core::lint_fix::LintFix;
5use sqruff_lib_core::parser::segments::{ErasedSegment, SegmentBuilder, Tables};
6use sqruff_lib_core::utils::functional::segments::Segments;
7
8use crate::core::config::Value;
9use crate::core::rules::context::RuleContext;
10use crate::core::rules::crawlers::{Crawler, RootOnlyCrawler};
11use crate::core::rules::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
12
/// Lint rule `convention.terminator` (CV06): checks where statement
/// terminators (`;`) are placed and, optionally, that the final statement
/// has one.
#[derive(Debug, Clone, Default)]
pub struct RuleCV06 {
    /// When `true`, the terminator of a statement that spans several lines
    /// is expected on a line of its own; one-line statements always keep the
    /// terminator on the same line.
    multiline_newline: bool,
    /// When `true`, a missing terminator after the last statement of the
    /// file is reported and fixed.
    require_final_semicolon: bool,
}
18
19impl Rule for RuleCV06 {
20    fn load_from_config(&self, config: &HashMap<String, Value>) -> Result<ErasedRule, String> {
21        let multiline_newline = config["multiline_newline"].as_bool().unwrap();
22        let require_final_semicolon = config["require_final_semicolon"].as_bool().unwrap();
23        Ok(Self {
24            multiline_newline,
25            require_final_semicolon,
26        }
27        .erased())
28    }
29
30    fn name(&self) -> &'static str {
31        "convention.terminator"
32    }
33
34    fn description(&self) -> &'static str {
35        "Statements must end with a semi-colon."
36    }
37
    /// Markdown help text for the rule: an anti-pattern example (terminator
    /// separated from its statement by blank lines or spaces) followed by the
    /// recommended form.
    fn long_description(&self) -> &'static str {
        r"
**Anti-pattern**

A statement is not immediately terminated with a semi-colon. The `•` represents space.

```sql
SELECT
    a
FROM foo

;

SELECT
    b
FROM bar••;
```

**Best practice**

Immediately terminate the statement with a semi-colon.

```sql
SELECT
    a
FROM foo;
```"
    }
66
67    fn groups(&self) -> &'static [RuleGroups] {
68        &[RuleGroups::All, RuleGroups::Convention]
69    }
70
71    fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
72        debug_assert!(context.segment.is_type(SyntaxKind::File));
73
74        let mut results = vec![];
75        for (idx, segment) in context.segment.segments().iter().enumerate() {
76            let mut res = None;
77            if segment.is_type(SyntaxKind::StatementTerminator) {
78                // First we can simply handle the case of existing semi-colon alignment.
79                // If it's a terminator then we know it's raw.
80
81                res =
82                    self.handle_semicolon(context.tables, segment.clone(), context.segment.clone());
83            } else if self.require_final_semicolon && idx == context.segment.segments().len() - 1 {
84                // Otherwise, handle the end of the file separately.
85                res = self.ensure_final_semicolon(context.tables, context.segment.clone());
86            }
87            if let Some(res) = res {
88                results.push(res);
89            }
90        }
91        results
92    }
93
94    fn is_fix_compatible(&self) -> bool {
95        true
96    }
97
98    fn crawl_behaviour(&self) -> Crawler {
99        RootOnlyCrawler.into()
100    }
101}
102
103impl RuleCV06 {
104    // Adjust anchor_segment to not move trailing inline comment.
105    //
106    // We don't want to move inline comments that are on the same line
107    // as the preceding code segment as they could contain noqa instructions.
108    fn handle_trailing_inline_comments(
109        parent_segment: ErasedSegment,
110        anchor_segment: ErasedSegment,
111    ) -> ErasedSegment {
112        // See if we have a trailing inline comment on the same line as the preceding
113        // segment.
114        for comment_segment in parent_segment
115            .recursive_crawl(
116                const {
117                    &SyntaxSet::new(&[
118                        SyntaxKind::Comment,
119                        SyntaxKind::InlineComment,
120                        SyntaxKind::BlockComment,
121                    ])
122                },
123                true,
124                &SyntaxSet::EMPTY,
125                false,
126            )
127            .iter()
128        {
129            assert!(comment_segment.get_position_marker().is_some());
130            assert!(anchor_segment.get_position_marker().is_some());
131            if comment_segment
132                .get_position_marker()
133                .unwrap()
134                .working_line_no
135                == anchor_segment
136                    .get_position_marker()
137                    .unwrap()
138                    .working_line_no
139                && !comment_segment.is_type(SyntaxKind::BlockComment)
140            {
141                return comment_segment.clone();
142            }
143        }
144        anchor_segment
145    }
146
147    fn is_one_line_statement(parent_segment: ErasedSegment, segment: ErasedSegment) -> bool {
148        let statement_segment = parent_segment
149            .path_to(&segment)
150            .iter()
151            .filter(|&it| it.segment.is_type(SyntaxKind::Statement))
152            .map(|it| it.segment.clone())
153            .next();
154
155        match statement_segment {
156            None => false,
157            Some(statement_segment) => statement_segment
158                .recursive_crawl(
159                    const { &SyntaxSet::new(&[SyntaxKind::Newline]) },
160                    true,
161                    &SyntaxSet::EMPTY,
162                    true,
163                )
164                .is_empty(),
165        }
166    }
167
168    fn handle_semicolon(
169        &self,
170        tables: &Tables,
171        target_segment: ErasedSegment,
172        parent_segment: ErasedSegment,
173    ) -> Option<LintResult> {
174        let info = Self::get_segment_move_context(target_segment.clone(), parent_segment.clone());
175        let semicolon_newline = if !info.is_one_line {
176            self.multiline_newline
177        } else {
178            false
179        };
180
181        if !semicolon_newline {
182            self.handle_semicolon_same_line(tables, target_segment, parent_segment, info)
183        } else {
184            self.handle_semicolon_newline(tables, target_segment, parent_segment, info)
185        }
186    }
187
188    fn handle_semicolon_same_line(
189        &self,
190        tables: &Tables,
191        target_segment: ErasedSegment,
192        parent_segment: ErasedSegment,
193        info: SegmentMoveContext,
194    ) -> Option<LintResult> {
195        if info.before_segment.is_empty() {
196            return None;
197        }
198
199        // If preceding segments are found then delete the old
200        // semicolon and its preceding whitespace and then insert
201        // the semicolon in the correct location.
202        let fixes = self.create_semicolon_and_delete_whitespace(
203            target_segment,
204            parent_segment,
205            info.anchor_segment.clone(),
206            info.whitespace_deletions,
207            vec![
208                SegmentBuilder::token(tables.next_id(), ";", SyntaxKind::StatementTerminator)
209                    .finish(),
210            ],
211        );
212
213        Some(LintResult::new(
214            Some(info.anchor_segment),
215            fixes,
216            None,
217            None,
218        ))
219    }
220
221    /// Adjust segments to not move preceding inline comments.
222    ///
223    /// We don't want to move inline comments that are on the same line
224    /// as the preceding code segment as they could contain noqa instructions.
225    fn handle_preceding_inline_comments(
226        before_segment: Segments,
227        anchor_segment: ErasedSegment,
228    ) -> (Segments, ErasedSegment) {
229        // See if we have a preceding inline comment on the same line as the preceding
230        // segment.
231
232        let same_line_comment = before_segment.iter().find(|s| {
233            s.is_comment()
234                && !s.is_type(SyntaxKind::BlockComment)
235                && s.get_position_marker().is_some()
236                && s.get_position_marker().unwrap().working_loc().0
237                    == anchor_segment
238                        .get_raw_segments()
239                        .last()
240                        .unwrap()
241                        .get_position_marker()
242                        .unwrap()
243                        .working_loc()
244                        .0
245        });
246
247        // If so then make that our new anchor segment and adjust
248        // before_segment accordingly.
249        if let Some(same_line_comment) = same_line_comment {
250            let anchor_segment = same_line_comment.clone();
251            let before_segment = before_segment
252                .iter()
253                .take_while(|s| *s != same_line_comment)
254                .cloned()
255                .collect();
256            let before_segment = Segments::from_vec(before_segment, None);
257            (before_segment, anchor_segment)
258        } else {
259            (before_segment, anchor_segment)
260        }
261    }
262
263    fn handle_semicolon_newline(
264        &self,
265        tables: &Tables,
266        target_segment: ErasedSegment,
267        parent_segment: ErasedSegment,
268        info: SegmentMoveContext,
269    ) -> Option<LintResult> {
270        // Adjust before_segment and anchor_segment for preceding inline
271        // comments. Inline comments can contain noqa logic so we need to add the
272        // newline after the inline comment.
273        let (before_segment, anchor_segment) = Self::handle_preceding_inline_comments(
274            info.before_segment.clone(),
275            info.anchor_segment.clone(),
276        );
277
278        if before_segment.len() == 1
279            && before_segment
280                .all_match(|segment: &ErasedSegment| segment.is_type(SyntaxKind::Newline))
281        {
282            return None;
283        }
284
285        // If preceding segment is not a single newline then delete the old
286        // semicolon/preceding whitespace and then insert the
287        // semicolon in the correct location.
288        let anchor_segment =
289            Self::handle_trailing_inline_comments(parent_segment.clone(), anchor_segment.clone());
290        let fixes = if anchor_segment == target_segment {
291            vec![LintFix::replace(
292                anchor_segment.clone(),
293                vec![
294                    SegmentBuilder::whitespace(tables.next_id(), "\n"),
295                    SegmentBuilder::token(tables.next_id(), ";", SyntaxKind::StatementTerminator)
296                        .finish(),
297                ],
298                None,
299            )]
300        } else {
301            self.create_semicolon_and_delete_whitespace(
302                target_segment,
303                parent_segment,
304                anchor_segment.clone(),
305                info.whitespace_deletions.clone(),
306                vec![
307                    SegmentBuilder::newline(tables.next_id(), "\n"),
308                    SegmentBuilder::token(tables.next_id(), ";", SyntaxKind::StatementTerminator)
309                        .finish(),
310                ],
311            )
312        };
313
314        Some(LintResult::new(Some(anchor_segment), fixes, None, None))
315    }
316
317    fn create_semicolon_and_delete_whitespace(
318        &self,
319        target_segment: ErasedSegment,
320        parent_segment: ErasedSegment,
321        anchor_segment: ErasedSegment,
322        mut whitespace_deletions: Segments,
323        create_segments: Vec<ErasedSegment>,
324    ) -> Vec<LintFix> {
325        let anchor_segment = choose_anchor_segment(
326            &parent_segment,
327            EditType::CreateAfter,
328            &anchor_segment,
329            true,
330        );
331
332        let mut lintfix_fn: fn(
333            ErasedSegment,
334            Vec<ErasedSegment>,
335            Option<Vec<ErasedSegment>>,
336        ) -> LintFix = LintFix::create_after;
337        if HashSet::<ErasedSegment>::from_iter(whitespace_deletions.base.clone())
338            .contains(&anchor_segment)
339        {
340            lintfix_fn = LintFix::replace;
341            whitespace_deletions =
342                whitespace_deletions.filter(|it: &ErasedSegment| it.id() != anchor_segment.id());
343        }
344
345        let mut fixes = vec![
346            lintfix_fn(anchor_segment, create_segments, None),
347            LintFix::delete(target_segment),
348        ];
349        fixes.extend(whitespace_deletions.into_iter().map(LintFix::delete));
350        fixes
351    }
352
353    fn ensure_final_semicolon(
354        &self,
355        tables: &Tables,
356        parent_segment: ErasedSegment,
357    ) -> Option<LintResult> {
358        // Iterate backwards over complete stack to find
359        // if the final semicolon is already present.
360        let mut anchor_segment = parent_segment.segments().last().cloned();
361        let trigger_segment = parent_segment.segments().last().cloned();
362        let mut semi_colon_exist_flag = false;
363        let mut is_one_line = false;
364        let mut before_segment = vec![];
365
366        let mut found_code = false;
367        for segment in parent_segment.segments().iter().rev() {
368            anchor_segment = Some(segment.clone());
369            if segment.is_type(SyntaxKind::StatementTerminator) {
370                semi_colon_exist_flag = true;
371            } else if segment.is_code() {
372                is_one_line = Self::is_one_line_statement(parent_segment.clone(), segment.clone());
373                found_code = true;
374                break;
375            } else if !segment.is_meta() {
376                before_segment.push(segment.clone());
377            }
378        }
379
380        if !found_code {
381            return None;
382        }
383
384        let semicolon_newline = if is_one_line {
385            false
386        } else {
387            self.multiline_newline
388        };
389        if !semi_colon_exist_flag {
390            // Create the final semicolon if it does not yet exist.
391
392            // Semicolon on same line.
393            return if !semicolon_newline {
394                let fixes = vec![LintFix::create_after(
395                    anchor_segment.unwrap().clone(),
396                    vec![
397                        SegmentBuilder::token(
398                            tables.next_id(),
399                            ";",
400                            SyntaxKind::StatementTerminator,
401                        )
402                        .finish(),
403                    ],
404                    None,
405                )];
406                Some(LintResult::new(
407                    Some(trigger_segment.unwrap().clone()),
408                    fixes,
409                    None,
410                    None,
411                ))
412            } else {
413                // Semi-colon on new line.
414                // Adjust before_segment and anchor_segment for inline
415                // comments.
416                let (_before_segment, anchor_segment) = Self::handle_preceding_inline_comments(
417                    Segments::from_vec(before_segment, None),
418                    anchor_segment.unwrap().clone(),
419                );
420                let fixes = vec![LintFix::create_after(
421                    anchor_segment.clone(),
422                    vec![
423                        SegmentBuilder::newline(tables.next_id(), "\n"),
424                        SegmentBuilder::token(
425                            tables.next_id(),
426                            ";",
427                            SyntaxKind::StatementTerminator,
428                        )
429                        .finish(),
430                    ],
431                    None,
432                )];
433
434                Some(LintResult::new(
435                    Some(trigger_segment.unwrap().clone()),
436                    fixes,
437                    None,
438                    None,
439                ))
440            };
441        }
442        None
443    }
444
445    fn get_segment_move_context(
446        target_segment: ErasedSegment,
447        parent_segment: ErasedSegment,
448    ) -> SegmentMoveContext {
449        // Locate the segment to be moved (i.e. context.segment) and search back
450        // over the raw stack to find the end of the preceding statement.
451
452        let reversed_raw_stack =
453            Segments::from_vec(parent_segment.get_raw_segments(), None).reversed();
454
455        let before_code = reversed_raw_stack
456            .after(&target_segment)
457            .take_while(|s| !s.is_code());
458        let before_segment = before_code.filter(|segment: &ErasedSegment| !segment.is_meta());
459
460        // We're selecting from the raw stack, so we know that before_code is made of
461        // raw elements.
462        let anchor_segment = if !before_code.is_empty() {
463            before_code.last().unwrap().clone()
464        } else {
465            target_segment.clone()
466        };
467
468        let first_code = reversed_raw_stack
469            .after(&target_segment)
470            .filter(|s: &ErasedSegment| s.is_code())
471            .first()
472            .cloned();
473
474        let is_one_line = first_code
475            .is_some_and(|segment| Self::is_one_line_statement(parent_segment, segment.clone()));
476
477        // We can tidy up any whitespace between the segment and the preceding
478        // code/comment segment. Don't mess with the comment spacing/placement.
479        let whitespace_deletions = before_segment.take_while(|segment| segment.is_whitespace());
480        SegmentMoveContext {
481            anchor_segment,
482            is_one_line,
483            before_segment,
484            whitespace_deletions,
485        }
486    }
487}
488
489struct SegmentMoveContext {
490    anchor_segment: ErasedSegment,
491    is_one_line: bool,
492    before_segment: Segments,
493    whitespace_deletions: Segments,
494}
495
/// Kind of edit an anchor is being chosen for by `choose_anchor_segment`.
#[derive(Clone, Copy, Debug, PartialEq)]
enum EditType {
    /// New segments are created directly after the anchor.
    CreateAfter,
}
500
501fn choose_anchor_segment(
502    root_segment: &ErasedSegment,
503    edit_type: EditType,
504    segment: &ErasedSegment,
505    filter_meta: bool,
506) -> ErasedSegment {
507    if !matches!(edit_type, EditType::CreateAfter) {
508        return segment.clone();
509    }
510
511    let mut anchor = segment.clone();
512    let mut child = segment.clone();
513
514    let mut path = root_segment
515        .path_to(segment)
516        .into_iter()
517        .map(|it| it.segment)
518        .collect_vec();
519    path.reverse();
520
521    for seg in path {
522        if seg.can_start_end_non_code() {
523            break;
524        }
525
526        let mut children_lists = Vec::new();
527        if filter_meta {
528            children_lists.push(
529                seg.segments()
530                    .iter()
531                    .filter(|child| !child.is_meta())
532                    .cloned()
533                    .collect_vec(),
534            );
535        }
536        children_lists.push(seg.segments().to_vec());
537        for children in children_lists {
538            match edit_type {
539                EditType::CreateAfter if children.last().unwrap().id() == child.id() => {
540                    anchor = seg.clone();
541                    child = seg;
542                    break;
543                }
544                _ => {}
545            }
546        }
547    }
548
549    anchor
550}