Skip to main content

sqruff_lib/rules/layout/
lt05.rs

1use hashbrown::{HashMap, HashSet};
2use itertools::enumerate;
3use sqruff_lib_core::dialects::syntax::SyntaxKind;
4
5use crate::core::config::Value;
6use crate::core::rules::context::RuleContext;
7use crate::core::rules::crawlers::{Crawler, RootOnlyCrawler};
8use crate::core::rules::{Erased, ErasedRule, LintResult, Rule, RuleGroups};
9use crate::utils::reflow::sequence::ReflowSequence;
10
/// State for the `layout.long_lines` rule: two switches that decide which
/// long-line results are discarded after the reflow pass has run.
#[derive(Clone, Debug, Default)]
pub struct RuleLT05 {
    /// When set, a long-line result is dropped if its anchor is a comment, or
    /// if a comment follows the anchor on the same working line.
    ignore_comment_lines: bool,
    /// When set, a long-line result is dropped if a segment on the anchor's
    /// line sits inside a comment clause that starts before the configured
    /// `max_line_length`.
    ignore_comment_clauses: bool,
}
16
17impl Rule for RuleLT05 {
18    fn load_from_config(&self, config: &HashMap<String, Value>) -> Result<ErasedRule, String> {
19        Ok(RuleLT05 {
20            ignore_comment_lines: config["ignore_comment_lines"].as_bool().unwrap(),
21            ignore_comment_clauses: config["ignore_comment_clauses"].as_bool().unwrap(),
22        }
23        .erased())
24    }
25    fn name(&self) -> &'static str {
26        "layout.long_lines"
27    }
28
29    fn description(&self) -> &'static str {
30        "Line is too long."
31    }
32
33    fn long_description(&self) -> &'static str {
34        r#"
35**Anti-pattern**
36
37In this example, the line is too long.
38
39```sql
40SELECT
41    my_function(col1 + col2, arg2, arg3) over (partition by col3, col4 order by col5 rows between unbounded preceding and current row) as my_relatively_long_alias,
42    my_other_function(col6, col7 + col8, arg4) as my_other_relatively_long_alias,
43    my_expression_function(col6, col7 + col8, arg4) = col9 + col10 as another_relatively_long_alias
44FROM my_table
45```
46
47**Best practice**
48
49Wraps the line to be within the maximum line length.
50
51```sql
52SELECT
53    my_function(col1 + col2, arg2, arg3)
54        over (
55            partition by col3, col4
56            order by col5 rows between unbounded preceding and current row
57        )
58        as my_relatively_long_alias,
59    my_other_function(col6, col7 + col8, arg4)
60        as my_other_relatively_long_alias,
61    my_expression_function(col6, col7 + col8, arg4)
62    = col9 + col10 as another_relatively_long_alias
63FROM my_table
64```"#
65    }
66
67    fn groups(&self) -> &'static [RuleGroups] {
68        &[RuleGroups::All, RuleGroups::Core, RuleGroups::Layout]
69    }
70    fn eval(&self, context: &RuleContext) -> Vec<LintResult> {
71        let mut results = ReflowSequence::from_root(&context.segment, context.config)
72            .break_long_lines(context.tables)
73            .results();
74
75        let mut to_remove = HashSet::new();
76
77        if self.ignore_comment_lines {
78            let raw_segments = context.segment.get_raw_segments();
79            for (res_idx, res) in enumerate(&results) {
80                if res.anchor.as_ref().unwrap().is_type(SyntaxKind::Comment)
81                    || res
82                        .anchor
83                        .as_ref()
84                        .unwrap()
85                        .is_type(SyntaxKind::InlineComment)
86                {
87                    to_remove.insert(res_idx);
88                    continue;
89                }
90
91                let pos_marker = res.anchor.as_ref().unwrap().get_position_marker().unwrap();
92                let raw_idx = raw_segments
93                    .iter()
94                    .position(|it| it == res.anchor.as_ref().unwrap())
95                    .unwrap();
96
97                for seg in &raw_segments[raw_idx..] {
98                    if seg.get_position_marker().unwrap().working_line_no
99                        != pos_marker.working_line_no
100                    {
101                        break;
102                    }
103
104                    if seg.is_type(SyntaxKind::Comment) || seg.is_type(SyntaxKind::InlineComment) {
105                        to_remove.insert(res_idx);
106                        break;
107                    } else if seg.is_type(SyntaxKind::Placeholder) {
108                        unimplemented!()
109                    }
110                }
111            }
112        }
113
114        if self.ignore_comment_clauses {
115            let raw_segments = context.segment.get_raw_segments();
116            for (res_idx, res) in enumerate(&results) {
117                let raw_idx = raw_segments
118                    .iter()
119                    .position(|it| it == res.anchor.as_ref().unwrap())
120                    .unwrap();
121
122                for seg in &raw_segments[raw_idx..] {
123                    if seg.get_position_marker().unwrap().working_line_no
124                        != res
125                            .anchor
126                            .as_ref()
127                            .unwrap()
128                            .get_position_marker()
129                            .unwrap()
130                            .working_line_no
131                    {
132                        break;
133                    }
134
135                    let mut is_break = false;
136
137                    for ps in context.segment.path_to(seg) {
138                        if ps.segment.is_type(SyntaxKind::CommentClause)
139                            || ps.segment.is_type(SyntaxKind::CommentEqualsClause)
140                        {
141                            let line_pos =
142                                ps.segment.get_position_marker().unwrap().working_line_pos;
143                            if (line_pos as i32)
144                                < context
145                                    .config
146                                    .get("max_line_length", "core")
147                                    .as_int()
148                                    .unwrap()
149                            {
150                                to_remove.insert(res_idx);
151                                is_break = true;
152                                break;
153                            }
154                        }
155                    }
156
157                    if is_break {
158                        break;
159                    } else {
160                        continue;
161                    }
162                }
163            }
164        }
165
166        // Sort indices in reversed order to avoid index shifting issues when removing.
167        // Remove items from the end of the vector first.
168        let mut to_remove_vec: Vec<usize> = to_remove.into_iter().collect();
169        to_remove_vec.sort_by(|a, b| b.cmp(a));
170        for idx in to_remove_vec {
171            results.remove(idx);
172        }
173
174        results
175    }
176
177    fn is_fix_compatible(&self) -> bool {
178        true
179    }
180
181    fn crawl_behaviour(&self) -> Crawler {
182        RootOnlyCrawler.into()
183    }
184}
185
#[cfg(test)]
mod tests {
    use crate::core::config::FluffConfig;
    use crate::core::linter::core::Linter;

    /// Guards against a trailing comment being relocated onto the same line
    /// as code: anything following `--` is swallowed by the comment, so the
    /// fixed SQL would be broken.
    #[test]
    fn test_comment_not_merged_with_next_line() {
        let sql = "\
SELECT
    COALESCE(
        REGEXP_EXTRACT(project_id, '^foo-bar-(.+)$'),                -- foo-bar-baz -> baz
        REGEXP_EXTRACT(project_id, '^qux-(.+)$')                     -- qux-corge -> corge
    ) AS result
FROM t
";
        let linter = Linter::new(FluffConfig::default(), None, None, true).unwrap();
        let linted = linter.lint_string(sql, None, true).unwrap();
        let fixed = linted.fix_string();

        // Keep only lines where nothing but whitespace precedes the `--`.
        let comment_led_lines = fixed.lines().filter_map(|line| {
            let comment_pos = line.find("--")?;
            line[..comment_pos]
                .trim()
                .is_empty()
                .then_some((line, comment_pos))
        });

        for (line, comment_pos) in comment_led_lines {
            assert!(
                !after_double_dash_has_code(line, comment_pos),
                "Comment merged with code on line: {line}"
            );
        }
    }

    /// Reports whether the text from `comment_pos` onwards mentions any of
    /// the code keywords that appear in the fixture query.
    fn after_double_dash_has_code(line: &str, comment_pos: usize) -> bool {
        let tail = &line[comment_pos..];
        ["REGEXP_EXTRACT", "SELECT", "FROM"]
            .iter()
            .any(|&keyword| tail.contains(keyword))
    }
}