use crate::linter::config::LintConfig;
use crate::linter::rule::{LintContext, LintRule};
use crate::types::{issue_codes, Dialect, Issue, IssueAutofixApplicability, IssuePatchEdit, Span};
use sqlparser::ast::Statement;
use sqlparser::tokenizer::{Token, TokenWithSpan, Tokenizer, Whitespace};
/// Lint rule (code `LINT_LT_005`) that reports lines longer than a configured limit.
pub struct LayoutLongLines {
// Maximum number of characters allowed on a line; `None` makes `check` report nothing.
max_line_length: Option<usize>,
// When true, lines that consist only of a comment are not measured.
ignore_comment_lines: bool,
// When true, a trailing comment clause on a line is excluded from the measured length.
ignore_comment_clauses: bool,
// When true, the autofix puts a moved inline comment after its code line instead of before it.
trailing_comments_after: bool,
}
impl LayoutLongLines {
    /// Builds the rule from the lint configuration.
    ///
    /// `max_line_length` defaults to 80 when the option is absent. When it is
    /// present, a positive integer that fits in `usize` becomes the limit and
    /// anything else (zero, negative, non-integer) leaves the limit unset,
    /// which makes `check` return no issues.
    pub fn from_config(config: &LintConfig) -> Self {
        let code = issue_codes::LINT_LT_005;

        let configured_limit = config
            .rule_config_object(code)
            .and_then(|options| options.get("max_line_length"));
        let max_line_length = match configured_limit {
            None => Some(80),
            Some(raw) => match raw.as_i64() {
                Some(signed) if signed <= 0 => None,
                Some(signed) => usize::try_from(signed).ok(),
                // Values that do not fit in i64 may still be valid unsigned integers.
                None => raw
                    .as_u64()
                    .and_then(|unsigned| usize::try_from(unsigned).ok()),
            },
        };

        let ignore_comment_lines = config
            .rule_option_bool(code, "ignore_comment_lines")
            .unwrap_or(false);
        let ignore_comment_clauses = config
            .rule_option_bool(code, "ignore_comment_clauses")
            .unwrap_or(false);
        let trailing_comments_after = config
            .section_option_str("indentation", "trailing_comments")
            .is_some_and(|value| value.eq_ignore_ascii_case("after"));

        Self {
            max_line_length,
            ignore_comment_lines,
            ignore_comment_clauses,
            trailing_comments_after,
        }
    }
}
impl Default for LayoutLongLines {
fn default() -> Self {
Self {
max_line_length: Some(80),
ignore_comment_lines: false,
ignore_comment_clauses: false,
trailing_comments_after: false,
}
}
}
impl LintRule for LayoutLongLines {
    /// Issue code emitted by this rule.
    fn code(&self) -> &'static str {
        issue_codes::LINT_LT_005
    }

    /// Human-readable rule name.
    fn name(&self) -> &'static str {
        "Layout long lines"
    }

    /// Short description of what the rule reports.
    fn description(&self) -> &'static str {
        "Line is too long."
    }

    /// Reports one issue per overflowing line of `ctx.sql`.
    ///
    /// The whole document is scanned, so work is only done for the statement
    /// with index 0; every other statement yields no issues. Any autofix edits
    /// are attached to the first issue only.
    fn check(&self, _statement: &Statement, ctx: &LintContext) -> Vec<Issue> {
        let max_line_length = match self.max_line_length {
            Some(limit) if ctx.statement_index == 0 => limit,
            _ => return Vec::new(),
        };

        let overflow_spans = long_line_overflow_spans_for_context(
            ctx,
            max_line_length,
            self.ignore_comment_lines,
            self.ignore_comment_clauses,
        );
        if overflow_spans.is_empty() {
            return Vec::new();
        }

        let mut issues: Vec<Issue> = Vec::with_capacity(overflow_spans.len());
        for (start, end) in overflow_spans {
            issues.push(
                Issue::info(
                    issue_codes::LINT_LT_005,
                    "SQL contains excessively long lines.",
                )
                .with_statement(ctx.statement_index)
                .with_span(Span::new(start, end)),
            );
        }

        let autofix_edits =
            long_line_autofix_edits(ctx.sql, max_line_length, self.trailing_comments_after);
        if !autofix_edits.is_empty() {
            if let Some(first_issue) = issues.first_mut() {
                *first_issue = first_issue
                    .clone()
                    .with_autofix_edits(IssueAutofixApplicability::Safe, autofix_edits);
            }
        }
        issues
    }
}
/// Computes overflow spans for the document in `ctx`.
///
/// The tokens already attached to the context are used only when the SQL
/// contains no Jinja comments and those tokens are available. Otherwise the
/// work is delegated to `long_line_overflow_spans`, which tokenizes the text
/// itself.
fn long_line_overflow_spans_for_context(
    ctx: &LintContext,
    max_len: usize,
    ignore_comment_lines: bool,
    ignore_comment_clauses: bool,
) -> Vec<(usize, usize)> {
    let comment_spans = jinja_comment_spans(ctx.sql);
    if comment_spans.is_empty() {
        if let Some(tokens) = tokenize_with_offsets_for_context(ctx) {
            return long_line_overflow_spans_from_tokens(
                ctx.sql,
                max_len,
                ignore_comment_lines,
                ignore_comment_clauses,
                &tokens,
                &comment_spans,
            );
        }
    }
    long_line_overflow_spans(
        ctx.sql,
        max_len,
        ignore_comment_lines,
        ignore_comment_clauses,
        ctx.dialect(),
    )
}
/// Computes overflow spans for `sql`, preferring the tokenizer-based scan and
/// falling back to the plain text scan when tokenization yields nothing.
fn long_line_overflow_spans(
    sql: &str,
    max_len: usize,
    ignore_comment_lines: bool,
    ignore_comment_clauses: bool,
    dialect: Dialect,
) -> Vec<(usize, usize)> {
    long_line_overflow_spans_tokenized(
        sql,
        max_len,
        ignore_comment_lines,
        ignore_comment_clauses,
        dialect,
    )
    .unwrap_or_else(|| long_line_overflow_spans_naive(sql, max_len, ignore_comment_lines))
}
/// Text-only overflow scan that does not use the tokenizer.
///
/// For every line with more than `max_len` characters, returns the byte span
/// of the first character past the limit. With `ignore_comment_lines`, lines
/// whose first non-blank text is `--`, `/*` or `{#` are skipped.
fn long_line_overflow_spans_naive(
    sql: &str,
    max_len: usize,
    ignore_comment_lines: bool,
) -> Vec<(usize, usize)> {
    line_ranges(sql)
        .into_iter()
        .filter_map(|(line_start, line_end)| {
            let text = &sql[line_start..line_end];
            if ignore_comment_lines {
                let body = text.trim_start();
                if ["--", "/*", "{#"]
                    .iter()
                    .any(|opener| body.starts_with(*opener))
                {
                    return None;
                }
            }
            // The character at index `max_len` exists only when the line is too long.
            let (byte_off, overflow_char) = text.char_indices().nth(max_len)?;
            let overflow_start = line_start + byte_off;
            Some((overflow_start, overflow_start + overflow_char.len_utf8()))
        })
        .collect()
}
/// A tokenizer token paired with its byte offsets in the SQL text.
#[derive(Clone)]
struct LocatedToken {
// The token as produced by the tokenizer.
token: Token,
// Byte offset of the token's start position.
start: usize,
// Byte offset of the token's end position.
end: usize,
}
/// Tokenizer-based overflow scan for raw SQL text.
///
/// Jinja comments are blanked out before tokenizing; the overflow spans are
/// then computed against the original `sql`. Returns `None` when the
/// sanitized text cannot be tokenized.
fn long_line_overflow_spans_tokenized(
    sql: &str,
    max_len: usize,
    ignore_comment_lines: bool,
    ignore_comment_clauses: bool,
    dialect: Dialect,
) -> Option<Vec<(usize, usize)>> {
    let comment_spans = jinja_comment_spans(sql);
    let sanitized = sanitize_sql_for_jinja_comments(sql, &comment_spans);
    tokenize_with_offsets(&sanitized, dialect).map(|tokens| {
        long_line_overflow_spans_from_tokens(
            sql,
            max_len,
            ignore_comment_lines,
            ignore_comment_clauses,
            &tokens,
            &comment_spans,
        )
    })
}
/// Computes overflow spans using located tokens and Jinja comment spans.
///
/// Comment-only lines are skipped when `ignore_comment_lines` is set. When
/// `ignore_comment_clauses` is set, each line is measured only up to the start
/// of its comment clause. The returned span covers the first character past
/// `max_len` on each offending line.
fn long_line_overflow_spans_from_tokens(
    sql: &str,
    max_len: usize,
    ignore_comment_lines: bool,
    ignore_comment_clauses: bool,
    tokens: &[LocatedToken],
    jinja_comment_spans: &[std::ops::Range<usize>],
) -> Vec<(usize, usize)> {
    let mut spans = Vec::new();
    for (line_start, line_end) in line_ranges(sql) {
        if ignore_comment_lines
            && line_is_comment_only_tokenized(
                line_start,
                line_end,
                tokens,
                &sql[line_start..line_end],
                sql,
                jinja_comment_spans,
            )
        {
            continue;
        }

        let mut effective_end = line_end;
        if ignore_comment_clauses {
            if let Some(clause_start) = comment_clause_start_offset_tokenized(
                line_start,
                line_end,
                tokens,
                jinja_comment_spans,
            ) {
                effective_end = clause_start;
            }
        }

        let measured = &sql[line_start..effective_end];
        // The character at index `max_len` exists only when the measured text is too long.
        if let Some((byte_off, overflow_char)) = measured.char_indices().nth(max_len) {
            let overflow_start = line_start + byte_off;
            spans.push((overflow_start, overflow_start + overflow_char.len_utf8()));
        }
    }
    spans
}
/// Splits `sql` on `'\n'` and returns the byte range of every line.
///
/// Ranges exclude the newline itself and a single trailing `'\r'`. There is
/// always at least one range; text ending in a newline yields a final empty
/// range.
fn line_ranges(sql: &str) -> Vec<(usize, usize)> {
    let mut cursor = 0usize;
    sql.split('\n')
        .map(|segment| {
            let start = cursor;
            // Advance past this segment and the newline that terminated it.
            cursor += segment.len() + 1;
            let visible = segment.strip_suffix('\r').unwrap_or(segment);
            (start, start + visible.len())
        })
        .collect()
}
/// Lines longer than this many bytes are handled by `legacy_split_long_line`.
const LEGACY_MAX_LINE_LENGTH: usize = 300;
/// Byte offset near which `legacy_split_long_line` looks for a split point.
const LEGACY_LINE_SPLIT_TARGET: usize = 280;

/// Splits a very long line into several lines at space characters.
///
/// Returns `None` when `line` is at most `LEGACY_MAX_LINE_LENGTH` bytes, or
/// when it contains no space at which it can be split. Otherwise returns the
/// rewritten text: leading whitespace is dropped and segments are joined with
/// exactly one `'\n'`.
///
/// Each split happens at the last space at or before `LEGACY_LINE_SPLIT_TARGET`.
/// If the window holds no space, the next space after it is used instead, so a
/// token is never cut in half. A single token longer than the limit therefore
/// stays on one (over-long) line.
fn legacy_split_long_line(line: &str) -> Option<String> {
    if line.len() <= LEGACY_MAX_LINE_LENGTH {
        return None;
    }

    let mut segments: Vec<&str> = Vec::new();
    let mut remaining = line.trim_start();
    while remaining.len() > LEGACY_MAX_LINE_LENGTH {
        // Largest char boundary not beyond the split target.
        let probe = remaining
            .char_indices()
            .map(|(index, _)| index)
            .take_while(|index| *index <= LEGACY_LINE_SPLIT_TARGET)
            .last()
            .unwrap_or(0);

        let split_at = match remaining[..probe].rfind(' ') {
            Some(index) => index,
            // No space inside the window: extend to the end of the current
            // token rather than cutting it, or stop if no space is left.
            None => match remaining[probe..].find(' ') {
                Some(offset) => probe + offset,
                None => break,
            },
        };

        // `remaining` starts with a non-blank character, so `split_at >= 1`
        // and every iteration consumes input.
        segments.push(remaining[..split_at].trim_end());
        remaining = remaining[split_at..].trim_start();
    }

    if segments.is_empty() {
        return None;
    }
    if !remaining.is_empty() {
        segments.push(remaining);
    }
    Some(segments.join("\n"))
}
/// Builds one patch edit per line that can be rewritten to be shorter.
///
/// Comment-only lines are never rewritten. Lines above the legacy byte
/// threshold go through `legacy_split_long_line`; other lines with more than
/// `max_line_length` characters go through `rewrite_lt05_long_line`. A rewrite
/// that equals the original line produces no edit.
fn long_line_autofix_edits(
    sql: &str,
    max_line_length: usize,
    trailing_comments_after: bool,
) -> Vec<IssuePatchEdit> {
    line_ranges(sql)
        .into_iter()
        .filter_map(|(line_start, line_end)| {
            let line = &sql[line_start..line_end];
            if is_comment_only_line(line) {
                return None;
            }
            let candidate = if line.len() > LEGACY_MAX_LINE_LENGTH {
                legacy_split_long_line(line)
            } else if line.chars().count() > max_line_length {
                rewrite_lt05_long_line(line, max_line_length, trailing_comments_after)
            } else {
                None
            };
            candidate
                .filter(|text| text.as_str() != line)
                .map(|text| IssuePatchEdit::new(Span::new(line_start, line_end), text))
        })
        .collect()
}
/// Returns true when the first non-blank text of `line` is one of the
/// comment markers `--`, `/*`, `*`, `*/` or `{#`.
fn is_comment_only_line(line: &str) -> bool {
    const COMMENT_OPENERS: [&str; 5] = ["--", "/*", "*", "*/", "{#"];
    let body = line.trim_start();
    COMMENT_OPENERS
        .iter()
        .any(|opener| body.starts_with(*opener))
}
/// Rewrites an over-long line: the inline-comment rewrite is tried first and
/// the code-only strategies are used when it does not apply.
fn rewrite_lt05_long_line(
    line: &str,
    max_line_length: usize,
    trailing_comments_after: bool,
) -> Option<String> {
    match rewrite_inline_comment_line(line, max_line_length, trailing_comments_after) {
        Some(rewritten) => Some(rewritten),
        None => rewrite_lt05_code_line(line, max_line_length),
    }
}
fn rewrite_lt05_code_line(line: &str, max_line_length: usize) -> Option<String> {
rewrite_window_function_line(line, max_line_length)
.or_else(|| rewrite_over_clause_with_tail_line(line, max_line_length))
.or_else(|| rewrite_function_alias_line(line, max_line_length))
.or_else(|| rewrite_function_equals_line(line, max_line_length))
.or_else(|| rewrite_expression_alias_line(line, max_line_length))
.or_else(|| rewrite_clause_break_line(line, max_line_length))
.or_else(|| rewrite_whitespace_wrap_line(line, max_line_length))
}
/// Breaks an over-long line before its last ` as ` (case-insensitive), moving
/// the alias onto a continuation line.
///
/// Returns `None` when the line fits, has no ` as ` after position 0, or when
/// either side of the break would be empty.
fn rewrite_expression_alias_line(line: &str, max_line_length: usize) -> Option<String> {
    if line.chars().count() <= max_line_length {
        return None;
    }
    let marker = match find_last_ascii_case_insensitive(line, " as ") {
        Some(position) if position > 0 => position,
        _ => return None,
    };
    let (head, rest) = line.split_at(marker);
    let left = head.trim_end();
    // `rest` starts with the space that precedes `as`; drop it.
    let right = rest[1..].trim_start();
    if left.is_empty() || right.is_empty() {
        return None;
    }
    let continuation = format!("{} ", leading_whitespace_prefix(line));
    Some(format!("{left}\n{continuation}{right}"))
}
/// Moves a trailing `--` comment onto its own line.
///
/// Returns `None` when the line has no unquoted `--` comment, or when the code
/// before the comment is blank or just a comma. The code part is rewritten
/// further if it is still longer than `max_line_length`. The comment line goes
/// after the code when `trailing_comments_after` is set, otherwise before it.
fn rewrite_inline_comment_line(
    line: &str,
    max_line_length: usize,
    trailing_comments_after: bool,
) -> Option<String> {
    let comment_start = find_unquoted_inline_comment_start(line)?;
    let (code_prefix, comment_text) = line.split_at(comment_start);
    let code_trimmed = code_prefix.trim_end();
    match code_trimmed.trim() {
        "" | "," => return None,
        _ => {}
    }

    let indent = leading_whitespace_prefix(line);
    let code_body = code_trimmed
        .strip_prefix(indent)
        .unwrap_or(code_trimmed)
        .trim_start();
    if code_body.is_empty() {
        return None;
    }

    let plain_code = format!("{indent}{code_body}");
    let code_line = if plain_code.chars().count() > max_line_length {
        rewrite_lt05_code_line(&plain_code, max_line_length).unwrap_or(plain_code)
    } else {
        plain_code
    };
    let comment_line = format!("{indent}{}", comment_text.trim_end());

    Some(if trailing_comments_after {
        format!("{code_line}\n{comment_line}")
    } else {
        format!("{comment_line}\n{code_line}")
    })
}
/// Breaks an over-long line before the earliest clause keyword it contains
/// (`from`, `where`, `qualify`, `order by`, `group by`, `having`, `join`,
/// each surrounded by spaces, case-insensitive).
///
/// The second line keeps the original indent. Returns `None` when the line
/// fits, no keyword is found, or either side of the break would be empty.
fn rewrite_clause_break_line(line: &str, max_line_length: usize) -> Option<String> {
    const CLAUSE_NEEDLES: [&str; 7] = [
        " from ",
        " where ",
        " qualify ",
        " order by ",
        " group by ",
        " having ",
        " join ",
    ];
    if line.chars().count() <= max_line_length {
        return None;
    }

    let mut earliest: Option<usize> = None;
    for needle in CLAUSE_NEEDLES.iter() {
        if let Some(position) = find_ascii_case_insensitive(line, needle) {
            earliest = Some(earliest.map_or(position, |best| best.min(position)));
        }
    }
    let split_at = earliest.filter(|position| *position != 0)?;

    let (head, rest) = line.split_at(split_at);
    let left = head.trim_end();
    // `rest` starts with the space that precedes the keyword; drop it.
    let right = rest[1..].trim_start();
    if left.is_empty() || right.is_empty() {
        return None;
    }
    let indent = leading_whitespace_prefix(line);
    Some(format!("{left}\n{indent}{right}"))
}
/// Breaks an over-long line right after the first `)` that is followed by
/// ` as ` (case-insensitive), moving the alias onto a continuation line.
///
/// Lines containing ` over ` are left to the window-function rewrites.
fn rewrite_function_alias_line(line: &str, max_line_length: usize) -> Option<String> {
    let fits = line.chars().count() <= max_line_length;
    if fits || find_ascii_case_insensitive(line, " over ").is_some() {
        return None;
    }
    // Split just after the closing parenthesis.
    let split_at = find_ascii_case_insensitive(line, ") as ")? + 1;
    let (head, rest) = line.split_at(split_at);
    let left = head.trim_end();
    let right = rest.trim_start();
    if left.is_empty() || right.is_empty() {
        return None;
    }
    let continuation = format!("{} ", leading_whitespace_prefix(line));
    Some(format!("{left}\n{continuation}{right}"))
}
/// Breaks an over-long line right after the first `)` that is followed by
/// ` = `, continuing the comparison on a new line with the original indent.
fn rewrite_function_equals_line(line: &str, max_line_length: usize) -> Option<String> {
    if line.chars().count() <= max_line_length {
        return None;
    }
    // Split just after the closing parenthesis.
    let split_at = find_ascii_case_insensitive(line, ") = ")? + 1;
    let (head, rest) = line.split_at(split_at);
    let left = head.trim_end();
    let right = rest.trim_start();
    if left.is_empty() || right.is_empty() {
        return None;
    }
    let indent = leading_whitespace_prefix(line);
    Some(format!("{left}\n{indent}{right}"))
}
/// Returns the byte offset of the last occurrence of `needle` in `haystack`,
/// comparing ASCII letters case-insensitively.
///
/// An empty needle, or one longer than the haystack, yields `None`.
fn find_last_ascii_case_insensitive(haystack: &str, needle: &str) -> Option<usize> {
    let pattern = needle.as_bytes();
    if pattern.is_empty() {
        return None;
    }
    haystack
        .as_bytes()
        .windows(pattern.len())
        .rposition(|window| window.eq_ignore_ascii_case(pattern))
}
/// Expands an over-long `... over (...) <tail>` expression onto several lines.
///
/// Applies only when the text after the closing parenthesis of the `over (`
/// clause contains `as ` (case-insensitive). The window body is split before
/// ` order by ` when present. Returns `None` when the line fits or any
/// required part is missing or empty.
fn rewrite_over_clause_with_tail_line(line: &str, max_line_length: usize) -> Option<String> {
    if line.chars().count() <= max_line_length {
        return None;
    }
    let over_start = find_ascii_case_insensitive(line, " over (")?;
    let over_open = over_start + line[over_start..].find('(')?;
    let over_close = matching_close_paren(line, over_open)?;

    let tail = line[over_close + 1..].trim_start();
    let prefix = line[..over_start].trim_end();
    let inside = line[over_open + 1..over_close].trim();
    if !contains_ascii_case_insensitive(tail, "as ") || prefix.is_empty() || inside.is_empty() {
        return None;
    }

    let indent = leading_whitespace_prefix(line);
    let continuation = format!("{indent} ");
    let inner_indent = format!("{indent} ");
    let over_kw = line[over_start..over_open].trim();

    let mut lines = Vec::with_capacity(6);
    lines.push(prefix.to_string());
    lines.push(format!("{continuation}{over_kw} ("));
    match find_ascii_case_insensitive(inside, " order by ") {
        Some(order_idx) => {
            let partition = inside[..order_idx].trim();
            // Skip the space that precedes `order by`.
            let order_by = inside[order_idx + 1..].trim_start();
            for clause in [partition, order_by].iter().filter(|part| !part.is_empty()) {
                lines.push(format!("{inner_indent}{clause}"));
            }
        }
        None => lines.push(format!("{inner_indent}{inside}")),
    }
    lines.push(format!("{continuation})"));
    lines.push(format!("{continuation}{tail}"));
    Some(lines.join("\n"))
}
/// Rewrites an over-long window function call that carries an `ignore nulls`
/// or `respect nulls` modifier before its ` over (` clause.
///
/// The `over` clause moves to a continuation line. When the function's
/// argument list itself contains parentheses, the call is also opened up so
/// its arguments sit on their own line.
fn rewrite_window_function_line(line: &str, max_line_length: usize) -> Option<String> {
    if line.chars().count() <= max_line_length {
        return None;
    }
    let over_start = find_ascii_case_insensitive(line, " over (")?;
    let modifier_start = [" ignore nulls", " respect nulls"]
        .iter()
        .find_map(|needle| rfind_ascii_case_insensitive_before(line, needle, over_start))?;

    let function_part = line[..modifier_start].trim_end();
    let modifier = line[modifier_start..over_start].trim();
    // Skip the space that precedes `over`.
    let over_part = line[over_start + 1..].trim_start();
    if [function_part, modifier, over_part]
        .iter()
        .any(|part| part.is_empty())
    {
        return None;
    }

    let indent = leading_whitespace_prefix(line);
    let continuation = format!("{indent} ");
    let mut lines = match outer_call_head_and_inner(function_part) {
        Some((head, inner)) if inner.contains('(') && inner.contains(')') => vec![
            format!("{head}("),
            format!("{continuation}{inner}"),
            format!("{indent}) {modifier}"),
        ],
        _ => vec![format!("{} {modifier}", function_part.trim_end())],
    };
    lines.push(format!("{continuation}{over_part}"));
    Some(lines.join("\n"))
}
/// Splits `head(inner)` into `(head, inner)`.
///
/// Succeeds only when the first `(` in the text is matched by the final `)`
/// and both the head and the trimmed inner text are non-empty.
fn outer_call_head_and_inner(function_part: &str) -> Option<(&str, &str)> {
    let call = function_part.trim_end();
    if !call.ends_with(')') {
        return None;
    }
    let open = call.find('(')?;
    let close = matching_close_paren(call, open).filter(|close| close + 1 == call.len())?;
    let head = call[..open].trim_end();
    let inner = call[open + 1..close].trim();
    (!head.is_empty() && !inner.is_empty()).then_some((head, inner))
}
/// Returns the run of spaces and tabs at the start of `line`.
fn leading_whitespace_prefix(line: &str) -> &str {
    let rest = line.trim_start_matches(|ch: char| ch == ' ' || ch == '\t');
    &line[..line.len() - rest.len()]
}
/// Returns the byte offset of the first `--` that is outside single- and
/// double-quoted text, or `None` if there is none.
///
/// A doubled quote character inside a quoted region is treated as an escaped
/// quote and does not end the region.
fn find_unquoted_inline_comment_start(line: &str) -> Option<usize> {
    let bytes = line.as_bytes();
    // Holds the delimiter while scanning inside a quoted region.
    let mut open_quote: Option<u8> = None;
    let mut pos = 0usize;
    // A comment marker needs two bytes, so the last byte is never a start.
    while pos + 1 < bytes.len() {
        let (current, following) = (bytes[pos], bytes[pos + 1]);
        match open_quote {
            Some(quote) if current == quote => {
                if following == quote {
                    // Escaped quote: skip both bytes and stay inside.
                    pos += 2;
                    continue;
                }
                open_quote = None;
            }
            Some(_) => {}
            None => match current {
                b'\'' | b'"' => open_quote = Some(current),
                b'-' if following == b'-' => return Some(pos),
                _ => {}
            },
        }
        pos += 1;
    }
    None
}
/// Returns the byte index of the `)` that closes the `(` at `open_index`.
///
/// Returns `None` when `open_index` does not point at `(` or the parenthesis
/// is never closed. Quoting is not taken into account.
fn matching_close_paren(input: &str, open_index: usize) -> Option<usize> {
    let bytes = input.as_bytes();
    if bytes.get(open_index) != Some(&b'(') {
        return None;
    }
    let mut depth = 0usize;
    // Parentheses are ASCII, so scanning bytes cannot land inside a multi-byte character.
    for (offset, byte) in bytes[open_index..].iter().enumerate() {
        match byte {
            b'(' => depth += 1,
            b')' => {
                depth = depth.saturating_sub(1);
                if depth == 0 {
                    return Some(open_index + offset);
                }
            }
            _ => {}
        }
    }
    None
}
/// Returns the byte offset of the first occurrence of `needle` in `haystack`,
/// comparing ASCII letters case-insensitively. An empty needle matches at 0.
fn find_ascii_case_insensitive(haystack: &str, needle: &str) -> Option<usize> {
    let pattern = needle.as_bytes();
    if pattern.is_empty() {
        return Some(0);
    }
    haystack
        .as_bytes()
        .windows(pattern.len())
        .position(|window| window.eq_ignore_ascii_case(pattern))
}
/// Returns true when `haystack` contains `needle`, comparing ASCII letters
/// case-insensitively.
fn contains_ascii_case_insensitive(haystack: &str, needle: &str) -> bool {
    let lowered_haystack = haystack.to_ascii_lowercase();
    let lowered_needle = needle.to_ascii_lowercase();
    lowered_haystack.contains(lowered_needle.as_str())
}
/// Returns the byte offset of the last occurrence of `needle` that lies
/// entirely within `haystack[..end]`, comparing ASCII letters
/// case-insensitively. `end` is clamped to the haystack length.
fn rfind_ascii_case_insensitive_before(haystack: &str, needle: &str, end: usize) -> Option<usize> {
    let scope = &haystack[..end.min(haystack.len())];
    let pattern = needle.as_bytes();
    if pattern.is_empty() {
        // An empty needle matches at the very end of the searched text.
        return Some(scope.len());
    }
    scope
        .as_bytes()
        .windows(pattern.len())
        .rposition(|window| window.eq_ignore_ascii_case(pattern))
}
/// Wraps an over-long line at whitespace.
///
/// Lines containing `--`, `/*` or `*/` are left alone. The first output line
/// keeps the original indent and later lines use the continuation indent.
/// Wrapping stops when the rest fits or fewer than 8 characters of room
/// remain. Returns `None` if nothing could be wrapped.
fn rewrite_whitespace_wrap_line(line: &str, max_line_length: usize) -> Option<String> {
    if line.chars().count() <= max_line_length {
        return None;
    }
    if ["--", "/*", "*/"]
        .iter()
        .any(|marker| line.contains(*marker))
    {
        return None;
    }

    let indent = leading_whitespace_prefix(line);
    let continuation_indent = format!("{indent} ");
    let first_limit = max_line_length.saturating_sub(indent.chars().count());
    let later_limit = max_line_length.saturating_sub(continuation_indent.chars().count());

    let mut rest = line[indent.len()..].trim_end();
    if rest.is_empty() {
        return None;
    }

    let mut pieces: Vec<String> = Vec::new();
    loop {
        let on_first_line = pieces.is_empty();
        let limit = if on_first_line { first_limit } else { later_limit };
        if limit < 8 || rest.chars().count() <= limit {
            break;
        }
        let split_at = wrap_split_index(rest, limit)?;
        let head = rest[..split_at].trim_end();
        let tail = rest[split_at..].trim_start();
        if head.is_empty() || tail.is_empty() {
            return None;
        }
        let prefix = if on_first_line {
            indent
        } else {
            continuation_indent.as_str()
        };
        pieces.push(format!("{prefix}{head}"));
        rest = tail;
    }

    if pieces.is_empty() {
        return None;
    }
    // At least one split happened, so the remainder is a continuation line.
    pieces.push(format!("{continuation_indent}{rest}"));
    Some(pieces.join("\n"))
}
/// Finds the byte offset of the last whitespace character within the first
/// `char_limit` characters of `content` that is not inside quoted text.
///
/// Single quotes, double quotes and backticks open a quoted region; a doubled
/// delimiter inside the region is an escaped quote. Returns `None` when no
/// such whitespace exists or the only candidate is at offset 0.
fn wrap_split_index(content: &str, char_limit: usize) -> Option<usize> {
    if char_limit == 0 {
        return None;
    }
    // Holds the delimiter while scanning inside a quoted region.
    let mut open_quote: Option<char> = None;
    let mut last_break: Option<usize> = None;
    // Set after the first half of an escaped quote so the second half is skipped.
    let mut skip_escaped = false;

    for (char_idx, (byte_idx, ch)) in content.char_indices().enumerate() {
        if char_idx >= char_limit {
            break;
        }
        if skip_escaped {
            skip_escaped = false;
            continue;
        }
        match open_quote {
            None => {
                if ch.is_whitespace() {
                    last_break = Some(byte_idx);
                } else if matches!(ch, '\'' | '"' | '`') {
                    open_quote = Some(ch);
                }
            }
            Some(quote) if ch == quote => {
                let following = content[byte_idx + ch.len_utf8()..].chars().next();
                if following == Some(quote) {
                    skip_escaped = true;
                } else {
                    open_quote = None;
                }
            }
            Some(_) => {}
        }
    }
    last_break.filter(|byte_idx| *byte_idx > 0)
}
/// Decides whether a line holds nothing but comments.
///
/// A line counts as comment-only when it is a Jinja-comment-only line, or when
/// its non-spacing tokens are all comment tokens, optionally preceded by a
/// single leading comma that has only whitespace before it.
fn line_is_comment_only_tokenized(
    line_start: usize,
    line_end: usize,
    tokens: &[LocatedToken],
    line_text: &str,
    sql: &str,
    jinja_comment_spans: &[std::ops::Range<usize>],
) -> bool {
    if line_is_jinja_comment_only(line_start, line_end, sql, jinja_comment_spans) {
        return true;
    }

    let significant: Vec<&LocatedToken> = tokens_on_line(tokens, line_start, line_end)
        .into_iter()
        .filter(|located| !is_spacing_whitespace(&located.token))
        .collect();
    let Some(first) = significant.first() else {
        return false;
    };

    let skip_leading_comma = matches!(first.token, Token::Comma)
        && line_prefix_before_token_is_spacing(line_text, line_start, first.start);
    let rest = &significant[usize::from(skip_leading_comma)..];

    !rest.is_empty() && rest.iter().all(|located| is_comment_token(&located.token))
}
/// Returns the byte offset at which the line's comment clause begins, if any.
///
/// The clause starts at the earlier of the first Jinja comment on the line and
/// the first significant token that is the word `comment` (case-insensitive),
/// a single-line comment, or a multi-line comment that is the last significant
/// token on the line.
fn comment_clause_start_offset_tokenized(
    line_start: usize,
    line_end: usize,
    tokens: &[LocatedToken],
    jinja_comment_spans: &[std::ops::Range<usize>],
) -> Option<usize> {
    let jinja_start = first_jinja_comment_start_on_line(line_start, line_end, jinja_comment_spans);
    let significant: Vec<&LocatedToken> = tokens_on_line(tokens, line_start, line_end)
        .into_iter()
        .filter(|located| !is_spacing_whitespace(&located.token))
        .collect();

    let token_start = significant
        .iter()
        .enumerate()
        .find(|(index, located)| match &located.token {
            Token::Word(word) => word.value.eq_ignore_ascii_case("comment"),
            Token::Whitespace(Whitespace::SingleLineComment { .. }) => true,
            Token::Whitespace(Whitespace::MultiLineComment(_)) => significant[index + 1..]
                .iter()
                .all(|next| is_spacing_whitespace(&next.token)),
            _ => false,
        })
        // A token may begin on an earlier line; clamp to this line's start.
        .map(|(_, located)| located.start.max(line_start));

    match (jinja_start, token_start) {
        (Some(jinja), Some(token)) => Some(jinja.min(token)),
        (jinja, token) => jinja.or(token),
    }
}
/// Collects the tokens whose byte range overlaps `line_start..line_end`.
fn tokens_on_line(
    tokens: &[LocatedToken],
    line_start: usize,
    line_end: usize,
) -> Vec<&LocatedToken> {
    let mut overlapping = Vec::new();
    for located in tokens {
        if located.end > line_start && located.start < line_end {
            overlapping.push(located);
        }
    }
    overlapping
}
/// Returns true when everything in `line_text` before the token is whitespace.
///
/// `line_start` and `token_start` are offsets in the same coordinate space; a
/// token that starts before the line yields `false`.
fn line_prefix_before_token_is_spacing(
    line_text: &str,
    line_start: usize,
    token_start: usize,
) -> bool {
    match token_start.checked_sub(line_start) {
        None => false,
        Some(prefix_len) => !line_text[..prefix_len]
            .chars()
            .any(|ch| !ch.is_whitespace()),
    }
}
fn tokenize_with_offsets(sql: &str, dialect: Dialect) -> Option<Vec<LocatedToken>> {
let dialect = dialect.to_sqlparser_dialect();
let mut tokenizer = Tokenizer::new(dialect.as_ref(), sql);
let tokens = tokenizer.tokenize_with_location().ok()?;
let mut out = Vec::with_capacity(tokens.len());
for token in tokens {
let start = line_col_to_offset(
sql,
token.span.start.line as usize,
token.span.start.column as usize,
)?;
let end = line_col_to_offset(
sql,
token.span.end.line as usize,
token.span.end.column as usize,
)?;
out.push(LocatedToken {
token: token.token,
start,
end,
});
}
Some(out)
}
/// Converts the document tokens exposed by `ctx` into located tokens.
///
/// Returns `None` when the context supplies no tokens. Tokens whose positions
/// cannot be mapped to byte offsets are dropped.
fn tokenize_with_offsets_for_context(ctx: &LintContext) -> Option<Vec<LocatedToken>> {
    ctx.with_document_tokens(|tokens| {
        (!tokens.is_empty()).then(|| {
            tokens
                .iter()
                .filter_map(|token| {
                    let (start, end) = token_with_span_offsets(ctx.sql, token)?;
                    Some(LocatedToken {
                        token: token.token.clone(),
                        start,
                        end,
                    })
                })
                .collect::<Vec<_>>()
        })
    })
}
/// Returns the byte ranges of all `{# ... #}` comments in `sql`, in order.
///
/// An unterminated comment extends to the end of the text.
fn jinja_comment_spans(sql: &str) -> Vec<std::ops::Range<usize>> {
    let mut spans = Vec::new();
    let mut cursor = 0usize;
    while let Some(open_rel) = sql[cursor..].find("{#") {
        let start = cursor + open_rel;
        let body_start = start + 2;
        match sql[body_start..].find("#}") {
            Some(close_rel) => {
                let end = body_start + close_rel + 2;
                spans.push(start..end);
                cursor = end;
            }
            None => {
                spans.push(start..sql.len());
                break;
            }
        }
    }
    spans
}
/// Returns a copy of `sql` in which every byte inside the given spans is
/// replaced by a space, except newlines, so byte offsets and line structure
/// are unchanged.
fn sanitize_sql_for_jinja_comments(sql: &str, spans: &[std::ops::Range<usize>]) -> String {
    if spans.is_empty() {
        return sql.to_string();
    }
    let mut bytes = sql.as_bytes().to_vec();
    for span in spans {
        let end = span.end.min(bytes.len());
        // An inverted or out-of-range span selects nothing.
        if let Some(region) = bytes.get_mut(span.start..end) {
            for byte in region.iter_mut().filter(|byte| **byte != b'\n') {
                *byte = b' ';
            }
        }
    }
    String::from_utf8(bytes).expect("sanitized SQL should remain valid UTF-8")
}
/// Returns the earliest offset on the line that is covered by a Jinja comment.
///
/// Spans that begin before the line are clamped to `line_start`; `None` means
/// no span overlaps the line.
fn first_jinja_comment_start_on_line(
    line_start: usize,
    line_end: usize,
    spans: &[std::ops::Range<usize>],
) -> Option<usize> {
    spans
        .iter()
        .filter(|span| span.start < line_end && span.end > line_start)
        .map(|span| span.start.max(line_start))
        .min()
}
/// Returns true when the line consists only of Jinja comment text.
///
/// Leading whitespace and commas are ignored. After that, every non-whitespace
/// character must fall inside one of `spans`, and there must be at least one
/// such character.
fn line_is_jinja_comment_only(
    line_start: usize,
    line_end: usize,
    sql: &str,
    spans: &[std::ops::Range<usize>],
) -> bool {
    let mut body = sql[line_start..line_end]
        .char_indices()
        .skip_while(|(_, ch)| ch.is_whitespace() || *ch == ',')
        .filter(|(_, ch)| !ch.is_whitespace())
        .peekable();
    if body.peek().is_none() {
        return false;
    }
    body.all(|(rel, _)| {
        let abs = line_start + rel;
        spans
            .iter()
            .any(|span| abs >= span.start && abs < span.end)
    })
}
/// Returns true when `offset` lies inside at least one of the half-open `spans`.
fn offset_in_any_span(offset: usize, spans: &[std::ops::Range<usize>]) -> bool {
    for span in spans {
        if span.contains(&offset) {
            return true;
        }
    }
    false
}
/// Returns true for single-line and multi-line comment tokens.
fn is_comment_token(token: &Token) -> bool {
    match token {
        Token::Whitespace(whitespace) => matches!(
            whitespace,
            Whitespace::SingleLineComment { .. } | Whitespace::MultiLineComment(_)
        ),
        _ => false,
    }
}
/// Returns true for whitespace tokens that are a space, tab or newline
/// (comment tokens are not spacing).
fn is_spacing_whitespace(token: &Token) -> bool {
    match token {
        Token::Whitespace(whitespace) => matches!(
            whitespace,
            Whitespace::Space | Whitespace::Tab | Whitespace::Newline
        ),
        _ => false,
    }
}
/// Converts a 1-based (line, column) position, with columns counted in
/// characters, into a byte offset in `sql`.
///
/// The position just past the final character maps to `sql.len()`. Returns
/// `None` for a zero line or column, or a position that does not exist.
fn line_col_to_offset(sql: &str, line: usize, column: usize) -> Option<usize> {
    if line == 0 || column == 0 {
        return None;
    }
    let target = (line, column);
    let mut position = (1usize, 1usize);
    for (offset, ch) in sql.char_indices() {
        if position == target {
            return Some(offset);
        }
        position = if ch == '\n' {
            (position.0 + 1, 1)
        } else {
            (position.0, position.1 + 1)
        };
    }
    (position == target).then_some(sql.len())
}
/// Resolves a token's start and end line/column positions to byte offsets in
/// `sql`. Returns `None` if either position cannot be mapped.
fn token_with_span_offsets(sql: &str, token: &TokenWithSpan) -> Option<(usize, usize)> {
    let (from, to) = (&token.span.start, &token.span.end);
    line_col_to_offset(sql, from.line as usize, from.column as usize).and_then(|start| {
        line_col_to_offset(sql, to.line as usize, to.column as usize).map(|end| (start, end))
    })
}
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::parse_sql;
use crate::types::IssueAutofixApplicability;
fn run_with_rule(sql: &str, rule: &LayoutLongLines) -> Vec<Issue> {
let statements = parse_sql(sql).expect("parse");
statements
.iter()
.enumerate()
.flat_map(|(index, statement)| {
rule.check(
statement,
&LintContext {
sql,
statement_range: 0..sql.len(),
statement_index: index,
},
)
})
.collect()
}
fn run(sql: &str) -> Vec<Issue> {
run_with_rule(sql, &LayoutLongLines::default())
}
fn apply_issue_autofix(sql: &str, issue: &Issue) -> Option<String> {
let autofix = issue.autofix.as_ref()?;
let mut edits = autofix.edits.clone();
Some(apply_patch_edits(sql, &mut edits))
}
fn apply_patch_edits(sql: &str, edits: &mut [IssuePatchEdit]) -> String {
edits.sort_by_key(|edit| (edit.span.start, edit.span.end));
let mut rewritten = sql.to_string();
for edit in edits.iter().rev() {
rewritten.replace_range(edit.span.start..edit.span.end, &edit.replacement);
}
rewritten
}
#[test]
fn flags_single_long_line() {
let long_line = format!("SELECT {} FROM t", "x".repeat(320));
let issues = run(&long_line);
assert_eq!(issues.len(), 1);
assert_eq!(issues[0].code, issue_codes::LINT_LT_005);
}
#[test]
fn does_not_flag_short_line() {
assert!(run("SELECT x FROM t").is_empty());
}
#[test]
fn flags_each_overflowing_line_once() {
let sql = format!(
"SELECT {} AS a,\n {} AS b FROM t",
"x".repeat(90),
"y".repeat(90)
);
let issues = run(&sql);
assert_eq!(
issues
.iter()
.filter(|issue| issue.code == issue_codes::LINT_LT_005)
.count(),
2,
);
}
#[test]
fn configured_max_line_length_is_respected() {
let config = LintConfig {
enabled: true,
disabled_rules: vec![],
rule_configs: std::collections::BTreeMap::from([(
"layout.long_lines".to_string(),
serde_json::json!({"max_line_length": 20}),
)]),
};
let rule = LayoutLongLines::from_config(&config);
let sql = "SELECT this_line_is_long FROM t";
let statements = parse_sql(sql).expect("parse");
let issues = rule.check(
&statements[0],
&LintContext {
sql,
statement_range: 0..sql.len(),
statement_index: 0,
},
);
assert_eq!(issues.len(), 1);
assert_eq!(issues[0].code, issue_codes::LINT_LT_005);
}
#[test]
fn ignore_comment_lines_skips_long_comment_only_lines() {
let config = LintConfig {
enabled: true,
disabled_rules: vec![],
rule_configs: std::collections::BTreeMap::from([(
"layout.long_lines".to_string(),
serde_json::json!({
"max_line_length": 20,
"ignore_comment_lines": true
}),
)]),
};
let sql = format!("SELECT 1;\n-- {}\nSELECT 2", "x".repeat(120));
let issues = run_with_rule(&sql, &LayoutLongLines::from_config(&config));
assert!(
issues.is_empty(),
"ignore_comment_lines should suppress long comment-only lines: {issues:?}",
);
}
#[test]
fn ignore_comment_lines_skips_comma_prefixed_comment_lines() {
let config = LintConfig {
enabled: true,
disabled_rules: vec![],
rule_configs: std::collections::BTreeMap::from([(
"layout.long_lines".to_string(),
serde_json::json!({
"max_line_length": 30,
"ignore_comment_lines": true
}),
)]),
};
let sql = "SELECT\nc1\n,-- this is a very long comment line that should be ignored\nc2\n";
let issues = run_with_rule(sql, &LayoutLongLines::from_config(&config));
assert!(issues.is_empty());
}
#[test]
fn ignore_comment_lines_skips_jinja_comment_lines() {
let sql =
"SELECT *\n{# this is a very long jinja comment line that should be ignored #}\nFROM t";
let spans = long_line_overflow_spans(sql, 30, true, false, Dialect::Generic);
assert!(spans.is_empty());
}
#[test]
fn ignore_comment_clauses_skips_long_trailing_comment_text() {
let config = LintConfig {
enabled: true,
disabled_rules: vec![],
rule_configs: std::collections::BTreeMap::from([(
"layout.long_lines".to_string(),
serde_json::json!({
"max_line_length": 20,
"ignore_comment_clauses": true
}),
)]),
};
let sql = format!("SELECT 1 -- {}", "x".repeat(120));
let issues = run_with_rule(&sql, &LayoutLongLines::from_config(&config));
assert!(
issues.is_empty(),
"ignore_comment_clauses should suppress trailing-comment overflow: {issues:?}",
);
}
#[test]
fn ignore_comment_clauses_still_flags_long_sql_prefix() {
let config = LintConfig {
enabled: true,
disabled_rules: vec![],
rule_configs: std::collections::BTreeMap::from([(
"LINT_LT_005".to_string(),
serde_json::json!({
"max_line_length": 20,
"ignore_comment_clauses": true
}),
)]),
};
let sql = format!("SELECT {} -- short", "x".repeat(40));
let issues = run_with_rule(&sql, &LayoutLongLines::from_config(&config));
assert_eq!(issues.len(), 1);
assert_eq!(issues[0].code, issue_codes::LINT_LT_005);
}
#[test]
fn ignore_comment_clauses_skips_sql_comment_clause_lines() {
let config = LintConfig {
enabled: true,
disabled_rules: vec![],
rule_configs: std::collections::BTreeMap::from([(
"layout.long_lines".to_string(),
serde_json::json!({
"max_line_length": 40,
"ignore_comment_clauses": true
}),
)]),
};
let sql = "CREATE TABLE t (\n c1 INT COMMENT 'this is a very very very very very very very very long comment'\n)";
let issues = run_with_rule(sql, &LayoutLongLines::from_config(&config));
assert!(issues.is_empty());
}
/// A non-positive `max_line_length` (here `-1`) turns the rule off:
/// `from_config` maps it to `None`, and `check` returns no issues in that case,
/// so even a very long line yields nothing.
#[test]
fn non_positive_max_line_length_disables_rule() {
    let mut rule_configs = std::collections::BTreeMap::new();
    rule_configs.insert(
        String::from("layout.long_lines"),
        serde_json::json!({"max_line_length": -1}),
    );
    let config = LintConfig {
        enabled: true,
        disabled_rules: Vec::new(),
        rule_configs,
    };
    let rule = LayoutLongLines::from_config(&config);

    // Well over the default limit of 80 characters.
    let long_sql = "SELECT this_is_a_very_long_column_name_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx FROM t";

    let issues = run_with_rule(long_sql, &rule);
    assert!(issues.is_empty());
}
/// Calls `check` directly with a synthetic statement while the context's SQL is
/// a templated document, and expects the long `{{ config(...) }}` line to be
/// reported by the default rule.
#[test]
fn statementless_fallback_flags_long_jinja_config_line() {
    let sql = "{{ config (schema='bronze', materialized='view', sort =['id','number'], dist = 'all', tags =['longlonglonglonglong']) }} \n\nselect 1\n";

    // The statement is parsed from unrelated SQL; the text under test is
    // supplied through the context instead.
    let synthetic = parse_sql("SELECT 1").expect("parse");
    let statement = &synthetic[0];

    // The context covers the whole document as the first statement.
    let ctx = LintContext {
        sql,
        statement_range: 0..sql.len(),
        statement_index: 0,
    };

    let issues = LayoutLongLines::default().check(statement, &ctx);
    assert!(
        !issues.is_empty(),
        "expected LT05 to flag long templated config line in statementless mode"
    );
    assert_eq!(issues[0].code, issue_codes::LINT_LT_005);
}
/// A single-line `SELECT` with 120 projected columns must be reported as LT05
/// with an autofix marked `Safe`, and applying that autofix must reproduce the
/// output of `legacy_split_long_line` and differ from the input.
#[test]
fn emits_safe_autofix_patch_for_very_long_line() {
    let projections = (0..120)
        .map(|index| format!("col_{index}"))
        .collect::<Vec<_>>()
        .join(", ");
    let sql = format!("SELECT {projections} FROM t");

    let issues = run(&sql);
    // Guard before indexing so a missing diagnostic fails with a descriptive
    // message rather than an index-out-of-bounds panic.
    assert!(
        !issues.is_empty(),
        "expected LT05 to flag the very long SELECT line"
    );
    assert_eq!(issues[0].code, issue_codes::LINT_LT_005);

    let autofix = issues[0].autofix.as_ref().expect("autofix metadata");
    assert_eq!(autofix.applicability, IssueAutofixApplicability::Safe);

    let fixed = apply_issue_autofix(&sql, &issues[0]).expect("apply autofix");
    let expected = legacy_split_long_line(&sql).expect("legacy split result");
    assert_eq!(fixed, expected);
    // The fix must actually change the text.
    assert_ne!(fixed, sql);
}
/// A `SELECT` of one 120-character identifier must be reported as LT05, and
/// applying its autofix must yield multi-line output with `FROM t` on its own
/// line.
///
/// NOTE(review): the test name says no autofix is emitted, but the body
/// requires the autofix to apply successfully. The name looks historical;
/// consider renaming it to match the asserted behaviour.
#[test]
fn does_not_emit_autofix_when_line_is_below_legacy_split_threshold() {
    let sql = format!("SELECT {} FROM t", "x".repeat(120));

    let issues = run(&sql);
    // Guard before indexing so a missing diagnostic fails with a descriptive
    // message rather than an index-out-of-bounds panic.
    assert!(
        !issues.is_empty(),
        "expected LT05 to flag the long SELECT line"
    );
    assert_eq!(issues[0].code, issue_codes::LINT_LT_005);

    let fixed = apply_issue_autofix(&sql, &issues[0]).expect("apply autofix");
    assert!(fixed.contains('\n'));
    assert!(fixed.contains("\nFROM t"));
}
/// When a trailing `--` comment makes the line overflow, the autofix is
/// expected to move the comment onto its own line above the code.
#[test]
fn autofix_moves_inline_comment_before_code_when_overflowing() {
    let input = "SELECT 1 -- Some Comment\n";
    let expected = "-- Some Comment\nSELECT 1\n";

    // NOTE(review): arguments are presumably (sql, max line length,
    // trailing_comments_after) — confirm against the helper's signature.
    let mut patch_edits = long_line_autofix_edits(input, 18, false);
    let fixed = apply_patch_edits(input, &mut patch_edits);

    assert_eq!(fixed, expected);
}
/// Same input as the "before code" case, but with the final flag set to `true`
/// the overflowing trailing comment is expected to land on the line after the
/// code.
#[test]
fn autofix_moves_inline_comment_after_code_when_configured() {
    let input = "SELECT 1 -- Some Comment\n";
    let expected = "SELECT 1\n-- Some Comment\n";

    // NOTE(review): the `true` flag presumably corresponds to the
    // "trailing comments after" setting — confirm against the helper's signature.
    let mut patch_edits = long_line_autofix_edits(input, 18, true);
    let fixed = apply_patch_edits(input, &mut patch_edits);

    assert_eq!(fixed, expected);
}
/// The autofix is expected to both hoist the trailing comment above the code
/// and break the remaining `SELECT ... FROM ...` text before `FROM`.
#[test]
fn autofix_moves_comment_and_rebreaks_select_from_line() {
    let input = "SELECT COUNT(*) FROM tbl -- Some Comment\n";
    let expected = "-- Some Comment\nSELECT COUNT(*)\nFROM tbl\n";

    let mut patch_edits = long_line_autofix_edits(input, 18, false);
    let fixed = apply_patch_edits(input, &mut patch_edits);

    assert_eq!(fixed, expected);
}
/// A line that consists solely of a long `--` comment must be left untouched
/// by the autofix: the patched text equals the input.
#[test]
fn autofix_does_not_split_comment_only_long_line() {
    let comment_only =
        "-- Aggregate page performance events from the last 24 hours into hourly summaries.\n";

    let mut patch_edits = long_line_autofix_edits(comment_only, 80, false);
    let fixed = apply_patch_edits(comment_only, &mut patch_edits);

    assert_eq!(fixed, comment_only);
}
/// An overflowing trailing comment on an indented line in the middle of a
/// query is expected to move above that line, keeping the same indentation.
#[test]
fn autofix_moves_mid_query_inline_comment() {
    let input = "select\n    my_long_long_line as foo -- with some comment\nfrom foo\n";
    let expected = "select\n    -- with some comment\n    my_long_long_line as foo\nfrom foo\n";

    let mut patch_edits = long_line_autofix_edits(input, 40, false);
    let fixed = apply_patch_edits(input, &mut patch_edits);

    assert_eq!(fixed, expected);
}
/// Long window-function calls inside a `qualify ... coalesce(...)` expression
/// are expected to be re-broken: the `over (...)` clause moves to its own
/// indented line, and the first call's arguments are expanded as well.
#[test]
fn autofix_rebreaks_window_function_lines() {
    let input = "select *\nfrom t\nqualify a = coalesce(\n    first_value(iff(b = 'none', null, a)) ignore nulls over (partition by c order by d desc),\n    first_value(a) respect nulls over (partition by c order by d desc)\n)\n";
    let expected = "select *\nfrom t\nqualify a = coalesce(\n    first_value(\n        iff(b = 'none', null, a)\n    ) ignore nulls\n        over (partition by c order by d desc),\n    first_value(a) respect nulls\n        over (partition by c order by d desc)\n)\n";

    let mut patch_edits = long_line_autofix_edits(input, 50, false);
    let fixed = apply_patch_edits(input, &mut patch_edits);

    assert_eq!(fixed, expected);
}
/// Three overlong select-list entries (a window function with alias, a plain
/// function with alias, and a comparison expression with alias) are expected
/// to be re-broken into the exact multi-line layout asserted below.
#[test]
fn autofix_rebreaks_long_functions_and_aliases() {
    let input = "SELECT\n    my_function(col1 + col2, arg2, arg3) over (partition by col3, col4 order by col5 rows between unbounded preceding and current row) as my_relatively_long_alias,\n    my_other_function(col6, col7 + col8, arg4) as my_other_relatively_long_alias,\n    my_expression_function(col6, col7 + col8, arg4) = col9 + col10 as another_relatively_long_alias\nFROM my_table\n";
    let expected = "SELECT\n    my_function(col1 + col2, arg2, arg3)\n        over (\n            partition by col3, col4\n            order by col5 rows between unbounded preceding and current row\n        )\n        as my_relatively_long_alias,\n    my_other_function(col6, col7 + col8, arg4)\n        as my_other_relatively_long_alias,\n    my_expression_function(col6, col7 + col8, arg4)\n    = col9 + col10 as another_relatively_long_alias\nFROM my_table\n";

    let mut patch_edits = long_line_autofix_edits(input, 80, false);
    let fixed = apply_patch_edits(input, &mut patch_edits);

    assert_eq!(fixed, expected);
}
/// An overlong `<expression> AS <alias>,` line is expected to be split so that
/// the `AS` clause moves to a continuation line indented one level deeper.
#[test]
fn autofix_splits_long_expression_alias_line() {
    let input =
        "    percentile_cont(0.50) WITHIN GROUP (ORDER BY duration_ms)::int AS p50_ms,\n";
    let expected =
        "    percentile_cont(0.50) WITHIN GROUP (ORDER BY duration_ms)::int\n        AS p50_ms,\n";

    let mut patch_edits = long_line_autofix_edits(input, 80, false);
    let fixed = apply_patch_edits(input, &mut patch_edits);

    assert_eq!(fixed, expected);
}
/// A long `WHEN ... AND ...` predicate line must be changed by the autofix,
/// and every resulting line must be at most 80 characters wide.
#[test]
fn autofix_wraps_generic_long_predicate_line() {
    let input = "    WHEN uli.usage_start_time >= params.as_of_date - MAKE_INTERVAL(days => params.window_days) AND uli.usage_start_time < params.as_of_date\n";

    let mut patch_edits = long_line_autofix_edits(input, 80, false);
    let fixed = apply_patch_edits(input, &mut patch_edits);

    assert_ne!(fixed, input);
    for line in fixed.lines() {
        // Width is measured in characters, not bytes.
        let width = line.chars().count();
        assert!(
            width <= 80,
            "expected wrapped line <= 80 chars, got {}: {line}",
            width
        );
    }
}
/// Wrapping a long line must change the text without breaking inside a quoted
/// string literal: the literal still appears verbatim in the output.
#[test]
fn generic_wrap_keeps_quoted_literals_intact() {
    let input = "SELECT CONCAT('hello world this is a long literal', col1, col2, col3, col4, col5, col6) FROM t\n";
    let literal = "'hello world this is a long literal'";

    let mut patch_edits = long_line_autofix_edits(input, 60, false);
    let fixed = apply_patch_edits(input, &mut patch_edits);

    assert_ne!(fixed, input);
    assert!(fixed.contains(literal));
}
}