rigsql-rules 0.7.1

Lint rules (sqlfluff-compatible) for the rigsql SQL linter
Documentation
use rigsql_core::{Segment, SegmentType, Span};

use crate::violation::{LintViolation, SourceEdit};

/// Report whether an explicit `AS` keyword appears among `children`.
///
/// Only the direct entries of `children` are inspected. An entry counts when
/// it is a `Segment::Token` typed as `SegmentType::Keyword` whose text equals
/// `AS`, compared ASCII case-insensitively.
pub fn has_as_keyword(children: &[Segment]) -> bool {
    children.iter().any(|seg| {
        matches!(
            seg,
            Segment::Token(tok)
                if tok.segment_type == SegmentType::Keyword
                    && tok.token.text.eq_ignore_ascii_case("AS")
        )
    })
}

/// Find the earliest entry of `children` whose segment type is not trivia.
///
/// Returns `None` when the slice is empty or contains nothing but trivia.
pub fn first_non_trivia(children: &[Segment]) -> Option<&Segment> {
    let index = children
        .iter()
        .position(|seg| !seg.segment_type().is_trivia())?;
    children.get(index)
}

/// Find the final entry of `children` whose segment type is not trivia.
///
/// The slice is searched from the back. Returns `None` when the slice is
/// empty or contains nothing but trivia.
pub fn last_non_trivia(children: &[Segment]) -> Option<&Segment> {
    let index = children
        .iter()
        .rposition(|seg| !seg.segment_type().is_trivia())?;
    children.get(index)
}

/// Keywords that should NOT be treated as alias names.
///
/// Every entry is written in upper case, and the table is kept in ascending
/// byte order because `is_false_alias` looks candidates up with
/// `binary_search` after upper-casing them. Insert new keywords at their
/// sorted position: an out-of-order entry makes lookups miss silently.
const NOT_ALIAS_KEYWORDS: &[&str] = &[
    "ALTER",
    "AND",
    "BEGIN",
    "BREAK",
    "CATCH",
    "CLOSE",
    "COMMIT",
    "CONTINUE",
    "CREATE",
    "CROSS",
    "CURSOR",
    "DEALLOCATE",
    "DECLARE",
    "DELETE",
    "DROP",
    "ELSE",
    "END",
    "EXCEPT",
    "EXEC",
    "EXECUTE",
    "FETCH",
    "FOR",
    "FROM",
    "FULL",
    "GO",
    "GOTO",
    "GROUP",
    "HAVING",
    "IF",
    "INNER",
    "INSERT",
    "INTERSECT",
    "INTO",
    "JOIN",
    "LEFT",
    "LIMIT",
    "MERGE",
    "NATURAL",
    "NEXT",
    "OFFSET",
    "ON",
    "OPEN",
    "OR",
    "ORDER",
    "OUTPUT",
    "OVER",
    "PRINT",
    "RAISERROR",
    "RETURN",
    "RETURNING",
    "RIGHT",
    "ROLLBACK",
    "SELECT",
    "SET",
    "TABLE",
    "THEN",
    "THROW",
    "TRUNCATE",
    "TRY",
    "UNION",
    "UPDATE",
    "VALUES",
    "WHEN",
    "WHERE",
    "WHILE",
    "WITH",
];

/// Decide whether the apparent alias name of an AliasExpression is really a
/// misidentified SQL keyword (for example `OVER` in a window function).
///
/// The alias name is taken to be the last non-trivia child. The result is
/// `true` only when that child is a token whose ASCII-upper-cased text is
/// listed in `NOT_ALIAS_KEYWORDS`. A missing child or a non-token child
/// yields `false`.
pub fn is_false_alias(children: &[Segment]) -> bool {
    match last_non_trivia(children) {
        Some(Segment::Token(tok)) => {
            let candidate = tok.token.text.to_ascii_uppercase();
            NOT_ALIAS_KEYWORDS
                .binary_search(&candidate.as_str())
                .is_ok()
        }
        _ => false,
    }
}

/// Build the edit list that inserts `"AS "` immediately before the alias name.
///
/// The alias name is the last non-trivia child, and the insertion point is the
/// start of its span. When no such child exists the returned list is empty.
/// Used by AL01 and AL02.
pub fn insert_as_keyword_fix(children: &[Segment]) -> Vec<SourceEdit> {
    match last_non_trivia(children) {
        Some(alias) => vec![SourceEdit::insert(alias.span().start, "AS ")],
        None => Vec::new(),
    }
}

/// Upper-case the first character of `s` and lower-case everything after it.
///
/// An empty input produces an empty string. The conversions are the Unicode
/// ones from the standard library, so the result can differ in length from
/// the input. Used by CP01 and CP03 for the `Capitalise` policy.
pub fn capitalise(s: &str) -> String {
    let mut rest = s.chars();
    let Some(first) = rest.next() else {
        return String::new();
    };

    let mut out = String::with_capacity(s.len());
    out.extend(first.to_uppercase());
    out.push_str(&rest.as_str().to_lowercase());
    out
}

/// Compare a token's text with its expected capitalisation and build the
/// violation when the two differ.
///
/// Returns `None` when `text` already equals `expected`. Otherwise the
/// returned violation carries:
/// - a human-readable message naming `category`, `policy_name`, the found
///   text and the expected text,
/// - a single fix replacing `span` with `expected`,
/// - the message key `rules.<rule_code>.msg` with the parameters `category`,
///   `policy`, `found` and `expected`.
///
/// Shared by CP01, CP04, CP05 to avoid duplicating violation creation.
pub fn check_capitalisation(
    rule_code: &'static str,
    category: &str,
    text: &str,
    expected: &str,
    policy_name: &str,
    span: Span,
) -> Option<LintViolation> {
    // Nothing to report when the token already has the expected form.
    if text == expected {
        return None;
    }

    let params: Vec<(String, String)> = [
        ("category", category),
        ("policy", policy_name),
        ("found", text),
        ("expected", expected),
    ]
    .iter()
    .map(|(key, value)| (key.to_string(), value.to_string()))
    .collect();

    let msg_key = format!("rules.{rule_code}.msg");
    let message = format!(
        "{} must be {} case. Found '{}' instead of '{}'.",
        category, policy_name, text, expected
    );
    let fix = vec![SourceEdit::replace(span, expected.to_string())];

    Some(LintViolation::with_fix_and_msg_key(
        rule_code, message, span, fix, msg_key, params,
    ))
}

/// Pull the alias name out of an AliasExpression's children.
///
/// The children are walked from the end. Trivia and keyword segments are
/// stepped over. The first `Identifier` or `QuotedIdentifier` token
/// encountered supplies the name (its text, copied). Any other kind of
/// segment ends the search, in which case `None` is returned.
pub fn extract_alias_name(children: &[Segment]) -> Option<String> {
    for seg in children.iter().rev() {
        let kind = seg.segment_type();

        let is_identifier =
            kind == SegmentType::Identifier || kind == SegmentType::QuotedIdentifier;
        if is_identifier {
            if let Segment::Token(tok) = seg {
                return Some(tok.token.text.to_string());
            }
        }

        // Only trivia and keywords may sit between the end and the alias name.
        let skippable = kind.is_trivia() || kind == SegmentType::Keyword;
        if !skippable {
            break;
        }
    }
    None
}

/// Report whether `segment`'s children end with a `Newline`, ignoring any
/// `Whitespace` children that follow it.
///
/// Looking from the last child backwards, the first child that is not
/// `Whitespace` decides the answer: `true` if it is a `Newline`, `false`
/// otherwise. A segment with no children, or with only whitespace children,
/// yields `false`.
/// Used by layout rules (LT07, LT14) to detect newlines absorbed into clause bodies.
pub fn has_trailing_newline(segment: &Segment) -> bool {
    segment
        .children()
        .iter()
        .rev()
        .map(|child| child.segment_type())
        .find(|kind| *kind != SegmentType::Whitespace)
        .is_some_and(|kind| kind == SegmentType::Newline)
}

/// Tell whether the rule context describes a table alias, i.e. whether its
/// parent segment is a `FromClause` or a `JoinClause`.
///
/// A context without a parent is never a table context.
pub fn is_in_table_context(ctx: &crate::rule::RuleContext) -> bool {
    let Some(parent) = ctx.parent else {
        return false;
    };
    let kind = parent.segment_type();
    kind == SegmentType::FromClause || kind == SegmentType::JoinClause
}

/// Locate the first keyword token in `children` whose text equals `name`,
/// compared ASCII case-insensitively.
///
/// Only direct entries that are `Segment::Token`s typed as
/// `SegmentType::Keyword` are considered. On success the result is the
/// matching entry's index within `children` together with a reference to it.
pub fn find_keyword_in_children<'a>(
    children: &'a [Segment],
    name: &str,
) -> Option<(usize, &'a Segment)> {
    let index = children.iter().position(|seg| {
        matches!(
            seg,
            Segment::Token(tok)
                if tok.segment_type == SegmentType::Keyword
                    && tok.token.text.eq_ignore_ascii_case(name)
        )
    })?;
    children.get(index).map(|seg| (index, seg))
}

/// Walk the tree rooted at `segment` and append every `(text, span)` pair
/// that `filter` produces to `out`.
///
/// The traversal is pre-order: `filter` is applied to a segment first and
/// then, recursively, to each of its children in order. The filter is offered
/// every segment visited, so it alone decides which ones contribute.
/// Used by CP rules in `consistent` mode to gather all tokens of a category.
pub fn collect_matching_tokens<F>(segment: &Segment, filter: &F, out: &mut Vec<(String, Span)>)
where
    F: Fn(&Segment) -> Option<(String, Span)>,
{
    // `Option` is iterable, so this pushes the pair only when there is one.
    out.extend(filter(segment));

    for node in segment.children() {
        collect_matching_tokens(node, filter, out);
    }
}

/// Determine the majority case from a list of token texts.
///
/// Each token casts at most one vote, judged on its ASCII letters only:
/// - `"upper"` if it has at least one ASCII letter and none are lower-case,
/// - `"lower"` if it has at least one ASCII letter and none are upper-case.
///
/// Mixed-case tokens are skipped (they are violations under either policy).
/// Tokens with no ASCII letters at all (such as `_1` or `123`) are skipped
/// too, because they carry no case information; counting them would bias
/// the result towards `"upper"`.
///
/// Returns `"upper"` or `"lower"`. On a tie, including an empty input,
/// defaults to `"upper"`.
pub fn determine_majority_case(tokens: &[(String, Span)]) -> &'static str {
    let mut upper_count = 0usize;
    let mut lower_count = 0usize;

    for (text, _) in tokens {
        let mut has_upper = false;
        let mut has_lower = false;
        for c in text.chars() {
            has_upper |= c.is_ascii_uppercase();
            has_lower |= c.is_ascii_lowercase();
        }

        match (has_upper, has_lower) {
            (true, false) => upper_count += 1,
            (false, true) => lower_count += 1,
            // Mixed-case or caseless: no vote either way.
            _ => {}
        }
    }

    if lower_count > upper_count {
        "lower"
    } else {
        "upper"
    }
}