badness 0.5.0

A language server, formatter, and linter for LaTeX
//! The rule abstraction: the [`Rule`] trait every lint implements, the
//! [`RuleContext`] handed to it, and the registry of built-in rules.
//!
//! Every rule is on by default; the
//! `badness.toml` `[lint]` `select`/`ignore` keys (and the CLI's matching flags)
//! narrow the active set via [`RuleSelection`], applied as a post-filter so the
//! shared `lint_document` driver stays config-unaware.

use std::path::Path;

use crate::project::{ResolvedCitations, ResolvedLabels};
use crate::semantic::SemanticModel;
use crate::syntax::{SyntaxElement, SyntaxKind, SyntaxNode};

use super::diagnostic::{Diagnostic, Severity};

pub mod deprecated_command;
pub mod dollar_display_math;
pub mod duplicate_label;
pub mod mismatched_delimiter;
pub mod missing_nonbreaking_space;
pub mod obsolete_environment;
pub mod undefined_citation;
pub mod undefined_ref;

pub use deprecated_command::DeprecatedCommand;
pub use dollar_display_math::DollarDisplayMath;
pub use duplicate_label::DuplicateLabel;
pub use mismatched_delimiter::MismatchedDelimiter;
pub use missing_nonbreaking_space::MissingNonbreakingSpace;
pub use obsolete_environment::ObsoleteEnvironment;
pub use undefined_citation::UndefinedCitation;
pub use undefined_ref::UndefinedRef;

/// Everything a [`Rule`] reads to produce diagnostics for one file.
///
/// `path` is informational (rules may name the file in a message); the driver
/// still stamps each diagnostic's `path` afterward, so rules construct
/// diagnostics with an empty path.
pub struct RuleContext<'a> {
    /// Path of the file being linted. Informational for messages, and the key
    /// used to look up this file's label namespace in `resolution`.
    pub path: &'a Path,
    /// Root syntax node for this file — presumably the root of the CST the
    /// driver traverses on behalf of node-shape rules.
    pub root: &'a SyntaxNode,
    /// Semantic model for this file; whole-file rules ([`Rule::check_file`]) may
    /// be driven by it rather than by node shape.
    pub model: &'a SemanticModel,
    /// Cross-file label resolution for the project `path` belongs to, or `None`
    /// when there is no project view (stdin, or a context — like the language
    /// server today — that hasn't assembled one). Cross-file rules are inert when
    /// this is `None`. `path` keys into it to find this file's label namespace.
    pub resolution: Option<&'a ResolvedLabels>,
    /// Cross-file citation resolution (cite keys reachable via the project's
    /// `.bib` resources), or `None` when there is no project view. Gates
    /// `undefined-citation`, the bibliographic analog of `resolution`.
    pub citations: Option<&'a ResolvedCitations>,
}

/// The contract every lint implements. The `Send + Sync` bound lets one registry
/// be shared across the LSP's read pool.
///
/// [`lint_document`](super::check::lint_document) drives every rule from a single
/// shared traversal, and a rule plugs into it in one of two ways:
///
/// - **Node-shape rules** list the kinds they care about in [`Rule::interests`]
///   and implement [`Rule::check`], which the driver calls once per visited
///   element of a listed kind. They never walk the tree themselves.
/// - **Whole-file rules** keep `interests` empty and implement
///   [`Rule::check_file`], which the driver calls once, after the walk. Use this
///   when the rule is driven by the semantic model or cross-file resolution
///   rather than by node shape.
pub trait Rule: Send + Sync {
    /// Stable kebab-case identifier: reported as the diagnostic's `rule`, and the
    /// name `% badness-ignore <id>` targets.
    fn id(&self) -> &'static str;

    /// Severity of this rule's findings unless a finding overrides it. Defaults
    /// to [`Severity::Warning`].
    fn default_severity(&self) -> Severity {
        Severity::Warning
    }

    /// The `SyntaxKind`s this rule wants to see. During the driver's single
    /// shared traversal, [`Rule::check`] runs once for each element whose kind is
    /// listed. The default, an empty slice, opts out of node dispatch altogether,
    /// which suits rules that work through [`Rule::check_file`] instead.
    fn interests(&self) -> &'static [SyntaxKind] {
        &[]
    }

    /// Per-element hook, called for every CST element (node *or* token) whose
    /// kind appears in [`Rule::interests`]. Node-shape rules unwrap
    /// `el.as_node()`. Push findings onto `sink`, leaving their path empty.
    ///
    /// The default does nothing.
    fn check(&self, el: &SyntaxElement, ctx: &RuleContext<'_>, sink: &mut Vec<Diagnostic>) {
        // Intentionally empty; the discards only keep the parameters "used".
        let _ = el;
        let _ = ctx;
        let _ = sink;
    }

    /// Whole-file hook, called once after the shared traversal has finished. For
    /// rules driven by the semantic model or cross-file resolution rather than by
    /// node shape. Push findings onto `sink`, leaving their path empty.
    ///
    /// The default does nothing.
    fn check_file(&self, ctx: &RuleContext<'_>, sink: &mut Vec<Diagnostic>) {
        // Intentionally empty; the discards only keep the parameters "used".
        let _ = ctx;
        let _ = sink;
    }
}

/// Instantiates every built-in rule, in registry order.
///
/// The order mirrors [`ALL_RULE_IDS`]; a unit test compares the two lists. The
/// array length is part of the local's type, so adding or removing an entry
/// without updating the count is a compile error rather than a silent drift.
pub fn all_rules() -> Vec<Box<dyn Rule>> {
    let registry: [Box<dyn Rule>; 8] = [
        Box::new(DuplicateLabel),
        Box::new(DeprecatedCommand),
        Box::new(MissingNonbreakingSpace),
        Box::new(ObsoleteEnvironment),
        Box::new(DollarDisplayMath),
        Box::new(MismatchedDelimiter),
        Box::new(UndefinedRef),
        Box::new(UndefinedCitation),
    ];
    Vec::from(registry)
}

/// The ids of every built-in **LaTeX** rule. Kept in lockstep with [`all_rules`]:
/// same ids in the same order (the `registry_and_id_list_agree` test compares the
/// two lists element by element), so a rule added to one must be added to the
/// other at the same position.
/// The bib rules live in [`crate::bib::linter::ALL_BIB_RULE_IDS`]; the selectable
/// universe is the union of the two (see [`all_known_rule_ids`]).
pub const ALL_RULE_IDS: &[&str] = &[
    "duplicate-label",
    "deprecated-command",
    "missing-nonbreaking-space",
    "obsolete-environment",
    "dollar-display-math",
    "mismatched-delimiter",
    "undefined-ref",
    "undefined-citation",
];

/// Iterates every built-in rule id of **both** linters: the LaTeX ids first, then
/// the BibTeX ids.
///
/// The CLI lints `.tex` and `.bib` files in one pass and filters the combined
/// diagnostic stream through a single [`RuleSelection`]. The selectable universe,
/// which is also what `select`/`ignore` are validated against, therefore has to
/// cover both registries. If the bib half were missing, every bib finding's id
/// would read as "not active" and the CLI would silently drop it, while the LSP
/// (which doesn't post-filter) would still show it.
fn all_known_rule_ids() -> impl Iterator<Item = &'static str> {
    ALL_RULE_IDS
        .iter()
        .chain(crate::bib::linter::ALL_BIB_RULE_IDS.iter())
        .copied()
}

/// The pseudo-rule id parse diagnostics carry. It is never a lint rule, so
/// `select`/`ignore` never touch it: a parse error always surfaces.
/// [`RuleSelection::is_active`] returns `true` for it regardless of the active
/// set.
pub const PARSE_RULE_ID: &str = "parse";

/// The active lint-rule set for one run, after applying `select`/`ignore`.
///
/// Resolution by rule id (not by constructing the rule objects) so it can filter
/// the diagnostics `lint_document` already produced without changing that shared
/// entry point's signature. The semantics are:
///
/// 1. Base set = the known ids named in `select` when it is `Some`, else every
///    built-in rule (LaTeX and BibTeX alike).
/// 2. Subtract anything in `ignore`.
/// 3. Unknown ids in `select`/`ignore` — ids found in neither [`ALL_RULE_IDS`]
///    nor [`crate::bib::linter::ALL_BIB_RULE_IDS`] — are returned via the second
///    tuple element of [`RuleSelection::resolve`] so the caller can surface them;
///    they do not error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RuleSelection {
    /// Ids of the rules left active, in the order the known-id lists enumerate
    /// them (LaTeX ids, then bib ids). The derived `PartialEq` compares this
    /// vector, so that order is part of equality.
    active: Vec<&'static str>,
}

impl RuleSelection {
    /// Resolve `select`/`ignore` into the active rule set.
    ///
    /// Returns the selection plus every id that is not a known rule id, taken
    /// from `select` first and `ignore` second, spelled exactly as given and in
    /// input order (an id that appears more than once is reported each time).
    ///
    /// A known id is active when `select` is `None` or names it, and `ignore`
    /// does not name it. The active ids keep the order of the known-id list.
    pub fn resolve(select: Option<&[String]>, ignore: &[String]) -> (Self, Vec<String>) {
        let is_known = |id: &str| all_known_rule_ids().any(|known| known == id);
        let unknown: Vec<String> = select
            .into_iter()
            .flatten()
            .chain(ignore)
            .filter(|id| !is_known(id.as_str()))
            .cloned()
            .collect();

        // With no `select`, every known rule starts out selected.
        let selected = |id: &str| match select {
            Some(picks) => picks.iter().any(|p| p == id),
            None => true,
        };
        let ignored = |id: &str| ignore.iter().any(|i| i == id);
        let active = all_known_rule_ids()
            .filter(|&id| selected(id) && !ignored(id))
            .collect();

        (Self { active }, unknown)
    }

    /// The unfiltered selection, with every built-in rule active. This is the
    /// default for callers that have no config (the LSP, the library API).
    pub fn all() -> Self {
        let active = all_known_rule_ids().collect();
        Self { active }
    }

    /// Whether a diagnostic carrying this `rule` id should be kept. Parse
    /// diagnostics ([`PARSE_RULE_ID`]) are always kept; a lint rule is kept
    /// exactly when it is in the active set.
    pub fn is_active(&self, rule: &str) -> bool {
        match rule {
            PARSE_RULE_ID => true,
            lint_id => self.active.contains(&lint_id),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Owned copies of `ids`, the form `RuleSelection::resolve` takes.
    fn owned(ids: &[&str]) -> Vec<String> {
        ids.iter().map(|&id| String::from(id)).collect()
    }

    /// The registry and the id list must name the same rules in the same order.
    #[test]
    fn registry_and_id_list_agree() {
        let registered: Vec<&str> = all_rules().iter().map(|rule| rule.id()).collect();
        assert_eq!(registered, ALL_RULE_IDS);
    }

    #[test]
    fn all_selection_keeps_every_rule_and_parse() {
        let everything = RuleSelection::all();
        for id in ALL_RULE_IDS {
            assert!(everything.is_active(id), "{id} should be active");
        }
        assert!(everything.is_active(PARSE_RULE_ID));
    }

    #[test]
    fn select_restricts_to_listed_rules_but_keeps_parse() {
        let picks = owned(&["duplicate-label"]);
        let (selection, unknown) = RuleSelection::resolve(Some(picks.as_slice()), &[]);
        assert!(unknown.is_empty());
        assert!(selection.is_active("duplicate-label"));
        assert!(!selection.is_active("deprecated-command"));
        // A `select` never filters out parse errors.
        assert!(selection.is_active(PARSE_RULE_ID));
    }

    #[test]
    fn ignore_subtracts_from_default_set() {
        let skipped = owned(&["deprecated-command"]);
        let (selection, unknown) = RuleSelection::resolve(None, skipped.as_slice());
        assert!(unknown.is_empty());
        assert!(!selection.is_active("deprecated-command"));
        assert!(selection.is_active("duplicate-label"));
    }

    #[test]
    fn ignore_overrides_select() {
        let picks = owned(&["duplicate-label", "undefined-ref"]);
        let skipped = owned(&["undefined-ref"]);
        let (selection, _) = RuleSelection::resolve(Some(picks.as_slice()), skipped.as_slice());
        assert!(selection.is_active("duplicate-label"));
        assert!(!selection.is_active("undefined-ref"));
    }

    #[test]
    fn bib_rules_are_active_by_default() {
        // Bib findings go through the same `RuleSelection` in the CLI, so bib
        // rule ids have to count as known/active; otherwise the CLI silently
        // drops every bib finding (while the LSP, which doesn't post-filter,
        // still shows them).
        let everything = RuleSelection::all();
        for id in crate::bib::linter::ALL_BIB_RULE_IDS {
            assert!(everything.is_active(id), "{id} should be active");
        }
        let (defaults, unknown) = RuleSelection::resolve(None, &[]);
        assert!(unknown.is_empty());
        assert!(defaults.is_active("missing-required-field"));
    }

    #[test]
    fn bib_rules_are_selectable_and_ignorable() {
        let bib_rule = owned(&["missing-required-field"]);

        let (selection, unknown) = RuleSelection::resolve(Some(bib_rule.as_slice()), &[]);
        assert!(unknown.is_empty(), "bib id must be recognized, not unknown");
        assert!(selection.is_active("missing-required-field"));
        assert!(!selection.is_active("duplicate-label"));

        let (selection, unknown) = RuleSelection::resolve(None, bib_rule.as_slice());
        assert!(unknown.is_empty());
        assert!(!selection.is_active("missing-required-field"));
        assert!(selection.is_active("duplicate-label"));
    }

    #[test]
    fn unknown_ids_are_reported() {
        let picks = owned(&["duplicate-label", "no-such-rule"]);
        let skipped = owned(&["also-bogus"]);
        let (_, unknown) = RuleSelection::resolve(Some(picks.as_slice()), skipped.as_slice());
        // `select`'s unknowns come first, then `ignore`'s.
        assert_eq!(unknown, owned(&["no-such-rule", "also-bogus"]));
    }
}