Skip to main content

mdwright_lint/stdlib/
subscript_damage.rs

1//! `Hom*{…}`, `α*f`, etc.: math identifier whose subscript `_` was
2//! rewritten to `*` emphasis by an earlier formatter pass.
3//!
4//! Scans prose chunks and inline code (the broken text sometimes
5//! lives verbatim inside a code span if the original `_` was
6//! preceded by a backtick). The allowlist guards against the common
7//! legitimate pullback notation `f*`, `λ*`, etc.: bare one-character
8//! identifiers that conventionally apply pullback.
9//!
10//! Default-off — repair-focused, useful for cleaning legacy
11//! `mdformat`-mangled documents.
12
13use std::sync::OnceLock;
14
15use regex::Regex;
16
17use crate::diagnostic::{Diagnostic, Fix};
18use crate::regex_util::compile_static;
19use crate::rule::LintRule;
20use mdwright_document::Document;
21
/// Lint rule that flags identifiers where a `*` stands where a `_` subscript
/// was expected (formatter damage).
///
/// The rule carries no state, so it is a unit struct; the derives make it
/// printable in diagnostics/tests and trivially copyable.
#[derive(Debug, Clone, Copy, Default)]
pub struct SubscriptDamage;
23
24fn pattern() -> &'static Regex {
25    static RE: OnceLock<Regex> = OnceLock::new();
26    RE.get_or_init(|| compile_static(r"(?P<head>[A-Za-z\p{Greek}\p{Letter}])\*(?P<tail>[A-Za-z\{])"))
27}
28
/// Identifier letters that conventionally denote a pullback (`f*`, `λ*`, …).
///
/// `scan` consults this list so that legitimate pullback notation is not
/// reported as subscript damage.
const PULLBACK_HEADS: &[&str] = &["λ", "f", "F", "g", "G", "u", "v", "h"];
30
31impl LintRule for SubscriptDamage {
32    fn name(&self) -> &str {
33        "subscript-damage"
34    }
35
36    fn description(&self) -> &str {
37        "Identifier with `*` where a `_` subscript was expected (formatter damage)."
38    }
39
40    fn explain(&self) -> &str {
41        include_str!("explain/subscript_damage.md")
42    }
43
44    fn produces_fix(&self) -> bool {
45        true
46    }
47
48    fn is_default(&self) -> bool {
49        false
50    }
51
52    fn check(&self, doc: &Document, out: &mut Vec<Diagnostic>) {
53        for chunk in doc.prose_chunks() {
54            scan(&chunk.text, chunk.byte_offset, doc, out);
55        }
56        for code in doc.inline_codes() {
57            scan(&code.text, code.byte_offset, doc, out);
58        }
59    }
60}
61
62fn scan(text: &str, offset: usize, doc: &Document, out: &mut Vec<Diagnostic>) {
63    for cap in pattern().captures_iter(text) {
64        let Some(m) = cap.get(0) else { continue };
65        let head = cap.name("head").map_or("", |x| x.as_str());
66        if PULLBACK_HEADS.contains(&head) {
67            continue;
68        }
69        let matched = m.as_str();
70        let fixed: String = matched.chars().map(|c| if c == '*' { '_' } else { c }).collect();
71        let message = format!(
72            "`{matched}` looks like subscript damage (`_` rewritten to `*` after a \
73             broken code span); intended math is likely `{fixed}`"
74        );
75        if let Some(d) = Diagnostic::at(
76            doc,
77            offset,
78            m.range(),
79            message,
80            Some(Fix {
81                replacement: fixed,
82                safe: false,
83            }),
84        ) {
85            out.push(d);
86        }
87    }
88}