Skip to main content

mdwright_lint/stdlib/
info_string_typo.rs

1//! Fenced code block info string not in a known-languages
2//! allowlist.
3//!
4//! Default allowlist covers the languages this project uses;
5//! advisory because every project has its own list, and the rule
6//! exists to catch typos like ` ```sytax-error` rather than to
7//! enforce a closed set.
8
9use crate::diagnostic::Diagnostic;
10use crate::rule::LintRule;
11use mdwright_document::Document;
12
/// Built-in set of info-string language tags that never trigger the rule.
///
/// The list errs on the side of being generous: a false positive from this
/// rule is noise rather than a real error. Entries are kept ASCII-lowercase
/// because the lookup is ASCII case-insensitive, and the list is only ever
/// used for membership checks, so the order carries no meaning.
const DEFAULT_ALLOWLIST: &[&str] = &[
    // No tag at all, or an explicit "don't highlight" tag.
    "",
    "text",
    "plain",
    "plaintext",
    "txt",
    "no-highlight",
    "nohighlight",
    // Rust and Rust-prefixed tags.
    "rust",
    "rs",
    "rust-toml",
    // Python.
    "python",
    "py",
    // Proof assistants and ML-family languages.
    "lean",
    "lean4",
    "agda",
    "haskell",
    "hs",
    "ocaml",
    "ml",
    // C family.
    "c",
    "cpp",
    "c++",
    "cxx",
    "objc",
    "objective-c",
    // JavaScript / TypeScript.
    "js",
    "javascript",
    "ts",
    "typescript",
    "jsx",
    "tsx",
    // Data and configuration formats.
    "json",
    "jsonc",
    "json5",
    "toml",
    "yaml",
    "yml",
    "ini",
    // Shells and terminal transcripts.
    "sh",
    "bash",
    "zsh",
    "fish",
    "console",
    "shell-session",
    "shellsession",
    // Diffs.
    "diff",
    "patch",
    // Markup.
    "md",
    "markdown",
    "mdx",
    "html",
    "xml",
    "svg",
    // Stylesheets.
    "css",
    "scss",
    "sass",
    "less",
    // Query languages.
    "sql",
    "graphql",
    // Build and packaging.
    "make",
    "makefile",
    "cmake",
    "dockerfile",
    // TeX family.
    "tex",
    "latex",
    "bibtex",
    // Other programming languages.
    "go",
    "java",
    "kotlin",
    "swift",
    "scala",
    "ruby",
    "rb",
    "perl",
    "lua",
    "r",
    "julia",
    "jl",
    "matlab",
    "fortran",
    "elm",
    "erlang",
    "elixir",
    "ex",
    "nix",
    "zig",
];
104
/// Lint rule that flags fenced code blocks whose info-string language tag
/// is not on the allowlist.
///
/// The effective allowlist is the built-in default list plus whatever
/// project-specific tags are stored in `extra`.
#[derive(Debug, Clone, Default)]
pub struct InfoStringTypo {
    /// Project-specific language tags accepted in addition to the defaults.
    extra: Vec<String>,
}

impl InfoStringTypo {
    /// Default instance — only the stdlib allowlist applies.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Extend the allowlist with project-specific language tags
    /// (`promql`, `kdb`, …). The stdlib defaults still apply; these
    /// are additions. The CLI wires this from `[lint.info-strings]
    /// extra` in `mdwright.toml`.
    #[must_use]
    pub fn with_extra(extra: Vec<String>) -> Self {
        Self { extra }
    }
}
131
132impl LintRule for InfoStringTypo {
133    fn name(&self) -> &str {
134        "info-string-typo"
135    }
136
137    fn description(&self) -> &str {
138        "Fenced code block info string not in the known-languages allowlist."
139    }
140
141    fn explain(&self) -> &str {
142        include_str!("explain/info_string_typo.md")
143    }
144
145    fn is_advisory(&self) -> bool {
146        true
147    }
148
149    fn check(&self, doc: &Document, out: &mut Vec<Diagnostic>) {
150        for cb in doc.code_blocks() {
151            if !cb.fenced {
152                continue;
153            }
154            let info: &str = cb.info.as_str();
155            // Some renderers allow attributes after the language tag
156            // (`rust,no_run`). Strip everything after the first comma
157            // or whitespace before allowlist checking.
158            let language = info.split([',', ' ', '\t']).next().unwrap_or("");
159            if is_myst_directive_info(language) {
160                continue;
161            }
162            let language_lower = language.to_ascii_lowercase();
163            if DEFAULT_ALLOWLIST.iter().any(|&a| a == language_lower)
164                || self.extra.iter().any(|e| e.eq_ignore_ascii_case(&language_lower))
165            {
166                continue;
167            }
168            let message = format!(
169                "unfamiliar code-fence info string `{language}` — typo, or extend the \
170                 allowlist if this is intentional"
171            );
172            // Point at the fence line — the first line of the block.
173            let line_end = doc
174                .source()
175                .get(cb.raw_range.start..cb.raw_range.end)
176                .and_then(|s| s.find('\n'))
177                .map_or(cb.raw_range.end, |n| cb.raw_range.start.saturating_add(n));
178            let local = 0..(line_end.saturating_sub(cb.raw_range.start));
179            if let Some(d) = Diagnostic::at(doc, cb.raw_range.start, local, message, None) {
180                out.push(d);
181            }
182        }
183    }
184}
185
/// Returns `true` when `language` looks like a MyST directive fence tag:
/// a name wrapped in braces, e.g. `{note}`, `{code-block}`, or the
/// domain-qualified `{py:function}`.
///
/// The name between the braces is one or more `:`-separated segments.
/// Each segment must be non-empty and consist only of ASCII letters,
/// digits, `-` and `_`. Anything else (missing braces, an empty name,
/// embedded whitespace, empty segments such as `{py:}`) is not treated as
/// a directive.
fn is_myst_directive_info(language: &str) -> bool {
    let Some(inner) = language.strip_prefix('{').and_then(|s| s.strip_suffix('}')) else {
        return false;
    };
    // `split` always yields at least one segment, so an empty `inner`
    // produces a single empty segment and is rejected below.
    inner.split(':').all(|segment| {
        !segment.is_empty()
            && segment
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_'))
    })
}
195
#[cfg(test)]
mod tests {
    use anyhow::Result;

    use super::InfoStringTypo;
    use crate::rule_set::RuleSet;
    use mdwright_document::Document;

    /// Rule identifier as it appears on emitted diagnostics.
    const RULE: &str = "info-string-typo";

    /// A tag outside the stdlib allowlist is reported by default and is
    /// silenced once it is supplied through `with_extra`.
    #[test]
    fn extra_allowlist_silences_known_language() -> Result<()> {
        let markdown = "```promql\nrate(http_requests_total[5m])\n```\n";

        // Stdlib allowlist only: `promql` is not on it, so the rule
        // must fire.
        let mut default_rules = RuleSet::new();
        default_rules
            .add(Box::new(InfoStringTypo::new()))
            .map_err(|e| anyhow::anyhow!("{e}"))?;
        let baseline = default_rules.check(&Document::parse(markdown)?);
        assert!(
            baseline.iter().any(|d| d.rule == RULE),
            "baseline should report info-string-typo; got {baseline:?}"
        );

        // Same document, with `promql` added as an extra tag: no report.
        let mut extended_rules = RuleSet::new();
        extended_rules
            .add(Box::new(InfoStringTypo::with_extra(vec!["promql".to_owned()])))
            .map_err(|e| anyhow::anyhow!("{e}"))?;
        let extended = extended_rules.check(&Document::parse(markdown)?);
        assert!(
            extended.iter().all(|d| d.rule != RULE),
            "extra allowlist should silence info-string-typo; got {extended:?}"
        );
        Ok(())
    }

    /// `jsonc` is on the stdlib allowlist and `{note}` is a MyST
    /// directive; neither fence may produce a diagnostic.
    #[test]
    fn accepts_jsonc_and_myst_directive_fences() -> Result<()> {
        let markdown = "```jsonc\n{}\n```\n\n```{note}\nbody\n```\n";
        let mut rules = RuleSet::new();
        rules
            .add(Box::new(InfoStringTypo::new()))
            .map_err(|e| anyhow::anyhow!("{e}"))?;
        let diagnostics = rules.check(&Document::parse(markdown)?);
        assert!(
            diagnostics.is_empty(),
            "jsonc and MyST directive fences should be accepted: {diagnostics:?}"
        );
        Ok(())
    }
}