Skip to main content

rumdl_lib/rules/
md079_chunk_label_spaces.rs

1//! Rule MD079: Quarto chunk labels must not contain whitespace.
2//!
3//! Whitespace in chunk labels silently breaks Quarto cross-references
4//! (`@fig-foo`) and produces unstable HTML anchors. This rule catches:
5//!
6//! - Implicit-positional spaces: ` ```{r several words} ` — multiple bare
7//!   words before any `key=value` are interpreted by knitr/Quarto as a
8//!   single space-separated label.
9//! - Quoted-value spaces: ` ```{r, label="my label"} `.
10//! - Hashpipe spaces: `#| label: my label`.
11//!
12//! Quarto flavor only; a no-op for every other flavor. No auto-fix —
13//! renaming a label is a semantic choice (hyphen vs underscore vs collapse).
14
15use crate::config::MarkdownFlavor;
16use crate::lint_context::LintContext;
17use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
18use crate::utils::quarto_chunks::{
19    ChunkLabelSource, is_executable_chunk, parse_hashpipe_labels, parse_inline_chunk_header,
20};
21
/// Lint rule MD079: reports Quarto chunk labels that contain whitespace.
///
/// The rule carries no state; every instance behaves the same.
#[derive(Clone, Debug, Default)]
pub struct MD079ChunkLabelSpaces;
24
25impl Rule for MD079ChunkLabelSpaces {
26    fn name(&self) -> &'static str {
27        "MD079"
28    }
29
30    fn description(&self) -> &'static str {
31        "Quarto chunk labels must not contain whitespace"
32    }
33
34    fn check(&self, ctx: &LintContext) -> LintResult {
35        if ctx.flavor != MarkdownFlavor::Quarto {
36            return Ok(Vec::new());
37        }
38
39        let mut warnings = Vec::new();
40        for detail in &ctx.code_block_details {
41            if !detail.is_fenced || !is_executable_chunk(&detail.info_string) {
42                continue;
43            }
44
45            // Inline labels.
46            if let Some(header) = parse_inline_chunk_header(&detail.info_string) {
47                // Implicit-positional run: two or more bare words before any
48                // key=value parse as one space-separated label per Quarto.
49                let positional: Vec<_> = header
50                    .labels
51                    .iter()
52                    .filter(|l| l.source == ChunkLabelSource::InlinePositional)
53                    .collect();
54                if positional.len() >= 2 {
55                    let combined = positional
56                        .iter()
57                        .map(|l| l.value.as_str())
58                        .collect::<Vec<_>>()
59                        .join(" ");
60                    warnings.push(make_warning(
61                        self.name(),
62                        ctx,
63                        detail.start,
64                        &detail.info_string,
65                        &combined,
66                    ));
67                } else if let Some(label) = positional.first()
68                    && label.value.chars().any(char::is_whitespace)
69                {
70                    // Quoted positional like `{r "my label"}` is a single
71                    // token whose value already contains the offending space.
72                    warnings.push(make_warning(
73                        self.name(),
74                        ctx,
75                        detail.start,
76                        &detail.info_string,
77                        &label.value,
78                    ));
79                }
80
81                // Quoted `label="..."` containing spaces.
82                for label in header.labels.iter().filter(|l| l.source == ChunkLabelSource::InlineKey) {
83                    if label.value.chars().any(char::is_whitespace) {
84                        warnings.push(make_warning(
85                            self.name(),
86                            ctx,
87                            detail.start,
88                            &detail.info_string,
89                            &label.value,
90                        ));
91                    }
92                }
93            }
94
95            // Hashpipe `#| label: ...` containing spaces.
96            let body = block_body(ctx.content, detail.start);
97            for label in parse_hashpipe_labels(body) {
98                if label.value.chars().any(char::is_whitespace) {
99                    warnings.push(make_warning(
100                        self.name(),
101                        ctx,
102                        detail.start,
103                        &detail.info_string,
104                        &label.value,
105                    ));
106                }
107            }
108        }
109        Ok(warnings)
110    }
111
112    fn fix(&self, _ctx: &LintContext) -> Result<String, LintError> {
113        // Renaming a label is a human decision (hyphen, underscore, or collapse).
114        Err(LintError::FixFailed("MD079 has no auto-fix".to_string()))
115    }
116
117    fn category(&self) -> RuleCategory {
118        RuleCategory::CodeBlock
119    }
120
121    fn should_skip(&self, ctx: &LintContext) -> bool {
122        ctx.flavor != MarkdownFlavor::Quarto || ctx.code_block_details.is_empty()
123    }
124
125    fn as_any(&self) -> &dyn std::any::Any {
126        self
127    }
128
129    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
130    where
131        Self: Sized,
132    {
133        Box::new(Self)
134    }
135}
136
/// Returns the text that follows the first line break at or after
/// `block_start`, i.e. everything after the line on which the block opens.
///
/// The returned slice runs to the end of `content`; it is not cut at the
/// block's closing fence.
///
/// Yields `""` when there is no line break after `block_start`, and also when
/// `block_start` lies past the end of `content` or inside a multi-byte
/// character, so a bad offset cannot panic the linter.
fn block_body(content: &str, block_start: usize) -> &str {
    content
        // Checked slicing: `None` for out-of-range or non-boundary offsets.
        .get(block_start..)
        .and_then(|rest| rest.split_once('\n'))
        .map_or("", |(_opening_line, body)| body)
}
144
145fn make_warning(
146    rule_name: &str,
147    ctx: &LintContext,
148    block_start: usize,
149    info_string: &str,
150    label_value: &str,
151) -> LintWarning {
152    let line_idx = ctx
153        .line_offsets
154        .binary_search(&block_start)
155        .unwrap_or_else(|i| i.saturating_sub(1));
156    let line_start = ctx.line_offsets.get(line_idx).copied().unwrap_or(0);
157    let line_end = ctx.line_offsets.get(line_idx + 1).copied().unwrap_or(ctx.content.len());
158    let line_text = &ctx.content[line_start..line_end];
159
160    let trimmed = info_string.trim();
161    let (start_col, end_col) = match line_text.find(trimmed) {
162        Some(off) => {
163            let start = off + 1;
164            let end = start + trimmed.chars().count();
165            (start, end)
166        }
167        None => (1, line_text.trim_end_matches('\n').chars().count().max(1) + 1),
168    };
169
170    LintWarning {
171        rule_name: Some(rule_name.to_string()),
172        line: line_idx + 1,
173        column: start_col,
174        end_line: line_idx + 1,
175        end_column: end_col,
176        severity: Severity::Warning,
177        message: format!("Chunk label `{label_value}` contains whitespace; use a hyphen or underscore instead"),
178        fix: None,
179    }
180}
181
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;

    /// Runs the rule over `content` parsed with the given flavor.
    fn lint_as(flavor: MarkdownFlavor, content: &str) -> Vec<LintWarning> {
        let ctx = LintContext::new(content, flavor, None);
        MD079ChunkLabelSpaces.check(&ctx).unwrap()
    }

    /// Runs the rule under the Quarto flavor.
    fn check_quarto(content: &str) -> Vec<LintWarning> {
        lint_as(MarkdownFlavor::Quarto, content)
    }

    /// Runs the rule under the standard flavor.
    fn check_standard(content: &str) -> Vec<LintWarning> {
        lint_as(MarkdownFlavor::Standard, content)
    }

    /// Asserts that `content` yields exactly one warning whose message
    /// mentions `label`.
    fn assert_single_warning(content: &str, label: &str) {
        let warnings = check_quarto(content);
        assert_eq!(warnings.len(), 1);
        assert!(warnings[0].message.contains(label));
    }

    #[test]
    fn flags_implicit_positional_spaces() {
        assert_single_warning("```{r several words}\n1 + 1\n```\n", "several words");
    }

    #[test]
    fn flags_quoted_label_with_spaces() {
        assert_single_warning("```{r, label=\"my label\"}\n1 + 1\n```\n", "my label");
    }

    #[test]
    fn flags_hashpipe_label_with_spaces() {
        assert_single_warning("```{r}\n#| label: my label\n1 + 1\n```\n", "my label");
    }

    #[test]
    fn accepts_single_positional_label() {
        assert!(check_quarto("```{r setup}\n1 + 1\n```\n").is_empty());
    }

    #[test]
    fn accepts_hyphenated_or_underscored_labels() {
        let clean_chunks = [
            "```{r my-label}\n1\n```\n",
            "```{r, label=my_label}\n1\n```\n",
            "```{r}\n#| label: my-label\n1\n```\n",
        ];
        for chunk in clean_chunks {
            assert!(check_quarto(chunk).is_empty());
        }
    }

    #[test]
    fn ignores_display_blocks() {
        // A plain ` ```r several words ` fence is a display block rather than
        // an executable chunk; the trailing words are info-string classes,
        // not a label.
        assert!(check_quarto("```r several words\n1 + 1\n```\n").is_empty());
    }

    #[test]
    fn no_warnings_under_standard_flavor() {
        assert!(check_standard("```{r several words}\n1 + 1\n```\n").is_empty());
    }

    #[test]
    fn does_not_flag_options_after_label() {
        // The first bare word is the label; the key=value arguments that
        // follow are options.
        assert!(check_quarto("```{r setup, echo=FALSE}\n1 + 1\n```\n").is_empty());
    }

    #[test]
    fn no_auto_fix_offered() {
        let warnings = check_quarto("```{r several words}\n1 + 1\n```\n");
        assert!(warnings[0].fix.is_none());
    }

    #[test]
    fn flags_quoted_positional_with_spaces() {
        // `{r "my label"}` is one quoted positional token whose value still
        // contains a space, so it must be reported.
        assert_single_warning("```{r \"my label\"}\n1 + 1\n```\n", "my label");
    }
}