rumdl_lib/rules/
md067_footnote_definition_order.rs

1//! MD067: Footnote definitions should appear in order of first reference
2//!
3//! This rule enforces that footnote definitions appear in the same order
4//! as their first references in the document. Out-of-order footnotes
5//! can confuse readers.
6//!
7//! ## Example
8//!
9//! ### Incorrect
10//! ```markdown
11//! Text with [^2] and then [^1].
12//!
13//! [^1]: First definition
14//! [^2]: Second definition
15//! ```
16//!
17//! ### Correct
18//! ```markdown
19//! Text with [^2] and then [^1].
20//!
21//! [^2]: Referenced first
22//! [^1]: Referenced second
23//! ```
24
25use crate::rule::{LintError, LintResult, LintWarning, Rule, Severity};
26use crate::rules::md066_footnote_validation::{FOOTNOTE_DEF_PATTERN, FOOTNOTE_REF_PATTERN, strip_blockquote_prefix};
27use std::collections::HashMap;
28
/// Lint rule MD067: footnote definitions should appear in the order in which
/// the footnotes are first referenced.
///
/// The rule is a stateless unit struct; it has no configuration fields.
#[derive(Debug, Default, Clone)]
pub struct MD067FootnoteDefinitionOrder;

impl MD067FootnoteDefinitionOrder {
    /// Creates the rule. Equivalent to [`Default::default`], since the type
    /// holds no data.
    pub fn new() -> Self {
        Self::default()
    }
}
37
38impl Rule for MD067FootnoteDefinitionOrder {
39    fn name(&self) -> &'static str {
40        "MD067"
41    }
42
43    fn description(&self) -> &'static str {
44        "Footnote definitions should appear in order of first reference"
45    }
46
47    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
48        let mut warnings = Vec::new();
49
50        // Track first reference position for each footnote ID
51        let mut reference_order: Vec<String> = Vec::new();
52        let mut seen_refs: HashMap<String, usize> = HashMap::new();
53
54        // Track definition positions
55        let mut definition_order: Vec<(String, usize, usize)> = Vec::new(); // (id, line, byte_offset)
56
57        // Get code spans to avoid false positives
58        let code_spans = ctx.code_spans();
59
60        // First pass: collect references in order of first occurrence
61        for line_info in &ctx.lines {
62            // Skip special contexts
63            if line_info.in_code_block
64                || line_info.in_front_matter
65                || line_info.in_html_comment
66                || line_info.in_html_block
67            {
68                continue;
69            }
70
71            let line = line_info.content(ctx.content);
72
73            for caps in FOOTNOTE_REF_PATTERN.captures_iter(line).flatten() {
74                if let Some(id_match) = caps.get(1) {
75                    let id = id_match.as_str().to_lowercase();
76
77                    // Check if this match is inside a code span
78                    let match_start = caps.get(0).unwrap().start();
79                    let byte_offset = line_info.byte_offset + match_start;
80
81                    let in_code_span = code_spans
82                        .iter()
83                        .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end);
84
85                    if !in_code_span && !seen_refs.contains_key(&id) {
86                        seen_refs.insert(id.clone(), reference_order.len());
87                        reference_order.push(id);
88                    }
89                }
90            }
91        }
92
93        // Second pass: collect definitions in document order
94        for (line_idx, line_info) in ctx.lines.iter().enumerate() {
95            // Skip special contexts
96            if line_info.in_code_block
97                || line_info.in_front_matter
98                || line_info.in_html_comment
99                || line_info.in_html_block
100            {
101                continue;
102            }
103
104            let line = line_info.content(ctx.content);
105            // Strip blockquote prefixes
106            let line_stripped = strip_blockquote_prefix(line);
107
108            if let Some(caps) = FOOTNOTE_DEF_PATTERN.captures(line_stripped)
109                && let Some(id_match) = caps.get(1)
110            {
111                let id = id_match.as_str().to_lowercase();
112                let line_num = line_idx + 1;
113                definition_order.push((id, line_num, line_info.byte_offset));
114            }
115        }
116
117        // Compare definition order against reference order
118        let mut expected_idx = 0;
119        for (def_id, def_line, _byte_offset) in &definition_order {
120            // Find this definition's expected position based on reference order
121            if let Some(&ref_idx) = seen_refs.get(def_id) {
122                if ref_idx != expected_idx {
123                    // Find what was expected
124                    if expected_idx < reference_order.len() {
125                        let expected_id = &reference_order[expected_idx];
126                        warnings.push(LintWarning {
127                            rule_name: Some(self.name().to_string()),
128                            line: *def_line,
129                            column: 1,
130                            end_line: *def_line,
131                            end_column: 1,
132                            message: format!(
133                                "Footnote definition '[^{def_id}]' is out of order; expected '[^{expected_id}]' next (based on reference order)"
134                            ),
135                            severity: Severity::Warning,
136                            fix: None,
137                        });
138                    }
139                }
140                expected_idx = ref_idx + 1;
141            }
142            // Definitions without references are handled by MD066, skip them here
143        }
144
145        Ok(warnings)
146    }
147
148    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
149        // Auto-fix would require reordering definitions which is complex
150        // and could break multi-paragraph footnotes
151        Ok(ctx.content.to_string())
152    }
153
154    fn as_any(&self) -> &dyn std::any::Any {
155        self
156    }
157
158    fn from_config(_config: &crate::config::Config) -> Box<dyn Rule>
159    where
160        Self: Sized,
161    {
162        Box::new(MD067FootnoteDefinitionOrder)
163    }
164}
165
#[cfg(test)]
mod tests {
    use super::*;
    use crate::LintContext;

    /// Runs MD067 over `markdown` (standard flavor) and returns its warnings.
    fn lint(markdown: &str) -> Vec<LintWarning> {
        let rule = MD067FootnoteDefinitionOrder::new();
        let ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::Standard, None);
        rule.check(&ctx).unwrap()
    }

    /// Asserts that `markdown` produces no MD067 warnings.
    fn assert_clean(markdown: &str) {
        assert!(lint(markdown).is_empty());
    }

    /// Asserts that `markdown` yields exactly one warning whose message
    /// contains every string in `needles`.
    fn assert_single_warning(markdown: &str, needles: &[&str]) {
        let warnings = lint(markdown);
        assert_eq!(warnings.len(), 1);
        for needle in needles {
            assert!(warnings[0].message.contains(needle));
        }
    }

    #[test]
    fn test_correct_order() {
        let warnings = lint(concat!(
            "Text with [^1] and [^2].\n",
            "\n",
            "[^1]: First definition\n",
            "[^2]: Second definition\n",
        ));
        assert!(warnings.is_empty(), "Expected no warnings for correct order");
    }

    #[test]
    fn test_incorrect_order() {
        assert_single_warning(
            concat!(
                "Text with [^1] and [^2].\n",
                "\n",
                "[^2]: Second definition\n",
                "[^1]: First definition\n",
            ),
            &["out of order", "[^2]"],
        );
    }

    #[test]
    fn test_named_footnotes_order() {
        assert_single_warning(
            concat!(
                "Text with [^alpha] and [^beta].\n",
                "\n",
                "[^beta]: Beta definition\n",
                "[^alpha]: Alpha definition\n",
            ),
            &["[^beta]"],
        );
    }

    #[test]
    fn test_multiple_refs_same_footnote() {
        let warnings = lint(concat!(
            "Text with [^1] and [^2] and [^1] again.\n",
            "\n",
            "[^1]: First footnote\n",
            "[^2]: Second footnote\n",
        ));
        assert!(
            warnings.is_empty(),
            "Multiple refs to same footnote should use first occurrence"
        );
    }

    #[test]
    fn test_skip_code_blocks() {
        // The `[^2]` definition sits inside a fenced block and must be ignored.
        assert_clean(concat!(
            "Text with [^1].\n",
            "\n",
            "```\n",
            "[^2]: In code block\n",
            "```\n",
            "\n",
            "[^1]: Real definition\n",
        ));
    }

    #[test]
    fn test_skip_code_spans() {
        // The `[^2]` reference is inline code, so `[^1]` is the only reference.
        assert_clean(concat!(
            "Text with `[^2]` in code and [^1].\n",
            "\n",
            "[^1]: Only real reference\n",
        ));
    }

    #[test]
    fn test_case_insensitive() {
        assert_clean(concat!(
            "Text with [^Note] and [^OTHER].\n",
            "\n",
            "[^note]: First (case-insensitive match)\n",
            "[^other]: Second\n",
        ));
    }

    #[test]
    fn test_definitions_without_references() {
        // Orphaned definitions are handled by MD066, not this rule
        let warnings = lint(concat!(
            "Text with [^1].\n",
            "\n",
            "[^1]: Referenced\n",
            "[^2]: Orphaned\n",
        ));
        assert!(warnings.is_empty(), "Orphaned definitions handled by MD066");
    }

    #[test]
    fn test_three_footnotes_wrong_order() {
        let warnings = lint(concat!(
            "Ref [^a], then [^b], then [^c].\n",
            "\n",
            "[^c]: Third ref, first def\n",
            "[^a]: First ref, second def\n",
            "[^b]: Second ref, third def\n",
        ));
        assert!(!warnings.is_empty());
    }

    #[test]
    fn test_blockquote_definitions() {
        assert_clean(concat!(
            "Text with [^1] and [^2].\n",
            "\n",
            "> [^1]: First in blockquote\n",
            "> [^2]: Second in blockquote\n",
        ));
    }
}