quickmark_core/rules/
md041.rs

1use serde::Deserialize;
2use std::rc::Rc;
3
4use regex::Regex;
5use tree_sitter::Node;
6
7use crate::{
8    linter::{range_from_tree_sitter, Context, RuleLinter, RuleViolation},
9    rules::{Rule, RuleType},
10};
11
12// MD041-specific configuration types
13#[derive(Debug, PartialEq, Clone, Deserialize)]
14pub struct MD041FirstLineHeadingTable {
15    #[serde(default)]
16    pub allow_preamble: bool,
17    #[serde(default)]
18    pub front_matter_title: String,
19    #[serde(default)]
20    pub level: u8,
21}
22
23impl Default for MD041FirstLineHeadingTable {
24    fn default() -> Self {
25        Self {
26            allow_preamble: false,
27            front_matter_title: r"^\s*title\s*[:=]".to_string(),
28            level: 1,
29        }
30    }
31}
32
/// The first relevant element recorded by `MD041Linter::feed`.
#[derive(Debug)]
enum FirstElement {
    /// A heading was seen first.
    Heading(u8, tree_sitter::Range), // level, range
    /// A non-heading content node was seen first; carries its range.
    Content(tree_sitter::Range),
    /// Nothing relevant has been seen (yet).
    None,
}
39
/// Linter state for MD041: remembers the first relevant element of the
/// document and reports on it in `finalize`.
pub(crate) struct MD041Linter {
    /// Shared lint context; used here for the document content, the rule
    /// settings and the file path.
    context: Rc<Context>,
    /// Violations collected by `finalize`, which hands them out via `mem::take`.
    violations: Vec<RuleViolation>,
    /// First heading/content node accepted by `feed`; `FirstElement::None`
    /// until one is found.
    first_element: FirstElement,
    /// Byte offset just past the front matter block, or `None` when the
    /// document has no front matter.
    front_matter_end_byte: Option<usize>,
    /// Compiled `front_matter_title` pattern; `None` when the configured
    /// pattern is empty.
    title_regex: Option<Regex>,
}
47
48impl MD041Linter {
49    pub fn new(context: Rc<Context>) -> Self {
50        let content = context.get_document_content();
51        let front_matter_end_byte = Self::calculate_front_matter_end_byte(&content);
52
53        let config = &context.config.linters.settings.first_line_heading;
54        let title_regex = if !config.front_matter_title.is_empty() {
55            Some(
56                Regex::new(&config.front_matter_title)
57                    .unwrap_or_else(|_| Regex::new(r"^\s*title\s*[:=]").unwrap()),
58            )
59        } else {
60            None
61        };
62
63        Self {
64            context: context.clone(),
65            violations: Vec::new(),
66            first_element: FirstElement::None,
67            front_matter_end_byte,
68            title_regex,
69        }
70    }
71
72    /// Calculates the end byte of the front matter, including the final newline.
73    /// This is done by iterating through the lines of the content.
74    fn calculate_front_matter_end_byte(content: &str) -> Option<usize> {
75        if !content.starts_with("---") {
76            return None;
77        }
78
79        let mut byte_pos = 0;
80        let mut found_start = false;
81
82        let mut remaining = content;
83        while let Some(newline_pos) = remaining.find('\n') {
84            let line = &remaining[..newline_pos];
85            let line_to_check = line.trim_end_matches('\r');
86
87            if line_to_check.trim() == "---" {
88                if !found_start {
89                    found_start = true;
90                } else {
91                    return Some(byte_pos + newline_pos + 1);
92                }
93            }
94            byte_pos += newline_pos + 1;
95            remaining = &remaining[newline_pos + 1..];
96        }
97
98        // Check last line if no newline at end
99        if !remaining.is_empty() && remaining.trim() == "---" && found_start {
100            return Some(content.len());
101        }
102
103        None
104    }
105
106    fn extract_heading_level(&self, node: &Node) -> u8 {
107        match node.kind() {
108            "atx_heading" => {
109                for i in 0..node.child_count() {
110                    let child = node.child(i).unwrap();
111                    let kind = child.kind();
112                    if kind.starts_with("atx_h") && kind.ends_with("_marker") {
113                        let level_str = &kind["atx_h".len()..kind.len() - "_marker".len()];
114                        return level_str.parse::<u8>().unwrap_or(1);
115                    }
116                }
117                1 // fallback
118            }
119            "setext_heading" => {
120                for i in 0..node.child_count() {
121                    let child = node.child(i).unwrap();
122                    if child.kind() == "setext_h1_underline" {
123                        return 1;
124                    } else if child.kind() == "setext_h2_underline" {
125                        return 2;
126                    }
127                }
128                1 // fallback
129            }
130            _ => 1,
131        }
132    }
133
134    fn check_front_matter_has_title(&self) -> bool {
135        let Some(title_regex) = &self.title_regex else {
136            return false; // Front matter title checking disabled
137        };
138
139        let Some(fm_end) = self.front_matter_end_byte else {
140            return false; // No front matter found
141        };
142
143        let content = self.context.get_document_content();
144        let front_matter_content = &content[..fm_end];
145
146        front_matter_content
147            .lines()
148            .skip(1) // Skip the initial "---"
149            .take_while(|line| line.trim() != "---")
150            .any(|line| title_regex.is_match(line))
151    }
152
153    fn is_html_comment(&self, node: &Node) -> bool {
154        if node.kind() == "html_flow" {
155            let source = self.context.get_document_content();
156            let content = &source[node.start_byte()..node.end_byte()];
157            content.trim_start().starts_with("<!--")
158        } else {
159            false
160        }
161    }
162
163    fn is_in_front_matter(&self, node: &Node) -> bool {
164        if let Some(fm_end) = self.front_matter_end_byte {
165            node.start_byte() < fm_end
166        } else {
167            false
168        }
169    }
170
171    fn should_ignore_node(&self, node: &Node) -> bool {
172        // Ignore front matter nodes
173        if self.is_in_front_matter(node) {
174            return true;
175        }
176
177        // Ignore HTML comments
178        if self.is_html_comment(node) {
179            return true;
180        }
181
182        false
183    }
184
185    fn is_content_node(&self, node: &Node) -> bool {
186        matches!(
187            node.kind(),
188            "paragraph"
189                | "list"
190                | "list_item"
191                | "code_block"
192                | "fenced_code_block"
193                | "blockquote"
194                | "table"
195                | "thematic_break"
196        )
197    }
198}
199
200impl RuleLinter for MD041Linter {
201    fn feed(&mut self, node: &Node) {
202        // Skip if we already processed the first element
203        if !matches!(self.first_element, FirstElement::None) {
204            return;
205        }
206
207        // Skip nodes that should be ignored
208        if self.should_ignore_node(node) {
209            return;
210        }
211
212        // Check if this is a heading
213        if node.kind() == "atx_heading" || node.kind() == "setext_heading" {
214            let level = self.extract_heading_level(node);
215            self.first_element = FirstElement::Heading(level, node.range());
216            return;
217        }
218
219        // Check if this is content
220        if self.is_content_node(node) {
221            self.first_element = FirstElement::Content(node.range());
222        }
223    }
224
225    fn finalize(&mut self) -> Vec<RuleViolation> {
226        // Check if front matter has title - if so, no violation
227        if self.check_front_matter_has_title() {
228            return Vec::new();
229        }
230
231        let config = &self.context.config.linters.settings.first_line_heading;
232
233        match &self.first_element {
234            FirstElement::Heading(level, range) => {
235                // First element is a heading - check if it has the correct level
236                if *level != config.level {
237                    self.violations.push(RuleViolation::new(
238                        &MD041,
239                        format!(
240                            "Expected first heading to be level {}, but found level {}",
241                            config.level, level
242                        ),
243                        self.context.file_path.clone(),
244                        range_from_tree_sitter(range),
245                    ));
246                }
247            }
248            FirstElement::Content(range) => {
249                // First element is content - only a violation if preamble is not allowed
250                if !config.allow_preamble {
251                    self.violations.push(RuleViolation::new(
252                        &MD041,
253                        "First line in a file should be a top-level heading".to_string(),
254                        self.context.file_path.clone(),
255                        range_from_tree_sitter(range),
256                    ));
257                }
258            }
259            FirstElement::None => {
260                // No content found - this is valid (empty document)
261            }
262        }
263
264        std::mem::take(&mut self.violations)
265    }
266}
267
/// Rule descriptor for MD041 (`first-line-heading`).
///
/// `required_nodes` lists the two heading kinds plus every kind accepted by
/// `MD041Linter::is_content_node`; `new_linter` builds a fresh `MD041Linter`
/// for the given context.
// NOTE(review): `html_flow`, which `is_html_comment` tests for, is not listed
// in `required_nodes` — confirm whether such nodes can reach `feed` at all.
pub const MD041: Rule = Rule {
    id: "MD041",
    alias: "first-line-heading",
    tags: &["headings"],
    description: "First line in a file should be a top-level heading",
    rule_type: RuleType::Document,
    required_nodes: &[
        "atx_heading",
        "setext_heading",
        "paragraph",
        "list",
        "list_item",
        "code_block",
        "fenced_code_block",
        "blockquote",
        "table",
        "thematic_break",
    ],
    new_linter: |context| Box::new(MD041Linter::new(context)),
};
288
// Tests for MD041; each one lints a small in-memory document and checks the
// number (and, where relevant, the message) of reported violations.
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{LintersSettingsTable, MD041FirstLineHeadingTable, RuleSeverity};
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_settings;

    /// Builds a config with only `first-line-heading` enabled (as an error)
    /// and the given MD041 settings; all other settings use their defaults.
    fn test_config(
        level: u8,
        front_matter_title: &str,
        allow_preamble: bool,
    ) -> crate::config::QuickmarkConfig {
        test_config_with_settings(
            vec![("first-line-heading", RuleSeverity::Error)],
            LintersSettingsTable {
                first_line_heading: MD041FirstLineHeadingTable {
                    level,
                    front_matter_title: front_matter_title.to_string(),
                    allow_preamble,
                },
                ..Default::default()
            },
        )
    }

    // A level-1 ATX heading on the first line is accepted.
    #[test]
    fn test_valid_first_line_heading() {
        let config = test_config(1, r"^\s*title\s*[:=]", false);
        let input = "# Title

Some content

## Section 1

Content";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 0);
    }

    // A paragraph before the first heading is reported.
    #[test]
    fn test_no_first_line_heading() {
        let config = test_config(1, r"^\s*title\s*[:=]", false);
        let input = "This is some text

# Title

Content";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 1);
        assert!(violations[0]
            .message()
            .contains("First line in a file should be a top-level heading"));
    }

    // A first heading of level 2 is reported when level 1 is required.
    #[test]
    fn test_wrong_level_first_heading() {
        let config = test_config(1, r"^\s*title\s*[:=]", false);
        let input = "## Title

Content";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 1);
        assert!(violations[0]
            .message()
            .contains("Expected first heading to be level 1, but found level 2"));
    }

    // With `level = 2`, a level-2 first heading is accepted.
    #[test]
    fn test_custom_level() {
        let config = test_config(2, r"^\s*title\s*[:=]", false);
        let input = "## Title

Content";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 0);
    }

    // With `level = 2`, a level-1 first heading is reported.
    #[test]
    fn test_custom_level_wrong_level() {
        let config = test_config(2, r"^\s*title\s*[:=]", false);
        let input = "# Title

Content";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 1);
        assert!(violations[0]
            .message()
            .contains("Expected first heading to be level 2, but found level 1"));
    }

    // A setext heading underlined with `=` counts as level 1.
    #[test]
    fn test_setext_heading_valid() {
        let config = test_config(1, r"^\s*title\s*[:=]", false);
        let input = "Title
=====

Content";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 0);
    }

    // A setext heading underlined with `-` counts as level 2 and is reported.
    #[test]
    fn test_setext_heading_wrong_level() {
        let config = test_config(1, r"^\s*title\s*[:=]", false);
        let input = "Title
-----

Content";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 1);
        assert!(violations[0]
            .message()
            .contains("Expected first heading to be level 1, but found level 2"));
    }

    // With `allow_preamble = true`, text before the heading is accepted.
    #[test]
    fn test_allow_preamble_true() {
        let config = test_config(1, r"^\s*title\s*[:=]", true);
        let input = "This is some preamble text

# Title

Content";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 0);
    }

    // With `allow_preamble = false`, the same document is reported.
    #[test]
    fn test_allow_preamble_false() {
        let config = test_config(1, r"^\s*title\s*[:=]", false);
        let input = "This is some preamble text

# Title

Content";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 1);
        assert!(violations[0]
            .message()
            .contains("First line in a file should be a top-level heading"));
    }

    // A `title:` key in the front matter satisfies the rule without a heading.
    #[test]
    fn test_front_matter_with_title() {
        let config = test_config(1, r"^\s*title\s*[:=]", false);
        let input = "---
layout: post
title: \"Welcome to Jekyll!\"
date: 2015-11-17 16:16:01 -0600
---

This is content without a heading";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 0);
    }

    // Front matter without a title key does not satisfy the rule.
    #[test]
    fn test_front_matter_without_title() {
        let config = test_config(1, r"^\s*title\s*[:=]", false);
        let input = "---
layout: post
author: John Doe
date: 2015-11-17 16:16:01 -0600
---

This is content without a heading";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 1);
    }

    // An empty pattern turns the front matter title check off.
    #[test]
    fn test_front_matter_title_disabled() {
        let config = test_config(1, "", false); // Empty pattern disables front matter checking
        let input = "---
title: \"Welcome to Jekyll!\"
---

This is content without a heading";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 1);
    }

    // A custom pattern lets a different front matter key act as the title.
    #[test]
    fn test_custom_front_matter_title_regex() {
        let config = test_config(1, r"^\s*heading\s*:", false);
        let input = "---
layout: post
heading: \"My Custom Title\"
---

This is content without a heading";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 0);
    }

    // An HTML comment before the heading does not trigger the rule.
    #[test]
    fn test_comments_before_heading() {
        let config = test_config(1, r"^\s*title\s*[:=]", false);
        let input = "<!-- This is a comment -->

# Title

Content";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 0);
    }

    // An empty document is valid.
    #[test]
    fn test_empty_document() {
        let config = test_config(1, r"^\s*title\s*[:=]", false);
        let input = "";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 0);
    }

    // Whitespace-only lines before the heading do not trigger the rule.
    #[test]
    fn test_whitespace_only() {
        let config = test_config(1, r"^\s*title\s*[:=]", false);
        let input = "   \n\n  \n\n# Title\n\nContent";

        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        assert_eq!(violations.len(), 0);
    }
}