quickmark_core/rules/
md025.rs

1use serde::Deserialize;
2use std::rc::Rc;
3
4use tree_sitter::Node;
5
6use crate::{
7    linter::{range_from_tree_sitter, Context, RuleLinter, RuleViolation},
8    rules::{Rule, RuleType},
9};
10
11// MD025-specific configuration types
12#[derive(Debug, PartialEq, Clone, Deserialize)]
13pub struct MD025SingleH1Table {
14    #[serde(default)]
15    pub level: u8,
16    #[serde(default)]
17    pub front_matter_title: String,
18}
19
20impl Default for MD025SingleH1Table {
21    fn default() -> Self {
22        Self {
23            level: 1,
24            front_matter_title: r"^\s*title\s*[:=]".to_string(),
25        }
26    }
27}
28
29#[derive(Debug)]
30struct HeadingInfo {
31    content: String,
32    range: tree_sitter::Range,
33    is_first_content_heading: bool,
34}
35
36pub(crate) struct MD025Linter {
37    context: Rc<Context>,
38    violations: Vec<RuleViolation>,
39    matching_headings: Vec<HeadingInfo>,
40    has_front_matter_title: Option<bool>,
41}
42
43impl MD025Linter {
44    pub fn new(context: Rc<Context>) -> Self {
45        Self {
46            context,
47            violations: Vec::new(),
48            matching_headings: Vec::new(),
49            has_front_matter_title: None,
50        }
51    }
52
53    fn extract_heading_level(&self, node: &Node) -> u8 {
54        match node.kind() {
55            "atx_heading" => {
56                for i in 0..node.child_count() {
57                    let child = node.child(i).unwrap();
58                    if child.kind().starts_with("atx_h") && child.kind().ends_with("_marker") {
59                        return child.kind().chars().nth(5).unwrap().to_digit(10).unwrap() as u8;
60                    }
61                }
62                1 // fallback
63            }
64            "setext_heading" => {
65                for i in 0..node.child_count() {
66                    let child = node.child(i).unwrap();
67                    if child.kind() == "setext_h1_underline" {
68                        return 1;
69                    } else if child.kind() == "setext_h2_underline" {
70                        return 2;
71                    }
72                }
73                1 // fallback
74            }
75            _ => 1,
76        }
77    }
78
79    fn extract_heading_content(&self, node: &Node) -> String {
80        let source = self.context.get_document_content();
81        let start_byte = node.start_byte();
82        let end_byte = node.end_byte();
83        let full_text = &source[start_byte..end_byte];
84
85        match node.kind() {
86            "atx_heading" => full_text
87                .trim_start_matches('#')
88                .trim()
89                .trim_end_matches('#')
90                .trim()
91                .to_string(),
92            "setext_heading" => {
93                if let Some(line) = full_text.lines().next() {
94                    line.trim().to_string()
95                } else {
96                    String::new()
97                }
98            }
99            _ => String::new(),
100        }
101    }
102
103    fn check_front_matter_has_title(&mut self) -> bool {
104        if self.has_front_matter_title.is_some() {
105            return self.has_front_matter_title.unwrap();
106        }
107
108        let config = &self.context.config.linters.settings.single_h1;
109        if config.front_matter_title.is_empty() {
110            self.has_front_matter_title = Some(false);
111            return false; // Front matter checking disabled
112        }
113
114        let content = self.context.get_document_content();
115
116        // Check if document starts with front matter (---)
117        if !content.starts_with("---") {
118            self.has_front_matter_title = Some(false);
119            return false;
120        }
121
122        // Find the end of front matter
123        let lines: Vec<&str> = content.lines().collect();
124        if lines.len() < 3 {
125            self.has_front_matter_title = Some(false);
126            return false; // Too short to have valid front matter
127        }
128
129        let mut end_index = None;
130        for (i, line) in lines.iter().enumerate().skip(1) {
131            if line.trim() == "---" {
132                end_index = Some(i);
133                break;
134            }
135        }
136
137        let end_index = match end_index {
138            Some(idx) => idx,
139            None => {
140                self.has_front_matter_title = Some(false);
141                return false; // No closing front matter delimiter
142            }
143        };
144
145        // Check for title in front matter
146        let front_matter_lines = &lines[1..end_index];
147        let title_regex = regex::Regex::new(&config.front_matter_title).unwrap_or_else(|_| {
148            // Fallback to default regex if invalid
149            regex::Regex::new(r"^\s*title\s*[:=]").unwrap()
150        });
151
152        let has_title = front_matter_lines
153            .iter()
154            .any(|line| title_regex.is_match(line));
155        self.has_front_matter_title = Some(has_title);
156        has_title
157    }
158
159    fn is_first_content_heading(&self, node: &Node) -> bool {
160        let content = self.context.get_document_content();
161        let node_start_byte = node.start_byte();
162        let target_level = self.context.config.linters.settings.single_h1.level;
163
164        // Get text before this heading
165        let text_before = &content[..node_start_byte];
166
167        // Check if there's only whitespace, comments, front matter,
168        // or headings above the target level before this heading
169        let mut in_front_matter = false;
170
171        for line in text_before.lines() {
172            let trimmed = line.trim();
173
174            if trimmed == "---" {
175                if !in_front_matter {
176                    in_front_matter = true;
177                    continue;
178                } else {
179                    // End of front matter
180                    in_front_matter = false;
181                    continue;
182                }
183            }
184
185            if in_front_matter {
186                continue; // Skip front matter content
187            }
188
189            // Check if this line is a heading above target level
190            if trimmed.starts_with('#') {
191                let heading_level = trimmed.chars().take_while(|&c| c == '#').count() as u8;
192                if heading_level < target_level {
193                    continue; // Ignore headings above target level
194                }
195                if heading_level == target_level {
196                    // Found another heading at target level before this one
197                    return false;
198                }
199                // Headings below target level count as content
200                return false;
201            }
202
203            // Check for setext headings
204            if trimmed.chars().all(|c| c == '=' || c == '-') && !trimmed.is_empty() {
205                // This might be a setext underline - need to check previous line for content
206                // For simplicity, we'll consider all setext underlines as potential headings
207                let setext_level = if trimmed.chars().all(|c| c == '=') {
208                    1
209                } else {
210                    2
211                };
212                if setext_level < target_level {
213                    continue; // Ignore headings above target level
214                }
215                return false; // Setext heading at or below target level
216            }
217
218            // After front matter is closed or if no front matter
219            if !trimmed.is_empty() && !trimmed.starts_with("<!--") && !trimmed.starts_with("-->") {
220                // Found non-whitespace, non-comment, non-heading content before heading
221                return false;
222            }
223        }
224
225        true
226    }
227}
228
229impl RuleLinter for MD025Linter {
230    fn feed(&mut self, node: &Node) {
231        if node.kind() == "atx_heading" || node.kind() == "setext_heading" {
232            let level = self.extract_heading_level(node);
233            let config = &self.context.config.linters.settings.single_h1;
234
235            if level != config.level {
236                return; // Not the level we're checking
237            }
238
239            let content = self.extract_heading_content(node);
240            let is_first_content = self.is_first_content_heading(node);
241
242            // Store the heading info for processing in finalize
243            self.matching_headings.push(HeadingInfo {
244                content,
245                range: node.range(),
246                is_first_content_heading: is_first_content,
247            });
248        }
249    }
250
251    fn finalize(&mut self) -> Vec<RuleViolation> {
252        if self.matching_headings.is_empty() {
253            return Vec::new();
254        }
255
256        let has_front_matter_title = self.check_front_matter_has_title();
257
258        // Determine if we have a "top-level heading" scenario
259        let has_top_level_heading = has_front_matter_title
260            || (!self.matching_headings.is_empty()
261                && self.matching_headings[0].is_first_content_heading);
262
263        if has_top_level_heading {
264            // Determine which headings are violations
265            let start_index = if has_front_matter_title { 0 } else { 1 };
266
267            for heading in self.matching_headings.iter().skip(start_index) {
268                self.violations.push(RuleViolation::new(
269                    &MD025,
270                    format!("{} [{}]", MD025.description, heading.content),
271                    self.context.file_path.clone(),
272                    range_from_tree_sitter(&heading.range),
273                ));
274            }
275        }
276
277        std::mem::take(&mut self.violations)
278    }
279}
280
281pub const MD025: Rule = Rule {
282    id: "MD025",
283    alias: "single-h1",
284    tags: &["headings"],
285    description: "Multiple top-level headings in the same document",
286    rule_type: RuleType::Document,
287    required_nodes: &["atx_heading", "setext_heading"],
288    new_linter: |context| Box::new(MD025Linter::new(context)),
289};
290
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{LintersSettingsTable, MD025SingleH1Table, RuleSeverity};
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_settings;

    /// Front matter title pattern shared by most tests.
    const DEFAULT_TITLE_PATTERN: &str = r"^\s*title\s*[:=]";

    /// Builds a configuration that enables only `single-h1` with the given settings.
    fn test_config(level: u8, front_matter_title: &str) -> crate::config::QuickmarkConfig {
        let single_h1 = MD025SingleH1Table {
            level,
            front_matter_title: front_matter_title.to_string(),
        };
        test_config_with_settings(
            vec![("single-h1", RuleSeverity::Error)],
            LintersSettingsTable {
                single_h1,
                ..Default::default()
            },
        )
    }

    /// Lints `input` as `test.md` and returns the message of every violation.
    fn lint_messages(config: crate::config::QuickmarkConfig, input: &str) -> Vec<String> {
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        linter
            .analyze()
            .iter()
            .map(|violation| violation.message().to_string())
            .collect()
    }

    #[test]
    fn test_single_h1_no_violations() {
        let input = "# Title

Some content

## Section 1

Content

## Section 2

More content";

        let messages = lint_messages(test_config(1, DEFAULT_TITLE_PATTERN), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_multiple_h1_violations() {
        let input = "# First Title

Some content

# Second Title

More content";

        let messages = lint_messages(test_config(1, DEFAULT_TITLE_PATTERN), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Second Title"));
    }

    #[test]
    fn test_front_matter_with_title_and_h1() {
        let input = "---
layout: post
title: \"Welcome to Jekyll!\"
date: 2015-11-17 16:16:01 -0600
---
# Top level heading

Content";

        let messages = lint_messages(test_config(1, DEFAULT_TITLE_PATTERN), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Top level heading"));
    }

    #[test]
    fn test_front_matter_without_title() {
        let input = "---
layout: post
author: John Doe
date: 2015-11-17 16:16:01 -0600
---
# Title

Content

## Section";

        let messages = lint_messages(test_config(1, DEFAULT_TITLE_PATTERN), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_custom_level() {
        let input = "# Title (level 1, should be ignored)

## First H2

Content

## Second H2

More content";

        let messages = lint_messages(test_config(2, DEFAULT_TITLE_PATTERN), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Second H2"));
    }

    #[test]
    fn test_setext_headings() {
        let input = "First Title
===========

Content

Second Title
============

More content";

        let messages = lint_messages(test_config(1, DEFAULT_TITLE_PATTERN), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Second Title"));
    }

    #[test]
    fn test_mixed_heading_styles() {
        let input = "First Title
===========

Content

# Second Title

More content";

        let messages = lint_messages(test_config(1, DEFAULT_TITLE_PATTERN), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Second Title"));
    }

    #[test]
    fn test_h1_not_first_content() {
        let input = "Some intro paragraph

# Title

Content

# Another Title";

        let messages = lint_messages(test_config(1, DEFAULT_TITLE_PATTERN), input);
        // No violations because first H1 is not the first content
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_front_matter_title_disabled() {
        // Empty pattern disables front matter checking
        let input = "---
title: \"Welcome to Jekyll!\"
---
# Top level heading

Content";

        let messages = lint_messages(test_config(1, ""), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_custom_front_matter_title_regex() {
        let input = "---
layout: post
heading: \"My Custom Title\"
---
# Top level heading

Content";

        let messages = lint_messages(test_config(1, r"^\s*heading\s*:"), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Top level heading"));
    }

    #[test]
    fn test_comments_before_heading() {
        let input = "<!-- This is a comment -->

# Title

Content

# Another Title";

        let messages = lint_messages(test_config(1, DEFAULT_TITLE_PATTERN), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Another Title"));
    }

    #[test]
    fn test_empty_document() {
        let messages = lint_messages(test_config(1, DEFAULT_TITLE_PATTERN), "");
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_only_lower_level_headings() {
        let input = "## Section 1

Content

### Subsection

More content

## Section 2

Final content";

        let messages = lint_messages(test_config(1, DEFAULT_TITLE_PATTERN), input);
        assert_eq!(messages.len(), 0);
    }
}