1use serde::Deserialize;
2use std::rc::Rc;
3
4use regex::Regex;
5use tree_sitter::Node;
6
7use crate::{
8 linter::{range_from_tree_sitter, Context, RuleLinter, RuleViolation},
9 rules::{Rule, RuleType},
10};
11
12#[derive(Debug, PartialEq, Clone, Deserialize)]
14pub struct MD041FirstLineHeadingTable {
15 #[serde(default)]
16 pub allow_preamble: bool,
17 #[serde(default)]
18 pub front_matter_title: String,
19 #[serde(default)]
20 pub level: u8,
21}
22
23impl Default for MD041FirstLineHeadingTable {
24 fn default() -> Self {
25 Self {
26 allow_preamble: false,
27 front_matter_title: r"^\s*title\s*[:=]".to_string(),
28 level: 1,
29 }
30 }
31}
32
/// The first relevant element recorded by `feed`, inspected later by `finalize`.
#[derive(Debug)]
enum FirstElement {
    /// A heading came first: its level and its source range.
    Heading(u8, tree_sitter::Range),
    /// A non-heading content node came first: its source range.
    Content(tree_sitter::Range),
    /// No heading or content node has been recorded yet.
    None,
}
39
/// Linter state for rule MD041 (`first-line-heading`).
pub(crate) struct MD041Linter {
    /// Shared lint context; the document content, the rule settings and the
    /// file path are all read from it.
    context: Rc<Context>,
    /// Violations collected in `finalize` and handed back to the caller.
    violations: Vec<RuleViolation>,
    /// First heading or content node seen by `feed`; stays
    /// `FirstElement::None` until one is recorded.
    first_element: FirstElement,
    /// Byte offset just past the closing `---` line of the front matter, or
    /// `None` when no front matter was detected.
    front_matter_end_byte: Option<usize>,
    /// Compiled `front_matter_title` pattern; `None` when the configured
    /// pattern is empty.
    title_regex: Option<Regex>,
}
47
48impl MD041Linter {
49 pub fn new(context: Rc<Context>) -> Self {
50 let content = context.get_document_content();
51 let front_matter_end_byte = Self::calculate_front_matter_end_byte(&content);
52
53 let config = &context.config.linters.settings.first_line_heading;
54 let title_regex = if !config.front_matter_title.is_empty() {
55 Some(
56 Regex::new(&config.front_matter_title)
57 .unwrap_or_else(|_| Regex::new(r"^\s*title\s*[:=]").unwrap()),
58 )
59 } else {
60 None
61 };
62
63 Self {
64 context: context.clone(),
65 violations: Vec::new(),
66 first_element: FirstElement::None,
67 front_matter_end_byte,
68 title_regex,
69 }
70 }
71
/// Locates front matter delimited by `---` lines at the very start of `content`.
///
/// Returns the byte offset just past the closing `---` line (including its
/// line terminator when there is one), or `None` when the document does not
/// open with a `---` line or the block is never closed.
///
/// The opening delimiter must be the first line itself. A first line that
/// merely starts with `---` (such as `----` or `---foo`) is not a delimiter;
/// accepting it would let any later pair of `---` lines, for example thematic
/// breaks, be mistaken for front matter and hide the content before them.
fn calculate_front_matter_end_byte(content: &str) -> Option<usize> {
    // `split_inclusive` keeps each line terminator, so summing the piece
    // lengths gives exact byte offsets into `content`.
    let mut lines = content.split_inclusive('\n');

    let opening = lines.next()?;
    if !opening.starts_with("---") || opening.trim() != "---" {
        return None;
    }

    let mut end = opening.len();
    for line in lines {
        end += line.len();
        // `trim` also removes a trailing `\r`, so CRLF documents are handled.
        if line.trim() == "---" {
            return Some(end);
        }
    }

    None
}
105
106 fn extract_heading_level(&self, node: &Node) -> u8 {
107 match node.kind() {
108 "atx_heading" => {
109 for i in 0..node.child_count() {
110 let child = node.child(i).unwrap();
111 let kind = child.kind();
112 if kind.starts_with("atx_h") && kind.ends_with("_marker") {
113 let level_str = &kind["atx_h".len()..kind.len() - "_marker".len()];
114 return level_str.parse::<u8>().unwrap_or(1);
115 }
116 }
117 1 }
119 "setext_heading" => {
120 for i in 0..node.child_count() {
121 let child = node.child(i).unwrap();
122 if child.kind() == "setext_h1_underline" {
123 return 1;
124 } else if child.kind() == "setext_h2_underline" {
125 return 2;
126 }
127 }
128 1 }
130 _ => 1,
131 }
132 }
133
134 fn check_front_matter_has_title(&self) -> bool {
135 let Some(title_regex) = &self.title_regex else {
136 return false; };
138
139 let Some(fm_end) = self.front_matter_end_byte else {
140 return false; };
142
143 let content = self.context.get_document_content();
144 let front_matter_content = &content[..fm_end];
145
146 front_matter_content
147 .lines()
148 .skip(1) .take_while(|line| line.trim() != "---")
150 .any(|line| title_regex.is_match(line))
151 }
152
153 fn is_html_comment(&self, node: &Node) -> bool {
154 if node.kind() == "html_flow" {
155 let source = self.context.get_document_content();
156 let content = &source[node.start_byte()..node.end_byte()];
157 content.trim_start().starts_with("<!--")
158 } else {
159 false
160 }
161 }
162
163 fn is_in_front_matter(&self, node: &Node) -> bool {
164 if let Some(fm_end) = self.front_matter_end_byte {
165 node.start_byte() < fm_end
166 } else {
167 false
168 }
169 }
170
171 fn should_ignore_node(&self, node: &Node) -> bool {
172 if self.is_in_front_matter(node) {
174 return true;
175 }
176
177 if self.is_html_comment(node) {
179 return true;
180 }
181
182 false
183 }
184
185 fn is_content_node(&self, node: &Node) -> bool {
186 matches!(
187 node.kind(),
188 "paragraph"
189 | "list"
190 | "list_item"
191 | "code_block"
192 | "fenced_code_block"
193 | "blockquote"
194 | "table"
195 | "thematic_break"
196 )
197 }
198}
199
200impl RuleLinter for MD041Linter {
201 fn feed(&mut self, node: &Node) {
202 if !matches!(self.first_element, FirstElement::None) {
204 return;
205 }
206
207 if self.should_ignore_node(node) {
209 return;
210 }
211
212 if node.kind() == "atx_heading" || node.kind() == "setext_heading" {
214 let level = self.extract_heading_level(node);
215 self.first_element = FirstElement::Heading(level, node.range());
216 return;
217 }
218
219 if self.is_content_node(node) {
221 self.first_element = FirstElement::Content(node.range());
222 }
223 }
224
225 fn finalize(&mut self) -> Vec<RuleViolation> {
226 if self.check_front_matter_has_title() {
228 return Vec::new();
229 }
230
231 let config = &self.context.config.linters.settings.first_line_heading;
232
233 match &self.first_element {
234 FirstElement::Heading(level, range) => {
235 if *level != config.level {
237 self.violations.push(RuleViolation::new(
238 &MD041,
239 format!(
240 "Expected first heading to be level {}, but found level {}",
241 config.level, level
242 ),
243 self.context.file_path.clone(),
244 range_from_tree_sitter(range),
245 ));
246 }
247 }
248 FirstElement::Content(range) => {
249 if !config.allow_preamble {
251 self.violations.push(RuleViolation::new(
252 &MD041,
253 "First line in a file should be a top-level heading".to_string(),
254 self.context.file_path.clone(),
255 range_from_tree_sitter(range),
256 ));
257 }
258 }
259 FirstElement::None => {
260 }
262 }
263
264 std::mem::take(&mut self.violations)
265 }
266}
267
/// Rule descriptor for MD041 (`first-line-heading`).
pub const MD041: Rule = Rule {
    id: "MD041",
    alias: "first-line-heading",
    tags: &["headings"],
    description: "First line in a file should be a top-level heading",
    rule_type: RuleType::Document,
    // The two heading kinds plus the same kinds `is_content_node` accepts.
    // NOTE(review): `html_flow` is not listed although `is_html_comment` tests
    // for it; confirm whether HTML blocks are ever passed to `feed`.
    required_nodes: &[
        "atx_heading",
        "setext_heading",
        "paragraph",
        "list",
        "list_item",
        "code_block",
        "fenced_code_block",
        "blockquote",
        "table",
        "thematic_break",
    ],
    // Builds a fresh linter instance for the given context.
    new_linter: |context| Box::new(MD041Linter::new(context)),
};
288
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::{LintersSettingsTable, MD041FirstLineHeadingTable, RuleSeverity};
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_settings;

    /// `front_matter_title` pattern used by most tests.
    const TITLE_PATTERN: &str = r"^\s*title\s*[:=]";

    /// Message expected when the document does not start with a heading.
    const MISSING_HEADING: &str = "First line in a file should be a top-level heading";

    /// Builds a configuration with only `first-line-heading` enabled, using
    /// the given rule settings.
    fn test_config(
        level: u8,
        front_matter_title: &str,
        allow_preamble: bool,
    ) -> crate::config::QuickmarkConfig {
        let first_line_heading = MD041FirstLineHeadingTable {
            level,
            front_matter_title: front_matter_title.to_string(),
            allow_preamble,
        };
        let settings = LintersSettingsTable {
            first_line_heading,
            ..Default::default()
        };

        test_config_with_settings(vec![("first-line-heading", RuleSeverity::Error)], settings)
    }

    /// Lints `input` as `test.md` and returns the message of every violation.
    fn lint_messages(config: crate::config::QuickmarkConfig, input: &str) -> Vec<String> {
        let mut linter = MultiRuleLinter::new_for_document(PathBuf::from("test.md"), config, input);
        let violations = linter.analyze();
        violations
            .iter()
            .map(|violation| violation.message().to_string())
            .collect()
    }

    #[test]
    fn test_valid_first_line_heading() {
        let input = "# Title\n\nSome content\n\n## Section 1\n\nContent";

        let messages = lint_messages(test_config(1, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_no_first_line_heading() {
        let input = "This is some text\n\n# Title\n\nContent";

        let messages = lint_messages(test_config(1, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains(MISSING_HEADING));
    }

    #[test]
    fn test_wrong_level_first_heading() {
        let input = "## Title\n\nContent";

        let messages = lint_messages(test_config(1, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Expected first heading to be level 1, but found level 2"));
    }

    #[test]
    fn test_custom_level() {
        let input = "## Title\n\nContent";

        let messages = lint_messages(test_config(2, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_custom_level_wrong_level() {
        let input = "# Title\n\nContent";

        let messages = lint_messages(test_config(2, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Expected first heading to be level 2, but found level 1"));
    }

    #[test]
    fn test_setext_heading_valid() {
        let input = "Title\n=====\n\nContent";

        let messages = lint_messages(test_config(1, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_setext_heading_wrong_level() {
        let input = "Title\n-----\n\nContent";

        let messages = lint_messages(test_config(1, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains("Expected first heading to be level 1, but found level 2"));
    }

    #[test]
    fn test_allow_preamble_true() {
        let input = "This is some preamble text\n\n# Title\n\nContent";

        let messages = lint_messages(test_config(1, TITLE_PATTERN, true), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_allow_preamble_false() {
        let input = "This is some preamble text\n\n# Title\n\nContent";

        let messages = lint_messages(test_config(1, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 1);
        assert!(messages[0].contains(MISSING_HEADING));
    }

    #[test]
    fn test_front_matter_with_title() {
        let input = concat!(
            "---\n",
            "layout: post\n",
            "title: \"Welcome to Jekyll!\"\n",
            "date: 2015-11-17 16:16:01 -0600\n",
            "---\n",
            "\n",
            "This is content without a heading",
        );

        let messages = lint_messages(test_config(1, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_front_matter_without_title() {
        let input = concat!(
            "---\n",
            "layout: post\n",
            "author: John Doe\n",
            "date: 2015-11-17 16:16:01 -0600\n",
            "---\n",
            "\n",
            "This is content without a heading",
        );

        let messages = lint_messages(test_config(1, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_front_matter_title_disabled() {
        // An empty pattern turns the front matter title check off.
        let input = concat!(
            "---\n",
            "title: \"Welcome to Jekyll!\"\n",
            "---\n",
            "\n",
            "This is content without a heading",
        );

        let messages = lint_messages(test_config(1, "", false), input);
        assert_eq!(messages.len(), 1);
    }

    #[test]
    fn test_custom_front_matter_title_regex() {
        let input = concat!(
            "---\n",
            "layout: post\n",
            "heading: \"My Custom Title\"\n",
            "---\n",
            "\n",
            "This is content without a heading",
        );

        let messages = lint_messages(test_config(1, r"^\s*heading\s*:", false), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_comments_before_heading() {
        let input = "<!-- This is a comment -->\n\n# Title\n\nContent";

        let messages = lint_messages(test_config(1, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_empty_document() {
        let input = "";

        let messages = lint_messages(test_config(1, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 0);
    }

    #[test]
    fn test_whitespace_only() {
        let input = " \n\n \n\n# Title\n\nContent";

        let messages = lint_messages(test_config(1, TITLE_PATTERN, false), input);
        assert_eq!(messages.len(), 0);
    }
}