mdbook-lint 0.2.0

A fast markdown linter for mdBook
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
//! MD030: Spaces after list markers
//!
//! This rule checks for consistent spacing after list markers.
//! By default, unordered lists should have exactly one space after the marker, and
//! ordered lists should have exactly one space after the period. The expected
//! counts can be changed through `MD030Config`.

use crate::error::Result;
use crate::rule::{Rule, RuleCategory, RuleMetadata};
use crate::{
    Document,
    violation::{Severity, Violation},
};

/// Configuration for spaces after list markers.
///
/// Every field is the number of whitespace characters expected between a list
/// marker and the item text. The rule in this file currently reads only
/// `ul_single` and `ol_single`; `ul_multi` and `ol_multi` are stored but not
/// yet consulted.
///
/// All fields are plain `usize` values, so the type is `Eq` as well as
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MD030Config {
    /// Number of spaces after unordered list markers (default: 1)
    pub ul_single: usize,
    /// Number of spaces after ordered list markers (default: 1)
    pub ol_single: usize,
    /// Number of spaces after unordered list markers in multi-item lists (default: 1)
    pub ul_multi: usize,
    /// Number of spaces after ordered list markers in multi-item lists (default: 1)
    pub ol_multi: usize,
}

impl Default for MD030Config {
    /// Build the default configuration: exactly one space is expected after
    /// every kind of list marker.
    fn default() -> Self {
        // All four settings share the same default value.
        let one_space: usize = 1;

        Self {
            ul_single: one_space,
            ol_single: one_space,
            ul_multi: one_space,
            ol_multi: one_space,
        }
    }
}

/// Rule to check for spaces after list markers.
///
/// Holds the [`MD030Config`] that defines how many whitespace characters are
/// expected after unordered and ordered list markers.
#[derive(Debug, Clone)]
pub struct MD030 {
    /// Expected spacing settings used when checking each line
    config: MD030Config,
}

impl MD030 {
    /// Create a new MD030 rule with default settings
    pub fn new() -> Self {
        Self {
            config: MD030Config::default(),
        }
    }

    /// Create a new MD030 rule with custom configuration
    #[allow(dead_code)]
    pub fn with_config(config: MD030Config) -> Self {
        Self { config }
    }
}

impl Default for MD030 {
    fn default() -> Self {
        Self::new()
    }
}

impl Rule for MD030 {
    /// Stable identifier of this rule.
    fn id(&self) -> &'static str {
        "MD030"
    }

    /// Human-readable rule name.
    fn name(&self) -> &'static str {
        "list-marker-space"
    }

    /// One-line description of what the rule checks.
    fn description(&self) -> &'static str {
        "Spaces after list markers"
    }

    /// Rule metadata: a stable formatting rule introduced in mdbook-lint v0.1.0.
    fn metadata(&self) -> RuleMetadata {
        RuleMetadata::stable(RuleCategory::Formatting).introduced_in("mdbook-lint v0.1.0")
    }

    /// Check every line of `document` for list-marker spacing violations.
    ///
    /// The check is line-based and does not use the AST. Lines inside fenced
    /// code blocks (delimited by at least three backticks or tildes) are
    /// skipped, together with the fence lines themselves, because code such as
    /// `-flag`, `+++` or `1.0` would otherwise be misread as a badly spaced
    /// list marker. Indented code blocks are not detected, since at line level
    /// they cannot be told apart from nested list items.
    ///
    /// Returns the violations in document order, with 1-based line numbers.
    fn check_with_ast<'a>(
        &self,
        document: &Document,
        _ast: Option<&'a comrak::nodes::AstNode<'a>>,
    ) -> Result<Vec<Violation>> {
        let mut violations = Vec::new();
        // Fence character and fence length of the code block currently open, if any.
        let mut open_fence: Option<(char, usize)> = None;

        for (line_number, line) in document.lines.iter().enumerate() {
            let line_num = line_number + 1; // Convert to 1-based line numbers

            let stripped = line.trim_start();
            let fence_char = stripped.chars().next().filter(|c| matches!(c, '`' | '~'));
            if let Some(fence_char) = fence_char {
                // Fence characters are ASCII, so the char count is also a byte offset.
                let run_len = stripped.chars().take_while(|&c| c == fence_char).count();
                let rest = &stripped[run_len..];

                match open_fence {
                    None => {
                        // A backtick fence cannot have backticks in its info string;
                        // such a line is inline code, not a fence.
                        let is_opening =
                            run_len >= 3 && !(fence_char == '`' && rest.contains('`'));
                        if is_opening {
                            open_fence = Some((fence_char, run_len));
                            continue;
                        }
                    }
                    Some((open_char, open_len)) => {
                        // A closing fence uses the same character, is at least as
                        // long as the opening fence, and carries no other text.
                        let is_closing = fence_char == open_char
                            && run_len >= open_len
                            && rest.trim().is_empty();
                        if is_closing {
                            open_fence = None;
                            continue;
                        }
                    }
                }
            }

            if open_fence.is_some() {
                // Code block content is not markdown list syntax.
                continue;
            }

            if let Some(violation) = self.check_list_marker_spacing(line, line_num) {
                violations.push(violation);
            }
        }

        Ok(violations)
    }
}

impl MD030 {
    /// Check spacing after list markers on a single line.
    ///
    /// `line` is the raw line text and `line_num` its 1-based line number.
    /// Returns `Some(violation)` when the line starts (after indentation) with
    /// an unordered (`-`, `*`, `+`) or ordered (`<digits>.`) list marker whose
    /// following whitespace does not match the configured count, and `None`
    /// otherwise. Setext heading underlines and lines that look like emphasis
    /// are not treated as list items.
    fn check_list_marker_spacing(&self, line: &str, line_num: usize) -> Option<Violation> {
        let trimmed = line.trim_start();
        let indent_count = line.len() - trimmed.len();

        // Skip setext heading underlines (lines that are all = or - characters)
        if self.is_setext_underline(trimmed) {
            return None;
        }

        // Check for unordered list markers
        if let Some(marker_char) = self.get_unordered_marker(trimmed) {
            let expected_spaces = self.config.ul_single; // TODO: Determine if multi-item
            // The marker is a single ASCII character, so byte offset 1 is a char boundary.
            let (whitespace_count, is_valid_spacing) =
                Self::measure_spacing(&trimmed[1..], expected_spaces);

            if !is_valid_spacing {
                return Some(self.create_violation(
                    format!(
                        "Unordered list marker spacing: expected {expected_spaces} space(s) after '{marker_char}', found {whitespace_count}"
                    ),
                    line_num,
                    indent_count + 2, // Position after the marker
                    Severity::Warning,
                ));
            }
        }

        // Check for ordered list markers
        if let Some((number, dot_pos)) = self.get_ordered_marker(trimmed) {
            let expected_spaces = self.config.ol_single; // TODO: Determine if multi-item
            let (whitespace_count, is_valid_spacing) =
                Self::measure_spacing(&trimmed[dot_pos + 1..], expected_spaces);

            if !is_valid_spacing {
                return Some(self.create_violation(
                    format!(
                        "Ordered list marker spacing: expected {expected_spaces} space(s) after '{number}. ', found {whitespace_count}"
                    ),
                    line_num,
                    indent_count + dot_pos + 2, // Position after the dot
                    Severity::Warning,
                ));
            }
        }

        None
    }

    /// Count the leading whitespace of `after_marker` and decide whether it
    /// matches `expected_spaces`.
    ///
    /// Returns `(whitespace_count, is_valid)`. When one space is expected, a
    /// single space or a single tab is accepted; for any other expectation the
    /// whitespace count must match exactly.
    fn measure_spacing(after_marker: &str, expected_spaces: usize) -> (usize, bool) {
        let whitespace_count = after_marker
            .chars()
            .take_while(|c| c.is_whitespace())
            .count();

        let is_valid = if expected_spaces == 1 {
            whitespace_count == 1 && matches!(after_marker.chars().next(), Some(' ' | '\t'))
        } else {
            whitespace_count == expected_spaces
        };

        (whitespace_count, is_valid)
    }

    /// Get unordered list marker character if line starts with one.
    ///
    /// Returns `None` when the line does not start with `-`, `*` or `+`, or
    /// when the leading character is judged to be emphasis syntax.
    fn get_unordered_marker(&self, trimmed: &str) -> Option<char> {
        let first_char = trimmed.chars().next()?;
        match first_char {
            '-' | '*' | '+' => {
                // Check if this is actually emphasis syntax, not a list marker
                if self.is_emphasis_syntax(trimmed, first_char) {
                    return None;
                }
                Some(first_char)
            }
            _ => None,
        }
    }

    /// Check if a line starting with *, -, or + is actually emphasis/bold syntax.
    ///
    /// `marker` is the character the caller found at the start of `trimmed`.
    /// Lines opening with `**` or `__` count as bold. A single `*` directly
    /// followed by non-whitespace counts as italic when a closing `*` appears
    /// within the next 50 bytes. `-` and `+` are never treated as emphasis.
    fn is_emphasis_syntax(&self, trimmed: &str, marker: char) -> bool {
        // Check for bold syntax: **text** or __text__
        if (marker == '*' && trimmed.starts_with("**"))
            || (marker == '_' && trimmed.starts_with("__"))
        {
            return true;
        }

        // Check for italic syntax that's not a list: *text* (but allow "* text" as list)
        if marker == '*' {
            // If there's immediately non-whitespace after the *, it's likely emphasis
            if let Some((second_idx, second_char)) = trimmed.char_indices().nth(1) {
                if !second_char.is_whitespace() && second_char != '*' {
                    // Start the search right after the second character. It may be
                    // multi-byte, so a fixed byte offset could split it and panic.
                    let search_start = second_idx + second_char.len_utf8();

                    // Look for closing * to confirm it's emphasis
                    if let Some(closing_pos) = trimmed[search_start..].find('*') {
                        // Make sure it's not just another list item with * in the text
                        let text_between = &trimmed[second_idx..search_start + closing_pos];
                        if !text_between.contains('\n') && closing_pos < 50 {
                            // Likely emphasis if reasonably short and no newlines
                            return true;
                        }
                    }
                }
            }
        }

        // For - and +, only consider them emphasis in very specific cases
        // Most of the time, these should be treated as potential list markers
        // We'll be conservative here and only exclude obvious non-list cases

        false
    }

    /// Get ordered list marker number and dot position if line starts with one.
    ///
    /// Returns the digit prefix as a string together with the byte index of
    /// the first `.` in `trimmed`, or `None` when the text before that dot is
    /// empty or contains anything other than ASCII digits.
    fn get_ordered_marker(&self, trimmed: &str) -> Option<(String, usize)> {
        // Look for pattern like "1. " or "42. "
        let dot_pos = trimmed.find('.')?;
        let prefix = &trimmed[..dot_pos];

        let is_number = !prefix.is_empty() && prefix.chars().all(|c| c.is_ascii_digit());
        is_number.then(|| (prefix.to_string(), dot_pos))
    }

    /// Check if a line is a setext heading underline (all = or - characters).
    ///
    /// An empty string is never an underline.
    fn is_setext_underline(&self, trimmed: &str) -> bool {
        match trimmed.chars().next() {
            Some(first_char @ ('=' | '-')) => trimmed.chars().all(|c| c == first_char),
            _ => false,
        }
    }
}

// Unit tests for MD030. Most tests build a `Document` from an inline markdown
// fixture, run the rule through `Rule::check`, and assert on the number of
// violations, their messages and their 1-based line numbers; the remaining
// tests exercise the private marker/emphasis/setext helpers directly.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Document;
    use crate::rule::Rule;
    use std::path::PathBuf;

    #[test]
    fn test_md030_no_violations() {
        let content = r#"# Valid List Spacing

Unordered lists with single space:
- Item 1
* Item 2
+ Item 3

Ordered lists with single space:
1. First item
2. Second item
42. Item with large number

Regular text here.
"#;
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::new();
        let violations = rule.check(&document).unwrap();

        assert_eq!(violations.len(), 0);
    }

    #[test]
    fn test_md030_unordered_multiple_spaces() {
        let content = r#"# Unordered List Spacing Issues

- Single space is fine
-  Two spaces after dash
*   Three spaces after asterisk
+    Four spaces after plus

Regular text.
"#;
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::new();
        let violations = rule.check(&document).unwrap();

        assert_eq!(violations.len(), 3);
        assert!(
            violations[0]
                .message
                .contains("expected 1 space(s) after '-', found 2")
        );
        assert!(
            violations[1]
                .message
                .contains("expected 1 space(s) after '*', found 3")
        );
        assert!(
            violations[2]
                .message
                .contains("expected 1 space(s) after '+', found 4")
        );
        assert_eq!(violations[0].line, 4);
        assert_eq!(violations[1].line, 5);
        assert_eq!(violations[2].line, 6);
    }

    #[test]
    fn test_md030_ordered_multiple_spaces() {
        let content = r#"# Ordered List Spacing Issues

1. Single space is fine
2.  Two spaces after number
42.   Three spaces after large number
100.    Four spaces after even larger number

Regular text.
"#;
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::new();
        let violations = rule.check(&document).unwrap();

        assert_eq!(violations.len(), 3);
        assert!(
            violations[0]
                .message
                .contains("expected 1 space(s) after '2. ', found 2")
        );
        assert!(
            violations[1]
                .message
                .contains("expected 1 space(s) after '42. ', found 3")
        );
        assert!(
            violations[2]
                .message
                .contains("expected 1 space(s) after '100. ', found 4")
        );
        assert_eq!(violations[0].line, 4);
        assert_eq!(violations[1].line, 5);
        assert_eq!(violations[2].line, 6);
    }

    #[test]
    fn test_md030_no_spaces_after_marker() {
        let content = r#"# No Spaces After Markers

-No space after dash
*No space after asterisk
+No space after plus
1.No space after number
42.No space after large number

Text here.
"#;
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::new();
        let violations = rule.check(&document).unwrap();

        assert_eq!(violations.len(), 5);
        for violation in &violations {
            assert!(violation.message.contains("expected 1 space(s)"));
            assert!(violation.message.contains("found 0"));
        }
    }

    #[test]
    fn test_md030_custom_config() {
        let content = r#"# Custom Configuration Test

- Single space (should be invalid)
-  Two spaces (should be valid)
1. Single space (should be invalid)
2.  Two spaces (should be valid)

Text here.
"#;
        let config = MD030Config {
            ul_single: 2,
            ol_single: 2,
            ul_multi: 2,
            ol_multi: 2,
        };
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::with_config(config);
        let violations = rule.check(&document).unwrap();

        assert_eq!(violations.len(), 2);
        assert!(
            violations[0]
                .message
                .contains("expected 2 space(s) after '-', found 1")
        );
        assert!(
            violations[1]
                .message
                .contains("expected 2 space(s) after '1. ', found 1")
        );
        assert_eq!(violations[0].line, 3);
        assert_eq!(violations[1].line, 5);
    }

    #[test]
    fn test_md030_indented_lists() {
        let content = r#"# Moderately Indented Lists

  - Moderately indented item
  -  Too many spaces
  * Another marker type
  *   Too many spaces here too

Regular text here.

1. Regular ordered list
2.  Too many spaces
42. Correct spacing
100.   Too many spaces

Text here.
"#;
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::new();
        let violations = rule.check(&document).unwrap();

        assert_eq!(violations.len(), 4);
        assert_eq!(violations[0].line, 4); // -  Too many spaces
        assert_eq!(violations[1].line, 6); // *   Too many spaces here too
        assert_eq!(violations[2].line, 11); // 2.  Too many spaces
        assert_eq!(violations[3].line, 13); // 100.   Too many spaces
    }

    #[test]
    fn test_md030_nested_lists() {
        let content = r#"# Nested Lists

- Top level item
  - Nested item with correct spacing
  -  Nested item with too many spaces
  * Different marker type
  *   Too many spaces with asterisk
    1. Nested ordered list
    2.  Too many spaces in nested ordered
    3. Correct spacing

More text.
"#;
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::new();
        let violations = rule.check(&document).unwrap();

        assert_eq!(violations.len(), 3);
        assert_eq!(violations[0].line, 5); // -  Nested item with too many spaces
        assert_eq!(violations[1].line, 7); // *   Too many spaces with asterisk
        assert_eq!(violations[2].line, 9); // 2.  Too many spaces in nested ordered
    }

    #[test]
    fn test_md030_mixed_violations() {
        let content = r#"# Mixed Violations

- Correct spacing
-  Too many spaces
* Correct spacing
*No spaces
+ Correct spacing
+   Way too many spaces

1. Correct spacing
2.  Too many spaces
3. Correct spacing
42.No spaces
100.     Many spaces

Text here.
"#;
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::new();
        let violations = rule.check(&document).unwrap();

        assert_eq!(violations.len(), 6);
        // Unordered violations
        assert_eq!(violations[0].line, 4); // -  Too many spaces
        assert_eq!(violations[1].line, 6); // *No spaces
        assert_eq!(violations[2].line, 8); // +   Way too many spaces
        // Ordered violations
        assert_eq!(violations[3].line, 11); // 2.  Too many spaces
        assert_eq!(violations[4].line, 13); // 42.No spaces
        assert_eq!(violations[5].line, 14); // 100.     Many spaces
    }

    #[test]
    fn test_md030_tabs_after_markers() {
        let content = "- Item with tab\t\n*\tItem starting with tab\n1.\tOrdered with tab\n42.\t\tMultiple tabs\n";
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::new();
        let violations = rule.check(&document).unwrap();

        // Single tabs should be treated as valid (equivalent to single space)
        // Only multiple tabs should be flagged as violations
        assert_eq!(violations.len(), 1); // Only the multiple tabs case should be flagged
        assert_eq!(violations[0].line, 4); // 42.\t\tMultiple tabs
    }

    #[test]
    fn test_md030_get_markers() {
        let rule = MD030::new();

        // Unordered markers
        assert_eq!(rule.get_unordered_marker("- Item"), Some('-'));
        assert_eq!(rule.get_unordered_marker("* Item"), Some('*'));
        assert_eq!(rule.get_unordered_marker("+ Item"), Some('+'));
        assert_eq!(rule.get_unordered_marker("Not a marker"), None);
        assert_eq!(rule.get_unordered_marker("1. Ordered"), None);

        // Ordered markers
        assert_eq!(
            rule.get_ordered_marker("1. Item"),
            Some(("1".to_string(), 1))
        );
        assert_eq!(
            rule.get_ordered_marker("42. Item"),
            Some(("42".to_string(), 2))
        );
        assert_eq!(
            rule.get_ordered_marker("100. Item"),
            Some(("100".to_string(), 3))
        );
        assert_eq!(rule.get_ordered_marker("- Unordered"), None);
        assert_eq!(rule.get_ordered_marker("Not a list"), None);
        assert_eq!(rule.get_ordered_marker("a. Letter"), None);
    }

    #[test]
    fn test_md030_setext_headings_ignored() {
        let content = r#"Main Heading
============

Some content here.

Subheading
----------

More content.

- This is a real list
- With proper spacing
"#;
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::new();
        let violations = rule.check(&document).unwrap();

        // Should have no violations - setext underlines should be ignored
        assert_eq!(violations.len(), 0);
    }

    #[test]
    fn test_md030_is_setext_underline() {
        let rule = MD030::new();

        // Valid setext underlines
        assert!(rule.is_setext_underline("============"));
        assert!(rule.is_setext_underline("----------"));
        assert!(rule.is_setext_underline("==="));
        assert!(rule.is_setext_underline("---"));
        assert!(rule.is_setext_underline("="));
        assert!(rule.is_setext_underline("-"));

        // Not setext underlines
        assert!(!rule.is_setext_underline(""));
        assert!(!rule.is_setext_underline("- Item"));
        assert!(!rule.is_setext_underline("=-="));
        assert!(!rule.is_setext_underline("=== Header ==="));
        assert!(!rule.is_setext_underline("-- Comment --"));
        assert!(!rule.is_setext_underline("* Not a setext"));
        assert!(!rule.is_setext_underline("+ Also not"));
    }

    #[test]
    fn test_md030_bold_text_not_flagged() {
        let content = r#"# Bold Text Should Not Be Flagged

**Types**: feat, fix, docs
**Scopes**: cli, preprocessor, rules
**Important**: This is bold text, not a list marker

Regular bold text like **this** should be fine.
Italic text like *this* should also be fine.

But actual lists should still be checked:
- Valid list item
-  Invalid spacing (should be flagged)
* Another valid item
*  Invalid spacing (should be flagged)
"#;
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::new();
        let violations = rule.check(&document).unwrap();

        // Should only flag the actual list items with bad spacing, not the bold text
        assert_eq!(violations.len(), 2);
        assert!(
            violations[0]
                .message
                .contains("expected 1 space(s) after '-', found 2")
        );
        assert!(
            violations[1]
                .message
                .contains("expected 1 space(s) after '*', found 2")
        );
        assert_eq!(violations[0].line, 12); // -  Invalid spacing (corrected line number)
        assert_eq!(violations[1].line, 14); // *  Invalid spacing (corrected line number)
    }

    #[test]
    fn test_md030_emphasis_syntax_detection() {
        let rule = MD030::new();

        // Bold syntax should be detected as emphasis
        assert!(rule.is_emphasis_syntax("**bold text**", '*'));
        assert!(rule.is_emphasis_syntax("**Types**: something", '*'));
        assert!(rule.is_emphasis_syntax("__bold text__", '_'));

        // Italic syntax should be detected as emphasis
        assert!(rule.is_emphasis_syntax("*italic text*", '*'));
        assert!(rule.is_emphasis_syntax("*word*", '*'));

        // List markers should NOT be detected as emphasis
        assert!(!rule.is_emphasis_syntax("* List item", '*'));
        assert!(!rule.is_emphasis_syntax("- List item", '-'));
        assert!(!rule.is_emphasis_syntax("+ List item", '+'));
        assert!(!rule.is_emphasis_syntax("*  List with extra spaces", '*'));

        // Edge cases
        assert!(!rule.is_emphasis_syntax("* ", '*')); // Just marker and space
        assert!(!rule.is_emphasis_syntax("*", '*')); // Just marker
        assert!(!rule.is_emphasis_syntax("*text with no closing", '*')); // No closing marker
    }

    #[test]
    fn test_md030_mixed_emphasis_and_lists() {
        let content = r#"# Mixed Content

**Bold**: This should not be flagged
*Italic*: This should not be flagged

Valid lists:
- Item one
* Item two  
+ Item three

Invalid lists:
-  Too many spaces after dash
*  Too many spaces after asterisk
+  Too many spaces after plus

More **bold text** that should be ignored.
And some *italic text* that should be ignored.
"#;
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD030::new();
        let violations = rule.check(&document).unwrap();

        // Should only flag the 3 invalid list items, not the emphasis text
        assert_eq!(violations.len(), 3);
        for violation in &violations {
            assert!(violation.message.contains("expected 1 space(s)"));
            assert!(violation.message.contains("found 2"));
        }
        assert_eq!(violations[0].line, 12); // -  Too many spaces after dash
        assert_eq!(violations[1].line, 13); // *  Too many spaces after asterisk  
        assert_eq!(violations[2].line, 14); // +  Too many spaces after plus
    }

    #[test]
    fn test_md030_get_unordered_marker_with_emphasis() {
        let rule = MD030::new();

        // Should return marker for actual lists
        assert_eq!(rule.get_unordered_marker("- List item"), Some('-'));
        assert_eq!(rule.get_unordered_marker("* List item"), Some('*'));
        assert_eq!(rule.get_unordered_marker("+ List item"), Some('+'));

        // Should NOT return marker for emphasis syntax
        assert_eq!(rule.get_unordered_marker("**Bold text**"), None);
        assert_eq!(rule.get_unordered_marker("*Italic text*"), None);
        assert_eq!(rule.get_unordered_marker("**Types**: something"), None);

        // Edge cases
        assert_eq!(rule.get_unordered_marker("Not a list"), None);
        assert_eq!(rule.get_unordered_marker("1. Ordered list"), None);
    }
}