rumdl 0.1.51

A fast Markdown linter written in Rust (Ru(st) MarkDown Linter)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
use regex::Regex;
/// MkDocs Snippets extension detection utilities
///
/// The Snippets extension allows including content from external files
/// using ASCII scissors syntax: `--8<--`
///
/// Common patterns:
/// - `--8<-- "filename.md"` - Include entire file
/// - `--8<-- "filename.md:start:end"` - Include specific lines
/// - `<!-- --8<-- [start:section] -->` - Start marker for section
/// - `<!-- --8<-- [end:section] -->` - End marker for section
///
use std::sync::LazyLock;

/// Pattern to match valid snippet markers: -{1,}8<-{1,}
/// Based on PyMdown Extensions Snippets specification
static BARE_SNIPPET_MARKER: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"^;*-+8<-+$", // Optional semicolons, then dashes-8<-dashes only
    )
    .unwrap()
});

/// Pattern to match snippet with quoted file path
/// Lenient: accepts unclosed quotes for detection (can warn later)
static SNIPPET_WITH_FILE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"-+8<-+\s+["']"#, // Just check for quote after snippet marker
    )
    .unwrap()
});

/// Pattern to match section markers
static SECTION_MARKER: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"-+8<-+\s*\[(start|end):[^\]]*\]").unwrap());

/// Pattern to match invalid asymmetric marker
static INVALID_ASYMMETRIC: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?:^|\s)--8<-\s+["']"#, // --8<- followed by quote is invalid
    )
    .unwrap()
});

/// Check if a line contains MkDocs snippet syntax
pub fn is_snippet_marker(line: &str) -> bool {
    // PyMdown Snippets spec says: -{1,}8<-{1,} (symmetric dashes)
    // We're lenient with unclosed quotes for detection (to warn later)

    // Check for known invalid asymmetric patterns
    // IMPORTANT: -8<-- as a standalone marker is invalid, but it appears
    // as a substring in valid --8<-- markers!
    // Only reject if -8<-- appears without a leading dash
    if !line.contains("--8<--") && !line.contains("---8<---") {
        // Only check for invalid patterns if we don't have valid ones
        if line.contains("-8<-- ") || line.contains("-8<--\"") || line.contains("-8<--'") {
            return false; // -8<-- is invalid when not part of --8<--
        }
    }
    if INVALID_ASYMMETRIC.is_match(line) {
        return false; // --8<- with file is invalid (asymmetric)
    }

    let trimmed = line.trim();

    // Check for single-line snippet with file
    // Be lenient: accept if line has valid marker and a quote anywhere
    let has_valid_marker = line.contains("--8<--")
        || line.contains("---8<---")
        || (line.contains("-8<-") && !line.contains("-8<--") && !line.contains("--8<-"));

    if has_valid_marker && (line.contains('"') || line.contains('\'')) {
        return true;
    }

    // Check for section markers: --8<-- [start:name] or [end:name]
    // Also check with comment prefixes: # -8<- [start:name]
    if SECTION_MARKER.is_match(trimmed) {
        return true;
    }

    // Check for comment-prefixed section markers (# -8<- [start:name])
    let without_comment = trimmed.trim_start_matches(['#', ';', '/', '*']).trim_start();
    if (without_comment.starts_with("-8<-")
        || without_comment.starts_with("--8<--")
        || without_comment.starts_with("---8<---"))
        && (without_comment.contains("[start:") || without_comment.contains("[end:"))
    {
        return true;
    }

    // Block format: bare marker is valid for multi-line snippet blocks
    // According to PyMdown Extensions spec, bare markers like --8<-- on their own line
    // are valid when used as opening/closing delimiters for multi-file blocks:
    // --8<--
    // file1.md
    // file2.md
    // --8<--
    // Check for trailing whitespace (space or tab) BEFORE trimming
    let trimmed_start = line.trim_start();
    let has_trailing_whitespace = trimmed_start.ends_with(' ') || trimmed_start.ends_with('\t');
    if BARE_SNIPPET_MARKER.is_match(trimmed) && !has_trailing_whitespace {
        return true; // Valid bare marker for block format
    }

    // HTML comment variations
    if line.contains("<!--") && line.contains("-->") && line.contains("8<") {
        // Check various patterns within comments
        if SNIPPET_WITH_FILE.is_match(line) {
            return true;
        }
        if SECTION_MARKER.is_match(line) {
            return true;
        }
        // Don't accept bare snippet markers in comments without content
        // <!-- --8<-- --> is not valid (no file or section)
    }

    false
}

/// Check if a line is a snippet section start marker
pub fn is_snippet_section_start(line: &str) -> bool {
    // Check for patterns like:
    // <!-- --8<-- [start:section_name] -->
    // --8<-- [start:section_name]
    // -8<- [start:section_name]
    // # -8<- [start:section_name]  (comment format for source files)
    // ; -8<- [start:section_name]  (comment format for ini files)

    if !line.contains("start:") {
        return false;
    }

    // Must have proper bracket structure
    if let Some(_start_idx) = line.find("[start:")
        && let Some(_end_idx) = line[_start_idx..].find(']')
    {
        // Empty section names are allowed (lenient for detection)
        let trimmed = line.trim();

        // Handle HTML comments specially
        let content_to_check = if trimmed.starts_with("<!--") && trimmed.ends_with("-->") {
            // Extract content from HTML comment
            trimmed.trim_start_matches("<!--").trim_end_matches("-->").trim()
        } else {
            // For other comment styles (# ; / *)
            let without_comment = trimmed.trim_start_matches(['#', ';', '/', '*']);
            without_comment.trim_start()
        };

        return content_to_check.starts_with("--8<--")
            || content_to_check.starts_with("-8<-")
            || content_to_check.starts_with("---8<---");
    }

    false
}

/// Check if a line is a snippet section end marker
pub fn is_snippet_section_end(line: &str) -> bool {
    // Check for patterns like:
    // <!-- --8<-- [end:section_name] -->
    // --8<-- [end:section_name]
    // -8<- [end:section_name]
    // # -8<- [end:section_name]  (comment format for source files)
    // ; -8<- [end:section_name]  (comment format for ini files)

    if !line.contains("end:") {
        return false;
    }

    // Must have proper bracket structure
    if let Some(_start_idx) = line.find("[end:")
        && let Some(_end_idx) = line[_start_idx..].find(']')
    {
        // Empty section names are allowed (lenient for detection)
        let trimmed = line.trim();

        // Handle HTML comments specially
        let content_to_check = if trimmed.starts_with("<!--") && trimmed.ends_with("-->") {
            // Extract content from HTML comment
            trimmed.trim_start_matches("<!--").trim_end_matches("-->").trim()
        } else {
            // For other comment styles (# ; / *)
            let without_comment = trimmed.trim_start_matches(['#', ';', '/', '*']);
            without_comment.trim_start()
        };

        return content_to_check.starts_with("--8<--")
            || content_to_check.starts_with("-8<-")
            || content_to_check.starts_with("---8<---");
    }

    false
}

/// Check if a position is within a snippet section
pub fn is_within_snippet_section(content: &str, position: usize) -> bool {
    let lines: Vec<&str> = content.lines().collect();
    let mut byte_pos = 0;
    let mut section_stack: Vec<String> = Vec::new();

    for line in lines {
        let line_end = byte_pos + line.len();

        // Check if we're starting a snippet section
        if is_snippet_section_start(line) {
            // Extract section name for matching
            if let Some(start) = line.find("[start:")
                && let Some(end) = line[start..].find(']')
            {
                let section_name = line[start + 7..start + end].to_string();
                section_stack.push(section_name);
            }
        }

        // Check if we're ending a snippet section
        if is_snippet_section_end(line) {
            // Check if section names match
            if let Some(start) = line.find("[end:")
                && let Some(end) = line[start..].find(']')
            {
                let end_section_name = &line[start + 5..start + end];
                // Pop the matching section from the stack
                if let Some(last_section) = section_stack.last()
                    && last_section == end_section_name
                {
                    section_stack.pop();
                }
            }
        }

        // Check if position is within this line and we're in any snippet section
        if byte_pos <= position && position <= line_end && !section_stack.is_empty() {
            return true;
        }

        // Account for newline character
        byte_pos = line_end + 1;
    }

    false
}

/// Check if a line contains a snippet reference that could be a broken link
pub fn looks_like_snippet_reference(text: &str) -> bool {
    // More conservative check for link syntax that might be snippets
    text.contains("--8<--") || text.contains("-8<-")
}

/// Check if a line is a bare snippet block delimiter (for multi-line blocks)
pub fn is_snippet_block_delimiter(line: &str) -> bool {
    let trimmed = line.trim();
    // Check for trailing whitespace (space or tab) BEFORE full trim
    let trimmed_start = line.trim_start();
    let has_trailing_whitespace = trimmed_start.ends_with(' ') || trimmed_start.ends_with('\t');
    // Bare markers without trailing whitespace are valid block delimiters
    BARE_SNIPPET_MARKER.is_match(trimmed) && !has_trailing_whitespace
}

/// Check if a position is within a multi-line snippet block
/// Multi-line blocks have the format:
/// --8<--
/// file1.md
/// file2.md
/// --8<--
pub fn is_within_snippet_block(content: &str, position: usize) -> bool {
    let lines: Vec<&str> = content.lines().collect();
    let mut byte_pos = 0;
    let mut in_block = false;

    for line in lines {
        let line_end = byte_pos + line.len();

        // Check if this is a block delimiter that toggles state
        if is_snippet_block_delimiter(line) {
            if byte_pos <= position && position <= line_end {
                // The position is on the delimiter itself
                return true;
            }
            in_block = !in_block;
        }

        // Check if position is within this line and we're in a block
        if in_block && byte_pos <= position && position <= line_end {
            return true;
        }

        // Move to next line (account for newline character)
        byte_pos = line_end + 1;
    }

    false
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_snippet_marker_detection() {
        // Valid snippets with file paths
        assert!(is_snippet_marker("--8<-- \"file.md\""));
        assert!(is_snippet_marker("--8<-- 'file.md'"));
        assert!(is_snippet_marker("  --8<-- \"indented.md\"  "));
        assert!(is_snippet_marker("<!-- --8<-- \"file.md\" -->"));

        // Bare markers are valid for multi-line blocks
        assert!(is_snippet_marker("--8<--")); // Valid block delimiter
        assert!(is_snippet_marker("-8<-")); // Shorter form
        assert!(is_snippet_marker("---8<---")); // Longer form

        // Invalid snippets with trailing spaces
        assert!(!is_snippet_marker("--8<-- ")); // Trailing space suggests missing file
        assert!(!is_snippet_marker("<!-- --8<-- -->")); // Empty HTML comment snippet

        // Section markers
        assert!(is_snippet_marker("--8<-- [start:section]"));
        assert!(is_snippet_marker("--8<-- [end:section]"));
        assert!(is_snippet_marker("<!-- --8<-- [start:test] -->"));
    }

    #[test]
    fn test_section_markers() {
        // Valid section start markers
        assert!(is_snippet_section_start("<!-- --8<-- [start:intro] -->"));
        assert!(is_snippet_section_start("--8<-- [start:code]"));
        assert!(is_snippet_section_start("-8<- [start:example]"));
        assert!(is_snippet_section_start("# -8<- [start:remote-content]")); // Comment style

        // Invalid section start markers
        // We're lenient with empty section names for detection (can warn later)
        assert!(is_snippet_section_start("<!-- --8<-- [start:] -->")); // Empty name allowed
        assert!(!is_snippet_section_start("--8<-- [start")); // Missing bracket
        assert!(!is_snippet_section_start("[start:test]")); // Missing snippet marker

        // Valid section end markers
        assert!(is_snippet_section_end("<!-- --8<-- [end:intro] -->"));
        assert!(is_snippet_section_end("--8<-- [end:code]"));

        // Invalid section end markers
        // We're lenient with empty section names for detection (can warn later)
        assert!(is_snippet_section_end("<!-- --8<-- [end:] -->")); // Empty name allowed
        assert!(!is_snippet_section_end("--8<-- [end")); // Missing bracket
    }

    #[test]
    fn test_within_snippet_section() {
        let content = r#"# Document

Normal content here.

<!-- --8<-- [start:example] -->
This content is within a snippet section.
It should be detected as such.
<!-- --8<-- [end:example] -->

This is outside the snippet section.

<!-- --8<-- [start:another] -->
Another snippet section.
<!-- --8<-- [end:another] -->
"#;

        // Test positions within and outside snippet sections
        let within_pos = content.find("within a snippet").unwrap();
        let outside_pos = content.find("outside the snippet").unwrap();
        let another_pos = content.find("Another snippet").unwrap();

        assert!(is_within_snippet_section(content, within_pos));
        assert!(!is_within_snippet_section(content, outside_pos));
        assert!(is_within_snippet_section(content, another_pos));
    }

    #[test]
    fn test_nested_snippet_sections() {
        let content = r#"<!-- --8<-- [start:outer] -->
Outer content.
<!-- --8<-- [start:inner] -->
Inner content.
<!-- --8<-- [end:inner] -->
Back to outer.
<!-- --8<-- [end:outer] -->
Outside."#;

        let outer_pos = content.find("Outer content").unwrap();
        let inner_pos = content.find("Inner content").unwrap();
        let back_pos = content.find("Back to outer").unwrap();
        let outside_pos = content.find("Outside").unwrap();

        assert!(is_within_snippet_section(content, outer_pos));
        assert!(is_within_snippet_section(content, inner_pos));
        assert!(is_within_snippet_section(content, back_pos));
        assert!(!is_within_snippet_section(content, outside_pos));
    }

    #[test]
    fn test_multi_line_snippet_blocks() {
        let content = r#"# Document

Some content before.

--8<--
file1.md
file2.md
https://raw.githubusercontent.com/example/repo/main/file.md
--8<--

Some content after.

-8<-
another_file.txt
-8<-

More content.
"#;

        // Test positions within the first block
        let file1_pos = content.find("file1.md").unwrap();
        let file2_pos = content.find("file2.md").unwrap();
        let url_pos = content.find("https://raw.githubusercontent.com").unwrap();

        // Test positions outside blocks
        let before_pos = content.find("Some content before").unwrap();
        let after_pos = content.find("Some content after").unwrap();
        let more_pos = content.find("More content").unwrap();

        // Test positions on delimiters
        let first_delimiter = content.find("--8<--").unwrap();
        let second_delimiter = content.rfind("--8<--").unwrap();

        // Test position in second block
        let another_file_pos = content.find("another_file.txt").unwrap();

        // Assert content within blocks is detected
        assert!(
            is_within_snippet_block(content, file1_pos),
            "file1.md should be in block"
        );
        assert!(
            is_within_snippet_block(content, file2_pos),
            "file2.md should be in block"
        );
        assert!(is_within_snippet_block(content, url_pos), "URL should be in block");
        assert!(
            is_within_snippet_block(content, another_file_pos),
            "another_file.txt should be in block"
        );

        // Assert delimiters themselves are detected
        assert!(
            is_within_snippet_block(content, first_delimiter),
            "First delimiter should be detected"
        );
        assert!(
            is_within_snippet_block(content, second_delimiter),
            "Second delimiter should be detected"
        );

        // Assert content outside blocks is not detected
        assert!(
            !is_within_snippet_block(content, before_pos),
            "Content before block should not be detected"
        );
        assert!(
            !is_within_snippet_block(content, after_pos),
            "Content between blocks should not be detected"
        );
        assert!(
            !is_within_snippet_block(content, more_pos),
            "Content after blocks should not be detected"
        );
    }

    #[test]
    fn test_snippet_block_delimiter() {
        // Valid block delimiters
        assert!(is_snippet_block_delimiter("--8<--"));
        assert!(is_snippet_block_delimiter("-8<-"));
        assert!(is_snippet_block_delimiter("---8<---"));
        assert!(!is_snippet_block_delimiter("  --8<--  ")); // With trailing whitespace = invalid
        assert!(!is_snippet_block_delimiter("\t-8<-\t")); // With trailing tabs = invalid
        assert!(is_snippet_block_delimiter("  --8<--")); // Leading whitespace only is OK
        assert!(is_snippet_block_delimiter("\t--8<--")); // Leading tabs only is OK

        // Invalid delimiters
        assert!(!is_snippet_block_delimiter("--8<-- ")); // Trailing space after trim
        assert!(!is_snippet_block_delimiter("--8<-- file.md")); // With content
        assert!(!is_snippet_block_delimiter("<!-- --8<-- -->")); // In HTML comment
    }
}