rumdl 0.0.12

A fast Markdown linter written in Rust (Ru(st) MarkDown Linter)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
use lazy_static::lazy_static;
use regex::Regex;
use crate::rule::{LintResult, LintWarning, Rule, LintError, Fix};

// Regex table shared by the MD037 (spaces inside emphasis markers) rule.
// Every pattern is compiled once, on first use, by `lazy_static!`.
lazy_static! {
    // --- Code block detection ---------------------------------------------
    // Opening backtick fence: optional indentation, three backticks, then an
    // info string that contains no backtick, up to the end of the line.
    // Because the info string may be empty, a bare "```" line (i.e. a closing
    // fence) matches this pattern too.
    static ref FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)```(?:[^`\r\n]*)$").unwrap();
    // Closing backtick fence: optional indentation, three backticks, optional
    // trailing whitespace.
    static ref FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)```\s*$").unwrap();
    // Same pair of patterns for `~~~` fences.
    static ref ALTERNATE_FENCED_CODE_BLOCK_START: Regex = Regex::new(r"^(\s*)~~~(?:[^~\r\n]*)$").unwrap();
    static ref ALTERNATE_FENCED_CODE_BLOCK_END: Regex = Regex::new(r"^(\s*)~~~\s*$").unwrap();
    // A line that begins with four or more whitespace characters.
    static ref INDENTED_CODE_BLOCK: Regex = Regex::new(r"^(\s{4,})").unwrap();

    // --- Front matter -----------------------------------------------------
    // A line consisting of exactly `---` plus optional trailing whitespace.
    static ref FRONT_MATTER_DELIM: Regex = Regex::new(r"^---\s*$").unwrap();

    // --- Emphasis with stray inner whitespace -----------------------------
    // Each pattern has three alternatives, tried in this order:
    //   1. whitespace after the opening marker AND before the closing marker
    //   2. whitespace after the opening marker only
    //   3. whitespace before the closing marker only
    // Capture group N (1..=3) holds the emphasized text for alternative N.
    static ref ASTERISK_EMPHASIS: Regex = Regex::new(r"\*\s+([^*\s][^*]*?)\s+\*|\*\s+([^*\s][^*]*?)\*|\*([^*\s][^*]*?)\s+\*").unwrap();
    static ref DOUBLE_ASTERISK_EMPHASIS: Regex = Regex::new(r"\*\*\s+([^*\s][^*]*?)\s+\*\*|\*\*\s+([^*\s][^*]*?)\*\*|\*\*([^*\s][^*]*?)\s+\*\*").unwrap();
    static ref UNDERSCORE_EMPHASIS: Regex = Regex::new(r"_\s+([^_\s][^_]*?)\s+_|_\s+([^_\s][^_]*?)_|_([^_\s][^_]*?)\s+_").unwrap();
    static ref DOUBLE_UNDERSCORE_EMPHASIS: Regex = Regex::new(r"__\s+([^_\s][^_]*?)\s+__|__\s+([^_\s][^_]*?)__|__([^_\s][^_]*?)\s+__").unwrap();

    // --- Unbalanced emphasis ----------------------------------------------
    // A marker followed by marker-free text up to the end of the line, or
    // marker-free text from the start of the line up to a marker. Written
    // without look-behind/look-ahead. These four statics are not referenced
    // by the code visible in this part of the file.
    static ref UNBALANCED_ASTERISK: Regex = Regex::new(r"\*([^*]+)$|^([^*]*)\*").unwrap();
    static ref UNBALANCED_DOUBLE_ASTERISK: Regex = Regex::new(r"\*\*([^*]+)$|^([^*]*)\*\*").unwrap();
    static ref UNBALANCED_UNDERSCORE: Regex = Regex::new(r"_([^_]+)$|^([^_]*)_").unwrap();
    static ref UNBALANCED_DOUBLE_UNDERSCORE: Regex = Regex::new(r"__([^_]+)$|^([^_]*)__").unwrap();

    // --- Inline code ------------------------------------------------------
    // A run of backticks, content that starts and ends with a non-backtick
    // character, and another run of backticks. The opening and closing runs
    // are NOT required to have the same length (there is no backreference).
    static ref INLINE_CODE: Regex = Regex::new(r"(`+)([^`]|[^`].*?[^`])(`+)").unwrap();

    // --- List markers -----------------------------------------------------
    // Optional indentation, one of `*`, `+`, `-`, then at least one
    // whitespace character. Used to avoid mistaking a bullet for emphasis.
    static ref LIST_MARKER: Regex = Regex::new(r"^\s*[*+-]\s+").unwrap();

    // A line that starts (at column 0) with an emphasis marker immediately
    // followed by a character that is neither whitespace nor that marker,
    // i.e. emphasis that is correctly opened and is not a list bullet.
    static ref VALID_START_EMPHASIS: Regex = Regex::new(r"^(\*\*[^*\s]|\*[^*\s]|__[^_\s]|_[^_\s])").unwrap();

    // --- Documentation-style lines ----------------------------------------
    // Optional indentation, an optional `*` bullet, then `**label**` followed
    // by a colon, e.g. "**Rule Type**: `warning`".
    static ref DOC_METADATA_PATTERN: Regex = Regex::new(r"^\s*\*?\s*\*\*[^*]+\*\*\s*:").unwrap();

    // `**` + one or more non-asterisk characters + `**`, anywhere in the
    // line. `[^*]+` also admits whitespace, so "** text **" matches as well.
    static ref BOLD_TEXT_PATTERN: Regex = Regex::new(r"\*\*[^*]+\*\*").unwrap();

    // --- Multi-line emphasis (for potential future use) -------------------
    // Not referenced by the code visible in this part of the file.
    static ref MULTI_LINE_EMPHASIS_START: Regex = Regex::new(r"(\*\*|\*|__|_)([^*_\s].*?)$").unwrap();
    static ref MULTI_LINE_EMPHASIS_END: Regex = Regex::new(r"^(.*?)(\*\*|\*|__|_)").unwrap();
}

/// Tracks which non-prose region (front matter, backtick fence, tilde fence)
/// the line-by-line scanner is currently inside.
///
/// Feed every line of the document, in order, to [`CodeBlockState::update`]
/// and then ask [`CodeBlockState::is_in_code_block`] whether that line should
/// be skipped by the emphasis checks.
struct CodeBlockState {
    /// Inside a fence opened by three backticks.
    in_fenced_code: bool,
    /// Inside a fence opened by `~~~`.
    in_alternate_fenced: bool,
    /// Inside a `---` ... `---` front matter block.
    in_front_matter: bool,
    /// Set once `update` has consumed a line. Front matter may only open on
    /// the very first line of the document.
    seen_first_line: bool,
}

impl CodeBlockState {
    /// Creates the state for the start of a document: outside every region.
    fn new() -> Self {
        CodeBlockState {
            in_fenced_code: false,
            in_alternate_fenced: false,
            in_front_matter: false,
            seen_first_line: false,
        }
    }

    /// Returns `true` when `line` must not be inspected for emphasis: either
    /// the scanner is inside a fence or front matter, or the line itself
    /// starts with four or more whitespace characters (indented code).
    fn is_in_code_block(&self, line: &str) -> bool {
        if self.in_fenced_code || self.in_alternate_fenced || self.in_front_matter {
            return true;
        }

        // Not inside a tracked region: the line can still be indented code.
        INDENTED_CODE_BLOCK.is_match(line)
    }

    /// Advances the state machine by one line.
    ///
    /// The decision is driven by the *current* state rather than by trying
    /// the "start" patterns first: the start patterns accept an empty info
    /// string, so a bare closing fence also looks like an opening fence, and
    /// testing "start" first would keep a block open forever.
    fn update(&mut self, line: &str) {
        let is_first_line = !self.seen_first_line;
        self.seen_first_line = true;

        // Inside front matter only the closing delimiter is meaningful.
        if self.in_front_matter {
            if FRONT_MATTER_DELIM.is_match(line) {
                self.in_front_matter = false;
            }
            return;
        }

        // Front matter can only begin on the first line; a later `---` is a
        // horizontal rule or a setext heading underline.
        if is_first_line && FRONT_MATTER_DELIM.is_match(line) {
            self.in_front_matter = true;
            return;
        }

        // Inside a backtick fence: ignore everything except its closing fence
        // (in particular, `~~~` and `---` lines are just code here).
        if self.in_fenced_code {
            if FENCED_CODE_BLOCK_END.is_match(line) {
                self.in_fenced_code = false;
            }
            return;
        }

        // Same for a tilde fence.
        if self.in_alternate_fenced {
            if ALTERNATE_FENCED_CODE_BLOCK_END.is_match(line) {
                self.in_alternate_fenced = false;
            }
            return;
        }

        // Outside every region: look for an opening fence.
        if FENCED_CODE_BLOCK_START.is_match(line) {
            self.in_fenced_code = true;
        } else if ALTERNATE_FENCED_CODE_BLOCK_START.is_match(line) {
            self.in_alternate_fenced = true;
        }
    }
}

// Enhanced inline code replacement to handle nested backticks
fn replace_inline_code(line: &str) -> String {
    let mut result = line.to_string();
    let mut offset = 0;
    
    for cap in INLINE_CODE.captures_iter(line) {
        if let (Some(full_match), Some(_opening), Some(_content), Some(_closing)) = 
            (cap.get(0), cap.get(1), cap.get(2), cap.get(3)) {
            
            let match_start = full_match.start();
            let match_end = full_match.end();
            let placeholder = " ".repeat(match_end - match_start);
            
            result.replace_range(match_start + offset..match_end + offset, &placeholder);
            offset += placeholder.len() - (match_end - match_start);
        }
    }
    
    result
}

/// Lint rule `MD037`: "Spaces inside emphasis markers".
///
/// The rule carries no configuration; it is a unit struct whose behaviour
/// lives entirely in its `Rule` implementation.
#[derive(Default)]
pub struct MD037SpacesAroundEmphasis;

impl Rule for MD037SpacesAroundEmphasis {
    /// Rule identifier.
    fn name(&self) -> &'static str {
        "MD037"
    }

    /// One-line human readable description of the rule.
    fn description(&self) -> &'static str {
        "Spaces inside emphasis markers"
    }

    /// Scans `content` line by line and returns one warning per emphasis
    /// span that has whitespace directly inside its markers.
    ///
    /// Lines inside front matter, fenced code blocks or indented code are
    /// skipped, and inline code spans are blanked out before matching.
    /// Reported line numbers are 1-based.
    fn check(&self, content: &str) -> LintResult {
        let mut warnings = Vec::new();
        let mut code_block_state = CodeBlockState::new();

        for (index, line) in content.lines().enumerate() {
            // The state must see every line, including the ones we skip.
            code_block_state.update(line);
            if code_block_state.is_in_code_block(line) {
                continue;
            }

            let line_no_code = replace_inline_code(line);
            check_emphasis_patterns(&line_no_code, index + 1, line, &mut warnings);
        }

        Ok(warnings)
    }

    /// Returns `content` with the whitespace inside emphasis markers removed.
    ///
    /// Lines inside front matter or code blocks are copied verbatim. Lines
    /// are re-joined with `\n`, and a final newline is emitted only when
    /// `content` ended with one. Nothing else is trimmed: trailing blank
    /// lines and trailing spaces on the last line are preserved, because
    /// they are outside the scope of this rule.
    fn fix(&self, content: &str) -> Result<String, LintError> {
        let mut code_block_state = CodeBlockState::new();
        let mut fixed_lines: Vec<String> = Vec::new();

        for line in content.lines() {
            code_block_state.update(line);
            if code_block_state.is_in_code_block(line) {
                // Never rewrite code or front matter.
                fixed_lines.push(line.to_string());
            } else {
                fixed_lines.push(fix_emphasis_patterns(line));
            }
        }

        // `lines()` drops the final line terminator, so add exactly one back
        // instead of trimming the whole document.
        let mut result = fixed_lines.join("\n");
        if content.ends_with('\n') {
            result.push('\n');
        }

        Ok(result)
    }
}

/// Runs all four emphasis checks on one line and appends any warnings.
///
/// * `line` - the line with inline code blanked out by `replace_inline_code`
///   (same byte length as `original_line`).
/// * `line_num` - 1-based line number used in the warnings.
/// * `original_line` - the untouched line, used to build the fix text.
/// * `warnings` - output vector the warnings are appended to.
///
/// Lines that look like list items, documentation metadata
/// (`**Label**: ...`) or that contain bold text are skipped entirely.
fn check_emphasis_patterns(line: &str, line_num: usize, original_line: &str, warnings: &mut Vec<LintWarning>) {
    // A leading `*`, `+` or `-` followed by whitespace is a list bullet, not
    // an emphasis marker. This also covers documentation bullets such as
    // "* *Rule Type*: ..." and "* **Default Level**: ...", which therefore
    // need no separate check.
    if LIST_MARKER.is_match(line) {
        return;
    }

    // Skip documentation metadata patterns like "**Rule Type**: `warning`"
    if DOC_METADATA_PATTERN.is_match(line) {
        return;
    }

    // Skip lines with bold text (common in documentation).
    // NOTE(review): this pattern also matches "** text **", so a line with
    // whitespace inside both `**` markers is never reported - confirm that
    // this is intended.
    if BOLD_TEXT_PATTERN.is_match(line) {
        return;
    }

    // When the line opens with well-formed emphasis, only the text from the
    // first space onwards is inspected. The skipped prefix is overwritten
    // with spaces rather than sliced off, so that match offsets remain byte
    // offsets into the whole line (they are later applied to
    // `original_line`).
    let masked: String;
    let target: &str = if VALID_START_EMPHASIS.is_match(line) {
        match line.find(' ') {
            Some(first_space) => {
                masked = format!("{}{}", " ".repeat(first_space), &line[first_space..]);
                &masked
            }
            // No space at all: nothing follows the leading emphasis.
            None => return,
        }
    } else {
        line
    };

    check_emphasis_with_pattern(target, &ASTERISK_EMPHASIS, "*", line_num, original_line, warnings);
    check_emphasis_with_pattern(target, &DOUBLE_ASTERISK_EMPHASIS, "**", line_num, original_line, warnings);
    check_emphasis_with_pattern(target, &UNDERSCORE_EMPHASIS, "_", line_num, original_line, warnings);
    check_emphasis_with_pattern(target, &DOUBLE_UNDERSCORE_EMPHASIS, "__", line_num, original_line, warnings);
}

/// Appends one warning for every match of `pattern` in `line`.
///
/// * `line` - text to search (inline code already blanked out).
/// * `pattern` - one of the "spaces inside emphasis" regexes.
/// * `marker_type` - marker text shown in the message (`*`, `**`, `_`, `__`).
/// * `line_num` - 1-based line number stored in the warning and its fix.
/// * `original_line` - untouched line used for the column and the fix text.
/// * `warnings` - output vector.
fn check_emphasis_with_pattern(
    line: &str,
    pattern: &Regex,
    marker_type: &str,
    line_num: usize,
    original_line: &str,
    warnings: &mut Vec<LintWarning>
) {
    // A match at offset 0 of a line that opens with `*` could be a list
    // bullet, so it is never reported.
    let opens_with_asterisk = line.starts_with('*');

    for hit in pattern.find_iter(line) {
        if hit.start() == 0 && opens_with_asterisk {
            continue;
        }

        // Columns are 1-based.
        let column = find_actual_position(original_line, hit.start()) + 1;
        let replacement = fix_specific_emphasis_section(original_line, hit.start(), hit.end());

        warnings.push(LintWarning {
            line: line_num,
            column,
            message: format!("Spaces inside {} emphasis markers", marker_type),
            fix: Some(Fix {
                line: line_num,
                column,
                replacement,
            }),
        });
    }
}

/// Converts a byte offset in the processed line into a 0-based character
/// index in `original_line`.
///
/// The processed line is produced by `replace_inline_code`, which overwrites
/// each code span with the same number of bytes of spaces. Byte offsets are
/// therefore identical in both strings and no code-span skipping is needed;
/// the only conversion required is from bytes to characters, so that the
/// reported column is right on lines containing multi-byte characters.
///
/// Offsets past the end of the line are clamped to the line's character
/// count.
fn find_actual_position(original_line: &str, position_in_processed: usize) -> usize {
    let byte_offset = position_in_processed.min(original_line.len());

    // Count the characters that start before the requested byte offset.
    original_line
        .char_indices()
        .take_while(|&(start, _)| start < byte_offset)
        .count()
}

/// Builds the corrected text for the emphasis span located at the byte range
/// `start_approx..end_approx` of `line`.
///
/// When the range delimits a span wrapped in `**`, `*`, `__` or `_`, the
/// span is returned with the whitespace just inside the markers removed.
/// When the range is unusable (inverted, or not on character boundaries) or
/// the span is not wrapped in a known marker, the whole line is fixed with
/// `fix_emphasis_patterns` and returned instead.
fn fix_specific_emphasis_section(line: &str, start_approx: usize, end_approx: usize) -> String {
    // Longest markers first so that `**` is not mistaken for `*`.
    const MARKERS: [&str; 4] = ["**", "*", "__", "_"];

    let start = start_approx.min(line.len());
    let end = end_approx.min(line.len());

    // `get` returns `None` instead of panicking on a bad range.
    if let Some(section) = line.get(start..end) {
        for &marker in MARKERS.iter() {
            if section.starts_with(marker) && section.ends_with(marker) {
                let content = section
                    .trim_start_matches(marker)
                    .trim_end_matches(marker)
                    .trim();
                return format!("{0}{1}{0}", marker, content);
            }
        }
    }

    // Fallback - fix the entire line
    fix_emphasis_patterns(line)
}

// Fix spaces inside emphasis markers
fn fix_emphasis_patterns(line: &str) -> String {
    // Save code spans first
    let (line_no_code, code_spans) = extract_code_spans(line);
    
    let mut result = line_no_code;
    
    // Fix emphasis patterns
    result = ASTERISK_EMPHASIS.replace_all(&result, |caps: &regex::Captures| {
        for i in 1..4 {
            if let Some(m) = caps.get(i) {
                return format!("*{}*", m.as_str());
            }
        }
        caps.get(0).map_or("", |m| m.as_str()).to_string()
    }).to_string();
    
    result = DOUBLE_ASTERISK_EMPHASIS.replace_all(&result, |caps: &regex::Captures| {
        for i in 1..4 {
            if let Some(m) = caps.get(i) {
                return format!("**{}**", m.as_str());
            }
        }
        caps.get(0).map_or("", |m| m.as_str()).to_string()
    }).to_string();
    
    result = UNDERSCORE_EMPHASIS.replace_all(&result, |caps: &regex::Captures| {
        for i in 1..4 {
            if let Some(m) = caps.get(i) {
                return format!("_{}_", m.as_str());
            }
        }
        caps.get(0).map_or("", |m| m.as_str()).to_string()
    }).to_string();
    
    result = DOUBLE_UNDERSCORE_EMPHASIS.replace_all(&result, |caps: &regex::Captures| {
        for i in 1..4 {
            if let Some(m) = caps.get(i) {
                return format!("__{}__", m.as_str());
            }
        }
        caps.get(0).map_or("", |m| m.as_str()).to_string()
    }).to_string();
    
    // Restore code spans
    restore_code_spans(result, code_spans)
}

// Extract code spans from a line, replacing them with placeholders
fn extract_code_spans(line: &str) -> (String, Vec<(String, String)>) {
    let mut result = line.to_string();
    let mut code_spans = Vec::new();
    let mut positions = Vec::new();
    
    for (i, cap) in INLINE_CODE.captures_iter(line).enumerate() {
        if let Some(m) = cap.get(0) {
            let code_span = line[m.start()..m.end()].to_string();
            let placeholder = format!("CODE_SPAN_{}", i);
            code_spans.push((placeholder.clone(), code_span));
            positions.push((m.start(), m.end(), placeholder));
        }
    }
    
    // Replace code spans in reverse order to maintain indices
    positions.sort_by(|a, b| b.0.cmp(&a.0));
    for (start, end, placeholder) in positions {
        if start < result.len() && end <= result.len() {
            result.replace_range(start..end, &placeholder);
        }
    }
    
    (result, code_spans)
}

/// Puts the original code spans back in place of their placeholders.
///
/// * `content` - text containing placeholders.
/// * `code_spans` - `(placeholder, original span)` pairs.
///
/// The text is scanned once from left to right. At each position the longest
/// placeholder that matches is substituted, so a placeholder that is a
/// prefix of another (e.g. `..._1` and `..._10`) cannot corrupt it, and text
/// that has just been restored is never scanned again. Empty placeholders
/// are ignored.
fn restore_code_spans(content: String, code_spans: Vec<(String, String)>) -> String {
    if code_spans.is_empty() {
        return content;
    }

    let mut restored = String::with_capacity(content.len());
    let mut rest = content.as_str();

    while !rest.is_empty() {
        // Longest placeholder starting at the current position, if any.
        let mut best: Option<&(String, String)> = None;
        for entry in &code_spans {
            let placeholder = entry.0.as_str();
            if placeholder.is_empty() || !rest.starts_with(placeholder) {
                continue;
            }
            if best.map_or(true, |current| placeholder.len() > current.0.len()) {
                best = Some(entry);
            }
        }

        match best {
            Some(entry) => {
                restored.push_str(&entry.1);
                rest = &rest[entry.0.len()..];
            }
            None => {
                // No placeholder here: copy one character and move on.
                let mut chars = rest.chars();
                if let Some(ch) = chars.next() {
                    restored.push(ch);
                }
                rest = chars.as_str();
            }
        }
    }

    restored
}