rumdl_lib/rules/
md013_line_length.rs

1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
7use crate::utils::range_utils::calculate_excess_range;
8use crate::utils::regex_cache::{
9    IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
10};
11use toml;
12
13pub mod md013_config;
14use md013_config::MD013Config;
15
16#[derive(Clone, Default)]
17pub struct MD013LineLength {
18    config: MD013Config,
19}
20
21impl MD013LineLength {
22    pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
23        Self {
24            config: MD013Config {
25                line_length,
26                code_blocks,
27                tables,
28                headings,
29                strict,
30                heading_line_length: None,
31                code_block_line_length: None,
32                stern: false,
33                enable_reflow: false,
34            },
35        }
36    }
37
38    pub fn from_config_struct(config: MD013Config) -> Self {
39        Self { config }
40    }
41
42    fn is_in_table(lines: &[&str], current_line: usize) -> bool {
43        // Check if current line is part of a table
44        let current = lines[current_line].trim();
45        if current.starts_with('|') || current.starts_with("|-") {
46            return true;
47        }
48
49        // Check if line is between table markers
50        if current_line > 0 && current_line + 1 < lines.len() {
51            let prev = lines[current_line - 1].trim();
52            let next = lines[current_line + 1].trim();
53            if (prev.starts_with('|') || prev.starts_with("|-")) && (next.starts_with('|') || next.starts_with("|-")) {
54                return true;
55            }
56        }
57        false
58    }
59
60    fn should_ignore_line(
61        &self,
62        line: &str,
63        _lines: &[&str],
64        current_line: usize,
65        structure: &DocumentStructure,
66    ) -> bool {
67        if self.config.strict || self.config.stern {
68            return false;
69        }
70
71        // Quick check for common patterns before expensive regex
72        let trimmed = line.trim();
73
74        // Only skip if the entire line is a URL (quick check first)
75        if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
76            return true;
77        }
78
79        // Only skip if the entire line is an image reference (quick check first)
80        if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
81            return true;
82        }
83
84        // Only skip if the entire line is a link reference (quick check first)
85        if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
86            return true;
87        }
88
89        // Code blocks with long strings (only check if in code block)
90        if structure.is_in_code_block(current_line + 1)
91            && !trimmed.is_empty()
92            && !line.contains(' ')
93            && !line.contains('\t')
94        {
95            return true;
96        }
97
98        false
99    }
100}
101
102impl Rule for MD013LineLength {
103    fn name(&self) -> &'static str {
104        "MD013"
105    }
106
107    fn description(&self) -> &'static str {
108        "Line length should not be excessive"
109    }
110
111    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
112        let content = ctx.content;
113
114        // Early return for empty content
115        if content.is_empty() {
116            return Ok(Vec::new());
117        }
118
119        // Quick check: if total content is shorter than line limit, definitely no violations
120        if content.len() <= self.config.line_length {
121            return Ok(Vec::new());
122        }
123
124        // More aggressive early return - check if any line could possibly be long
125        let has_long_lines = if !ctx.lines.is_empty() {
126            ctx.lines
127                .iter()
128                .any(|line| line.content.len() > self.config.line_length)
129        } else {
130            // Fallback: do a quick scan for newlines to estimate max line length
131            let mut max_line_len = 0;
132            let mut current_line_len = 0;
133            for ch in content.chars() {
134                if ch == '\n' {
135                    max_line_len = max_line_len.max(current_line_len);
136                    current_line_len = 0;
137                } else {
138                    current_line_len += 1;
139                }
140            }
141            max_line_len = max_line_len.max(current_line_len);
142            max_line_len > self.config.line_length
143        };
144
145        if !has_long_lines {
146            return Ok(Vec::new());
147        }
148
149        // Create structure manually
150        let structure = DocumentStructure::new(content);
151        self.check_with_structure(ctx, &structure)
152    }
153
154    /// Optimized check using pre-computed document structure
155    fn check_with_structure(
156        &self,
157        ctx: &crate::lint_context::LintContext,
158        structure: &DocumentStructure,
159    ) -> LintResult {
160        let content = ctx.content;
161        let mut warnings = Vec::new();
162
163        // Early return was already done in check(), so we know there are long lines
164
165        // Check for inline configuration overrides
166        let inline_config = crate::inline_config::InlineConfig::from_content(content);
167        let config_override = inline_config.get_rule_config("MD013");
168
169        // Apply configuration override if present
170        let effective_config = if let Some(json_config) = config_override {
171            if let Some(obj) = json_config.as_object() {
172                let mut config = self.config.clone();
173                if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
174                    config.line_length = line_length as usize;
175                }
176                if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
177                    config.code_blocks = code_blocks;
178                }
179                if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
180                    config.tables = tables;
181                }
182                if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
183                    config.headings = headings;
184                }
185                if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
186                    config.strict = strict;
187                }
188                if let Some(stern) = obj.get("stern").and_then(|v| v.as_bool()) {
189                    config.stern = stern;
190                }
191                if let Some(heading_line_length) = obj.get("heading_line_length").and_then(|v| v.as_u64()) {
192                    config.heading_line_length = Some(heading_line_length as usize);
193                }
194                if let Some(code_block_line_length) = obj.get("code_block_line_length").and_then(|v| v.as_u64()) {
195                    config.code_block_line_length = Some(code_block_line_length as usize);
196                }
197                config
198            } else {
199                self.config.clone()
200            }
201        } else {
202            self.config.clone()
203        };
204
205        // Use ctx.lines if available for better performance
206        let lines: Vec<&str> = if !ctx.lines.is_empty() {
207            ctx.lines.iter().map(|l| l.content.as_str()).collect()
208        } else {
209            content.lines().collect()
210        };
211
212        // Create a quick lookup set for heading lines
213        let heading_lines_set: std::collections::HashSet<usize> = structure.heading_lines.iter().cloned().collect();
214
215        // Pre-compute table lines for efficiency instead of calling is_in_table for each line
216        let table_lines_set: std::collections::HashSet<usize> = if effective_config.tables {
217            let mut table_lines = std::collections::HashSet::new();
218            let mut in_table = false;
219
220            for (i, line) in lines.iter().enumerate() {
221                let line_number = i + 1;
222
223                // Quick check if in code block using pre-computed blocks from context or structure
224                let in_code = if !ctx.code_blocks.is_empty() {
225                    ctx.code_blocks
226                        .iter()
227                        .any(|(start, end)| *start <= line_number && line_number <= *end)
228                } else {
229                    structure.is_in_code_block(line_number)
230                };
231
232                if !in_code && line.contains('|') {
233                    in_table = true;
234                    table_lines.insert(line_number);
235                } else if in_table && line.trim().is_empty() {
236                    in_table = false;
237                } else if in_table {
238                    table_lines.insert(line_number);
239                }
240            }
241            table_lines
242        } else {
243            std::collections::HashSet::new()
244        };
245
246        for (line_num, line) in lines.iter().enumerate() {
247            let line_number = line_num + 1;
248
249            // Calculate effective length excluding unbreakable URLs
250            let effective_length = self.calculate_effective_length(line);
251
252            // Determine the appropriate line length limit based on line type
253            let line_limit = if heading_lines_set.contains(&line_number) {
254                effective_config
255                    .heading_line_length
256                    .unwrap_or(effective_config.line_length)
257            } else if structure.is_in_code_block(line_number) {
258                effective_config
259                    .code_block_line_length
260                    .unwrap_or(effective_config.line_length)
261            } else {
262                effective_config.line_length
263            };
264
265            // Skip short lines immediately
266            if effective_length <= line_limit {
267                continue;
268            }
269
270            // Skip various block types efficiently
271            if !effective_config.strict && !effective_config.stern {
272                // Skip setext heading underlines
273                if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
274                    continue;
275                }
276
277                // Skip block elements according to config flags (optimized checks)
278                if (effective_config.headings
279                    && heading_lines_set.contains(&line_number)
280                    && effective_config.heading_line_length.is_none())
281                    || (!effective_config.code_blocks
282                        && structure.is_in_code_block(line_number)
283                        && effective_config.code_block_line_length.is_none())
284                    || (effective_config.tables && table_lines_set.contains(&line_number))
285                    || structure.is_in_blockquote(line_number)
286                    || structure.is_in_html_block(line_number)
287                {
288                    continue;
289                }
290
291                // Skip lines that are only a URL, image ref, or link ref
292                if self.should_ignore_line(line, &lines, line_num, structure) {
293                    continue;
294                }
295            } else if effective_config.stern {
296                // In stern mode, only skip if explicitly configured
297                if (effective_config.headings
298                    && heading_lines_set.contains(&line_number)
299                    && effective_config.heading_line_length.is_none())
300                    || (!effective_config.code_blocks
301                        && structure.is_in_code_block(line_number)
302                        && effective_config.code_block_line_length.is_none())
303                    || (effective_config.tables && table_lines_set.contains(&line_number))
304                {
305                    continue;
306                }
307            }
308
309            // Only provide a fix if reflow is enabled
310            let fix = if self.config.enable_reflow && !self.should_skip_line_for_fix(line, line_num, structure) {
311                // Provide a placeholder fix to indicate that reflow will happen
312                // The actual reflow is done in the fix() method
313                Some(crate::rule::Fix {
314                    range: 0..0,                // Placeholder range
315                    replacement: String::new(), // Placeholder replacement
316                })
317            } else {
318                None
319            };
320
321            let message = format!("Line length {effective_length} exceeds {line_limit} characters");
322
323            // Calculate precise character range for the excess portion
324            let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
325
326            warnings.push(LintWarning {
327                rule_name: Some(self.name()),
328                message,
329                line: start_line,
330                column: start_col,
331                end_line,
332                end_column: end_col,
333                severity: Severity::Warning,
334                fix,
335            });
336        }
337        Ok(warnings)
338    }
339
340    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
341        // Only fix if reflow is enabled
342        if self.config.enable_reflow {
343            let reflow_options = crate::utils::text_reflow::ReflowOptions {
344                line_length: self.config.line_length,
345                break_on_sentences: true,
346                preserve_breaks: false,
347            };
348
349            return Ok(crate::utils::text_reflow::reflow_markdown(ctx.content, &reflow_options));
350        }
351
352        // Without reflow, MD013 has no fixes available
353        Ok(ctx.content.to_string())
354    }
355
356    fn as_any(&self) -> &dyn std::any::Any {
357        self
358    }
359
360    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
361        Some(self)
362    }
363
364    fn category(&self) -> RuleCategory {
365        RuleCategory::Whitespace
366    }
367
368    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
369        // Skip if content is empty
370        if ctx.content.is_empty() {
371            return true;
372        }
373
374        // Quick check: if total content is shorter than line limit, definitely skip
375        if ctx.content.len() <= self.config.line_length {
376            return true;
377        }
378
379        // Use more efficient check - any() with early termination instead of all()
380        !ctx.lines
381            .iter()
382            .any(|line| line.content.len() > self.config.line_length)
383    }
384
385    fn default_config_section(&self) -> Option<(String, toml::Value)> {
386        let default_config = MD013Config::default();
387        let json_value = serde_json::to_value(&default_config).ok()?;
388        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
389
390        if let toml::Value::Table(table) = toml_value {
391            if !table.is_empty() {
392                Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
393            } else {
394                None
395            }
396        } else {
397            None
398        }
399    }
400
401    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
402    where
403        Self: Sized,
404    {
405        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
406        // Special handling for line_length from global config
407        if rule_config.line_length == 80 {
408            // default value
409            rule_config.line_length = config.global.line_length as usize;
410        }
411        Box::new(Self::from_config_struct(rule_config))
412    }
413}
414
415impl MD013LineLength {
416    /// Check if a line should be skipped for fixing
417    fn should_skip_line_for_fix(&self, line: &str, line_num: usize, structure: &DocumentStructure) -> bool {
418        let line_number = line_num + 1; // 1-based
419
420        // Skip code blocks
421        if structure.is_in_code_block(line_number) {
422            return true;
423        }
424
425        // Skip HTML blocks
426        if structure.is_in_html_block(line_number) {
427            return true;
428        }
429
430        // Skip tables (they have complex formatting)
431        if Self::is_in_table(&[line], 0) {
432            return true;
433        }
434
435        // Skip lines that are only URLs (can't be wrapped)
436        if line.trim().starts_with("http://") || line.trim().starts_with("https://") {
437            return true;
438        }
439
440        // Skip setext heading underlines
441        if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
442            return true;
443        }
444
445        false
446    }
447
448    /// Calculate effective line length excluding unbreakable URLs
449    fn calculate_effective_length(&self, line: &str) -> usize {
450        if self.config.strict || self.config.stern {
451            // In strict or stern mode, count everything
452            return line.chars().count();
453        }
454
455        // Quick check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
456        if !line.contains("http") && !line.contains('[') {
457            return line.chars().count();
458        }
459
460        let mut effective_line = line.to_string();
461
462        // First handle markdown links to avoid double-counting URLs
463        // Pattern: [text](very-long-url) -> [text](url)
464        if line.contains('[') && line.contains("](") {
465            for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
466                if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
467                    && url.as_str().len() > 15
468                {
469                    let replacement = format!("[{}](url)", text.as_str());
470                    effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
471                }
472            }
473        }
474
475        // Then replace bare URLs with a placeholder of reasonable length
476        // This allows lines with long URLs to pass if the rest of the content is reasonable
477        if effective_line.contains("http") {
478            for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
479                let url = url_match.as_str();
480                // Skip if this URL is already part of a markdown link we handled
481                if !effective_line.contains(&format!("({url})")) {
482                    // Replace URL with placeholder that represents a "reasonable" URL length
483                    // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
484                    let placeholder = "x".repeat(15.min(url.len()));
485                    effective_line = effective_line.replacen(url, &placeholder, 1);
486                }
487            }
488        }
489
490        effective_line.chars().count()
491    }
492}
493
494impl DocumentStructureExtensions for MD013LineLength {
495    fn has_relevant_elements(
496        &self,
497        ctx: &crate::lint_context::LintContext,
498        _doc_structure: &DocumentStructure,
499    ) -> bool {
500        // This rule always applies unless content is empty
501        !ctx.content.is_empty()
502    }
503}
504
505#[cfg(test)]
506mod tests {
507    use super::*;
508    use crate::lint_context::LintContext;
509
510    #[test]
511    fn test_default_config() {
512        let rule = MD013LineLength::default();
513        assert_eq!(rule.config.line_length, 80);
514        assert!(rule.config.code_blocks); // Default is true
515        assert!(rule.config.tables); // Default is true
516        assert!(rule.config.headings);
517        assert!(!rule.config.strict);
518    }
519
520    #[test]
521    fn test_custom_config() {
522        let rule = MD013LineLength::new(100, true, true, false, true);
523        assert_eq!(rule.config.line_length, 100);
524        assert!(rule.config.code_blocks);
525        assert!(rule.config.tables);
526        assert!(!rule.config.headings);
527        assert!(rule.config.strict);
528    }
529
530    #[test]
531    fn test_basic_line_length_violation() {
532        let rule = MD013LineLength::new(50, false, false, false, false);
533        let content = "This is a line that is definitely longer than fifty characters and should trigger a warning.";
534        let ctx = LintContext::new(content);
535        let result = rule.check(&ctx).unwrap();
536
537        assert_eq!(result.len(), 1);
538        assert!(result[0].message.contains("Line length"));
539        assert!(result[0].message.contains("exceeds 50 characters"));
540    }
541
542    #[test]
543    fn test_no_violation_under_limit() {
544        let rule = MD013LineLength::new(100, false, false, false, false);
545        let content = "Short line.\nAnother short line.";
546        let ctx = LintContext::new(content);
547        let result = rule.check(&ctx).unwrap();
548
549        assert_eq!(result.len(), 0);
550    }
551
552    #[test]
553    fn test_multiple_violations() {
554        let rule = MD013LineLength::new(30, false, false, false, false);
555        let content = "This line is definitely longer than thirty chars.\nThis is also a line that exceeds the limit.\nShort line.";
556        let ctx = LintContext::new(content);
557        let result = rule.check(&ctx).unwrap();
558
559        assert_eq!(result.len(), 2);
560        assert_eq!(result[0].line, 1);
561        assert_eq!(result[1].line, 2);
562    }
563
564    #[test]
565    fn test_code_blocks_exemption() {
566        let rule = MD013LineLength::new(30, false, false, false, false);
567        let content = "```\nThis is a very long line inside a code block that should be ignored.\n```";
568        let ctx = LintContext::new(content);
569        let result = rule.check(&ctx).unwrap();
570
571        assert_eq!(result.len(), 0);
572    }
573
574    #[test]
575    fn test_code_blocks_not_exempt_when_configured() {
576        let rule = MD013LineLength::new(30, true, false, false, false);
577        let content = "```\nThis is a very long line inside a code block that should NOT be ignored.\n```";
578        let ctx = LintContext::new(content);
579        let result = rule.check(&ctx).unwrap();
580
581        assert!(!result.is_empty());
582    }
583
584    #[test]
585    fn test_heading_exemption() {
586        let rule = MD013LineLength::new(30, false, false, true, false);
587        let content = "# This is a very long heading that would normally exceed the limit";
588        let ctx = LintContext::new(content);
589        let result = rule.check(&ctx).unwrap();
590
591        assert_eq!(result.len(), 0);
592    }
593
594    #[test]
595    fn test_heading_not_exempt_when_configured() {
596        let rule = MD013LineLength::new(30, false, false, false, false);
597        let content = "# This is a very long heading that should trigger a warning";
598        let ctx = LintContext::new(content);
599        let result = rule.check(&ctx).unwrap();
600
601        assert_eq!(result.len(), 1);
602    }
603
604    #[test]
605    fn test_table_detection() {
606        let lines = vec![
607            "| Column 1 | Column 2 |",
608            "|----------|----------|",
609            "| Value 1  | Value 2  |",
610        ];
611
612        assert!(MD013LineLength::is_in_table(&lines, 0));
613        assert!(MD013LineLength::is_in_table(&lines, 1));
614        assert!(MD013LineLength::is_in_table(&lines, 2));
615    }
616
617    #[test]
618    fn test_table_exemption() {
619        let rule = MD013LineLength::new(30, false, true, false, false);
620        let content = "| This is a very long table header | Another long column header |\n|-----------------------------------|-------------------------------|";
621        let ctx = LintContext::new(content);
622        let result = rule.check(&ctx).unwrap();
623
624        assert_eq!(result.len(), 0);
625    }
626
627    #[test]
628    fn test_url_exemption() {
629        let rule = MD013LineLength::new(30, false, false, false, false);
630        let content = "https://example.com/this/is/a/very/long/url/that/exceeds/the/limit";
631        let ctx = LintContext::new(content);
632        let result = rule.check(&ctx).unwrap();
633
634        assert_eq!(result.len(), 0);
635    }
636
637    #[test]
638    fn test_image_reference_exemption() {
639        let rule = MD013LineLength::new(30, false, false, false, false);
640        let content = "![This is a very long image alt text that exceeds limit][reference]";
641        let ctx = LintContext::new(content);
642        let result = rule.check(&ctx).unwrap();
643
644        assert_eq!(result.len(), 0);
645    }
646
647    #[test]
648    fn test_link_reference_exemption() {
649        let rule = MD013LineLength::new(30, false, false, false, false);
650        let content = "[reference]: https://example.com/very/long/url/that/exceeds/limit";
651        let ctx = LintContext::new(content);
652        let result = rule.check(&ctx).unwrap();
653
654        assert_eq!(result.len(), 0);
655    }
656
657    #[test]
658    fn test_strict_mode() {
659        let rule = MD013LineLength::new(30, false, false, false, true);
660        let content = "https://example.com/this/is/a/very/long/url/that/exceeds/the/limit";
661        let ctx = LintContext::new(content);
662        let result = rule.check(&ctx).unwrap();
663
664        // In strict mode, even URLs trigger warnings
665        assert_eq!(result.len(), 1);
666    }
667
668    #[test]
669    fn test_blockquote_exemption() {
670        let rule = MD013LineLength::new(30, false, false, false, false);
671        let content = "> This is a very long line inside a blockquote that should be ignored.";
672        let ctx = LintContext::new(content);
673        let result = rule.check(&ctx).unwrap();
674
675        assert_eq!(result.len(), 0);
676    }
677
678    #[test]
679    fn test_setext_heading_underline_exemption() {
680        let rule = MD013LineLength::new(30, false, false, false, false);
681        let content = "Heading\n========================================";
682        let ctx = LintContext::new(content);
683        let result = rule.check(&ctx).unwrap();
684
685        // The underline should be exempt
686        assert_eq!(result.len(), 0);
687    }
688
689    #[test]
690    fn test_no_fix_without_reflow() {
691        let rule = MD013LineLength::new(60, false, false, false, false);
692        let content = "This line has trailing whitespace that makes it too long      ";
693        let ctx = LintContext::new(content);
694        let result = rule.check(&ctx).unwrap();
695
696        assert_eq!(result.len(), 1);
697        // Without enable_reflow, no fix is provided
698        assert!(result[0].fix.is_none());
699
700        // Fix method returns content unchanged
701        let fixed = rule.fix(&ctx).unwrap();
702        assert_eq!(fixed, content);
703    }
704
705    #[test]
706    fn test_character_vs_byte_counting() {
707        let rule = MD013LineLength::new(10, false, false, false, false);
708        // Unicode characters should count as 1 character each
709        let content = "你好世界这是测试文字超过限制"; // 14 characters
710        let ctx = LintContext::new(content);
711        let result = rule.check(&ctx).unwrap();
712
713        assert_eq!(result.len(), 1);
714        assert_eq!(result[0].line, 1);
715    }
716
717    #[test]
718    fn test_empty_content() {
719        let rule = MD013LineLength::default();
720        let ctx = LintContext::new("");
721        let result = rule.check(&ctx).unwrap();
722
723        assert_eq!(result.len(), 0);
724    }
725
726    #[test]
727    fn test_excess_range_calculation() {
728        let rule = MD013LineLength::new(10, false, false, false, false);
729        let content = "12345678901234567890"; // 20 chars, limit is 10
730        let ctx = LintContext::new(content);
731        let result = rule.check(&ctx).unwrap();
732
733        assert_eq!(result.len(), 1);
734        // The warning should highlight from character 11 onwards
735        assert_eq!(result[0].column, 11);
736        assert_eq!(result[0].end_column, 21);
737    }
738
739    #[test]
740    fn test_html_block_exemption() {
741        let rule = MD013LineLength::new(30, false, false, false, false);
742        let content = "<div>\nThis is a very long line inside an HTML block that should be ignored.\n</div>";
743        let ctx = LintContext::new(content);
744        let result = rule.check(&ctx).unwrap();
745
746        // HTML blocks should be exempt
747        assert_eq!(result.len(), 0);
748    }
749
750    #[test]
751    fn test_mixed_content() {
752        // code_blocks=false, tables=true, headings=true
753        let rule = MD013LineLength::new(30, false, true, true, false);
754        let content = r#"# This heading is very long but should be exempt
755
756This regular paragraph line is too long and should trigger.
757
758```
759Code block line that is very long but exempt.
760```
761
762| Table | With very long content |
763|-------|------------------------|
764
765Another long line that should trigger a warning."#;
766
767        let ctx = LintContext::new(content);
768        let result = rule.check(&ctx).unwrap();
769
770        // Should have warnings for the two regular paragraph lines only
771        assert_eq!(result.len(), 2);
772        assert_eq!(result[0].line, 3);
773        assert_eq!(result[1].line, 12);
774    }
775
776    #[test]
777    fn test_fix_without_reflow_preserves_content() {
778        let rule = MD013LineLength::new(50, false, false, false, false);
779        let content = "Line 1\nThis line has trailing spaces and is too long      \nLine 3";
780        let ctx = LintContext::new(content);
781
782        // Without enable_reflow, content is unchanged
783        let fixed = rule.fix(&ctx).unwrap();
784        assert_eq!(fixed, content);
785    }
786
787    #[test]
788    fn test_has_relevant_elements() {
789        let rule = MD013LineLength::default();
790        let structure = DocumentStructure::new("test");
791
792        let ctx = LintContext::new("Some content");
793        assert!(rule.has_relevant_elements(&ctx, &structure));
794
795        let empty_ctx = LintContext::new("");
796        assert!(!rule.has_relevant_elements(&empty_ctx, &structure));
797    }
798
799    #[test]
800    fn test_rule_metadata() {
801        let rule = MD013LineLength::default();
802        assert_eq!(rule.name(), "MD013");
803        assert_eq!(rule.description(), "Line length should not be excessive");
804        assert_eq!(rule.category(), RuleCategory::Whitespace);
805    }
806
807    #[test]
808    fn test_url_embedded_in_text() {
809        let rule = MD013LineLength::new(50, false, false, false, false);
810
811        // This line would be 85 chars, but only ~45 without the URL
812        let content = "Check the docs at https://example.com/very/long/url/that/exceeds/limit for info";
813        let ctx = LintContext::new(content);
814        let result = rule.check(&ctx).unwrap();
815
816        // Should not flag because effective length (with URL placeholder) is under 50
817        assert_eq!(result.len(), 0);
818    }
819
820    #[test]
821    fn test_multiple_urls_in_line() {
822        let rule = MD013LineLength::new(50, false, false, false, false);
823
824        // Line with multiple URLs
825        let content = "See https://first-url.com/long and https://second-url.com/also/very/long here";
826        let ctx = LintContext::new(content);
827
828        let result = rule.check(&ctx).unwrap();
829
830        // Should not flag because effective length is reasonable
831        assert_eq!(result.len(), 0);
832    }
833
834    #[test]
835    fn test_markdown_link_with_long_url() {
836        let rule = MD013LineLength::new(50, false, false, false, false);
837
838        // Markdown link with very long URL
839        let content = "Check the [documentation](https://example.com/very/long/path/to/documentation/page) for details";
840        let ctx = LintContext::new(content);
841        let result = rule.check(&ctx).unwrap();
842
843        // Should not flag because effective length counts link as short
844        assert_eq!(result.len(), 0);
845    }
846
847    #[test]
848    fn test_line_too_long_even_without_urls() {
849        let rule = MD013LineLength::new(50, false, false, false, false);
850
851        // Line that's too long even after URL exclusion
852        let content = "This is a very long line with lots of text and https://url.com that still exceeds the limit";
853        let ctx = LintContext::new(content);
854        let result = rule.check(&ctx).unwrap();
855
856        // Should flag because even with URL placeholder, line is too long
857        assert_eq!(result.len(), 1);
858    }
859
860    #[test]
861    fn test_strict_mode_counts_urls() {
862        let rule = MD013LineLength::new(50, false, false, false, true); // strict=true
863
864        // Same line that passes in non-strict mode
865        let content = "Check the docs at https://example.com/very/long/url/that/exceeds/limit for info";
866        let ctx = LintContext::new(content);
867        let result = rule.check(&ctx).unwrap();
868
869        // In strict mode, should flag because full URL is counted
870        assert_eq!(result.len(), 1);
871    }
872
873    #[test]
874    fn test_documentation_example_from_md051() {
875        let rule = MD013LineLength::new(80, false, false, false, false);
876
877        // This is the actual line from md051.md that was causing issues
878        let content = r#"For more information, see the [CommonMark specification](https://spec.commonmark.org/0.30/#link-reference-definitions)."#;
879        let ctx = LintContext::new(content);
880        let result = rule.check(&ctx).unwrap();
881
882        // Should not flag because the URL is in a markdown link
883        assert_eq!(result.len(), 0);
884    }
885
886    #[test]
887    fn test_text_reflow_simple() {
888        let config = MD013Config {
889            line_length: 30,
890            enable_reflow: true,
891            ..Default::default()
892        };
893        let rule = MD013LineLength::from_config_struct(config);
894
895        let content = "This is a very long line that definitely exceeds thirty characters and needs to be wrapped.";
896        let ctx = LintContext::new(content);
897
898        let fixed = rule.fix(&ctx).unwrap();
899
900        // Verify all lines are under 30 chars
901        for line in fixed.lines() {
902            assert!(
903                line.chars().count() <= 30,
904                "Line too long: {} (len={})",
905                line,
906                line.chars().count()
907            );
908        }
909
910        // Verify content is preserved
911        let fixed_words: Vec<&str> = fixed.split_whitespace().collect();
912        let original_words: Vec<&str> = content.split_whitespace().collect();
913        assert_eq!(fixed_words, original_words);
914    }
915
916    #[test]
917    fn test_text_reflow_preserves_markdown_elements() {
918        let config = MD013Config {
919            line_length: 40,
920            enable_reflow: true,
921            ..Default::default()
922        };
923        let rule = MD013LineLength::from_config_struct(config);
924
925        let content = "This paragraph has **bold text** and *italic text* and [a link](https://example.com) that should be preserved.";
926        let ctx = LintContext::new(content);
927
928        let fixed = rule.fix(&ctx).unwrap();
929
930        // Verify markdown elements are preserved
931        assert!(fixed.contains("**bold text**"), "Bold text not preserved in: {fixed}");
932        assert!(fixed.contains("*italic text*"), "Italic text not preserved in: {fixed}");
933        assert!(
934            fixed.contains("[a link](https://example.com)"),
935            "Link not preserved in: {fixed}"
936        );
937
938        // Verify all lines are under 40 chars
939        for line in fixed.lines() {
940            assert!(line.len() <= 40, "Line too long: {line}");
941        }
942    }
943
944    #[test]
945    fn test_text_reflow_preserves_code_blocks() {
946        let config = MD013Config {
947            line_length: 30,
948            enable_reflow: true,
949            ..Default::default()
950        };
951        let rule = MD013LineLength::from_config_struct(config);
952
953        let content = r#"Here is some text.
954
955```python
956def very_long_function_name_that_exceeds_limit():
957    return "This should not be wrapped"
958```
959
960More text after code block."#;
961        let ctx = LintContext::new(content);
962
963        let fixed = rule.fix(&ctx).unwrap();
964
965        // Verify code block is preserved
966        assert!(fixed.contains("def very_long_function_name_that_exceeds_limit():"));
967        assert!(fixed.contains("```python"));
968        assert!(fixed.contains("```"));
969    }
970
971    #[test]
972    fn test_text_reflow_preserves_lists() {
973        let config = MD013Config {
974            line_length: 30,
975            enable_reflow: true,
976            ..Default::default()
977        };
978        let rule = MD013LineLength::from_config_struct(config);
979
980        let content = r#"Here is a list:
981
9821. First item with a very long line that needs wrapping
9832. Second item is short
9843. Third item also has a long line that exceeds the limit
985
986And a bullet list:
987
988- Bullet item with very long content that needs wrapping
989- Short bullet"#;
990        let ctx = LintContext::new(content);
991
992        let fixed = rule.fix(&ctx).unwrap();
993
994        // Verify list structure is preserved
995        assert!(fixed.contains("1. "));
996        assert!(fixed.contains("2. "));
997        assert!(fixed.contains("3. "));
998        assert!(fixed.contains("- "));
999
1000        // Verify proper indentation for wrapped lines
1001        let lines: Vec<&str> = fixed.lines().collect();
1002        for (i, line) in lines.iter().enumerate() {
1003            if line.trim().starts_with("1.") || line.trim().starts_with("2.") || line.trim().starts_with("3.") {
1004                // Check if next line is a continuation (should be indented with 3 spaces for numbered lists)
1005                if i + 1 < lines.len()
1006                    && !lines[i + 1].trim().is_empty()
1007                    && !lines[i + 1].trim().starts_with(char::is_numeric)
1008                    && !lines[i + 1].trim().starts_with("-")
1009                {
1010                    // Numbered list continuation lines should have 3 spaces
1011                    assert!(lines[i + 1].starts_with("   ") || lines[i + 1].trim().is_empty());
1012                }
1013            } else if line.trim().starts_with("-") {
1014                // Check if next line is a continuation (should be indented with 2 spaces for dash lists)
1015                if i + 1 < lines.len()
1016                    && !lines[i + 1].trim().is_empty()
1017                    && !lines[i + 1].trim().starts_with(char::is_numeric)
1018                    && !lines[i + 1].trim().starts_with("-")
1019                {
1020                    // Dash list continuation lines should have 2 spaces
1021                    assert!(lines[i + 1].starts_with("  ") || lines[i + 1].trim().is_empty());
1022                }
1023            }
1024        }
1025    }
1026
1027    #[test]
1028    fn test_text_reflow_disabled_by_default() {
1029        let rule = MD013LineLength::new(30, false, false, false, false);
1030
1031        let content = "This is a very long line that definitely exceeds thirty characters.";
1032        let ctx = LintContext::new(content);
1033
1034        let fixed = rule.fix(&ctx).unwrap();
1035
1036        // Without reflow enabled, it should only trim whitespace (if any)
1037        // Since there's no trailing whitespace, content should be unchanged
1038        assert_eq!(fixed, content);
1039    }
1040
1041    #[test]
1042    fn test_reflow_with_hard_line_breaks() {
1043        // Test that lines with exactly 2 trailing spaces are preserved as hard breaks
1044        let config = MD013Config {
1045            line_length: 40,
1046            enable_reflow: true,
1047            ..Default::default()
1048        };
1049        let rule = MD013LineLength::from_config_struct(config);
1050
1051        // Test with exactly 2 spaces (hard line break)
1052        let content = "This line has a hard break at the end  \nAnd this continues on the next line that is also quite long and needs wrapping";
1053        let ctx = LintContext::new(content);
1054        let fixed = rule.fix(&ctx).unwrap();
1055
1056        // Should preserve the hard line break (2 spaces)
1057        assert!(
1058            fixed.contains("  \n"),
1059            "Hard line break with exactly 2 spaces should be preserved"
1060        );
1061    }
1062
1063    #[test]
1064    fn test_reflow_preserves_reference_links() {
1065        let config = MD013Config {
1066            line_length: 40,
1067            enable_reflow: true,
1068            ..Default::default()
1069        };
1070        let rule = MD013LineLength::from_config_struct(config);
1071
1072        let content = "This is a very long line with a [reference link][ref] that should not be broken apart when reflowing the text.
1073
1074[ref]: https://example.com";
1075        let ctx = LintContext::new(content);
1076        let fixed = rule.fix(&ctx).unwrap();
1077
1078        // Reference link should remain intact
1079        assert!(fixed.contains("[reference link][ref]"));
1080        assert!(!fixed.contains("[ reference link]"));
1081        assert!(!fixed.contains("[ref ]"));
1082    }
1083
1084    #[test]
1085    fn test_reflow_with_nested_markdown_elements() {
1086        let config = MD013Config {
1087            line_length: 35,
1088            enable_reflow: true,
1089            ..Default::default()
1090        };
1091        let rule = MD013LineLength::from_config_struct(config);
1092
1093        let content = "This text has **bold with `code` inside** and should handle it properly when wrapping";
1094        let ctx = LintContext::new(content);
1095        let fixed = rule.fix(&ctx).unwrap();
1096
1097        // Nested elements should be preserved
1098        assert!(fixed.contains("**bold with `code` inside**"));
1099    }
1100
1101    #[test]
1102    fn test_reflow_with_unbalanced_markdown() {
1103        // Test edge case with unbalanced markdown
1104        let config = MD013Config {
1105            line_length: 30,
1106            enable_reflow: true,
1107            ..Default::default()
1108        };
1109        let rule = MD013LineLength::from_config_struct(config);
1110
1111        let content = "This has **unbalanced bold that goes on for a very long time without closing";
1112        let ctx = LintContext::new(content);
1113        let fixed = rule.fix(&ctx).unwrap();
1114
1115        // Should handle gracefully without panic
1116        // The text reflow handles unbalanced markdown by treating it as a bold element
1117        // Check that the content is properly reflowed without panic
1118        assert!(!fixed.is_empty());
1119        // Verify the content is wrapped to 30 chars
1120        for line in fixed.lines() {
1121            assert!(line.len() <= 30 || line.starts_with("**"), "Line exceeds limit: {line}");
1122        }
1123    }
1124
1125    #[test]
1126    fn test_reflow_fix_indicator() {
1127        // Test that enable_reflow provides fix indicators
1128        let config = MD013Config {
1129            line_length: 30,
1130            enable_reflow: true,
1131            ..Default::default()
1132        };
1133        let rule = MD013LineLength::from_config_struct(config);
1134
1135        let content = "This is a very long line that definitely exceeds the thirty character limit";
1136        let ctx = LintContext::new(content);
1137        let warnings = rule.check(&ctx).unwrap();
1138
1139        // Should have a fix indicator when enable_reflow is true
1140        assert!(!warnings.is_empty());
1141        assert!(
1142            warnings[0].fix.is_some(),
1143            "Should provide fix indicator when enable_reflow is true"
1144        );
1145    }
1146
1147    #[test]
1148    fn test_no_fix_indicator_without_reflow() {
1149        // Test that without enable_reflow, no fix is provided
1150        let config = MD013Config {
1151            line_length: 30,
1152            enable_reflow: false,
1153            ..Default::default()
1154        };
1155        let rule = MD013LineLength::from_config_struct(config);
1156
1157        let content = "This is a very long line that definitely exceeds the thirty character limit";
1158        let ctx = LintContext::new(content);
1159        let warnings = rule.check(&ctx).unwrap();
1160
1161        // Should NOT have a fix indicator when enable_reflow is false
1162        assert!(!warnings.is_empty());
1163        assert!(
1164            warnings[0].fix.is_none(),
1165            "Should not provide fix when enable_reflow is false"
1166        );
1167    }
1168
1169    #[test]
1170    fn test_reflow_preserves_all_reference_link_types() {
1171        let config = MD013Config {
1172            line_length: 40,
1173            enable_reflow: true,
1174            ..Default::default()
1175        };
1176        let rule = MD013LineLength::from_config_struct(config);
1177
1178        let content = "Test [full reference][ref] and [collapsed][] and [shortcut] reference links in a very long line.
1179
1180[ref]: https://example.com
1181[collapsed]: https://example.com
1182[shortcut]: https://example.com";
1183
1184        let ctx = LintContext::new(content);
1185        let fixed = rule.fix(&ctx).unwrap();
1186
1187        // All reference link types should be preserved
1188        assert!(fixed.contains("[full reference][ref]"));
1189        assert!(fixed.contains("[collapsed][]"));
1190        assert!(fixed.contains("[shortcut]"));
1191    }
1192
1193    #[test]
1194    fn test_reflow_handles_images_correctly() {
1195        let config = MD013Config {
1196            line_length: 40,
1197            enable_reflow: true,
1198            ..Default::default()
1199        };
1200        let rule = MD013LineLength::from_config_struct(config);
1201
1202        let content = "This line has an ![image alt text](https://example.com/image.png) that should not be broken when reflowing.";
1203        let ctx = LintContext::new(content);
1204        let fixed = rule.fix(&ctx).unwrap();
1205
1206        // Image should remain intact
1207        assert!(fixed.contains("![image alt text](https://example.com/image.png)"));
1208    }
1209}