rumdl_lib/rules/
md013_line_length.rs

1/// Rule MD013: Line length
2///
3/// See [docs/md013.md](../../docs/md013.md) for full documentation, configuration, and examples.
4use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
5use crate::rule_config_serde::RuleConfig;
6use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
7use crate::utils::range_utils::calculate_excess_range;
8use crate::utils::regex_cache::{
9    IMAGE_REF_PATTERN, INLINE_LINK_REGEX as MARKDOWN_LINK_PATTERN, LINK_REF_PATTERN, URL_IN_TEXT, URL_PATTERN,
10};
11use toml;
12
13pub mod md013_config;
14use md013_config::MD013Config;
15
16#[derive(Clone, Default)]
17pub struct MD013LineLength {
18    config: MD013Config,
19}
20
21impl MD013LineLength {
22    pub fn new(line_length: usize, code_blocks: bool, tables: bool, headings: bool, strict: bool) -> Self {
23        Self {
24            config: MD013Config {
25                line_length,
26                code_blocks,
27                tables,
28                headings,
29                strict,
30                heading_line_length: None,
31                code_block_line_length: None,
32                stern: false,
33                reflow: false,
34            },
35        }
36    }
37
38    pub fn from_config_struct(config: MD013Config) -> Self {
39        Self { config }
40    }
41
42    fn is_in_table(lines: &[&str], current_line: usize) -> bool {
43        // Check if current line is part of a table
44        let current = lines[current_line].trim();
45        if current.starts_with('|') || current.starts_with("|-") {
46            return true;
47        }
48
49        // Check if line is between table markers
50        if current_line > 0 && current_line + 1 < lines.len() {
51            let prev = lines[current_line - 1].trim();
52            let next = lines[current_line + 1].trim();
53            if (prev.starts_with('|') || prev.starts_with("|-")) && (next.starts_with('|') || next.starts_with("|-")) {
54                return true;
55            }
56        }
57        false
58    }
59
60    fn should_ignore_line(
61        &self,
62        line: &str,
63        _lines: &[&str],
64        current_line: usize,
65        structure: &DocumentStructure,
66    ) -> bool {
67        if self.config.strict || self.config.stern {
68            return false;
69        }
70
71        // Quick check for common patterns before expensive regex
72        let trimmed = line.trim();
73
74        // Only skip if the entire line is a URL (quick check first)
75        if (trimmed.starts_with("http://") || trimmed.starts_with("https://")) && URL_PATTERN.is_match(trimmed) {
76            return true;
77        }
78
79        // Only skip if the entire line is an image reference (quick check first)
80        if trimmed.starts_with("![") && trimmed.ends_with(']') && IMAGE_REF_PATTERN.is_match(trimmed) {
81            return true;
82        }
83
84        // Only skip if the entire line is a link reference (quick check first)
85        if trimmed.starts_with('[') && trimmed.contains("]:") && LINK_REF_PATTERN.is_match(trimmed) {
86            return true;
87        }
88
89        // Code blocks with long strings (only check if in code block)
90        if structure.is_in_code_block(current_line + 1)
91            && !trimmed.is_empty()
92            && !line.contains(' ')
93            && !line.contains('\t')
94        {
95            return true;
96        }
97
98        false
99    }
100}
101
102impl Rule for MD013LineLength {
103    fn name(&self) -> &'static str {
104        "MD013"
105    }
106
107    fn description(&self) -> &'static str {
108        "Line length should not be excessive"
109    }
110
111    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
112        let content = ctx.content;
113
114        // Early return for empty content
115        if content.is_empty() {
116            return Ok(Vec::new());
117        }
118
119        // Quick check: if total content is shorter than line limit, definitely no violations
120        if content.len() <= self.config.line_length {
121            return Ok(Vec::new());
122        }
123
124        // More aggressive early return - check if any line could possibly be long
125        let has_long_lines = if !ctx.lines.is_empty() {
126            ctx.lines
127                .iter()
128                .any(|line| line.content.len() > self.config.line_length)
129        } else {
130            // Fallback: do a quick scan for newlines to estimate max line length
131            let mut max_line_len = 0;
132            let mut current_line_len = 0;
133            for ch in content.chars() {
134                if ch == '\n' {
135                    max_line_len = max_line_len.max(current_line_len);
136                    current_line_len = 0;
137                } else {
138                    current_line_len += 1;
139                }
140            }
141            max_line_len = max_line_len.max(current_line_len);
142            max_line_len > self.config.line_length
143        };
144
145        if !has_long_lines {
146            return Ok(Vec::new());
147        }
148
149        // Create structure manually
150        let structure = DocumentStructure::new(content);
151        self.check_with_structure(ctx, &structure)
152    }
153
154    /// Optimized check using pre-computed document structure
155    fn check_with_structure(
156        &self,
157        ctx: &crate::lint_context::LintContext,
158        structure: &DocumentStructure,
159    ) -> LintResult {
160        let content = ctx.content;
161        let mut warnings = Vec::new();
162
163        // Early return was already done in check(), so we know there are long lines
164
165        // Check for inline configuration overrides
166        let inline_config = crate::inline_config::InlineConfig::from_content(content);
167        let config_override = inline_config.get_rule_config("MD013");
168
169        // Apply configuration override if present
170        let effective_config = if let Some(json_config) = config_override {
171            if let Some(obj) = json_config.as_object() {
172                let mut config = self.config.clone();
173                if let Some(line_length) = obj.get("line_length").and_then(|v| v.as_u64()) {
174                    config.line_length = line_length as usize;
175                }
176                if let Some(code_blocks) = obj.get("code_blocks").and_then(|v| v.as_bool()) {
177                    config.code_blocks = code_blocks;
178                }
179                if let Some(tables) = obj.get("tables").and_then(|v| v.as_bool()) {
180                    config.tables = tables;
181                }
182                if let Some(headings) = obj.get("headings").and_then(|v| v.as_bool()) {
183                    config.headings = headings;
184                }
185                if let Some(strict) = obj.get("strict").and_then(|v| v.as_bool()) {
186                    config.strict = strict;
187                }
188                if let Some(stern) = obj.get("stern").and_then(|v| v.as_bool()) {
189                    config.stern = stern;
190                }
191                if let Some(heading_line_length) = obj.get("heading_line_length").and_then(|v| v.as_u64()) {
192                    config.heading_line_length = Some(heading_line_length as usize);
193                }
194                if let Some(code_block_line_length) = obj.get("code_block_line_length").and_then(|v| v.as_u64()) {
195                    config.code_block_line_length = Some(code_block_line_length as usize);
196                }
197                config
198            } else {
199                self.config.clone()
200            }
201        } else {
202            self.config.clone()
203        };
204
205        // Use ctx.lines if available for better performance
206        let lines: Vec<&str> = if !ctx.lines.is_empty() {
207            ctx.lines.iter().map(|l| l.content.as_str()).collect()
208        } else {
209            content.lines().collect()
210        };
211
212        // Create a quick lookup set for heading lines
213        let heading_lines_set: std::collections::HashSet<usize> = structure.heading_lines.iter().cloned().collect();
214
215        // Pre-compute table lines for efficiency instead of calling is_in_table for each line
216        let table_lines_set: std::collections::HashSet<usize> = if effective_config.tables {
217            let mut table_lines = std::collections::HashSet::new();
218            let mut in_table = false;
219
220            for (i, line) in lines.iter().enumerate() {
221                let line_number = i + 1;
222
223                // Quick check if in code block using pre-computed blocks from context or structure
224                let in_code = if !ctx.code_blocks.is_empty() {
225                    ctx.code_blocks
226                        .iter()
227                        .any(|(start, end)| *start <= line_number && line_number <= *end)
228                } else {
229                    structure.is_in_code_block(line_number)
230                };
231
232                if !in_code && line.contains('|') {
233                    in_table = true;
234                    table_lines.insert(line_number);
235                } else if in_table && line.trim().is_empty() {
236                    in_table = false;
237                } else if in_table {
238                    table_lines.insert(line_number);
239                }
240            }
241            table_lines
242        } else {
243            std::collections::HashSet::new()
244        };
245
246        for (line_num, line) in lines.iter().enumerate() {
247            let line_number = line_num + 1;
248
249            // Calculate effective length excluding unbreakable URLs
250            let effective_length = self.calculate_effective_length(line);
251
252            // Determine the appropriate line length limit based on line type
253            let line_limit = if heading_lines_set.contains(&line_number) {
254                effective_config
255                    .heading_line_length
256                    .unwrap_or(effective_config.line_length)
257            } else if structure.is_in_code_block(line_number) {
258                effective_config
259                    .code_block_line_length
260                    .unwrap_or(effective_config.line_length)
261            } else {
262                effective_config.line_length
263            };
264
265            // Skip short lines immediately
266            if effective_length <= line_limit {
267                continue;
268            }
269
270            // Skip various block types efficiently
271            if !effective_config.strict && !effective_config.stern {
272                // Skip setext heading underlines
273                if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
274                    continue;
275                }
276
277                // Skip block elements according to config flags (optimized checks)
278                if (effective_config.headings
279                    && heading_lines_set.contains(&line_number)
280                    && effective_config.heading_line_length.is_none())
281                    || (!effective_config.code_blocks
282                        && structure.is_in_code_block(line_number)
283                        && effective_config.code_block_line_length.is_none())
284                    || (effective_config.tables && table_lines_set.contains(&line_number))
285                    || structure.is_in_blockquote(line_number)
286                    || structure.is_in_html_block(line_number)
287                {
288                    continue;
289                }
290
291                // Skip lines that are only a URL, image ref, or link ref
292                if self.should_ignore_line(line, &lines, line_num, structure) {
293                    continue;
294                }
295            } else if effective_config.stern {
296                // In stern mode, only skip if explicitly configured
297                if (effective_config.headings
298                    && heading_lines_set.contains(&line_number)
299                    && effective_config.heading_line_length.is_none())
300                    || (!effective_config.code_blocks
301                        && structure.is_in_code_block(line_number)
302                        && effective_config.code_block_line_length.is_none())
303                    || (effective_config.tables && table_lines_set.contains(&line_number))
304                {
305                    continue;
306                }
307            }
308
309            // Only provide a fix if reflow is enabled
310            let fix = if self.config.reflow && !self.should_skip_line_for_fix(line, line_num, structure) {
311                // Provide a placeholder fix to indicate that reflow will happen
312                // The actual reflow is done in the fix() method
313                Some(crate::rule::Fix {
314                    range: 0..0,                // Placeholder range
315                    replacement: String::new(), // Placeholder replacement
316                })
317            } else {
318                None
319            };
320
321            let message = format!("Line length {effective_length} exceeds {line_limit} characters");
322
323            // Calculate precise character range for the excess portion
324            let (start_line, start_col, end_line, end_col) = calculate_excess_range(line_number, line, line_limit);
325
326            warnings.push(LintWarning {
327                rule_name: Some(self.name()),
328                message,
329                line: start_line,
330                column: start_col,
331                end_line,
332                end_column: end_col,
333                severity: Severity::Warning,
334                fix,
335            });
336        }
337        Ok(warnings)
338    }
339
340    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
341        // Only fix if reflow is enabled
342        if self.config.reflow {
343            let reflow_options = crate::utils::text_reflow::ReflowOptions {
344                line_length: self.config.line_length,
345                break_on_sentences: true,
346                preserve_breaks: false,
347            };
348
349            return Ok(crate::utils::text_reflow::reflow_markdown(ctx.content, &reflow_options));
350        }
351
352        // Without reflow, MD013 has no fixes available
353        Ok(ctx.content.to_string())
354    }
355
356    fn as_any(&self) -> &dyn std::any::Any {
357        self
358    }
359
360    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
361        Some(self)
362    }
363
364    fn category(&self) -> RuleCategory {
365        RuleCategory::Whitespace
366    }
367
368    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
369        // Skip if content is empty
370        if ctx.content.is_empty() {
371            return true;
372        }
373
374        // Quick check: if total content is shorter than line limit, definitely skip
375        if ctx.content.len() <= self.config.line_length {
376            return true;
377        }
378
379        // Use more efficient check - any() with early termination instead of all()
380        !ctx.lines
381            .iter()
382            .any(|line| line.content.len() > self.config.line_length)
383    }
384
385    fn default_config_section(&self) -> Option<(String, toml::Value)> {
386        let default_config = MD013Config::default();
387        let json_value = serde_json::to_value(&default_config).ok()?;
388        let toml_value = crate::rule_config_serde::json_to_toml_value(&json_value)?;
389
390        if let toml::Value::Table(table) = toml_value {
391            if !table.is_empty() {
392                Some((MD013Config::RULE_NAME.to_string(), toml::Value::Table(table)))
393            } else {
394                None
395            }
396        } else {
397            None
398        }
399    }
400
401    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
402        let mut aliases = std::collections::HashMap::new();
403        aliases.insert("enable_reflow".to_string(), "reflow".to_string());
404        Some(aliases)
405    }
406
407    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
408    where
409        Self: Sized,
410    {
411        let mut rule_config = crate::rule_config_serde::load_rule_config::<MD013Config>(config);
412        // Special handling for line_length from global config
413        if rule_config.line_length == 80 {
414            // default value
415            rule_config.line_length = config.global.line_length as usize;
416        }
417        Box::new(Self::from_config_struct(rule_config))
418    }
419}
420
421impl MD013LineLength {
422    /// Check if a line should be skipped for fixing
423    fn should_skip_line_for_fix(&self, line: &str, line_num: usize, structure: &DocumentStructure) -> bool {
424        let line_number = line_num + 1; // 1-based
425
426        // Skip code blocks
427        if structure.is_in_code_block(line_number) {
428            return true;
429        }
430
431        // Skip HTML blocks
432        if structure.is_in_html_block(line_number) {
433            return true;
434        }
435
436        // Skip tables (they have complex formatting)
437        if Self::is_in_table(&[line], 0) {
438            return true;
439        }
440
441        // Skip lines that are only URLs (can't be wrapped)
442        if line.trim().starts_with("http://") || line.trim().starts_with("https://") {
443            return true;
444        }
445
446        // Skip setext heading underlines
447        if !line.trim().is_empty() && line.trim().chars().all(|c| c == '=' || c == '-') {
448            return true;
449        }
450
451        false
452    }
453
454    /// Calculate effective line length excluding unbreakable URLs
455    fn calculate_effective_length(&self, line: &str) -> usize {
456        if self.config.strict || self.config.stern {
457            // In strict or stern mode, count everything
458            return line.chars().count();
459        }
460
461        // Quick check: if line doesn't contain "http" or "[", it can't have URLs or markdown links
462        if !line.contains("http") && !line.contains('[') {
463            return line.chars().count();
464        }
465
466        let mut effective_line = line.to_string();
467
468        // First handle markdown links to avoid double-counting URLs
469        // Pattern: [text](very-long-url) -> [text](url)
470        if line.contains('[') && line.contains("](") {
471            for cap in MARKDOWN_LINK_PATTERN.captures_iter(&effective_line.clone()) {
472                if let (Some(full_match), Some(text), Some(url)) = (cap.get(0), cap.get(1), cap.get(2))
473                    && url.as_str().len() > 15
474                {
475                    let replacement = format!("[{}](url)", text.as_str());
476                    effective_line = effective_line.replacen(full_match.as_str(), &replacement, 1);
477                }
478            }
479        }
480
481        // Then replace bare URLs with a placeholder of reasonable length
482        // This allows lines with long URLs to pass if the rest of the content is reasonable
483        if effective_line.contains("http") {
484            for url_match in URL_IN_TEXT.find_iter(&effective_line.clone()) {
485                let url = url_match.as_str();
486                // Skip if this URL is already part of a markdown link we handled
487                if !effective_line.contains(&format!("({url})")) {
488                    // Replace URL with placeholder that represents a "reasonable" URL length
489                    // Using 15 chars as a reasonable URL placeholder (e.g., "https://ex.com")
490                    let placeholder = "x".repeat(15.min(url.len()));
491                    effective_line = effective_line.replacen(url, &placeholder, 1);
492                }
493            }
494        }
495
496        effective_line.chars().count()
497    }
498}
499
500impl DocumentStructureExtensions for MD013LineLength {
501    fn has_relevant_elements(
502        &self,
503        ctx: &crate::lint_context::LintContext,
504        _doc_structure: &DocumentStructure,
505    ) -> bool {
506        // This rule always applies unless content is empty
507        !ctx.content.is_empty()
508    }
509}
510
511#[cfg(test)]
512mod tests {
513    use super::*;
514    use crate::lint_context::LintContext;
515
516    #[test]
517    fn test_default_config() {
518        let rule = MD013LineLength::default();
519        assert_eq!(rule.config.line_length, 80);
520        assert!(rule.config.code_blocks); // Default is true
521        assert!(rule.config.tables); // Default is true
522        assert!(rule.config.headings);
523        assert!(!rule.config.strict);
524    }
525
526    #[test]
527    fn test_custom_config() {
528        let rule = MD013LineLength::new(100, true, true, false, true);
529        assert_eq!(rule.config.line_length, 100);
530        assert!(rule.config.code_blocks);
531        assert!(rule.config.tables);
532        assert!(!rule.config.headings);
533        assert!(rule.config.strict);
534    }
535
536    #[test]
537    fn test_basic_line_length_violation() {
538        let rule = MD013LineLength::new(50, false, false, false, false);
539        let content = "This is a line that is definitely longer than fifty characters and should trigger a warning.";
540        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
541        let result = rule.check(&ctx).unwrap();
542
543        assert_eq!(result.len(), 1);
544        assert!(result[0].message.contains("Line length"));
545        assert!(result[0].message.contains("exceeds 50 characters"));
546    }
547
548    #[test]
549    fn test_no_violation_under_limit() {
550        let rule = MD013LineLength::new(100, false, false, false, false);
551        let content = "Short line.\nAnother short line.";
552        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
553        let result = rule.check(&ctx).unwrap();
554
555        assert_eq!(result.len(), 0);
556    }
557
558    #[test]
559    fn test_multiple_violations() {
560        let rule = MD013LineLength::new(30, false, false, false, false);
561        let content = "This line is definitely longer than thirty chars.\nThis is also a line that exceeds the limit.\nShort line.";
562        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
563        let result = rule.check(&ctx).unwrap();
564
565        assert_eq!(result.len(), 2);
566        assert_eq!(result[0].line, 1);
567        assert_eq!(result[1].line, 2);
568    }
569
570    #[test]
571    fn test_code_blocks_exemption() {
572        let rule = MD013LineLength::new(30, false, false, false, false);
573        let content = "```\nThis is a very long line inside a code block that should be ignored.\n```";
574        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
575        let result = rule.check(&ctx).unwrap();
576
577        assert_eq!(result.len(), 0);
578    }
579
580    #[test]
581    fn test_code_blocks_not_exempt_when_configured() {
582        let rule = MD013LineLength::new(30, true, false, false, false);
583        let content = "```\nThis is a very long line inside a code block that should NOT be ignored.\n```";
584        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
585        let result = rule.check(&ctx).unwrap();
586
587        assert!(!result.is_empty());
588    }
589
590    #[test]
591    fn test_heading_exemption() {
592        let rule = MD013LineLength::new(30, false, false, true, false);
593        let content = "# This is a very long heading that would normally exceed the limit";
594        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
595        let result = rule.check(&ctx).unwrap();
596
597        assert_eq!(result.len(), 0);
598    }
599
600    #[test]
601    fn test_heading_not_exempt_when_configured() {
602        let rule = MD013LineLength::new(30, false, false, false, false);
603        let content = "# This is a very long heading that should trigger a warning";
604        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
605        let result = rule.check(&ctx).unwrap();
606
607        assert_eq!(result.len(), 1);
608    }
609
610    #[test]
611    fn test_table_detection() {
612        let lines = vec![
613            "| Column 1 | Column 2 |",
614            "|----------|----------|",
615            "| Value 1  | Value 2  |",
616        ];
617
618        assert!(MD013LineLength::is_in_table(&lines, 0));
619        assert!(MD013LineLength::is_in_table(&lines, 1));
620        assert!(MD013LineLength::is_in_table(&lines, 2));
621    }
622
623    #[test]
624    fn test_table_exemption() {
625        let rule = MD013LineLength::new(30, false, true, false, false);
626        let content = "| This is a very long table header | Another long column header |\n|-----------------------------------|-------------------------------|";
627        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
628        let result = rule.check(&ctx).unwrap();
629
630        assert_eq!(result.len(), 0);
631    }
632
633    #[test]
634    fn test_url_exemption() {
635        let rule = MD013LineLength::new(30, false, false, false, false);
636        let content = "https://example.com/this/is/a/very/long/url/that/exceeds/the/limit";
637        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
638        let result = rule.check(&ctx).unwrap();
639
640        assert_eq!(result.len(), 0);
641    }
642
643    #[test]
644    fn test_image_reference_exemption() {
645        let rule = MD013LineLength::new(30, false, false, false, false);
646        let content = "![This is a very long image alt text that exceeds limit][reference]";
647        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
648        let result = rule.check(&ctx).unwrap();
649
650        assert_eq!(result.len(), 0);
651    }
652
653    #[test]
654    fn test_link_reference_exemption() {
655        let rule = MD013LineLength::new(30, false, false, false, false);
656        let content = "[reference]: https://example.com/very/long/url/that/exceeds/limit";
657        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
658        let result = rule.check(&ctx).unwrap();
659
660        assert_eq!(result.len(), 0);
661    }
662
663    #[test]
664    fn test_strict_mode() {
665        let rule = MD013LineLength::new(30, false, false, false, true);
666        let content = "https://example.com/this/is/a/very/long/url/that/exceeds/the/limit";
667        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
668        let result = rule.check(&ctx).unwrap();
669
670        // In strict mode, even URLs trigger warnings
671        assert_eq!(result.len(), 1);
672    }
673
674    #[test]
675    fn test_blockquote_exemption() {
676        let rule = MD013LineLength::new(30, false, false, false, false);
677        let content = "> This is a very long line inside a blockquote that should be ignored.";
678        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
679        let result = rule.check(&ctx).unwrap();
680
681        assert_eq!(result.len(), 0);
682    }
683
684    #[test]
685    fn test_setext_heading_underline_exemption() {
686        let rule = MD013LineLength::new(30, false, false, false, false);
687        let content = "Heading\n========================================";
688        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
689        let result = rule.check(&ctx).unwrap();
690
691        // The underline should be exempt
692        assert_eq!(result.len(), 0);
693    }
694
695    #[test]
696    fn test_no_fix_without_reflow() {
697        let rule = MD013LineLength::new(60, false, false, false, false);
698        let content = "This line has trailing whitespace that makes it too long      ";
699        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
700        let result = rule.check(&ctx).unwrap();
701
702        assert_eq!(result.len(), 1);
703        // Without reflow, no fix is provided
704        assert!(result[0].fix.is_none());
705
706        // Fix method returns content unchanged
707        let fixed = rule.fix(&ctx).unwrap();
708        assert_eq!(fixed, content);
709    }
710
711    #[test]
712    fn test_character_vs_byte_counting() {
713        let rule = MD013LineLength::new(10, false, false, false, false);
714        // Unicode characters should count as 1 character each
715        let content = "你好世界这是测试文字超过限制"; // 14 characters
716        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
717        let result = rule.check(&ctx).unwrap();
718
719        assert_eq!(result.len(), 1);
720        assert_eq!(result[0].line, 1);
721    }
722
723    #[test]
724    fn test_empty_content() {
725        let rule = MD013LineLength::default();
726        let ctx = LintContext::new("", crate::config::MarkdownFlavor::Standard);
727        let result = rule.check(&ctx).unwrap();
728
729        assert_eq!(result.len(), 0);
730    }
731
732    #[test]
733    fn test_excess_range_calculation() {
734        let rule = MD013LineLength::new(10, false, false, false, false);
735        let content = "12345678901234567890"; // 20 chars, limit is 10
736        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
737        let result = rule.check(&ctx).unwrap();
738
739        assert_eq!(result.len(), 1);
740        // The warning should highlight from character 11 onwards
741        assert_eq!(result[0].column, 11);
742        assert_eq!(result[0].end_column, 21);
743    }
744
745    #[test]
746    fn test_html_block_exemption() {
747        let rule = MD013LineLength::new(30, false, false, false, false);
748        let content = "<div>\nThis is a very long line inside an HTML block that should be ignored.\n</div>";
749        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
750        let result = rule.check(&ctx).unwrap();
751
752        // HTML blocks should be exempt
753        assert_eq!(result.len(), 0);
754    }
755
756    #[test]
757    fn test_mixed_content() {
758        // code_blocks=false, tables=true, headings=true
759        let rule = MD013LineLength::new(30, false, true, true, false);
760        let content = r#"# This heading is very long but should be exempt
761
762This regular paragraph line is too long and should trigger.
763
764```
765Code block line that is very long but exempt.
766```
767
768| Table | With very long content |
769|-------|------------------------|
770
771Another long line that should trigger a warning."#;
772
773        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
774        let result = rule.check(&ctx).unwrap();
775
776        // Should have warnings for the two regular paragraph lines only
777        assert_eq!(result.len(), 2);
778        assert_eq!(result[0].line, 3);
779        assert_eq!(result[1].line, 12);
780    }
781
782    #[test]
783    fn test_fix_without_reflow_preserves_content() {
784        let rule = MD013LineLength::new(50, false, false, false, false);
785        let content = "Line 1\nThis line has trailing spaces and is too long      \nLine 3";
786        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
787
788        // Without reflow, content is unchanged
789        let fixed = rule.fix(&ctx).unwrap();
790        assert_eq!(fixed, content);
791    }
792
793    #[test]
794    fn test_has_relevant_elements() {
795        let rule = MD013LineLength::default();
796        let structure = DocumentStructure::new("test");
797
798        let ctx = LintContext::new("Some content", crate::config::MarkdownFlavor::Standard);
799        assert!(rule.has_relevant_elements(&ctx, &structure));
800
801        let empty_ctx = LintContext::new("", crate::config::MarkdownFlavor::Standard);
802        assert!(!rule.has_relevant_elements(&empty_ctx, &structure));
803    }
804
805    #[test]
806    fn test_rule_metadata() {
807        let rule = MD013LineLength::default();
808        assert_eq!(rule.name(), "MD013");
809        assert_eq!(rule.description(), "Line length should not be excessive");
810        assert_eq!(rule.category(), RuleCategory::Whitespace);
811    }
812
813    #[test]
814    fn test_url_embedded_in_text() {
815        let rule = MD013LineLength::new(50, false, false, false, false);
816
817        // This line would be 85 chars, but only ~45 without the URL
818        let content = "Check the docs at https://example.com/very/long/url/that/exceeds/limit for info";
819        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
820        let result = rule.check(&ctx).unwrap();
821
822        // Should not flag because effective length (with URL placeholder) is under 50
823        assert_eq!(result.len(), 0);
824    }
825
826    #[test]
827    fn test_multiple_urls_in_line() {
828        let rule = MD013LineLength::new(50, false, false, false, false);
829
830        // Line with multiple URLs
831        let content = "See https://first-url.com/long and https://second-url.com/also/very/long here";
832        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
833
834        let result = rule.check(&ctx).unwrap();
835
836        // Should not flag because effective length is reasonable
837        assert_eq!(result.len(), 0);
838    }
839
840    #[test]
841    fn test_markdown_link_with_long_url() {
842        let rule = MD013LineLength::new(50, false, false, false, false);
843
844        // Markdown link with very long URL
845        let content = "Check the [documentation](https://example.com/very/long/path/to/documentation/page) for details";
846        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
847        let result = rule.check(&ctx).unwrap();
848
849        // Should not flag because effective length counts link as short
850        assert_eq!(result.len(), 0);
851    }
852
853    #[test]
854    fn test_line_too_long_even_without_urls() {
855        let rule = MD013LineLength::new(50, false, false, false, false);
856
857        // Line that's too long even after URL exclusion
858        let content = "This is a very long line with lots of text and https://url.com that still exceeds the limit";
859        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
860        let result = rule.check(&ctx).unwrap();
861
862        // Should flag because even with URL placeholder, line is too long
863        assert_eq!(result.len(), 1);
864    }
865
866    #[test]
867    fn test_strict_mode_counts_urls() {
868        let rule = MD013LineLength::new(50, false, false, false, true); // strict=true
869
870        // Same line that passes in non-strict mode
871        let content = "Check the docs at https://example.com/very/long/url/that/exceeds/limit for info";
872        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
873        let result = rule.check(&ctx).unwrap();
874
875        // In strict mode, should flag because full URL is counted
876        assert_eq!(result.len(), 1);
877    }
878
879    #[test]
880    fn test_documentation_example_from_md051() {
881        let rule = MD013LineLength::new(80, false, false, false, false);
882
883        // This is the actual line from md051.md that was causing issues
884        let content = r#"For more information, see the [CommonMark specification](https://spec.commonmark.org/0.30/#link-reference-definitions)."#;
885        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
886        let result = rule.check(&ctx).unwrap();
887
888        // Should not flag because the URL is in a markdown link
889        assert_eq!(result.len(), 0);
890    }
891
892    #[test]
893    fn test_text_reflow_simple() {
894        let config = MD013Config {
895            line_length: 30,
896            reflow: true,
897            ..Default::default()
898        };
899        let rule = MD013LineLength::from_config_struct(config);
900
901        let content = "This is a very long line that definitely exceeds thirty characters and needs to be wrapped.";
902        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
903
904        let fixed = rule.fix(&ctx).unwrap();
905
906        // Verify all lines are under 30 chars
907        for line in fixed.lines() {
908            assert!(
909                line.chars().count() <= 30,
910                "Line too long: {} (len={})",
911                line,
912                line.chars().count()
913            );
914        }
915
916        // Verify content is preserved
917        let fixed_words: Vec<&str> = fixed.split_whitespace().collect();
918        let original_words: Vec<&str> = content.split_whitespace().collect();
919        assert_eq!(fixed_words, original_words);
920    }
921
922    #[test]
923    fn test_text_reflow_preserves_markdown_elements() {
924        let config = MD013Config {
925            line_length: 40,
926            reflow: true,
927            ..Default::default()
928        };
929        let rule = MD013LineLength::from_config_struct(config);
930
931        let content = "This paragraph has **bold text** and *italic text* and [a link](https://example.com) that should be preserved.";
932        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
933
934        let fixed = rule.fix(&ctx).unwrap();
935
936        // Verify markdown elements are preserved
937        assert!(fixed.contains("**bold text**"), "Bold text not preserved in: {fixed}");
938        assert!(fixed.contains("*italic text*"), "Italic text not preserved in: {fixed}");
939        assert!(
940            fixed.contains("[a link](https://example.com)"),
941            "Link not preserved in: {fixed}"
942        );
943
944        // Verify all lines are under 40 chars
945        for line in fixed.lines() {
946            assert!(line.len() <= 40, "Line too long: {line}");
947        }
948    }
949
950    #[test]
951    fn test_text_reflow_preserves_code_blocks() {
952        let config = MD013Config {
953            line_length: 30,
954            reflow: true,
955            ..Default::default()
956        };
957        let rule = MD013LineLength::from_config_struct(config);
958
959        let content = r#"Here is some text.
960
961```python
962def very_long_function_name_that_exceeds_limit():
963    return "This should not be wrapped"
964```
965
966More text after code block."#;
967        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
968
969        let fixed = rule.fix(&ctx).unwrap();
970
971        // Verify code block is preserved
972        assert!(fixed.contains("def very_long_function_name_that_exceeds_limit():"));
973        assert!(fixed.contains("```python"));
974        assert!(fixed.contains("```"));
975    }
976
977    #[test]
978    fn test_text_reflow_preserves_lists() {
979        let config = MD013Config {
980            line_length: 30,
981            reflow: true,
982            ..Default::default()
983        };
984        let rule = MD013LineLength::from_config_struct(config);
985
986        let content = r#"Here is a list:
987
9881. First item with a very long line that needs wrapping
9892. Second item is short
9903. Third item also has a long line that exceeds the limit
991
992And a bullet list:
993
994- Bullet item with very long content that needs wrapping
995- Short bullet"#;
996        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
997
998        let fixed = rule.fix(&ctx).unwrap();
999
1000        // Verify list structure is preserved
1001        assert!(fixed.contains("1. "));
1002        assert!(fixed.contains("2. "));
1003        assert!(fixed.contains("3. "));
1004        assert!(fixed.contains("- "));
1005
1006        // Verify proper indentation for wrapped lines
1007        let lines: Vec<&str> = fixed.lines().collect();
1008        for (i, line) in lines.iter().enumerate() {
1009            if line.trim().starts_with("1.") || line.trim().starts_with("2.") || line.trim().starts_with("3.") {
1010                // Check if next line is a continuation (should be indented with 3 spaces for numbered lists)
1011                if i + 1 < lines.len()
1012                    && !lines[i + 1].trim().is_empty()
1013                    && !lines[i + 1].trim().starts_with(char::is_numeric)
1014                    && !lines[i + 1].trim().starts_with("-")
1015                {
1016                    // Numbered list continuation lines should have 3 spaces
1017                    assert!(lines[i + 1].starts_with("   ") || lines[i + 1].trim().is_empty());
1018                }
1019            } else if line.trim().starts_with("-") {
1020                // Check if next line is a continuation (should be indented with 2 spaces for dash lists)
1021                if i + 1 < lines.len()
1022                    && !lines[i + 1].trim().is_empty()
1023                    && !lines[i + 1].trim().starts_with(char::is_numeric)
1024                    && !lines[i + 1].trim().starts_with("-")
1025                {
1026                    // Dash list continuation lines should have 2 spaces
1027                    assert!(lines[i + 1].starts_with("  ") || lines[i + 1].trim().is_empty());
1028                }
1029            }
1030        }
1031    }
1032
1033    #[test]
1034    fn test_text_reflow_disabled_by_default() {
1035        let rule = MD013LineLength::new(30, false, false, false, false);
1036
1037        let content = "This is a very long line that definitely exceeds thirty characters.";
1038        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1039
1040        let fixed = rule.fix(&ctx).unwrap();
1041
1042        // Without reflow enabled, it should only trim whitespace (if any)
1043        // Since there's no trailing whitespace, content should be unchanged
1044        assert_eq!(fixed, content);
1045    }
1046
1047    #[test]
1048    fn test_reflow_with_hard_line_breaks() {
1049        // Test that lines with exactly 2 trailing spaces are preserved as hard breaks
1050        let config = MD013Config {
1051            line_length: 40,
1052            reflow: true,
1053            ..Default::default()
1054        };
1055        let rule = MD013LineLength::from_config_struct(config);
1056
1057        // Test with exactly 2 spaces (hard line break)
1058        let content = "This line has a hard break at the end  \nAnd this continues on the next line that is also quite long and needs wrapping";
1059        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1060        let fixed = rule.fix(&ctx).unwrap();
1061
1062        // Should preserve the hard line break (2 spaces)
1063        assert!(
1064            fixed.contains("  \n"),
1065            "Hard line break with exactly 2 spaces should be preserved"
1066        );
1067    }
1068
1069    #[test]
1070    fn test_reflow_preserves_reference_links() {
1071        let config = MD013Config {
1072            line_length: 40,
1073            reflow: true,
1074            ..Default::default()
1075        };
1076        let rule = MD013LineLength::from_config_struct(config);
1077
1078        let content = "This is a very long line with a [reference link][ref] that should not be broken apart when reflowing the text.
1079
1080[ref]: https://example.com";
1081        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1082        let fixed = rule.fix(&ctx).unwrap();
1083
1084        // Reference link should remain intact
1085        assert!(fixed.contains("[reference link][ref]"));
1086        assert!(!fixed.contains("[ reference link]"));
1087        assert!(!fixed.contains("[ref ]"));
1088    }
1089
1090    #[test]
1091    fn test_reflow_with_nested_markdown_elements() {
1092        let config = MD013Config {
1093            line_length: 35,
1094            reflow: true,
1095            ..Default::default()
1096        };
1097        let rule = MD013LineLength::from_config_struct(config);
1098
1099        let content = "This text has **bold with `code` inside** and should handle it properly when wrapping";
1100        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1101        let fixed = rule.fix(&ctx).unwrap();
1102
1103        // Nested elements should be preserved
1104        assert!(fixed.contains("**bold with `code` inside**"));
1105    }
1106
1107    #[test]
1108    fn test_reflow_with_unbalanced_markdown() {
1109        // Test edge case with unbalanced markdown
1110        let config = MD013Config {
1111            line_length: 30,
1112            reflow: true,
1113            ..Default::default()
1114        };
1115        let rule = MD013LineLength::from_config_struct(config);
1116
1117        let content = "This has **unbalanced bold that goes on for a very long time without closing";
1118        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1119        let fixed = rule.fix(&ctx).unwrap();
1120
1121        // Should handle gracefully without panic
1122        // The text reflow handles unbalanced markdown by treating it as a bold element
1123        // Check that the content is properly reflowed without panic
1124        assert!(!fixed.is_empty());
1125        // Verify the content is wrapped to 30 chars
1126        for line in fixed.lines() {
1127            assert!(line.len() <= 30 || line.starts_with("**"), "Line exceeds limit: {line}");
1128        }
1129    }
1130
1131    #[test]
1132    fn test_reflow_fix_indicator() {
1133        // Test that reflow provides fix indicators
1134        let config = MD013Config {
1135            line_length: 30,
1136            reflow: true,
1137            ..Default::default()
1138        };
1139        let rule = MD013LineLength::from_config_struct(config);
1140
1141        let content = "This is a very long line that definitely exceeds the thirty character limit";
1142        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1143        let warnings = rule.check(&ctx).unwrap();
1144
1145        // Should have a fix indicator when reflow is true
1146        assert!(!warnings.is_empty());
1147        assert!(
1148            warnings[0].fix.is_some(),
1149            "Should provide fix indicator when reflow is true"
1150        );
1151    }
1152
1153    #[test]
1154    fn test_no_fix_indicator_without_reflow() {
1155        // Test that without reflow, no fix is provided
1156        let config = MD013Config {
1157            line_length: 30,
1158            reflow: false,
1159            ..Default::default()
1160        };
1161        let rule = MD013LineLength::from_config_struct(config);
1162
1163        let content = "This is a very long line that definitely exceeds the thirty character limit";
1164        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1165        let warnings = rule.check(&ctx).unwrap();
1166
1167        // Should NOT have a fix indicator when reflow is false
1168        assert!(!warnings.is_empty());
1169        assert!(warnings[0].fix.is_none(), "Should not provide fix when reflow is false");
1170    }
1171
1172    #[test]
1173    fn test_reflow_preserves_all_reference_link_types() {
1174        let config = MD013Config {
1175            line_length: 40,
1176            reflow: true,
1177            ..Default::default()
1178        };
1179        let rule = MD013LineLength::from_config_struct(config);
1180
1181        let content = "Test [full reference][ref] and [collapsed][] and [shortcut] reference links in a very long line.
1182
1183[ref]: https://example.com
1184[collapsed]: https://example.com
1185[shortcut]: https://example.com";
1186
1187        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1188        let fixed = rule.fix(&ctx).unwrap();
1189
1190        // All reference link types should be preserved
1191        assert!(fixed.contains("[full reference][ref]"));
1192        assert!(fixed.contains("[collapsed][]"));
1193        assert!(fixed.contains("[shortcut]"));
1194    }
1195
1196    #[test]
1197    fn test_reflow_handles_images_correctly() {
1198        let config = MD013Config {
1199            line_length: 40,
1200            reflow: true,
1201            ..Default::default()
1202        };
1203        let rule = MD013LineLength::from_config_struct(config);
1204
1205        let content = "This line has an ![image alt text](https://example.com/image.png) that should not be broken when reflowing.";
1206        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
1207        let fixed = rule.fix(&ctx).unwrap();
1208
1209        // Image should remain intact
1210        assert!(fixed.contains("![image alt text](https://example.com/image.png)"));
1211    }
1212}