rumdl_lib/rules/
md033_no_inline_html.rs

1//!
2//! Rule MD033: No HTML tags
3//!
4//! See [docs/md033.md](../../docs/md033.md) for full documentation, configuration, and examples.
5
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::regex_cache::*;
9use std::collections::HashSet;
10
11mod md033_config;
12use md033_config::MD033Config;
13
14#[derive(Clone)]
15pub struct MD033NoInlineHtml {
16    config: MD033Config,
17    allowed: HashSet<String>,
18}
19
20impl Default for MD033NoInlineHtml {
21    fn default() -> Self {
22        let config = MD033Config::default();
23        let allowed = config.allowed_set();
24        Self { config, allowed }
25    }
26}
27
28impl MD033NoInlineHtml {
29    pub fn new() -> Self {
30        Self::default()
31    }
32
33    pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
34        let config = MD033Config {
35            allowed: allowed_vec.clone(),
36        };
37        let allowed = config.allowed_set();
38        Self { config, allowed }
39    }
40
41    pub fn from_config_struct(config: MD033Config) -> Self {
42        let allowed = config.allowed_set();
43        Self { config, allowed }
44    }
45
46    // Efficient check for allowed tags using HashSet (case-insensitive)
47    #[inline]
48    fn is_tag_allowed(&self, tag: &str) -> bool {
49        if self.allowed.is_empty() {
50            return false;
51        }
52        // Remove angle brackets and slashes, then split by whitespace or '>'
53        let tag = tag.trim_start_matches('<').trim_start_matches('/');
54        let tag_name = tag
55            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
56            .next()
57            .unwrap_or("");
58        self.allowed.contains(&tag_name.to_lowercase())
59    }
60
61    // Check if a tag is an HTML comment
62    #[inline]
63    fn is_html_comment(&self, tag: &str) -> bool {
64        tag.starts_with("<!--") && tag.ends_with("-->")
65    }
66
67    /// Check if a tag name is a valid HTML element or custom element.
68    /// Returns false for placeholder syntax like `<NAME>`, `<resource>`, `<actual>`.
69    ///
70    /// Per HTML spec, custom elements must contain a hyphen (e.g., `<my-component>`).
71    #[inline]
72    fn is_html_element_or_custom(tag_name: &str) -> bool {
73        const HTML_ELEMENTS: &[&str] = &[
74            // Document structure
75            "html",
76            "head",
77            "body",
78            "title",
79            "base",
80            "link",
81            "meta",
82            "style",
83            // Sections
84            "article",
85            "section",
86            "nav",
87            "aside",
88            "h1",
89            "h2",
90            "h3",
91            "h4",
92            "h5",
93            "h6",
94            "hgroup",
95            "header",
96            "footer",
97            "address",
98            "main",
99            "search",
100            // Grouping
101            "p",
102            "hr",
103            "pre",
104            "blockquote",
105            "ol",
106            "ul",
107            "menu",
108            "li",
109            "dl",
110            "dt",
111            "dd",
112            "figure",
113            "figcaption",
114            "div",
115            // Text-level
116            "a",
117            "em",
118            "strong",
119            "small",
120            "s",
121            "cite",
122            "q",
123            "dfn",
124            "abbr",
125            "ruby",
126            "rt",
127            "rp",
128            "data",
129            "time",
130            "code",
131            "var",
132            "samp",
133            "kbd",
134            "sub",
135            "sup",
136            "i",
137            "b",
138            "u",
139            "mark",
140            "bdi",
141            "bdo",
142            "span",
143            "br",
144            "wbr",
145            // Edits
146            "ins",
147            "del",
148            // Embedded
149            "picture",
150            "source",
151            "img",
152            "iframe",
153            "embed",
154            "object",
155            "param",
156            "video",
157            "audio",
158            "track",
159            "map",
160            "area",
161            "svg",
162            "math",
163            "canvas",
164            // Tables
165            "table",
166            "caption",
167            "colgroup",
168            "col",
169            "tbody",
170            "thead",
171            "tfoot",
172            "tr",
173            "td",
174            "th",
175            // Forms
176            "form",
177            "label",
178            "input",
179            "button",
180            "select",
181            "datalist",
182            "optgroup",
183            "option",
184            "textarea",
185            "output",
186            "progress",
187            "meter",
188            "fieldset",
189            "legend",
190            // Interactive
191            "details",
192            "summary",
193            "dialog",
194            // Scripting
195            "script",
196            "noscript",
197            "template",
198            "slot",
199            // Deprecated but recognized
200            "acronym",
201            "applet",
202            "basefont",
203            "big",
204            "center",
205            "dir",
206            "font",
207            "frame",
208            "frameset",
209            "isindex",
210            "noframes",
211            "strike",
212            "tt",
213        ];
214
215        let lower = tag_name.to_ascii_lowercase();
216        if HTML_ELEMENTS.contains(&lower.as_str()) {
217            return true;
218        }
219        // Custom elements must contain a hyphen per HTML spec
220        tag_name.contains('-')
221    }
222
223    // Check if a tag is likely a programming type annotation rather than HTML
224    #[inline]
225    fn is_likely_type_annotation(&self, tag: &str) -> bool {
226        // Common programming type names that are often used in generics
227        const COMMON_TYPES: &[&str] = &[
228            "string",
229            "number",
230            "any",
231            "void",
232            "null",
233            "undefined",
234            "array",
235            "promise",
236            "function",
237            "error",
238            "date",
239            "regexp",
240            "symbol",
241            "bigint",
242            "map",
243            "set",
244            "weakmap",
245            "weakset",
246            "iterator",
247            "generator",
248            "t",
249            "u",
250            "v",
251            "k",
252            "e", // Common single-letter type parameters
253            "userdata",
254            "apiresponse",
255            "config",
256            "options",
257            "params",
258            "result",
259            "response",
260            "request",
261            "data",
262            "item",
263            "element",
264            "node",
265        ];
266
267        let tag_content = tag
268            .trim_start_matches('<')
269            .trim_end_matches('>')
270            .trim_start_matches('/');
271        let tag_name = tag_content
272            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
273            .next()
274            .unwrap_or("");
275
276        // Check if it's a simple tag (no attributes) with a common type name
277        if !tag_content.contains(' ') && !tag_content.contains('=') {
278            COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
279        } else {
280            false
281        }
282    }
283
284    // Check if a tag is actually an email address in angle brackets
285    #[inline]
286    fn is_email_address(&self, tag: &str) -> bool {
287        let content = tag.trim_start_matches('<').trim_end_matches('>');
288        // Simple email pattern: contains @ and has reasonable structure
289        content.contains('@')
290            && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
291            && content.split('@').count() == 2
292            && content.split('@').all(|part| !part.is_empty())
293    }
294
295    // Check if a tag has the markdown attribute (MkDocs/Material for MkDocs)
296    #[inline]
297    fn has_markdown_attribute(&self, tag: &str) -> bool {
298        // Check for various forms of markdown attribute
299        // Examples: <div markdown>, <div markdown="1">, <div class="result" markdown>
300        tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
301    }
302
303    // Check if a tag is actually a URL in angle brackets
304    #[inline]
305    fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
306        let content = tag.trim_start_matches('<').trim_end_matches('>');
307        // Check for common URL schemes
308        content.starts_with("http://")
309            || content.starts_with("https://")
310            || content.starts_with("ftp://")
311            || content.starts_with("ftps://")
312            || content.starts_with("mailto:")
313    }
314
315    /// Calculate fix to remove HTML tags while keeping content
316    ///
317    /// For self-closing tags like `<br/>`, returns a single fix to remove the tag.
318    /// For paired tags like `<span>text</span>`, returns the replacement text (just the content).
319    ///
320    /// Returns (range, replacement_text) where range is the bytes to replace
321    /// and replacement_text is what to put there (content without tags, or empty for self-closing).
322    fn calculate_fix(
323        &self,
324        content: &str,
325        opening_tag: &str,
326        tag_byte_start: usize,
327    ) -> Option<(std::ops::Range<usize>, String)> {
328        // Check if it's a self-closing tag (ends with />)
329        if opening_tag.ends_with("/>") {
330            return Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()));
331        }
332
333        // Extract tag name from opening tag (e.g., "<div>" -> "div", "<span class='x'>" -> "span")
334        let tag_name = opening_tag
335            .trim_start_matches('<')
336            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
337            .next()?
338            .to_lowercase();
339
340        // Build the closing tag pattern
341        let closing_tag = format!("</{tag_name}>");
342
343        // Search for the closing tag after the opening tag
344        let search_start = tag_byte_start + opening_tag.len();
345        if let Some(closing_pos) = content[search_start..].find(&closing_tag) {
346            let closing_byte_start = search_start + closing_pos;
347            let closing_byte_end = closing_byte_start + closing_tag.len();
348
349            // Extract the content between tags
350            let inner_content = &content[search_start..closing_byte_start];
351
352            return Some((tag_byte_start..closing_byte_end, inner_content.to_string()));
353        }
354
355        // If no closing tag found, just remove the opening tag
356        Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()))
357    }
358}
359
360impl Rule for MD033NoInlineHtml {
361    fn name(&self) -> &'static str {
362        "MD033"
363    }
364
365    fn description(&self) -> &'static str {
366        "Inline HTML is not allowed"
367    }
368
369    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
370        let content = ctx.content;
371
372        // Early return: if no HTML tags at all, skip processing
373        if content.is_empty() || !ctx.likely_has_html() {
374            return Ok(Vec::new());
375        }
376
377        // Quick check for HTML tag pattern before expensive processing
378        if !HTML_TAG_QUICK_CHECK.is_match(content) {
379            return Ok(Vec::new());
380        }
381
382        let mut warnings = Vec::new();
383        let lines: Vec<&str> = content.lines().collect();
384
385        // Track nomarkdown and comment blocks (Kramdown extension)
386        let mut in_nomarkdown = false;
387        let mut in_comment = false;
388        let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
389        let mut nomarkdown_start = 0;
390        let mut comment_start = 0;
391
392        for (i, line) in lines.iter().enumerate() {
393            let line_num = i + 1;
394
395            // Check for nomarkdown start
396            if line.trim() == "{::nomarkdown}" {
397                in_nomarkdown = true;
398                nomarkdown_start = line_num;
399            } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
400                in_nomarkdown = false;
401                nomarkdown_ranges.push((nomarkdown_start, line_num));
402            }
403
404            // Check for comment blocks
405            if line.trim() == "{::comment}" {
406                in_comment = true;
407                comment_start = line_num;
408            } else if line.trim() == "{:/comment}" && in_comment {
409                in_comment = false;
410                nomarkdown_ranges.push((comment_start, line_num));
411            }
412        }
413
414        // Use centralized HTML parser to get all HTML tags (including multiline)
415        let html_tags = ctx.html_tags();
416
417        for html_tag in html_tags.iter() {
418            // Skip closing tags (only warn on opening tags)
419            if html_tag.is_closing {
420                continue;
421            }
422
423            let line_num = html_tag.line;
424            let tag_byte_start = html_tag.byte_offset;
425
426            // Reconstruct tag string from byte offsets
427            let tag = &content[html_tag.byte_offset..html_tag.byte_end];
428
429            // Skip tags in code blocks (uses proper code block detection from LintContext)
430            if ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
431                continue;
432            }
433
434            // Skip Kramdown extensions and block attributes
435            if let Some(line) = lines.get(line_num.saturating_sub(1))
436                && (is_kramdown_extension(line) || is_kramdown_block_attribute(line))
437            {
438                continue;
439            }
440
441            // Skip lines inside nomarkdown blocks
442            if nomarkdown_ranges
443                .iter()
444                .any(|(start, end)| line_num >= *start && line_num <= *end)
445            {
446                continue;
447            }
448
449            // Skip HTML tags inside HTML comments
450            if ctx.is_in_html_comment(tag_byte_start) {
451                continue;
452            }
453
454            // Skip HTML comments themselves
455            if self.is_html_comment(tag) {
456                continue;
457            }
458
459            // Skip angle brackets inside link reference definition titles
460            // e.g., [ref]: url "Title with <angle brackets>"
461            if ctx.is_in_link_title(tag_byte_start) {
462                continue;
463            }
464
465            // Skip JSX components in MDX files (e.g., <Chart />, <MyComponent>)
466            if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
467                continue;
468            }
469
470            // Skip non-HTML elements (placeholder syntax like <NAME>, <resource>)
471            if !Self::is_html_element_or_custom(&html_tag.tag_name) {
472                continue;
473            }
474
475            // Skip likely programming type annotations
476            if self.is_likely_type_annotation(tag) {
477                continue;
478            }
479
480            // Skip email addresses in angle brackets
481            if self.is_email_address(tag) {
482                continue;
483            }
484
485            // Skip URLs in angle brackets
486            if self.is_url_in_angle_brackets(tag) {
487                continue;
488            }
489
490            // Skip tags inside code spans (use byte offset for reliable multi-line span detection)
491            if ctx.is_byte_offset_in_code_span(tag_byte_start) {
492                continue;
493            }
494
495            // Skip allowed tags
496            if self.is_tag_allowed(tag) {
497                continue;
498            }
499
500            // Skip tags with markdown attribute in MkDocs mode
501            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
502                continue;
503            }
504
505            // Calculate fix to remove HTML tags but keep content
506            let fix = self
507                .calculate_fix(content, tag, tag_byte_start)
508                .map(|(range, replacement)| Fix { range, replacement });
509
510            // Calculate actual end line and column for multiline tags
511            // Use byte_end - 1 to get the last character position of the tag
512            let (end_line, end_col) = if html_tag.byte_end > 0 {
513                ctx.offset_to_line_col(html_tag.byte_end - 1)
514            } else {
515                (line_num, html_tag.end_col + 1)
516            };
517
518            // Report the HTML tag
519            warnings.push(LintWarning {
520                rule_name: Some(self.name().to_string()),
521                line: line_num,
522                column: html_tag.start_col + 1, // Convert to 1-indexed
523                end_line,                       // Actual end line for multiline tags
524                end_column: end_col + 1,        // Actual end column
525                message: format!("Inline HTML found: {tag}"),
526                severity: Severity::Warning,
527                fix,
528            });
529        }
530
531        Ok(warnings)
532    }
533
534    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
535        // No fix for MD033: do not remove or alter HTML, just return the input unchanged
536        Ok(ctx.content.to_string())
537    }
538
539    fn fix_capability(&self) -> crate::rule::FixCapability {
540        crate::rule::FixCapability::Unfixable
541    }
542
543    /// Get the category of this rule for selective processing
544    fn category(&self) -> RuleCategory {
545        RuleCategory::Html
546    }
547
548    /// Check if this rule should be skipped
549    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
550        ctx.content.is_empty() || !ctx.likely_has_html()
551    }
552
553    fn as_any(&self) -> &dyn std::any::Any {
554        self
555    }
556
557    fn default_config_section(&self) -> Option<(String, toml::Value)> {
558        let json_value = serde_json::to_value(&self.config).ok()?;
559        Some((
560            self.name().to_string(),
561            crate::rule_config_serde::json_to_toml_value(&json_value)?,
562        ))
563    }
564
565    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
566    where
567        Self: Sized,
568    {
569        let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
570        Box::new(Self::from_config_struct(rule_config))
571    }
572}
573
574#[cfg(test)]
575mod tests {
576    use super::*;
577    use crate::lint_context::LintContext;
578    use crate::rule::Rule;
579
580    #[test]
581    fn test_md033_basic_html() {
582        let rule = MD033NoInlineHtml::default();
583        let content = "<div>Some content</div>";
584        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
585        let result = rule.check(&ctx).unwrap();
586        // Only reports opening tags, not closing tags
587        assert_eq!(result.len(), 1); // Only <div>, not </div>
588        assert!(result[0].message.starts_with("Inline HTML found: <div>"));
589    }
590
591    #[test]
592    fn test_md033_case_insensitive() {
593        let rule = MD033NoInlineHtml::default();
594        let content = "<DiV>Some <B>content</B></dIv>";
595        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
596        let result = rule.check(&ctx).unwrap();
597        // Only reports opening tags, not closing tags
598        assert_eq!(result.len(), 2); // <DiV>, <B> (not </B>, </dIv>)
599        assert_eq!(result[0].message, "Inline HTML found: <DiV>");
600        assert_eq!(result[1].message, "Inline HTML found: <B>");
601    }
602
603    #[test]
604    fn test_md033_allowed_tags() {
605        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
606        let content = "<div>Allowed</div><p>Not allowed</p><br/>";
607        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
608        let result = rule.check(&ctx).unwrap();
609        // Only warnings for non-allowed opening tags (<p> only, div and br are allowed)
610        assert_eq!(result.len(), 1);
611        assert_eq!(result[0].message, "Inline HTML found: <p>");
612
613        // Test case-insensitivity of allowed tags
614        let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
615        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
616        let result2 = rule.check(&ctx2).unwrap();
617        assert_eq!(result2.len(), 1); // Only <P> flagged
618        assert_eq!(result2[0].message, "Inline HTML found: <P>");
619    }
620
621    #[test]
622    fn test_md033_html_comments() {
623        let rule = MD033NoInlineHtml::default();
624        let content = "<!-- This is a comment --> <p>Not a comment</p>";
625        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
626        let result = rule.check(&ctx).unwrap();
627        // Should detect warnings for HTML opening tags (comments are skipped, closing tags not reported)
628        assert_eq!(result.len(), 1); // Only <p>
629        assert_eq!(result[0].message, "Inline HTML found: <p>");
630    }
631
632    #[test]
633    fn test_md033_tags_in_links() {
634        let rule = MD033NoInlineHtml::default();
635        let content = "[Link](http://example.com/<div>)";
636        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
637        let result = rule.check(&ctx).unwrap();
638        // The <div> in the URL should be detected as HTML (not skipped)
639        assert_eq!(result.len(), 1);
640        assert_eq!(result[0].message, "Inline HTML found: <div>");
641
642        let content2 = "[Link <a>text</a>](url)";
643        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
644        let result2 = rule.check(&ctx2).unwrap();
645        // Only reports opening tags
646        assert_eq!(result2.len(), 1); // Only <a>
647        assert_eq!(result2[0].message, "Inline HTML found: <a>");
648    }
649
650    #[test]
651    fn test_md033_fix_escaping() {
652        let rule = MD033NoInlineHtml::default();
653        let content = "Text with <div> and <br/> tags.";
654        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
655        let fixed_content = rule.fix(&ctx).unwrap();
656        // No fix for HTML tags; output should be unchanged
657        assert_eq!(fixed_content, content);
658    }
659
660    #[test]
661    fn test_md033_in_code_blocks() {
662        let rule = MD033NoInlineHtml::default();
663        let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
664        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
665        let result = rule.check(&ctx).unwrap();
666        // Only reports opening tags outside code block
667        assert_eq!(result.len(), 1); // Only <div> outside code block
668        assert_eq!(result[0].message, "Inline HTML found: <div>");
669    }
670
671    #[test]
672    fn test_md033_in_code_spans() {
673        let rule = MD033NoInlineHtml::default();
674        let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
675        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
676        let result = rule.check(&ctx).unwrap();
677        // Should detect <br/> outside code span, but not tags inside code span
678        assert_eq!(result.len(), 1);
679        assert_eq!(result[0].message, "Inline HTML found: <br/>");
680    }
681
682    #[test]
683    fn test_md033_issue_90_code_span_with_diff_block() {
684        // Test for issue #90: inline code span followed by diff code block
685        let rule = MD033NoInlineHtml::default();
686        let content = r#"# Heading
687
688`<env>`
689
690```diff
691- this
692+ that
693```"#;
694        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
695        let result = rule.check(&ctx).unwrap();
696        // Should NOT detect <env> as HTML since it's inside backticks
697        assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
698    }
699
700    #[test]
701    fn test_md033_multiple_code_spans_with_angle_brackets() {
702        // Test multiple code spans on same line
703        let rule = MD033NoInlineHtml::default();
704        let content = "`<one>` and `<two>` and `<three>` are all code spans";
705        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
706        let result = rule.check(&ctx).unwrap();
707        assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
708    }
709
710    #[test]
711    fn test_md033_nested_angle_brackets_in_code_span() {
712        // Test nested angle brackets
713        let rule = MD033NoInlineHtml::default();
714        let content = "Text with `<<nested>>` brackets";
715        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
716        let result = rule.check(&ctx).unwrap();
717        assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
718    }
719
720    #[test]
721    fn test_md033_code_span_at_end_before_code_block() {
722        // Test code span at end of line before code block
723        let rule = MD033NoInlineHtml::default();
724        let content = "Testing `<test>`\n```\ncode here\n```";
725        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
726        let result = rule.check(&ctx).unwrap();
727        assert_eq!(result.len(), 0, "Should handle code span before code block");
728    }
729
730    #[test]
731    fn test_md033_quick_fix_inline_tag() {
732        // Test Quick Fix for inline HTML tags - keeps content, removes tags
733        let rule = MD033NoInlineHtml::default();
734        let content = "This has <span>inline text</span> that should keep content.";
735        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
736        let result = rule.check(&ctx).unwrap();
737
738        assert_eq!(result.len(), 1, "Should find one HTML tag");
739        assert!(result[0].fix.is_some(), "Should have a fix");
740
741        let fix = result[0].fix.as_ref().unwrap();
742        assert_eq!(&content[fix.range.clone()], "<span>inline text</span>");
743        assert_eq!(fix.replacement, "inline text");
744    }
745
746    #[test]
747    fn test_md033_quick_fix_multiline_tag() {
748        // Test Quick Fix for multiline HTML tags - keeps content
749        let rule = MD033NoInlineHtml::default();
750        let content = "<div>\nBlock content\n</div>";
751        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
752        let result = rule.check(&ctx).unwrap();
753
754        assert_eq!(result.len(), 1, "Should find one HTML tag");
755        assert!(result[0].fix.is_some(), "Should have a fix");
756
757        let fix = result[0].fix.as_ref().unwrap();
758        assert_eq!(&content[fix.range.clone()], "<div>\nBlock content\n</div>");
759        assert_eq!(fix.replacement, "\nBlock content\n");
760    }
761
762    #[test]
763    fn test_md033_quick_fix_self_closing_tag() {
764        // Test Quick Fix for self-closing tags - removes tag (no content)
765        let rule = MD033NoInlineHtml::default();
766        let content = "Self-closing: <br/>";
767        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
768        let result = rule.check(&ctx).unwrap();
769
770        assert_eq!(result.len(), 1, "Should find one HTML tag");
771        assert!(result[0].fix.is_some(), "Should have a fix");
772
773        let fix = result[0].fix.as_ref().unwrap();
774        assert_eq!(&content[fix.range.clone()], "<br/>");
775        assert_eq!(fix.replacement, "");
776    }
777
778    #[test]
779    fn test_md033_quick_fix_multiple_tags() {
780        // Test Quick Fix with multiple HTML tags - keeps content for both
781        let rule = MD033NoInlineHtml::default();
782        let content = "<span>first</span> and <strong>second</strong>";
783        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
784        let result = rule.check(&ctx).unwrap();
785
786        assert_eq!(result.len(), 2, "Should find two HTML tags");
787        assert!(result[0].fix.is_some(), "First tag should have a fix");
788        assert!(result[1].fix.is_some(), "Second tag should have a fix");
789
790        let fix1 = result[0].fix.as_ref().unwrap();
791        assert_eq!(&content[fix1.range.clone()], "<span>first</span>");
792        assert_eq!(fix1.replacement, "first");
793
794        let fix2 = result[1].fix.as_ref().unwrap();
795        assert_eq!(&content[fix2.range.clone()], "<strong>second</strong>");
796        assert_eq!(fix2.replacement, "second");
797    }
798
799    #[test]
800    fn test_md033_skip_angle_brackets_in_link_titles() {
801        // Angle brackets inside link reference definition titles should not be flagged as HTML
802        let rule = MD033NoInlineHtml::default();
803        let content = r#"# Test
804
805[example]: <https://example.com> "Title with <Angle Brackets> inside"
806
807Regular text with <div>content</div> HTML tag.
808"#;
809        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
810        let result = rule.check(&ctx).unwrap();
811
812        // Should only flag <div>, not <Angle Brackets> in the title (not a valid HTML element)
813        // Opening tag only (markdownlint behavior)
814        assert_eq!(result.len(), 1, "Should find opening div tag");
815        assert!(
816            result[0].message.contains("<div>"),
817            "Should flag <div>, got: {}",
818            result[0].message
819        );
820    }
821
822    #[test]
823    fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
824        // Test with single-quoted title
825        let rule = MD033NoInlineHtml::default();
826        let content = r#"[ref]: url 'Title <Help Wanted> here'
827
828<span>text</span> here
829"#;
830        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
831        let result = rule.check(&ctx).unwrap();
832
833        // <Help Wanted> is not a valid HTML element, so only <span> is flagged
834        // Opening tag only (markdownlint behavior)
835        assert_eq!(result.len(), 1, "Should find opening span tag");
836        assert!(
837            result[0].message.contains("<span>"),
838            "Should flag <span>, got: {}",
839            result[0].message
840        );
841    }
842
843    #[test]
844    fn test_md033_multiline_tag_end_line_calculation() {
845        // Test that multiline HTML tags report correct end_line
846        let rule = MD033NoInlineHtml::default();
847        let content = "<div\n  class=\"test\"\n  id=\"example\">";
848        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
849        let result = rule.check(&ctx).unwrap();
850
851        assert_eq!(result.len(), 1, "Should find one HTML tag");
852        // Tag starts on line 1
853        assert_eq!(result[0].line, 1, "Start line should be 1");
854        // Tag ends on line 3 (where the closing > is)
855        assert_eq!(result[0].end_line, 3, "End line should be 3");
856    }
857
858    #[test]
859    fn test_md033_single_line_tag_same_start_end_line() {
860        // Test that single-line HTML tags have same start and end line
861        let rule = MD033NoInlineHtml::default();
862        let content = "Some text <div class=\"test\"> more text";
863        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
864        let result = rule.check(&ctx).unwrap();
865
866        assert_eq!(result.len(), 1, "Should find one HTML tag");
867        assert_eq!(result[0].line, 1, "Start line should be 1");
868        assert_eq!(result[0].end_line, 1, "End line should be 1 for single-line tag");
869    }
870
871    #[test]
872    fn test_md033_multiline_tag_with_many_attributes() {
873        // Test multiline tag spanning multiple lines
874        let rule = MD033NoInlineHtml::default();
875        let content =
876            "Text\n<div\n  data-attr1=\"value1\"\n  data-attr2=\"value2\"\n  data-attr3=\"value3\">\nMore text";
877        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
878        let result = rule.check(&ctx).unwrap();
879
880        assert_eq!(result.len(), 1, "Should find one HTML tag");
881        // Tag starts on line 2 (first line is "Text")
882        assert_eq!(result[0].line, 2, "Start line should be 2");
883        // Tag ends on line 5 (where the closing > is)
884        assert_eq!(result[0].end_line, 5, "End line should be 5");
885    }
886}