Skip to main content

rumdl_lib/rules/
md033_no_inline_html.rs

1//!
2//! Rule MD033: No HTML tags
3//!
4//! See [docs/md033.md](../../docs/md033.md) for full documentation, configuration, and examples.
5
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::regex_cache::*;
9use std::collections::HashSet;
10
11mod md033_config;
12use md033_config::MD033Config;
13
14#[derive(Clone)]
15pub struct MD033NoInlineHtml {
16    config: MD033Config,
17    allowed: HashSet<String>,
18    disallowed: HashSet<String>,
19}
20
21impl Default for MD033NoInlineHtml {
22    fn default() -> Self {
23        let config = MD033Config::default();
24        let allowed = config.allowed_set();
25        let disallowed = config.disallowed_set();
26        Self {
27            config,
28            allowed,
29            disallowed,
30        }
31    }
32}
33
34impl MD033NoInlineHtml {
35    pub fn new() -> Self {
36        Self::default()
37    }
38
39    pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
40        let config = MD033Config {
41            allowed: allowed_vec.clone(),
42            disallowed: Vec::new(),
43            fix: false,
44            br_style: md033_config::BrStyle::default(),
45        };
46        let allowed = config.allowed_set();
47        let disallowed = config.disallowed_set();
48        Self {
49            config,
50            allowed,
51            disallowed,
52        }
53    }
54
55    pub fn with_disallowed(disallowed_vec: Vec<String>) -> Self {
56        let config = MD033Config {
57            allowed: Vec::new(),
58            disallowed: disallowed_vec.clone(),
59            fix: false,
60            br_style: md033_config::BrStyle::default(),
61        };
62        let allowed = config.allowed_set();
63        let disallowed = config.disallowed_set();
64        Self {
65            config,
66            allowed,
67            disallowed,
68        }
69    }
70
71    /// Create a new rule with auto-fix enabled
72    pub fn with_fix(fix: bool) -> Self {
73        let config = MD033Config {
74            allowed: Vec::new(),
75            disallowed: Vec::new(),
76            fix,
77            br_style: md033_config::BrStyle::default(),
78        };
79        let allowed = config.allowed_set();
80        let disallowed = config.disallowed_set();
81        Self {
82            config,
83            allowed,
84            disallowed,
85        }
86    }
87
88    pub fn from_config_struct(config: MD033Config) -> Self {
89        let allowed = config.allowed_set();
90        let disallowed = config.disallowed_set();
91        Self {
92            config,
93            allowed,
94            disallowed,
95        }
96    }
97
98    // Efficient check for allowed tags using HashSet (case-insensitive)
99    #[inline]
100    fn is_tag_allowed(&self, tag: &str) -> bool {
101        if self.allowed.is_empty() {
102            return false;
103        }
104        // Remove angle brackets and slashes, then split by whitespace or '>'
105        let tag = tag.trim_start_matches('<').trim_start_matches('/');
106        let tag_name = tag
107            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
108            .next()
109            .unwrap_or("");
110        self.allowed.contains(&tag_name.to_lowercase())
111    }
112
113    /// Check if a tag is in the disallowed set (for disallowed-only mode)
114    #[inline]
115    fn is_tag_disallowed(&self, tag: &str) -> bool {
116        if self.disallowed.is_empty() {
117            return false;
118        }
119        // Remove angle brackets and slashes, then split by whitespace or '>'
120        let tag = tag.trim_start_matches('<').trim_start_matches('/');
121        let tag_name = tag
122            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
123            .next()
124            .unwrap_or("");
125        self.disallowed.contains(&tag_name.to_lowercase())
126    }
127
128    /// Check if operating in disallowed-only mode
129    #[inline]
130    fn is_disallowed_mode(&self) -> bool {
131        self.config.is_disallowed_mode()
132    }
133
134    // Check if a tag is an HTML comment
135    #[inline]
136    fn is_html_comment(&self, tag: &str) -> bool {
137        tag.starts_with("<!--") && tag.ends_with("-->")
138    }
139
140    /// Check if a tag name is a valid HTML element or custom element.
141    /// Returns false for placeholder syntax like `<NAME>`, `<resource>`, `<actual>`.
142    ///
143    /// Per HTML spec, custom elements must contain a hyphen (e.g., `<my-component>`).
144    #[inline]
145    fn is_html_element_or_custom(tag_name: &str) -> bool {
146        const HTML_ELEMENTS: &[&str] = &[
147            // Document structure
148            "html",
149            "head",
150            "body",
151            "title",
152            "base",
153            "link",
154            "meta",
155            "style",
156            // Sections
157            "article",
158            "section",
159            "nav",
160            "aside",
161            "h1",
162            "h2",
163            "h3",
164            "h4",
165            "h5",
166            "h6",
167            "hgroup",
168            "header",
169            "footer",
170            "address",
171            "main",
172            "search",
173            // Grouping
174            "p",
175            "hr",
176            "pre",
177            "blockquote",
178            "ol",
179            "ul",
180            "menu",
181            "li",
182            "dl",
183            "dt",
184            "dd",
185            "figure",
186            "figcaption",
187            "div",
188            // Text-level
189            "a",
190            "em",
191            "strong",
192            "small",
193            "s",
194            "cite",
195            "q",
196            "dfn",
197            "abbr",
198            "ruby",
199            "rt",
200            "rp",
201            "data",
202            "time",
203            "code",
204            "var",
205            "samp",
206            "kbd",
207            "sub",
208            "sup",
209            "i",
210            "b",
211            "u",
212            "mark",
213            "bdi",
214            "bdo",
215            "span",
216            "br",
217            "wbr",
218            // Edits
219            "ins",
220            "del",
221            // Embedded
222            "picture",
223            "source",
224            "img",
225            "iframe",
226            "embed",
227            "object",
228            "param",
229            "video",
230            "audio",
231            "track",
232            "map",
233            "area",
234            "svg",
235            "math",
236            "canvas",
237            // Tables
238            "table",
239            "caption",
240            "colgroup",
241            "col",
242            "tbody",
243            "thead",
244            "tfoot",
245            "tr",
246            "td",
247            "th",
248            // Forms
249            "form",
250            "label",
251            "input",
252            "button",
253            "select",
254            "datalist",
255            "optgroup",
256            "option",
257            "textarea",
258            "output",
259            "progress",
260            "meter",
261            "fieldset",
262            "legend",
263            // Interactive
264            "details",
265            "summary",
266            "dialog",
267            // Scripting
268            "script",
269            "noscript",
270            "template",
271            "slot",
272            // Deprecated but recognized
273            "acronym",
274            "applet",
275            "basefont",
276            "big",
277            "center",
278            "dir",
279            "font",
280            "frame",
281            "frameset",
282            "isindex",
283            "marquee",
284            "noembed",
285            "noframes",
286            "plaintext",
287            "strike",
288            "tt",
289            "xmp",
290        ];
291
292        let lower = tag_name.to_ascii_lowercase();
293        if HTML_ELEMENTS.contains(&lower.as_str()) {
294            return true;
295        }
296        // Custom elements must contain a hyphen per HTML spec
297        tag_name.contains('-')
298    }
299
300    // Check if a tag is likely a programming type annotation rather than HTML
301    #[inline]
302    fn is_likely_type_annotation(&self, tag: &str) -> bool {
303        // Common programming type names that are often used in generics
304        const COMMON_TYPES: &[&str] = &[
305            "string",
306            "number",
307            "any",
308            "void",
309            "null",
310            "undefined",
311            "array",
312            "promise",
313            "function",
314            "error",
315            "date",
316            "regexp",
317            "symbol",
318            "bigint",
319            "map",
320            "set",
321            "weakmap",
322            "weakset",
323            "iterator",
324            "generator",
325            "t",
326            "u",
327            "v",
328            "k",
329            "e", // Common single-letter type parameters
330            "userdata",
331            "apiresponse",
332            "config",
333            "options",
334            "params",
335            "result",
336            "response",
337            "request",
338            "data",
339            "item",
340            "element",
341            "node",
342        ];
343
344        let tag_content = tag
345            .trim_start_matches('<')
346            .trim_end_matches('>')
347            .trim_start_matches('/');
348        let tag_name = tag_content
349            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
350            .next()
351            .unwrap_or("");
352
353        // Check if it's a simple tag (no attributes) with a common type name
354        if !tag_content.contains(' ') && !tag_content.contains('=') {
355            COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
356        } else {
357            false
358        }
359    }
360
361    // Check if a tag is actually an email address in angle brackets
362    #[inline]
363    fn is_email_address(&self, tag: &str) -> bool {
364        let content = tag.trim_start_matches('<').trim_end_matches('>');
365        // Simple email pattern: contains @ and has reasonable structure
366        content.contains('@')
367            && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
368            && content.split('@').count() == 2
369            && content.split('@').all(|part| !part.is_empty())
370    }
371
372    // Check if a tag has the markdown attribute (MkDocs/Material for MkDocs)
373    #[inline]
374    fn has_markdown_attribute(&self, tag: &str) -> bool {
375        // Check for various forms of markdown attribute
376        // Examples: <div markdown>, <div markdown="1">, <div class="result" markdown>
377        tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
378    }
379
380    /// Check if a tag contains JSX-specific attributes that indicate it's JSX, not HTML
381    /// JSX uses different attribute names than HTML:
382    /// - `className` instead of `class`
383    /// - `htmlFor` instead of `for`
384    /// - camelCase event handlers (`onClick`, `onChange`, `onSubmit`, etc.)
385    /// - JSX expression syntax `={...}` for dynamic values
386    #[inline]
387    fn has_jsx_attributes(tag: &str) -> bool {
388        // JSX-specific attribute names (HTML uses class, for, onclick, etc.)
389        tag.contains("className")
390            || tag.contains("htmlFor")
391            || tag.contains("dangerouslySetInnerHTML")
392            // camelCase event handlers (JSX uses onClick, HTML uses onclick)
393            || tag.contains("onClick")
394            || tag.contains("onChange")
395            || tag.contains("onSubmit")
396            || tag.contains("onFocus")
397            || tag.contains("onBlur")
398            || tag.contains("onKeyDown")
399            || tag.contains("onKeyUp")
400            || tag.contains("onKeyPress")
401            || tag.contains("onMouseDown")
402            || tag.contains("onMouseUp")
403            || tag.contains("onMouseEnter")
404            || tag.contains("onMouseLeave")
405            // JSX expression syntax: ={expression} or ={ expression }
406            || tag.contains("={")
407    }
408
409    // Check if a tag is actually a URL in angle brackets
410    #[inline]
411    fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
412        let content = tag.trim_start_matches('<').trim_end_matches('>');
413        // Check for common URL schemes
414        content.starts_with("http://")
415            || content.starts_with("https://")
416            || content.starts_with("ftp://")
417            || content.starts_with("ftps://")
418            || content.starts_with("mailto:")
419    }
420
421    /// Convert paired HTML tags to their Markdown equivalents.
422    /// Returns None if the tag cannot be safely converted (has nested tags, HTML entities, etc.)
423    fn convert_to_markdown(tag_name: &str, inner_content: &str) -> Option<String> {
424        // Skip if content contains nested HTML tags
425        if inner_content.contains('<') {
426            return None;
427        }
428        // Skip if content contains HTML entities (e.g., &vert;, &amp;, &lt;)
429        // These need HTML context to render correctly; markdown won't process them
430        if inner_content.contains('&') && inner_content.contains(';') {
431            // Check for common HTML entity patterns
432            let has_entity = inner_content
433                .split('&')
434                .skip(1)
435                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
436            if has_entity {
437                return None;
438            }
439        }
440        match tag_name {
441            "em" | "i" => Some(format!("*{inner_content}*")),
442            "strong" | "b" => Some(format!("**{inner_content}**")),
443            "code" => {
444                // Handle backticks in content by using double backticks with padding
445                if inner_content.contains('`') {
446                    Some(format!("`` {inner_content} ``"))
447                } else {
448                    Some(format!("`{inner_content}`"))
449                }
450            }
451            _ => None,
452        }
453    }
454
455    /// Convert self-closing HTML tags to their Markdown equivalents.
456    fn convert_self_closing_to_markdown(&self, tag_name: &str) -> Option<String> {
457        match tag_name {
458            "br" => match self.config.br_style {
459                md033_config::BrStyle::TrailingSpaces => Some("  \n".to_string()),
460                md033_config::BrStyle::Backslash => Some("\\\n".to_string()),
461            },
462            "hr" => Some("\n---\n".to_string()),
463            _ => None,
464        }
465    }
466
467    /// Check if an HTML tag has attributes that would make conversion unsafe
468    fn has_significant_attributes(opening_tag: &str) -> bool {
469        // Tags with just whitespace or empty are fine
470        let tag_content = opening_tag
471            .trim_start_matches('<')
472            .trim_end_matches('>')
473            .trim_end_matches('/');
474
475        // Split by whitespace; if there's more than the tag name, it has attributes
476        let parts: Vec<&str> = tag_content.split_whitespace().collect();
477        parts.len() > 1
478    }
479
480    /// Check if a tag appears to be nested inside another HTML element
481    /// by looking at the surrounding context (e.g., `<code><em>text</em></code>`)
482    fn is_nested_in_html(content: &str, tag_byte_start: usize, tag_byte_end: usize) -> bool {
483        // Check if there's a `>` immediately before this tag (indicating inside another element)
484        if tag_byte_start > 0 {
485            let before = &content[..tag_byte_start];
486            let before_trimmed = before.trim_end();
487            if before_trimmed.ends_with('>') && !before_trimmed.ends_with("->") {
488                // Check it's not a closing tag or comment
489                if let Some(last_lt) = before_trimmed.rfind('<') {
490                    let potential_tag = &before_trimmed[last_lt..];
491                    // Skip if it's a closing tag (</...>) or comment (<!--)
492                    if !potential_tag.starts_with("</") && !potential_tag.starts_with("<!--") {
493                        return true;
494                    }
495                }
496            }
497        }
498        // Check if there's a `<` immediately after the closing tag (indicating inside another element)
499        if tag_byte_end < content.len() {
500            let after = &content[tag_byte_end..];
501            let after_trimmed = after.trim_start();
502            if after_trimmed.starts_with("</") {
503                return true;
504            }
505        }
506        false
507    }
508
509    /// Calculate fix to remove HTML tags while keeping content
510    ///
511    /// For self-closing tags like `<br/>`, returns a single fix to remove the tag.
512    /// For paired tags like `<span>text</span>`, returns the replacement text (just the content).
513    ///
514    /// Returns (range, replacement_text) where range is the bytes to replace
515    /// and replacement_text is what to put there (content without tags, or empty for self-closing).
516    ///
517    /// When `fix` is enabled and `in_html_block` is true, returns None to avoid
518    /// converting tags that are nested inside HTML block elements (like `<pre>`).
519    fn calculate_fix(
520        &self,
521        content: &str,
522        opening_tag: &str,
523        tag_byte_start: usize,
524        in_html_block: bool,
525    ) -> Option<(std::ops::Range<usize>, String)> {
526        // Extract tag name from opening tag
527        let tag_name = opening_tag
528            .trim_start_matches('<')
529            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
530            .next()?
531            .to_lowercase();
532
533        // Check if it's a self-closing tag (ends with /> or is a void element like <br>)
534        let is_self_closing =
535            opening_tag.ends_with("/>") || matches!(tag_name.as_str(), "br" | "hr" | "img" | "input" | "meta" | "link");
536
537        if is_self_closing {
538            // When fix is enabled, try to convert to Markdown equivalent
539            // But skip if we're inside an HTML block (would break structure)
540            if self.config.fix
541                && MD033Config::is_safe_fixable_tag(&tag_name)
542                && !in_html_block
543                && let Some(markdown) = self.convert_self_closing_to_markdown(&tag_name)
544            {
545                return Some((tag_byte_start..tag_byte_start + opening_tag.len(), markdown));
546            }
547            // Can't convert this self-closing tag to Markdown, don't provide a fix
548            // (e.g., <img>, <input>, <meta> - these have no Markdown equivalent)
549            return None;
550        }
551
552        // Search for the closing tag after the opening tag (case-insensitive)
553        let search_start = tag_byte_start + opening_tag.len();
554        let search_slice = &content[search_start..];
555
556        // Find closing tag case-insensitively
557        let closing_tag_lower = format!("</{tag_name}>");
558        let closing_pos = search_slice.to_ascii_lowercase().find(&closing_tag_lower);
559
560        if let Some(closing_pos) = closing_pos {
561            // Get actual closing tag from original content to get correct byte length
562            let closing_tag_len = closing_tag_lower.len();
563            let closing_byte_start = search_start + closing_pos;
564            let closing_byte_end = closing_byte_start + closing_tag_len;
565
566            // Extract the content between tags
567            let inner_content = &content[search_start..closing_byte_start];
568
569            // Skip auto-fix if inside an HTML block (like <pre>, <div>, etc.)
570            // Converting tags inside HTML blocks would break the intended structure
571            if in_html_block {
572                return None;
573            }
574
575            // Skip auto-fix if this tag is nested inside another HTML element
576            // e.g., <code><em>text</em></code> - don't convert the inner <em>
577            if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
578                return None;
579            }
580
581            // When fix is enabled and tag is safe to convert, try markdown conversion
582            // Tags with attributes are NOT converted - leave them as-is
583            if self.config.fix && MD033Config::is_safe_fixable_tag(&tag_name) {
584                if Self::has_significant_attributes(opening_tag) {
585                    // Don't provide a fix for tags with attributes
586                    // User may want to keep the attributes, so leave as-is
587                    return None;
588                }
589                if let Some(markdown) = Self::convert_to_markdown(&tag_name, inner_content) {
590                    return Some((tag_byte_start..closing_byte_end, markdown));
591                }
592                // convert_to_markdown returned None, meaning content has nested tags or
593                // HTML entities that shouldn't be converted - leave as-is
594                return None;
595            }
596
597            // For non-fixable tags, don't provide a fix
598            // (e.g., <a href="...">Link</a>, <div>content</div>)
599            return None;
600        }
601
602        // If no closing tag found, don't provide a fix (malformed HTML)
603        None
604    }
605}
606
607impl Rule for MD033NoInlineHtml {
608    fn name(&self) -> &'static str {
609        "MD033"
610    }
611
612    fn description(&self) -> &'static str {
613        "Inline HTML is not allowed"
614    }
615
616    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
617        let content = ctx.content;
618
619        // Early return: if no HTML tags at all, skip processing
620        if content.is_empty() || !ctx.likely_has_html() {
621            return Ok(Vec::new());
622        }
623
624        // Quick check for HTML tag pattern before expensive processing
625        if !HTML_TAG_QUICK_CHECK.is_match(content) {
626            return Ok(Vec::new());
627        }
628
629        let mut warnings = Vec::new();
630        let lines: Vec<&str> = content.lines().collect();
631
632        // Track nomarkdown and comment blocks (Kramdown extension)
633        let mut in_nomarkdown = false;
634        let mut in_comment = false;
635        let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
636        let mut nomarkdown_start = 0;
637        let mut comment_start = 0;
638
639        for (i, line) in lines.iter().enumerate() {
640            let line_num = i + 1;
641
642            // Check for nomarkdown start
643            if line.trim() == "{::nomarkdown}" {
644                in_nomarkdown = true;
645                nomarkdown_start = line_num;
646            } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
647                in_nomarkdown = false;
648                nomarkdown_ranges.push((nomarkdown_start, line_num));
649            }
650
651            // Check for comment blocks
652            if line.trim() == "{::comment}" {
653                in_comment = true;
654                comment_start = line_num;
655            } else if line.trim() == "{:/comment}" && in_comment {
656                in_comment = false;
657                nomarkdown_ranges.push((comment_start, line_num));
658            }
659        }
660
661        // Use centralized HTML parser to get all HTML tags (including multiline)
662        let html_tags = ctx.html_tags();
663
664        for html_tag in html_tags.iter() {
665            // Skip closing tags (only warn on opening tags)
666            if html_tag.is_closing {
667                continue;
668            }
669
670            let line_num = html_tag.line;
671            let tag_byte_start = html_tag.byte_offset;
672
673            // Reconstruct tag string from byte offsets
674            let tag = &content[html_tag.byte_offset..html_tag.byte_end];
675
676            // Skip tags in code blocks (uses proper code block detection from LintContext)
677            if ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
678                continue;
679            }
680
681            // Skip Kramdown extensions and block attributes
682            if let Some(line) = lines.get(line_num.saturating_sub(1))
683                && (is_kramdown_extension(line) || is_kramdown_block_attribute(line))
684            {
685                continue;
686            }
687
688            // Skip lines inside nomarkdown blocks
689            if nomarkdown_ranges
690                .iter()
691                .any(|(start, end)| line_num >= *start && line_num <= *end)
692            {
693                continue;
694            }
695
696            // Skip HTML tags inside HTML comments
697            if ctx.is_in_html_comment(tag_byte_start) {
698                continue;
699            }
700
701            // Skip HTML comments themselves
702            if self.is_html_comment(tag) {
703                continue;
704            }
705
706            // Skip angle brackets inside link reference definition titles
707            // e.g., [ref]: url "Title with <angle brackets>"
708            if ctx.is_in_link_title(tag_byte_start) {
709                continue;
710            }
711
712            // Skip JSX components in MDX files (e.g., <Chart />, <MyComponent>)
713            if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
714                continue;
715            }
716
717            // Skip JSX fragments in MDX files (<> and </>)
718            if ctx.flavor.supports_jsx() && (html_tag.tag_name.is_empty() || tag == "<>" || tag == "</>") {
719                continue;
720            }
721
722            // Skip elements with JSX-specific attributes in MDX files
723            // e.g., <div className="...">, <button onClick={handler}>
724            if ctx.flavor.supports_jsx() && Self::has_jsx_attributes(tag) {
725                continue;
726            }
727
728            // Skip non-HTML elements (placeholder syntax like <NAME>, <resource>)
729            if !Self::is_html_element_or_custom(&html_tag.tag_name) {
730                continue;
731            }
732
733            // Skip likely programming type annotations
734            if self.is_likely_type_annotation(tag) {
735                continue;
736            }
737
738            // Skip email addresses in angle brackets
739            if self.is_email_address(tag) {
740                continue;
741            }
742
743            // Skip URLs in angle brackets
744            if self.is_url_in_angle_brackets(tag) {
745                continue;
746            }
747
748            // Skip tags inside code spans (use byte offset for reliable multi-line span detection)
749            if ctx.is_byte_offset_in_code_span(tag_byte_start) {
750                continue;
751            }
752
753            // Determine whether to report this tag based on mode:
754            // - Disallowed mode: only report tags in the disallowed list
755            // - Default mode: report all tags except those in the allowed list
756            if self.is_disallowed_mode() {
757                // In disallowed mode, skip tags NOT in the disallowed list
758                if !self.is_tag_disallowed(tag) {
759                    continue;
760                }
761            } else {
762                // In default mode, skip allowed tags
763                if self.is_tag_allowed(tag) {
764                    continue;
765                }
766            }
767
768            // Skip tags with markdown attribute in MkDocs mode
769            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
770                continue;
771            }
772
773            // Check if we're inside an HTML block (like <pre>, <div>, etc.)
774            let in_html_block = ctx.is_in_html_block(line_num);
775
776            // Calculate fix to remove HTML tags but keep content
777            let fix = self
778                .calculate_fix(content, tag, tag_byte_start, in_html_block)
779                .map(|(range, replacement)| Fix { range, replacement });
780
781            // Calculate actual end line and column for multiline tags
782            // Use byte_end - 1 to get the last character position of the tag
783            let (end_line, end_col) = if html_tag.byte_end > 0 {
784                ctx.offset_to_line_col(html_tag.byte_end - 1)
785            } else {
786                (line_num, html_tag.end_col + 1)
787            };
788
789            // Report the HTML tag
790            warnings.push(LintWarning {
791                rule_name: Some(self.name().to_string()),
792                line: line_num,
793                column: html_tag.start_col + 1, // Convert to 1-indexed
794                end_line,                       // Actual end line for multiline tags
795                end_column: end_col + 1,        // Actual end column
796                message: format!("Inline HTML found: {tag}"),
797                severity: Severity::Warning,
798                fix,
799            });
800        }
801
802        Ok(warnings)
803    }
804
805    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
806        // Auto-fix is opt-in: only apply if explicitly enabled in config
807        if !self.config.fix {
808            return Ok(ctx.content.to_string());
809        }
810
811        // Get warnings with their inline fixes
812        let warnings = self.check(ctx)?;
813
814        // If no warnings with fixes, return original content
815        if warnings.is_empty() || !warnings.iter().any(|w| w.fix.is_some()) {
816            return Ok(ctx.content.to_string());
817        }
818
819        // Collect all fixes and sort by range start (descending) to apply from end to beginning
820        let mut fixes: Vec<_> = warnings
821            .iter()
822            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
823            .collect();
824        fixes.sort_by(|a, b| b.0.cmp(&a.0));
825
826        // Apply fixes from end to beginning to preserve byte offsets
827        let mut result = ctx.content.to_string();
828        for (start, end, replacement) in fixes {
829            if start < result.len() && end <= result.len() && start <= end {
830                result.replace_range(start..end, replacement);
831            }
832        }
833
834        Ok(result)
835    }
836
837    fn fix_capability(&self) -> crate::rule::FixCapability {
838        if self.config.fix {
839            crate::rule::FixCapability::FullyFixable
840        } else {
841            crate::rule::FixCapability::Unfixable
842        }
843    }
844
845    /// Get the category of this rule for selective processing
846    fn category(&self) -> RuleCategory {
847        RuleCategory::Html
848    }
849
850    /// Check if this rule should be skipped
851    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
852        ctx.content.is_empty() || !ctx.likely_has_html()
853    }
854
855    fn as_any(&self) -> &dyn std::any::Any {
856        self
857    }
858
859    fn default_config_section(&self) -> Option<(String, toml::Value)> {
860        let json_value = serde_json::to_value(&self.config).ok()?;
861        Some((
862            self.name().to_string(),
863            crate::rule_config_serde::json_to_toml_value(&json_value)?,
864        ))
865    }
866
867    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
868    where
869        Self: Sized,
870    {
871        let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
872        Box::new(Self::from_config_struct(rule_config))
873    }
874}
875
876#[cfg(test)]
877mod tests {
878    use super::*;
879    use crate::lint_context::LintContext;
880    use crate::rule::Rule;
881
882    #[test]
883    fn test_md033_basic_html() {
884        let rule = MD033NoInlineHtml::default();
885        let content = "<div>Some content</div>";
886        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
887        let result = rule.check(&ctx).unwrap();
888        // Only reports opening tags, not closing tags
889        assert_eq!(result.len(), 1); // Only <div>, not </div>
890        assert!(result[0].message.starts_with("Inline HTML found: <div>"));
891    }
892
893    #[test]
894    fn test_md033_case_insensitive() {
895        let rule = MD033NoInlineHtml::default();
896        let content = "<DiV>Some <B>content</B></dIv>";
897        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
898        let result = rule.check(&ctx).unwrap();
899        // Only reports opening tags, not closing tags
900        assert_eq!(result.len(), 2); // <DiV>, <B> (not </B>, </dIv>)
901        assert_eq!(result[0].message, "Inline HTML found: <DiV>");
902        assert_eq!(result[1].message, "Inline HTML found: <B>");
903    }
904
905    #[test]
906    fn test_md033_allowed_tags() {
907        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
908        let content = "<div>Allowed</div><p>Not allowed</p><br/>";
909        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
910        let result = rule.check(&ctx).unwrap();
911        // Only warnings for non-allowed opening tags (<p> only, div and br are allowed)
912        assert_eq!(result.len(), 1);
913        assert_eq!(result[0].message, "Inline HTML found: <p>");
914
915        // Test case-insensitivity of allowed tags
916        let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
917        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
918        let result2 = rule.check(&ctx2).unwrap();
919        assert_eq!(result2.len(), 1); // Only <P> flagged
920        assert_eq!(result2[0].message, "Inline HTML found: <P>");
921    }
922
923    #[test]
924    fn test_md033_html_comments() {
925        let rule = MD033NoInlineHtml::default();
926        let content = "<!-- This is a comment --> <p>Not a comment</p>";
927        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
928        let result = rule.check(&ctx).unwrap();
929        // Should detect warnings for HTML opening tags (comments are skipped, closing tags not reported)
930        assert_eq!(result.len(), 1); // Only <p>
931        assert_eq!(result[0].message, "Inline HTML found: <p>");
932    }
933
934    #[test]
935    fn test_md033_tags_in_links() {
936        let rule = MD033NoInlineHtml::default();
937        let content = "[Link](http://example.com/<div>)";
938        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
939        let result = rule.check(&ctx).unwrap();
940        // The <div> in the URL should be detected as HTML (not skipped)
941        assert_eq!(result.len(), 1);
942        assert_eq!(result[0].message, "Inline HTML found: <div>");
943
944        let content2 = "[Link <a>text</a>](url)";
945        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
946        let result2 = rule.check(&ctx2).unwrap();
947        // Only reports opening tags
948        assert_eq!(result2.len(), 1); // Only <a>
949        assert_eq!(result2[0].message, "Inline HTML found: <a>");
950    }
951
952    #[test]
953    fn test_md033_fix_escaping() {
954        let rule = MD033NoInlineHtml::default();
955        let content = "Text with <div> and <br/> tags.";
956        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
957        let fixed_content = rule.fix(&ctx).unwrap();
958        // No fix for HTML tags; output should be unchanged
959        assert_eq!(fixed_content, content);
960    }
961
962    #[test]
963    fn test_md033_in_code_blocks() {
964        let rule = MD033NoInlineHtml::default();
965        let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
966        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
967        let result = rule.check(&ctx).unwrap();
968        // Only reports opening tags outside code block
969        assert_eq!(result.len(), 1); // Only <div> outside code block
970        assert_eq!(result[0].message, "Inline HTML found: <div>");
971    }
972
973    #[test]
974    fn test_md033_in_code_spans() {
975        let rule = MD033NoInlineHtml::default();
976        let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
977        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
978        let result = rule.check(&ctx).unwrap();
979        // Should detect <br/> outside code span, but not tags inside code span
980        assert_eq!(result.len(), 1);
981        assert_eq!(result[0].message, "Inline HTML found: <br/>");
982    }
983
984    #[test]
985    fn test_md033_issue_90_code_span_with_diff_block() {
986        // Test for issue #90: inline code span followed by diff code block
987        let rule = MD033NoInlineHtml::default();
988        let content = r#"# Heading
989
990`<env>`
991
992```diff
993- this
994+ that
995```"#;
996        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
997        let result = rule.check(&ctx).unwrap();
998        // Should NOT detect <env> as HTML since it's inside backticks
999        assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
1000    }
1001
1002    #[test]
1003    fn test_md033_multiple_code_spans_with_angle_brackets() {
1004        // Test multiple code spans on same line
1005        let rule = MD033NoInlineHtml::default();
1006        let content = "`<one>` and `<two>` and `<three>` are all code spans";
1007        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1008        let result = rule.check(&ctx).unwrap();
1009        assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
1010    }
1011
1012    #[test]
1013    fn test_md033_nested_angle_brackets_in_code_span() {
1014        // Test nested angle brackets
1015        let rule = MD033NoInlineHtml::default();
1016        let content = "Text with `<<nested>>` brackets";
1017        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1018        let result = rule.check(&ctx).unwrap();
1019        assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
1020    }
1021
1022    #[test]
1023    fn test_md033_code_span_at_end_before_code_block() {
1024        // Test code span at end of line before code block
1025        let rule = MD033NoInlineHtml::default();
1026        let content = "Testing `<test>`\n```\ncode here\n```";
1027        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1028        let result = rule.check(&ctx).unwrap();
1029        assert_eq!(result.len(), 0, "Should handle code span before code block");
1030    }
1031
1032    #[test]
1033    fn test_md033_quick_fix_inline_tag() {
1034        // Test that non-fixable tags (like <span>) do NOT get a fix
1035        // Only safe fixable tags (em, i, strong, b, code, br, hr) with fix=true get fixes
1036        let rule = MD033NoInlineHtml::default();
1037        let content = "This has <span>inline text</span> that should keep content.";
1038        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1039        let result = rule.check(&ctx).unwrap();
1040
1041        assert_eq!(result.len(), 1, "Should find one HTML tag");
1042        // <span> is NOT a safe fixable tag, so no fix should be provided
1043        assert!(
1044            result[0].fix.is_none(),
1045            "Non-fixable tags like <span> should not have a fix"
1046        );
1047    }
1048
1049    #[test]
1050    fn test_md033_quick_fix_multiline_tag() {
1051        // HTML block elements like <div> are intentionally NOT auto-fixed
1052        // Removing them would change document structure significantly
1053        let rule = MD033NoInlineHtml::default();
1054        let content = "<div>\nBlock content\n</div>";
1055        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1056        let result = rule.check(&ctx).unwrap();
1057
1058        assert_eq!(result.len(), 1, "Should find one HTML tag");
1059        // HTML block elements should NOT have auto-fix
1060        assert!(result[0].fix.is_none(), "HTML block elements should NOT have auto-fix");
1061    }
1062
1063    #[test]
1064    fn test_md033_quick_fix_self_closing_tag() {
1065        // Test that self-closing tags with fix=false (default) do NOT get a fix
1066        let rule = MD033NoInlineHtml::default();
1067        let content = "Self-closing: <br/>";
1068        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1069        let result = rule.check(&ctx).unwrap();
1070
1071        assert_eq!(result.len(), 1, "Should find one HTML tag");
1072        // Default config has fix=false, so no fix should be provided
1073        assert!(
1074            result[0].fix.is_none(),
1075            "Self-closing tags should not have a fix when fix config is false"
1076        );
1077    }
1078
1079    #[test]
1080    fn test_md033_quick_fix_multiple_tags() {
1081        // Test that multiple tags without fix=true do NOT get fixes
1082        // <span> is not a safe fixable tag, <strong> is but fix=false by default
1083        let rule = MD033NoInlineHtml::default();
1084        let content = "<span>first</span> and <strong>second</strong>";
1085        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1086        let result = rule.check(&ctx).unwrap();
1087
1088        assert_eq!(result.len(), 2, "Should find two HTML tags");
1089        // Neither should have a fix: <span> is not fixable, <strong> is but fix=false
1090        assert!(result[0].fix.is_none(), "Non-fixable <span> should not have a fix");
1091        assert!(
1092            result[1].fix.is_none(),
1093            "<strong> should not have a fix when fix config is false"
1094        );
1095    }
1096
1097    #[test]
1098    fn test_md033_skip_angle_brackets_in_link_titles() {
1099        // Angle brackets inside link reference definition titles should not be flagged as HTML
1100        let rule = MD033NoInlineHtml::default();
1101        let content = r#"# Test
1102
1103[example]: <https://example.com> "Title with <Angle Brackets> inside"
1104
1105Regular text with <div>content</div> HTML tag.
1106"#;
1107        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1108        let result = rule.check(&ctx).unwrap();
1109
1110        // Should only flag <div>, not <Angle Brackets> in the title (not a valid HTML element)
1111        // Opening tag only (markdownlint behavior)
1112        assert_eq!(result.len(), 1, "Should find opening div tag");
1113        assert!(
1114            result[0].message.contains("<div>"),
1115            "Should flag <div>, got: {}",
1116            result[0].message
1117        );
1118    }
1119
1120    #[test]
1121    fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
1122        // Test with single-quoted title
1123        let rule = MD033NoInlineHtml::default();
1124        let content = r#"[ref]: url 'Title <Help Wanted> here'
1125
1126<span>text</span> here
1127"#;
1128        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1129        let result = rule.check(&ctx).unwrap();
1130
1131        // <Help Wanted> is not a valid HTML element, so only <span> is flagged
1132        // Opening tag only (markdownlint behavior)
1133        assert_eq!(result.len(), 1, "Should find opening span tag");
1134        assert!(
1135            result[0].message.contains("<span>"),
1136            "Should flag <span>, got: {}",
1137            result[0].message
1138        );
1139    }
1140
1141    #[test]
1142    fn test_md033_multiline_tag_end_line_calculation() {
1143        // Test that multiline HTML tags report correct end_line
1144        let rule = MD033NoInlineHtml::default();
1145        let content = "<div\n  class=\"test\"\n  id=\"example\">";
1146        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1147        let result = rule.check(&ctx).unwrap();
1148
1149        assert_eq!(result.len(), 1, "Should find one HTML tag");
1150        // Tag starts on line 1
1151        assert_eq!(result[0].line, 1, "Start line should be 1");
1152        // Tag ends on line 3 (where the closing > is)
1153        assert_eq!(result[0].end_line, 3, "End line should be 3");
1154    }
1155
1156    #[test]
1157    fn test_md033_single_line_tag_same_start_end_line() {
1158        // Test that single-line HTML tags have same start and end line
1159        let rule = MD033NoInlineHtml::default();
1160        let content = "Some text <div class=\"test\"> more text";
1161        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1162        let result = rule.check(&ctx).unwrap();
1163
1164        assert_eq!(result.len(), 1, "Should find one HTML tag");
1165        assert_eq!(result[0].line, 1, "Start line should be 1");
1166        assert_eq!(result[0].end_line, 1, "End line should be 1 for single-line tag");
1167    }
1168
1169    #[test]
1170    fn test_md033_multiline_tag_with_many_attributes() {
1171        // Test multiline tag spanning multiple lines
1172        let rule = MD033NoInlineHtml::default();
1173        let content =
1174            "Text\n<div\n  data-attr1=\"value1\"\n  data-attr2=\"value2\"\n  data-attr3=\"value3\">\nMore text";
1175        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1176        let result = rule.check(&ctx).unwrap();
1177
1178        assert_eq!(result.len(), 1, "Should find one HTML tag");
1179        // Tag starts on line 2 (first line is "Text")
1180        assert_eq!(result[0].line, 2, "Start line should be 2");
1181        // Tag ends on line 5 (where the closing > is)
1182        assert_eq!(result[0].end_line, 5, "End line should be 5");
1183    }
1184
1185    #[test]
1186    fn test_md033_disallowed_mode_basic() {
1187        // Test disallowed mode: only flags tags in the disallowed list
1188        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string(), "iframe".to_string()]);
1189        let content = "<div>Safe content</div><script>alert('xss')</script>";
1190        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1191        let result = rule.check(&ctx).unwrap();
1192
1193        // Should only flag <script>, not <div>
1194        assert_eq!(result.len(), 1, "Should only flag disallowed tags");
1195        assert!(result[0].message.contains("<script>"), "Should flag script tag");
1196    }
1197
1198    #[test]
1199    fn test_md033_disallowed_gfm_security_tags() {
1200        // Test GFM security tags expansion
1201        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1202        let content = r#"
1203<div>Safe</div>
1204<title>Bad title</title>
1205<textarea>Bad textarea</textarea>
1206<style>.bad{}</style>
1207<iframe src="evil"></iframe>
1208<script>evil()</script>
1209<plaintext>old tag</plaintext>
1210<span>Safe span</span>
1211"#;
1212        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1213        let result = rule.check(&ctx).unwrap();
1214
1215        // Should flag: title, textarea, style, iframe, script, plaintext
1216        // Should NOT flag: div, span
1217        assert_eq!(result.len(), 6, "Should flag 6 GFM security tags");
1218
1219        let flagged_tags: Vec<&str> = result
1220            .iter()
1221            .filter_map(|w| w.message.split("<").nth(1))
1222            .filter_map(|s| s.split(">").next())
1223            .filter_map(|s| s.split_whitespace().next())
1224            .collect();
1225
1226        assert!(flagged_tags.contains(&"title"), "Should flag title");
1227        assert!(flagged_tags.contains(&"textarea"), "Should flag textarea");
1228        assert!(flagged_tags.contains(&"style"), "Should flag style");
1229        assert!(flagged_tags.contains(&"iframe"), "Should flag iframe");
1230        assert!(flagged_tags.contains(&"script"), "Should flag script");
1231        assert!(flagged_tags.contains(&"plaintext"), "Should flag plaintext");
1232        assert!(!flagged_tags.contains(&"div"), "Should NOT flag div");
1233        assert!(!flagged_tags.contains(&"span"), "Should NOT flag span");
1234    }
1235
1236    #[test]
1237    fn test_md033_disallowed_case_insensitive() {
1238        // Test that disallowed check is case-insensitive
1239        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string()]);
1240        let content = "<SCRIPT>alert('xss')</SCRIPT><Script>alert('xss')</Script>";
1241        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1242        let result = rule.check(&ctx).unwrap();
1243
1244        // Should flag both <SCRIPT> and <Script>
1245        assert_eq!(result.len(), 2, "Should flag both case variants");
1246    }
1247
1248    #[test]
1249    fn test_md033_disallowed_with_attributes() {
1250        // Test that disallowed mode works with tags that have attributes
1251        let rule = MD033NoInlineHtml::with_disallowed(vec!["iframe".to_string()]);
1252        let content = r#"<iframe src="https://evil.com" width="100" height="100"></iframe>"#;
1253        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1254        let result = rule.check(&ctx).unwrap();
1255
1256        assert_eq!(result.len(), 1, "Should flag iframe with attributes");
1257        assert!(result[0].message.contains("iframe"), "Should flag iframe");
1258    }
1259
1260    #[test]
1261    fn test_md033_disallowed_all_gfm_tags() {
1262        // Verify all GFM disallowed tags are covered
1263        use md033_config::GFM_DISALLOWED_TAGS;
1264        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1265
1266        for tag in GFM_DISALLOWED_TAGS {
1267            let content = format!("<{tag}>content</{tag}>");
1268            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
1269            let result = rule.check(&ctx).unwrap();
1270
1271            assert_eq!(result.len(), 1, "GFM tag <{tag}> should be flagged");
1272        }
1273    }
1274
1275    #[test]
1276    fn test_md033_disallowed_mixed_with_custom() {
1277        // Test mixing "gfm" with custom disallowed tags
1278        let rule = MD033NoInlineHtml::with_disallowed(vec![
1279            "gfm".to_string(),
1280            "marquee".to_string(), // Custom disallowed tag
1281        ]);
1282        let content = r#"<script>bad</script><marquee>annoying</marquee><div>ok</div>"#;
1283        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1284        let result = rule.check(&ctx).unwrap();
1285
1286        // Should flag script (gfm) and marquee (custom)
1287        assert_eq!(result.len(), 2, "Should flag both gfm and custom tags");
1288    }
1289
1290    #[test]
1291    fn test_md033_disallowed_empty_means_default_mode() {
1292        // Empty disallowed list means default mode (flag all HTML)
1293        let rule = MD033NoInlineHtml::with_disallowed(vec![]);
1294        let content = "<div>content</div>";
1295        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1296        let result = rule.check(&ctx).unwrap();
1297
1298        // Should flag <div> in default mode
1299        assert_eq!(result.len(), 1, "Empty disallowed = default mode");
1300    }
1301
1302    #[test]
1303    fn test_md033_jsx_fragments_in_mdx() {
1304        // JSX fragments (<> and </>) should not trigger warnings in MDX
1305        let rule = MD033NoInlineHtml::default();
1306        let content = r#"# MDX Document
1307
1308<>
1309  <Heading />
1310  <Content />
1311</>
1312
1313<div>Regular HTML should still be flagged</div>
1314"#;
1315        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1316        let result = rule.check(&ctx).unwrap();
1317
1318        // Should only flag <div>, not the fragments or JSX components
1319        assert_eq!(result.len(), 1, "Should only find one HTML tag (the div)");
1320        assert!(
1321            result[0].message.contains("<div>"),
1322            "Should flag <div>, not JSX fragments"
1323        );
1324    }
1325
1326    #[test]
1327    fn test_md033_jsx_components_in_mdx() {
1328        // JSX components (capitalized) should not trigger warnings in MDX
1329        let rule = MD033NoInlineHtml::default();
1330        let content = r#"<CustomComponent prop="value">
1331  Content
1332</CustomComponent>
1333
1334<MyButton onClick={handler}>Click</MyButton>
1335"#;
1336        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1337        let result = rule.check(&ctx).unwrap();
1338
1339        // No warnings - all are JSX components
1340        assert_eq!(result.len(), 0, "Should not flag JSX components in MDX");
1341    }
1342
1343    #[test]
1344    fn test_md033_jsx_not_skipped_in_standard_markdown() {
1345        // In standard markdown, capitalized tags should still be flagged if they're valid HTML
1346        let rule = MD033NoInlineHtml::default();
1347        let content = "<Script>alert(1)</Script>";
1348        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1349        let result = rule.check(&ctx).unwrap();
1350
1351        // Should flag <Script> in standard markdown (it's a valid HTML element)
1352        assert_eq!(result.len(), 1, "Should flag <Script> in standard markdown");
1353    }
1354
1355    #[test]
1356    fn test_md033_jsx_attributes_in_mdx() {
1357        // Elements with JSX-specific attributes should not trigger warnings in MDX
1358        let rule = MD033NoInlineHtml::default();
1359        let content = r#"# MDX with JSX Attributes
1360
1361<div className="card big">Content</div>
1362
1363<button onClick={handleClick}>Click me</button>
1364
1365<label htmlFor="input-id">Label</label>
1366
1367<input onChange={handleChange} />
1368
1369<div class="html-class">Regular HTML should be flagged</div>
1370"#;
1371        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1372        let result = rule.check(&ctx).unwrap();
1373
1374        // Should only flag the div with regular HTML "class" attribute
1375        assert_eq!(
1376            result.len(),
1377            1,
1378            "Should only flag HTML element without JSX attributes, got: {result:?}"
1379        );
1380        assert!(
1381            result[0].message.contains("<div class="),
1382            "Should flag the div with HTML class attribute"
1383        );
1384    }
1385
1386    #[test]
1387    fn test_md033_jsx_attributes_not_skipped_in_standard() {
1388        // In standard markdown, JSX attributes should still be flagged
1389        let rule = MD033NoInlineHtml::default();
1390        let content = r#"<div className="card">Content</div>"#;
1391        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1392        let result = rule.check(&ctx).unwrap();
1393
1394        // Should flag in standard markdown
1395        assert_eq!(result.len(), 1, "Should flag JSX-style elements in standard markdown");
1396    }
1397
1398    // Auto-fix tests for MD033
1399
1400    #[test]
1401    fn test_md033_fix_disabled_by_default() {
1402        // Auto-fix should be disabled by default
1403        let rule = MD033NoInlineHtml::default();
1404        assert!(!rule.config.fix, "Fix should be disabled by default");
1405        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::Unfixable);
1406    }
1407
1408    #[test]
1409    fn test_md033_fix_enabled_em_to_italic() {
1410        // When fix is enabled, <em>text</em> should convert to *text*
1411        let rule = MD033NoInlineHtml::with_fix(true);
1412        let content = "This has <em>emphasized text</em> here.";
1413        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1414        let fixed = rule.fix(&ctx).unwrap();
1415        assert_eq!(fixed, "This has *emphasized text* here.");
1416    }
1417
1418    #[test]
1419    fn test_md033_fix_enabled_i_to_italic() {
1420        // <i>text</i> should convert to *text*
1421        let rule = MD033NoInlineHtml::with_fix(true);
1422        let content = "This has <i>italic text</i> here.";
1423        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1424        let fixed = rule.fix(&ctx).unwrap();
1425        assert_eq!(fixed, "This has *italic text* here.");
1426    }
1427
1428    #[test]
1429    fn test_md033_fix_enabled_strong_to_bold() {
1430        // <strong>text</strong> should convert to **text**
1431        let rule = MD033NoInlineHtml::with_fix(true);
1432        let content = "This has <strong>bold text</strong> here.";
1433        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1434        let fixed = rule.fix(&ctx).unwrap();
1435        assert_eq!(fixed, "This has **bold text** here.");
1436    }
1437
1438    #[test]
1439    fn test_md033_fix_enabled_b_to_bold() {
1440        // <b>text</b> should convert to **text**
1441        let rule = MD033NoInlineHtml::with_fix(true);
1442        let content = "This has <b>bold text</b> here.";
1443        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1444        let fixed = rule.fix(&ctx).unwrap();
1445        assert_eq!(fixed, "This has **bold text** here.");
1446    }
1447
1448    #[test]
1449    fn test_md033_fix_enabled_code_to_backticks() {
1450        // <code>text</code> should convert to `text`
1451        let rule = MD033NoInlineHtml::with_fix(true);
1452        let content = "This has <code>inline code</code> here.";
1453        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1454        let fixed = rule.fix(&ctx).unwrap();
1455        assert_eq!(fixed, "This has `inline code` here.");
1456    }
1457
1458    #[test]
1459    fn test_md033_fix_enabled_code_with_backticks() {
1460        // <code>text with `backticks`</code> should use double backticks
1461        let rule = MD033NoInlineHtml::with_fix(true);
1462        let content = "This has <code>text with `backticks`</code> here.";
1463        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1464        let fixed = rule.fix(&ctx).unwrap();
1465        assert_eq!(fixed, "This has `` text with `backticks` `` here.");
1466    }
1467
1468    #[test]
1469    fn test_md033_fix_enabled_br_trailing_spaces() {
1470        // <br> should convert to two trailing spaces + newline (default)
1471        let rule = MD033NoInlineHtml::with_fix(true);
1472        let content = "First line<br>Second line";
1473        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1474        let fixed = rule.fix(&ctx).unwrap();
1475        assert_eq!(fixed, "First line  \nSecond line");
1476    }
1477
1478    #[test]
1479    fn test_md033_fix_enabled_br_self_closing() {
1480        // <br/> and <br /> should also convert
1481        let rule = MD033NoInlineHtml::with_fix(true);
1482        let content = "First<br/>second<br />third";
1483        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1484        let fixed = rule.fix(&ctx).unwrap();
1485        assert_eq!(fixed, "First  \nsecond  \nthird");
1486    }
1487
1488    #[test]
1489    fn test_md033_fix_enabled_br_backslash_style() {
1490        // With br_style = backslash, <br> should convert to backslash + newline
1491        let config = MD033Config {
1492            allowed: Vec::new(),
1493            disallowed: Vec::new(),
1494            fix: true,
1495            br_style: md033_config::BrStyle::Backslash,
1496        };
1497        let rule = MD033NoInlineHtml::from_config_struct(config);
1498        let content = "First line<br>Second line";
1499        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1500        let fixed = rule.fix(&ctx).unwrap();
1501        assert_eq!(fixed, "First line\\\nSecond line");
1502    }
1503
1504    #[test]
1505    fn test_md033_fix_enabled_hr() {
1506        // <hr> should convert to horizontal rule
1507        let rule = MD033NoInlineHtml::with_fix(true);
1508        let content = "Above<hr>Below";
1509        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1510        let fixed = rule.fix(&ctx).unwrap();
1511        assert_eq!(fixed, "Above\n---\nBelow");
1512    }
1513
1514    #[test]
1515    fn test_md033_fix_enabled_hr_self_closing() {
1516        // <hr/> should also convert
1517        let rule = MD033NoInlineHtml::with_fix(true);
1518        let content = "Above<hr/>Below";
1519        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1520        let fixed = rule.fix(&ctx).unwrap();
1521        assert_eq!(fixed, "Above\n---\nBelow");
1522    }
1523
1524    #[test]
1525    fn test_md033_fix_skips_nested_tags() {
1526        // Tags with nested HTML - outer tags may not be fully fixed due to overlapping ranges
1527        // The inner tags are processed first, which can invalidate outer tag ranges
1528        let rule = MD033NoInlineHtml::with_fix(true);
1529        let content = "This has <em>text with <strong>nested</strong> tags</em> here.";
1530        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1531        let fixed = rule.fix(&ctx).unwrap();
1532        // Inner <strong> is converted to markdown, outer <em> range becomes invalid
1533        // This is expected behavior - user should run fix multiple times for nested tags
1534        assert_eq!(fixed, "This has <em>text with **nested** tags</em> here.");
1535    }
1536
1537    #[test]
1538    fn test_md033_fix_skips_tags_with_attributes() {
1539        // Tags with attributes should NOT be fixed at all - leave as-is
1540        // User may want to keep the attributes (e.g., class="highlight" for styling)
1541        let rule = MD033NoInlineHtml::with_fix(true);
1542        let content = "This has <em class=\"highlight\">emphasized</em> text.";
1543        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1544        let fixed = rule.fix(&ctx).unwrap();
1545        // Content should remain unchanged - we don't know if attributes matter
1546        assert_eq!(fixed, content);
1547    }
1548
1549    #[test]
1550    fn test_md033_fix_disabled_no_changes() {
1551        // When fix is disabled, original content should be returned
1552        let rule = MD033NoInlineHtml::default(); // fix is false by default
1553        let content = "This has <em>emphasized text</em> here.";
1554        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1555        let fixed = rule.fix(&ctx).unwrap();
1556        assert_eq!(fixed, content, "Should return original content when fix is disabled");
1557    }
1558
1559    #[test]
1560    fn test_md033_fix_capability_enabled() {
1561        let rule = MD033NoInlineHtml::with_fix(true);
1562        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::FullyFixable);
1563    }
1564
1565    #[test]
1566    fn test_md033_fix_multiple_tags() {
1567        // Test fixing multiple HTML tags in one document
1568        let rule = MD033NoInlineHtml::with_fix(true);
1569        let content = "Here is <em>italic</em> and <strong>bold</strong> text.";
1570        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1571        let fixed = rule.fix(&ctx).unwrap();
1572        assert_eq!(fixed, "Here is *italic* and **bold** text.");
1573    }
1574
1575    #[test]
1576    fn test_md033_fix_uppercase_tags() {
1577        // HTML tags are case-insensitive
1578        let rule = MD033NoInlineHtml::with_fix(true);
1579        let content = "This has <EM>emphasized</EM> text.";
1580        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1581        let fixed = rule.fix(&ctx).unwrap();
1582        assert_eq!(fixed, "This has *emphasized* text.");
1583    }
1584
1585    #[test]
1586    fn test_md033_fix_unsafe_tags_not_modified() {
1587        // Tags without safe markdown equivalents should NOT be modified
1588        // Only safe fixable tags (em, i, strong, b, code, br, hr) get converted
1589        let rule = MD033NoInlineHtml::with_fix(true);
1590        let content = "This has <div>a div</div> content.";
1591        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1592        let fixed = rule.fix(&ctx).unwrap();
1593        // <div> is not a safe fixable tag, so content should be unchanged
1594        assert_eq!(fixed, "This has <div>a div</div> content.");
1595    }
1596
1597    #[test]
1598    fn test_md033_fix_img_tag_not_removed() {
1599        // Regression test: <img> tags should NOT be removed or modified
1600        // They have no Markdown equivalent, so fix should leave them unchanged
1601        let rule = MD033NoInlineHtml::with_fix(true);
1602        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\">";
1603        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1604        let fixed = rule.fix(&ctx).unwrap();
1605        // <img> is a self-closing tag without a Markdown equivalent - must be unchanged
1606        assert_eq!(fixed, "Image: <img src=\"photo.jpg\" alt=\"My Photo\">");
1607    }
1608
1609    #[test]
1610    fn test_md033_fix_mixed_safe_and_unsafe_tags() {
1611        // Mix of safe fixable tags and unsafe tags on the same line
1612        // Safe tags should be converted, unsafe should be left unchanged
1613        let rule = MD033NoInlineHtml::with_fix(true);
1614        let content = "<em>italic</em> and <img src=\"x.jpg\"> and <strong>bold</strong>";
1615        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1616        let fixed = rule.fix(&ctx).unwrap();
1617        // <em> and <strong> are safe, <img> is not
1618        assert_eq!(fixed, "*italic* and <img src=\"x.jpg\"> and **bold**");
1619    }
1620
1621    #[test]
1622    fn test_md033_fix_multiple_tags_same_line() {
1623        // Multiple tags on the same line should all be fixed correctly
1624        let rule = MD033NoInlineHtml::with_fix(true);
1625        let content = "Regular text <i>italic</i> and <b>bold</b> here.";
1626        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1627        let fixed = rule.fix(&ctx).unwrap();
1628        assert_eq!(fixed, "Regular text *italic* and **bold** here.");
1629    }
1630
1631    #[test]
1632    fn test_md033_fix_multiple_em_tags_same_line() {
1633        // Multiple em/strong tags on the same line
1634        let rule = MD033NoInlineHtml::with_fix(true);
1635        let content = "<em>first</em> and <strong>second</strong> and <code>third</code>";
1636        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1637        let fixed = rule.fix(&ctx).unwrap();
1638        assert_eq!(fixed, "*first* and **second** and `third`");
1639    }
1640
1641    #[test]
1642    fn test_md033_fix_skips_tags_inside_pre() {
1643        // Tags inside <pre> blocks should NOT be fixed (would break structure)
1644        let rule = MD033NoInlineHtml::with_fix(true);
1645        let content = "<pre><code><em>VALUE</em></code></pre>";
1646        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1647        let fixed = rule.fix(&ctx).unwrap();
1648        // The <em> inside <pre><code> should NOT be converted
1649        // Only the outer structure might be changed
1650        assert!(
1651            !fixed.contains("*VALUE*"),
1652            "Tags inside <pre> should not be converted to markdown. Got: {fixed}"
1653        );
1654    }
1655
1656    #[test]
1657    fn test_md033_fix_skips_tags_inside_div() {
1658        // Tags inside HTML block elements should not be fixed
1659        let rule = MD033NoInlineHtml::with_fix(true);
1660        let content = "<div>\n<em>emphasized</em>\n</div>";
1661        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1662        let fixed = rule.fix(&ctx).unwrap();
1663        // The <em> inside <div> should not be converted to *emphasized*
1664        assert!(
1665            !fixed.contains("*emphasized*"),
1666            "Tags inside HTML blocks should not be converted. Got: {fixed}"
1667        );
1668    }
1669
1670    #[test]
1671    fn test_md033_fix_outside_html_block() {
1672        // Tags outside HTML blocks should still be fixed
1673        let rule = MD033NoInlineHtml::with_fix(true);
1674        let content = "<div>\ncontent\n</div>\n\nOutside <em>emphasized</em> text.";
1675        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1676        let fixed = rule.fix(&ctx).unwrap();
1677        // The <em> outside the div should be converted
1678        assert!(
1679            fixed.contains("*emphasized*"),
1680            "Tags outside HTML blocks should be converted. Got: {fixed}"
1681        );
1682    }
1683
1684    #[test]
1685    fn test_md033_fix_with_id_attribute() {
1686        // Tags with id attributes should not be fixed (id might be used for anchors)
1687        let rule = MD033NoInlineHtml::with_fix(true);
1688        let content = "See <em id=\"important\">this note</em> for details.";
1689        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1690        let fixed = rule.fix(&ctx).unwrap();
1691        // Should remain unchanged - id attribute matters for linking
1692        assert_eq!(fixed, content);
1693    }
1694
1695    #[test]
1696    fn test_md033_fix_with_style_attribute() {
1697        // Tags with style attributes should not be fixed
1698        let rule = MD033NoInlineHtml::with_fix(true);
1699        let content = "This is <strong style=\"color: red\">important</strong> text.";
1700        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1701        let fixed = rule.fix(&ctx).unwrap();
1702        // Should remain unchanged - style attribute provides formatting
1703        assert_eq!(fixed, content);
1704    }
1705
1706    #[test]
1707    fn test_md033_fix_mixed_with_and_without_attributes() {
1708        // Mix of tags with and without attributes
1709        let rule = MD033NoInlineHtml::with_fix(true);
1710        let content = "<em>normal</em> and <em class=\"special\">styled</em> text.";
1711        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1712        let fixed = rule.fix(&ctx).unwrap();
1713        // Only the tag without attributes should be fixed
1714        assert_eq!(fixed, "*normal* and <em class=\"special\">styled</em> text.");
1715    }
1716
1717    #[test]
1718    fn test_md033_quick_fix_tag_with_attributes_no_fix() {
1719        // Quick fix should not be provided for tags with attributes
1720        let rule = MD033NoInlineHtml::with_fix(true);
1721        let content = "<em class=\"test\">emphasized</em>";
1722        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1723        let result = rule.check(&ctx).unwrap();
1724
1725        assert_eq!(result.len(), 1, "Should find one HTML tag");
1726        // No fix should be provided for tags with attributes
1727        assert!(
1728            result[0].fix.is_none(),
1729            "Should NOT have a fix for tags with attributes"
1730        );
1731    }
1732
1733    #[test]
1734    fn test_md033_fix_skips_html_entities() {
1735        // Tags containing HTML entities should NOT be fixed
1736        // HTML entities need HTML context to render; markdown won't process them
1737        let rule = MD033NoInlineHtml::with_fix(true);
1738        let content = "<code>&vert;</code>";
1739        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1740        let fixed = rule.fix(&ctx).unwrap();
1741        // Should remain unchanged - converting would break rendering
1742        assert_eq!(fixed, content);
1743    }
1744
1745    #[test]
1746    fn test_md033_fix_skips_multiple_html_entities() {
1747        // Multiple HTML entities should also be skipped
1748        let rule = MD033NoInlineHtml::with_fix(true);
1749        let content = "<code>&lt;T&gt;</code>";
1750        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1751        let fixed = rule.fix(&ctx).unwrap();
1752        // Should remain unchanged
1753        assert_eq!(fixed, content);
1754    }
1755
1756    #[test]
1757    fn test_md033_fix_allows_ampersand_without_entity() {
1758        // Content with & but no semicolon should still be fixed
1759        let rule = MD033NoInlineHtml::with_fix(true);
1760        let content = "<code>a & b</code>";
1761        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1762        let fixed = rule.fix(&ctx).unwrap();
1763        // Should be converted since & is not part of an entity
1764        assert_eq!(fixed, "`a & b`");
1765    }
1766
1767    #[test]
1768    fn test_md033_fix_em_with_entities_skipped() {
1769        // <em> with entities should also be skipped
1770        let rule = MD033NoInlineHtml::with_fix(true);
1771        let content = "<em>&nbsp;text</em>";
1772        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1773        let fixed = rule.fix(&ctx).unwrap();
1774        // Should remain unchanged
1775        assert_eq!(fixed, content);
1776    }
1777
1778    #[test]
1779    fn test_md033_fix_skips_nested_em_in_code() {
1780        // Tags nested inside other HTML elements should NOT be fixed
1781        // e.g., <code><em>n</em></code> - the <em> should not be converted
1782        let rule = MD033NoInlineHtml::with_fix(true);
1783        let content = "<code><em>n</em></code>";
1784        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1785        let fixed = rule.fix(&ctx).unwrap();
1786        // The inner <em> should NOT be converted to *n* because it's nested
1787        // The whole structure should be left as-is (or outer code converted, but not inner)
1788        assert!(
1789            !fixed.contains("*n*"),
1790            "Nested <em> should not be converted to markdown. Got: {fixed}"
1791        );
1792    }
1793
1794    #[test]
1795    fn test_md033_fix_skips_nested_in_table() {
1796        // Tags nested in HTML structures in tables should not be fixed
1797        let rule = MD033NoInlineHtml::with_fix(true);
1798        let content = "| <code>><em>n</em></code> | description |";
1799        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1800        let fixed = rule.fix(&ctx).unwrap();
1801        // Should not convert nested <em> to *n*
1802        assert!(
1803            !fixed.contains("*n*"),
1804            "Nested tags in table should not be converted. Got: {fixed}"
1805        );
1806    }
1807
1808    #[test]
1809    fn test_md033_fix_standalone_em_still_converted() {
1810        // Standalone (non-nested) <em> should still be converted
1811        let rule = MD033NoInlineHtml::with_fix(true);
1812        let content = "This is <em>emphasized</em> text.";
1813        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1814        let fixed = rule.fix(&ctx).unwrap();
1815        assert_eq!(fixed, "This is *emphasized* text.");
1816    }
1817}