Skip to main content

rumdl_lib/rules/
md033_no_inline_html.rs

1//!
2//! Rule MD033: No HTML tags
3//!
4//! See [docs/md033.md](../../docs/md033.md) for full documentation, configuration, and examples.
5
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::regex_cache::*;
9use std::collections::HashSet;
10
11mod md033_config;
12use md033_config::MD033Config;
13
/// Rule MD033 (no inline HTML) together with the lookup sets derived from its configuration.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    /// Rule configuration as supplied by the constructor.
    config: MD033Config,
    /// Result of `config.allowed_set()` captured at construction; queried by `is_tag_allowed`.
    allowed: HashSet<String>,
    /// Result of `config.disallowed_set()` captured at construction; queried by `is_tag_disallowed`.
    disallowed: HashSet<String>,
}
20
21impl Default for MD033NoInlineHtml {
22    fn default() -> Self {
23        let config = MD033Config::default();
24        let allowed = config.allowed_set();
25        let disallowed = config.disallowed_set();
26        Self {
27            config,
28            allowed,
29            disallowed,
30        }
31    }
32}
33
34impl MD033NoInlineHtml {
    /// Create the rule with its default configuration (same as `Self::default()`).
    pub fn new() -> Self {
        Self::default()
    }
38
39    pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
40        let config = MD033Config {
41            allowed: allowed_vec.clone(),
42            disallowed: Vec::new(),
43            fix: false,
44            br_style: md033_config::BrStyle::default(),
45        };
46        let allowed = config.allowed_set();
47        let disallowed = config.disallowed_set();
48        Self {
49            config,
50            allowed,
51            disallowed,
52        }
53    }
54
55    pub fn with_disallowed(disallowed_vec: Vec<String>) -> Self {
56        let config = MD033Config {
57            allowed: Vec::new(),
58            disallowed: disallowed_vec.clone(),
59            fix: false,
60            br_style: md033_config::BrStyle::default(),
61        };
62        let allowed = config.allowed_set();
63        let disallowed = config.disallowed_set();
64        Self {
65            config,
66            allowed,
67            disallowed,
68        }
69    }
70
71    /// Create a new rule with auto-fix enabled
72    pub fn with_fix(fix: bool) -> Self {
73        let config = MD033Config {
74            allowed: Vec::new(),
75            disallowed: Vec::new(),
76            fix,
77            br_style: md033_config::BrStyle::default(),
78        };
79        let allowed = config.allowed_set();
80        let disallowed = config.disallowed_set();
81        Self {
82            config,
83            allowed,
84            disallowed,
85        }
86    }
87
    /// Build the rule from an existing configuration.
    ///
    /// The allowed and disallowed sets are computed once here (via `allowed_set()` and
    /// `disallowed_set()`) and stored next to the configuration.
    pub fn from_config_struct(config: MD033Config) -> Self {
        let allowed = config.allowed_set();
        let disallowed = config.disallowed_set();
        Self {
            config,
            allowed,
            disallowed,
        }
    }
97
98    // Efficient check for allowed tags using HashSet (case-insensitive)
99    #[inline]
100    fn is_tag_allowed(&self, tag: &str) -> bool {
101        if self.allowed.is_empty() {
102            return false;
103        }
104        // Remove angle brackets and slashes, then split by whitespace or '>'
105        let tag = tag.trim_start_matches('<').trim_start_matches('/');
106        let tag_name = tag
107            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
108            .next()
109            .unwrap_or("");
110        self.allowed.contains(&tag_name.to_lowercase())
111    }
112
113    /// Check if a tag is in the disallowed set (for disallowed-only mode)
114    #[inline]
115    fn is_tag_disallowed(&self, tag: &str) -> bool {
116        if self.disallowed.is_empty() {
117            return false;
118        }
119        // Remove angle brackets and slashes, then split by whitespace or '>'
120        let tag = tag.trim_start_matches('<').trim_start_matches('/');
121        let tag_name = tag
122            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
123            .next()
124            .unwrap_or("");
125        self.disallowed.contains(&tag_name.to_lowercase())
126    }
127
    /// Check if operating in disallowed-only mode.
    ///
    /// Pure delegation to `MD033Config::is_disallowed_mode`; the criteria live in the config type.
    #[inline]
    fn is_disallowed_mode(&self) -> bool {
        self.config.is_disallowed_mode()
    }
133
    /// Check if a tag is an HTML comment, i.e. the text starts with `<!--` and ends with `-->`.
    ///
    /// Only the two ends are inspected; the text in between is not validated.
    #[inline]
    fn is_html_comment(&self, tag: &str) -> bool {
        tag.starts_with("<!--") && tag.ends_with("-->")
    }
139
140    /// Check if a tag name is a valid HTML element or custom element.
141    /// Returns false for placeholder syntax like `<NAME>`, `<resource>`, `<actual>`.
142    ///
143    /// Per HTML spec, custom elements must contain a hyphen (e.g., `<my-component>`).
144    #[inline]
145    fn is_html_element_or_custom(tag_name: &str) -> bool {
146        const HTML_ELEMENTS: &[&str] = &[
147            // Document structure
148            "html",
149            "head",
150            "body",
151            "title",
152            "base",
153            "link",
154            "meta",
155            "style",
156            // Sections
157            "article",
158            "section",
159            "nav",
160            "aside",
161            "h1",
162            "h2",
163            "h3",
164            "h4",
165            "h5",
166            "h6",
167            "hgroup",
168            "header",
169            "footer",
170            "address",
171            "main",
172            "search",
173            // Grouping
174            "p",
175            "hr",
176            "pre",
177            "blockquote",
178            "ol",
179            "ul",
180            "menu",
181            "li",
182            "dl",
183            "dt",
184            "dd",
185            "figure",
186            "figcaption",
187            "div",
188            // Text-level
189            "a",
190            "em",
191            "strong",
192            "small",
193            "s",
194            "cite",
195            "q",
196            "dfn",
197            "abbr",
198            "ruby",
199            "rt",
200            "rp",
201            "data",
202            "time",
203            "code",
204            "var",
205            "samp",
206            "kbd",
207            "sub",
208            "sup",
209            "i",
210            "b",
211            "u",
212            "mark",
213            "bdi",
214            "bdo",
215            "span",
216            "br",
217            "wbr",
218            // Edits
219            "ins",
220            "del",
221            // Embedded
222            "picture",
223            "source",
224            "img",
225            "iframe",
226            "embed",
227            "object",
228            "param",
229            "video",
230            "audio",
231            "track",
232            "map",
233            "area",
234            "svg",
235            "math",
236            "canvas",
237            // Tables
238            "table",
239            "caption",
240            "colgroup",
241            "col",
242            "tbody",
243            "thead",
244            "tfoot",
245            "tr",
246            "td",
247            "th",
248            // Forms
249            "form",
250            "label",
251            "input",
252            "button",
253            "select",
254            "datalist",
255            "optgroup",
256            "option",
257            "textarea",
258            "output",
259            "progress",
260            "meter",
261            "fieldset",
262            "legend",
263            // Interactive
264            "details",
265            "summary",
266            "dialog",
267            // Scripting
268            "script",
269            "noscript",
270            "template",
271            "slot",
272            // Deprecated but recognized
273            "acronym",
274            "applet",
275            "basefont",
276            "big",
277            "center",
278            "dir",
279            "font",
280            "frame",
281            "frameset",
282            "isindex",
283            "marquee",
284            "noembed",
285            "noframes",
286            "plaintext",
287            "strike",
288            "tt",
289            "xmp",
290        ];
291
292        let lower = tag_name.to_ascii_lowercase();
293        if HTML_ELEMENTS.contains(&lower.as_str()) {
294            return true;
295        }
296        // Custom elements must contain a hyphen per HTML spec
297        tag_name.contains('-')
298    }
299
300    // Check if a tag is likely a programming type annotation rather than HTML
301    #[inline]
302    fn is_likely_type_annotation(&self, tag: &str) -> bool {
303        // Common programming type names that are often used in generics
304        const COMMON_TYPES: &[&str] = &[
305            "string",
306            "number",
307            "any",
308            "void",
309            "null",
310            "undefined",
311            "array",
312            "promise",
313            "function",
314            "error",
315            "date",
316            "regexp",
317            "symbol",
318            "bigint",
319            "map",
320            "set",
321            "weakmap",
322            "weakset",
323            "iterator",
324            "generator",
325            "t",
326            "u",
327            "v",
328            "k",
329            "e", // Common single-letter type parameters
330            "userdata",
331            "apiresponse",
332            "config",
333            "options",
334            "params",
335            "result",
336            "response",
337            "request",
338            "data",
339            "item",
340            "element",
341            "node",
342        ];
343
344        let tag_content = tag
345            .trim_start_matches('<')
346            .trim_end_matches('>')
347            .trim_start_matches('/');
348        let tag_name = tag_content
349            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
350            .next()
351            .unwrap_or("");
352
353        // Check if it's a simple tag (no attributes) with a common type name
354        if !tag_content.contains(' ') && !tag_content.contains('=') {
355            COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
356        } else {
357            false
358        }
359    }
360
361    // Check if a tag is actually an email address in angle brackets
362    #[inline]
363    fn is_email_address(&self, tag: &str) -> bool {
364        let content = tag.trim_start_matches('<').trim_end_matches('>');
365        // Simple email pattern: contains @ and has reasonable structure
366        content.contains('@')
367            && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
368            && content.split('@').count() == 2
369            && content.split('@').all(|part| !part.is_empty())
370    }
371
372    // Check if a tag has the markdown attribute (MkDocs/Material for MkDocs)
373    #[inline]
374    fn has_markdown_attribute(&self, tag: &str) -> bool {
375        // Check for various forms of markdown attribute
376        // Examples: <div markdown>, <div markdown="1">, <div class="result" markdown>
377        tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
378    }
379
380    /// Check if a tag contains JSX-specific attributes that indicate it's JSX, not HTML
381    /// JSX uses different attribute names than HTML:
382    /// - `className` instead of `class`
383    /// - `htmlFor` instead of `for`
384    /// - camelCase event handlers (`onClick`, `onChange`, `onSubmit`, etc.)
385    /// - JSX expression syntax `={...}` for dynamic values
386    #[inline]
387    fn has_jsx_attributes(tag: &str) -> bool {
388        // JSX-specific attribute names (HTML uses class, for, onclick, etc.)
389        tag.contains("className")
390            || tag.contains("htmlFor")
391            || tag.contains("dangerouslySetInnerHTML")
392            // camelCase event handlers (JSX uses onClick, HTML uses onclick)
393            || tag.contains("onClick")
394            || tag.contains("onChange")
395            || tag.contains("onSubmit")
396            || tag.contains("onFocus")
397            || tag.contains("onBlur")
398            || tag.contains("onKeyDown")
399            || tag.contains("onKeyUp")
400            || tag.contains("onKeyPress")
401            || tag.contains("onMouseDown")
402            || tag.contains("onMouseUp")
403            || tag.contains("onMouseEnter")
404            || tag.contains("onMouseLeave")
405            // JSX expression syntax: ={expression} or ={ expression }
406            || tag.contains("={")
407    }
408
409    // Check if a tag is actually a URL in angle brackets
410    #[inline]
411    fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
412        let content = tag.trim_start_matches('<').trim_end_matches('>');
413        // Check for common URL schemes
414        content.starts_with("http://")
415            || content.starts_with("https://")
416            || content.starts_with("ftp://")
417            || content.starts_with("ftps://")
418            || content.starts_with("mailto:")
419    }
420
421    /// Convert paired HTML tags to their Markdown equivalents.
422    /// Returns None if the tag cannot be safely converted (has nested tags, HTML entities, etc.)
423    fn convert_to_markdown(tag_name: &str, inner_content: &str) -> Option<String> {
424        // Skip if content contains nested HTML tags
425        if inner_content.contains('<') {
426            return None;
427        }
428        // Skip if content contains HTML entities (e.g., &vert;, &amp;, &lt;)
429        // These need HTML context to render correctly; markdown won't process them
430        if inner_content.contains('&') && inner_content.contains(';') {
431            // Check for common HTML entity patterns
432            let has_entity = inner_content
433                .split('&')
434                .skip(1)
435                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
436            if has_entity {
437                return None;
438            }
439        }
440        match tag_name {
441            "em" | "i" => Some(format!("*{inner_content}*")),
442            "strong" | "b" => Some(format!("**{inner_content}**")),
443            "code" => {
444                // Handle backticks in content by using double backticks with padding
445                if inner_content.contains('`') {
446                    Some(format!("`` {inner_content} ``"))
447                } else {
448                    Some(format!("`{inner_content}`"))
449                }
450            }
451            _ => None,
452        }
453    }
454
455    /// Convert self-closing HTML tags to their Markdown equivalents.
456    fn convert_self_closing_to_markdown(&self, tag_name: &str, opening_tag: &str) -> Option<String> {
457        match tag_name {
458            "br" => match self.config.br_style {
459                md033_config::BrStyle::TrailingSpaces => Some("  \n".to_string()),
460                md033_config::BrStyle::Backslash => Some("\\\n".to_string()),
461            },
462            "hr" => Some("\n---\n".to_string()),
463            "img" => Self::convert_img_to_markdown(opening_tag),
464            _ => None,
465        }
466    }
467
468    /// Parse all attributes from an HTML tag into a list of (name, value) pairs.
469    /// This provides proper attribute parsing instead of naive string matching.
470    fn parse_attributes(tag: &str) -> Vec<(String, Option<String>)> {
471        let mut attrs = Vec::new();
472
473        // Remove < and > and tag name
474        let tag_content = tag.trim_start_matches('<').trim_end_matches('>').trim_end_matches('/');
475
476        // Find first whitespace to skip tag name
477        let attr_start = tag_content
478            .find(|c: char| c.is_whitespace())
479            .map(|i| i + 1)
480            .unwrap_or(tag_content.len());
481
482        if attr_start >= tag_content.len() {
483            return attrs;
484        }
485
486        let attr_str = &tag_content[attr_start..];
487        let mut chars = attr_str.chars().peekable();
488
489        while chars.peek().is_some() {
490            // Skip whitespace
491            while chars.peek().is_some_and(|c| c.is_whitespace()) {
492                chars.next();
493            }
494
495            if chars.peek().is_none() {
496                break;
497            }
498
499            // Read attribute name
500            let mut attr_name = String::new();
501            while let Some(&c) = chars.peek() {
502                if c.is_whitespace() || c == '=' || c == '>' || c == '/' {
503                    break;
504                }
505                attr_name.push(c);
506                chars.next();
507            }
508
509            if attr_name.is_empty() {
510                break;
511            }
512
513            // Skip whitespace before =
514            while chars.peek().is_some_and(|c| c.is_whitespace()) {
515                chars.next();
516            }
517
518            // Check for = and value
519            if chars.peek() == Some(&'=') {
520                chars.next(); // consume =
521
522                // Skip whitespace after =
523                while chars.peek().is_some_and(|c| c.is_whitespace()) {
524                    chars.next();
525                }
526
527                // Read value
528                let mut value = String::new();
529                if let Some(&quote) = chars.peek() {
530                    if quote == '"' || quote == '\'' {
531                        chars.next(); // consume opening quote
532                        for c in chars.by_ref() {
533                            if c == quote {
534                                break;
535                            }
536                            value.push(c);
537                        }
538                    } else {
539                        // Unquoted value
540                        while let Some(&c) = chars.peek() {
541                            if c.is_whitespace() || c == '>' || c == '/' {
542                                break;
543                            }
544                            value.push(c);
545                            chars.next();
546                        }
547                    }
548                }
549                attrs.push((attr_name.to_ascii_lowercase(), Some(value)));
550            } else {
551                // Boolean attribute (no value)
552                attrs.push((attr_name.to_ascii_lowercase(), None));
553            }
554        }
555
556        attrs
557    }
558
559    /// Extract an HTML attribute value from a tag string.
560    /// Handles double quotes, single quotes, and unquoted values.
561    /// Returns None if the attribute is not found.
562    fn extract_attribute(tag: &str, attr_name: &str) -> Option<String> {
563        let attrs = Self::parse_attributes(tag);
564        let attr_lower = attr_name.to_ascii_lowercase();
565
566        attrs
567            .into_iter()
568            .find(|(name, _)| name == &attr_lower)
569            .and_then(|(_, value)| value)
570    }
571
572    /// Check if an HTML tag has extra attributes beyond the specified allowed ones.
573    /// Uses proper attribute parsing to avoid false positives from string matching.
574    fn has_extra_attributes(tag: &str, allowed_attrs: &[&str]) -> bool {
575        let attrs = Self::parse_attributes(tag);
576
577        // All event handlers (on*) are dangerous
578        // Plus common attributes that would be lost in markdown conversion
579        const DANGEROUS_ATTR_PREFIXES: &[&str] = &["on"]; // onclick, onload, onerror, etc.
580        const DANGEROUS_ATTRS: &[&str] = &[
581            "class",
582            "id",
583            "style",
584            "target",
585            "rel",
586            "download",
587            "referrerpolicy",
588            "crossorigin",
589            "loading",
590            "decoding",
591            "fetchpriority",
592            "sizes",
593            "srcset",
594            "usemap",
595            "ismap",
596            "width",
597            "height",
598            "name",   // anchor names
599            "data-*", // data attributes (checked separately)
600        ];
601
602        for (attr_name, _) in attrs {
603            // Skip allowed attributes
604            if allowed_attrs.iter().any(|a| a.to_ascii_lowercase() == attr_name) {
605                continue;
606            }
607
608            // Check for event handlers (on*)
609            for prefix in DANGEROUS_ATTR_PREFIXES {
610                if attr_name.starts_with(prefix) && attr_name.len() > prefix.len() {
611                    return true;
612                }
613            }
614
615            // Check for data-* attributes
616            if attr_name.starts_with("data-") {
617                return true;
618            }
619
620            // Check for other dangerous attributes
621            if DANGEROUS_ATTRS.contains(&attr_name.as_str()) {
622                return true;
623            }
624        }
625
626        false
627    }
628
629    /// Convert `<a href="url">text</a>` to `[text](url)` or `[text](url "title")`
630    /// Returns None if conversion is not safe.
631    fn convert_a_to_markdown(opening_tag: &str, inner_content: &str) -> Option<String> {
632        // Extract href attribute
633        let href = Self::extract_attribute(opening_tag, "href")?;
634
635        // Check URL is safe
636        if !MD033Config::is_safe_url(&href) {
637            return None;
638        }
639
640        // Check for nested HTML tags in content
641        if inner_content.contains('<') {
642            return None;
643        }
644
645        // Check for HTML entities that wouldn't render correctly in markdown
646        if inner_content.contains('&') && inner_content.contains(';') {
647            let has_entity = inner_content
648                .split('&')
649                .skip(1)
650                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
651            if has_entity {
652                return None;
653            }
654        }
655
656        // Extract optional title attribute
657        let title = Self::extract_attribute(opening_tag, "title");
658
659        // Check for extra dangerous attributes (title is allowed)
660        if Self::has_extra_attributes(opening_tag, &["href", "title"]) {
661            return None;
662        }
663
664        // Escape special markdown characters in link text
665        // Brackets need escaping to avoid breaking the link syntax
666        let escaped_text = inner_content.replace('[', r"\[").replace(']', r"\]");
667
668        // Escape parentheses in URL
669        let escaped_url = href.replace('(', "%28").replace(')', "%29");
670
671        // Format with or without title
672        if let Some(title_text) = title {
673            // Escape quotes in title
674            let escaped_title = title_text.replace('"', r#"\""#);
675            Some(format!("[{escaped_text}]({escaped_url} \"{escaped_title}\")"))
676        } else {
677            Some(format!("[{escaped_text}]({escaped_url})"))
678        }
679    }
680
681    /// Convert `<img src="url" alt="text">` to `![alt](src)` or `![alt](src "title")`
682    /// Returns None if conversion is not safe.
683    fn convert_img_to_markdown(tag: &str) -> Option<String> {
684        // Extract src attribute (required)
685        let src = Self::extract_attribute(tag, "src")?;
686
687        // Check URL is safe
688        if !MD033Config::is_safe_url(&src) {
689            return None;
690        }
691
692        // Extract alt attribute (optional, default to empty)
693        let alt = Self::extract_attribute(tag, "alt").unwrap_or_default();
694
695        // Extract optional title attribute
696        let title = Self::extract_attribute(tag, "title");
697
698        // Check for extra dangerous attributes (title is allowed)
699        if Self::has_extra_attributes(tag, &["src", "alt", "title"]) {
700            return None;
701        }
702
703        // Escape special markdown characters in alt text
704        let escaped_alt = alt.replace('[', r"\[").replace(']', r"\]");
705
706        // Escape parentheses in URL
707        let escaped_url = src.replace('(', "%28").replace(')', "%29");
708
709        // Format with or without title
710        if let Some(title_text) = title {
711            // Escape quotes in title
712            let escaped_title = title_text.replace('"', r#"\""#);
713            Some(format!("![{escaped_alt}]({escaped_url} \"{escaped_title}\")"))
714        } else {
715            Some(format!("![{escaped_alt}]({escaped_url})"))
716        }
717    }
718
719    /// Check if an HTML tag has attributes that would make conversion unsafe
720    fn has_significant_attributes(opening_tag: &str) -> bool {
721        // Tags with just whitespace or empty are fine
722        let tag_content = opening_tag
723            .trim_start_matches('<')
724            .trim_end_matches('>')
725            .trim_end_matches('/');
726
727        // Split by whitespace; if there's more than the tag name, it has attributes
728        let parts: Vec<&str> = tag_content.split_whitespace().collect();
729        parts.len() > 1
730    }
731
732    /// Check if a tag appears to be nested inside another HTML element
733    /// by looking at the surrounding context (e.g., `<code><em>text</em></code>`)
734    fn is_nested_in_html(content: &str, tag_byte_start: usize, tag_byte_end: usize) -> bool {
735        // Check if there's a `>` immediately before this tag (indicating inside another element)
736        if tag_byte_start > 0 {
737            let before = &content[..tag_byte_start];
738            let before_trimmed = before.trim_end();
739            if before_trimmed.ends_with('>') && !before_trimmed.ends_with("->") {
740                // Check it's not a closing tag or comment
741                if let Some(last_lt) = before_trimmed.rfind('<') {
742                    let potential_tag = &before_trimmed[last_lt..];
743                    // Skip if it's a closing tag (</...>) or comment (<!--)
744                    if !potential_tag.starts_with("</") && !potential_tag.starts_with("<!--") {
745                        return true;
746                    }
747                }
748            }
749        }
750        // Check if there's a `<` immediately after the closing tag (indicating inside another element)
751        if tag_byte_end < content.len() {
752            let after = &content[tag_byte_end..];
753            let after_trimmed = after.trim_start();
754            if after_trimmed.starts_with("</") {
755                return true;
756            }
757        }
758        false
759    }
760
761    /// Calculate fix to remove HTML tags while keeping content
762    ///
763    /// For self-closing tags like `<br/>`, returns a single fix to remove the tag.
764    /// For paired tags like `<span>text</span>`, returns the replacement text (just the content).
765    ///
766    /// Returns (range, replacement_text) where range is the bytes to replace
767    /// and replacement_text is what to put there (content without tags, or empty for self-closing).
768    ///
769    /// When `fix` is enabled and `in_html_block` is true, returns None to avoid
770    /// converting tags that are nested inside HTML block elements (like `<pre>`).
771    fn calculate_fix(
772        &self,
773        content: &str,
774        opening_tag: &str,
775        tag_byte_start: usize,
776        in_html_block: bool,
777    ) -> Option<(std::ops::Range<usize>, String)> {
778        // Extract tag name from opening tag
779        let tag_name = opening_tag
780            .trim_start_matches('<')
781            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
782            .next()?
783            .to_lowercase();
784
785        // Check if it's a self-closing tag (ends with /> or is a void element like <br>)
786        let is_self_closing =
787            opening_tag.ends_with("/>") || matches!(tag_name.as_str(), "br" | "hr" | "img" | "input" | "meta" | "link");
788
789        if is_self_closing {
790            // When fix is enabled, try to convert to Markdown equivalent
791            // But skip if we're inside an HTML block (would break structure)
792            if self.config.fix
793                && MD033Config::is_safe_fixable_tag(&tag_name)
794                && !in_html_block
795                && let Some(markdown) = self.convert_self_closing_to_markdown(&tag_name, opening_tag)
796            {
797                return Some((tag_byte_start..tag_byte_start + opening_tag.len(), markdown));
798            }
799            // Can't convert this self-closing tag to Markdown, don't provide a fix
800            // (e.g., <input>, <meta> - these have no Markdown equivalent without the new img support)
801            return None;
802        }
803
804        // Search for the closing tag after the opening tag (case-insensitive)
805        let search_start = tag_byte_start + opening_tag.len();
806        let search_slice = &content[search_start..];
807
808        // Find closing tag case-insensitively
809        let closing_tag_lower = format!("</{tag_name}>");
810        let closing_pos = search_slice.to_ascii_lowercase().find(&closing_tag_lower);
811
812        if let Some(closing_pos) = closing_pos {
813            // Get actual closing tag from original content to get correct byte length
814            let closing_tag_len = closing_tag_lower.len();
815            let closing_byte_start = search_start + closing_pos;
816            let closing_byte_end = closing_byte_start + closing_tag_len;
817
818            // Extract the content between tags
819            let inner_content = &content[search_start..closing_byte_start];
820
821            // Skip auto-fix if inside an HTML block (like <pre>, <div>, etc.)
822            // Converting tags inside HTML blocks would break the intended structure
823            if in_html_block {
824                return None;
825            }
826
827            // Skip auto-fix if this tag is nested inside another HTML element
828            // e.g., <code><em>text</em></code> - don't convert the inner <em>
829            if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
830                return None;
831            }
832
833            // When fix is enabled and tag is safe to convert, try markdown conversion
834            if self.config.fix && MD033Config::is_safe_fixable_tag(&tag_name) {
835                // Handle <a> tags specially - they require attribute extraction
836                if tag_name == "a" {
837                    if let Some(markdown) = Self::convert_a_to_markdown(opening_tag, inner_content) {
838                        return Some((tag_byte_start..closing_byte_end, markdown));
839                    }
840                    // convert_a_to_markdown returned None - unsafe URL, nested HTML, etc.
841                    return None;
842                }
843
844                // For simple tags (em, strong, code, etc.) - no attributes allowed
845                if Self::has_significant_attributes(opening_tag) {
846                    // Don't provide a fix for tags with attributes
847                    // User may want to keep the attributes, so leave as-is
848                    return None;
849                }
850                if let Some(markdown) = Self::convert_to_markdown(&tag_name, inner_content) {
851                    return Some((tag_byte_start..closing_byte_end, markdown));
852                }
853                // convert_to_markdown returned None, meaning content has nested tags or
854                // HTML entities that shouldn't be converted - leave as-is
855                return None;
856            }
857
858            // For non-fixable tags, don't provide a fix
859            // (e.g., <div>content</div>, <span>text</span>)
860            return None;
861        }
862
863        // If no closing tag found, don't provide a fix (malformed HTML)
864        None
865    }
866}
867
868impl Rule for MD033NoInlineHtml {
869    fn name(&self) -> &'static str {
870        "MD033"
871    }
872
873    fn description(&self) -> &'static str {
874        "Inline HTML is not allowed"
875    }
876
877    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
878        let content = ctx.content;
879
880        // Early return: if no HTML tags at all, skip processing
881        if content.is_empty() || !ctx.likely_has_html() {
882            return Ok(Vec::new());
883        }
884
885        // Quick check for HTML tag pattern before expensive processing
886        if !HTML_TAG_QUICK_CHECK.is_match(content) {
887            return Ok(Vec::new());
888        }
889
890        let mut warnings = Vec::new();
891        let lines: Vec<&str> = content.lines().collect();
892
893        // Track nomarkdown and comment blocks (Kramdown extension)
894        let mut in_nomarkdown = false;
895        let mut in_comment = false;
896        let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
897        let mut nomarkdown_start = 0;
898        let mut comment_start = 0;
899
900        for (i, line) in lines.iter().enumerate() {
901            let line_num = i + 1;
902
903            // Check for nomarkdown start
904            if line.trim() == "{::nomarkdown}" {
905                in_nomarkdown = true;
906                nomarkdown_start = line_num;
907            } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
908                in_nomarkdown = false;
909                nomarkdown_ranges.push((nomarkdown_start, line_num));
910            }
911
912            // Check for comment blocks
913            if line.trim() == "{::comment}" {
914                in_comment = true;
915                comment_start = line_num;
916            } else if line.trim() == "{:/comment}" && in_comment {
917                in_comment = false;
918                nomarkdown_ranges.push((comment_start, line_num));
919            }
920        }
921
922        // Use centralized HTML parser to get all HTML tags (including multiline)
923        let html_tags = ctx.html_tags();
924
925        for html_tag in html_tags.iter() {
926            // Skip closing tags (only warn on opening tags)
927            if html_tag.is_closing {
928                continue;
929            }
930
931            let line_num = html_tag.line;
932            let tag_byte_start = html_tag.byte_offset;
933
934            // Reconstruct tag string from byte offsets
935            let tag = &content[html_tag.byte_offset..html_tag.byte_end];
936
937            // Skip tags in code blocks or PyMdown blocks (uses proper detection from LintContext)
938            if ctx
939                .line_info(line_num)
940                .is_some_and(|info| info.in_code_block || info.in_pymdown_block)
941            {
942                continue;
943            }
944
945            // Skip Kramdown extensions and block attributes
946            if let Some(line) = lines.get(line_num.saturating_sub(1))
947                && (is_kramdown_extension(line) || is_kramdown_block_attribute(line))
948            {
949                continue;
950            }
951
952            // Skip lines inside nomarkdown blocks
953            if nomarkdown_ranges
954                .iter()
955                .any(|(start, end)| line_num >= *start && line_num <= *end)
956            {
957                continue;
958            }
959
960            // Skip HTML tags inside HTML comments
961            if ctx.is_in_html_comment(tag_byte_start) {
962                continue;
963            }
964
965            // Skip HTML comments themselves
966            if self.is_html_comment(tag) {
967                continue;
968            }
969
970            // Skip angle brackets inside link reference definition titles
971            // e.g., [ref]: url "Title with <angle brackets>"
972            if ctx.is_in_link_title(tag_byte_start) {
973                continue;
974            }
975
976            // Skip JSX components in MDX files (e.g., <Chart />, <MyComponent>)
977            if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
978                continue;
979            }
980
981            // Skip JSX fragments in MDX files (<> and </>)
982            if ctx.flavor.supports_jsx() && (html_tag.tag_name.is_empty() || tag == "<>" || tag == "</>") {
983                continue;
984            }
985
986            // Skip elements with JSX-specific attributes in MDX files
987            // e.g., <div className="...">, <button onClick={handler}>
988            if ctx.flavor.supports_jsx() && Self::has_jsx_attributes(tag) {
989                continue;
990            }
991
992            // Skip non-HTML elements (placeholder syntax like <NAME>, <resource>)
993            if !Self::is_html_element_or_custom(&html_tag.tag_name) {
994                continue;
995            }
996
997            // Skip likely programming type annotations
998            if self.is_likely_type_annotation(tag) {
999                continue;
1000            }
1001
1002            // Skip email addresses in angle brackets
1003            if self.is_email_address(tag) {
1004                continue;
1005            }
1006
1007            // Skip URLs in angle brackets
1008            if self.is_url_in_angle_brackets(tag) {
1009                continue;
1010            }
1011
1012            // Skip tags inside code spans (use byte offset for reliable multi-line span detection)
1013            if ctx.is_byte_offset_in_code_span(tag_byte_start) {
1014                continue;
1015            }
1016
1017            // Determine whether to report this tag based on mode:
1018            // - Disallowed mode: only report tags in the disallowed list
1019            // - Default mode: report all tags except those in the allowed list
1020            if self.is_disallowed_mode() {
1021                // In disallowed mode, skip tags NOT in the disallowed list
1022                if !self.is_tag_disallowed(tag) {
1023                    continue;
1024                }
1025            } else {
1026                // In default mode, skip allowed tags
1027                if self.is_tag_allowed(tag) {
1028                    continue;
1029                }
1030            }
1031
1032            // Skip tags with markdown attribute in MkDocs mode
1033            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
1034                continue;
1035            }
1036
1037            // Check if we're inside an HTML block (like <pre>, <div>, etc.)
1038            let in_html_block = ctx.is_in_html_block(line_num);
1039
1040            // Calculate fix to remove HTML tags but keep content
1041            let fix = self
1042                .calculate_fix(content, tag, tag_byte_start, in_html_block)
1043                .map(|(range, replacement)| Fix { range, replacement });
1044
1045            // Calculate actual end line and column for multiline tags
1046            // Use byte_end - 1 to get the last character position of the tag
1047            let (end_line, end_col) = if html_tag.byte_end > 0 {
1048                ctx.offset_to_line_col(html_tag.byte_end - 1)
1049            } else {
1050                (line_num, html_tag.end_col + 1)
1051            };
1052
1053            // Report the HTML tag
1054            warnings.push(LintWarning {
1055                rule_name: Some(self.name().to_string()),
1056                line: line_num,
1057                column: html_tag.start_col + 1, // Convert to 1-indexed
1058                end_line,                       // Actual end line for multiline tags
1059                end_column: end_col + 1,        // Actual end column
1060                message: format!("Inline HTML found: {tag}"),
1061                severity: Severity::Warning,
1062                fix,
1063            });
1064        }
1065
1066        Ok(warnings)
1067    }
1068
1069    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
1070        // Auto-fix is opt-in: only apply if explicitly enabled in config
1071        if !self.config.fix {
1072            return Ok(ctx.content.to_string());
1073        }
1074
1075        // Get warnings with their inline fixes
1076        let warnings = self.check(ctx)?;
1077
1078        // If no warnings with fixes, return original content
1079        if warnings.is_empty() || !warnings.iter().any(|w| w.fix.is_some()) {
1080            return Ok(ctx.content.to_string());
1081        }
1082
1083        // Collect all fixes and sort by range start (descending) to apply from end to beginning
1084        let mut fixes: Vec<_> = warnings
1085            .iter()
1086            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
1087            .collect();
1088        fixes.sort_by(|a, b| b.0.cmp(&a.0));
1089
1090        // Apply fixes from end to beginning to preserve byte offsets
1091        let mut result = ctx.content.to_string();
1092        for (start, end, replacement) in fixes {
1093            if start < result.len() && end <= result.len() && start <= end {
1094                result.replace_range(start..end, replacement);
1095            }
1096        }
1097
1098        Ok(result)
1099    }
1100
1101    fn fix_capability(&self) -> crate::rule::FixCapability {
1102        if self.config.fix {
1103            crate::rule::FixCapability::FullyFixable
1104        } else {
1105            crate::rule::FixCapability::Unfixable
1106        }
1107    }
1108
1109    /// Get the category of this rule for selective processing
1110    fn category(&self) -> RuleCategory {
1111        RuleCategory::Html
1112    }
1113
1114    /// Check if this rule should be skipped
1115    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
1116        ctx.content.is_empty() || !ctx.likely_has_html()
1117    }
1118
1119    fn as_any(&self) -> &dyn std::any::Any {
1120        self
1121    }
1122
1123    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1124        let json_value = serde_json::to_value(&self.config).ok()?;
1125        Some((
1126            self.name().to_string(),
1127            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1128        ))
1129    }
1130
1131    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1132    where
1133        Self: Sized,
1134    {
1135        let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
1136        Box::new(Self::from_config_struct(rule_config))
1137    }
1138}
1139
1140#[cfg(test)]
1141mod tests {
1142    use super::*;
1143    use crate::lint_context::LintContext;
1144    use crate::rule::Rule;
1145
1146    #[test]
1147    fn test_md033_basic_html() {
1148        let rule = MD033NoInlineHtml::default();
1149        let content = "<div>Some content</div>";
1150        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1151        let result = rule.check(&ctx).unwrap();
1152        // Only reports opening tags, not closing tags
1153        assert_eq!(result.len(), 1); // Only <div>, not </div>
1154        assert!(result[0].message.starts_with("Inline HTML found: <div>"));
1155    }
1156
1157    #[test]
1158    fn test_md033_case_insensitive() {
1159        let rule = MD033NoInlineHtml::default();
1160        let content = "<DiV>Some <B>content</B></dIv>";
1161        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1162        let result = rule.check(&ctx).unwrap();
1163        // Only reports opening tags, not closing tags
1164        assert_eq!(result.len(), 2); // <DiV>, <B> (not </B>, </dIv>)
1165        assert_eq!(result[0].message, "Inline HTML found: <DiV>");
1166        assert_eq!(result[1].message, "Inline HTML found: <B>");
1167    }
1168
1169    #[test]
1170    fn test_md033_allowed_tags() {
1171        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
1172        let content = "<div>Allowed</div><p>Not allowed</p><br/>";
1173        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1174        let result = rule.check(&ctx).unwrap();
1175        // Only warnings for non-allowed opening tags (<p> only, div and br are allowed)
1176        assert_eq!(result.len(), 1);
1177        assert_eq!(result[0].message, "Inline HTML found: <p>");
1178
1179        // Test case-insensitivity of allowed tags
1180        let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
1181        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1182        let result2 = rule.check(&ctx2).unwrap();
1183        assert_eq!(result2.len(), 1); // Only <P> flagged
1184        assert_eq!(result2[0].message, "Inline HTML found: <P>");
1185    }
1186
1187    #[test]
1188    fn test_md033_html_comments() {
1189        let rule = MD033NoInlineHtml::default();
1190        let content = "<!-- This is a comment --> <p>Not a comment</p>";
1191        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1192        let result = rule.check(&ctx).unwrap();
1193        // Should detect warnings for HTML opening tags (comments are skipped, closing tags not reported)
1194        assert_eq!(result.len(), 1); // Only <p>
1195        assert_eq!(result[0].message, "Inline HTML found: <p>");
1196    }
1197
1198    #[test]
1199    fn test_md033_tags_in_links() {
1200        let rule = MD033NoInlineHtml::default();
1201        let content = "[Link](http://example.com/<div>)";
1202        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1203        let result = rule.check(&ctx).unwrap();
1204        // The <div> in the URL should be detected as HTML (not skipped)
1205        assert_eq!(result.len(), 1);
1206        assert_eq!(result[0].message, "Inline HTML found: <div>");
1207
1208        let content2 = "[Link <a>text</a>](url)";
1209        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1210        let result2 = rule.check(&ctx2).unwrap();
1211        // Only reports opening tags
1212        assert_eq!(result2.len(), 1); // Only <a>
1213        assert_eq!(result2[0].message, "Inline HTML found: <a>");
1214    }
1215
1216    #[test]
1217    fn test_md033_fix_escaping() {
1218        let rule = MD033NoInlineHtml::default();
1219        let content = "Text with <div> and <br/> tags.";
1220        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1221        let fixed_content = rule.fix(&ctx).unwrap();
1222        // No fix for HTML tags; output should be unchanged
1223        assert_eq!(fixed_content, content);
1224    }
1225
1226    #[test]
1227    fn test_md033_in_code_blocks() {
1228        let rule = MD033NoInlineHtml::default();
1229        let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
1230        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1231        let result = rule.check(&ctx).unwrap();
1232        // Only reports opening tags outside code block
1233        assert_eq!(result.len(), 1); // Only <div> outside code block
1234        assert_eq!(result[0].message, "Inline HTML found: <div>");
1235    }
1236
1237    #[test]
1238    fn test_md033_in_code_spans() {
1239        let rule = MD033NoInlineHtml::default();
1240        let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
1241        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1242        let result = rule.check(&ctx).unwrap();
1243        // Should detect <br/> outside code span, but not tags inside code span
1244        assert_eq!(result.len(), 1);
1245        assert_eq!(result[0].message, "Inline HTML found: <br/>");
1246    }
1247
1248    #[test]
1249    fn test_md033_issue_90_code_span_with_diff_block() {
1250        // Test for issue #90: inline code span followed by diff code block
1251        let rule = MD033NoInlineHtml::default();
1252        let content = r#"# Heading
1253
1254`<env>`
1255
1256```diff
1257- this
1258+ that
1259```"#;
1260        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1261        let result = rule.check(&ctx).unwrap();
1262        // Should NOT detect <env> as HTML since it's inside backticks
1263        assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
1264    }
1265
1266    #[test]
1267    fn test_md033_multiple_code_spans_with_angle_brackets() {
1268        // Test multiple code spans on same line
1269        let rule = MD033NoInlineHtml::default();
1270        let content = "`<one>` and `<two>` and `<three>` are all code spans";
1271        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1272        let result = rule.check(&ctx).unwrap();
1273        assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
1274    }
1275
1276    #[test]
1277    fn test_md033_nested_angle_brackets_in_code_span() {
1278        // Test nested angle brackets
1279        let rule = MD033NoInlineHtml::default();
1280        let content = "Text with `<<nested>>` brackets";
1281        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1282        let result = rule.check(&ctx).unwrap();
1283        assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
1284    }
1285
1286    #[test]
1287    fn test_md033_code_span_at_end_before_code_block() {
1288        // Test code span at end of line before code block
1289        let rule = MD033NoInlineHtml::default();
1290        let content = "Testing `<test>`\n```\ncode here\n```";
1291        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1292        let result = rule.check(&ctx).unwrap();
1293        assert_eq!(result.len(), 0, "Should handle code span before code block");
1294    }
1295
1296    #[test]
1297    fn test_md033_quick_fix_inline_tag() {
1298        // Test that non-fixable tags (like <span>) do NOT get a fix
1299        // Only safe fixable tags (em, i, strong, b, code, br, hr) with fix=true get fixes
1300        let rule = MD033NoInlineHtml::default();
1301        let content = "This has <span>inline text</span> that should keep content.";
1302        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1303        let result = rule.check(&ctx).unwrap();
1304
1305        assert_eq!(result.len(), 1, "Should find one HTML tag");
1306        // <span> is NOT a safe fixable tag, so no fix should be provided
1307        assert!(
1308            result[0].fix.is_none(),
1309            "Non-fixable tags like <span> should not have a fix"
1310        );
1311    }
1312
1313    #[test]
1314    fn test_md033_quick_fix_multiline_tag() {
1315        // HTML block elements like <div> are intentionally NOT auto-fixed
1316        // Removing them would change document structure significantly
1317        let rule = MD033NoInlineHtml::default();
1318        let content = "<div>\nBlock content\n</div>";
1319        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1320        let result = rule.check(&ctx).unwrap();
1321
1322        assert_eq!(result.len(), 1, "Should find one HTML tag");
1323        // HTML block elements should NOT have auto-fix
1324        assert!(result[0].fix.is_none(), "HTML block elements should NOT have auto-fix");
1325    }
1326
1327    #[test]
1328    fn test_md033_quick_fix_self_closing_tag() {
1329        // Test that self-closing tags with fix=false (default) do NOT get a fix
1330        let rule = MD033NoInlineHtml::default();
1331        let content = "Self-closing: <br/>";
1332        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1333        let result = rule.check(&ctx).unwrap();
1334
1335        assert_eq!(result.len(), 1, "Should find one HTML tag");
1336        // Default config has fix=false, so no fix should be provided
1337        assert!(
1338            result[0].fix.is_none(),
1339            "Self-closing tags should not have a fix when fix config is false"
1340        );
1341    }
1342
1343    #[test]
1344    fn test_md033_quick_fix_multiple_tags() {
1345        // Test that multiple tags without fix=true do NOT get fixes
1346        // <span> is not a safe fixable tag, <strong> is but fix=false by default
1347        let rule = MD033NoInlineHtml::default();
1348        let content = "<span>first</span> and <strong>second</strong>";
1349        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1350        let result = rule.check(&ctx).unwrap();
1351
1352        assert_eq!(result.len(), 2, "Should find two HTML tags");
1353        // Neither should have a fix: <span> is not fixable, <strong> is but fix=false
1354        assert!(result[0].fix.is_none(), "Non-fixable <span> should not have a fix");
1355        assert!(
1356            result[1].fix.is_none(),
1357            "<strong> should not have a fix when fix config is false"
1358        );
1359    }
1360
1361    #[test]
1362    fn test_md033_skip_angle_brackets_in_link_titles() {
1363        // Angle brackets inside link reference definition titles should not be flagged as HTML
1364        let rule = MD033NoInlineHtml::default();
1365        let content = r#"# Test
1366
1367[example]: <https://example.com> "Title with <Angle Brackets> inside"
1368
1369Regular text with <div>content</div> HTML tag.
1370"#;
1371        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1372        let result = rule.check(&ctx).unwrap();
1373
1374        // Should only flag <div>, not <Angle Brackets> in the title (not a valid HTML element)
1375        // Opening tag only (markdownlint behavior)
1376        assert_eq!(result.len(), 1, "Should find opening div tag");
1377        assert!(
1378            result[0].message.contains("<div>"),
1379            "Should flag <div>, got: {}",
1380            result[0].message
1381        );
1382    }
1383
1384    #[test]
1385    fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
1386        // Test with single-quoted title
1387        let rule = MD033NoInlineHtml::default();
1388        let content = r#"[ref]: url 'Title <Help Wanted> here'
1389
1390<span>text</span> here
1391"#;
1392        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1393        let result = rule.check(&ctx).unwrap();
1394
1395        // <Help Wanted> is not a valid HTML element, so only <span> is flagged
1396        // Opening tag only (markdownlint behavior)
1397        assert_eq!(result.len(), 1, "Should find opening span tag");
1398        assert!(
1399            result[0].message.contains("<span>"),
1400            "Should flag <span>, got: {}",
1401            result[0].message
1402        );
1403    }
1404
1405    #[test]
1406    fn test_md033_multiline_tag_end_line_calculation() {
1407        // Test that multiline HTML tags report correct end_line
1408        let rule = MD033NoInlineHtml::default();
1409        let content = "<div\n  class=\"test\"\n  id=\"example\">";
1410        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1411        let result = rule.check(&ctx).unwrap();
1412
1413        assert_eq!(result.len(), 1, "Should find one HTML tag");
1414        // Tag starts on line 1
1415        assert_eq!(result[0].line, 1, "Start line should be 1");
1416        // Tag ends on line 3 (where the closing > is)
1417        assert_eq!(result[0].end_line, 3, "End line should be 3");
1418    }
1419
1420    #[test]
1421    fn test_md033_single_line_tag_same_start_end_line() {
1422        // Test that single-line HTML tags have same start and end line
1423        let rule = MD033NoInlineHtml::default();
1424        let content = "Some text <div class=\"test\"> more text";
1425        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1426        let result = rule.check(&ctx).unwrap();
1427
1428        assert_eq!(result.len(), 1, "Should find one HTML tag");
1429        assert_eq!(result[0].line, 1, "Start line should be 1");
1430        assert_eq!(result[0].end_line, 1, "End line should be 1 for single-line tag");
1431    }
1432
1433    #[test]
1434    fn test_md033_multiline_tag_with_many_attributes() {
1435        // Test multiline tag spanning multiple lines
1436        let rule = MD033NoInlineHtml::default();
1437        let content =
1438            "Text\n<div\n  data-attr1=\"value1\"\n  data-attr2=\"value2\"\n  data-attr3=\"value3\">\nMore text";
1439        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1440        let result = rule.check(&ctx).unwrap();
1441
1442        assert_eq!(result.len(), 1, "Should find one HTML tag");
1443        // Tag starts on line 2 (first line is "Text")
1444        assert_eq!(result[0].line, 2, "Start line should be 2");
1445        // Tag ends on line 5 (where the closing > is)
1446        assert_eq!(result[0].end_line, 5, "End line should be 5");
1447    }
1448
1449    #[test]
1450    fn test_md033_disallowed_mode_basic() {
1451        // Test disallowed mode: only flags tags in the disallowed list
1452        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string(), "iframe".to_string()]);
1453        let content = "<div>Safe content</div><script>alert('xss')</script>";
1454        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1455        let result = rule.check(&ctx).unwrap();
1456
1457        // Should only flag <script>, not <div>
1458        assert_eq!(result.len(), 1, "Should only flag disallowed tags");
1459        assert!(result[0].message.contains("<script>"), "Should flag script tag");
1460    }
1461
1462    #[test]
1463    fn test_md033_disallowed_gfm_security_tags() {
1464        // Test GFM security tags expansion
1465        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1466        let content = r#"
1467<div>Safe</div>
1468<title>Bad title</title>
1469<textarea>Bad textarea</textarea>
1470<style>.bad{}</style>
1471<iframe src="evil"></iframe>
1472<script>evil()</script>
1473<plaintext>old tag</plaintext>
1474<span>Safe span</span>
1475"#;
1476        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1477        let result = rule.check(&ctx).unwrap();
1478
1479        // Should flag: title, textarea, style, iframe, script, plaintext
1480        // Should NOT flag: div, span
1481        assert_eq!(result.len(), 6, "Should flag 6 GFM security tags");
1482
1483        let flagged_tags: Vec<&str> = result
1484            .iter()
1485            .filter_map(|w| w.message.split("<").nth(1))
1486            .filter_map(|s| s.split(">").next())
1487            .filter_map(|s| s.split_whitespace().next())
1488            .collect();
1489
1490        assert!(flagged_tags.contains(&"title"), "Should flag title");
1491        assert!(flagged_tags.contains(&"textarea"), "Should flag textarea");
1492        assert!(flagged_tags.contains(&"style"), "Should flag style");
1493        assert!(flagged_tags.contains(&"iframe"), "Should flag iframe");
1494        assert!(flagged_tags.contains(&"script"), "Should flag script");
1495        assert!(flagged_tags.contains(&"plaintext"), "Should flag plaintext");
1496        assert!(!flagged_tags.contains(&"div"), "Should NOT flag div");
1497        assert!(!flagged_tags.contains(&"span"), "Should NOT flag span");
1498    }
1499
1500    #[test]
1501    fn test_md033_disallowed_case_insensitive() {
1502        // Test that disallowed check is case-insensitive
1503        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string()]);
1504        let content = "<SCRIPT>alert('xss')</SCRIPT><Script>alert('xss')</Script>";
1505        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1506        let result = rule.check(&ctx).unwrap();
1507
1508        // Should flag both <SCRIPT> and <Script>
1509        assert_eq!(result.len(), 2, "Should flag both case variants");
1510    }
1511
1512    #[test]
1513    fn test_md033_disallowed_with_attributes() {
1514        // Test that disallowed mode works with tags that have attributes
1515        let rule = MD033NoInlineHtml::with_disallowed(vec!["iframe".to_string()]);
1516        let content = r#"<iframe src="https://evil.com" width="100" height="100"></iframe>"#;
1517        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1518        let result = rule.check(&ctx).unwrap();
1519
1520        assert_eq!(result.len(), 1, "Should flag iframe with attributes");
1521        assert!(result[0].message.contains("iframe"), "Should flag iframe");
1522    }
1523
1524    #[test]
1525    fn test_md033_disallowed_all_gfm_tags() {
1526        // Verify all GFM disallowed tags are covered
1527        use md033_config::GFM_DISALLOWED_TAGS;
1528        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1529
1530        for tag in GFM_DISALLOWED_TAGS {
1531            let content = format!("<{tag}>content</{tag}>");
1532            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
1533            let result = rule.check(&ctx).unwrap();
1534
1535            assert_eq!(result.len(), 1, "GFM tag <{tag}> should be flagged");
1536        }
1537    }
1538
1539    #[test]
1540    fn test_md033_disallowed_mixed_with_custom() {
1541        // Test mixing "gfm" with custom disallowed tags
1542        let rule = MD033NoInlineHtml::with_disallowed(vec![
1543            "gfm".to_string(),
1544            "marquee".to_string(), // Custom disallowed tag
1545        ]);
1546        let content = r#"<script>bad</script><marquee>annoying</marquee><div>ok</div>"#;
1547        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1548        let result = rule.check(&ctx).unwrap();
1549
1550        // Should flag script (gfm) and marquee (custom)
1551        assert_eq!(result.len(), 2, "Should flag both gfm and custom tags");
1552    }
1553
1554    #[test]
1555    fn test_md033_disallowed_empty_means_default_mode() {
1556        // Empty disallowed list means default mode (flag all HTML)
1557        let rule = MD033NoInlineHtml::with_disallowed(vec![]);
1558        let content = "<div>content</div>";
1559        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1560        let result = rule.check(&ctx).unwrap();
1561
1562        // Should flag <div> in default mode
1563        assert_eq!(result.len(), 1, "Empty disallowed = default mode");
1564    }
1565
1566    #[test]
1567    fn test_md033_jsx_fragments_in_mdx() {
1568        // JSX fragments (<> and </>) should not trigger warnings in MDX
1569        let rule = MD033NoInlineHtml::default();
1570        let content = r#"# MDX Document
1571
1572<>
1573  <Heading />
1574  <Content />
1575</>
1576
1577<div>Regular HTML should still be flagged</div>
1578"#;
1579        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1580        let result = rule.check(&ctx).unwrap();
1581
1582        // Should only flag <div>, not the fragments or JSX components
1583        assert_eq!(result.len(), 1, "Should only find one HTML tag (the div)");
1584        assert!(
1585            result[0].message.contains("<div>"),
1586            "Should flag <div>, not JSX fragments"
1587        );
1588    }
1589
1590    #[test]
1591    fn test_md033_jsx_components_in_mdx() {
1592        // JSX components (capitalized) should not trigger warnings in MDX
1593        let rule = MD033NoInlineHtml::default();
1594        let content = r#"<CustomComponent prop="value">
1595  Content
1596</CustomComponent>
1597
1598<MyButton onClick={handler}>Click</MyButton>
1599"#;
1600        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1601        let result = rule.check(&ctx).unwrap();
1602
1603        // No warnings - all are JSX components
1604        assert_eq!(result.len(), 0, "Should not flag JSX components in MDX");
1605    }
1606
1607    #[test]
1608    fn test_md033_jsx_not_skipped_in_standard_markdown() {
1609        // In standard markdown, capitalized tags should still be flagged if they're valid HTML
1610        let rule = MD033NoInlineHtml::default();
1611        let content = "<Script>alert(1)</Script>";
1612        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1613        let result = rule.check(&ctx).unwrap();
1614
1615        // Should flag <Script> in standard markdown (it's a valid HTML element)
1616        assert_eq!(result.len(), 1, "Should flag <Script> in standard markdown");
1617    }
1618
1619    #[test]
1620    fn test_md033_jsx_attributes_in_mdx() {
1621        // Elements with JSX-specific attributes should not trigger warnings in MDX
1622        let rule = MD033NoInlineHtml::default();
1623        let content = r#"# MDX with JSX Attributes
1624
1625<div className="card big">Content</div>
1626
1627<button onClick={handleClick}>Click me</button>
1628
1629<label htmlFor="input-id">Label</label>
1630
1631<input onChange={handleChange} />
1632
1633<div class="html-class">Regular HTML should be flagged</div>
1634"#;
1635        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1636        let result = rule.check(&ctx).unwrap();
1637
1638        // Should only flag the div with regular HTML "class" attribute
1639        assert_eq!(
1640            result.len(),
1641            1,
1642            "Should only flag HTML element without JSX attributes, got: {result:?}"
1643        );
1644        assert!(
1645            result[0].message.contains("<div class="),
1646            "Should flag the div with HTML class attribute"
1647        );
1648    }
1649
1650    #[test]
1651    fn test_md033_jsx_attributes_not_skipped_in_standard() {
1652        // In standard markdown, JSX attributes should still be flagged
1653        let rule = MD033NoInlineHtml::default();
1654        let content = r#"<div className="card">Content</div>"#;
1655        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1656        let result = rule.check(&ctx).unwrap();
1657
1658        // Should flag in standard markdown
1659        assert_eq!(result.len(), 1, "Should flag JSX-style elements in standard markdown");
1660    }
1661
1662    // Auto-fix tests for MD033
1663
1664    #[test]
1665    fn test_md033_fix_disabled_by_default() {
1666        // Auto-fix should be disabled by default
1667        let rule = MD033NoInlineHtml::default();
1668        assert!(!rule.config.fix, "Fix should be disabled by default");
1669        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::Unfixable);
1670    }
1671
1672    #[test]
1673    fn test_md033_fix_enabled_em_to_italic() {
1674        // When fix is enabled, <em>text</em> should convert to *text*
1675        let rule = MD033NoInlineHtml::with_fix(true);
1676        let content = "This has <em>emphasized text</em> here.";
1677        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1678        let fixed = rule.fix(&ctx).unwrap();
1679        assert_eq!(fixed, "This has *emphasized text* here.");
1680    }
1681
1682    #[test]
1683    fn test_md033_fix_enabled_i_to_italic() {
1684        // <i>text</i> should convert to *text*
1685        let rule = MD033NoInlineHtml::with_fix(true);
1686        let content = "This has <i>italic text</i> here.";
1687        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1688        let fixed = rule.fix(&ctx).unwrap();
1689        assert_eq!(fixed, "This has *italic text* here.");
1690    }
1691
1692    #[test]
1693    fn test_md033_fix_enabled_strong_to_bold() {
1694        // <strong>text</strong> should convert to **text**
1695        let rule = MD033NoInlineHtml::with_fix(true);
1696        let content = "This has <strong>bold text</strong> here.";
1697        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1698        let fixed = rule.fix(&ctx).unwrap();
1699        assert_eq!(fixed, "This has **bold text** here.");
1700    }
1701
1702    #[test]
1703    fn test_md033_fix_enabled_b_to_bold() {
1704        // <b>text</b> should convert to **text**
1705        let rule = MD033NoInlineHtml::with_fix(true);
1706        let content = "This has <b>bold text</b> here.";
1707        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1708        let fixed = rule.fix(&ctx).unwrap();
1709        assert_eq!(fixed, "This has **bold text** here.");
1710    }
1711
1712    #[test]
1713    fn test_md033_fix_enabled_code_to_backticks() {
1714        // <code>text</code> should convert to `text`
1715        let rule = MD033NoInlineHtml::with_fix(true);
1716        let content = "This has <code>inline code</code> here.";
1717        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1718        let fixed = rule.fix(&ctx).unwrap();
1719        assert_eq!(fixed, "This has `inline code` here.");
1720    }
1721
1722    #[test]
1723    fn test_md033_fix_enabled_code_with_backticks() {
1724        // <code>text with `backticks`</code> should use double backticks
1725        let rule = MD033NoInlineHtml::with_fix(true);
1726        let content = "This has <code>text with `backticks`</code> here.";
1727        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1728        let fixed = rule.fix(&ctx).unwrap();
1729        assert_eq!(fixed, "This has `` text with `backticks` `` here.");
1730    }
1731
1732    #[test]
1733    fn test_md033_fix_enabled_br_trailing_spaces() {
1734        // <br> should convert to two trailing spaces + newline (default)
1735        let rule = MD033NoInlineHtml::with_fix(true);
1736        let content = "First line<br>Second line";
1737        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1738        let fixed = rule.fix(&ctx).unwrap();
1739        assert_eq!(fixed, "First line  \nSecond line");
1740    }
1741
1742    #[test]
1743    fn test_md033_fix_enabled_br_self_closing() {
1744        // <br/> and <br /> should also convert
1745        let rule = MD033NoInlineHtml::with_fix(true);
1746        let content = "First<br/>second<br />third";
1747        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1748        let fixed = rule.fix(&ctx).unwrap();
1749        assert_eq!(fixed, "First  \nsecond  \nthird");
1750    }
1751
1752    #[test]
1753    fn test_md033_fix_enabled_br_backslash_style() {
1754        // With br_style = backslash, <br> should convert to backslash + newline
1755        let config = MD033Config {
1756            allowed: Vec::new(),
1757            disallowed: Vec::new(),
1758            fix: true,
1759            br_style: md033_config::BrStyle::Backslash,
1760        };
1761        let rule = MD033NoInlineHtml::from_config_struct(config);
1762        let content = "First line<br>Second line";
1763        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1764        let fixed = rule.fix(&ctx).unwrap();
1765        assert_eq!(fixed, "First line\\\nSecond line");
1766    }
1767
1768    #[test]
1769    fn test_md033_fix_enabled_hr() {
1770        // <hr> should convert to horizontal rule
1771        let rule = MD033NoInlineHtml::with_fix(true);
1772        let content = "Above<hr>Below";
1773        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1774        let fixed = rule.fix(&ctx).unwrap();
1775        assert_eq!(fixed, "Above\n---\nBelow");
1776    }
1777
1778    #[test]
1779    fn test_md033_fix_enabled_hr_self_closing() {
1780        // <hr/> should also convert
1781        let rule = MD033NoInlineHtml::with_fix(true);
1782        let content = "Above<hr/>Below";
1783        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1784        let fixed = rule.fix(&ctx).unwrap();
1785        assert_eq!(fixed, "Above\n---\nBelow");
1786    }
1787
1788    #[test]
1789    fn test_md033_fix_skips_nested_tags() {
1790        // Tags with nested HTML - outer tags may not be fully fixed due to overlapping ranges
1791        // The inner tags are processed first, which can invalidate outer tag ranges
1792        let rule = MD033NoInlineHtml::with_fix(true);
1793        let content = "This has <em>text with <strong>nested</strong> tags</em> here.";
1794        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1795        let fixed = rule.fix(&ctx).unwrap();
1796        // Inner <strong> is converted to markdown, outer <em> range becomes invalid
1797        // This is expected behavior - user should run fix multiple times for nested tags
1798        assert_eq!(fixed, "This has <em>text with **nested** tags</em> here.");
1799    }
1800
1801    #[test]
1802    fn test_md033_fix_skips_tags_with_attributes() {
1803        // Tags with attributes should NOT be fixed at all - leave as-is
1804        // User may want to keep the attributes (e.g., class="highlight" for styling)
1805        let rule = MD033NoInlineHtml::with_fix(true);
1806        let content = "This has <em class=\"highlight\">emphasized</em> text.";
1807        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1808        let fixed = rule.fix(&ctx).unwrap();
1809        // Content should remain unchanged - we don't know if attributes matter
1810        assert_eq!(fixed, content);
1811    }
1812
1813    #[test]
1814    fn test_md033_fix_disabled_no_changes() {
1815        // When fix is disabled, original content should be returned
1816        let rule = MD033NoInlineHtml::default(); // fix is false by default
1817        let content = "This has <em>emphasized text</em> here.";
1818        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1819        let fixed = rule.fix(&ctx).unwrap();
1820        assert_eq!(fixed, content, "Should return original content when fix is disabled");
1821    }
1822
1823    #[test]
1824    fn test_md033_fix_capability_enabled() {
1825        let rule = MD033NoInlineHtml::with_fix(true);
1826        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::FullyFixable);
1827    }
1828
1829    #[test]
1830    fn test_md033_fix_multiple_tags() {
1831        // Test fixing multiple HTML tags in one document
1832        let rule = MD033NoInlineHtml::with_fix(true);
1833        let content = "Here is <em>italic</em> and <strong>bold</strong> text.";
1834        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1835        let fixed = rule.fix(&ctx).unwrap();
1836        assert_eq!(fixed, "Here is *italic* and **bold** text.");
1837    }
1838
1839    #[test]
1840    fn test_md033_fix_uppercase_tags() {
1841        // HTML tags are case-insensitive
1842        let rule = MD033NoInlineHtml::with_fix(true);
1843        let content = "This has <EM>emphasized</EM> text.";
1844        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1845        let fixed = rule.fix(&ctx).unwrap();
1846        assert_eq!(fixed, "This has *emphasized* text.");
1847    }
1848
1849    #[test]
1850    fn test_md033_fix_unsafe_tags_not_modified() {
1851        // Tags without safe markdown equivalents should NOT be modified
1852        // Only safe fixable tags (em, i, strong, b, code, br, hr) get converted
1853        let rule = MD033NoInlineHtml::with_fix(true);
1854        let content = "This has <div>a div</div> content.";
1855        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1856        let fixed = rule.fix(&ctx).unwrap();
1857        // <div> is not a safe fixable tag, so content should be unchanged
1858        assert_eq!(fixed, "This has <div>a div</div> content.");
1859    }
1860
1861    #[test]
1862    fn test_md033_fix_img_tag_converted() {
1863        // <img> tags with simple src/alt attributes are converted to markdown images
1864        let rule = MD033NoInlineHtml::with_fix(true);
1865        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\">";
1866        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1867        let fixed = rule.fix(&ctx).unwrap();
1868        // <img> is converted to ![alt](src) format
1869        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
1870    }
1871
1872    #[test]
1873    fn test_md033_fix_img_tag_with_extra_attrs_not_converted() {
1874        // <img> tags with width/height/style attributes are NOT converted
1875        let rule = MD033NoInlineHtml::with_fix(true);
1876        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
1877        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1878        let fixed = rule.fix(&ctx).unwrap();
1879        // Has width attribute - not safe to convert
1880        assert_eq!(fixed, "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">");
1881    }
1882
1883    #[test]
1884    fn test_md033_fix_mixed_safe_tags() {
1885        // All tags are now safe fixable (em, img, strong)
1886        let rule = MD033NoInlineHtml::with_fix(true);
1887        let content = "<em>italic</em> and <img src=\"x.jpg\"> and <strong>bold</strong>";
1888        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1889        let fixed = rule.fix(&ctx).unwrap();
1890        // All are converted
1891        assert_eq!(fixed, "*italic* and ![](x.jpg) and **bold**");
1892    }
1893
1894    #[test]
1895    fn test_md033_fix_multiple_tags_same_line() {
1896        // Multiple tags on the same line should all be fixed correctly
1897        let rule = MD033NoInlineHtml::with_fix(true);
1898        let content = "Regular text <i>italic</i> and <b>bold</b> here.";
1899        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1900        let fixed = rule.fix(&ctx).unwrap();
1901        assert_eq!(fixed, "Regular text *italic* and **bold** here.");
1902    }
1903
1904    #[test]
1905    fn test_md033_fix_multiple_em_tags_same_line() {
1906        // Multiple em/strong tags on the same line
1907        let rule = MD033NoInlineHtml::with_fix(true);
1908        let content = "<em>first</em> and <strong>second</strong> and <code>third</code>";
1909        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1910        let fixed = rule.fix(&ctx).unwrap();
1911        assert_eq!(fixed, "*first* and **second** and `third`");
1912    }
1913
1914    #[test]
1915    fn test_md033_fix_skips_tags_inside_pre() {
1916        // Tags inside <pre> blocks should NOT be fixed (would break structure)
1917        let rule = MD033NoInlineHtml::with_fix(true);
1918        let content = "<pre><code><em>VALUE</em></code></pre>";
1919        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1920        let fixed = rule.fix(&ctx).unwrap();
1921        // The <em> inside <pre><code> should NOT be converted
1922        // Only the outer structure might be changed
1923        assert!(
1924            !fixed.contains("*VALUE*"),
1925            "Tags inside <pre> should not be converted to markdown. Got: {fixed}"
1926        );
1927    }
1928
1929    #[test]
1930    fn test_md033_fix_skips_tags_inside_div() {
1931        // Tags inside HTML block elements should not be fixed
1932        let rule = MD033NoInlineHtml::with_fix(true);
1933        let content = "<div>\n<em>emphasized</em>\n</div>";
1934        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1935        let fixed = rule.fix(&ctx).unwrap();
1936        // The <em> inside <div> should not be converted to *emphasized*
1937        assert!(
1938            !fixed.contains("*emphasized*"),
1939            "Tags inside HTML blocks should not be converted. Got: {fixed}"
1940        );
1941    }
1942
1943    #[test]
1944    fn test_md033_fix_outside_html_block() {
1945        // Tags outside HTML blocks should still be fixed
1946        let rule = MD033NoInlineHtml::with_fix(true);
1947        let content = "<div>\ncontent\n</div>\n\nOutside <em>emphasized</em> text.";
1948        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1949        let fixed = rule.fix(&ctx).unwrap();
1950        // The <em> outside the div should be converted
1951        assert!(
1952            fixed.contains("*emphasized*"),
1953            "Tags outside HTML blocks should be converted. Got: {fixed}"
1954        );
1955    }
1956
1957    #[test]
1958    fn test_md033_fix_with_id_attribute() {
1959        // Tags with id attributes should not be fixed (id might be used for anchors)
1960        let rule = MD033NoInlineHtml::with_fix(true);
1961        let content = "See <em id=\"important\">this note</em> for details.";
1962        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1963        let fixed = rule.fix(&ctx).unwrap();
1964        // Should remain unchanged - id attribute matters for linking
1965        assert_eq!(fixed, content);
1966    }
1967
1968    #[test]
1969    fn test_md033_fix_with_style_attribute() {
1970        // Tags with style attributes should not be fixed
1971        let rule = MD033NoInlineHtml::with_fix(true);
1972        let content = "This is <strong style=\"color: red\">important</strong> text.";
1973        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1974        let fixed = rule.fix(&ctx).unwrap();
1975        // Should remain unchanged - style attribute provides formatting
1976        assert_eq!(fixed, content);
1977    }
1978
1979    #[test]
1980    fn test_md033_fix_mixed_with_and_without_attributes() {
1981        // Mix of tags with and without attributes
1982        let rule = MD033NoInlineHtml::with_fix(true);
1983        let content = "<em>normal</em> and <em class=\"special\">styled</em> text.";
1984        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1985        let fixed = rule.fix(&ctx).unwrap();
1986        // Only the tag without attributes should be fixed
1987        assert_eq!(fixed, "*normal* and <em class=\"special\">styled</em> text.");
1988    }
1989
1990    #[test]
1991    fn test_md033_quick_fix_tag_with_attributes_no_fix() {
1992        // Quick fix should not be provided for tags with attributes
1993        let rule = MD033NoInlineHtml::with_fix(true);
1994        let content = "<em class=\"test\">emphasized</em>";
1995        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1996        let result = rule.check(&ctx).unwrap();
1997
1998        assert_eq!(result.len(), 1, "Should find one HTML tag");
1999        // No fix should be provided for tags with attributes
2000        assert!(
2001            result[0].fix.is_none(),
2002            "Should NOT have a fix for tags with attributes"
2003        );
2004    }
2005
2006    #[test]
2007    fn test_md033_fix_skips_html_entities() {
2008        // Tags containing HTML entities should NOT be fixed
2009        // HTML entities need HTML context to render; markdown won't process them
2010        let rule = MD033NoInlineHtml::with_fix(true);
2011        let content = "<code>&vert;</code>";
2012        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2013        let fixed = rule.fix(&ctx).unwrap();
2014        // Should remain unchanged - converting would break rendering
2015        assert_eq!(fixed, content);
2016    }
2017
2018    #[test]
2019    fn test_md033_fix_skips_multiple_html_entities() {
2020        // Multiple HTML entities should also be skipped
2021        let rule = MD033NoInlineHtml::with_fix(true);
2022        let content = "<code>&lt;T&gt;</code>";
2023        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2024        let fixed = rule.fix(&ctx).unwrap();
2025        // Should remain unchanged
2026        assert_eq!(fixed, content);
2027    }
2028
2029    #[test]
2030    fn test_md033_fix_allows_ampersand_without_entity() {
2031        // Content with & but no semicolon should still be fixed
2032        let rule = MD033NoInlineHtml::with_fix(true);
2033        let content = "<code>a & b</code>";
2034        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2035        let fixed = rule.fix(&ctx).unwrap();
2036        // Should be converted since & is not part of an entity
2037        assert_eq!(fixed, "`a & b`");
2038    }
2039
2040    #[test]
2041    fn test_md033_fix_em_with_entities_skipped() {
2042        // <em> with entities should also be skipped
2043        let rule = MD033NoInlineHtml::with_fix(true);
2044        let content = "<em>&nbsp;text</em>";
2045        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2046        let fixed = rule.fix(&ctx).unwrap();
2047        // Should remain unchanged
2048        assert_eq!(fixed, content);
2049    }
2050
2051    #[test]
2052    fn test_md033_fix_skips_nested_em_in_code() {
2053        // Tags nested inside other HTML elements should NOT be fixed
2054        // e.g., <code><em>n</em></code> - the <em> should not be converted
2055        let rule = MD033NoInlineHtml::with_fix(true);
2056        let content = "<code><em>n</em></code>";
2057        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2058        let fixed = rule.fix(&ctx).unwrap();
2059        // The inner <em> should NOT be converted to *n* because it's nested
2060        // The whole structure should be left as-is (or outer code converted, but not inner)
2061        assert!(
2062            !fixed.contains("*n*"),
2063            "Nested <em> should not be converted to markdown. Got: {fixed}"
2064        );
2065    }
2066
2067    #[test]
2068    fn test_md033_fix_skips_nested_in_table() {
2069        // Tags nested in HTML structures in tables should not be fixed
2070        let rule = MD033NoInlineHtml::with_fix(true);
2071        let content = "| <code>><em>n</em></code> | description |";
2072        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2073        let fixed = rule.fix(&ctx).unwrap();
2074        // Should not convert nested <em> to *n*
2075        assert!(
2076            !fixed.contains("*n*"),
2077            "Nested tags in table should not be converted. Got: {fixed}"
2078        );
2079    }
2080
2081    #[test]
2082    fn test_md033_fix_standalone_em_still_converted() {
2083        // Standalone (non-nested) <em> should still be converted
2084        let rule = MD033NoInlineHtml::with_fix(true);
2085        let content = "This is <em>emphasized</em> text.";
2086        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2087        let fixed = rule.fix(&ctx).unwrap();
2088        assert_eq!(fixed, "This is *emphasized* text.");
2089    }
2090
2091    // ==========================================================================
2092    // Obsidian Templater Plugin Syntax Tests
2093    //
2094    // Templater is a popular Obsidian plugin that uses `<% ... %>` syntax for
2095    // template interpolation. The `<%` pattern is NOT captured by the HTML tag
2096    // parser because `%` is not a valid HTML tag name character (tags must start
2097    // with a letter). This behavior is documented here with comprehensive tests.
2098    //
2099    // Reference: https://silentvoid13.github.io/Templater/
2100    // ==========================================================================
2101
2102    #[test]
2103    fn test_md033_templater_basic_interpolation_not_flagged() {
2104        // Basic Templater interpolation: <% expr %>
2105        // Should NOT be flagged because `%` is not a valid HTML tag character
2106        let rule = MD033NoInlineHtml::default();
2107        let content = "Today is <% tp.date.now() %> which is nice.";
2108        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2109        let result = rule.check(&ctx).unwrap();
2110        assert!(
2111            result.is_empty(),
2112            "Templater basic interpolation should not be flagged as HTML. Got: {result:?}"
2113        );
2114    }
2115
2116    #[test]
2117    fn test_md033_templater_file_functions_not_flagged() {
2118        // Templater file functions: <% tp.file.* %>
2119        let rule = MD033NoInlineHtml::default();
2120        let content = "File: <% tp.file.title %>\nCreated: <% tp.file.creation_date() %>";
2121        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2122        let result = rule.check(&ctx).unwrap();
2123        assert!(
2124            result.is_empty(),
2125            "Templater file functions should not be flagged. Got: {result:?}"
2126        );
2127    }
2128
2129    #[test]
2130    fn test_md033_templater_with_arguments_not_flagged() {
2131        // Templater with function arguments
2132        let rule = MD033NoInlineHtml::default();
2133        let content = r#"Date: <% tp.date.now("YYYY-MM-DD") %>"#;
2134        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2135        let result = rule.check(&ctx).unwrap();
2136        assert!(
2137            result.is_empty(),
2138            "Templater with arguments should not be flagged. Got: {result:?}"
2139        );
2140    }
2141
2142    #[test]
2143    fn test_md033_templater_javascript_execution_not_flagged() {
2144        // Templater JavaScript execution block: <%* code %>
2145        let rule = MD033NoInlineHtml::default();
2146        let content = "<%* const today = tp.date.now(); tR += today; %>";
2147        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2148        let result = rule.check(&ctx).unwrap();
2149        assert!(
2150            result.is_empty(),
2151            "Templater JS execution block should not be flagged. Got: {result:?}"
2152        );
2153    }
2154
2155    #[test]
2156    fn test_md033_templater_dynamic_execution_not_flagged() {
2157        // Templater dynamic/preview execution: <%+ expr %>
2158        let rule = MD033NoInlineHtml::default();
2159        let content = "Dynamic: <%+ tp.date.now() %>";
2160        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2161        let result = rule.check(&ctx).unwrap();
2162        assert!(
2163            result.is_empty(),
2164            "Templater dynamic execution should not be flagged. Got: {result:?}"
2165        );
2166    }
2167
2168    #[test]
2169    fn test_md033_templater_whitespace_trim_all_not_flagged() {
2170        // Templater whitespace control - trim all: <%_ expr _%>
2171        let rule = MD033NoInlineHtml::default();
2172        let content = "<%_ tp.date.now() _%>";
2173        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2174        let result = rule.check(&ctx).unwrap();
2175        assert!(
2176            result.is_empty(),
2177            "Templater trim-all whitespace should not be flagged. Got: {result:?}"
2178        );
2179    }
2180
2181    #[test]
2182    fn test_md033_templater_whitespace_trim_newline_not_flagged() {
2183        // Templater whitespace control - trim newline: <%- expr -%>
2184        let rule = MD033NoInlineHtml::default();
2185        let content = "<%- tp.date.now() -%>";
2186        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2187        let result = rule.check(&ctx).unwrap();
2188        assert!(
2189            result.is_empty(),
2190            "Templater trim-newline should not be flagged. Got: {result:?}"
2191        );
2192    }
2193
2194    #[test]
2195    fn test_md033_templater_combined_modifiers_not_flagged() {
2196        // Templater combined whitespace and execution modifiers
2197        let rule = MD033NoInlineHtml::default();
2198        let contents = [
2199            "<%-* const x = 1; -%>",  // trim + JS execution
2200            "<%_+ tp.date.now() _%>", // trim-all + dynamic
2201            "<%- tp.file.title -%>",  // trim-newline only
2202            "<%_ tp.file.title _%>",  // trim-all only
2203        ];
2204        for content in contents {
2205            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2206            let result = rule.check(&ctx).unwrap();
2207            assert!(
2208                result.is_empty(),
2209                "Templater combined modifiers should not be flagged: {content}. Got: {result:?}"
2210            );
2211        }
2212    }
2213
2214    #[test]
2215    fn test_md033_templater_multiline_block_not_flagged() {
2216        // Multi-line Templater JavaScript block
2217        let rule = MD033NoInlineHtml::default();
2218        let content = r#"<%*
2219const x = 1;
2220const y = 2;
2221tR += x + y;
2222%>"#;
2223        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2224        let result = rule.check(&ctx).unwrap();
2225        assert!(
2226            result.is_empty(),
2227            "Templater multi-line block should not be flagged. Got: {result:?}"
2228        );
2229    }
2230
2231    #[test]
2232    fn test_md033_templater_with_angle_brackets_in_condition_not_flagged() {
2233        // Templater with angle brackets in JavaScript condition
2234        // This is a key edge case: `<` inside Templater should not trigger HTML detection
2235        let rule = MD033NoInlineHtml::default();
2236        let content = "<%* if (x < 5) { tR += 'small'; } %>";
2237        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2238        let result = rule.check(&ctx).unwrap();
2239        assert!(
2240            result.is_empty(),
2241            "Templater with angle brackets in conditions should not be flagged. Got: {result:?}"
2242        );
2243    }
2244
2245    #[test]
2246    fn test_md033_templater_mixed_with_html_only_html_flagged() {
2247        // Templater syntax mixed with actual HTML - only HTML should be flagged
2248        let rule = MD033NoInlineHtml::default();
2249        let content = "<% tp.date.now() %> is today's date. <div>This is HTML</div>";
2250        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2251        let result = rule.check(&ctx).unwrap();
2252        assert_eq!(result.len(), 1, "Should only flag the HTML div tag");
2253        assert!(
2254            result[0].message.contains("<div>"),
2255            "Should flag <div>, got: {}",
2256            result[0].message
2257        );
2258    }
2259
2260    #[test]
2261    fn test_md033_templater_in_heading_not_flagged() {
2262        // Templater in markdown heading
2263        let rule = MD033NoInlineHtml::default();
2264        let content = "# <% tp.file.title %>";
2265        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2266        let result = rule.check(&ctx).unwrap();
2267        assert!(
2268            result.is_empty(),
2269            "Templater in heading should not be flagged. Got: {result:?}"
2270        );
2271    }
2272
2273    #[test]
2274    fn test_md033_templater_multiple_on_same_line_not_flagged() {
2275        // Multiple Templater blocks on same line
2276        let rule = MD033NoInlineHtml::default();
2277        let content = "From <% tp.date.now() %> to <% tp.date.tomorrow() %> we have meetings.";
2278        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2279        let result = rule.check(&ctx).unwrap();
2280        assert!(
2281            result.is_empty(),
2282            "Multiple Templater blocks should not be flagged. Got: {result:?}"
2283        );
2284    }
2285
2286    #[test]
2287    fn test_md033_templater_in_code_block_not_flagged() {
2288        // Templater syntax in code blocks should not be flagged (code blocks are skipped)
2289        let rule = MD033NoInlineHtml::default();
2290        let content = "```\n<% tp.date.now() %>\n```";
2291        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2292        let result = rule.check(&ctx).unwrap();
2293        assert!(
2294            result.is_empty(),
2295            "Templater in code block should not be flagged. Got: {result:?}"
2296        );
2297    }
2298
2299    #[test]
2300    fn test_md033_templater_in_inline_code_not_flagged() {
2301        // Templater syntax in inline code span should not be flagged
2302        let rule = MD033NoInlineHtml::default();
2303        let content = "Use `<% tp.date.now() %>` for current date.";
2304        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2305        let result = rule.check(&ctx).unwrap();
2306        assert!(
2307            result.is_empty(),
2308            "Templater in inline code should not be flagged. Got: {result:?}"
2309        );
2310    }
2311
2312    #[test]
2313    fn test_md033_templater_also_works_in_standard_flavor() {
2314        // Templater syntax should also not be flagged in Standard flavor
2315        // because the HTML parser doesn't recognize `<%` as a valid tag
2316        let rule = MD033NoInlineHtml::default();
2317        let content = "<% tp.date.now() %> works everywhere.";
2318        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2319        let result = rule.check(&ctx).unwrap();
2320        assert!(
2321            result.is_empty(),
2322            "Templater should not be flagged even in Standard flavor. Got: {result:?}"
2323        );
2324    }
2325
2326    #[test]
2327    fn test_md033_templater_empty_tag_not_flagged() {
2328        // Empty Templater tags
2329        let rule = MD033NoInlineHtml::default();
2330        let content = "<%>";
2331        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2332        let result = rule.check(&ctx).unwrap();
2333        assert!(
2334            result.is_empty(),
2335            "Empty Templater-like tag should not be flagged. Got: {result:?}"
2336        );
2337    }
2338
2339    #[test]
2340    fn test_md033_templater_unclosed_not_flagged() {
2341        // Unclosed Templater tags - these are template errors, not HTML
2342        let rule = MD033NoInlineHtml::default();
2343        let content = "<% tp.date.now() without closing tag";
2344        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2345        let result = rule.check(&ctx).unwrap();
2346        assert!(
2347            result.is_empty(),
2348            "Unclosed Templater should not be flagged as HTML. Got: {result:?}"
2349        );
2350    }
2351
2352    #[test]
2353    fn test_md033_templater_with_newlines_inside_not_flagged() {
2354        // Templater with newlines inside the expression
2355        let rule = MD033NoInlineHtml::default();
2356        let content = r#"<% tp.date.now("YYYY") +
2357"-" +
2358tp.date.now("MM") %>"#;
2359        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2360        let result = rule.check(&ctx).unwrap();
2361        assert!(
2362            result.is_empty(),
2363            "Templater with internal newlines should not be flagged. Got: {result:?}"
2364        );
2365    }
2366
2367    #[test]
2368    fn test_md033_erb_style_tags_not_flagged() {
2369        // ERB/EJS style tags (similar to Templater) are also not HTML
2370        // This documents the general principle that `<%` is not valid HTML
2371        let rule = MD033NoInlineHtml::default();
2372        let content = "<%= variable %> and <% code %> and <%# comment %>";
2373        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2374        let result = rule.check(&ctx).unwrap();
2375        assert!(
2376            result.is_empty(),
2377            "ERB/EJS style tags should not be flagged as HTML. Got: {result:?}"
2378        );
2379    }
2380
2381    #[test]
2382    fn test_md033_templater_complex_expression_not_flagged() {
2383        // Complex Templater expression with multiple function calls
2384        let rule = MD033NoInlineHtml::default();
2385        let content = r#"<%*
2386const file = tp.file.title;
2387const date = tp.date.now("YYYY-MM-DD");
2388const folder = tp.file.folder();
2389tR += `# ${file}\n\nCreated: ${date}\nIn: ${folder}`;
2390%>"#;
2391        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2392        let result = rule.check(&ctx).unwrap();
2393        assert!(
2394            result.is_empty(),
2395            "Complex Templater expression should not be flagged. Got: {result:?}"
2396        );
2397    }
2398
2399    #[test]
2400    fn test_md033_percent_sign_variations_not_flagged() {
2401        // Various patterns starting with <% that should all be safe
2402        let rule = MD033NoInlineHtml::default();
2403        let patterns = [
2404            "<%=",  // ERB output
2405            "<%#",  // ERB comment
2406            "<%%",  // Double percent
2407            "<%!",  // Some template engines
2408            "<%@",  // JSP directive
2409            "<%--", // JSP comment
2410        ];
2411        for pattern in patterns {
2412            let content = format!("{pattern} content %>");
2413            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
2414            let result = rule.check(&ctx).unwrap();
2415            assert!(
2416                result.is_empty(),
2417                "Pattern {pattern} should not be flagged. Got: {result:?}"
2418            );
2419        }
2420    }
2421}