Skip to main content

rumdl_lib/rules/
md033_no_inline_html.rs

1//!
2//! Rule MD033: No HTML tags
3//!
4//! See [docs/md033.md](../../docs/md033.md) for full documentation, configuration, and examples.
5
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::regex_cache::*;
9use std::collections::HashSet;
10
11mod md033_config;
12use md033_config::MD033Config;
13
/// Rule MD033: reports HTML tags found in Markdown content.
///
/// Besides the configuration itself, the rule keeps two tag-name sets that are
/// computed from the configuration once, when the rule is constructed.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    /// Configuration this rule instance was built from.
    config: MD033Config,
    /// Result of `config.allowed_set()`; consulted by `is_tag_allowed`.
    allowed: HashSet<String>,
    /// Result of `config.disallowed_set()`; consulted by `is_tag_disallowed`.
    disallowed: HashSet<String>,
}
20
21impl Default for MD033NoInlineHtml {
22    fn default() -> Self {
23        let config = MD033Config::default();
24        let allowed = config.allowed_set();
25        let disallowed = config.disallowed_set();
26        Self {
27            config,
28            allowed,
29            disallowed,
30        }
31    }
32}
33
34impl MD033NoInlineHtml {
35    pub fn new() -> Self {
36        Self::default()
37    }
38
39    pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
40        let config = MD033Config {
41            allowed: allowed_vec.clone(),
42            disallowed: Vec::new(),
43            fix: false,
44            br_style: md033_config::BrStyle::default(),
45        };
46        let allowed = config.allowed_set();
47        let disallowed = config.disallowed_set();
48        Self {
49            config,
50            allowed,
51            disallowed,
52        }
53    }
54
55    pub fn with_disallowed(disallowed_vec: Vec<String>) -> Self {
56        let config = MD033Config {
57            allowed: Vec::new(),
58            disallowed: disallowed_vec.clone(),
59            fix: false,
60            br_style: md033_config::BrStyle::default(),
61        };
62        let allowed = config.allowed_set();
63        let disallowed = config.disallowed_set();
64        Self {
65            config,
66            allowed,
67            disallowed,
68        }
69    }
70
71    /// Create a new rule with auto-fix enabled
72    pub fn with_fix(fix: bool) -> Self {
73        let config = MD033Config {
74            allowed: Vec::new(),
75            disallowed: Vec::new(),
76            fix,
77            br_style: md033_config::BrStyle::default(),
78        };
79        let allowed = config.allowed_set();
80        let disallowed = config.disallowed_set();
81        Self {
82            config,
83            allowed,
84            disallowed,
85        }
86    }
87
88    pub fn from_config_struct(config: MD033Config) -> Self {
89        let allowed = config.allowed_set();
90        let disallowed = config.disallowed_set();
91        Self {
92            config,
93            allowed,
94            disallowed,
95        }
96    }
97
98    // Efficient check for allowed tags using HashSet (case-insensitive)
99    #[inline]
100    fn is_tag_allowed(&self, tag: &str) -> bool {
101        if self.allowed.is_empty() {
102            return false;
103        }
104        // Remove angle brackets and slashes, then split by whitespace or '>'
105        let tag = tag.trim_start_matches('<').trim_start_matches('/');
106        let tag_name = tag
107            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
108            .next()
109            .unwrap_or("");
110        self.allowed.contains(&tag_name.to_lowercase())
111    }
112
113    /// Check if a tag is in the disallowed set (for disallowed-only mode)
114    #[inline]
115    fn is_tag_disallowed(&self, tag: &str) -> bool {
116        if self.disallowed.is_empty() {
117            return false;
118        }
119        // Remove angle brackets and slashes, then split by whitespace or '>'
120        let tag = tag.trim_start_matches('<').trim_start_matches('/');
121        let tag_name = tag
122            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
123            .next()
124            .unwrap_or("");
125        self.disallowed.contains(&tag_name.to_lowercase())
126    }
127
    /// Check if operating in disallowed-only mode.
    ///
    /// Thin wrapper: the decision is made entirely by `MD033Config::is_disallowed_mode`.
    #[inline]
    fn is_disallowed_mode(&self) -> bool {
        self.config.is_disallowed_mode()
    }
133
134    // Check if a tag is an HTML comment
135    #[inline]
136    fn is_html_comment(&self, tag: &str) -> bool {
137        tag.starts_with("<!--") && tag.ends_with("-->")
138    }
139
140    /// Check if a tag name is a valid HTML element or custom element.
141    /// Returns false for placeholder syntax like `<NAME>`, `<resource>`, `<actual>`.
142    ///
143    /// Per HTML spec, custom elements must contain a hyphen (e.g., `<my-component>`).
144    #[inline]
145    fn is_html_element_or_custom(tag_name: &str) -> bool {
146        const HTML_ELEMENTS: &[&str] = &[
147            // Document structure
148            "html",
149            "head",
150            "body",
151            "title",
152            "base",
153            "link",
154            "meta",
155            "style",
156            // Sections
157            "article",
158            "section",
159            "nav",
160            "aside",
161            "h1",
162            "h2",
163            "h3",
164            "h4",
165            "h5",
166            "h6",
167            "hgroup",
168            "header",
169            "footer",
170            "address",
171            "main",
172            "search",
173            // Grouping
174            "p",
175            "hr",
176            "pre",
177            "blockquote",
178            "ol",
179            "ul",
180            "menu",
181            "li",
182            "dl",
183            "dt",
184            "dd",
185            "figure",
186            "figcaption",
187            "div",
188            // Text-level
189            "a",
190            "em",
191            "strong",
192            "small",
193            "s",
194            "cite",
195            "q",
196            "dfn",
197            "abbr",
198            "ruby",
199            "rt",
200            "rp",
201            "data",
202            "time",
203            "code",
204            "var",
205            "samp",
206            "kbd",
207            "sub",
208            "sup",
209            "i",
210            "b",
211            "u",
212            "mark",
213            "bdi",
214            "bdo",
215            "span",
216            "br",
217            "wbr",
218            // Edits
219            "ins",
220            "del",
221            // Embedded
222            "picture",
223            "source",
224            "img",
225            "iframe",
226            "embed",
227            "object",
228            "param",
229            "video",
230            "audio",
231            "track",
232            "map",
233            "area",
234            "svg",
235            "math",
236            "canvas",
237            // Tables
238            "table",
239            "caption",
240            "colgroup",
241            "col",
242            "tbody",
243            "thead",
244            "tfoot",
245            "tr",
246            "td",
247            "th",
248            // Forms
249            "form",
250            "label",
251            "input",
252            "button",
253            "select",
254            "datalist",
255            "optgroup",
256            "option",
257            "textarea",
258            "output",
259            "progress",
260            "meter",
261            "fieldset",
262            "legend",
263            // Interactive
264            "details",
265            "summary",
266            "dialog",
267            // Scripting
268            "script",
269            "noscript",
270            "template",
271            "slot",
272            // Deprecated but recognized
273            "acronym",
274            "applet",
275            "basefont",
276            "big",
277            "center",
278            "dir",
279            "font",
280            "frame",
281            "frameset",
282            "isindex",
283            "marquee",
284            "noembed",
285            "noframes",
286            "plaintext",
287            "strike",
288            "tt",
289            "xmp",
290        ];
291
292        let lower = tag_name.to_ascii_lowercase();
293        if HTML_ELEMENTS.contains(&lower.as_str()) {
294            return true;
295        }
296        // Custom elements must contain a hyphen per HTML spec
297        tag_name.contains('-')
298    }
299
300    // Check if a tag is likely a programming type annotation rather than HTML
301    #[inline]
302    fn is_likely_type_annotation(&self, tag: &str) -> bool {
303        // Common programming type names that are often used in generics
304        const COMMON_TYPES: &[&str] = &[
305            "string",
306            "number",
307            "any",
308            "void",
309            "null",
310            "undefined",
311            "array",
312            "promise",
313            "function",
314            "error",
315            "date",
316            "regexp",
317            "symbol",
318            "bigint",
319            "map",
320            "set",
321            "weakmap",
322            "weakset",
323            "iterator",
324            "generator",
325            "t",
326            "u",
327            "v",
328            "k",
329            "e", // Common single-letter type parameters
330            "userdata",
331            "apiresponse",
332            "config",
333            "options",
334            "params",
335            "result",
336            "response",
337            "request",
338            "data",
339            "item",
340            "element",
341            "node",
342        ];
343
344        let tag_content = tag
345            .trim_start_matches('<')
346            .trim_end_matches('>')
347            .trim_start_matches('/');
348        let tag_name = tag_content
349            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
350            .next()
351            .unwrap_or("");
352
353        // Check if it's a simple tag (no attributes) with a common type name
354        if !tag_content.contains(' ') && !tag_content.contains('=') {
355            COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
356        } else {
357            false
358        }
359    }
360
361    // Check if a tag is actually an email address in angle brackets
362    #[inline]
363    fn is_email_address(&self, tag: &str) -> bool {
364        let content = tag.trim_start_matches('<').trim_end_matches('>');
365        // Simple email pattern: contains @ and has reasonable structure
366        content.contains('@')
367            && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
368            && content.split('@').count() == 2
369            && content.split('@').all(|part| !part.is_empty())
370    }
371
372    // Check if a tag has the markdown attribute (MkDocs/Material for MkDocs)
373    #[inline]
374    fn has_markdown_attribute(&self, tag: &str) -> bool {
375        // Check for various forms of markdown attribute
376        // Examples: <div markdown>, <div markdown="1">, <div class="result" markdown>
377        tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
378    }
379
380    /// Check if a tag contains JSX-specific attributes that indicate it's JSX, not HTML
381    /// JSX uses different attribute names than HTML:
382    /// - `className` instead of `class`
383    /// - `htmlFor` instead of `for`
384    /// - camelCase event handlers (`onClick`, `onChange`, `onSubmit`, etc.)
385    /// - JSX expression syntax `={...}` for dynamic values
386    #[inline]
387    fn has_jsx_attributes(tag: &str) -> bool {
388        // JSX-specific attribute names (HTML uses class, for, onclick, etc.)
389        tag.contains("className")
390            || tag.contains("htmlFor")
391            || tag.contains("dangerouslySetInnerHTML")
392            // camelCase event handlers (JSX uses onClick, HTML uses onclick)
393            || tag.contains("onClick")
394            || tag.contains("onChange")
395            || tag.contains("onSubmit")
396            || tag.contains("onFocus")
397            || tag.contains("onBlur")
398            || tag.contains("onKeyDown")
399            || tag.contains("onKeyUp")
400            || tag.contains("onKeyPress")
401            || tag.contains("onMouseDown")
402            || tag.contains("onMouseUp")
403            || tag.contains("onMouseEnter")
404            || tag.contains("onMouseLeave")
405            // JSX expression syntax: ={expression} or ={ expression }
406            || tag.contains("={")
407    }
408
409    // Check if a tag is actually a URL in angle brackets
410    #[inline]
411    fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
412        let content = tag.trim_start_matches('<').trim_end_matches('>');
413        // Check for common URL schemes
414        content.starts_with("http://")
415            || content.starts_with("https://")
416            || content.starts_with("ftp://")
417            || content.starts_with("ftps://")
418            || content.starts_with("mailto:")
419    }
420
421    /// Convert paired HTML tags to their Markdown equivalents.
422    /// Returns None if the tag cannot be safely converted (has nested tags, HTML entities, etc.)
423    fn convert_to_markdown(tag_name: &str, inner_content: &str) -> Option<String> {
424        // Skip if content contains nested HTML tags
425        if inner_content.contains('<') {
426            return None;
427        }
428        // Skip if content contains HTML entities (e.g., &vert;, &amp;, &lt;)
429        // These need HTML context to render correctly; markdown won't process them
430        if inner_content.contains('&') && inner_content.contains(';') {
431            // Check for common HTML entity patterns
432            let has_entity = inner_content
433                .split('&')
434                .skip(1)
435                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
436            if has_entity {
437                return None;
438            }
439        }
440        match tag_name {
441            "em" | "i" => Some(format!("*{inner_content}*")),
442            "strong" | "b" => Some(format!("**{inner_content}**")),
443            "code" => {
444                // Handle backticks in content by using double backticks with padding
445                if inner_content.contains('`') {
446                    Some(format!("`` {inner_content} ``"))
447                } else {
448                    Some(format!("`{inner_content}`"))
449                }
450            }
451            _ => None,
452        }
453    }
454
455    /// Convert self-closing HTML tags to their Markdown equivalents.
456    fn convert_self_closing_to_markdown(&self, tag_name: &str, opening_tag: &str) -> Option<String> {
457        match tag_name {
458            "br" => match self.config.br_style {
459                md033_config::BrStyle::TrailingSpaces => Some("  \n".to_string()),
460                md033_config::BrStyle::Backslash => Some("\\\n".to_string()),
461            },
462            "hr" => Some("\n---\n".to_string()),
463            "img" => Self::convert_img_to_markdown(opening_tag),
464            _ => None,
465        }
466    }
467
468    /// Parse all attributes from an HTML tag into a list of (name, value) pairs.
469    /// This provides proper attribute parsing instead of naive string matching.
470    fn parse_attributes(tag: &str) -> Vec<(String, Option<String>)> {
471        let mut attrs = Vec::new();
472
473        // Remove < and > and tag name
474        let tag_content = tag.trim_start_matches('<').trim_end_matches('>').trim_end_matches('/');
475
476        // Find first whitespace to skip tag name
477        let attr_start = tag_content
478            .find(|c: char| c.is_whitespace())
479            .map(|i| i + 1)
480            .unwrap_or(tag_content.len());
481
482        if attr_start >= tag_content.len() {
483            return attrs;
484        }
485
486        let attr_str = &tag_content[attr_start..];
487        let mut chars = attr_str.chars().peekable();
488
489        while chars.peek().is_some() {
490            // Skip whitespace
491            while chars.peek().is_some_and(|c| c.is_whitespace()) {
492                chars.next();
493            }
494
495            if chars.peek().is_none() {
496                break;
497            }
498
499            // Read attribute name
500            let mut attr_name = String::new();
501            while let Some(&c) = chars.peek() {
502                if c.is_whitespace() || c == '=' || c == '>' || c == '/' {
503                    break;
504                }
505                attr_name.push(c);
506                chars.next();
507            }
508
509            if attr_name.is_empty() {
510                break;
511            }
512
513            // Skip whitespace before =
514            while chars.peek().is_some_and(|c| c.is_whitespace()) {
515                chars.next();
516            }
517
518            // Check for = and value
519            if chars.peek() == Some(&'=') {
520                chars.next(); // consume =
521
522                // Skip whitespace after =
523                while chars.peek().is_some_and(|c| c.is_whitespace()) {
524                    chars.next();
525                }
526
527                // Read value
528                let mut value = String::new();
529                if let Some(&quote) = chars.peek() {
530                    if quote == '"' || quote == '\'' {
531                        chars.next(); // consume opening quote
532                        for c in chars.by_ref() {
533                            if c == quote {
534                                break;
535                            }
536                            value.push(c);
537                        }
538                    } else {
539                        // Unquoted value
540                        while let Some(&c) = chars.peek() {
541                            if c.is_whitespace() || c == '>' || c == '/' {
542                                break;
543                            }
544                            value.push(c);
545                            chars.next();
546                        }
547                    }
548                }
549                attrs.push((attr_name.to_ascii_lowercase(), Some(value)));
550            } else {
551                // Boolean attribute (no value)
552                attrs.push((attr_name.to_ascii_lowercase(), None));
553            }
554        }
555
556        attrs
557    }
558
559    /// Extract an HTML attribute value from a tag string.
560    /// Handles double quotes, single quotes, and unquoted values.
561    /// Returns None if the attribute is not found.
562    fn extract_attribute(tag: &str, attr_name: &str) -> Option<String> {
563        let attrs = Self::parse_attributes(tag);
564        let attr_lower = attr_name.to_ascii_lowercase();
565
566        attrs
567            .into_iter()
568            .find(|(name, _)| name == &attr_lower)
569            .and_then(|(_, value)| value)
570    }
571
572    /// Check if an HTML tag has extra attributes beyond the specified allowed ones.
573    /// Uses proper attribute parsing to avoid false positives from string matching.
574    fn has_extra_attributes(tag: &str, allowed_attrs: &[&str]) -> bool {
575        let attrs = Self::parse_attributes(tag);
576
577        // All event handlers (on*) are dangerous
578        // Plus common attributes that would be lost in markdown conversion
579        const DANGEROUS_ATTR_PREFIXES: &[&str] = &["on"]; // onclick, onload, onerror, etc.
580        const DANGEROUS_ATTRS: &[&str] = &[
581            "class",
582            "id",
583            "style",
584            "target",
585            "rel",
586            "download",
587            "referrerpolicy",
588            "crossorigin",
589            "loading",
590            "decoding",
591            "fetchpriority",
592            "sizes",
593            "srcset",
594            "usemap",
595            "ismap",
596            "width",
597            "height",
598            "name",   // anchor names
599            "data-*", // data attributes (checked separately)
600        ];
601
602        for (attr_name, _) in attrs {
603            // Skip allowed attributes
604            if allowed_attrs.iter().any(|a| a.to_ascii_lowercase() == attr_name) {
605                continue;
606            }
607
608            // Check for event handlers (on*)
609            for prefix in DANGEROUS_ATTR_PREFIXES {
610                if attr_name.starts_with(prefix) && attr_name.len() > prefix.len() {
611                    return true;
612                }
613            }
614
615            // Check for data-* attributes
616            if attr_name.starts_with("data-") {
617                return true;
618            }
619
620            // Check for other dangerous attributes
621            if DANGEROUS_ATTRS.contains(&attr_name.as_str()) {
622                return true;
623            }
624        }
625
626        false
627    }
628
629    /// Convert `<a href="url">text</a>` to `[text](url)` or `[text](url "title")`
630    /// Returns None if conversion is not safe.
631    fn convert_a_to_markdown(opening_tag: &str, inner_content: &str) -> Option<String> {
632        // Extract href attribute
633        let href = Self::extract_attribute(opening_tag, "href")?;
634
635        // Check URL is safe
636        if !MD033Config::is_safe_url(&href) {
637            return None;
638        }
639
640        // Check for nested HTML tags in content
641        if inner_content.contains('<') {
642            return None;
643        }
644
645        // Check for HTML entities that wouldn't render correctly in markdown
646        if inner_content.contains('&') && inner_content.contains(';') {
647            let has_entity = inner_content
648                .split('&')
649                .skip(1)
650                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
651            if has_entity {
652                return None;
653            }
654        }
655
656        // Extract optional title attribute
657        let title = Self::extract_attribute(opening_tag, "title");
658
659        // Check for extra dangerous attributes (title is allowed)
660        if Self::has_extra_attributes(opening_tag, &["href", "title"]) {
661            return None;
662        }
663
664        // Escape special markdown characters in link text
665        // Brackets need escaping to avoid breaking the link syntax
666        let escaped_text = inner_content.replace('[', r"\[").replace(']', r"\]");
667
668        // Escape parentheses in URL
669        let escaped_url = href.replace('(', "%28").replace(')', "%29");
670
671        // Format with or without title
672        if let Some(title_text) = title {
673            // Escape quotes in title
674            let escaped_title = title_text.replace('"', r#"\""#);
675            Some(format!("[{escaped_text}]({escaped_url} \"{escaped_title}\")"))
676        } else {
677            Some(format!("[{escaped_text}]({escaped_url})"))
678        }
679    }
680
681    /// Convert `<img src="url" alt="text">` to `![alt](src)` or `![alt](src "title")`
682    /// Returns None if conversion is not safe.
683    fn convert_img_to_markdown(tag: &str) -> Option<String> {
684        // Extract src attribute (required)
685        let src = Self::extract_attribute(tag, "src")?;
686
687        // Check URL is safe
688        if !MD033Config::is_safe_url(&src) {
689            return None;
690        }
691
692        // Extract alt attribute (optional, default to empty)
693        let alt = Self::extract_attribute(tag, "alt").unwrap_or_default();
694
695        // Extract optional title attribute
696        let title = Self::extract_attribute(tag, "title");
697
698        // Check for extra dangerous attributes (title is allowed)
699        if Self::has_extra_attributes(tag, &["src", "alt", "title"]) {
700            return None;
701        }
702
703        // Escape special markdown characters in alt text
704        let escaped_alt = alt.replace('[', r"\[").replace(']', r"\]");
705
706        // Escape parentheses in URL
707        let escaped_url = src.replace('(', "%28").replace(')', "%29");
708
709        // Format with or without title
710        if let Some(title_text) = title {
711            // Escape quotes in title
712            let escaped_title = title_text.replace('"', r#"\""#);
713            Some(format!("![{escaped_alt}]({escaped_url} \"{escaped_title}\")"))
714        } else {
715            Some(format!("![{escaped_alt}]({escaped_url})"))
716        }
717    }
718
719    /// Check if an HTML tag has attributes that would make conversion unsafe
720    fn has_significant_attributes(opening_tag: &str) -> bool {
721        // Tags with just whitespace or empty are fine
722        let tag_content = opening_tag
723            .trim_start_matches('<')
724            .trim_end_matches('>')
725            .trim_end_matches('/');
726
727        // Split by whitespace; if there's more than the tag name, it has attributes
728        let parts: Vec<&str> = tag_content.split_whitespace().collect();
729        parts.len() > 1
730    }
731
732    /// Check if a tag appears to be nested inside another HTML element
733    /// by looking at the surrounding context (e.g., `<code><em>text</em></code>`)
734    fn is_nested_in_html(content: &str, tag_byte_start: usize, tag_byte_end: usize) -> bool {
735        // Check if there's a `>` immediately before this tag (indicating inside another element)
736        if tag_byte_start > 0 {
737            let before = &content[..tag_byte_start];
738            let before_trimmed = before.trim_end();
739            if before_trimmed.ends_with('>') && !before_trimmed.ends_with("->") {
740                // Check it's not a closing tag or comment
741                if let Some(last_lt) = before_trimmed.rfind('<') {
742                    let potential_tag = &before_trimmed[last_lt..];
743                    // Skip if it's a closing tag (</...>) or comment (<!--)
744                    if !potential_tag.starts_with("</") && !potential_tag.starts_with("<!--") {
745                        return true;
746                    }
747                }
748            }
749        }
750        // Check if there's a `<` immediately after the closing tag (indicating inside another element)
751        if tag_byte_end < content.len() {
752            let after = &content[tag_byte_end..];
753            let after_trimmed = after.trim_start();
754            if after_trimmed.starts_with("</") {
755                return true;
756            }
757        }
758        false
759    }
760
761    /// Calculate fix to remove HTML tags while keeping content
762    ///
763    /// For self-closing tags like `<br/>`, returns a single fix to remove the tag.
764    /// For paired tags like `<span>text</span>`, returns the replacement text (just the content).
765    ///
766    /// Returns (range, replacement_text) where range is the bytes to replace
767    /// and replacement_text is what to put there (content without tags, or empty for self-closing).
768    ///
769    /// When `fix` is enabled and `in_html_block` is true, returns None to avoid
770    /// converting tags that are nested inside HTML block elements (like `<pre>`).
771    fn calculate_fix(
772        &self,
773        content: &str,
774        opening_tag: &str,
775        tag_byte_start: usize,
776        in_html_block: bool,
777    ) -> Option<(std::ops::Range<usize>, String)> {
778        // Extract tag name from opening tag
779        let tag_name = opening_tag
780            .trim_start_matches('<')
781            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
782            .next()?
783            .to_lowercase();
784
785        // Check if it's a self-closing tag (ends with /> or is a void element like <br>)
786        let is_self_closing =
787            opening_tag.ends_with("/>") || matches!(tag_name.as_str(), "br" | "hr" | "img" | "input" | "meta" | "link");
788
789        if is_self_closing {
790            // When fix is enabled, try to convert to Markdown equivalent
791            // But skip if we're inside an HTML block (would break structure)
792            if self.config.fix
793                && MD033Config::is_safe_fixable_tag(&tag_name)
794                && !in_html_block
795                && let Some(markdown) = self.convert_self_closing_to_markdown(&tag_name, opening_tag)
796            {
797                return Some((tag_byte_start..tag_byte_start + opening_tag.len(), markdown));
798            }
799            // Can't convert this self-closing tag to Markdown, don't provide a fix
800            // (e.g., <input>, <meta> - these have no Markdown equivalent without the new img support)
801            return None;
802        }
803
804        // Search for the closing tag after the opening tag (case-insensitive)
805        let search_start = tag_byte_start + opening_tag.len();
806        let search_slice = &content[search_start..];
807
808        // Find closing tag case-insensitively
809        let closing_tag_lower = format!("</{tag_name}>");
810        let closing_pos = search_slice.to_ascii_lowercase().find(&closing_tag_lower);
811
812        if let Some(closing_pos) = closing_pos {
813            // Get actual closing tag from original content to get correct byte length
814            let closing_tag_len = closing_tag_lower.len();
815            let closing_byte_start = search_start + closing_pos;
816            let closing_byte_end = closing_byte_start + closing_tag_len;
817
818            // Extract the content between tags
819            let inner_content = &content[search_start..closing_byte_start];
820
821            // Skip auto-fix if inside an HTML block (like <pre>, <div>, etc.)
822            // Converting tags inside HTML blocks would break the intended structure
823            if in_html_block {
824                return None;
825            }
826
827            // Skip auto-fix if this tag is nested inside another HTML element
828            // e.g., <code><em>text</em></code> - don't convert the inner <em>
829            if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
830                return None;
831            }
832
833            // When fix is enabled and tag is safe to convert, try markdown conversion
834            if self.config.fix && MD033Config::is_safe_fixable_tag(&tag_name) {
835                // Handle <a> tags specially - they require attribute extraction
836                if tag_name == "a" {
837                    if let Some(markdown) = Self::convert_a_to_markdown(opening_tag, inner_content) {
838                        return Some((tag_byte_start..closing_byte_end, markdown));
839                    }
840                    // convert_a_to_markdown returned None - unsafe URL, nested HTML, etc.
841                    return None;
842                }
843
844                // For simple tags (em, strong, code, etc.) - no attributes allowed
845                if Self::has_significant_attributes(opening_tag) {
846                    // Don't provide a fix for tags with attributes
847                    // User may want to keep the attributes, so leave as-is
848                    return None;
849                }
850                if let Some(markdown) = Self::convert_to_markdown(&tag_name, inner_content) {
851                    return Some((tag_byte_start..closing_byte_end, markdown));
852                }
853                // convert_to_markdown returned None, meaning content has nested tags or
854                // HTML entities that shouldn't be converted - leave as-is
855                return None;
856            }
857
858            // For non-fixable tags, don't provide a fix
859            // (e.g., <div>content</div>, <span>text</span>)
860            return None;
861        }
862
863        // If no closing tag found, don't provide a fix (malformed HTML)
864        None
865    }
866}
867
868impl Rule for MD033NoInlineHtml {
869    fn name(&self) -> &'static str {
870        "MD033"
871    }
872
873    fn description(&self) -> &'static str {
874        "Inline HTML is not allowed"
875    }
876
877    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
878        let content = ctx.content;
879
880        // Early return: if no HTML tags at all, skip processing
881        if content.is_empty() || !ctx.likely_has_html() {
882            return Ok(Vec::new());
883        }
884
885        // Quick check for HTML tag pattern before expensive processing
886        if !HTML_TAG_QUICK_CHECK.is_match(content) {
887            return Ok(Vec::new());
888        }
889
890        let mut warnings = Vec::new();
891        let lines: Vec<&str> = content.lines().collect();
892
893        // Track nomarkdown and comment blocks (Kramdown extension)
894        let mut in_nomarkdown = false;
895        let mut in_comment = false;
896        let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
897        let mut nomarkdown_start = 0;
898        let mut comment_start = 0;
899
900        for (i, line) in lines.iter().enumerate() {
901            let line_num = i + 1;
902
903            // Check for nomarkdown start
904            if line.trim() == "{::nomarkdown}" {
905                in_nomarkdown = true;
906                nomarkdown_start = line_num;
907            } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
908                in_nomarkdown = false;
909                nomarkdown_ranges.push((nomarkdown_start, line_num));
910            }
911
912            // Check for comment blocks
913            if line.trim() == "{::comment}" {
914                in_comment = true;
915                comment_start = line_num;
916            } else if line.trim() == "{:/comment}" && in_comment {
917                in_comment = false;
918                nomarkdown_ranges.push((comment_start, line_num));
919            }
920        }
921
922        // Use centralized HTML parser to get all HTML tags (including multiline)
923        let html_tags = ctx.html_tags();
924
925        for html_tag in html_tags.iter() {
926            // Skip closing tags (only warn on opening tags)
927            if html_tag.is_closing {
928                continue;
929            }
930
931            let line_num = html_tag.line;
932            let tag_byte_start = html_tag.byte_offset;
933
934            // Reconstruct tag string from byte offsets
935            let tag = &content[html_tag.byte_offset..html_tag.byte_end];
936
937            // Skip tags in code blocks (uses proper code block detection from LintContext)
938            if ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
939                continue;
940            }
941
942            // Skip Kramdown extensions and block attributes
943            if let Some(line) = lines.get(line_num.saturating_sub(1))
944                && (is_kramdown_extension(line) || is_kramdown_block_attribute(line))
945            {
946                continue;
947            }
948
949            // Skip lines inside nomarkdown blocks
950            if nomarkdown_ranges
951                .iter()
952                .any(|(start, end)| line_num >= *start && line_num <= *end)
953            {
954                continue;
955            }
956
957            // Skip HTML tags inside HTML comments
958            if ctx.is_in_html_comment(tag_byte_start) {
959                continue;
960            }
961
962            // Skip HTML comments themselves
963            if self.is_html_comment(tag) {
964                continue;
965            }
966
967            // Skip angle brackets inside link reference definition titles
968            // e.g., [ref]: url "Title with <angle brackets>"
969            if ctx.is_in_link_title(tag_byte_start) {
970                continue;
971            }
972
973            // Skip JSX components in MDX files (e.g., <Chart />, <MyComponent>)
974            if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
975                continue;
976            }
977
978            // Skip JSX fragments in MDX files (<> and </>)
979            if ctx.flavor.supports_jsx() && (html_tag.tag_name.is_empty() || tag == "<>" || tag == "</>") {
980                continue;
981            }
982
983            // Skip elements with JSX-specific attributes in MDX files
984            // e.g., <div className="...">, <button onClick={handler}>
985            if ctx.flavor.supports_jsx() && Self::has_jsx_attributes(tag) {
986                continue;
987            }
988
989            // Skip non-HTML elements (placeholder syntax like <NAME>, <resource>)
990            if !Self::is_html_element_or_custom(&html_tag.tag_name) {
991                continue;
992            }
993
994            // Skip likely programming type annotations
995            if self.is_likely_type_annotation(tag) {
996                continue;
997            }
998
999            // Skip email addresses in angle brackets
1000            if self.is_email_address(tag) {
1001                continue;
1002            }
1003
1004            // Skip URLs in angle brackets
1005            if self.is_url_in_angle_brackets(tag) {
1006                continue;
1007            }
1008
1009            // Skip tags inside code spans (use byte offset for reliable multi-line span detection)
1010            if ctx.is_byte_offset_in_code_span(tag_byte_start) {
1011                continue;
1012            }
1013
1014            // Determine whether to report this tag based on mode:
1015            // - Disallowed mode: only report tags in the disallowed list
1016            // - Default mode: report all tags except those in the allowed list
1017            if self.is_disallowed_mode() {
1018                // In disallowed mode, skip tags NOT in the disallowed list
1019                if !self.is_tag_disallowed(tag) {
1020                    continue;
1021                }
1022            } else {
1023                // In default mode, skip allowed tags
1024                if self.is_tag_allowed(tag) {
1025                    continue;
1026                }
1027            }
1028
1029            // Skip tags with markdown attribute in MkDocs mode
1030            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
1031                continue;
1032            }
1033
1034            // Check if we're inside an HTML block (like <pre>, <div>, etc.)
1035            let in_html_block = ctx.is_in_html_block(line_num);
1036
1037            // Calculate fix to remove HTML tags but keep content
1038            let fix = self
1039                .calculate_fix(content, tag, tag_byte_start, in_html_block)
1040                .map(|(range, replacement)| Fix { range, replacement });
1041
1042            // Calculate actual end line and column for multiline tags
1043            // Use byte_end - 1 to get the last character position of the tag
1044            let (end_line, end_col) = if html_tag.byte_end > 0 {
1045                ctx.offset_to_line_col(html_tag.byte_end - 1)
1046            } else {
1047                (line_num, html_tag.end_col + 1)
1048            };
1049
1050            // Report the HTML tag
1051            warnings.push(LintWarning {
1052                rule_name: Some(self.name().to_string()),
1053                line: line_num,
1054                column: html_tag.start_col + 1, // Convert to 1-indexed
1055                end_line,                       // Actual end line for multiline tags
1056                end_column: end_col + 1,        // Actual end column
1057                message: format!("Inline HTML found: {tag}"),
1058                severity: Severity::Warning,
1059                fix,
1060            });
1061        }
1062
1063        Ok(warnings)
1064    }
1065
1066    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
1067        // Auto-fix is opt-in: only apply if explicitly enabled in config
1068        if !self.config.fix {
1069            return Ok(ctx.content.to_string());
1070        }
1071
1072        // Get warnings with their inline fixes
1073        let warnings = self.check(ctx)?;
1074
1075        // If no warnings with fixes, return original content
1076        if warnings.is_empty() || !warnings.iter().any(|w| w.fix.is_some()) {
1077            return Ok(ctx.content.to_string());
1078        }
1079
1080        // Collect all fixes and sort by range start (descending) to apply from end to beginning
1081        let mut fixes: Vec<_> = warnings
1082            .iter()
1083            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
1084            .collect();
1085        fixes.sort_by(|a, b| b.0.cmp(&a.0));
1086
1087        // Apply fixes from end to beginning to preserve byte offsets
1088        let mut result = ctx.content.to_string();
1089        for (start, end, replacement) in fixes {
1090            if start < result.len() && end <= result.len() && start <= end {
1091                result.replace_range(start..end, replacement);
1092            }
1093        }
1094
1095        Ok(result)
1096    }
1097
1098    fn fix_capability(&self) -> crate::rule::FixCapability {
1099        if self.config.fix {
1100            crate::rule::FixCapability::FullyFixable
1101        } else {
1102            crate::rule::FixCapability::Unfixable
1103        }
1104    }
1105
1106    /// Get the category of this rule for selective processing
1107    fn category(&self) -> RuleCategory {
1108        RuleCategory::Html
1109    }
1110
1111    /// Check if this rule should be skipped
1112    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
1113        ctx.content.is_empty() || !ctx.likely_has_html()
1114    }
1115
1116    fn as_any(&self) -> &dyn std::any::Any {
1117        self
1118    }
1119
1120    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1121        let json_value = serde_json::to_value(&self.config).ok()?;
1122        Some((
1123            self.name().to_string(),
1124            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1125        ))
1126    }
1127
1128    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1129    where
1130        Self: Sized,
1131    {
1132        let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
1133        Box::new(Self::from_config_struct(rule_config))
1134    }
1135}
1136
1137#[cfg(test)]
1138mod tests {
1139    use super::*;
1140    use crate::lint_context::LintContext;
1141    use crate::rule::Rule;
1142
1143    #[test]
1144    fn test_md033_basic_html() {
1145        let rule = MD033NoInlineHtml::default();
1146        let content = "<div>Some content</div>";
1147        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1148        let result = rule.check(&ctx).unwrap();
1149        // Only reports opening tags, not closing tags
1150        assert_eq!(result.len(), 1); // Only <div>, not </div>
1151        assert!(result[0].message.starts_with("Inline HTML found: <div>"));
1152    }
1153
1154    #[test]
1155    fn test_md033_case_insensitive() {
1156        let rule = MD033NoInlineHtml::default();
1157        let content = "<DiV>Some <B>content</B></dIv>";
1158        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1159        let result = rule.check(&ctx).unwrap();
1160        // Only reports opening tags, not closing tags
1161        assert_eq!(result.len(), 2); // <DiV>, <B> (not </B>, </dIv>)
1162        assert_eq!(result[0].message, "Inline HTML found: <DiV>");
1163        assert_eq!(result[1].message, "Inline HTML found: <B>");
1164    }
1165
1166    #[test]
1167    fn test_md033_allowed_tags() {
1168        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
1169        let content = "<div>Allowed</div><p>Not allowed</p><br/>";
1170        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1171        let result = rule.check(&ctx).unwrap();
1172        // Only warnings for non-allowed opening tags (<p> only, div and br are allowed)
1173        assert_eq!(result.len(), 1);
1174        assert_eq!(result[0].message, "Inline HTML found: <p>");
1175
1176        // Test case-insensitivity of allowed tags
1177        let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
1178        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1179        let result2 = rule.check(&ctx2).unwrap();
1180        assert_eq!(result2.len(), 1); // Only <P> flagged
1181        assert_eq!(result2[0].message, "Inline HTML found: <P>");
1182    }
1183
1184    #[test]
1185    fn test_md033_html_comments() {
1186        let rule = MD033NoInlineHtml::default();
1187        let content = "<!-- This is a comment --> <p>Not a comment</p>";
1188        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1189        let result = rule.check(&ctx).unwrap();
1190        // Should detect warnings for HTML opening tags (comments are skipped, closing tags not reported)
1191        assert_eq!(result.len(), 1); // Only <p>
1192        assert_eq!(result[0].message, "Inline HTML found: <p>");
1193    }
1194
1195    #[test]
1196    fn test_md033_tags_in_links() {
1197        let rule = MD033NoInlineHtml::default();
1198        let content = "[Link](http://example.com/<div>)";
1199        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1200        let result = rule.check(&ctx).unwrap();
1201        // The <div> in the URL should be detected as HTML (not skipped)
1202        assert_eq!(result.len(), 1);
1203        assert_eq!(result[0].message, "Inline HTML found: <div>");
1204
1205        let content2 = "[Link <a>text</a>](url)";
1206        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1207        let result2 = rule.check(&ctx2).unwrap();
1208        // Only reports opening tags
1209        assert_eq!(result2.len(), 1); // Only <a>
1210        assert_eq!(result2[0].message, "Inline HTML found: <a>");
1211    }
1212
1213    #[test]
1214    fn test_md033_fix_escaping() {
1215        let rule = MD033NoInlineHtml::default();
1216        let content = "Text with <div> and <br/> tags.";
1217        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1218        let fixed_content = rule.fix(&ctx).unwrap();
1219        // No fix for HTML tags; output should be unchanged
1220        assert_eq!(fixed_content, content);
1221    }
1222
1223    #[test]
1224    fn test_md033_in_code_blocks() {
1225        let rule = MD033NoInlineHtml::default();
1226        let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
1227        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1228        let result = rule.check(&ctx).unwrap();
1229        // Only reports opening tags outside code block
1230        assert_eq!(result.len(), 1); // Only <div> outside code block
1231        assert_eq!(result[0].message, "Inline HTML found: <div>");
1232    }
1233
1234    #[test]
1235    fn test_md033_in_code_spans() {
1236        let rule = MD033NoInlineHtml::default();
1237        let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
1238        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1239        let result = rule.check(&ctx).unwrap();
1240        // Should detect <br/> outside code span, but not tags inside code span
1241        assert_eq!(result.len(), 1);
1242        assert_eq!(result[0].message, "Inline HTML found: <br/>");
1243    }
1244
1245    #[test]
1246    fn test_md033_issue_90_code_span_with_diff_block() {
1247        // Test for issue #90: inline code span followed by diff code block
1248        let rule = MD033NoInlineHtml::default();
1249        let content = r#"# Heading
1250
1251`<env>`
1252
1253```diff
1254- this
1255+ that
1256```"#;
1257        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1258        let result = rule.check(&ctx).unwrap();
1259        // Should NOT detect <env> as HTML since it's inside backticks
1260        assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
1261    }
1262
1263    #[test]
1264    fn test_md033_multiple_code_spans_with_angle_brackets() {
1265        // Test multiple code spans on same line
1266        let rule = MD033NoInlineHtml::default();
1267        let content = "`<one>` and `<two>` and `<three>` are all code spans";
1268        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1269        let result = rule.check(&ctx).unwrap();
1270        assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
1271    }
1272
1273    #[test]
1274    fn test_md033_nested_angle_brackets_in_code_span() {
1275        // Test nested angle brackets
1276        let rule = MD033NoInlineHtml::default();
1277        let content = "Text with `<<nested>>` brackets";
1278        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1279        let result = rule.check(&ctx).unwrap();
1280        assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
1281    }
1282
1283    #[test]
1284    fn test_md033_code_span_at_end_before_code_block() {
1285        // Test code span at end of line before code block
1286        let rule = MD033NoInlineHtml::default();
1287        let content = "Testing `<test>`\n```\ncode here\n```";
1288        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1289        let result = rule.check(&ctx).unwrap();
1290        assert_eq!(result.len(), 0, "Should handle code span before code block");
1291    }
1292
1293    #[test]
1294    fn test_md033_quick_fix_inline_tag() {
1295        // Test that non-fixable tags (like <span>) do NOT get a fix
1296        // Only safe fixable tags (em, i, strong, b, code, br, hr) with fix=true get fixes
1297        let rule = MD033NoInlineHtml::default();
1298        let content = "This has <span>inline text</span> that should keep content.";
1299        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1300        let result = rule.check(&ctx).unwrap();
1301
1302        assert_eq!(result.len(), 1, "Should find one HTML tag");
1303        // <span> is NOT a safe fixable tag, so no fix should be provided
1304        assert!(
1305            result[0].fix.is_none(),
1306            "Non-fixable tags like <span> should not have a fix"
1307        );
1308    }
1309
1310    #[test]
1311    fn test_md033_quick_fix_multiline_tag() {
1312        // HTML block elements like <div> are intentionally NOT auto-fixed
1313        // Removing them would change document structure significantly
1314        let rule = MD033NoInlineHtml::default();
1315        let content = "<div>\nBlock content\n</div>";
1316        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1317        let result = rule.check(&ctx).unwrap();
1318
1319        assert_eq!(result.len(), 1, "Should find one HTML tag");
1320        // HTML block elements should NOT have auto-fix
1321        assert!(result[0].fix.is_none(), "HTML block elements should NOT have auto-fix");
1322    }
1323
1324    #[test]
1325    fn test_md033_quick_fix_self_closing_tag() {
1326        // Test that self-closing tags with fix=false (default) do NOT get a fix
1327        let rule = MD033NoInlineHtml::default();
1328        let content = "Self-closing: <br/>";
1329        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1330        let result = rule.check(&ctx).unwrap();
1331
1332        assert_eq!(result.len(), 1, "Should find one HTML tag");
1333        // Default config has fix=false, so no fix should be provided
1334        assert!(
1335            result[0].fix.is_none(),
1336            "Self-closing tags should not have a fix when fix config is false"
1337        );
1338    }
1339
1340    #[test]
1341    fn test_md033_quick_fix_multiple_tags() {
1342        // Test that multiple tags without fix=true do NOT get fixes
1343        // <span> is not a safe fixable tag, <strong> is but fix=false by default
1344        let rule = MD033NoInlineHtml::default();
1345        let content = "<span>first</span> and <strong>second</strong>";
1346        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1347        let result = rule.check(&ctx).unwrap();
1348
1349        assert_eq!(result.len(), 2, "Should find two HTML tags");
1350        // Neither should have a fix: <span> is not fixable, <strong> is but fix=false
1351        assert!(result[0].fix.is_none(), "Non-fixable <span> should not have a fix");
1352        assert!(
1353            result[1].fix.is_none(),
1354            "<strong> should not have a fix when fix config is false"
1355        );
1356    }
1357
1358    #[test]
1359    fn test_md033_skip_angle_brackets_in_link_titles() {
1360        // Angle brackets inside link reference definition titles should not be flagged as HTML
1361        let rule = MD033NoInlineHtml::default();
1362        let content = r#"# Test
1363
1364[example]: <https://example.com> "Title with <Angle Brackets> inside"
1365
1366Regular text with <div>content</div> HTML tag.
1367"#;
1368        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1369        let result = rule.check(&ctx).unwrap();
1370
1371        // Should only flag <div>, not <Angle Brackets> in the title (not a valid HTML element)
1372        // Opening tag only (markdownlint behavior)
1373        assert_eq!(result.len(), 1, "Should find opening div tag");
1374        assert!(
1375            result[0].message.contains("<div>"),
1376            "Should flag <div>, got: {}",
1377            result[0].message
1378        );
1379    }
1380
1381    #[test]
1382    fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
1383        // Test with single-quoted title
1384        let rule = MD033NoInlineHtml::default();
1385        let content = r#"[ref]: url 'Title <Help Wanted> here'
1386
1387<span>text</span> here
1388"#;
1389        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1390        let result = rule.check(&ctx).unwrap();
1391
1392        // <Help Wanted> is not a valid HTML element, so only <span> is flagged
1393        // Opening tag only (markdownlint behavior)
1394        assert_eq!(result.len(), 1, "Should find opening span tag");
1395        assert!(
1396            result[0].message.contains("<span>"),
1397            "Should flag <span>, got: {}",
1398            result[0].message
1399        );
1400    }
1401
1402    #[test]
1403    fn test_md033_multiline_tag_end_line_calculation() {
1404        // Test that multiline HTML tags report correct end_line
1405        let rule = MD033NoInlineHtml::default();
1406        let content = "<div\n  class=\"test\"\n  id=\"example\">";
1407        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1408        let result = rule.check(&ctx).unwrap();
1409
1410        assert_eq!(result.len(), 1, "Should find one HTML tag");
1411        // Tag starts on line 1
1412        assert_eq!(result[0].line, 1, "Start line should be 1");
1413        // Tag ends on line 3 (where the closing > is)
1414        assert_eq!(result[0].end_line, 3, "End line should be 3");
1415    }
1416
1417    #[test]
1418    fn test_md033_single_line_tag_same_start_end_line() {
1419        // Test that single-line HTML tags have same start and end line
1420        let rule = MD033NoInlineHtml::default();
1421        let content = "Some text <div class=\"test\"> more text";
1422        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1423        let result = rule.check(&ctx).unwrap();
1424
1425        assert_eq!(result.len(), 1, "Should find one HTML tag");
1426        assert_eq!(result[0].line, 1, "Start line should be 1");
1427        assert_eq!(result[0].end_line, 1, "End line should be 1 for single-line tag");
1428    }
1429
1430    #[test]
1431    fn test_md033_multiline_tag_with_many_attributes() {
1432        // Test multiline tag spanning multiple lines
1433        let rule = MD033NoInlineHtml::default();
1434        let content =
1435            "Text\n<div\n  data-attr1=\"value1\"\n  data-attr2=\"value2\"\n  data-attr3=\"value3\">\nMore text";
1436        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1437        let result = rule.check(&ctx).unwrap();
1438
1439        assert_eq!(result.len(), 1, "Should find one HTML tag");
1440        // Tag starts on line 2 (first line is "Text")
1441        assert_eq!(result[0].line, 2, "Start line should be 2");
1442        // Tag ends on line 5 (where the closing > is)
1443        assert_eq!(result[0].end_line, 5, "End line should be 5");
1444    }
1445
1446    #[test]
1447    fn test_md033_disallowed_mode_basic() {
1448        // Test disallowed mode: only flags tags in the disallowed list
1449        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string(), "iframe".to_string()]);
1450        let content = "<div>Safe content</div><script>alert('xss')</script>";
1451        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1452        let result = rule.check(&ctx).unwrap();
1453
1454        // Should only flag <script>, not <div>
1455        assert_eq!(result.len(), 1, "Should only flag disallowed tags");
1456        assert!(result[0].message.contains("<script>"), "Should flag script tag");
1457    }
1458
1459    #[test]
1460    fn test_md033_disallowed_gfm_security_tags() {
1461        // Test GFM security tags expansion
1462        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1463        let content = r#"
1464<div>Safe</div>
1465<title>Bad title</title>
1466<textarea>Bad textarea</textarea>
1467<style>.bad{}</style>
1468<iframe src="evil"></iframe>
1469<script>evil()</script>
1470<plaintext>old tag</plaintext>
1471<span>Safe span</span>
1472"#;
1473        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1474        let result = rule.check(&ctx).unwrap();
1475
1476        // Should flag: title, textarea, style, iframe, script, plaintext
1477        // Should NOT flag: div, span
1478        assert_eq!(result.len(), 6, "Should flag 6 GFM security tags");
1479
1480        let flagged_tags: Vec<&str> = result
1481            .iter()
1482            .filter_map(|w| w.message.split("<").nth(1))
1483            .filter_map(|s| s.split(">").next())
1484            .filter_map(|s| s.split_whitespace().next())
1485            .collect();
1486
1487        assert!(flagged_tags.contains(&"title"), "Should flag title");
1488        assert!(flagged_tags.contains(&"textarea"), "Should flag textarea");
1489        assert!(flagged_tags.contains(&"style"), "Should flag style");
1490        assert!(flagged_tags.contains(&"iframe"), "Should flag iframe");
1491        assert!(flagged_tags.contains(&"script"), "Should flag script");
1492        assert!(flagged_tags.contains(&"plaintext"), "Should flag plaintext");
1493        assert!(!flagged_tags.contains(&"div"), "Should NOT flag div");
1494        assert!(!flagged_tags.contains(&"span"), "Should NOT flag span");
1495    }
1496
1497    #[test]
1498    fn test_md033_disallowed_case_insensitive() {
1499        // Test that disallowed check is case-insensitive
1500        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string()]);
1501        let content = "<SCRIPT>alert('xss')</SCRIPT><Script>alert('xss')</Script>";
1502        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1503        let result = rule.check(&ctx).unwrap();
1504
1505        // Should flag both <SCRIPT> and <Script>
1506        assert_eq!(result.len(), 2, "Should flag both case variants");
1507    }
1508
1509    #[test]
1510    fn test_md033_disallowed_with_attributes() {
1511        // Test that disallowed mode works with tags that have attributes
1512        let rule = MD033NoInlineHtml::with_disallowed(vec!["iframe".to_string()]);
1513        let content = r#"<iframe src="https://evil.com" width="100" height="100"></iframe>"#;
1514        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1515        let result = rule.check(&ctx).unwrap();
1516
1517        assert_eq!(result.len(), 1, "Should flag iframe with attributes");
1518        assert!(result[0].message.contains("iframe"), "Should flag iframe");
1519    }
1520
1521    #[test]
1522    fn test_md033_disallowed_all_gfm_tags() {
1523        // Verify all GFM disallowed tags are covered
1524        use md033_config::GFM_DISALLOWED_TAGS;
1525        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1526
1527        for tag in GFM_DISALLOWED_TAGS {
1528            let content = format!("<{tag}>content</{tag}>");
1529            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
1530            let result = rule.check(&ctx).unwrap();
1531
1532            assert_eq!(result.len(), 1, "GFM tag <{tag}> should be flagged");
1533        }
1534    }
1535
1536    #[test]
1537    fn test_md033_disallowed_mixed_with_custom() {
1538        // Test mixing "gfm" with custom disallowed tags
1539        let rule = MD033NoInlineHtml::with_disallowed(vec![
1540            "gfm".to_string(),
1541            "marquee".to_string(), // Custom disallowed tag
1542        ]);
1543        let content = r#"<script>bad</script><marquee>annoying</marquee><div>ok</div>"#;
1544        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1545        let result = rule.check(&ctx).unwrap();
1546
1547        // Should flag script (gfm) and marquee (custom)
1548        assert_eq!(result.len(), 2, "Should flag both gfm and custom tags");
1549    }
1550
1551    #[test]
1552    fn test_md033_disallowed_empty_means_default_mode() {
1553        // Empty disallowed list means default mode (flag all HTML)
1554        let rule = MD033NoInlineHtml::with_disallowed(vec![]);
1555        let content = "<div>content</div>";
1556        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1557        let result = rule.check(&ctx).unwrap();
1558
1559        // Should flag <div> in default mode
1560        assert_eq!(result.len(), 1, "Empty disallowed = default mode");
1561    }
1562
1563    #[test]
1564    fn test_md033_jsx_fragments_in_mdx() {
1565        // JSX fragments (<> and </>) should not trigger warnings in MDX
1566        let rule = MD033NoInlineHtml::default();
1567        let content = r#"# MDX Document
1568
1569<>
1570  <Heading />
1571  <Content />
1572</>
1573
1574<div>Regular HTML should still be flagged</div>
1575"#;
1576        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1577        let result = rule.check(&ctx).unwrap();
1578
1579        // Should only flag <div>, not the fragments or JSX components
1580        assert_eq!(result.len(), 1, "Should only find one HTML tag (the div)");
1581        assert!(
1582            result[0].message.contains("<div>"),
1583            "Should flag <div>, not JSX fragments"
1584        );
1585    }
1586
1587    #[test]
1588    fn test_md033_jsx_components_in_mdx() {
1589        // JSX components (capitalized) should not trigger warnings in MDX
1590        let rule = MD033NoInlineHtml::default();
1591        let content = r#"<CustomComponent prop="value">
1592  Content
1593</CustomComponent>
1594
1595<MyButton onClick={handler}>Click</MyButton>
1596"#;
1597        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1598        let result = rule.check(&ctx).unwrap();
1599
1600        // No warnings - all are JSX components
1601        assert_eq!(result.len(), 0, "Should not flag JSX components in MDX");
1602    }
1603
1604    #[test]
1605    fn test_md033_jsx_not_skipped_in_standard_markdown() {
1606        // In standard markdown, capitalized tags should still be flagged if they're valid HTML
1607        let rule = MD033NoInlineHtml::default();
1608        let content = "<Script>alert(1)</Script>";
1609        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1610        let result = rule.check(&ctx).unwrap();
1611
1612        // Should flag <Script> in standard markdown (it's a valid HTML element)
1613        assert_eq!(result.len(), 1, "Should flag <Script> in standard markdown");
1614    }
1615
1616    #[test]
1617    fn test_md033_jsx_attributes_in_mdx() {
1618        // Elements with JSX-specific attributes should not trigger warnings in MDX
1619        let rule = MD033NoInlineHtml::default();
1620        let content = r#"# MDX with JSX Attributes
1621
1622<div className="card big">Content</div>
1623
1624<button onClick={handleClick}>Click me</button>
1625
1626<label htmlFor="input-id">Label</label>
1627
1628<input onChange={handleChange} />
1629
1630<div class="html-class">Regular HTML should be flagged</div>
1631"#;
1632        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1633        let result = rule.check(&ctx).unwrap();
1634
1635        // Should only flag the div with regular HTML "class" attribute
1636        assert_eq!(
1637            result.len(),
1638            1,
1639            "Should only flag HTML element without JSX attributes, got: {result:?}"
1640        );
1641        assert!(
1642            result[0].message.contains("<div class="),
1643            "Should flag the div with HTML class attribute"
1644        );
1645    }
1646
1647    #[test]
1648    fn test_md033_jsx_attributes_not_skipped_in_standard() {
1649        // In standard markdown, JSX attributes should still be flagged
1650        let rule = MD033NoInlineHtml::default();
1651        let content = r#"<div className="card">Content</div>"#;
1652        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1653        let result = rule.check(&ctx).unwrap();
1654
1655        // Should flag in standard markdown
1656        assert_eq!(result.len(), 1, "Should flag JSX-style elements in standard markdown");
1657    }
1658
1659    // Auto-fix tests for MD033
1660
1661    #[test]
1662    fn test_md033_fix_disabled_by_default() {
1663        // Auto-fix should be disabled by default
1664        let rule = MD033NoInlineHtml::default();
1665        assert!(!rule.config.fix, "Fix should be disabled by default");
1666        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::Unfixable);
1667    }
1668
1669    #[test]
1670    fn test_md033_fix_enabled_em_to_italic() {
1671        // When fix is enabled, <em>text</em> should convert to *text*
1672        let rule = MD033NoInlineHtml::with_fix(true);
1673        let content = "This has <em>emphasized text</em> here.";
1674        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1675        let fixed = rule.fix(&ctx).unwrap();
1676        assert_eq!(fixed, "This has *emphasized text* here.");
1677    }
1678
1679    #[test]
1680    fn test_md033_fix_enabled_i_to_italic() {
1681        // <i>text</i> should convert to *text*
1682        let rule = MD033NoInlineHtml::with_fix(true);
1683        let content = "This has <i>italic text</i> here.";
1684        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1685        let fixed = rule.fix(&ctx).unwrap();
1686        assert_eq!(fixed, "This has *italic text* here.");
1687    }
1688
1689    #[test]
1690    fn test_md033_fix_enabled_strong_to_bold() {
1691        // <strong>text</strong> should convert to **text**
1692        let rule = MD033NoInlineHtml::with_fix(true);
1693        let content = "This has <strong>bold text</strong> here.";
1694        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1695        let fixed = rule.fix(&ctx).unwrap();
1696        assert_eq!(fixed, "This has **bold text** here.");
1697    }
1698
1699    #[test]
1700    fn test_md033_fix_enabled_b_to_bold() {
1701        // <b>text</b> should convert to **text**
1702        let rule = MD033NoInlineHtml::with_fix(true);
1703        let content = "This has <b>bold text</b> here.";
1704        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1705        let fixed = rule.fix(&ctx).unwrap();
1706        assert_eq!(fixed, "This has **bold text** here.");
1707    }
1708
1709    #[test]
1710    fn test_md033_fix_enabled_code_to_backticks() {
1711        // <code>text</code> should convert to `text`
1712        let rule = MD033NoInlineHtml::with_fix(true);
1713        let content = "This has <code>inline code</code> here.";
1714        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1715        let fixed = rule.fix(&ctx).unwrap();
1716        assert_eq!(fixed, "This has `inline code` here.");
1717    }
1718
1719    #[test]
1720    fn test_md033_fix_enabled_code_with_backticks() {
1721        // <code>text with `backticks`</code> should use double backticks
1722        let rule = MD033NoInlineHtml::with_fix(true);
1723        let content = "This has <code>text with `backticks`</code> here.";
1724        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1725        let fixed = rule.fix(&ctx).unwrap();
1726        assert_eq!(fixed, "This has `` text with `backticks` `` here.");
1727    }
1728
1729    #[test]
1730    fn test_md033_fix_enabled_br_trailing_spaces() {
1731        // <br> should convert to two trailing spaces + newline (default)
1732        let rule = MD033NoInlineHtml::with_fix(true);
1733        let content = "First line<br>Second line";
1734        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1735        let fixed = rule.fix(&ctx).unwrap();
1736        assert_eq!(fixed, "First line  \nSecond line");
1737    }
1738
1739    #[test]
1740    fn test_md033_fix_enabled_br_self_closing() {
1741        // <br/> and <br /> should also convert
1742        let rule = MD033NoInlineHtml::with_fix(true);
1743        let content = "First<br/>second<br />third";
1744        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1745        let fixed = rule.fix(&ctx).unwrap();
1746        assert_eq!(fixed, "First  \nsecond  \nthird");
1747    }
1748
1749    #[test]
1750    fn test_md033_fix_enabled_br_backslash_style() {
1751        // With br_style = backslash, <br> should convert to backslash + newline
1752        let config = MD033Config {
1753            allowed: Vec::new(),
1754            disallowed: Vec::new(),
1755            fix: true,
1756            br_style: md033_config::BrStyle::Backslash,
1757        };
1758        let rule = MD033NoInlineHtml::from_config_struct(config);
1759        let content = "First line<br>Second line";
1760        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1761        let fixed = rule.fix(&ctx).unwrap();
1762        assert_eq!(fixed, "First line\\\nSecond line");
1763    }
1764
1765    #[test]
1766    fn test_md033_fix_enabled_hr() {
1767        // <hr> should convert to horizontal rule
1768        let rule = MD033NoInlineHtml::with_fix(true);
1769        let content = "Above<hr>Below";
1770        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1771        let fixed = rule.fix(&ctx).unwrap();
1772        assert_eq!(fixed, "Above\n---\nBelow");
1773    }
1774
1775    #[test]
1776    fn test_md033_fix_enabled_hr_self_closing() {
1777        // <hr/> should also convert
1778        let rule = MD033NoInlineHtml::with_fix(true);
1779        let content = "Above<hr/>Below";
1780        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1781        let fixed = rule.fix(&ctx).unwrap();
1782        assert_eq!(fixed, "Above\n---\nBelow");
1783    }
1784
1785    #[test]
1786    fn test_md033_fix_skips_nested_tags() {
1787        // Tags with nested HTML - outer tags may not be fully fixed due to overlapping ranges
1788        // The inner tags are processed first, which can invalidate outer tag ranges
1789        let rule = MD033NoInlineHtml::with_fix(true);
1790        let content = "This has <em>text with <strong>nested</strong> tags</em> here.";
1791        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1792        let fixed = rule.fix(&ctx).unwrap();
1793        // Inner <strong> is converted to markdown, outer <em> range becomes invalid
1794        // This is expected behavior - user should run fix multiple times for nested tags
1795        assert_eq!(fixed, "This has <em>text with **nested** tags</em> here.");
1796    }
1797
1798    #[test]
1799    fn test_md033_fix_skips_tags_with_attributes() {
1800        // Tags with attributes should NOT be fixed at all - leave as-is
1801        // User may want to keep the attributes (e.g., class="highlight" for styling)
1802        let rule = MD033NoInlineHtml::with_fix(true);
1803        let content = "This has <em class=\"highlight\">emphasized</em> text.";
1804        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1805        let fixed = rule.fix(&ctx).unwrap();
1806        // Content should remain unchanged - we don't know if attributes matter
1807        assert_eq!(fixed, content);
1808    }
1809
1810    #[test]
1811    fn test_md033_fix_disabled_no_changes() {
1812        // When fix is disabled, original content should be returned
1813        let rule = MD033NoInlineHtml::default(); // fix is false by default
1814        let content = "This has <em>emphasized text</em> here.";
1815        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1816        let fixed = rule.fix(&ctx).unwrap();
1817        assert_eq!(fixed, content, "Should return original content when fix is disabled");
1818    }
1819
1820    #[test]
1821    fn test_md033_fix_capability_enabled() {
1822        let rule = MD033NoInlineHtml::with_fix(true);
1823        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::FullyFixable);
1824    }
1825
1826    #[test]
1827    fn test_md033_fix_multiple_tags() {
1828        // Test fixing multiple HTML tags in one document
1829        let rule = MD033NoInlineHtml::with_fix(true);
1830        let content = "Here is <em>italic</em> and <strong>bold</strong> text.";
1831        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1832        let fixed = rule.fix(&ctx).unwrap();
1833        assert_eq!(fixed, "Here is *italic* and **bold** text.");
1834    }
1835
1836    #[test]
1837    fn test_md033_fix_uppercase_tags() {
1838        // HTML tags are case-insensitive
1839        let rule = MD033NoInlineHtml::with_fix(true);
1840        let content = "This has <EM>emphasized</EM> text.";
1841        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1842        let fixed = rule.fix(&ctx).unwrap();
1843        assert_eq!(fixed, "This has *emphasized* text.");
1844    }
1845
1846    #[test]
1847    fn test_md033_fix_unsafe_tags_not_modified() {
1848        // Tags without safe markdown equivalents should NOT be modified
1849        // Only safe fixable tags (em, i, strong, b, code, br, hr) get converted
1850        let rule = MD033NoInlineHtml::with_fix(true);
1851        let content = "This has <div>a div</div> content.";
1852        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1853        let fixed = rule.fix(&ctx).unwrap();
1854        // <div> is not a safe fixable tag, so content should be unchanged
1855        assert_eq!(fixed, "This has <div>a div</div> content.");
1856    }
1857
1858    #[test]
1859    fn test_md033_fix_img_tag_converted() {
1860        // <img> tags with simple src/alt attributes are converted to markdown images
1861        let rule = MD033NoInlineHtml::with_fix(true);
1862        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\">";
1863        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1864        let fixed = rule.fix(&ctx).unwrap();
1865        // <img> is converted to ![alt](src) format
1866        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
1867    }
1868
1869    #[test]
1870    fn test_md033_fix_img_tag_with_extra_attrs_not_converted() {
1871        // <img> tags with width/height/style attributes are NOT converted
1872        let rule = MD033NoInlineHtml::with_fix(true);
1873        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
1874        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1875        let fixed = rule.fix(&ctx).unwrap();
1876        // Has width attribute - not safe to convert
1877        assert_eq!(fixed, "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">");
1878    }
1879
1880    #[test]
1881    fn test_md033_fix_mixed_safe_tags() {
1882        // All tags are now safe fixable (em, img, strong)
1883        let rule = MD033NoInlineHtml::with_fix(true);
1884        let content = "<em>italic</em> and <img src=\"x.jpg\"> and <strong>bold</strong>";
1885        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1886        let fixed = rule.fix(&ctx).unwrap();
1887        // All are converted
1888        assert_eq!(fixed, "*italic* and ![](x.jpg) and **bold**");
1889    }
1890
1891    #[test]
1892    fn test_md033_fix_multiple_tags_same_line() {
1893        // Multiple tags on the same line should all be fixed correctly
1894        let rule = MD033NoInlineHtml::with_fix(true);
1895        let content = "Regular text <i>italic</i> and <b>bold</b> here.";
1896        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1897        let fixed = rule.fix(&ctx).unwrap();
1898        assert_eq!(fixed, "Regular text *italic* and **bold** here.");
1899    }
1900
1901    #[test]
1902    fn test_md033_fix_multiple_em_tags_same_line() {
1903        // Multiple em/strong tags on the same line
1904        let rule = MD033NoInlineHtml::with_fix(true);
1905        let content = "<em>first</em> and <strong>second</strong> and <code>third</code>";
1906        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1907        let fixed = rule.fix(&ctx).unwrap();
1908        assert_eq!(fixed, "*first* and **second** and `third`");
1909    }
1910
1911    #[test]
1912    fn test_md033_fix_skips_tags_inside_pre() {
1913        // Tags inside <pre> blocks should NOT be fixed (would break structure)
1914        let rule = MD033NoInlineHtml::with_fix(true);
1915        let content = "<pre><code><em>VALUE</em></code></pre>";
1916        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1917        let fixed = rule.fix(&ctx).unwrap();
1918        // The <em> inside <pre><code> should NOT be converted
1919        // Only the outer structure might be changed
1920        assert!(
1921            !fixed.contains("*VALUE*"),
1922            "Tags inside <pre> should not be converted to markdown. Got: {fixed}"
1923        );
1924    }
1925
1926    #[test]
1927    fn test_md033_fix_skips_tags_inside_div() {
1928        // Tags inside HTML block elements should not be fixed
1929        let rule = MD033NoInlineHtml::with_fix(true);
1930        let content = "<div>\n<em>emphasized</em>\n</div>";
1931        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1932        let fixed = rule.fix(&ctx).unwrap();
1933        // The <em> inside <div> should not be converted to *emphasized*
1934        assert!(
1935            !fixed.contains("*emphasized*"),
1936            "Tags inside HTML blocks should not be converted. Got: {fixed}"
1937        );
1938    }
1939
1940    #[test]
1941    fn test_md033_fix_outside_html_block() {
1942        // Tags outside HTML blocks should still be fixed
1943        let rule = MD033NoInlineHtml::with_fix(true);
1944        let content = "<div>\ncontent\n</div>\n\nOutside <em>emphasized</em> text.";
1945        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1946        let fixed = rule.fix(&ctx).unwrap();
1947        // The <em> outside the div should be converted
1948        assert!(
1949            fixed.contains("*emphasized*"),
1950            "Tags outside HTML blocks should be converted. Got: {fixed}"
1951        );
1952    }
1953
1954    #[test]
1955    fn test_md033_fix_with_id_attribute() {
1956        // Tags with id attributes should not be fixed (id might be used for anchors)
1957        let rule = MD033NoInlineHtml::with_fix(true);
1958        let content = "See <em id=\"important\">this note</em> for details.";
1959        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1960        let fixed = rule.fix(&ctx).unwrap();
1961        // Should remain unchanged - id attribute matters for linking
1962        assert_eq!(fixed, content);
1963    }
1964
1965    #[test]
1966    fn test_md033_fix_with_style_attribute() {
1967        // Tags with style attributes should not be fixed
1968        let rule = MD033NoInlineHtml::with_fix(true);
1969        let content = "This is <strong style=\"color: red\">important</strong> text.";
1970        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1971        let fixed = rule.fix(&ctx).unwrap();
1972        // Should remain unchanged - style attribute provides formatting
1973        assert_eq!(fixed, content);
1974    }
1975
1976    #[test]
1977    fn test_md033_fix_mixed_with_and_without_attributes() {
1978        // Mix of tags with and without attributes
1979        let rule = MD033NoInlineHtml::with_fix(true);
1980        let content = "<em>normal</em> and <em class=\"special\">styled</em> text.";
1981        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1982        let fixed = rule.fix(&ctx).unwrap();
1983        // Only the tag without attributes should be fixed
1984        assert_eq!(fixed, "*normal* and <em class=\"special\">styled</em> text.");
1985    }
1986
1987    #[test]
1988    fn test_md033_quick_fix_tag_with_attributes_no_fix() {
1989        // Quick fix should not be provided for tags with attributes
1990        let rule = MD033NoInlineHtml::with_fix(true);
1991        let content = "<em class=\"test\">emphasized</em>";
1992        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1993        let result = rule.check(&ctx).unwrap();
1994
1995        assert_eq!(result.len(), 1, "Should find one HTML tag");
1996        // No fix should be provided for tags with attributes
1997        assert!(
1998            result[0].fix.is_none(),
1999            "Should NOT have a fix for tags with attributes"
2000        );
2001    }
2002
2003    #[test]
2004    fn test_md033_fix_skips_html_entities() {
2005        // Tags containing HTML entities should NOT be fixed
2006        // HTML entities need HTML context to render; markdown won't process them
2007        let rule = MD033NoInlineHtml::with_fix(true);
2008        let content = "<code>&vert;</code>";
2009        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2010        let fixed = rule.fix(&ctx).unwrap();
2011        // Should remain unchanged - converting would break rendering
2012        assert_eq!(fixed, content);
2013    }
2014
2015    #[test]
2016    fn test_md033_fix_skips_multiple_html_entities() {
2017        // Multiple HTML entities should also be skipped
2018        let rule = MD033NoInlineHtml::with_fix(true);
2019        let content = "<code>&lt;T&gt;</code>";
2020        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2021        let fixed = rule.fix(&ctx).unwrap();
2022        // Should remain unchanged
2023        assert_eq!(fixed, content);
2024    }
2025
2026    #[test]
2027    fn test_md033_fix_allows_ampersand_without_entity() {
2028        // Content with & but no semicolon should still be fixed
2029        let rule = MD033NoInlineHtml::with_fix(true);
2030        let content = "<code>a & b</code>";
2031        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2032        let fixed = rule.fix(&ctx).unwrap();
2033        // Should be converted since & is not part of an entity
2034        assert_eq!(fixed, "`a & b`");
2035    }
2036
2037    #[test]
2038    fn test_md033_fix_em_with_entities_skipped() {
2039        // <em> with entities should also be skipped
2040        let rule = MD033NoInlineHtml::with_fix(true);
2041        let content = "<em>&nbsp;text</em>";
2042        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2043        let fixed = rule.fix(&ctx).unwrap();
2044        // Should remain unchanged
2045        assert_eq!(fixed, content);
2046    }
2047
2048    #[test]
2049    fn test_md033_fix_skips_nested_em_in_code() {
2050        // Tags nested inside other HTML elements should NOT be fixed
2051        // e.g., <code><em>n</em></code> - the <em> should not be converted
2052        let rule = MD033NoInlineHtml::with_fix(true);
2053        let content = "<code><em>n</em></code>";
2054        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2055        let fixed = rule.fix(&ctx).unwrap();
2056        // The inner <em> should NOT be converted to *n* because it's nested
2057        // The whole structure should be left as-is (or outer code converted, but not inner)
2058        assert!(
2059            !fixed.contains("*n*"),
2060            "Nested <em> should not be converted to markdown. Got: {fixed}"
2061        );
2062    }
2063
2064    #[test]
2065    fn test_md033_fix_skips_nested_in_table() {
2066        // Tags nested in HTML structures in tables should not be fixed
2067        let rule = MD033NoInlineHtml::with_fix(true);
2068        let content = "| <code>><em>n</em></code> | description |";
2069        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2070        let fixed = rule.fix(&ctx).unwrap();
2071        // Should not convert nested <em> to *n*
2072        assert!(
2073            !fixed.contains("*n*"),
2074            "Nested tags in table should not be converted. Got: {fixed}"
2075        );
2076    }
2077
2078    #[test]
2079    fn test_md033_fix_standalone_em_still_converted() {
2080        // Standalone (non-nested) <em> should still be converted
2081        let rule = MD033NoInlineHtml::with_fix(true);
2082        let content = "This is <em>emphasized</em> text.";
2083        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2084        let fixed = rule.fix(&ctx).unwrap();
2085        assert_eq!(fixed, "This is *emphasized* text.");
2086    }
2087
2088    // ==========================================================================
2089    // Obsidian Templater Plugin Syntax Tests
2090    //
2091    // Templater is a popular Obsidian plugin that uses `<% ... %>` syntax for
2092    // template interpolation. The `<%` pattern is NOT captured by the HTML tag
2093    // parser because `%` is not a valid HTML tag name character (tags must start
2094    // with a letter). This behavior is documented here with comprehensive tests.
2095    //
2096    // Reference: https://silentvoid13.github.io/Templater/
2097    // ==========================================================================
2098
2099    #[test]
2100    fn test_md033_templater_basic_interpolation_not_flagged() {
2101        // Basic Templater interpolation: <% expr %>
2102        // Should NOT be flagged because `%` is not a valid HTML tag character
2103        let rule = MD033NoInlineHtml::default();
2104        let content = "Today is <% tp.date.now() %> which is nice.";
2105        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2106        let result = rule.check(&ctx).unwrap();
2107        assert!(
2108            result.is_empty(),
2109            "Templater basic interpolation should not be flagged as HTML. Got: {result:?}"
2110        );
2111    }
2112
2113    #[test]
2114    fn test_md033_templater_file_functions_not_flagged() {
2115        // Templater file functions: <% tp.file.* %>
2116        let rule = MD033NoInlineHtml::default();
2117        let content = "File: <% tp.file.title %>\nCreated: <% tp.file.creation_date() %>";
2118        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2119        let result = rule.check(&ctx).unwrap();
2120        assert!(
2121            result.is_empty(),
2122            "Templater file functions should not be flagged. Got: {result:?}"
2123        );
2124    }
2125
2126    #[test]
2127    fn test_md033_templater_with_arguments_not_flagged() {
2128        // Templater with function arguments
2129        let rule = MD033NoInlineHtml::default();
2130        let content = r#"Date: <% tp.date.now("YYYY-MM-DD") %>"#;
2131        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2132        let result = rule.check(&ctx).unwrap();
2133        assert!(
2134            result.is_empty(),
2135            "Templater with arguments should not be flagged. Got: {result:?}"
2136        );
2137    }
2138
2139    #[test]
2140    fn test_md033_templater_javascript_execution_not_flagged() {
2141        // Templater JavaScript execution block: <%* code %>
2142        let rule = MD033NoInlineHtml::default();
2143        let content = "<%* const today = tp.date.now(); tR += today; %>";
2144        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2145        let result = rule.check(&ctx).unwrap();
2146        assert!(
2147            result.is_empty(),
2148            "Templater JS execution block should not be flagged. Got: {result:?}"
2149        );
2150    }
2151
2152    #[test]
2153    fn test_md033_templater_dynamic_execution_not_flagged() {
2154        // Templater dynamic/preview execution: <%+ expr %>
2155        let rule = MD033NoInlineHtml::default();
2156        let content = "Dynamic: <%+ tp.date.now() %>";
2157        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2158        let result = rule.check(&ctx).unwrap();
2159        assert!(
2160            result.is_empty(),
2161            "Templater dynamic execution should not be flagged. Got: {result:?}"
2162        );
2163    }
2164
2165    #[test]
2166    fn test_md033_templater_whitespace_trim_all_not_flagged() {
2167        // Templater whitespace control - trim all: <%_ expr _%>
2168        let rule = MD033NoInlineHtml::default();
2169        let content = "<%_ tp.date.now() _%>";
2170        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2171        let result = rule.check(&ctx).unwrap();
2172        assert!(
2173            result.is_empty(),
2174            "Templater trim-all whitespace should not be flagged. Got: {result:?}"
2175        );
2176    }
2177
2178    #[test]
2179    fn test_md033_templater_whitespace_trim_newline_not_flagged() {
2180        // Templater whitespace control - trim newline: <%- expr -%>
2181        let rule = MD033NoInlineHtml::default();
2182        let content = "<%- tp.date.now() -%>";
2183        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2184        let result = rule.check(&ctx).unwrap();
2185        assert!(
2186            result.is_empty(),
2187            "Templater trim-newline should not be flagged. Got: {result:?}"
2188        );
2189    }
2190
2191    #[test]
2192    fn test_md033_templater_combined_modifiers_not_flagged() {
2193        // Templater combined whitespace and execution modifiers
2194        let rule = MD033NoInlineHtml::default();
2195        let contents = [
2196            "<%-* const x = 1; -%>",  // trim + JS execution
2197            "<%_+ tp.date.now() _%>", // trim-all + dynamic
2198            "<%- tp.file.title -%>",  // trim-newline only
2199            "<%_ tp.file.title _%>",  // trim-all only
2200        ];
2201        for content in contents {
2202            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2203            let result = rule.check(&ctx).unwrap();
2204            assert!(
2205                result.is_empty(),
2206                "Templater combined modifiers should not be flagged: {content}. Got: {result:?}"
2207            );
2208        }
2209    }
2210
2211    #[test]
2212    fn test_md033_templater_multiline_block_not_flagged() {
2213        // Multi-line Templater JavaScript block
2214        let rule = MD033NoInlineHtml::default();
2215        let content = r#"<%*
2216const x = 1;
2217const y = 2;
2218tR += x + y;
2219%>"#;
2220        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2221        let result = rule.check(&ctx).unwrap();
2222        assert!(
2223            result.is_empty(),
2224            "Templater multi-line block should not be flagged. Got: {result:?}"
2225        );
2226    }
2227
2228    #[test]
2229    fn test_md033_templater_with_angle_brackets_in_condition_not_flagged() {
2230        // Templater with angle brackets in JavaScript condition
2231        // This is a key edge case: `<` inside Templater should not trigger HTML detection
2232        let rule = MD033NoInlineHtml::default();
2233        let content = "<%* if (x < 5) { tR += 'small'; } %>";
2234        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2235        let result = rule.check(&ctx).unwrap();
2236        assert!(
2237            result.is_empty(),
2238            "Templater with angle brackets in conditions should not be flagged. Got: {result:?}"
2239        );
2240    }
2241
2242    #[test]
2243    fn test_md033_templater_mixed_with_html_only_html_flagged() {
2244        // Templater syntax mixed with actual HTML - only HTML should be flagged
2245        let rule = MD033NoInlineHtml::default();
2246        let content = "<% tp.date.now() %> is today's date. <div>This is HTML</div>";
2247        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2248        let result = rule.check(&ctx).unwrap();
2249        assert_eq!(result.len(), 1, "Should only flag the HTML div tag");
2250        assert!(
2251            result[0].message.contains("<div>"),
2252            "Should flag <div>, got: {}",
2253            result[0].message
2254        );
2255    }
2256
2257    #[test]
2258    fn test_md033_templater_in_heading_not_flagged() {
2259        // Templater in markdown heading
2260        let rule = MD033NoInlineHtml::default();
2261        let content = "# <% tp.file.title %>";
2262        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2263        let result = rule.check(&ctx).unwrap();
2264        assert!(
2265            result.is_empty(),
2266            "Templater in heading should not be flagged. Got: {result:?}"
2267        );
2268    }
2269
2270    #[test]
2271    fn test_md033_templater_multiple_on_same_line_not_flagged() {
2272        // Multiple Templater blocks on same line
2273        let rule = MD033NoInlineHtml::default();
2274        let content = "From <% tp.date.now() %> to <% tp.date.tomorrow() %> we have meetings.";
2275        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2276        let result = rule.check(&ctx).unwrap();
2277        assert!(
2278            result.is_empty(),
2279            "Multiple Templater blocks should not be flagged. Got: {result:?}"
2280        );
2281    }
2282
2283    #[test]
2284    fn test_md033_templater_in_code_block_not_flagged() {
2285        // Templater syntax in code blocks should not be flagged (code blocks are skipped)
2286        let rule = MD033NoInlineHtml::default();
2287        let content = "```\n<% tp.date.now() %>\n```";
2288        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2289        let result = rule.check(&ctx).unwrap();
2290        assert!(
2291            result.is_empty(),
2292            "Templater in code block should not be flagged. Got: {result:?}"
2293        );
2294    }
2295
2296    #[test]
2297    fn test_md033_templater_in_inline_code_not_flagged() {
2298        // Templater syntax in inline code span should not be flagged
2299        let rule = MD033NoInlineHtml::default();
2300        let content = "Use `<% tp.date.now() %>` for current date.";
2301        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2302        let result = rule.check(&ctx).unwrap();
2303        assert!(
2304            result.is_empty(),
2305            "Templater in inline code should not be flagged. Got: {result:?}"
2306        );
2307    }
2308
2309    #[test]
2310    fn test_md033_templater_also_works_in_standard_flavor() {
2311        // Templater syntax should also not be flagged in Standard flavor
2312        // because the HTML parser doesn't recognize `<%` as a valid tag
2313        let rule = MD033NoInlineHtml::default();
2314        let content = "<% tp.date.now() %> works everywhere.";
2315        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2316        let result = rule.check(&ctx).unwrap();
2317        assert!(
2318            result.is_empty(),
2319            "Templater should not be flagged even in Standard flavor. Got: {result:?}"
2320        );
2321    }
2322
2323    #[test]
2324    fn test_md033_templater_empty_tag_not_flagged() {
2325        // Empty Templater tags
2326        let rule = MD033NoInlineHtml::default();
2327        let content = "<%>";
2328        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2329        let result = rule.check(&ctx).unwrap();
2330        assert!(
2331            result.is_empty(),
2332            "Empty Templater-like tag should not be flagged. Got: {result:?}"
2333        );
2334    }
2335
2336    #[test]
2337    fn test_md033_templater_unclosed_not_flagged() {
2338        // Unclosed Templater tags - these are template errors, not HTML
2339        let rule = MD033NoInlineHtml::default();
2340        let content = "<% tp.date.now() without closing tag";
2341        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2342        let result = rule.check(&ctx).unwrap();
2343        assert!(
2344            result.is_empty(),
2345            "Unclosed Templater should not be flagged as HTML. Got: {result:?}"
2346        );
2347    }
2348
2349    #[test]
2350    fn test_md033_templater_with_newlines_inside_not_flagged() {
2351        // Templater with newlines inside the expression
2352        let rule = MD033NoInlineHtml::default();
2353        let content = r#"<% tp.date.now("YYYY") +
2354"-" +
2355tp.date.now("MM") %>"#;
2356        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2357        let result = rule.check(&ctx).unwrap();
2358        assert!(
2359            result.is_empty(),
2360            "Templater with internal newlines should not be flagged. Got: {result:?}"
2361        );
2362    }
2363
2364    #[test]
2365    fn test_md033_erb_style_tags_not_flagged() {
2366        // ERB/EJS style tags (similar to Templater) are also not HTML
2367        // This documents the general principle that `<%` is not valid HTML
2368        let rule = MD033NoInlineHtml::default();
2369        let content = "<%= variable %> and <% code %> and <%# comment %>";
2370        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2371        let result = rule.check(&ctx).unwrap();
2372        assert!(
2373            result.is_empty(),
2374            "ERB/EJS style tags should not be flagged as HTML. Got: {result:?}"
2375        );
2376    }
2377
2378    #[test]
2379    fn test_md033_templater_complex_expression_not_flagged() {
2380        // Complex Templater expression with multiple function calls
2381        let rule = MD033NoInlineHtml::default();
2382        let content = r#"<%*
2383const file = tp.file.title;
2384const date = tp.date.now("YYYY-MM-DD");
2385const folder = tp.file.folder();
2386tR += `# ${file}\n\nCreated: ${date}\nIn: ${folder}`;
2387%>"#;
2388        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2389        let result = rule.check(&ctx).unwrap();
2390        assert!(
2391            result.is_empty(),
2392            "Complex Templater expression should not be flagged. Got: {result:?}"
2393        );
2394    }
2395
2396    #[test]
2397    fn test_md033_percent_sign_variations_not_flagged() {
2398        // Various patterns starting with <% that should all be safe
2399        let rule = MD033NoInlineHtml::default();
2400        let patterns = [
2401            "<%=",  // ERB output
2402            "<%#",  // ERB comment
2403            "<%%",  // Double percent
2404            "<%!",  // Some template engines
2405            "<%@",  // JSP directive
2406            "<%--", // JSP comment
2407        ];
2408        for pattern in patterns {
2409            let content = format!("{pattern} content %>");
2410            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
2411            let result = rule.check(&ctx).unwrap();
2412            assert!(
2413                result.is_empty(),
2414                "Pattern {pattern} should not be flagged. Got: {result:?}"
2415            );
2416        }
2417    }
2418}