Skip to main content

rumdl_lib/rules/
md033_no_inline_html.rs

1//!
2//! Rule MD033: No HTML tags
3//!
4//! See [docs/md033.md](../../docs/md033.md) for full documentation, configuration, and examples.
5
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::regex_cache::*;
8use std::collections::HashSet;
9
10mod md033_config;
11use md033_config::{MD033Config, MD033FixMode};
12
/// State for rule MD033 ("No HTML tags").
///
/// Holds the rule configuration together with lookup sets that every
/// constructor in this file derives from that configuration through the
/// `MD033Config::*_set()` helpers.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    /// Configuration the sets below are derived from.
    config: MD033Config,
    /// Result of `config.allowed_set()`; consulted by `is_tag_allowed`.
    allowed: HashSet<String>,
    /// Result of `config.disallowed_set()`; consulted by `is_tag_disallowed`.
    disallowed: HashSet<String>,
    /// Result of `config.drop_attributes_set()`; consulted by `is_droppable_attribute`.
    drop_attributes: HashSet<String>,
    /// Result of `config.strip_wrapper_elements_set()`; consulted by the
    /// strippable-wrapper checks.
    strip_wrapper_elements: HashSet<String>,
}
21
22impl Default for MD033NoInlineHtml {
23    fn default() -> Self {
24        let config = MD033Config::default();
25        let allowed = config.allowed_set();
26        let disallowed = config.disallowed_set();
27        let drop_attributes = config.drop_attributes_set();
28        let strip_wrapper_elements = config.strip_wrapper_elements_set();
29        Self {
30            config,
31            allowed,
32            disallowed,
33            drop_attributes,
34            strip_wrapper_elements,
35        }
36    }
37}
38
39impl MD033NoInlineHtml {
40    pub fn new() -> Self {
41        Self::default()
42    }
43
44    pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
45        let config = MD033Config {
46            allowed: allowed_vec.clone(),
47            disallowed: Vec::new(),
48            fix: false,
49            ..MD033Config::default()
50        };
51        let allowed = config.allowed_set();
52        let disallowed = config.disallowed_set();
53        let drop_attributes = config.drop_attributes_set();
54        let strip_wrapper_elements = config.strip_wrapper_elements_set();
55        Self {
56            config,
57            allowed,
58            disallowed,
59            drop_attributes,
60            strip_wrapper_elements,
61        }
62    }
63
64    pub fn with_disallowed(disallowed_vec: Vec<String>) -> Self {
65        let config = MD033Config {
66            allowed: Vec::new(),
67            disallowed: disallowed_vec.clone(),
68            fix: false,
69            ..MD033Config::default()
70        };
71        let allowed = config.allowed_set();
72        let disallowed = config.disallowed_set();
73        let drop_attributes = config.drop_attributes_set();
74        let strip_wrapper_elements = config.strip_wrapper_elements_set();
75        Self {
76            config,
77            allowed,
78            disallowed,
79            drop_attributes,
80            strip_wrapper_elements,
81        }
82    }
83
84    /// Create a new rule with auto-fix enabled
85    pub fn with_fix(fix: bool) -> Self {
86        let config = MD033Config {
87            allowed: Vec::new(),
88            disallowed: Vec::new(),
89            fix,
90            ..MD033Config::default()
91        };
92        let allowed = config.allowed_set();
93        let disallowed = config.disallowed_set();
94        let drop_attributes = config.drop_attributes_set();
95        let strip_wrapper_elements = config.strip_wrapper_elements_set();
96        Self {
97            config,
98            allowed,
99            disallowed,
100            drop_attributes,
101            strip_wrapper_elements,
102        }
103    }
104
105    pub fn from_config_struct(config: MD033Config) -> Self {
106        let allowed = config.allowed_set();
107        let disallowed = config.disallowed_set();
108        let drop_attributes = config.drop_attributes_set();
109        let strip_wrapper_elements = config.strip_wrapper_elements_set();
110        Self {
111            config,
112            allowed,
113            disallowed,
114            drop_attributes,
115            strip_wrapper_elements,
116        }
117    }
118
119    // Efficient check for allowed tags using HashSet (case-insensitive)
120    #[inline]
121    fn is_tag_allowed(&self, tag: &str) -> bool {
122        if self.allowed.is_empty() {
123            return false;
124        }
125        // Remove angle brackets and slashes, then split by whitespace or '>'
126        let tag = tag.trim_start_matches('<').trim_start_matches('/');
127        let tag_name = tag
128            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
129            .next()
130            .unwrap_or("");
131        self.allowed.contains(&tag_name.to_lowercase())
132    }
133
134    /// Check if a tag is in the disallowed set (for disallowed-only mode)
135    #[inline]
136    fn is_tag_disallowed(&self, tag: &str) -> bool {
137        if self.disallowed.is_empty() {
138            return false;
139        }
140        // Remove angle brackets and slashes, then split by whitespace or '>'
141        let tag = tag.trim_start_matches('<').trim_start_matches('/');
142        let tag_name = tag
143            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
144            .next()
145            .unwrap_or("");
146        self.disallowed.contains(&tag_name.to_lowercase())
147    }
148
149    /// Check if operating in disallowed-only mode
150    #[inline]
151    fn is_disallowed_mode(&self) -> bool {
152        self.config.is_disallowed_mode()
153    }
154
155    // Check if a tag is an HTML comment
156    #[inline]
157    fn is_html_comment(&self, tag: &str) -> bool {
158        tag.starts_with("<!--") && tag.ends_with("-->")
159    }
160
161    /// Check if a tag name is a valid HTML element or custom element.
162    /// Returns false for placeholder syntax like `<NAME>`, `<resource>`, `<actual>`.
163    ///
164    /// Per HTML spec, custom elements must contain a hyphen (e.g., `<my-component>`).
165    #[inline]
166    fn is_html_element_or_custom(tag_name: &str) -> bool {
167        const HTML_ELEMENTS: &[&str] = &[
168            // Document structure
169            "html",
170            "head",
171            "body",
172            "title",
173            "base",
174            "link",
175            "meta",
176            "style",
177            // Sections
178            "article",
179            "section",
180            "nav",
181            "aside",
182            "h1",
183            "h2",
184            "h3",
185            "h4",
186            "h5",
187            "h6",
188            "hgroup",
189            "header",
190            "footer",
191            "address",
192            "main",
193            "search",
194            // Grouping
195            "p",
196            "hr",
197            "pre",
198            "blockquote",
199            "ol",
200            "ul",
201            "menu",
202            "li",
203            "dl",
204            "dt",
205            "dd",
206            "figure",
207            "figcaption",
208            "div",
209            // Text-level
210            "a",
211            "em",
212            "strong",
213            "small",
214            "s",
215            "cite",
216            "q",
217            "dfn",
218            "abbr",
219            "ruby",
220            "rt",
221            "rp",
222            "data",
223            "time",
224            "code",
225            "var",
226            "samp",
227            "kbd",
228            "sub",
229            "sup",
230            "i",
231            "b",
232            "u",
233            "mark",
234            "bdi",
235            "bdo",
236            "span",
237            "br",
238            "wbr",
239            // Edits
240            "ins",
241            "del",
242            // Embedded
243            "picture",
244            "source",
245            "img",
246            "iframe",
247            "embed",
248            "object",
249            "param",
250            "video",
251            "audio",
252            "track",
253            "map",
254            "area",
255            "svg",
256            "math",
257            "canvas",
258            // Tables
259            "table",
260            "caption",
261            "colgroup",
262            "col",
263            "tbody",
264            "thead",
265            "tfoot",
266            "tr",
267            "td",
268            "th",
269            // Forms
270            "form",
271            "label",
272            "input",
273            "button",
274            "select",
275            "datalist",
276            "optgroup",
277            "option",
278            "textarea",
279            "output",
280            "progress",
281            "meter",
282            "fieldset",
283            "legend",
284            // Interactive
285            "details",
286            "summary",
287            "dialog",
288            // Scripting
289            "script",
290            "noscript",
291            "template",
292            "slot",
293            // Deprecated but recognized
294            "acronym",
295            "applet",
296            "basefont",
297            "big",
298            "center",
299            "dir",
300            "font",
301            "frame",
302            "frameset",
303            "isindex",
304            "marquee",
305            "noembed",
306            "noframes",
307            "plaintext",
308            "strike",
309            "tt",
310            "xmp",
311        ];
312
313        let lower = tag_name.to_ascii_lowercase();
314        if HTML_ELEMENTS.contains(&lower.as_str()) {
315            return true;
316        }
317        // Custom elements must contain a hyphen per HTML spec
318        tag_name.contains('-')
319    }
320
321    // Check if a tag is likely a programming type annotation rather than HTML
322    #[inline]
323    fn is_likely_type_annotation(&self, tag: &str) -> bool {
324        // Common programming type names that are often used in generics
325        const COMMON_TYPES: &[&str] = &[
326            "string",
327            "number",
328            "any",
329            "void",
330            "null",
331            "undefined",
332            "array",
333            "promise",
334            "function",
335            "error",
336            "date",
337            "regexp",
338            "symbol",
339            "bigint",
340            "map",
341            "set",
342            "weakmap",
343            "weakset",
344            "iterator",
345            "generator",
346            "t",
347            "u",
348            "v",
349            "k",
350            "e", // Common single-letter type parameters
351            "userdata",
352            "apiresponse",
353            "config",
354            "options",
355            "params",
356            "result",
357            "response",
358            "request",
359            "data",
360            "item",
361            "element",
362            "node",
363        ];
364
365        let tag_content = tag
366            .trim_start_matches('<')
367            .trim_end_matches('>')
368            .trim_start_matches('/');
369        let tag_name = tag_content
370            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
371            .next()
372            .unwrap_or("");
373
374        // Check if it's a simple tag (no attributes) with a common type name
375        if !tag_content.contains(' ') && !tag_content.contains('=') {
376            COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
377        } else {
378            false
379        }
380    }
381
382    // Check if a tag is actually an email address in angle brackets
383    #[inline]
384    fn is_email_address(&self, tag: &str) -> bool {
385        let content = tag.trim_start_matches('<').trim_end_matches('>');
386        // Simple email pattern: contains @ and has reasonable structure
387        content.contains('@')
388            && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
389            && content.split('@').count() == 2
390            && content.split('@').all(|part| !part.is_empty())
391    }
392
393    // Check if a tag has the markdown attribute (MkDocs/Material for MkDocs)
394    #[inline]
395    fn has_markdown_attribute(&self, tag: &str) -> bool {
396        // Check for various forms of markdown attribute
397        // Examples: <div markdown>, <div markdown="1">, <div class="result" markdown>
398        tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
399    }
400
401    /// Check if a tag contains JSX-specific attributes that indicate it's JSX, not HTML
402    /// JSX uses different attribute names than HTML:
403    /// - `className` instead of `class`
404    /// - `htmlFor` instead of `for`
405    /// - camelCase event handlers (`onClick`, `onChange`, `onSubmit`, etc.)
406    /// - JSX expression syntax `={...}` for dynamic values
407    #[inline]
408    fn has_jsx_attributes(tag: &str) -> bool {
409        // JSX-specific attribute names (HTML uses class, for, onclick, etc.)
410        tag.contains("className")
411            || tag.contains("htmlFor")
412            || tag.contains("dangerouslySetInnerHTML")
413            // camelCase event handlers (JSX uses onClick, HTML uses onclick)
414            || tag.contains("onClick")
415            || tag.contains("onChange")
416            || tag.contains("onSubmit")
417            || tag.contains("onFocus")
418            || tag.contains("onBlur")
419            || tag.contains("onKeyDown")
420            || tag.contains("onKeyUp")
421            || tag.contains("onKeyPress")
422            || tag.contains("onMouseDown")
423            || tag.contains("onMouseUp")
424            || tag.contains("onMouseEnter")
425            || tag.contains("onMouseLeave")
426            // JSX expression syntax: ={expression} or ={ expression }
427            || tag.contains("={")
428    }
429
430    // Check if a tag is actually a URL in angle brackets
431    #[inline]
432    fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
433        let content = tag.trim_start_matches('<').trim_end_matches('>');
434        // Check for common URL schemes
435        content.starts_with("http://")
436            || content.starts_with("https://")
437            || content.starts_with("ftp://")
438            || content.starts_with("ftps://")
439            || content.starts_with("mailto:")
440    }
441
442    #[inline]
443    fn is_relaxed_fix_mode(&self) -> bool {
444        self.config.fix_mode == MD033FixMode::Relaxed
445    }
446
447    #[inline]
448    fn is_droppable_attribute(&self, attr_name: &str) -> bool {
449        // Event handler attributes (onclick, onload, etc.) are never droppable
450        // because they can execute arbitrary JavaScript.
451        if attr_name.starts_with("on") && attr_name.len() > 2 {
452            return false;
453        }
454        self.drop_attributes.contains(attr_name)
455            || (attr_name.starts_with("data-")
456                && (self.drop_attributes.contains("data-*") || self.drop_attributes.contains("data-")))
457    }
458
459    #[inline]
460    fn is_strippable_wrapper(&self, tag_name: &str) -> bool {
461        self.is_relaxed_fix_mode() && self.strip_wrapper_elements.contains(tag_name)
462    }
463
464    /// Check whether `byte_offset` sits directly inside a top-level strippable
465    /// wrapper element (e.g. `<p>`).  Returns `true` only when:
466    ///  1. The nearest unclosed opening tag before the offset is a configured
467    ///     wrapper element, AND
468    ///  2. That wrapper is itself NOT nested inside another HTML element.
469    ///
470    /// Condition 2 prevents converting inner content when the wrapper cannot
471    /// be stripped (e.g. `<div><p><img/></p></div>` -- stripping `<p>` is
472    /// blocked because it is nested, so converting `<img>` would leave
473    /// markdown inside an HTML block where it won't render).
474    fn is_inside_strippable_wrapper(&self, content: &str, byte_offset: usize) -> bool {
475        if byte_offset == 0 {
476            return false;
477        }
478        let before = content[..byte_offset].trim_end();
479        if !before.ends_with('>') || before.ends_with("->") {
480            return false;
481        }
482        if let Some(last_lt) = before.rfind('<') {
483            let potential_tag = &before[last_lt..];
484            if potential_tag.starts_with("</") || potential_tag.starts_with("<!--") {
485                return false;
486            }
487            let parent_name = potential_tag
488                .trim_start_matches('<')
489                .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
490                .next()
491                .unwrap_or("")
492                .to_lowercase();
493            if !self.strip_wrapper_elements.contains(&parent_name) {
494                return false;
495            }
496            // Verify the wrapper itself is not nested inside another element.
497            let wrapper_before = before[..last_lt].trim_end();
498            if wrapper_before.ends_with('>')
499                && !wrapper_before.ends_with("->")
500                && let Some(outer_lt) = wrapper_before.rfind('<')
501                && let outer_tag = &wrapper_before[outer_lt..]
502                && !outer_tag.starts_with("</")
503                && !outer_tag.starts_with("<!--")
504            {
505                return false;
506            }
507            return true;
508        }
509        false
510    }
511
512    /// Convert paired HTML tags to their Markdown equivalents.
513    /// Returns None if the tag cannot be safely converted (has nested tags, HTML entities, etc.)
514    fn convert_to_markdown(tag_name: &str, inner_content: &str) -> Option<String> {
515        // Skip if content contains nested HTML tags
516        if inner_content.contains('<') {
517            return None;
518        }
519        // Skip if content contains HTML entities (e.g., &vert;, &amp;, &lt;)
520        // These need HTML context to render correctly; markdown won't process them
521        if inner_content.contains('&') && inner_content.contains(';') {
522            // Check for common HTML entity patterns
523            let has_entity = inner_content
524                .split('&')
525                .skip(1)
526                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
527            if has_entity {
528                return None;
529            }
530        }
531        match tag_name {
532            "em" | "i" => Some(format!("*{inner_content}*")),
533            "strong" | "b" => Some(format!("**{inner_content}**")),
534            "code" => {
535                // Handle backticks in content by using double backticks with padding
536                if inner_content.contains('`') {
537                    Some(format!("`` {inner_content} ``"))
538                } else {
539                    Some(format!("`{inner_content}`"))
540                }
541            }
542            _ => None,
543        }
544    }
545
546    /// Convert self-closing HTML tags to their Markdown equivalents.
547    fn convert_self_closing_to_markdown(&self, tag_name: &str, opening_tag: &str) -> Option<String> {
548        match tag_name {
549            "br" => match self.config.br_style {
550                md033_config::BrStyle::TrailingSpaces => Some("  \n".to_string()),
551                md033_config::BrStyle::Backslash => Some("\\\n".to_string()),
552            },
553            "hr" => Some("\n---\n".to_string()),
554            "img" => self.convert_img_to_markdown(opening_tag),
555            _ => None,
556        }
557    }
558
559    /// Parse all attributes from an HTML tag into a list of (name, value) pairs.
560    /// This provides proper attribute parsing instead of naive string matching.
561    fn parse_attributes(tag: &str) -> Vec<(String, Option<String>)> {
562        let mut attrs = Vec::new();
563
564        // Remove < and > and tag name
565        let tag_content = tag.trim_start_matches('<').trim_end_matches('>').trim_end_matches('/');
566
567        // Find first whitespace to skip tag name
568        let attr_start = tag_content
569            .find(|c: char| c.is_whitespace())
570            .map(|i| i + 1)
571            .unwrap_or(tag_content.len());
572
573        if attr_start >= tag_content.len() {
574            return attrs;
575        }
576
577        let attr_str = &tag_content[attr_start..];
578        let mut chars = attr_str.chars().peekable();
579
580        while chars.peek().is_some() {
581            // Skip whitespace
582            while chars.peek().is_some_and(|c| c.is_whitespace()) {
583                chars.next();
584            }
585
586            if chars.peek().is_none() {
587                break;
588            }
589
590            // Read attribute name
591            let mut attr_name = String::new();
592            while let Some(&c) = chars.peek() {
593                if c.is_whitespace() || c == '=' || c == '>' || c == '/' {
594                    break;
595                }
596                attr_name.push(c);
597                chars.next();
598            }
599
600            if attr_name.is_empty() {
601                break;
602            }
603
604            // Skip whitespace before =
605            while chars.peek().is_some_and(|c| c.is_whitespace()) {
606                chars.next();
607            }
608
609            // Check for = and value
610            if chars.peek() == Some(&'=') {
611                chars.next(); // consume =
612
613                // Skip whitespace after =
614                while chars.peek().is_some_and(|c| c.is_whitespace()) {
615                    chars.next();
616                }
617
618                // Read value
619                let mut value = String::new();
620                if let Some(&quote) = chars.peek() {
621                    if quote == '"' || quote == '\'' {
622                        chars.next(); // consume opening quote
623                        for c in chars.by_ref() {
624                            if c == quote {
625                                break;
626                            }
627                            value.push(c);
628                        }
629                    } else {
630                        // Unquoted value
631                        while let Some(&c) = chars.peek() {
632                            if c.is_whitespace() || c == '>' || c == '/' {
633                                break;
634                            }
635                            value.push(c);
636                            chars.next();
637                        }
638                    }
639                }
640                attrs.push((attr_name.to_ascii_lowercase(), Some(value)));
641            } else {
642                // Boolean attribute (no value)
643                attrs.push((attr_name.to_ascii_lowercase(), None));
644            }
645        }
646
647        attrs
648    }
649
650    /// Extract an HTML attribute value from a tag string.
651    /// Handles double quotes, single quotes, and unquoted values.
652    /// Returns None if the attribute is not found.
653    fn extract_attribute(tag: &str, attr_name: &str) -> Option<String> {
654        let attrs = Self::parse_attributes(tag);
655        let attr_lower = attr_name.to_ascii_lowercase();
656
657        attrs
658            .into_iter()
659            .find(|(name, _)| name == &attr_lower)
660            .and_then(|(_, value)| value)
661    }
662
663    /// Check if an HTML tag has extra attributes beyond the specified allowed ones.
664    /// Uses proper attribute parsing to avoid false positives from string matching.
665    fn has_extra_attributes(&self, tag: &str, allowed_attrs: &[&str]) -> bool {
666        let attrs = Self::parse_attributes(tag);
667
668        // All event handlers (on*) are dangerous
669        // Plus common attributes that would be lost in markdown conversion
670        const DANGEROUS_ATTR_PREFIXES: &[&str] = &["on"]; // onclick, onload, onerror, etc.
671        const DANGEROUS_ATTRS: &[&str] = &[
672            "class",
673            "id",
674            "style",
675            "target",
676            "rel",
677            "download",
678            "referrerpolicy",
679            "crossorigin",
680            "loading",
681            "decoding",
682            "fetchpriority",
683            "sizes",
684            "srcset",
685            "usemap",
686            "ismap",
687            "width",
688            "height",
689            "name",   // anchor names
690            "data-*", // data attributes (checked separately)
691        ];
692
693        for (attr_name, _) in attrs {
694            // Skip allowed attributes (list is small, linear scan is efficient)
695            if allowed_attrs.iter().any(|a| a.to_ascii_lowercase() == attr_name) {
696                continue;
697            }
698
699            if self.is_relaxed_fix_mode() {
700                if self.is_droppable_attribute(&attr_name) {
701                    continue;
702                }
703                return true;
704            }
705
706            // Check for event handlers (on*)
707            for prefix in DANGEROUS_ATTR_PREFIXES {
708                if attr_name.starts_with(prefix) && attr_name.len() > prefix.len() {
709                    return true;
710                }
711            }
712
713            // Check for data-* attributes
714            if attr_name.starts_with("data-") {
715                return true;
716            }
717
718            // Check for other dangerous attributes
719            if DANGEROUS_ATTRS.contains(&attr_name.as_str()) {
720                return true;
721            }
722        }
723
724        false
725    }
726
727    /// Convert `<a href="url">text</a>` to `[text](url)` or `[text](url "title")`
728    /// Returns None if conversion is not safe.
729    fn convert_a_to_markdown(&self, opening_tag: &str, inner_content: &str) -> Option<String> {
730        // Extract href attribute
731        let href = Self::extract_attribute(opening_tag, "href")?;
732
733        // Check URL is safe
734        if !MD033Config::is_safe_url(&href) {
735            return None;
736        }
737
738        // Check for nested HTML tags in content
739        if inner_content.contains('<') {
740            return None;
741        }
742
743        // Check for HTML entities that wouldn't render correctly in markdown
744        if inner_content.contains('&') && inner_content.contains(';') {
745            let has_entity = inner_content
746                .split('&')
747                .skip(1)
748                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
749            if has_entity {
750                return None;
751            }
752        }
753
754        // Extract optional title attribute
755        let title = Self::extract_attribute(opening_tag, "title");
756
757        // Check for extra dangerous attributes (title is allowed)
758        if self.has_extra_attributes(opening_tag, &["href", "title"]) {
759            return None;
760        }
761
762        // If inner content is exactly a markdown image (from a prior <img> fix),
763        // use it directly without bracket escaping to produce valid [![alt](src)](href).
764        // Must verify the entire content is a single image — not mixed content like
765        // "![](url) extra [text]" where trailing brackets still need escaping.
766        let trimmed_inner = inner_content.trim();
767        let is_markdown_image =
768            trimmed_inner.starts_with("![") && trimmed_inner.contains("](") && trimmed_inner.ends_with(')') && {
769                // Verify the closing ](url) accounts for the rest of the content
770                // by finding the image's ]( and checking nothing follows the final )
771                if let Some(bracket_close) = trimmed_inner.rfind("](") {
772                    let after_paren = &trimmed_inner[bracket_close + 2..];
773                    // The rest should be just "url)" — find the matching close paren
774                    after_paren.ends_with(')')
775                        && after_paren.chars().filter(|&c| c == ')').count()
776                            >= after_paren.chars().filter(|&c| c == '(').count()
777                } else {
778                    false
779                }
780            };
781        let escaped_text = if is_markdown_image {
782            trimmed_inner.to_string()
783        } else {
784            // Escape special markdown characters in link text
785            // Brackets need escaping to avoid breaking the link syntax
786            inner_content.replace('[', r"\[").replace(']', r"\]")
787        };
788
789        // Escape parentheses in URL
790        let escaped_url = href.replace('(', "%28").replace(')', "%29");
791
792        // Format with or without title
793        if let Some(title_text) = title {
794            // Escape quotes in title
795            let escaped_title = title_text.replace('"', r#"\""#);
796            Some(format!("[{escaped_text}]({escaped_url} \"{escaped_title}\")"))
797        } else {
798            Some(format!("[{escaped_text}]({escaped_url})"))
799        }
800    }
801
802    /// Convert `<img src="url" alt="text">` to `![alt](src)` or `![alt](src "title")`
803    /// Returns None if conversion is not safe.
804    fn convert_img_to_markdown(&self, tag: &str) -> Option<String> {
805        // Extract src attribute (required)
806        let src = Self::extract_attribute(tag, "src")?;
807
808        // Check URL is safe
809        if !MD033Config::is_safe_url(&src) {
810            return None;
811        }
812
813        // Extract alt attribute (optional, default to empty)
814        let alt = Self::extract_attribute(tag, "alt").unwrap_or_default();
815
816        // Extract optional title attribute
817        let title = Self::extract_attribute(tag, "title");
818
819        // Check for extra dangerous attributes (title is allowed)
820        if self.has_extra_attributes(tag, &["src", "alt", "title"]) {
821            return None;
822        }
823
824        // Escape special markdown characters in alt text
825        let escaped_alt = alt.replace('[', r"\[").replace(']', r"\]");
826
827        // Escape parentheses in URL
828        let escaped_url = src.replace('(', "%28").replace(')', "%29");
829
830        // Format with or without title
831        if let Some(title_text) = title {
832            // Escape quotes in title
833            let escaped_title = title_text.replace('"', r#"\""#);
834            Some(format!("![{escaped_alt}]({escaped_url} \"{escaped_title}\")"))
835        } else {
836            Some(format!("![{escaped_alt}]({escaped_url})"))
837        }
838    }
839
840    /// Check if an HTML tag has attributes that would make conversion unsafe
841    fn has_significant_attributes(opening_tag: &str) -> bool {
842        // Tags with just whitespace or empty are fine
843        let tag_content = opening_tag
844            .trim_start_matches('<')
845            .trim_end_matches('>')
846            .trim_end_matches('/');
847
848        // Split by whitespace; if there's more than the tag name, it has attributes
849        let parts: Vec<&str> = tag_content.split_whitespace().collect();
850        parts.len() > 1
851    }
852
853    /// Check if a tag appears to be nested inside another HTML element
854    /// by looking at the surrounding context (e.g., `<code><em>text</em></code>`)
855    fn is_nested_in_html(content: &str, tag_byte_start: usize, tag_byte_end: usize) -> bool {
856        // Check if there's a `>` immediately before this tag (indicating inside another element)
857        if tag_byte_start > 0 {
858            let before = &content[..tag_byte_start];
859            let before_trimmed = before.trim_end();
860            if before_trimmed.ends_with('>') && !before_trimmed.ends_with("->") {
861                // Check it's not a closing tag or comment
862                if let Some(last_lt) = before_trimmed.rfind('<') {
863                    let potential_tag = &before_trimmed[last_lt..];
864                    // Skip if it's a closing tag (</...>) or comment (<!--)
865                    if !potential_tag.starts_with("</") && !potential_tag.starts_with("<!--") {
866                        return true;
867                    }
868                }
869            }
870        }
871        // Check if there's a `<` immediately after the closing tag (indicating inside another element)
872        if tag_byte_end < content.len() {
873            let after = &content[tag_byte_end..];
874            let after_trimmed = after.trim_start();
875            if after_trimmed.starts_with("</") {
876                return true;
877            }
878        }
879        false
880    }
881
882    /// Calculate fix to remove HTML tags while keeping content.
883    ///
884    /// For self-closing tags like `<br/>`, returns a single fix to remove the tag.
885    /// For paired tags like `<span>text</span>`, returns the replacement text (just the content).
886    ///
887    /// Returns (range, replacement_text) where range is the bytes to replace
888    /// and replacement_text is what to put there (content without tags, or empty for self-closing).
889    ///
890    /// When `in_html_block` is true, returns None in conservative mode.  In
891    /// relaxed mode two exceptions apply:
892    /// - Strippable wrapper elements (e.g. `<p>`) bypass the block guard so
893    ///   they can be stripped even though they ARE the HTML block.
894    /// - Self-closing tags whose direct parent is a strippable wrapper also
895    ///   bypass the guard so inner content can be converted first.
896    fn calculate_fix(
897        &self,
898        content: &str,
899        opening_tag: &str,
900        tag_byte_start: usize,
901        in_html_block: bool,
902    ) -> Option<(std::ops::Range<usize>, String)> {
903        // Extract tag name from opening tag
904        let tag_name = opening_tag
905            .trim_start_matches('<')
906            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
907            .next()?
908            .to_lowercase();
909
910        // Check if it's a self-closing tag (ends with /> or is a void element like <br>)
911        let is_self_closing =
912            opening_tag.ends_with("/>") || matches!(tag_name.as_str(), "br" | "hr" | "img" | "input" | "meta" | "link");
913
914        if is_self_closing {
915            // When fix is enabled, try to convert to Markdown equivalent.
916            // Skip tags inside HTML blocks (would break structure), UNLESS we
917            // are in relaxed mode and the containing block is a strippable
918            // wrapper -- this lets the inner element be converted first so the
919            // wrapper can be stripped on a subsequent pass.
920            let block_ok = !in_html_block
921                || (self.is_relaxed_fix_mode() && self.is_inside_strippable_wrapper(content, tag_byte_start));
922            if self.config.fix
923                && MD033Config::is_safe_fixable_tag(&tag_name)
924                && block_ok
925                && let Some(markdown) = self.convert_self_closing_to_markdown(&tag_name, opening_tag)
926            {
927                return Some((tag_byte_start..tag_byte_start + opening_tag.len(), markdown));
928            }
929            // Can't convert this self-closing tag to Markdown, don't provide a fix
930            // (e.g., <input>, <meta> - these have no Markdown equivalent without the new img support)
931            return None;
932        }
933
934        // Search for the closing tag after the opening tag (case-insensitive)
935        let search_start = tag_byte_start + opening_tag.len();
936        let search_slice = &content[search_start..];
937
938        // Find closing tag case-insensitively
939        let closing_tag_lower = format!("</{tag_name}>");
940        let closing_pos = search_slice.to_ascii_lowercase().find(&closing_tag_lower);
941
942        if let Some(closing_pos) = closing_pos {
943            // Get actual closing tag from original content to get correct byte length
944            let closing_tag_len = closing_tag_lower.len();
945            let closing_byte_start = search_start + closing_pos;
946            let closing_byte_end = closing_byte_start + closing_tag_len;
947
948            // Extract the content between tags
949            let inner_content = &content[search_start..closing_byte_start];
950
951            // In relaxed mode, check wrapper stripping BEFORE the in_html_block
952            // guard because the wrapper element itself IS the HTML block. We only
953            // strip when:
954            //  - the wrapper is not nested inside another HTML element
955            //  - the inner content no longer contains HTML tags (prevents
956            //    overlapping byte-range replacements within a single fix pass)
957            if self.config.fix && self.is_strippable_wrapper(&tag_name) {
958                if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
959                    return None;
960                }
961                if inner_content.contains('<') {
962                    return None;
963                }
964                return Some((tag_byte_start..closing_byte_end, inner_content.trim().to_string()));
965            }
966
967            // Skip auto-fix if inside an HTML block (like <pre>, <div>, etc.)
968            // Converting tags inside HTML blocks would break the intended structure
969            if in_html_block {
970                return None;
971            }
972
973            // Skip auto-fix if this tag is nested inside another HTML element
974            // e.g., <code><em>text</em></code> - don't convert the inner <em>
975            if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
976                return None;
977            }
978
979            // When fix is enabled and tag is safe to convert, try markdown conversion
980            if self.config.fix && MD033Config::is_safe_fixable_tag(&tag_name) {
981                // Handle <a> tags specially - they require attribute extraction
982                if tag_name == "a" {
983                    if let Some(markdown) = self.convert_a_to_markdown(opening_tag, inner_content) {
984                        return Some((tag_byte_start..closing_byte_end, markdown));
985                    }
986                    // convert_a_to_markdown returned None - unsafe URL, nested HTML, etc.
987                    return None;
988                }
989
990                // For simple tags (em, strong, code, etc.) - no attributes allowed
991                if Self::has_significant_attributes(opening_tag) {
992                    // Don't provide a fix for tags with attributes
993                    // User may want to keep the attributes, so leave as-is
994                    return None;
995                }
996                if let Some(markdown) = Self::convert_to_markdown(&tag_name, inner_content) {
997                    return Some((tag_byte_start..closing_byte_end, markdown));
998                }
999                // convert_to_markdown returned None, meaning content has nested tags or
1000                // HTML entities that shouldn't be converted - leave as-is
1001                return None;
1002            }
1003
1004            // For non-fixable tags, don't provide a fix
1005            // (e.g., <div>content</div>, <span>text</span>)
1006            return None;
1007        }
1008
1009        // If no closing tag found, don't provide a fix (malformed HTML)
1010        None
1011    }
1012}
1013
1014impl Rule for MD033NoInlineHtml {
1015    fn name(&self) -> &'static str {
1016        "MD033"
1017    }
1018
1019    fn description(&self) -> &'static str {
1020        "Inline HTML is not allowed"
1021    }
1022
1023    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
1024        let content = ctx.content;
1025
1026        // Early return: if no HTML tags at all, skip processing
1027        if content.is_empty() || !ctx.likely_has_html() {
1028            return Ok(Vec::new());
1029        }
1030
1031        // Quick check for HTML tag pattern before expensive processing
1032        if !HTML_TAG_QUICK_CHECK.is_match(content) {
1033            return Ok(Vec::new());
1034        }
1035
1036        let mut warnings = Vec::new();
1037
1038        // Use centralized HTML parser to get all HTML tags (including multiline)
1039        let html_tags = ctx.html_tags();
1040
1041        for html_tag in html_tags.iter() {
1042            // Skip closing tags (only warn on opening tags)
1043            if html_tag.is_closing {
1044                continue;
1045            }
1046
1047            let line_num = html_tag.line;
1048            let tag_byte_start = html_tag.byte_offset;
1049
1050            // Reconstruct tag string from byte offsets
1051            let tag = &content[html_tag.byte_offset..html_tag.byte_end];
1052
1053            // Skip tags in code blocks, PyMdown blocks, and block IALs
1054            if ctx
1055                .line_info(line_num)
1056                .is_some_and(|info| info.in_code_block || info.in_pymdown_block || info.is_kramdown_block_ial)
1057            {
1058                continue;
1059            }
1060
1061            // Skip HTML tags inside HTML comments
1062            if ctx.is_in_html_comment(tag_byte_start) {
1063                continue;
1064            }
1065
1066            // Skip HTML comments themselves
1067            if self.is_html_comment(tag) {
1068                continue;
1069            }
1070
1071            // Skip angle brackets inside link reference definition titles
1072            // e.g., [ref]: url "Title with <angle brackets>"
1073            if ctx.is_in_link_title(tag_byte_start) {
1074                continue;
1075            }
1076
1077            // Skip JSX components in MDX files (e.g., <Chart />, <MyComponent>)
1078            if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
1079                continue;
1080            }
1081
1082            // Skip JSX fragments in MDX files (<> and </>)
1083            if ctx.flavor.supports_jsx() && (html_tag.tag_name.is_empty() || tag == "<>" || tag == "</>") {
1084                continue;
1085            }
1086
1087            // Skip elements with JSX-specific attributes in MDX files
1088            // e.g., <div className="...">, <button onClick={handler}>
1089            if ctx.flavor.supports_jsx() && Self::has_jsx_attributes(tag) {
1090                continue;
1091            }
1092
1093            // Skip non-HTML elements (placeholder syntax like <NAME>, <resource>)
1094            if !Self::is_html_element_or_custom(&html_tag.tag_name) {
1095                continue;
1096            }
1097
1098            // Skip likely programming type annotations
1099            if self.is_likely_type_annotation(tag) {
1100                continue;
1101            }
1102
1103            // Skip email addresses in angle brackets
1104            if self.is_email_address(tag) {
1105                continue;
1106            }
1107
1108            // Skip URLs in angle brackets
1109            if self.is_url_in_angle_brackets(tag) {
1110                continue;
1111            }
1112
1113            // Skip tags inside code spans (use byte offset for reliable multi-line span detection)
1114            if ctx.is_byte_offset_in_code_span(tag_byte_start) {
1115                continue;
1116            }
1117
1118            // Determine whether to report this tag based on mode:
1119            // - Disallowed mode: only report tags in the disallowed list
1120            // - Default mode: report all tags except those in the allowed list
1121            if self.is_disallowed_mode() {
1122                // In disallowed mode, skip tags NOT in the disallowed list
1123                if !self.is_tag_disallowed(tag) {
1124                    continue;
1125                }
1126            } else {
1127                // In default mode, skip allowed tags
1128                if self.is_tag_allowed(tag) {
1129                    continue;
1130                }
1131            }
1132
1133            // Skip tags with markdown attribute in MkDocs mode
1134            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
1135                continue;
1136            }
1137
1138            // Check if we're inside an HTML block (like <pre>, <div>, etc.)
1139            let in_html_block = ctx.is_in_html_block(line_num);
1140
1141            // Calculate fix to remove HTML tags but keep content
1142            let fix = self
1143                .calculate_fix(content, tag, tag_byte_start, in_html_block)
1144                .map(|(range, replacement)| Fix { range, replacement });
1145
1146            // Calculate actual end line and column for multiline tags
1147            // Use byte_end - 1 to get the last character position of the tag
1148            let (end_line, end_col) = if html_tag.byte_end > 0 {
1149                ctx.offset_to_line_col(html_tag.byte_end - 1)
1150            } else {
1151                (line_num, html_tag.end_col + 1)
1152            };
1153
1154            // Report the HTML tag
1155            warnings.push(LintWarning {
1156                rule_name: Some(self.name().to_string()),
1157                line: line_num,
1158                column: html_tag.start_col + 1, // Convert to 1-indexed
1159                end_line,                       // Actual end line for multiline tags
1160                end_column: end_col + 1,        // Actual end column
1161                message: format!("Inline HTML found: {tag}"),
1162                severity: Severity::Warning,
1163                fix,
1164            });
1165        }
1166
1167        Ok(warnings)
1168    }
1169
1170    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
1171        // Auto-fix is opt-in: only apply if explicitly enabled in config
1172        if !self.config.fix {
1173            return Ok(ctx.content.to_string());
1174        }
1175
1176        // Get warnings with their inline fixes
1177        let warnings = self.check(ctx)?;
1178        let warnings =
1179            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
1180
1181        // If no warnings with fixes, return original content
1182        if warnings.is_empty() || !warnings.iter().any(|w| w.fix.is_some()) {
1183            return Ok(ctx.content.to_string());
1184        }
1185
1186        // Collect all fixes and sort by range start (descending) to apply from end to beginning
1187        let mut fixes: Vec<_> = warnings
1188            .iter()
1189            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
1190            .collect();
1191        fixes.sort_by(|a, b| b.0.cmp(&a.0));
1192
1193        // Apply fixes from end to beginning to preserve byte offsets
1194        let mut result = ctx.content.to_string();
1195        for (start, end, replacement) in fixes {
1196            if start < result.len() && end <= result.len() && start <= end {
1197                result.replace_range(start..end, replacement);
1198            }
1199        }
1200
1201        Ok(result)
1202    }
1203
1204    fn fix_capability(&self) -> crate::rule::FixCapability {
1205        if self.config.fix {
1206            crate::rule::FixCapability::FullyFixable
1207        } else {
1208            crate::rule::FixCapability::Unfixable
1209        }
1210    }
1211
1212    /// Get the category of this rule for selective processing
1213    fn category(&self) -> RuleCategory {
1214        RuleCategory::Html
1215    }
1216
1217    /// Check if this rule should be skipped
1218    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
1219        ctx.content.is_empty() || !ctx.likely_has_html()
1220    }
1221
1222    fn as_any(&self) -> &dyn std::any::Any {
1223        self
1224    }
1225
1226    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1227        let json_value = serde_json::to_value(&self.config).ok()?;
1228        Some((
1229            self.name().to_string(),
1230            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1231        ))
1232    }
1233
1234    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1235    where
1236        Self: Sized,
1237    {
1238        let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
1239        Box::new(Self::from_config_struct(rule_config))
1240    }
1241}
1242
1243#[cfg(test)]
1244mod tests {
1245    use super::*;
1246    use crate::lint_context::LintContext;
1247    use crate::rule::Rule;
1248
1249    fn relaxed_fix_rule() -> MD033NoInlineHtml {
1250        let config = MD033Config {
1251            fix: true,
1252            fix_mode: MD033FixMode::Relaxed,
1253            ..MD033Config::default()
1254        };
1255        MD033NoInlineHtml::from_config_struct(config)
1256    }
1257
1258    #[test]
1259    fn test_md033_basic_html() {
1260        let rule = MD033NoInlineHtml::default();
1261        let content = "<div>Some content</div>";
1262        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1263        let result = rule.check(&ctx).unwrap();
1264        // Only reports opening tags, not closing tags
1265        assert_eq!(result.len(), 1); // Only <div>, not </div>
1266        assert!(result[0].message.starts_with("Inline HTML found: <div>"));
1267    }
1268
1269    #[test]
1270    fn test_md033_case_insensitive() {
1271        let rule = MD033NoInlineHtml::default();
1272        let content = "<DiV>Some <B>content</B></dIv>";
1273        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1274        let result = rule.check(&ctx).unwrap();
1275        // Only reports opening tags, not closing tags
1276        assert_eq!(result.len(), 2); // <DiV>, <B> (not </B>, </dIv>)
1277        assert_eq!(result[0].message, "Inline HTML found: <DiV>");
1278        assert_eq!(result[1].message, "Inline HTML found: <B>");
1279    }
1280
1281    #[test]
1282    fn test_md033_allowed_tags() {
1283        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
1284        let content = "<div>Allowed</div><p>Not allowed</p><br/>";
1285        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1286        let result = rule.check(&ctx).unwrap();
1287        // Only warnings for non-allowed opening tags (<p> only, div and br are allowed)
1288        assert_eq!(result.len(), 1);
1289        assert_eq!(result[0].message, "Inline HTML found: <p>");
1290
1291        // Test case-insensitivity of allowed tags
1292        let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
1293        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1294        let result2 = rule.check(&ctx2).unwrap();
1295        assert_eq!(result2.len(), 1); // Only <P> flagged
1296        assert_eq!(result2[0].message, "Inline HTML found: <P>");
1297    }
1298
1299    #[test]
1300    fn test_md033_html_comments() {
1301        let rule = MD033NoInlineHtml::default();
1302        let content = "<!-- This is a comment --> <p>Not a comment</p>";
1303        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1304        let result = rule.check(&ctx).unwrap();
1305        // Should detect warnings for HTML opening tags (comments are skipped, closing tags not reported)
1306        assert_eq!(result.len(), 1); // Only <p>
1307        assert_eq!(result[0].message, "Inline HTML found: <p>");
1308    }
1309
1310    #[test]
1311    fn test_md033_tags_in_links() {
1312        let rule = MD033NoInlineHtml::default();
1313        let content = "[Link](http://example.com/<div>)";
1314        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1315        let result = rule.check(&ctx).unwrap();
1316        // The <div> in the URL should be detected as HTML (not skipped)
1317        assert_eq!(result.len(), 1);
1318        assert_eq!(result[0].message, "Inline HTML found: <div>");
1319
1320        let content2 = "[Link <a>text</a>](url)";
1321        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1322        let result2 = rule.check(&ctx2).unwrap();
1323        // Only reports opening tags
1324        assert_eq!(result2.len(), 1); // Only <a>
1325        assert_eq!(result2[0].message, "Inline HTML found: <a>");
1326    }
1327
1328    #[test]
1329    fn test_md033_fix_escaping() {
1330        let rule = MD033NoInlineHtml::default();
1331        let content = "Text with <div> and <br/> tags.";
1332        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1333        let fixed_content = rule.fix(&ctx).unwrap();
1334        // No fix for HTML tags; output should be unchanged
1335        assert_eq!(fixed_content, content);
1336    }
1337
1338    #[test]
1339    fn test_md033_in_code_blocks() {
1340        let rule = MD033NoInlineHtml::default();
1341        let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
1342        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1343        let result = rule.check(&ctx).unwrap();
1344        // Only reports opening tags outside code block
1345        assert_eq!(result.len(), 1); // Only <div> outside code block
1346        assert_eq!(result[0].message, "Inline HTML found: <div>");
1347    }
1348
1349    #[test]
1350    fn test_md033_in_code_spans() {
1351        let rule = MD033NoInlineHtml::default();
1352        let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
1353        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1354        let result = rule.check(&ctx).unwrap();
1355        // Should detect <br/> outside code span, but not tags inside code span
1356        assert_eq!(result.len(), 1);
1357        assert_eq!(result[0].message, "Inline HTML found: <br/>");
1358    }
1359
1360    #[test]
1361    fn test_md033_issue_90_code_span_with_diff_block() {
1362        // Test for issue #90: inline code span followed by diff code block
1363        let rule = MD033NoInlineHtml::default();
1364        let content = r#"# Heading
1365
1366`<env>`
1367
1368```diff
1369- this
1370+ that
1371```"#;
1372        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1373        let result = rule.check(&ctx).unwrap();
1374        // Should NOT detect <env> as HTML since it's inside backticks
1375        assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
1376    }
1377
1378    #[test]
1379    fn test_md033_multiple_code_spans_with_angle_brackets() {
1380        // Test multiple code spans on same line
1381        let rule = MD033NoInlineHtml::default();
1382        let content = "`<one>` and `<two>` and `<three>` are all code spans";
1383        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1384        let result = rule.check(&ctx).unwrap();
1385        assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
1386    }
1387
1388    #[test]
1389    fn test_md033_nested_angle_brackets_in_code_span() {
1390        // Test nested angle brackets
1391        let rule = MD033NoInlineHtml::default();
1392        let content = "Text with `<<nested>>` brackets";
1393        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1394        let result = rule.check(&ctx).unwrap();
1395        assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
1396    }
1397
1398    #[test]
1399    fn test_md033_code_span_at_end_before_code_block() {
1400        // Test code span at end of line before code block
1401        let rule = MD033NoInlineHtml::default();
1402        let content = "Testing `<test>`\n```\ncode here\n```";
1403        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1404        let result = rule.check(&ctx).unwrap();
1405        assert_eq!(result.len(), 0, "Should handle code span before code block");
1406    }
1407
1408    #[test]
1409    fn test_md033_quick_fix_inline_tag() {
1410        // Test that non-fixable tags (like <span>) do NOT get a fix
1411        // Only safe fixable tags (em, i, strong, b, code, br, hr) with fix=true get fixes
1412        let rule = MD033NoInlineHtml::default();
1413        let content = "This has <span>inline text</span> that should keep content.";
1414        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1415        let result = rule.check(&ctx).unwrap();
1416
1417        assert_eq!(result.len(), 1, "Should find one HTML tag");
1418        // <span> is NOT a safe fixable tag, so no fix should be provided
1419        assert!(
1420            result[0].fix.is_none(),
1421            "Non-fixable tags like <span> should not have a fix"
1422        );
1423    }
1424
1425    #[test]
1426    fn test_md033_quick_fix_multiline_tag() {
1427        // HTML block elements like <div> are intentionally NOT auto-fixed
1428        // Removing them would change document structure significantly
1429        let rule = MD033NoInlineHtml::default();
1430        let content = "<div>\nBlock content\n</div>";
1431        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1432        let result = rule.check(&ctx).unwrap();
1433
1434        assert_eq!(result.len(), 1, "Should find one HTML tag");
1435        // HTML block elements should NOT have auto-fix
1436        assert!(result[0].fix.is_none(), "HTML block elements should NOT have auto-fix");
1437    }
1438
1439    #[test]
1440    fn test_md033_quick_fix_self_closing_tag() {
1441        // Test that self-closing tags with fix=false (default) do NOT get a fix
1442        let rule = MD033NoInlineHtml::default();
1443        let content = "Self-closing: <br/>";
1444        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1445        let result = rule.check(&ctx).unwrap();
1446
1447        assert_eq!(result.len(), 1, "Should find one HTML tag");
1448        // Default config has fix=false, so no fix should be provided
1449        assert!(
1450            result[0].fix.is_none(),
1451            "Self-closing tags should not have a fix when fix config is false"
1452        );
1453    }
1454
1455    #[test]
1456    fn test_md033_quick_fix_multiple_tags() {
1457        // Test that multiple tags without fix=true do NOT get fixes
1458        // <span> is not a safe fixable tag, <strong> is but fix=false by default
1459        let rule = MD033NoInlineHtml::default();
1460        let content = "<span>first</span> and <strong>second</strong>";
1461        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1462        let result = rule.check(&ctx).unwrap();
1463
1464        assert_eq!(result.len(), 2, "Should find two HTML tags");
1465        // Neither should have a fix: <span> is not fixable, <strong> is but fix=false
1466        assert!(result[0].fix.is_none(), "Non-fixable <span> should not have a fix");
1467        assert!(
1468            result[1].fix.is_none(),
1469            "<strong> should not have a fix when fix config is false"
1470        );
1471    }
1472
1473    #[test]
1474    fn test_md033_skip_angle_brackets_in_link_titles() {
1475        // Angle brackets inside link reference definition titles should not be flagged as HTML
1476        let rule = MD033NoInlineHtml::default();
1477        let content = r#"# Test
1478
1479[example]: <https://example.com> "Title with <Angle Brackets> inside"
1480
1481Regular text with <div>content</div> HTML tag.
1482"#;
1483        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1484        let result = rule.check(&ctx).unwrap();
1485
1486        // Should only flag <div>, not <Angle Brackets> in the title (not a valid HTML element)
1487        // Opening tag only (markdownlint behavior)
1488        assert_eq!(result.len(), 1, "Should find opening div tag");
1489        assert!(
1490            result[0].message.contains("<div>"),
1491            "Should flag <div>, got: {}",
1492            result[0].message
1493        );
1494    }
1495
1496    #[test]
1497    fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
1498        // Test with single-quoted title
1499        let rule = MD033NoInlineHtml::default();
1500        let content = r#"[ref]: url 'Title <Help Wanted> here'
1501
1502<span>text</span> here
1503"#;
1504        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1505        let result = rule.check(&ctx).unwrap();
1506
1507        // <Help Wanted> is not a valid HTML element, so only <span> is flagged
1508        // Opening tag only (markdownlint behavior)
1509        assert_eq!(result.len(), 1, "Should find opening span tag");
1510        assert!(
1511            result[0].message.contains("<span>"),
1512            "Should flag <span>, got: {}",
1513            result[0].message
1514        );
1515    }
1516
1517    #[test]
1518    fn test_md033_multiline_tag_end_line_calculation() {
1519        // Test that multiline HTML tags report correct end_line
1520        let rule = MD033NoInlineHtml::default();
1521        let content = "<div\n  class=\"test\"\n  id=\"example\">";
1522        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1523        let result = rule.check(&ctx).unwrap();
1524
1525        assert_eq!(result.len(), 1, "Should find one HTML tag");
1526        // Tag starts on line 1
1527        assert_eq!(result[0].line, 1, "Start line should be 1");
1528        // Tag ends on line 3 (where the closing > is)
1529        assert_eq!(result[0].end_line, 3, "End line should be 3");
1530    }
1531
1532    #[test]
1533    fn test_md033_single_line_tag_same_start_end_line() {
1534        // Test that single-line HTML tags have same start and end line
1535        let rule = MD033NoInlineHtml::default();
1536        let content = "Some text <div class=\"test\"> more text";
1537        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1538        let result = rule.check(&ctx).unwrap();
1539
1540        assert_eq!(result.len(), 1, "Should find one HTML tag");
1541        assert_eq!(result[0].line, 1, "Start line should be 1");
1542        assert_eq!(result[0].end_line, 1, "End line should be 1 for single-line tag");
1543    }
1544
1545    #[test]
1546    fn test_md033_multiline_tag_with_many_attributes() {
1547        // Test multiline tag spanning multiple lines
1548        let rule = MD033NoInlineHtml::default();
1549        let content =
1550            "Text\n<div\n  data-attr1=\"value1\"\n  data-attr2=\"value2\"\n  data-attr3=\"value3\">\nMore text";
1551        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1552        let result = rule.check(&ctx).unwrap();
1553
1554        assert_eq!(result.len(), 1, "Should find one HTML tag");
1555        // Tag starts on line 2 (first line is "Text")
1556        assert_eq!(result[0].line, 2, "Start line should be 2");
1557        // Tag ends on line 5 (where the closing > is)
1558        assert_eq!(result[0].end_line, 5, "End line should be 5");
1559    }
1560
/// With a disallowed list configured, only tags on that list are reported.
#[test]
fn test_md033_disallowed_mode_basic() {
    let banned = vec![String::from("script"), String::from("iframe")];
    let linter = MD033NoInlineHtml::with_disallowed(banned);

    let input = "<div>Safe content</div><script>alert('xss')</script>";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();

    // `<div>` is not on the list, so `<script>` is the single expected hit.
    assert_eq!(warnings.len(), 1, "Should only flag disallowed tags");
    let first = &warnings[0];
    assert!(first.message.contains("<script>"), "Should flag script tag");
}
1573
/// The `"gfm"` shorthand in the disallowed list flags title, textarea, style,
/// iframe, script and plaintext, while leaving div and span alone.
#[test]
fn test_md033_disallowed_gfm_security_tags() {
    let linter = MD033NoInlineHtml::with_disallowed(vec![String::from("gfm")]);
    let input = r#"
<div>Safe</div>
<title>Bad title</title>
<textarea>Bad textarea</textarea>
<style>.bad{}</style>
<iframe src="evil"></iframe>
<script>evil()</script>
<plaintext>old tag</plaintext>
<span>Safe span</span>
"#;
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();

    // Six of the eight elements above are expected to be reported.
    assert_eq!(warnings.len(), 6, "Should flag 6 GFM security tags");

    // Pull the bare tag name out of each message: the text between the first
    // `<` and the following `>`, cut at the first whitespace.
    let flagged_tags: Vec<&str> = warnings
        .iter()
        .filter_map(|warning| {
            let after_open = warning.message.split('<').nth(1)?;
            let inside = after_open.split('>').next()?;
            inside.split_whitespace().next()
        })
        .collect();

    let expected = ["title", "textarea", "style", "iframe", "script", "plaintext"];
    for tag in expected.iter() {
        assert!(flagged_tags.contains(tag), "Should flag {tag}");
    }

    let untouched = ["div", "span"];
    for tag in untouched.iter() {
        assert!(!flagged_tags.contains(tag), "Should NOT flag {tag}");
    }
}
1611
/// Matching against the disallowed list ignores the case of the tag name.
#[test]
fn test_md033_disallowed_case_insensitive() {
    let linter = MD033NoInlineHtml::with_disallowed(vec![String::from("script")]);

    let input = "<SCRIPT>alert('xss')</SCRIPT><Script>alert('xss')</Script>";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();

    // One warning for `<SCRIPT>` and one for `<Script>`.
    assert_eq!(warnings.len(), 2, "Should flag both case variants");
}
1623
/// A disallowed tag is still reported when it carries attributes.
#[test]
fn test_md033_disallowed_with_attributes() {
    let linter = MD033NoInlineHtml::with_disallowed(vec![String::from("iframe")]);

    let input = r#"<iframe src="https://evil.com" width="100" height="100"></iframe>"#;
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();

    assert_eq!(warnings.len(), 1, "Should flag iframe with attributes");
    let first = &warnings[0];
    assert!(first.message.contains("iframe"), "Should flag iframe");
}
1635
/// Every entry of `GFM_DISALLOWED_TAGS` is reported exactly once when the
/// rule is configured with the `"gfm"` shorthand.
#[test]
fn test_md033_disallowed_all_gfm_tags() {
    use md033_config::GFM_DISALLOWED_TAGS;

    let linter = MD033NoInlineHtml::with_disallowed(vec![String::from("gfm")]);

    for tag in GFM_DISALLOWED_TAGS {
        // Wrap some text in an opening/closing pair of the tag under test.
        let snippet = format!("<{tag}>content</{tag}>");
        let context = LintContext::new(&snippet, crate::config::MarkdownFlavor::Standard, None);
        let warnings = linter.check(&context).unwrap();

        assert_eq!(warnings.len(), 1, "GFM tag <{tag}> should be flagged");
    }
}
1650
/// The `"gfm"` shorthand can be combined with additional custom tag names.
#[test]
fn test_md033_disallowed_mixed_with_custom() {
    // "marquee" is the custom entry alongside the "gfm" shorthand.
    let banned = vec![String::from("gfm"), String::from("marquee")];
    let linter = MD033NoInlineHtml::with_disallowed(banned);

    let input = r#"<script>bad</script><marquee>annoying</marquee><div>ok</div>"#;
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();

    // `<script>` comes from the gfm set, `<marquee>` from the custom entry.
    assert_eq!(warnings.len(), 2, "Should flag both gfm and custom tags");
}
1665
/// Passing an empty disallowed list leaves the rule in its default mode,
/// where any HTML tag is reported.
#[test]
fn test_md033_disallowed_empty_means_default_mode() {
    let linter = MD033NoInlineHtml::with_disallowed(Vec::new());

    let input = "<div>content</div>";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();

    // `<div>` is reported because nothing restricts the rule to a list.
    assert_eq!(warnings.len(), 1, "Empty disallowed = default mode");
}
1677
/// In the MDX flavor, JSX fragments (`<>` / `</>`) and the components inside
/// them are not reported; a plain `<div>` still is.
#[test]
fn test_md033_jsx_fragments_in_mdx() {
    use crate::config::MarkdownFlavor;

    let input = r#"# MDX Document

<>
  <Heading />
  <Content />
</>

<div>Regular HTML should still be flagged</div>
"#;
    let linter = MD033NoInlineHtml::default();
    let context = LintContext::new(input, MarkdownFlavor::MDX, None);
    let warnings = linter.check(&context).unwrap();

    // The `<div>` is the only element expected to produce a warning.
    assert_eq!(warnings.len(), 1, "Should only find one HTML tag (the div)");

    let flags_div = warnings[0].message.contains("<div>");
    assert!(flags_div, "Should flag <div>, not JSX fragments");
}
1701
/// In the MDX flavor, capitalized JSX components produce no warnings.
#[test]
fn test_md033_jsx_components_in_mdx() {
    use crate::config::MarkdownFlavor;

    let input = r#"<CustomComponent prop="value">
  Content
</CustomComponent>

<MyButton onClick={handler}>Click</MyButton>
"#;
    let linter = MD033NoInlineHtml::default();
    let context = LintContext::new(input, MarkdownFlavor::MDX, None);
    let warnings = linter.check(&context).unwrap();

    // Every element in the input is a JSX component, so nothing is reported.
    assert_eq!(warnings.len(), 0, "Should not flag JSX components in MDX");
}
1718
/// Outside MDX, a capitalized tag such as `<Script>` is still reported.
#[test]
fn test_md033_jsx_not_skipped_in_standard_markdown() {
    use crate::config::MarkdownFlavor;

    let input = "<Script>alert(1)</Script>";
    let linter = MD033NoInlineHtml::default();
    let context = LintContext::new(input, MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();

    // The standard flavor gets no JSX exemption for capitalized names.
    assert_eq!(warnings.len(), 1, "Should flag <Script> in standard markdown");
}
1730
/// In the MDX flavor, elements using JSX-style attributes (`className`,
/// `onClick={...}`, `htmlFor`, `onChange={...}`) are not reported, while an
/// element with a plain HTML `class` attribute is.
#[test]
fn test_md033_jsx_attributes_in_mdx() {
    use crate::config::MarkdownFlavor;

    let input = r#"# MDX with JSX Attributes

<div className="card big">Content</div>

<button onClick={handleClick}>Click me</button>

<label htmlFor="input-id">Label</label>

<input onChange={handleChange} />

<div class="html-class">Regular HTML should be flagged</div>
"#;
    let linter = MD033NoInlineHtml::default();
    let context = LintContext::new(input, MarkdownFlavor::MDX, None);
    let result = linter.check(&context).unwrap();

    // Only the final `<div class=...>` lacks a JSX-specific attribute.
    assert_eq!(
        result.len(),
        1,
        "Should only flag HTML element without JSX attributes, got: {result:?}"
    );

    let flags_class_div = result[0].message.contains("<div class=");
    assert!(flags_class_div, "Should flag the div with HTML class attribute");
}
1761
/// Outside MDX, an element with a JSX-style attribute is still reported.
#[test]
fn test_md033_jsx_attributes_not_skipped_in_standard() {
    use crate::config::MarkdownFlavor;

    let input = r#"<div className="card">Content</div>"#;
    let linter = MD033NoInlineHtml::default();
    let context = LintContext::new(input, MarkdownFlavor::Standard, None);
    let warnings = linter.check(&context).unwrap();

    // `className` earns no exemption in the standard flavor.
    assert_eq!(warnings.len(), 1, "Should flag JSX-style elements in standard markdown");
}
1773
1774    // Auto-fix tests for MD033
1775
/// A default-constructed rule has `fix` turned off and reports itself as
/// unfixable.
#[test]
fn test_md033_fix_disabled_by_default() {
    use crate::rule::FixCapability;

    let linter = MD033NoInlineHtml::default();

    let fix_enabled = linter.config.fix;
    assert!(!fix_enabled, "Fix should be disabled by default");
    assert_eq!(linter.fix_capability(), FixCapability::Unfixable);
}
1783
/// With fixing enabled, `<em>text</em>` is rewritten to `*text*`.
#[test]
fn test_md033_fix_enabled_em_to_italic() {
    let input = "This has <em>emphasized text</em> here.";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "This has *emphasized text* here.");
}
1793
/// With fixing enabled, `<i>text</i>` is rewritten to `*text*`.
#[test]
fn test_md033_fix_enabled_i_to_italic() {
    let input = "This has <i>italic text</i> here.";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "This has *italic text* here.");
}
1803
/// With fixing enabled, `<strong>text</strong>` is rewritten to `**text**`.
#[test]
fn test_md033_fix_enabled_strong_to_bold() {
    let input = "This has <strong>bold text</strong> here.";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "This has **bold text** here.");
}
1813
/// With fixing enabled, `<b>text</b>` is rewritten to `**text**`.
#[test]
fn test_md033_fix_enabled_b_to_bold() {
    let input = "This has <b>bold text</b> here.";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "This has **bold text** here.");
}
1823
/// With fixing enabled, `<code>text</code>` becomes a backtick code span.
#[test]
fn test_md033_fix_enabled_code_to_backticks() {
    let input = "This has <code>inline code</code> here.";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "This has `inline code` here.");
}
1833
/// When the `<code>` content already contains backticks, the replacement
/// uses a double-backtick delimiter padded with spaces.
#[test]
fn test_md033_fix_enabled_code_with_backticks() {
    let input = "This has <code>text with `backticks`</code> here.";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "This has `` text with `backticks` `` here.");
}
1843
/// With the default break style, `<br>` becomes two trailing spaces followed
/// by a newline.
#[test]
fn test_md033_fix_enabled_br_trailing_spaces() {
    let input = "First line<br>Second line";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "First line  \nSecond line");
}
1853
/// The self-closing spellings `<br/>` and `<br />` are converted the same way
/// as a bare `<br>`.
#[test]
fn test_md033_fix_enabled_br_self_closing() {
    let input = "First<br/>second<br />third";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "First  \nsecond  \nthird");
}
1863
/// With `br_style` set to `Backslash`, `<br>` becomes a backslash followed by
/// a newline.
#[test]
fn test_md033_fix_enabled_br_backslash_style() {
    use md033_config::BrStyle;

    let backslash_config = MD033Config {
        allowed: Vec::new(),
        disallowed: Vec::new(),
        fix: true,
        br_style: BrStyle::Backslash,
        ..MD033Config::default()
    };
    let fixer = MD033NoInlineHtml::from_config_struct(backslash_config);

    let input = "First line<br>Second line";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "First line\\\nSecond line");
}
1880
/// With fixing enabled, `<hr>` becomes a `---` rule on its own line.
#[test]
fn test_md033_fix_enabled_hr() {
    let input = "Above<hr>Below";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "Above\n---\nBelow");
}
1890
/// The self-closing `<hr/>` is converted exactly like `<hr>`.
#[test]
fn test_md033_fix_enabled_hr_self_closing() {
    let input = "Above<hr/>Below";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "Above\n---\nBelow");
}
1900
/// For nested fixable tags, a single fix pass converts the inner tag and
/// leaves the outer one in place.
#[test]
fn test_md033_fix_skips_nested_tags() {
    let input = "This has <em>text with <strong>nested</strong> tags</em> here.";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    // The inner `<strong>` is rewritten; the enclosing `<em>` overlaps that
    // edit and is left untouched, so nested markup needs repeated fix runs.
    assert_eq!(output, "This has <em>text with **nested** tags</em> here.");
}
1913
/// A fixable tag that carries attributes is left exactly as written, since
/// the attributes (e.g. a styling class) may matter to the author.
#[test]
fn test_md033_fix_skips_tags_with_attributes() {
    let content = "This has <em class=\"highlight\">emphasized</em> text.";
    let context = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    // The output must be byte-identical to the input.
    assert_eq!(output, content);
}
1925
/// When fixing is disabled (the default), `fix` returns the input unchanged.
#[test]
fn test_md033_fix_disabled_no_changes() {
    let content = "This has <em>emphasized text</em> here.";
    let context = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    // The default rule has `fix` set to false.
    let linter = MD033NoInlineHtml::default();
    let output = linter.fix(&context).unwrap();

    assert_eq!(output, content, "Should return original content when fix is disabled");
}
1935
/// A rule built with fixing enabled reports itself as fully fixable.
#[test]
fn test_md033_fix_capability_enabled() {
    use crate::rule::FixCapability;

    let fixer = MD033NoInlineHtml::with_fix(true);
    let capability = fixer.fix_capability();

    assert_eq!(capability, FixCapability::FullyFixable);
}
1941
/// Several fixable tags in the same document are all converted in one pass.
#[test]
fn test_md033_fix_multiple_tags() {
    let input = "Here is <em>italic</em> and <strong>bold</strong> text.";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "Here is *italic* and **bold** text.");
}
1951
/// Tag names are matched case-insensitively when fixing: `<EM>` is converted
/// just like `<em>`.
#[test]
fn test_md033_fix_uppercase_tags() {
    let input = "This has <EM>emphasized</EM> text.";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "This has *emphasized* text.");
}
1961
/// Tags without a safe Markdown equivalent, such as `<div>`, are left as-is
/// even when fixing is enabled.
#[test]
fn test_md033_fix_unsafe_tags_not_modified() {
    let input = "This has <div>a div</div> content.";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    // `<div>` is not among the convertible tags, so nothing changes.
    assert_eq!(output, "This has <div>a div</div> content.");
}
1973
/// An `<img>` carrying only `src` and `alt` is rewritten to `![alt](src)`.
#[test]
fn test_md033_fix_img_tag_converted() {
    let input = "Image: <img src=\"photo.jpg\" alt=\"My Photo\">";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    // The alt text goes in the brackets, the source in the parentheses.
    assert_eq!(output, "Image: ![My Photo](photo.jpg)");
}
1984
/// In the default fix mode, an `<img>` with an extra attribute such as
/// `width` is not converted.
#[test]
fn test_md033_fix_img_tag_with_extra_attrs_not_converted() {
    let input = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    // The `width` attribute makes the conversion unsafe, so the tag stays.
    assert_eq!(output, "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">");
}
1995
/// With the relaxed fix rule, an `<a>` carrying a `target` attribute is
/// converted to a Markdown link.
#[test]
fn test_md033_fix_relaxed_a_with_target_is_converted() {
    let input = "Link: <a href=\"https://example.com\" target=\"_blank\">Example</a>";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "Link: [Example](https://example.com)");
}
2004
/// With the relaxed fix rule, an `<img>` carrying a `width` attribute is
/// converted to a Markdown image.
#[test]
fn test_md033_fix_relaxed_img_with_width_is_converted() {
    let input = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "Image: ![My Photo](photo.jpg)");
}
2013
/// With the relaxed fix rule, an attribute that is not droppable (here
/// `aria-label`) prevents the conversion.
#[test]
fn test_md033_fix_relaxed_rejects_unknown_extra_attributes() {
    let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" aria-label=\"hero\">";
    let context = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, content, "Unknown attributes should not be dropped by default");
}
2022
/// With the relaxed fix rule, a link whose `href` uses the `javascript:`
/// scheme is left unconverted.
#[test]
fn test_md033_fix_relaxed_still_blocks_unsafe_schemes() {
    let content = "Link: <a href=\"javascript:alert(1)\" target=\"_blank\">Example</a>";
    let context = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, content, "Unsafe URL schemes must never be converted");
}
2031
/// A wrapper `<p>` around an `<img>` is stripped only on the second fix
/// pass: the first pass converts the image but keeps the wrapper.
#[test]
fn test_md033_fix_relaxed_wrapper_strip_requires_second_pass_for_nested_html() {
    use crate::config::MarkdownFlavor;

    let fixer = relaxed_fix_rule();
    let input = "<p align=\"center\">\n  <img src=\"logo.svg\" alt=\"Logo\" width=\"120\" />\n</p>";

    // Pass one: inner image converted, wrapper still present.
    let first_context = LintContext::new(input, MarkdownFlavor::Standard, None);
    let fixed_once = fixer.fix(&first_context).unwrap();
    assert!(
        fixed_once.contains("<p"),
        "First pass should keep wrapper when inner HTML is still present: {fixed_once}"
    );
    assert!(
        fixed_once.contains("![Logo](logo.svg)"),
        "Inner image should be converted on first pass: {fixed_once}"
    );

    // Pass two, fed with the output of pass one: wrapper removed.
    let second_context = LintContext::new(&fixed_once, MarkdownFlavor::Standard, None);
    let fixed_twice = fixer.fix(&second_context).unwrap();
    assert!(
        !fixed_twice.contains("<p"),
        "Second pass should strip configured wrapper: {fixed_twice}"
    );
    assert!(fixed_twice.contains("![Logo](logo.svg)"));
}
2055
/// With the relaxed fix rule, a link carrying `target`, `rel` and `class`
/// together is still converted.
#[test]
fn test_md033_fix_relaxed_multiple_droppable_attrs() {
    let input = "<a href=\"https://example.com\" target=\"_blank\" rel=\"noopener\" class=\"btn\">Click</a>";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "[Click](https://example.com)");
}
2064
/// With the relaxed fix rule, an image carrying `width`, `height` and
/// `style` together is still converted.
#[test]
fn test_md033_fix_relaxed_img_multiple_droppable_attrs() {
    let input = "<img src=\"logo.png\" alt=\"Logo\" width=\"120\" height=\"40\" style=\"border:none\" />";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "![Logo](logo.png)");
}
2073
/// With the relaxed fix rule, an `onclick` attribute blocks the conversion
/// of the link.
#[test]
fn test_md033_fix_relaxed_event_handler_never_dropped() {
    let content = "<a href=\"https://example.com\" onclick=\"track()\">Link</a>";
    let context = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, content, "Event handler attributes must block conversion");
}
2082
/// Listing `on*` in `drop_attributes` does not make event handlers
/// droppable: a link with `onclick` is still left unconverted.
#[test]
fn test_md033_fix_relaxed_event_handler_even_with_custom_config() {
    let droppable = vec![String::from("on*"), String::from("target")];
    let relaxed_config = MD033Config {
        fix: true,
        fix_mode: MD033FixMode::Relaxed,
        drop_attributes: droppable,
        ..MD033Config::default()
    };
    let fixer = MD033NoInlineHtml::from_config_struct(relaxed_config);

    let content = "<a href=\"https://example.com\" onclick=\"alert(1)\">Link</a>";
    let context = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, content, "on* event handlers must never be dropped");
}
2098
/// A custom `drop_attributes` list replaces the defaults: attributes on the
/// list are dropped, attributes missing from it block the conversion.
#[test]
fn test_md033_fix_relaxed_custom_drop_attributes() {
    use crate::config::MarkdownFlavor;

    let relaxed_config = MD033Config {
        fix: true,
        fix_mode: MD033FixMode::Relaxed,
        drop_attributes: vec![String::from("loading")],
        ..MD033Config::default()
    };
    let fixer = MD033NoInlineHtml::from_config_struct(relaxed_config);

    // Case 1: `loading` is on the custom list, so the image is converted.
    let listed = "<img src=\"x.jpg\" alt=\"\" loading=\"lazy\">";
    let listed_context = LintContext::new(listed, MarkdownFlavor::Standard, None);
    let listed_output = fixer.fix(&listed_context).unwrap();
    assert_eq!(listed_output, "![](x.jpg)", "Custom drop-attributes should be respected");

    // Case 2: `width` is not on the custom list, so the tag is kept.
    let unlisted = "<img src=\"x.jpg\" alt=\"\" width=\"100\">";
    let unlisted_context = LintContext::new(unlisted, MarkdownFlavor::Standard, None);
    let unlisted_output = fixer.fix(&unlisted_context).unwrap();
    assert_eq!(
        unlisted_output, unlisted,
        "Attributes not in custom list should block conversion"
    );
}
2122
/// With `div` configured as a strippable wrapper, a `<div>` around plain
/// text is removed and only the text remains.
#[test]
fn test_md033_fix_relaxed_custom_strip_wrapper() {
    let relaxed_config = MD033Config {
        fix: true,
        fix_mode: MD033FixMode::Relaxed,
        strip_wrapper_elements: vec![String::from("div")],
        ..MD033Config::default()
    };
    let fixer = MD033NoInlineHtml::from_config_struct(relaxed_config);

    let input = "<div>Some text content</div>";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "Some text content");
}
2137
/// With the relaxed fix rule, a `<p align="center">` around plain text is
/// stripped, leaving just the text.
#[test]
fn test_md033_fix_relaxed_wrapper_with_plain_text() {
    let input = "<p align=\"center\">Just some text</p>";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "Just some text");
}
2146
/// A `data-*` wildcard entry in `drop_attributes` lets a link carrying
/// `data-tracking` be converted.
#[test]
fn test_md033_fix_relaxed_data_attr_with_wildcard() {
    let droppable = vec![String::from("data-*"), String::from("target")];
    let relaxed_config = MD033Config {
        fix: true,
        fix_mode: MD033FixMode::Relaxed,
        drop_attributes: droppable,
        ..MD033Config::default()
    };
    let fixer = MD033NoInlineHtml::from_config_struct(relaxed_config);

    let input = "<a href=\"https://example.com\" data-tracking=\"abc\" target=\"_blank\">Link</a>";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, "[Link](https://example.com)");
}
2161
/// One non-droppable attribute is enough to block the conversion, even when
/// the other attributes on the tag are droppable.
#[test]
fn test_md033_fix_relaxed_mixed_droppable_and_blocking_attrs() {
    // `target` may be dropped; `aria-label` is not on the default drop list.
    let content = "<a href=\"https://example.com\" target=\"_blank\" aria-label=\"nav\">Link</a>";
    let context = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, content, "Non-droppable attribute should block conversion");
}
2171
/// The README badge pattern (`<a>` wrapping an `<img>`) becomes a nested
/// Markdown image link after two fix passes.
#[test]
fn test_md033_fix_relaxed_badge_pattern() {
    use crate::config::MarkdownFlavor;

    let fixer = relaxed_fix_rule();
    let input = "<a href=\"https://crates.io/crates/rumdl\" target=\"_blank\"><img src=\"https://img.shields.io/crates/v/rumdl.svg\" alt=\"Crate\" width=\"120\" /></a>";

    // Pass one converts the inner `<img>`.
    let first_context = LintContext::new(input, MarkdownFlavor::Standard, None);
    let fixed_once = fixer.fix(&first_context).unwrap();
    assert!(
        fixed_once.contains("![Crate](https://img.shields.io/crates/v/rumdl.svg)"),
        "Inner img should be converted: {fixed_once}"
    );

    // Pass two, fed with the output of pass one, converts the `<a>`.
    let second_context = LintContext::new(&fixed_once, MarkdownFlavor::Standard, None);
    let fixed_twice = fixer.fix(&second_context).unwrap();
    assert!(
        fixed_twice
            .contains("[![Crate](https://img.shields.io/crates/v/rumdl.svg)](https://crates.io/crates/rumdl)"),
        "Badge should produce nested markdown image link: {fixed_twice}"
    );
}
2194
/// The default (conservative) fix mode does not drop a `target` attribute,
/// so the link is left unconverted.
#[test]
fn test_md033_fix_relaxed_conservative_mode_unchanged() {
    let content = "<a href=\"https://example.com\" target=\"_blank\">Link</a>";
    let context = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);

    // `with_fix(true)` enables fixing without switching to relaxed mode.
    let fixer = MD033NoInlineHtml::with_fix(true);
    let output = fixer.fix(&context).unwrap();

    assert_eq!(output, content, "Conservative mode should not drop target attribute");
}
2204
/// An `<img>` placed inside `<pre>` is not converted, even by the relaxed
/// fix rule.
#[test]
fn test_md033_fix_relaxed_img_inside_pre_not_converted() {
    let input = "<pre>\n  <img src=\"diagram.png\" alt=\"d\" width=\"100\" />\n</pre>";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let fixed = fixer.fix(&context).unwrap();

    let img_kept = fixed.contains("<img");
    assert!(img_kept, "img inside pre must not be converted: {fixed}");
}
2214
/// A `<p>` nested inside a `<div>` is not stripped by the relaxed fix rule.
#[test]
fn test_md033_fix_relaxed_wrapper_nested_inside_div_not_stripped() {
    let input = "<div><p>text</p></div>";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let fixed = fixer.fix(&context).unwrap();

    // Accept either the intact paragraph or at least its opening tag.
    let paragraph_kept = ["<p>text</p>", "<p>"].iter().any(|needle| fixed.contains(*needle));
    assert!(
        paragraph_kept,
        "Nested <p> inside <div> should not be stripped: {fixed}"
    );
}
2227
/// An `<img>` inside `<div><p>...</p></div>` is not converted: the nested
/// `<p>` cannot be stripped, so Markdown placed there would remain inside an
/// HTML block where it does not render.
#[test]
fn test_md033_fix_relaxed_img_inside_nested_wrapper_not_converted() {
    let input = "<div><p><img src=\"x.jpg\" alt=\"pic\" width=\"100\" /></p></div>";
    let context = LintContext::new(input, crate::config::MarkdownFlavor::Standard, None);

    let fixer = relaxed_fix_rule();
    let fixed = fixer.fix(&context).unwrap();

    let img_kept = fixed.contains("<img");
    assert!(img_kept, "img inside nested wrapper must not be converted: {fixed}");
}
2242
2243    #[test]
2244    fn test_md033_fix_mixed_safe_tags() {
2245        // All tags are now safe fixable (em, img, strong)
2246        let rule = MD033NoInlineHtml::with_fix(true);
2247        let content = "<em>italic</em> and <img src=\"x.jpg\"> and <strong>bold</strong>";
2248        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2249        let fixed = rule.fix(&ctx).unwrap();
2250        // All are converted
2251        assert_eq!(fixed, "*italic* and ![](x.jpg) and **bold**");
2252    }
2253
2254    #[test]
2255    fn test_md033_fix_multiple_tags_same_line() {
2256        // Multiple tags on the same line should all be fixed correctly
2257        let rule = MD033NoInlineHtml::with_fix(true);
2258        let content = "Regular text <i>italic</i> and <b>bold</b> here.";
2259        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2260        let fixed = rule.fix(&ctx).unwrap();
2261        assert_eq!(fixed, "Regular text *italic* and **bold** here.");
2262    }
2263
2264    #[test]
2265    fn test_md033_fix_multiple_em_tags_same_line() {
2266        // Multiple em/strong tags on the same line
2267        let rule = MD033NoInlineHtml::with_fix(true);
2268        let content = "<em>first</em> and <strong>second</strong> and <code>third</code>";
2269        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2270        let fixed = rule.fix(&ctx).unwrap();
2271        assert_eq!(fixed, "*first* and **second** and `third`");
2272    }
2273
2274    #[test]
2275    fn test_md033_fix_skips_tags_inside_pre() {
2276        // Tags inside <pre> blocks should NOT be fixed (would break structure)
2277        let rule = MD033NoInlineHtml::with_fix(true);
2278        let content = "<pre><code><em>VALUE</em></code></pre>";
2279        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2280        let fixed = rule.fix(&ctx).unwrap();
2281        // The <em> inside <pre><code> should NOT be converted
2282        // Only the outer structure might be changed
2283        assert!(
2284            !fixed.contains("*VALUE*"),
2285            "Tags inside <pre> should not be converted to markdown. Got: {fixed}"
2286        );
2287    }
2288
2289    #[test]
2290    fn test_md033_fix_skips_tags_inside_div() {
2291        // Tags inside HTML block elements should not be fixed
2292        let rule = MD033NoInlineHtml::with_fix(true);
2293        let content = "<div>\n<em>emphasized</em>\n</div>";
2294        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2295        let fixed = rule.fix(&ctx).unwrap();
2296        // The <em> inside <div> should not be converted to *emphasized*
2297        assert!(
2298            !fixed.contains("*emphasized*"),
2299            "Tags inside HTML blocks should not be converted. Got: {fixed}"
2300        );
2301    }
2302
2303    #[test]
2304    fn test_md033_fix_outside_html_block() {
2305        // Tags outside HTML blocks should still be fixed
2306        let rule = MD033NoInlineHtml::with_fix(true);
2307        let content = "<div>\ncontent\n</div>\n\nOutside <em>emphasized</em> text.";
2308        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2309        let fixed = rule.fix(&ctx).unwrap();
2310        // The <em> outside the div should be converted
2311        assert!(
2312            fixed.contains("*emphasized*"),
2313            "Tags outside HTML blocks should be converted. Got: {fixed}"
2314        );
2315    }
2316
2317    #[test]
2318    fn test_md033_fix_with_id_attribute() {
2319        // Tags with id attributes should not be fixed (id might be used for anchors)
2320        let rule = MD033NoInlineHtml::with_fix(true);
2321        let content = "See <em id=\"important\">this note</em> for details.";
2322        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2323        let fixed = rule.fix(&ctx).unwrap();
2324        // Should remain unchanged - id attribute matters for linking
2325        assert_eq!(fixed, content);
2326    }
2327
2328    #[test]
2329    fn test_md033_fix_with_style_attribute() {
2330        // Tags with style attributes should not be fixed
2331        let rule = MD033NoInlineHtml::with_fix(true);
2332        let content = "This is <strong style=\"color: red\">important</strong> text.";
2333        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2334        let fixed = rule.fix(&ctx).unwrap();
2335        // Should remain unchanged - style attribute provides formatting
2336        assert_eq!(fixed, content);
2337    }
2338
2339    #[test]
2340    fn test_md033_fix_mixed_with_and_without_attributes() {
2341        // Mix of tags with and without attributes
2342        let rule = MD033NoInlineHtml::with_fix(true);
2343        let content = "<em>normal</em> and <em class=\"special\">styled</em> text.";
2344        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2345        let fixed = rule.fix(&ctx).unwrap();
2346        // Only the tag without attributes should be fixed
2347        assert_eq!(fixed, "*normal* and <em class=\"special\">styled</em> text.");
2348    }
2349
2350    #[test]
2351    fn test_md033_quick_fix_tag_with_attributes_no_fix() {
2352        // Quick fix should not be provided for tags with attributes
2353        let rule = MD033NoInlineHtml::with_fix(true);
2354        let content = "<em class=\"test\">emphasized</em>";
2355        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2356        let result = rule.check(&ctx).unwrap();
2357
2358        assert_eq!(result.len(), 1, "Should find one HTML tag");
2359        // No fix should be provided for tags with attributes
2360        assert!(
2361            result[0].fix.is_none(),
2362            "Should NOT have a fix for tags with attributes"
2363        );
2364    }
2365
2366    #[test]
2367    fn test_md033_fix_skips_html_entities() {
2368        // Tags containing HTML entities should NOT be fixed
2369        // HTML entities need HTML context to render; markdown won't process them
2370        let rule = MD033NoInlineHtml::with_fix(true);
2371        let content = "<code>&vert;</code>";
2372        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2373        let fixed = rule.fix(&ctx).unwrap();
2374        // Should remain unchanged - converting would break rendering
2375        assert_eq!(fixed, content);
2376    }
2377
2378    #[test]
2379    fn test_md033_fix_skips_multiple_html_entities() {
2380        // Multiple HTML entities should also be skipped
2381        let rule = MD033NoInlineHtml::with_fix(true);
2382        let content = "<code>&lt;T&gt;</code>";
2383        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2384        let fixed = rule.fix(&ctx).unwrap();
2385        // Should remain unchanged
2386        assert_eq!(fixed, content);
2387    }
2388
2389    #[test]
2390    fn test_md033_fix_allows_ampersand_without_entity() {
2391        // Content with & but no semicolon should still be fixed
2392        let rule = MD033NoInlineHtml::with_fix(true);
2393        let content = "<code>a & b</code>";
2394        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2395        let fixed = rule.fix(&ctx).unwrap();
2396        // Should be converted since & is not part of an entity
2397        assert_eq!(fixed, "`a & b`");
2398    }
2399
2400    #[test]
2401    fn test_md033_fix_em_with_entities_skipped() {
2402        // <em> with entities should also be skipped
2403        let rule = MD033NoInlineHtml::with_fix(true);
2404        let content = "<em>&nbsp;text</em>";
2405        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2406        let fixed = rule.fix(&ctx).unwrap();
2407        // Should remain unchanged
2408        assert_eq!(fixed, content);
2409    }
2410
2411    #[test]
2412    fn test_md033_fix_skips_nested_em_in_code() {
2413        // Tags nested inside other HTML elements should NOT be fixed
2414        // e.g., <code><em>n</em></code> - the <em> should not be converted
2415        let rule = MD033NoInlineHtml::with_fix(true);
2416        let content = "<code><em>n</em></code>";
2417        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2418        let fixed = rule.fix(&ctx).unwrap();
2419        // The inner <em> should NOT be converted to *n* because it's nested
2420        // The whole structure should be left as-is (or outer code converted, but not inner)
2421        assert!(
2422            !fixed.contains("*n*"),
2423            "Nested <em> should not be converted to markdown. Got: {fixed}"
2424        );
2425    }
2426
2427    #[test]
2428    fn test_md033_fix_skips_nested_in_table() {
2429        // Tags nested in HTML structures in tables should not be fixed
2430        let rule = MD033NoInlineHtml::with_fix(true);
2431        let content = "| <code>><em>n</em></code> | description |";
2432        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2433        let fixed = rule.fix(&ctx).unwrap();
2434        // Should not convert nested <em> to *n*
2435        assert!(
2436            !fixed.contains("*n*"),
2437            "Nested tags in table should not be converted. Got: {fixed}"
2438        );
2439    }
2440
2441    #[test]
2442    fn test_md033_fix_standalone_em_still_converted() {
2443        // Standalone (non-nested) <em> should still be converted
2444        let rule = MD033NoInlineHtml::with_fix(true);
2445        let content = "This is <em>emphasized</em> text.";
2446        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2447        let fixed = rule.fix(&ctx).unwrap();
2448        assert_eq!(fixed, "This is *emphasized* text.");
2449    }
2450
2451    // ==========================================================================
2452    // Obsidian Templater Plugin Syntax Tests
2453    //
2454    // Templater is a popular Obsidian plugin that uses `<% ... %>` syntax for
2455    // template interpolation. The `<%` pattern is NOT captured by the HTML tag
2456    // parser because `%` is not a valid HTML tag name character (tags must start
2457    // with a letter). This behavior is documented here with comprehensive tests.
2458    //
2459    // Reference: https://silentvoid13.github.io/Templater/
2460    // ==========================================================================
2461
2462    #[test]
2463    fn test_md033_templater_basic_interpolation_not_flagged() {
2464        // Basic Templater interpolation: <% expr %>
2465        // Should NOT be flagged because `%` is not a valid HTML tag character
2466        let rule = MD033NoInlineHtml::default();
2467        let content = "Today is <% tp.date.now() %> which is nice.";
2468        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2469        let result = rule.check(&ctx).unwrap();
2470        assert!(
2471            result.is_empty(),
2472            "Templater basic interpolation should not be flagged as HTML. Got: {result:?}"
2473        );
2474    }
2475
2476    #[test]
2477    fn test_md033_templater_file_functions_not_flagged() {
2478        // Templater file functions: <% tp.file.* %>
2479        let rule = MD033NoInlineHtml::default();
2480        let content = "File: <% tp.file.title %>\nCreated: <% tp.file.creation_date() %>";
2481        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2482        let result = rule.check(&ctx).unwrap();
2483        assert!(
2484            result.is_empty(),
2485            "Templater file functions should not be flagged. Got: {result:?}"
2486        );
2487    }
2488
2489    #[test]
2490    fn test_md033_templater_with_arguments_not_flagged() {
2491        // Templater with function arguments
2492        let rule = MD033NoInlineHtml::default();
2493        let content = r#"Date: <% tp.date.now("YYYY-MM-DD") %>"#;
2494        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2495        let result = rule.check(&ctx).unwrap();
2496        assert!(
2497            result.is_empty(),
2498            "Templater with arguments should not be flagged. Got: {result:?}"
2499        );
2500    }
2501
2502    #[test]
2503    fn test_md033_templater_javascript_execution_not_flagged() {
2504        // Templater JavaScript execution block: <%* code %>
2505        let rule = MD033NoInlineHtml::default();
2506        let content = "<%* const today = tp.date.now(); tR += today; %>";
2507        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2508        let result = rule.check(&ctx).unwrap();
2509        assert!(
2510            result.is_empty(),
2511            "Templater JS execution block should not be flagged. Got: {result:?}"
2512        );
2513    }
2514
2515    #[test]
2516    fn test_md033_templater_dynamic_execution_not_flagged() {
2517        // Templater dynamic/preview execution: <%+ expr %>
2518        let rule = MD033NoInlineHtml::default();
2519        let content = "Dynamic: <%+ tp.date.now() %>";
2520        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2521        let result = rule.check(&ctx).unwrap();
2522        assert!(
2523            result.is_empty(),
2524            "Templater dynamic execution should not be flagged. Got: {result:?}"
2525        );
2526    }
2527
2528    #[test]
2529    fn test_md033_templater_whitespace_trim_all_not_flagged() {
2530        // Templater whitespace control - trim all: <%_ expr _%>
2531        let rule = MD033NoInlineHtml::default();
2532        let content = "<%_ tp.date.now() _%>";
2533        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2534        let result = rule.check(&ctx).unwrap();
2535        assert!(
2536            result.is_empty(),
2537            "Templater trim-all whitespace should not be flagged. Got: {result:?}"
2538        );
2539    }
2540
2541    #[test]
2542    fn test_md033_templater_whitespace_trim_newline_not_flagged() {
2543        // Templater whitespace control - trim newline: <%- expr -%>
2544        let rule = MD033NoInlineHtml::default();
2545        let content = "<%- tp.date.now() -%>";
2546        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2547        let result = rule.check(&ctx).unwrap();
2548        assert!(
2549            result.is_empty(),
2550            "Templater trim-newline should not be flagged. Got: {result:?}"
2551        );
2552    }
2553
2554    #[test]
2555    fn test_md033_templater_combined_modifiers_not_flagged() {
2556        // Templater combined whitespace and execution modifiers
2557        let rule = MD033NoInlineHtml::default();
2558        let contents = [
2559            "<%-* const x = 1; -%>",  // trim + JS execution
2560            "<%_+ tp.date.now() _%>", // trim-all + dynamic
2561            "<%- tp.file.title -%>",  // trim-newline only
2562            "<%_ tp.file.title _%>",  // trim-all only
2563        ];
2564        for content in contents {
2565            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2566            let result = rule.check(&ctx).unwrap();
2567            assert!(
2568                result.is_empty(),
2569                "Templater combined modifiers should not be flagged: {content}. Got: {result:?}"
2570            );
2571        }
2572    }
2573
2574    #[test]
2575    fn test_md033_templater_multiline_block_not_flagged() {
2576        // Multi-line Templater JavaScript block
2577        let rule = MD033NoInlineHtml::default();
2578        let content = r#"<%*
2579const x = 1;
2580const y = 2;
2581tR += x + y;
2582%>"#;
2583        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2584        let result = rule.check(&ctx).unwrap();
2585        assert!(
2586            result.is_empty(),
2587            "Templater multi-line block should not be flagged. Got: {result:?}"
2588        );
2589    }
2590
2591    #[test]
2592    fn test_md033_templater_with_angle_brackets_in_condition_not_flagged() {
2593        // Templater with angle brackets in JavaScript condition
2594        // This is a key edge case: `<` inside Templater should not trigger HTML detection
2595        let rule = MD033NoInlineHtml::default();
2596        let content = "<%* if (x < 5) { tR += 'small'; } %>";
2597        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2598        let result = rule.check(&ctx).unwrap();
2599        assert!(
2600            result.is_empty(),
2601            "Templater with angle brackets in conditions should not be flagged. Got: {result:?}"
2602        );
2603    }
2604
2605    #[test]
2606    fn test_md033_templater_mixed_with_html_only_html_flagged() {
2607        // Templater syntax mixed with actual HTML - only HTML should be flagged
2608        let rule = MD033NoInlineHtml::default();
2609        let content = "<% tp.date.now() %> is today's date. <div>This is HTML</div>";
2610        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2611        let result = rule.check(&ctx).unwrap();
2612        assert_eq!(result.len(), 1, "Should only flag the HTML div tag");
2613        assert!(
2614            result[0].message.contains("<div>"),
2615            "Should flag <div>, got: {}",
2616            result[0].message
2617        );
2618    }
2619
2620    #[test]
2621    fn test_md033_templater_in_heading_not_flagged() {
2622        // Templater in markdown heading
2623        let rule = MD033NoInlineHtml::default();
2624        let content = "# <% tp.file.title %>";
2625        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2626        let result = rule.check(&ctx).unwrap();
2627        assert!(
2628            result.is_empty(),
2629            "Templater in heading should not be flagged. Got: {result:?}"
2630        );
2631    }
2632
2633    #[test]
2634    fn test_md033_templater_multiple_on_same_line_not_flagged() {
2635        // Multiple Templater blocks on same line
2636        let rule = MD033NoInlineHtml::default();
2637        let content = "From <% tp.date.now() %> to <% tp.date.tomorrow() %> we have meetings.";
2638        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2639        let result = rule.check(&ctx).unwrap();
2640        assert!(
2641            result.is_empty(),
2642            "Multiple Templater blocks should not be flagged. Got: {result:?}"
2643        );
2644    }
2645
2646    #[test]
2647    fn test_md033_templater_in_code_block_not_flagged() {
2648        // Templater syntax in code blocks should not be flagged (code blocks are skipped)
2649        let rule = MD033NoInlineHtml::default();
2650        let content = "```\n<% tp.date.now() %>\n```";
2651        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2652        let result = rule.check(&ctx).unwrap();
2653        assert!(
2654            result.is_empty(),
2655            "Templater in code block should not be flagged. Got: {result:?}"
2656        );
2657    }
2658
2659    #[test]
2660    fn test_md033_templater_in_inline_code_not_flagged() {
2661        // Templater syntax in inline code span should not be flagged
2662        let rule = MD033NoInlineHtml::default();
2663        let content = "Use `<% tp.date.now() %>` for current date.";
2664        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2665        let result = rule.check(&ctx).unwrap();
2666        assert!(
2667            result.is_empty(),
2668            "Templater in inline code should not be flagged. Got: {result:?}"
2669        );
2670    }
2671
2672    #[test]
2673    fn test_md033_templater_also_works_in_standard_flavor() {
2674        // Templater syntax should also not be flagged in Standard flavor
2675        // because the HTML parser doesn't recognize `<%` as a valid tag
2676        let rule = MD033NoInlineHtml::default();
2677        let content = "<% tp.date.now() %> works everywhere.";
2678        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2679        let result = rule.check(&ctx).unwrap();
2680        assert!(
2681            result.is_empty(),
2682            "Templater should not be flagged even in Standard flavor. Got: {result:?}"
2683        );
2684    }
2685
2686    #[test]
2687    fn test_md033_templater_empty_tag_not_flagged() {
2688        // Empty Templater tags
2689        let rule = MD033NoInlineHtml::default();
2690        let content = "<%>";
2691        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2692        let result = rule.check(&ctx).unwrap();
2693        assert!(
2694            result.is_empty(),
2695            "Empty Templater-like tag should not be flagged. Got: {result:?}"
2696        );
2697    }
2698
2699    #[test]
2700    fn test_md033_templater_unclosed_not_flagged() {
2701        // Unclosed Templater tags - these are template errors, not HTML
2702        let rule = MD033NoInlineHtml::default();
2703        let content = "<% tp.date.now() without closing tag";
2704        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2705        let result = rule.check(&ctx).unwrap();
2706        assert!(
2707            result.is_empty(),
2708            "Unclosed Templater should not be flagged as HTML. Got: {result:?}"
2709        );
2710    }
2711
2712    #[test]
2713    fn test_md033_templater_with_newlines_inside_not_flagged() {
2714        // Templater with newlines inside the expression
2715        let rule = MD033NoInlineHtml::default();
2716        let content = r#"<% tp.date.now("YYYY") +
2717"-" +
2718tp.date.now("MM") %>"#;
2719        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2720        let result = rule.check(&ctx).unwrap();
2721        assert!(
2722            result.is_empty(),
2723            "Templater with internal newlines should not be flagged. Got: {result:?}"
2724        );
2725    }
2726
2727    #[test]
2728    fn test_md033_erb_style_tags_not_flagged() {
2729        // ERB/EJS style tags (similar to Templater) are also not HTML
2730        // This documents the general principle that `<%` is not valid HTML
2731        let rule = MD033NoInlineHtml::default();
2732        let content = "<%= variable %> and <% code %> and <%# comment %>";
2733        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2734        let result = rule.check(&ctx).unwrap();
2735        assert!(
2736            result.is_empty(),
2737            "ERB/EJS style tags should not be flagged as HTML. Got: {result:?}"
2738        );
2739    }
2740
2741    #[test]
2742    fn test_md033_templater_complex_expression_not_flagged() {
2743        // Complex Templater expression with multiple function calls
2744        let rule = MD033NoInlineHtml::default();
2745        let content = r#"<%*
2746const file = tp.file.title;
2747const date = tp.date.now("YYYY-MM-DD");
2748const folder = tp.file.folder();
2749tR += `# ${file}\n\nCreated: ${date}\nIn: ${folder}`;
2750%>"#;
2751        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2752        let result = rule.check(&ctx).unwrap();
2753        assert!(
2754            result.is_empty(),
2755            "Complex Templater expression should not be flagged. Got: {result:?}"
2756        );
2757    }
2758
2759    #[test]
2760    fn test_md033_percent_sign_variations_not_flagged() {
2761        // Various patterns starting with <% that should all be safe
2762        let rule = MD033NoInlineHtml::default();
2763        let patterns = [
2764            "<%=",  // ERB output
2765            "<%#",  // ERB comment
2766            "<%%",  // Double percent
2767            "<%!",  // Some template engines
2768            "<%@",  // JSP directive
2769            "<%--", // JSP comment
2770        ];
2771        for pattern in patterns {
2772            let content = format!("{pattern} content %>");
2773            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
2774            let result = rule.check(&ctx).unwrap();
2775            assert!(
2776                result.is_empty(),
2777                "Pattern {pattern} should not be flagged. Got: {result:?}"
2778            );
2779        }
2780    }
2781
2782    // ───── Bug #3: Bracket escaping in image-inside-link conversion ─────
2783    //
2784    // When <a> wraps already-converted markdown image text, the bracket escaping
2785    // must be skipped to produce valid [![alt](url)](href) instead of !\[\](url)
2786
2787    #[test]
2788    fn test_md033_fix_a_wrapping_markdown_image_no_escaped_brackets() {
2789        // When <a> wraps a markdown image (from a prior fix iteration),
2790        // the result should be [![](url)](href) — no escaped brackets
2791        let rule = MD033NoInlineHtml::with_fix(true);
2792        let content = r#"<a href="https://example.com">![](https://example.com/image.png)</a>"#;
2793        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2794        let fixed = rule.fix(&ctx).unwrap();
2795
2796        assert_eq!(fixed, "[![](https://example.com/image.png)](https://example.com)",);
2797        assert!(!fixed.contains(r"\["), "Must not escape brackets: {fixed}");
2798        assert!(!fixed.contains(r"\]"), "Must not escape brackets: {fixed}");
2799    }
2800
2801    #[test]
2802    fn test_md033_fix_a_wrapping_markdown_image_with_alt() {
2803        // <a> wrapping ![alt](url) preserves alt text in linked image
2804        let rule = MD033NoInlineHtml::with_fix(true);
2805        let content =
2806            r#"<a href="https://github.com/repo">![Contributors](https://contrib.rocks/image?repo=org/repo)</a>"#;
2807        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2808        let fixed = rule.fix(&ctx).unwrap();
2809
2810        assert_eq!(
2811            fixed,
2812            "[![Contributors](https://contrib.rocks/image?repo=org/repo)](https://github.com/repo)"
2813        );
2814    }
2815
2816    #[test]
2817    fn test_md033_fix_img_without_alt_produces_empty_alt() {
2818        let rule = MD033NoInlineHtml::with_fix(true);
2819        let content = r#"<img src="photo.jpg" />"#;
2820        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2821        let fixed = rule.fix(&ctx).unwrap();
2822
2823        assert_eq!(fixed, "![](photo.jpg)");
2824    }
2825
2826    #[test]
2827    fn test_md033_fix_a_with_plain_text_still_escapes_brackets() {
2828        // Plain text brackets inside <a> SHOULD be escaped
2829        let rule = MD033NoInlineHtml::with_fix(true);
2830        let content = r#"<a href="https://example.com">text with [brackets]</a>"#;
2831        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2832        let fixed = rule.fix(&ctx).unwrap();
2833
2834        assert!(
2835            fixed.contains(r"\[brackets\]"),
2836            "Plain text brackets should be escaped: {fixed}"
2837        );
2838    }
2839
2840    #[test]
2841    fn test_md033_fix_a_with_image_plus_extra_text_escapes_brackets() {
2842        // Mixed content: image followed by bracketed text — brackets must be escaped
2843        // The image detection must NOT match partial content
2844        let rule = MD033NoInlineHtml::with_fix(true);
2845        let content = r#"<a href="/link">![](img.png) see [docs]</a>"#;
2846        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2847        let fixed = rule.fix(&ctx).unwrap();
2848
2849        // "see [docs]" brackets should be escaped since inner content is mixed
2850        assert!(
2851            fixed.contains(r"\[docs\]"),
2852            "Brackets in mixed image+text content should be escaped: {fixed}"
2853        );
2854    }
2855
2856    #[test]
2857    fn test_md033_fix_img_in_a_end_to_end() {
2858        // End-to-end: verify that iterative fixing of <a><img></a>
2859        // produces the correct final result through the fix coordinator
2860        use crate::config::Config;
2861        use crate::fix_coordinator::FixCoordinator;
2862
2863        let rule = MD033NoInlineHtml::with_fix(true);
2864        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2865
2866        let mut content =
2867            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image?repo=org/repo" /></a>"#
2868                .to_string();
2869        let config = Config::default();
2870        let coordinator = FixCoordinator::new();
2871
2872        let result = coordinator
2873            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2874            .unwrap();
2875
2876        assert_eq!(
2877            content, "[![](https://contrib.rocks/image?repo=org/repo)](https://github.com/org/repo)",
2878            "End-to-end: <a><img></a> should become valid linked image"
2879        );
2880        assert!(result.converged);
2881        assert!(!content.contains(r"\["), "No escaped brackets: {content}");
2882    }
2883
2884    #[test]
2885    fn test_md033_fix_img_in_a_with_alt_end_to_end() {
2886        use crate::config::Config;
2887        use crate::fix_coordinator::FixCoordinator;
2888
2889        let rule = MD033NoInlineHtml::with_fix(true);
2890        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2891
2892        let mut content =
2893            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image" alt="Contributors" /></a>"#
2894                .to_string();
2895        let config = Config::default();
2896        let coordinator = FixCoordinator::new();
2897
2898        let result = coordinator
2899            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2900            .unwrap();
2901
2902        assert_eq!(
2903            content,
2904            "[![Contributors](https://contrib.rocks/image)](https://github.com/org/repo)",
2905        );
2906        assert!(result.converged);
2907    }
2908}