Skip to main content

rumdl_lib/rules/
md033_no_inline_html.rs

1//!
2//! Rule MD033: No HTML tags
3//!
4//! See [docs/md033.md](../../docs/md033.md) for full documentation, configuration, and examples.
5
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::regex_cache::*;
8use std::collections::HashSet;
9
10mod md033_config;
11use md033_config::{MD033Config, MD033FixMode};
12
/// Rule MD033 (no HTML tags).
///
/// Besides the raw configuration, the rule keeps four `HashSet`s that every
/// constructor in this file fills from the matching `MD033Config::*_set()`
/// helper, so the predicates below can do set lookups directly.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    // Configuration the rule was built from.
    config: MD033Config,
    // Built from `config.allowed_set()`; queried by `is_tag_allowed`.
    allowed: HashSet<String>,
    // Built from `config.disallowed_set()`; queried by `is_tag_disallowed`.
    disallowed: HashSet<String>,
    // Built from `config.drop_attributes_set()`; queried by `is_droppable_attribute`.
    drop_attributes: HashSet<String>,
    // Built from `config.strip_wrapper_elements_set()`; queried by the
    // strippable-wrapper checks.
    strip_wrapper_elements: HashSet<String>,
}
21
22impl Default for MD033NoInlineHtml {
23    fn default() -> Self {
24        let config = MD033Config::default();
25        let allowed = config.allowed_set();
26        let disallowed = config.disallowed_set();
27        let drop_attributes = config.drop_attributes_set();
28        let strip_wrapper_elements = config.strip_wrapper_elements_set();
29        Self {
30            config,
31            allowed,
32            disallowed,
33            drop_attributes,
34            strip_wrapper_elements,
35        }
36    }
37}
38
39impl MD033NoInlineHtml {
    /// Create a rule with the default configuration (same as `Self::default()`).
    pub fn new() -> Self {
        Self::default()
    }
43
44    pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
45        let config = MD033Config {
46            allowed: allowed_vec.clone(),
47            disallowed: Vec::new(),
48            fix: false,
49            ..MD033Config::default()
50        };
51        let allowed = config.allowed_set();
52        let disallowed = config.disallowed_set();
53        let drop_attributes = config.drop_attributes_set();
54        let strip_wrapper_elements = config.strip_wrapper_elements_set();
55        Self {
56            config,
57            allowed,
58            disallowed,
59            drop_attributes,
60            strip_wrapper_elements,
61        }
62    }
63
64    pub fn with_disallowed(disallowed_vec: Vec<String>) -> Self {
65        let config = MD033Config {
66            allowed: Vec::new(),
67            disallowed: disallowed_vec.clone(),
68            fix: false,
69            ..MD033Config::default()
70        };
71        let allowed = config.allowed_set();
72        let disallowed = config.disallowed_set();
73        let drop_attributes = config.drop_attributes_set();
74        let strip_wrapper_elements = config.strip_wrapper_elements_set();
75        Self {
76            config,
77            allowed,
78            disallowed,
79            drop_attributes,
80            strip_wrapper_elements,
81        }
82    }
83
84    /// Create a new rule with auto-fix enabled
85    pub fn with_fix(fix: bool) -> Self {
86        let config = MD033Config {
87            allowed: Vec::new(),
88            disallowed: Vec::new(),
89            fix,
90            ..MD033Config::default()
91        };
92        let allowed = config.allowed_set();
93        let disallowed = config.disallowed_set();
94        let drop_attributes = config.drop_attributes_set();
95        let strip_wrapper_elements = config.strip_wrapper_elements_set();
96        Self {
97            config,
98            allowed,
99            disallowed,
100            drop_attributes,
101            strip_wrapper_elements,
102        }
103    }
104
105    pub fn from_config_struct(config: MD033Config) -> Self {
106        let allowed = config.allowed_set();
107        let disallowed = config.disallowed_set();
108        let drop_attributes = config.drop_attributes_set();
109        let strip_wrapper_elements = config.strip_wrapper_elements_set();
110        Self {
111            config,
112            allowed,
113            disallowed,
114            drop_attributes,
115            strip_wrapper_elements,
116        }
117    }
118
119    // Efficient check for allowed tags using HashSet (case-insensitive)
120    #[inline]
121    fn is_tag_allowed(&self, tag: &str) -> bool {
122        if self.allowed.is_empty() {
123            return false;
124        }
125        // Remove angle brackets and slashes, then split by whitespace or '>'
126        let tag = tag.trim_start_matches('<').trim_start_matches('/');
127        let tag_name = tag
128            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
129            .next()
130            .unwrap_or("");
131        self.allowed.contains(&tag_name.to_lowercase())
132    }
133
134    /// Check if a tag is in the disallowed set (for disallowed-only mode)
135    #[inline]
136    fn is_tag_disallowed(&self, tag: &str) -> bool {
137        if self.disallowed.is_empty() {
138            return false;
139        }
140        // Remove angle brackets and slashes, then split by whitespace or '>'
141        let tag = tag.trim_start_matches('<').trim_start_matches('/');
142        let tag_name = tag
143            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
144            .next()
145            .unwrap_or("");
146        self.disallowed.contains(&tag_name.to_lowercase())
147    }
148
    /// Check if operating in disallowed-only mode.
    ///
    /// Thin wrapper: the decision is made by `MD033Config::is_disallowed_mode`
    /// on the stored configuration.
    #[inline]
    fn is_disallowed_mode(&self) -> bool {
        self.config.is_disallowed_mode()
    }
154
    /// Check if a tag is an HTML comment.
    ///
    /// Purely textual: `true` when `tag` starts with `<!--` and ends with
    /// `-->`. No trimming is done, so surrounding whitespace makes it `false`.
    #[inline]
    fn is_html_comment(&self, tag: &str) -> bool {
        tag.starts_with("<!--") && tag.ends_with("-->")
    }
160
161    /// Check if a tag name is a valid HTML element or custom element.
162    /// Returns false for placeholder syntax like `<NAME>`, `<resource>`, `<actual>`.
163    ///
164    /// Per HTML spec, custom elements must contain a hyphen (e.g., `<my-component>`).
165    #[inline]
166    fn is_html_element_or_custom(tag_name: &str) -> bool {
167        const HTML_ELEMENTS: &[&str] = &[
168            // Document structure
169            "html",
170            "head",
171            "body",
172            "title",
173            "base",
174            "link",
175            "meta",
176            "style",
177            // Sections
178            "article",
179            "section",
180            "nav",
181            "aside",
182            "h1",
183            "h2",
184            "h3",
185            "h4",
186            "h5",
187            "h6",
188            "hgroup",
189            "header",
190            "footer",
191            "address",
192            "main",
193            "search",
194            // Grouping
195            "p",
196            "hr",
197            "pre",
198            "blockquote",
199            "ol",
200            "ul",
201            "menu",
202            "li",
203            "dl",
204            "dt",
205            "dd",
206            "figure",
207            "figcaption",
208            "div",
209            // Text-level
210            "a",
211            "em",
212            "strong",
213            "small",
214            "s",
215            "cite",
216            "q",
217            "dfn",
218            "abbr",
219            "ruby",
220            "rt",
221            "rp",
222            "data",
223            "time",
224            "code",
225            "var",
226            "samp",
227            "kbd",
228            "sub",
229            "sup",
230            "i",
231            "b",
232            "u",
233            "mark",
234            "bdi",
235            "bdo",
236            "span",
237            "br",
238            "wbr",
239            // Edits
240            "ins",
241            "del",
242            // Embedded
243            "picture",
244            "source",
245            "img",
246            "iframe",
247            "embed",
248            "object",
249            "param",
250            "video",
251            "audio",
252            "track",
253            "map",
254            "area",
255            "svg",
256            "math",
257            "canvas",
258            // Tables
259            "table",
260            "caption",
261            "colgroup",
262            "col",
263            "tbody",
264            "thead",
265            "tfoot",
266            "tr",
267            "td",
268            "th",
269            // Forms
270            "form",
271            "label",
272            "input",
273            "button",
274            "select",
275            "datalist",
276            "optgroup",
277            "option",
278            "textarea",
279            "output",
280            "progress",
281            "meter",
282            "fieldset",
283            "legend",
284            // Interactive
285            "details",
286            "summary",
287            "dialog",
288            // Scripting
289            "script",
290            "noscript",
291            "template",
292            "slot",
293            // Deprecated but recognized
294            "acronym",
295            "applet",
296            "basefont",
297            "big",
298            "center",
299            "dir",
300            "font",
301            "frame",
302            "frameset",
303            "isindex",
304            "marquee",
305            "noembed",
306            "noframes",
307            "plaintext",
308            "strike",
309            "tt",
310            "xmp",
311        ];
312
313        let lower = tag_name.to_ascii_lowercase();
314        if HTML_ELEMENTS.contains(&lower.as_str()) {
315            return true;
316        }
317        // Custom elements must contain a hyphen per HTML spec
318        tag_name.contains('-')
319    }
320
321    // Check if a tag is likely a programming type annotation rather than HTML
322    #[inline]
323    fn is_likely_type_annotation(&self, tag: &str) -> bool {
324        // Common programming type names that are often used in generics
325        const COMMON_TYPES: &[&str] = &[
326            "string",
327            "number",
328            "any",
329            "void",
330            "null",
331            "undefined",
332            "array",
333            "promise",
334            "function",
335            "error",
336            "date",
337            "regexp",
338            "symbol",
339            "bigint",
340            "map",
341            "set",
342            "weakmap",
343            "weakset",
344            "iterator",
345            "generator",
346            "t",
347            "u",
348            "v",
349            "k",
350            "e", // Common single-letter type parameters
351            "userdata",
352            "apiresponse",
353            "config",
354            "options",
355            "params",
356            "result",
357            "response",
358            "request",
359            "data",
360            "item",
361            "element",
362            "node",
363        ];
364
365        let tag_content = tag
366            .trim_start_matches('<')
367            .trim_end_matches('>')
368            .trim_start_matches('/');
369        let tag_name = tag_content
370            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
371            .next()
372            .unwrap_or("");
373
374        // Check if it's a simple tag (no attributes) with a common type name
375        if !tag_content.contains(' ') && !tag_content.contains('=') {
376            COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
377        } else {
378            false
379        }
380    }
381
382    // Check if a tag is actually an email address in angle brackets
383    #[inline]
384    fn is_email_address(&self, tag: &str) -> bool {
385        let content = tag.trim_start_matches('<').trim_end_matches('>');
386        // Simple email pattern: contains @ and has reasonable structure
387        content.contains('@')
388            && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
389            && content.split('@').count() == 2
390            && content.split('@').all(|part| !part.is_empty())
391    }
392
393    // Check if a tag has the markdown attribute (MkDocs/Material for MkDocs)
394    #[inline]
395    fn has_markdown_attribute(&self, tag: &str) -> bool {
396        // Check for various forms of markdown attribute
397        // Examples: <div markdown>, <div markdown="1">, <div class="result" markdown>
398        tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
399    }
400
401    /// Check if a tag contains JSX-specific attributes that indicate it's JSX, not HTML
402    /// JSX uses different attribute names than HTML:
403    /// - `className` instead of `class`
404    /// - `htmlFor` instead of `for`
405    /// - camelCase event handlers (`onClick`, `onChange`, `onSubmit`, etc.)
406    /// - JSX expression syntax `={...}` for dynamic values
407    #[inline]
408    fn has_jsx_attributes(tag: &str) -> bool {
409        // JSX-specific attribute names (HTML uses class, for, onclick, etc.)
410        tag.contains("className")
411            || tag.contains("htmlFor")
412            || tag.contains("dangerouslySetInnerHTML")
413            // camelCase event handlers (JSX uses onClick, HTML uses onclick)
414            || tag.contains("onClick")
415            || tag.contains("onChange")
416            || tag.contains("onSubmit")
417            || tag.contains("onFocus")
418            || tag.contains("onBlur")
419            || tag.contains("onKeyDown")
420            || tag.contains("onKeyUp")
421            || tag.contains("onKeyPress")
422            || tag.contains("onMouseDown")
423            || tag.contains("onMouseUp")
424            || tag.contains("onMouseEnter")
425            || tag.contains("onMouseLeave")
426            // JSX expression syntax: ={expression} or ={ expression }
427            || tag.contains("={")
428    }
429
430    // Check if a tag is actually a URL in angle brackets
431    #[inline]
432    fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
433        let content = tag.trim_start_matches('<').trim_end_matches('>');
434        // Check for common URL schemes
435        content.starts_with("http://")
436            || content.starts_with("https://")
437            || content.starts_with("ftp://")
438            || content.starts_with("ftps://")
439            || content.starts_with("mailto:")
440    }
441
    /// Whether the configured fix mode is `MD033FixMode::Relaxed`.
    #[inline]
    fn is_relaxed_fix_mode(&self) -> bool {
        self.config.fix_mode == MD033FixMode::Relaxed
    }
446
447    #[inline]
448    fn is_droppable_attribute(&self, attr_name: &str) -> bool {
449        // Event handler attributes (onclick, onload, etc.) are never droppable
450        // because they can execute arbitrary JavaScript.
451        if attr_name.starts_with("on") && attr_name.len() > 2 {
452            return false;
453        }
454        self.drop_attributes.contains(attr_name)
455            || (attr_name.starts_with("data-")
456                && (self.drop_attributes.contains("data-*") || self.drop_attributes.contains("data-")))
457    }
458
    /// Whether `tag_name` is a wrapper element that may be stripped: only in
    /// relaxed fix mode, and only if the name is in `strip_wrapper_elements`.
    /// The name is looked up as given (no case folding here).
    #[inline]
    fn is_strippable_wrapper(&self, tag_name: &str) -> bool {
        self.is_relaxed_fix_mode() && self.strip_wrapper_elements.contains(tag_name)
    }
463
464    /// Check whether `byte_offset` sits directly inside a top-level strippable
465    /// wrapper element (e.g. `<p>`).  Returns `true` only when:
466    ///  1. The nearest unclosed opening tag before the offset is a configured
467    ///     wrapper element, AND
468    ///  2. That wrapper is itself NOT nested inside another HTML element.
469    ///
470    /// Condition 2 prevents converting inner content when the wrapper cannot
471    /// be stripped (e.g. `<div><p><img/></p></div>` -- stripping `<p>` is
472    /// blocked because it is nested, so converting `<img>` would leave
473    /// markdown inside an HTML block where it won't render).
474    fn is_inside_strippable_wrapper(&self, content: &str, byte_offset: usize) -> bool {
475        if byte_offset == 0 {
476            return false;
477        }
478        let before = content[..byte_offset].trim_end();
479        if !before.ends_with('>') || before.ends_with("->") {
480            return false;
481        }
482        if let Some(last_lt) = before.rfind('<') {
483            let potential_tag = &before[last_lt..];
484            if potential_tag.starts_with("</") || potential_tag.starts_with("<!--") {
485                return false;
486            }
487            let parent_name = potential_tag
488                .trim_start_matches('<')
489                .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
490                .next()
491                .unwrap_or("")
492                .to_lowercase();
493            if !self.strip_wrapper_elements.contains(&parent_name) {
494                return false;
495            }
496            // Verify the wrapper itself is not nested inside another element.
497            let wrapper_before = before[..last_lt].trim_end();
498            if wrapper_before.ends_with('>')
499                && !wrapper_before.ends_with("->")
500                && let Some(outer_lt) = wrapper_before.rfind('<')
501                && let outer_tag = &wrapper_before[outer_lt..]
502                && !outer_tag.starts_with("</")
503                && !outer_tag.starts_with("<!--")
504            {
505                return false;
506            }
507            return true;
508        }
509        false
510    }
511
512    /// Convert paired HTML tags to their Markdown equivalents.
513    /// Returns None if the tag cannot be safely converted (has nested tags, HTML entities, etc.)
514    fn convert_to_markdown(tag_name: &str, inner_content: &str) -> Option<String> {
515        // Skip if content contains nested HTML tags
516        if inner_content.contains('<') {
517            return None;
518        }
519        // Skip if content contains HTML entities (e.g., &vert;, &amp;, &lt;)
520        // These need HTML context to render correctly; markdown won't process them
521        if inner_content.contains('&') && inner_content.contains(';') {
522            // Check for common HTML entity patterns
523            let has_entity = inner_content
524                .split('&')
525                .skip(1)
526                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
527            if has_entity {
528                return None;
529            }
530        }
531        match tag_name {
532            "em" | "i" => Some(format!("*{inner_content}*")),
533            "strong" | "b" => Some(format!("**{inner_content}**")),
534            "code" => {
535                // Handle backticks in content by using double backticks with padding
536                if inner_content.contains('`') {
537                    Some(format!("`` {inner_content} ``"))
538                } else {
539                    Some(format!("`{inner_content}`"))
540                }
541            }
542            _ => None,
543        }
544    }
545
546    /// Convert self-closing HTML tags to their Markdown equivalents.
547    fn convert_self_closing_to_markdown(&self, tag_name: &str, opening_tag: &str) -> Option<String> {
548        match tag_name {
549            "br" => match self.config.br_style {
550                md033_config::BrStyle::TrailingSpaces => Some("  \n".to_string()),
551                md033_config::BrStyle::Backslash => Some("\\\n".to_string()),
552            },
553            "hr" => Some("\n---\n".to_string()),
554            "img" => self.convert_img_to_markdown(opening_tag),
555            _ => None,
556        }
557    }
558
559    /// Parse all attributes from an HTML tag into a list of (name, value) pairs.
560    /// This provides proper attribute parsing instead of naive string matching.
561    fn parse_attributes(tag: &str) -> Vec<(String, Option<String>)> {
562        let mut attrs = Vec::new();
563
564        // Remove < and > and tag name
565        let tag_content = tag.trim_start_matches('<').trim_end_matches('>').trim_end_matches('/');
566
567        // Find first whitespace to skip tag name
568        let attr_start = tag_content
569            .find(|c: char| c.is_whitespace())
570            .map(|i| i + 1)
571            .unwrap_or(tag_content.len());
572
573        if attr_start >= tag_content.len() {
574            return attrs;
575        }
576
577        let attr_str = &tag_content[attr_start..];
578        let mut chars = attr_str.chars().peekable();
579
580        while chars.peek().is_some() {
581            // Skip whitespace
582            while chars.peek().is_some_and(|c| c.is_whitespace()) {
583                chars.next();
584            }
585
586            if chars.peek().is_none() {
587                break;
588            }
589
590            // Read attribute name
591            let mut attr_name = String::new();
592            while let Some(&c) = chars.peek() {
593                if c.is_whitespace() || c == '=' || c == '>' || c == '/' {
594                    break;
595                }
596                attr_name.push(c);
597                chars.next();
598            }
599
600            if attr_name.is_empty() {
601                break;
602            }
603
604            // Skip whitespace before =
605            while chars.peek().is_some_and(|c| c.is_whitespace()) {
606                chars.next();
607            }
608
609            // Check for = and value
610            if chars.peek() == Some(&'=') {
611                chars.next(); // consume =
612
613                // Skip whitespace after =
614                while chars.peek().is_some_and(|c| c.is_whitespace()) {
615                    chars.next();
616                }
617
618                // Read value
619                let mut value = String::new();
620                if let Some(&quote) = chars.peek() {
621                    if quote == '"' || quote == '\'' {
622                        chars.next(); // consume opening quote
623                        for c in chars.by_ref() {
624                            if c == quote {
625                                break;
626                            }
627                            value.push(c);
628                        }
629                    } else {
630                        // Unquoted value
631                        while let Some(&c) = chars.peek() {
632                            if c.is_whitespace() || c == '>' || c == '/' {
633                                break;
634                            }
635                            value.push(c);
636                            chars.next();
637                        }
638                    }
639                }
640                attrs.push((attr_name.to_ascii_lowercase(), Some(value)));
641            } else {
642                // Boolean attribute (no value)
643                attrs.push((attr_name.to_ascii_lowercase(), None));
644            }
645        }
646
647        attrs
648    }
649
650    /// Extract an HTML attribute value from a tag string.
651    /// Handles double quotes, single quotes, and unquoted values.
652    /// Returns None if the attribute is not found.
653    fn extract_attribute(tag: &str, attr_name: &str) -> Option<String> {
654        let attrs = Self::parse_attributes(tag);
655        let attr_lower = attr_name.to_ascii_lowercase();
656
657        attrs
658            .into_iter()
659            .find(|(name, _)| name == &attr_lower)
660            .and_then(|(_, value)| value)
661    }
662
663    /// Check if an HTML tag has extra attributes beyond the specified allowed ones.
664    /// Uses proper attribute parsing to avoid false positives from string matching.
665    fn has_extra_attributes(&self, tag: &str, allowed_attrs: &[&str]) -> bool {
666        let attrs = Self::parse_attributes(tag);
667
668        // All event handlers (on*) are dangerous
669        // Plus common attributes that would be lost in markdown conversion
670        const DANGEROUS_ATTR_PREFIXES: &[&str] = &["on"]; // onclick, onload, onerror, etc.
671        const DANGEROUS_ATTRS: &[&str] = &[
672            "class",
673            "id",
674            "style",
675            "target",
676            "rel",
677            "download",
678            "referrerpolicy",
679            "crossorigin",
680            "loading",
681            "decoding",
682            "fetchpriority",
683            "sizes",
684            "srcset",
685            "usemap",
686            "ismap",
687            "width",
688            "height",
689            "name",   // anchor names
690            "data-*", // data attributes (checked separately)
691        ];
692
693        for (attr_name, _) in attrs {
694            // Skip allowed attributes (list is small, linear scan is efficient)
695            if allowed_attrs.iter().any(|a| a.to_ascii_lowercase() == attr_name) {
696                continue;
697            }
698
699            if self.is_relaxed_fix_mode() {
700                if self.is_droppable_attribute(&attr_name) {
701                    continue;
702                }
703                return true;
704            }
705
706            // Check for event handlers (on*)
707            for prefix in DANGEROUS_ATTR_PREFIXES {
708                if attr_name.starts_with(prefix) && attr_name.len() > prefix.len() {
709                    return true;
710                }
711            }
712
713            // Check for data-* attributes
714            if attr_name.starts_with("data-") {
715                return true;
716            }
717
718            // Check for other dangerous attributes
719            if DANGEROUS_ATTRS.contains(&attr_name.as_str()) {
720                return true;
721            }
722        }
723
724        false
725    }
726
727    /// Convert `<a href="url">text</a>` to `[text](url)` or `[text](url "title")`
728    /// Returns None if conversion is not safe.
729    fn convert_a_to_markdown(&self, opening_tag: &str, inner_content: &str) -> Option<String> {
730        // Extract href attribute
731        let href = Self::extract_attribute(opening_tag, "href")?;
732
733        // Check URL is safe
734        if !MD033Config::is_safe_url(&href) {
735            return None;
736        }
737
738        // Check for nested HTML tags in content
739        if inner_content.contains('<') {
740            return None;
741        }
742
743        // Check for HTML entities that wouldn't render correctly in markdown
744        if inner_content.contains('&') && inner_content.contains(';') {
745            let has_entity = inner_content
746                .split('&')
747                .skip(1)
748                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
749            if has_entity {
750                return None;
751            }
752        }
753
754        // Extract optional title attribute
755        let title = Self::extract_attribute(opening_tag, "title");
756
757        // Check for extra dangerous attributes (title is allowed)
758        if self.has_extra_attributes(opening_tag, &["href", "title"]) {
759            return None;
760        }
761
762        // If inner content is exactly a markdown image (from a prior <img> fix),
763        // use it directly without bracket escaping to produce valid [![alt](src)](href).
764        // Must verify the entire content is a single image — not mixed content like
765        // "![](url) extra [text]" where trailing brackets still need escaping.
766        let trimmed_inner = inner_content.trim();
767        let is_markdown_image =
768            trimmed_inner.starts_with("![") && trimmed_inner.contains("](") && trimmed_inner.ends_with(')') && {
769                // Verify the closing ](url) accounts for the rest of the content
770                // by finding the image's ]( and checking nothing follows the final )
771                if let Some(bracket_close) = trimmed_inner.rfind("](") {
772                    let after_paren = &trimmed_inner[bracket_close + 2..];
773                    // The rest should be just "url)" — find the matching close paren
774                    after_paren.ends_with(')')
775                        && after_paren.chars().filter(|&c| c == ')').count()
776                            >= after_paren.chars().filter(|&c| c == '(').count()
777                } else {
778                    false
779                }
780            };
781        let escaped_text = if is_markdown_image {
782            trimmed_inner.to_string()
783        } else {
784            // Escape special markdown characters in link text
785            // Brackets need escaping to avoid breaking the link syntax
786            inner_content.replace('[', r"\[").replace(']', r"\]")
787        };
788
789        // Escape parentheses in URL
790        let escaped_url = href.replace('(', "%28").replace(')', "%29");
791
792        // Format with or without title
793        if let Some(title_text) = title {
794            // Escape quotes in title
795            let escaped_title = title_text.replace('"', r#"\""#);
796            Some(format!("[{escaped_text}]({escaped_url} \"{escaped_title}\")"))
797        } else {
798            Some(format!("[{escaped_text}]({escaped_url})"))
799        }
800    }
801
802    /// Convert `<img src="url" alt="text">` to `![alt](src)` or `![alt](src "title")`
803    /// Returns None if conversion is not safe.
804    fn convert_img_to_markdown(&self, tag: &str) -> Option<String> {
805        // Extract src attribute (required)
806        let src = Self::extract_attribute(tag, "src")?;
807
808        // Check URL is safe
809        if !MD033Config::is_safe_url(&src) {
810            return None;
811        }
812
813        // Extract alt attribute (optional, default to empty)
814        let alt = Self::extract_attribute(tag, "alt").unwrap_or_default();
815
816        // Extract optional title attribute
817        let title = Self::extract_attribute(tag, "title");
818
819        // Check for extra dangerous attributes (title is allowed)
820        if self.has_extra_attributes(tag, &["src", "alt", "title"]) {
821            return None;
822        }
823
824        // Escape special markdown characters in alt text
825        let escaped_alt = alt.replace('[', r"\[").replace(']', r"\]");
826
827        // Escape parentheses in URL
828        let escaped_url = src.replace('(', "%28").replace(')', "%29");
829
830        // Format with or without title
831        if let Some(title_text) = title {
832            // Escape quotes in title
833            let escaped_title = title_text.replace('"', r#"\""#);
834            Some(format!("![{escaped_alt}]({escaped_url} \"{escaped_title}\")"))
835        } else {
836            Some(format!("![{escaped_alt}]({escaped_url})"))
837        }
838    }
839
840    /// Check if an HTML tag has attributes that would make conversion unsafe
841    fn has_significant_attributes(opening_tag: &str) -> bool {
842        // Tags with just whitespace or empty are fine
843        let tag_content = opening_tag
844            .trim_start_matches('<')
845            .trim_end_matches('>')
846            .trim_end_matches('/');
847
848        // Split by whitespace; if there's more than the tag name, it has attributes
849        let parts: Vec<&str> = tag_content.split_whitespace().collect();
850        parts.len() > 1
851    }
852
853    /// Check if a tag appears to be nested inside another HTML element
854    /// by looking at the surrounding context (e.g., `<code><em>text</em></code>`)
855    fn is_nested_in_html(content: &str, tag_byte_start: usize, tag_byte_end: usize) -> bool {
856        // Check if there's a `>` immediately before this tag (indicating inside another element)
857        if tag_byte_start > 0 {
858            let before = &content[..tag_byte_start];
859            let before_trimmed = before.trim_end();
860            if before_trimmed.ends_with('>') && !before_trimmed.ends_with("->") {
861                // Check it's not a closing tag or comment
862                if let Some(last_lt) = before_trimmed.rfind('<') {
863                    let potential_tag = &before_trimmed[last_lt..];
864                    // Skip if it's a closing tag (</...>) or comment (<!--)
865                    if !potential_tag.starts_with("</") && !potential_tag.starts_with("<!--") {
866                        return true;
867                    }
868                }
869            }
870        }
871        // Check if there's a `<` immediately after the closing tag (indicating inside another element)
872        if tag_byte_end < content.len() {
873            let after = &content[tag_byte_end..];
874            let after_trimmed = after.trim_start();
875            if after_trimmed.starts_with("</") {
876                return true;
877            }
878        }
879        false
880    }
881
882    /// Calculate fix to remove HTML tags while keeping content.
883    ///
884    /// For self-closing tags like `<br/>`, returns a single fix to remove the tag.
885    /// For paired tags like `<span>text</span>`, returns the replacement text (just the content).
886    ///
887    /// Returns (range, replacement_text) where range is the bytes to replace
888    /// and replacement_text is what to put there (content without tags, or empty for self-closing).
889    ///
890    /// When `in_html_block` is true, returns None in conservative mode.  In
891    /// relaxed mode two exceptions apply:
892    /// - Strippable wrapper elements (e.g. `<p>`) bypass the block guard so
893    ///   they can be stripped even though they ARE the HTML block.
894    /// - Self-closing tags whose direct parent is a strippable wrapper also
895    ///   bypass the guard so inner content can be converted first.
896    fn calculate_fix(
897        &self,
898        content: &str,
899        opening_tag: &str,
900        tag_byte_start: usize,
901        in_html_block: bool,
902    ) -> Option<(std::ops::Range<usize>, String)> {
903        // Extract tag name from opening tag
904        let tag_name = opening_tag
905            .trim_start_matches('<')
906            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
907            .next()?
908            .to_lowercase();
909
910        // Check if it's a self-closing tag (ends with /> or is a void element like <br>)
911        let is_self_closing =
912            opening_tag.ends_with("/>") || matches!(tag_name.as_str(), "br" | "hr" | "img" | "input" | "meta" | "link");
913
914        if is_self_closing {
915            // When fix is enabled, try to convert to Markdown equivalent.
916            // Skip tags inside HTML blocks (would break structure), UNLESS we
917            // are in relaxed mode and the containing block is a strippable
918            // wrapper -- this lets the inner element be converted first so the
919            // wrapper can be stripped on a subsequent pass.
920            let block_ok = !in_html_block
921                || (self.is_relaxed_fix_mode() && self.is_inside_strippable_wrapper(content, tag_byte_start));
922            if self.config.fix
923                && MD033Config::is_safe_fixable_tag(&tag_name)
924                && block_ok
925                && let Some(markdown) = self.convert_self_closing_to_markdown(&tag_name, opening_tag)
926            {
927                return Some((tag_byte_start..tag_byte_start + opening_tag.len(), markdown));
928            }
929            // Can't convert this self-closing tag to Markdown, don't provide a fix
930            // (e.g., <input>, <meta> - these have no Markdown equivalent without the new img support)
931            return None;
932        }
933
934        // Search for the closing tag after the opening tag (case-insensitive)
935        let search_start = tag_byte_start + opening_tag.len();
936        let search_slice = &content[search_start..];
937
938        // Find closing tag case-insensitively
939        let closing_tag_lower = format!("</{tag_name}>");
940        let closing_pos = search_slice.to_ascii_lowercase().find(&closing_tag_lower);
941
942        if let Some(closing_pos) = closing_pos {
943            // Get actual closing tag from original content to get correct byte length
944            let closing_tag_len = closing_tag_lower.len();
945            let closing_byte_start = search_start + closing_pos;
946            let closing_byte_end = closing_byte_start + closing_tag_len;
947
948            // Extract the content between tags
949            let inner_content = &content[search_start..closing_byte_start];
950
951            // In relaxed mode, check wrapper stripping BEFORE the in_html_block
952            // guard because the wrapper element itself IS the HTML block. We only
953            // strip when:
954            //  - the wrapper is not nested inside another HTML element
955            //  - the inner content no longer contains HTML tags (prevents
956            //    overlapping byte-range replacements within a single fix pass)
957            if self.config.fix && self.is_strippable_wrapper(&tag_name) {
958                if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
959                    return None;
960                }
961                if inner_content.contains('<') {
962                    return None;
963                }
964                return Some((tag_byte_start..closing_byte_end, inner_content.trim().to_string()));
965            }
966
967            // Skip auto-fix if inside an HTML block (like <pre>, <div>, etc.)
968            // Converting tags inside HTML blocks would break the intended structure
969            if in_html_block {
970                return None;
971            }
972
973            // Skip auto-fix if this tag is nested inside another HTML element
974            // e.g., <code><em>text</em></code> - don't convert the inner <em>
975            if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
976                return None;
977            }
978
979            // When fix is enabled and tag is safe to convert, try markdown conversion
980            if self.config.fix && MD033Config::is_safe_fixable_tag(&tag_name) {
981                // Handle <a> tags specially - they require attribute extraction
982                if tag_name == "a" {
983                    if let Some(markdown) = self.convert_a_to_markdown(opening_tag, inner_content) {
984                        return Some((tag_byte_start..closing_byte_end, markdown));
985                    }
986                    // convert_a_to_markdown returned None - unsafe URL, nested HTML, etc.
987                    return None;
988                }
989
990                // For simple tags (em, strong, code, etc.) - no attributes allowed
991                if Self::has_significant_attributes(opening_tag) {
992                    // Don't provide a fix for tags with attributes
993                    // User may want to keep the attributes, so leave as-is
994                    return None;
995                }
996                if let Some(markdown) = Self::convert_to_markdown(&tag_name, inner_content) {
997                    return Some((tag_byte_start..closing_byte_end, markdown));
998                }
999                // convert_to_markdown returned None, meaning content has nested tags or
1000                // HTML entities that shouldn't be converted - leave as-is
1001                return None;
1002            }
1003
1004            // For non-fixable tags, don't provide a fix
1005            // (e.g., <div>content</div>, <span>text</span>)
1006            return None;
1007        }
1008
1009        // If no closing tag found, don't provide a fix (malformed HTML)
1010        None
1011    }
1012}
1013
1014impl Rule for MD033NoInlineHtml {
1015    fn name(&self) -> &'static str {
1016        "MD033"
1017    }
1018
1019    fn description(&self) -> &'static str {
1020        "Inline HTML is not allowed"
1021    }
1022
1023    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
1024        let content = ctx.content;
1025
1026        // Early return: if no HTML tags at all, skip processing
1027        if content.is_empty() || !ctx.likely_has_html() {
1028            return Ok(Vec::new());
1029        }
1030
1031        // Quick check for HTML tag pattern before expensive processing
1032        if !HTML_TAG_QUICK_CHECK.is_match(content) {
1033            return Ok(Vec::new());
1034        }
1035
1036        let mut warnings = Vec::new();
1037
1038        // Use centralized HTML parser to get all HTML tags (including multiline)
1039        let html_tags = ctx.html_tags();
1040
1041        for html_tag in html_tags.iter() {
1042            // Skip closing tags (only warn on opening tags)
1043            if html_tag.is_closing {
1044                continue;
1045            }
1046
1047            let line_num = html_tag.line;
1048            let tag_byte_start = html_tag.byte_offset;
1049
1050            // Reconstruct tag string from byte offsets
1051            let tag = &content[html_tag.byte_offset..html_tag.byte_end];
1052
1053            // Skip tags in code blocks, PyMdown blocks, and block IALs
1054            if ctx
1055                .line_info(line_num)
1056                .is_some_and(|info| info.in_code_block || info.in_pymdown_block || info.is_kramdown_block_ial)
1057            {
1058                continue;
1059            }
1060
1061            // Skip HTML tags inside HTML comments
1062            if ctx.is_in_html_comment(tag_byte_start) {
1063                continue;
1064            }
1065
1066            // Skip HTML comments themselves
1067            if self.is_html_comment(tag) {
1068                continue;
1069            }
1070
1071            // Skip angle brackets inside link reference definition titles
1072            // e.g., [ref]: url "Title with <angle brackets>"
1073            if ctx.is_in_link_title(tag_byte_start) {
1074                continue;
1075            }
1076
1077            // Skip JSX components in MDX files (e.g., <Chart />, <MyComponent>)
1078            if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
1079                continue;
1080            }
1081
1082            // Skip JSX fragments in MDX files (<> and </>)
1083            if ctx.flavor.supports_jsx() && (html_tag.tag_name.is_empty() || tag == "<>" || tag == "</>") {
1084                continue;
1085            }
1086
1087            // Skip elements with JSX-specific attributes in MDX files
1088            // e.g., <div className="...">, <button onClick={handler}>
1089            if ctx.flavor.supports_jsx() && Self::has_jsx_attributes(tag) {
1090                continue;
1091            }
1092
1093            // Skip non-HTML elements (placeholder syntax like <NAME>, <resource>)
1094            if !Self::is_html_element_or_custom(&html_tag.tag_name) {
1095                continue;
1096            }
1097
1098            // Skip likely programming type annotations
1099            if self.is_likely_type_annotation(tag) {
1100                continue;
1101            }
1102
1103            // Skip email addresses in angle brackets
1104            if self.is_email_address(tag) {
1105                continue;
1106            }
1107
1108            // Skip URLs in angle brackets
1109            if self.is_url_in_angle_brackets(tag) {
1110                continue;
1111            }
1112
1113            // Skip tags inside code spans (use byte offset for reliable multi-line span detection)
1114            if ctx.is_byte_offset_in_code_span(tag_byte_start) {
1115                continue;
1116            }
1117
1118            // Determine whether to report this tag based on mode:
1119            // - Disallowed mode: only report tags in the disallowed list
1120            // - Default mode: report all tags except those in the allowed list
1121            if self.is_disallowed_mode() {
1122                // In disallowed mode, skip tags NOT in the disallowed list
1123                if !self.is_tag_disallowed(tag) {
1124                    continue;
1125                }
1126            } else {
1127                // In default mode, skip allowed tags
1128                if self.is_tag_allowed(tag) {
1129                    continue;
1130                }
1131            }
1132
1133            // Skip tags with markdown attribute in MkDocs mode
1134            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
1135                continue;
1136            }
1137
1138            // Check if we're inside an HTML block (like <pre>, <div>, etc.)
1139            let in_html_block = ctx.is_in_html_block(line_num);
1140
1141            // Calculate fix to remove HTML tags but keep content
1142            let fix = self
1143                .calculate_fix(content, tag, tag_byte_start, in_html_block)
1144                .map(|(range, replacement)| Fix { range, replacement });
1145
1146            // Calculate actual end line and column for multiline tags
1147            // Use byte_end - 1 to get the last character position of the tag
1148            let (end_line, end_col) = if html_tag.byte_end > 0 {
1149                ctx.offset_to_line_col(html_tag.byte_end - 1)
1150            } else {
1151                (line_num, html_tag.end_col + 1)
1152            };
1153
1154            // Report the HTML tag
1155            warnings.push(LintWarning {
1156                rule_name: Some(self.name().to_string()),
1157                line: line_num,
1158                column: html_tag.start_col + 1, // Convert to 1-indexed
1159                end_line,                       // Actual end line for multiline tags
1160                end_column: end_col + 1,        // Actual end column
1161                message: format!("Inline HTML found: {tag}"),
1162                severity: Severity::Warning,
1163                fix,
1164            });
1165        }
1166
1167        Ok(warnings)
1168    }
1169
1170    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
1171        // Auto-fix is opt-in: only apply if explicitly enabled in config
1172        if !self.config.fix {
1173            return Ok(ctx.content.to_string());
1174        }
1175
1176        // Get warnings with their inline fixes
1177        let warnings = self.check(ctx)?;
1178
1179        // If no warnings with fixes, return original content
1180        if warnings.is_empty() || !warnings.iter().any(|w| w.fix.is_some()) {
1181            return Ok(ctx.content.to_string());
1182        }
1183
1184        // Collect all fixes and sort by range start (descending) to apply from end to beginning
1185        let mut fixes: Vec<_> = warnings
1186            .iter()
1187            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
1188            .collect();
1189        fixes.sort_by(|a, b| b.0.cmp(&a.0));
1190
1191        // Apply fixes from end to beginning to preserve byte offsets
1192        let mut result = ctx.content.to_string();
1193        for (start, end, replacement) in fixes {
1194            if start < result.len() && end <= result.len() && start <= end {
1195                result.replace_range(start..end, replacement);
1196            }
1197        }
1198
1199        Ok(result)
1200    }
1201
1202    fn fix_capability(&self) -> crate::rule::FixCapability {
1203        if self.config.fix {
1204            crate::rule::FixCapability::FullyFixable
1205        } else {
1206            crate::rule::FixCapability::Unfixable
1207        }
1208    }
1209
1210    /// Get the category of this rule for selective processing
1211    fn category(&self) -> RuleCategory {
1212        RuleCategory::Html
1213    }
1214
1215    /// Check if this rule should be skipped
1216    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
1217        ctx.content.is_empty() || !ctx.likely_has_html()
1218    }
1219
1220    fn as_any(&self) -> &dyn std::any::Any {
1221        self
1222    }
1223
1224    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1225        let json_value = serde_json::to_value(&self.config).ok()?;
1226        Some((
1227            self.name().to_string(),
1228            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1229        ))
1230    }
1231
1232    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1233    where
1234        Self: Sized,
1235    {
1236        let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
1237        Box::new(Self::from_config_struct(rule_config))
1238    }
1239}
1240
1241#[cfg(test)]
1242mod tests {
1243    use super::*;
1244    use crate::lint_context::LintContext;
1245    use crate::rule::Rule;
1246
1247    fn relaxed_fix_rule() -> MD033NoInlineHtml {
1248        let config = MD033Config {
1249            fix: true,
1250            fix_mode: MD033FixMode::Relaxed,
1251            ..MD033Config::default()
1252        };
1253        MD033NoInlineHtml::from_config_struct(config)
1254    }
1255
1256    #[test]
1257    fn test_md033_basic_html() {
1258        let rule = MD033NoInlineHtml::default();
1259        let content = "<div>Some content</div>";
1260        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1261        let result = rule.check(&ctx).unwrap();
1262        // Only reports opening tags, not closing tags
1263        assert_eq!(result.len(), 1); // Only <div>, not </div>
1264        assert!(result[0].message.starts_with("Inline HTML found: <div>"));
1265    }
1266
1267    #[test]
1268    fn test_md033_case_insensitive() {
1269        let rule = MD033NoInlineHtml::default();
1270        let content = "<DiV>Some <B>content</B></dIv>";
1271        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1272        let result = rule.check(&ctx).unwrap();
1273        // Only reports opening tags, not closing tags
1274        assert_eq!(result.len(), 2); // <DiV>, <B> (not </B>, </dIv>)
1275        assert_eq!(result[0].message, "Inline HTML found: <DiV>");
1276        assert_eq!(result[1].message, "Inline HTML found: <B>");
1277    }
1278
1279    #[test]
1280    fn test_md033_allowed_tags() {
1281        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
1282        let content = "<div>Allowed</div><p>Not allowed</p><br/>";
1283        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1284        let result = rule.check(&ctx).unwrap();
1285        // Only warnings for non-allowed opening tags (<p> only, div and br are allowed)
1286        assert_eq!(result.len(), 1);
1287        assert_eq!(result[0].message, "Inline HTML found: <p>");
1288
1289        // Test case-insensitivity of allowed tags
1290        let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
1291        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1292        let result2 = rule.check(&ctx2).unwrap();
1293        assert_eq!(result2.len(), 1); // Only <P> flagged
1294        assert_eq!(result2[0].message, "Inline HTML found: <P>");
1295    }
1296
1297    #[test]
1298    fn test_md033_html_comments() {
1299        let rule = MD033NoInlineHtml::default();
1300        let content = "<!-- This is a comment --> <p>Not a comment</p>";
1301        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1302        let result = rule.check(&ctx).unwrap();
1303        // Should detect warnings for HTML opening tags (comments are skipped, closing tags not reported)
1304        assert_eq!(result.len(), 1); // Only <p>
1305        assert_eq!(result[0].message, "Inline HTML found: <p>");
1306    }
1307
1308    #[test]
1309    fn test_md033_tags_in_links() {
1310        let rule = MD033NoInlineHtml::default();
1311        let content = "[Link](http://example.com/<div>)";
1312        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1313        let result = rule.check(&ctx).unwrap();
1314        // The <div> in the URL should be detected as HTML (not skipped)
1315        assert_eq!(result.len(), 1);
1316        assert_eq!(result[0].message, "Inline HTML found: <div>");
1317
1318        let content2 = "[Link <a>text</a>](url)";
1319        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1320        let result2 = rule.check(&ctx2).unwrap();
1321        // Only reports opening tags
1322        assert_eq!(result2.len(), 1); // Only <a>
1323        assert_eq!(result2[0].message, "Inline HTML found: <a>");
1324    }
1325
1326    #[test]
1327    fn test_md033_fix_escaping() {
1328        let rule = MD033NoInlineHtml::default();
1329        let content = "Text with <div> and <br/> tags.";
1330        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1331        let fixed_content = rule.fix(&ctx).unwrap();
1332        // No fix for HTML tags; output should be unchanged
1333        assert_eq!(fixed_content, content);
1334    }
1335
1336    #[test]
1337    fn test_md033_in_code_blocks() {
1338        let rule = MD033NoInlineHtml::default();
1339        let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
1340        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1341        let result = rule.check(&ctx).unwrap();
1342        // Only reports opening tags outside code block
1343        assert_eq!(result.len(), 1); // Only <div> outside code block
1344        assert_eq!(result[0].message, "Inline HTML found: <div>");
1345    }
1346
1347    #[test]
1348    fn test_md033_in_code_spans() {
1349        let rule = MD033NoInlineHtml::default();
1350        let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
1351        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1352        let result = rule.check(&ctx).unwrap();
1353        // Should detect <br/> outside code span, but not tags inside code span
1354        assert_eq!(result.len(), 1);
1355        assert_eq!(result[0].message, "Inline HTML found: <br/>");
1356    }
1357
1358    #[test]
1359    fn test_md033_issue_90_code_span_with_diff_block() {
1360        // Test for issue #90: inline code span followed by diff code block
1361        let rule = MD033NoInlineHtml::default();
1362        let content = r#"# Heading
1363
1364`<env>`
1365
1366```diff
1367- this
1368+ that
1369```"#;
1370        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1371        let result = rule.check(&ctx).unwrap();
1372        // Should NOT detect <env> as HTML since it's inside backticks
1373        assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
1374    }
1375
1376    #[test]
1377    fn test_md033_multiple_code_spans_with_angle_brackets() {
1378        // Test multiple code spans on same line
1379        let rule = MD033NoInlineHtml::default();
1380        let content = "`<one>` and `<two>` and `<three>` are all code spans";
1381        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1382        let result = rule.check(&ctx).unwrap();
1383        assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
1384    }
1385
1386    #[test]
1387    fn test_md033_nested_angle_brackets_in_code_span() {
1388        // Test nested angle brackets
1389        let rule = MD033NoInlineHtml::default();
1390        let content = "Text with `<<nested>>` brackets";
1391        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1392        let result = rule.check(&ctx).unwrap();
1393        assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
1394    }
1395
1396    #[test]
1397    fn test_md033_code_span_at_end_before_code_block() {
1398        // Test code span at end of line before code block
1399        let rule = MD033NoInlineHtml::default();
1400        let content = "Testing `<test>`\n```\ncode here\n```";
1401        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1402        let result = rule.check(&ctx).unwrap();
1403        assert_eq!(result.len(), 0, "Should handle code span before code block");
1404    }
1405
1406    #[test]
1407    fn test_md033_quick_fix_inline_tag() {
1408        // Test that non-fixable tags (like <span>) do NOT get a fix
1409        // Only safe fixable tags (em, i, strong, b, code, br, hr) with fix=true get fixes
1410        let rule = MD033NoInlineHtml::default();
1411        let content = "This has <span>inline text</span> that should keep content.";
1412        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1413        let result = rule.check(&ctx).unwrap();
1414
1415        assert_eq!(result.len(), 1, "Should find one HTML tag");
1416        // <span> is NOT a safe fixable tag, so no fix should be provided
1417        assert!(
1418            result[0].fix.is_none(),
1419            "Non-fixable tags like <span> should not have a fix"
1420        );
1421    }
1422
1423    #[test]
1424    fn test_md033_quick_fix_multiline_tag() {
1425        // HTML block elements like <div> are intentionally NOT auto-fixed
1426        // Removing them would change document structure significantly
1427        let rule = MD033NoInlineHtml::default();
1428        let content = "<div>\nBlock content\n</div>";
1429        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1430        let result = rule.check(&ctx).unwrap();
1431
1432        assert_eq!(result.len(), 1, "Should find one HTML tag");
1433        // HTML block elements should NOT have auto-fix
1434        assert!(result[0].fix.is_none(), "HTML block elements should NOT have auto-fix");
1435    }
1436
1437    #[test]
1438    fn test_md033_quick_fix_self_closing_tag() {
1439        // Test that self-closing tags with fix=false (default) do NOT get a fix
1440        let rule = MD033NoInlineHtml::default();
1441        let content = "Self-closing: <br/>";
1442        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1443        let result = rule.check(&ctx).unwrap();
1444
1445        assert_eq!(result.len(), 1, "Should find one HTML tag");
1446        // Default config has fix=false, so no fix should be provided
1447        assert!(
1448            result[0].fix.is_none(),
1449            "Self-closing tags should not have a fix when fix config is false"
1450        );
1451    }
1452
1453    #[test]
1454    fn test_md033_quick_fix_multiple_tags() {
1455        // Test that multiple tags without fix=true do NOT get fixes
1456        // <span> is not a safe fixable tag, <strong> is but fix=false by default
1457        let rule = MD033NoInlineHtml::default();
1458        let content = "<span>first</span> and <strong>second</strong>";
1459        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1460        let result = rule.check(&ctx).unwrap();
1461
1462        assert_eq!(result.len(), 2, "Should find two HTML tags");
1463        // Neither should have a fix: <span> is not fixable, <strong> is but fix=false
1464        assert!(result[0].fix.is_none(), "Non-fixable <span> should not have a fix");
1465        assert!(
1466            result[1].fix.is_none(),
1467            "<strong> should not have a fix when fix config is false"
1468        );
1469    }
1470
1471    #[test]
1472    fn test_md033_skip_angle_brackets_in_link_titles() {
1473        // Angle brackets inside link reference definition titles should not be flagged as HTML
1474        let rule = MD033NoInlineHtml::default();
1475        let content = r#"# Test
1476
1477[example]: <https://example.com> "Title with <Angle Brackets> inside"
1478
1479Regular text with <div>content</div> HTML tag.
1480"#;
1481        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1482        let result = rule.check(&ctx).unwrap();
1483
1484        // Should only flag <div>, not <Angle Brackets> in the title (not a valid HTML element)
1485        // Opening tag only (markdownlint behavior)
1486        assert_eq!(result.len(), 1, "Should find opening div tag");
1487        assert!(
1488            result[0].message.contains("<div>"),
1489            "Should flag <div>, got: {}",
1490            result[0].message
1491        );
1492    }
1493
1494    #[test]
1495    fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
1496        // Test with single-quoted title
1497        let rule = MD033NoInlineHtml::default();
1498        let content = r#"[ref]: url 'Title <Help Wanted> here'
1499
1500<span>text</span> here
1501"#;
1502        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1503        let result = rule.check(&ctx).unwrap();
1504
1505        // <Help Wanted> is not a valid HTML element, so only <span> is flagged
1506        // Opening tag only (markdownlint behavior)
1507        assert_eq!(result.len(), 1, "Should find opening span tag");
1508        assert!(
1509            result[0].message.contains("<span>"),
1510            "Should flag <span>, got: {}",
1511            result[0].message
1512        );
1513    }
1514
1515    #[test]
1516    fn test_md033_multiline_tag_end_line_calculation() {
1517        // Test that multiline HTML tags report correct end_line
1518        let rule = MD033NoInlineHtml::default();
1519        let content = "<div\n  class=\"test\"\n  id=\"example\">";
1520        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1521        let result = rule.check(&ctx).unwrap();
1522
1523        assert_eq!(result.len(), 1, "Should find one HTML tag");
1524        // Tag starts on line 1
1525        assert_eq!(result[0].line, 1, "Start line should be 1");
1526        // Tag ends on line 3 (where the closing > is)
1527        assert_eq!(result[0].end_line, 3, "End line should be 3");
1528    }
1529
1530    #[test]
1531    fn test_md033_single_line_tag_same_start_end_line() {
1532        // Test that single-line HTML tags have same start and end line
1533        let rule = MD033NoInlineHtml::default();
1534        let content = "Some text <div class=\"test\"> more text";
1535        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1536        let result = rule.check(&ctx).unwrap();
1537
1538        assert_eq!(result.len(), 1, "Should find one HTML tag");
1539        assert_eq!(result[0].line, 1, "Start line should be 1");
1540        assert_eq!(result[0].end_line, 1, "End line should be 1 for single-line tag");
1541    }
1542
1543    #[test]
1544    fn test_md033_multiline_tag_with_many_attributes() {
1545        // Test multiline tag spanning multiple lines
1546        let rule = MD033NoInlineHtml::default();
1547        let content =
1548            "Text\n<div\n  data-attr1=\"value1\"\n  data-attr2=\"value2\"\n  data-attr3=\"value3\">\nMore text";
1549        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1550        let result = rule.check(&ctx).unwrap();
1551
1552        assert_eq!(result.len(), 1, "Should find one HTML tag");
1553        // Tag starts on line 2 (first line is "Text")
1554        assert_eq!(result[0].line, 2, "Start line should be 2");
1555        // Tag ends on line 5 (where the closing > is)
1556        assert_eq!(result[0].end_line, 5, "End line should be 5");
1557    }
1558
1559    #[test]
1560    fn test_md033_disallowed_mode_basic() {
1561        // Test disallowed mode: only flags tags in the disallowed list
1562        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string(), "iframe".to_string()]);
1563        let content = "<div>Safe content</div><script>alert('xss')</script>";
1564        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1565        let result = rule.check(&ctx).unwrap();
1566
1567        // Should only flag <script>, not <div>
1568        assert_eq!(result.len(), 1, "Should only flag disallowed tags");
1569        assert!(result[0].message.contains("<script>"), "Should flag script tag");
1570    }
1571
1572    #[test]
1573    fn test_md033_disallowed_gfm_security_tags() {
1574        // Test GFM security tags expansion
1575        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1576        let content = r#"
1577<div>Safe</div>
1578<title>Bad title</title>
1579<textarea>Bad textarea</textarea>
1580<style>.bad{}</style>
1581<iframe src="evil"></iframe>
1582<script>evil()</script>
1583<plaintext>old tag</plaintext>
1584<span>Safe span</span>
1585"#;
1586        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1587        let result = rule.check(&ctx).unwrap();
1588
1589        // Should flag: title, textarea, style, iframe, script, plaintext
1590        // Should NOT flag: div, span
1591        assert_eq!(result.len(), 6, "Should flag 6 GFM security tags");
1592
1593        let flagged_tags: Vec<&str> = result
1594            .iter()
1595            .filter_map(|w| w.message.split("<").nth(1))
1596            .filter_map(|s| s.split(">").next())
1597            .filter_map(|s| s.split_whitespace().next())
1598            .collect();
1599
1600        assert!(flagged_tags.contains(&"title"), "Should flag title");
1601        assert!(flagged_tags.contains(&"textarea"), "Should flag textarea");
1602        assert!(flagged_tags.contains(&"style"), "Should flag style");
1603        assert!(flagged_tags.contains(&"iframe"), "Should flag iframe");
1604        assert!(flagged_tags.contains(&"script"), "Should flag script");
1605        assert!(flagged_tags.contains(&"plaintext"), "Should flag plaintext");
1606        assert!(!flagged_tags.contains(&"div"), "Should NOT flag div");
1607        assert!(!flagged_tags.contains(&"span"), "Should NOT flag span");
1608    }
1609
1610    #[test]
1611    fn test_md033_disallowed_case_insensitive() {
1612        // Test that disallowed check is case-insensitive
1613        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string()]);
1614        let content = "<SCRIPT>alert('xss')</SCRIPT><Script>alert('xss')</Script>";
1615        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1616        let result = rule.check(&ctx).unwrap();
1617
1618        // Should flag both <SCRIPT> and <Script>
1619        assert_eq!(result.len(), 2, "Should flag both case variants");
1620    }
1621
1622    #[test]
1623    fn test_md033_disallowed_with_attributes() {
1624        // Test that disallowed mode works with tags that have attributes
1625        let rule = MD033NoInlineHtml::with_disallowed(vec!["iframe".to_string()]);
1626        let content = r#"<iframe src="https://evil.com" width="100" height="100"></iframe>"#;
1627        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1628        let result = rule.check(&ctx).unwrap();
1629
1630        assert_eq!(result.len(), 1, "Should flag iframe with attributes");
1631        assert!(result[0].message.contains("iframe"), "Should flag iframe");
1632    }
1633
1634    #[test]
1635    fn test_md033_disallowed_all_gfm_tags() {
1636        // Verify all GFM disallowed tags are covered
1637        use md033_config::GFM_DISALLOWED_TAGS;
1638        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1639
1640        for tag in GFM_DISALLOWED_TAGS {
1641            let content = format!("<{tag}>content</{tag}>");
1642            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
1643            let result = rule.check(&ctx).unwrap();
1644
1645            assert_eq!(result.len(), 1, "GFM tag <{tag}> should be flagged");
1646        }
1647    }
1648
1649    #[test]
1650    fn test_md033_disallowed_mixed_with_custom() {
1651        // Test mixing "gfm" with custom disallowed tags
1652        let rule = MD033NoInlineHtml::with_disallowed(vec![
1653            "gfm".to_string(),
1654            "marquee".to_string(), // Custom disallowed tag
1655        ]);
1656        let content = r#"<script>bad</script><marquee>annoying</marquee><div>ok</div>"#;
1657        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1658        let result = rule.check(&ctx).unwrap();
1659
1660        // Should flag script (gfm) and marquee (custom)
1661        assert_eq!(result.len(), 2, "Should flag both gfm and custom tags");
1662    }
1663
1664    #[test]
1665    fn test_md033_disallowed_empty_means_default_mode() {
1666        // Empty disallowed list means default mode (flag all HTML)
1667        let rule = MD033NoInlineHtml::with_disallowed(vec![]);
1668        let content = "<div>content</div>";
1669        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1670        let result = rule.check(&ctx).unwrap();
1671
1672        // Should flag <div> in default mode
1673        assert_eq!(result.len(), 1, "Empty disallowed = default mode");
1674    }
1675
1676    #[test]
1677    fn test_md033_jsx_fragments_in_mdx() {
1678        // JSX fragments (<> and </>) should not trigger warnings in MDX
1679        let rule = MD033NoInlineHtml::default();
1680        let content = r#"# MDX Document
1681
1682<>
1683  <Heading />
1684  <Content />
1685</>
1686
1687<div>Regular HTML should still be flagged</div>
1688"#;
1689        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1690        let result = rule.check(&ctx).unwrap();
1691
1692        // Should only flag <div>, not the fragments or JSX components
1693        assert_eq!(result.len(), 1, "Should only find one HTML tag (the div)");
1694        assert!(
1695            result[0].message.contains("<div>"),
1696            "Should flag <div>, not JSX fragments"
1697        );
1698    }
1699
1700    #[test]
1701    fn test_md033_jsx_components_in_mdx() {
1702        // JSX components (capitalized) should not trigger warnings in MDX
1703        let rule = MD033NoInlineHtml::default();
1704        let content = r#"<CustomComponent prop="value">
1705  Content
1706</CustomComponent>
1707
1708<MyButton onClick={handler}>Click</MyButton>
1709"#;
1710        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1711        let result = rule.check(&ctx).unwrap();
1712
1713        // No warnings - all are JSX components
1714        assert_eq!(result.len(), 0, "Should not flag JSX components in MDX");
1715    }
1716
1717    #[test]
1718    fn test_md033_jsx_not_skipped_in_standard_markdown() {
1719        // In standard markdown, capitalized tags should still be flagged if they're valid HTML
1720        let rule = MD033NoInlineHtml::default();
1721        let content = "<Script>alert(1)</Script>";
1722        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1723        let result = rule.check(&ctx).unwrap();
1724
1725        // Should flag <Script> in standard markdown (it's a valid HTML element)
1726        assert_eq!(result.len(), 1, "Should flag <Script> in standard markdown");
1727    }
1728
1729    #[test]
1730    fn test_md033_jsx_attributes_in_mdx() {
1731        // Elements with JSX-specific attributes should not trigger warnings in MDX
1732        let rule = MD033NoInlineHtml::default();
1733        let content = r#"# MDX with JSX Attributes
1734
1735<div className="card big">Content</div>
1736
1737<button onClick={handleClick}>Click me</button>
1738
1739<label htmlFor="input-id">Label</label>
1740
1741<input onChange={handleChange} />
1742
1743<div class="html-class">Regular HTML should be flagged</div>
1744"#;
1745        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1746        let result = rule.check(&ctx).unwrap();
1747
1748        // Should only flag the div with regular HTML "class" attribute
1749        assert_eq!(
1750            result.len(),
1751            1,
1752            "Should only flag HTML element without JSX attributes, got: {result:?}"
1753        );
1754        assert!(
1755            result[0].message.contains("<div class="),
1756            "Should flag the div with HTML class attribute"
1757        );
1758    }
1759
1760    #[test]
1761    fn test_md033_jsx_attributes_not_skipped_in_standard() {
1762        // In standard markdown, JSX attributes should still be flagged
1763        let rule = MD033NoInlineHtml::default();
1764        let content = r#"<div className="card">Content</div>"#;
1765        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1766        let result = rule.check(&ctx).unwrap();
1767
1768        // Should flag in standard markdown
1769        assert_eq!(result.len(), 1, "Should flag JSX-style elements in standard markdown");
1770    }
1771
1772    // Auto-fix tests for MD033
1773
1774    #[test]
1775    fn test_md033_fix_disabled_by_default() {
1776        // Auto-fix should be disabled by default
1777        let rule = MD033NoInlineHtml::default();
1778        assert!(!rule.config.fix, "Fix should be disabled by default");
1779        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::Unfixable);
1780    }
1781
1782    #[test]
1783    fn test_md033_fix_enabled_em_to_italic() {
1784        // When fix is enabled, <em>text</em> should convert to *text*
1785        let rule = MD033NoInlineHtml::with_fix(true);
1786        let content = "This has <em>emphasized text</em> here.";
1787        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1788        let fixed = rule.fix(&ctx).unwrap();
1789        assert_eq!(fixed, "This has *emphasized text* here.");
1790    }
1791
1792    #[test]
1793    fn test_md033_fix_enabled_i_to_italic() {
1794        // <i>text</i> should convert to *text*
1795        let rule = MD033NoInlineHtml::with_fix(true);
1796        let content = "This has <i>italic text</i> here.";
1797        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1798        let fixed = rule.fix(&ctx).unwrap();
1799        assert_eq!(fixed, "This has *italic text* here.");
1800    }
1801
1802    #[test]
1803    fn test_md033_fix_enabled_strong_to_bold() {
1804        // <strong>text</strong> should convert to **text**
1805        let rule = MD033NoInlineHtml::with_fix(true);
1806        let content = "This has <strong>bold text</strong> here.";
1807        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1808        let fixed = rule.fix(&ctx).unwrap();
1809        assert_eq!(fixed, "This has **bold text** here.");
1810    }
1811
1812    #[test]
1813    fn test_md033_fix_enabled_b_to_bold() {
1814        // <b>text</b> should convert to **text**
1815        let rule = MD033NoInlineHtml::with_fix(true);
1816        let content = "This has <b>bold text</b> here.";
1817        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1818        let fixed = rule.fix(&ctx).unwrap();
1819        assert_eq!(fixed, "This has **bold text** here.");
1820    }
1821
1822    #[test]
1823    fn test_md033_fix_enabled_code_to_backticks() {
1824        // <code>text</code> should convert to `text`
1825        let rule = MD033NoInlineHtml::with_fix(true);
1826        let content = "This has <code>inline code</code> here.";
1827        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1828        let fixed = rule.fix(&ctx).unwrap();
1829        assert_eq!(fixed, "This has `inline code` here.");
1830    }
1831
1832    #[test]
1833    fn test_md033_fix_enabled_code_with_backticks() {
1834        // <code>text with `backticks`</code> should use double backticks
1835        let rule = MD033NoInlineHtml::with_fix(true);
1836        let content = "This has <code>text with `backticks`</code> here.";
1837        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1838        let fixed = rule.fix(&ctx).unwrap();
1839        assert_eq!(fixed, "This has `` text with `backticks` `` here.");
1840    }
1841
1842    #[test]
1843    fn test_md033_fix_enabled_br_trailing_spaces() {
1844        // <br> should convert to two trailing spaces + newline (default)
1845        let rule = MD033NoInlineHtml::with_fix(true);
1846        let content = "First line<br>Second line";
1847        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1848        let fixed = rule.fix(&ctx).unwrap();
1849        assert_eq!(fixed, "First line  \nSecond line");
1850    }
1851
1852    #[test]
1853    fn test_md033_fix_enabled_br_self_closing() {
1854        // <br/> and <br /> should also convert
1855        let rule = MD033NoInlineHtml::with_fix(true);
1856        let content = "First<br/>second<br />third";
1857        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1858        let fixed = rule.fix(&ctx).unwrap();
1859        assert_eq!(fixed, "First  \nsecond  \nthird");
1860    }
1861
1862    #[test]
1863    fn test_md033_fix_enabled_br_backslash_style() {
1864        // With br_style = backslash, <br> should convert to backslash + newline
1865        let config = MD033Config {
1866            allowed: Vec::new(),
1867            disallowed: Vec::new(),
1868            fix: true,
1869            br_style: md033_config::BrStyle::Backslash,
1870            ..MD033Config::default()
1871        };
1872        let rule = MD033NoInlineHtml::from_config_struct(config);
1873        let content = "First line<br>Second line";
1874        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1875        let fixed = rule.fix(&ctx).unwrap();
1876        assert_eq!(fixed, "First line\\\nSecond line");
1877    }
1878
1879    #[test]
1880    fn test_md033_fix_enabled_hr() {
1881        // <hr> should convert to horizontal rule
1882        let rule = MD033NoInlineHtml::with_fix(true);
1883        let content = "Above<hr>Below";
1884        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1885        let fixed = rule.fix(&ctx).unwrap();
1886        assert_eq!(fixed, "Above\n---\nBelow");
1887    }
1888
1889    #[test]
1890    fn test_md033_fix_enabled_hr_self_closing() {
1891        // <hr/> should also convert
1892        let rule = MD033NoInlineHtml::with_fix(true);
1893        let content = "Above<hr/>Below";
1894        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1895        let fixed = rule.fix(&ctx).unwrap();
1896        assert_eq!(fixed, "Above\n---\nBelow");
1897    }
1898
1899    #[test]
1900    fn test_md033_fix_skips_nested_tags() {
1901        // Tags with nested HTML - outer tags may not be fully fixed due to overlapping ranges
1902        // The inner tags are processed first, which can invalidate outer tag ranges
1903        let rule = MD033NoInlineHtml::with_fix(true);
1904        let content = "This has <em>text with <strong>nested</strong> tags</em> here.";
1905        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1906        let fixed = rule.fix(&ctx).unwrap();
1907        // Inner <strong> is converted to markdown, outer <em> range becomes invalid
1908        // This is expected behavior - user should run fix multiple times for nested tags
1909        assert_eq!(fixed, "This has <em>text with **nested** tags</em> here.");
1910    }
1911
1912    #[test]
1913    fn test_md033_fix_skips_tags_with_attributes() {
1914        // Tags with attributes should NOT be fixed at all - leave as-is
1915        // User may want to keep the attributes (e.g., class="highlight" for styling)
1916        let rule = MD033NoInlineHtml::with_fix(true);
1917        let content = "This has <em class=\"highlight\">emphasized</em> text.";
1918        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1919        let fixed = rule.fix(&ctx).unwrap();
1920        // Content should remain unchanged - we don't know if attributes matter
1921        assert_eq!(fixed, content);
1922    }
1923
1924    #[test]
1925    fn test_md033_fix_disabled_no_changes() {
1926        // When fix is disabled, original content should be returned
1927        let rule = MD033NoInlineHtml::default(); // fix is false by default
1928        let content = "This has <em>emphasized text</em> here.";
1929        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1930        let fixed = rule.fix(&ctx).unwrap();
1931        assert_eq!(fixed, content, "Should return original content when fix is disabled");
1932    }
1933
1934    #[test]
1935    fn test_md033_fix_capability_enabled() {
1936        let rule = MD033NoInlineHtml::with_fix(true);
1937        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::FullyFixable);
1938    }
1939
1940    #[test]
1941    fn test_md033_fix_multiple_tags() {
1942        // Test fixing multiple HTML tags in one document
1943        let rule = MD033NoInlineHtml::with_fix(true);
1944        let content = "Here is <em>italic</em> and <strong>bold</strong> text.";
1945        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1946        let fixed = rule.fix(&ctx).unwrap();
1947        assert_eq!(fixed, "Here is *italic* and **bold** text.");
1948    }
1949
1950    #[test]
1951    fn test_md033_fix_uppercase_tags() {
1952        // HTML tags are case-insensitive
1953        let rule = MD033NoInlineHtml::with_fix(true);
1954        let content = "This has <EM>emphasized</EM> text.";
1955        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1956        let fixed = rule.fix(&ctx).unwrap();
1957        assert_eq!(fixed, "This has *emphasized* text.");
1958    }
1959
1960    #[test]
1961    fn test_md033_fix_unsafe_tags_not_modified() {
1962        // Tags without safe markdown equivalents should NOT be modified
1963        // Only safe fixable tags (em, i, strong, b, code, br, hr) get converted
1964        let rule = MD033NoInlineHtml::with_fix(true);
1965        let content = "This has <div>a div</div> content.";
1966        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1967        let fixed = rule.fix(&ctx).unwrap();
1968        // <div> is not a safe fixable tag, so content should be unchanged
1969        assert_eq!(fixed, "This has <div>a div</div> content.");
1970    }
1971
1972    #[test]
1973    fn test_md033_fix_img_tag_converted() {
1974        // <img> tags with simple src/alt attributes are converted to markdown images
1975        let rule = MD033NoInlineHtml::with_fix(true);
1976        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\">";
1977        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1978        let fixed = rule.fix(&ctx).unwrap();
1979        // <img> is converted to ![alt](src) format
1980        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
1981    }
1982
1983    #[test]
1984    fn test_md033_fix_img_tag_with_extra_attrs_not_converted() {
1985        // <img> tags with width/height/style attributes are NOT converted
1986        let rule = MD033NoInlineHtml::with_fix(true);
1987        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
1988        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1989        let fixed = rule.fix(&ctx).unwrap();
1990        // Has width attribute - not safe to convert
1991        assert_eq!(fixed, "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">");
1992    }
1993
1994    #[test]
1995    fn test_md033_fix_relaxed_a_with_target_is_converted() {
1996        let rule = relaxed_fix_rule();
1997        let content = "Link: <a href=\"https://example.com\" target=\"_blank\">Example</a>";
1998        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1999        let fixed = rule.fix(&ctx).unwrap();
2000        assert_eq!(fixed, "Link: [Example](https://example.com)");
2001    }
2002
2003    #[test]
2004    fn test_md033_fix_relaxed_img_with_width_is_converted() {
2005        let rule = relaxed_fix_rule();
2006        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
2007        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2008        let fixed = rule.fix(&ctx).unwrap();
2009        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
2010    }
2011
2012    #[test]
2013    fn test_md033_fix_relaxed_rejects_unknown_extra_attributes() {
2014        let rule = relaxed_fix_rule();
2015        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" aria-label=\"hero\">";
2016        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2017        let fixed = rule.fix(&ctx).unwrap();
2018        assert_eq!(fixed, content, "Unknown attributes should not be dropped by default");
2019    }
2020
2021    #[test]
2022    fn test_md033_fix_relaxed_still_blocks_unsafe_schemes() {
2023        let rule = relaxed_fix_rule();
2024        let content = "Link: <a href=\"javascript:alert(1)\" target=\"_blank\">Example</a>";
2025        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2026        let fixed = rule.fix(&ctx).unwrap();
2027        assert_eq!(fixed, content, "Unsafe URL schemes must never be converted");
2028    }
2029
2030    #[test]
2031    fn test_md033_fix_relaxed_wrapper_strip_requires_second_pass_for_nested_html() {
2032        let rule = relaxed_fix_rule();
2033        let content = "<p align=\"center\">\n  <img src=\"logo.svg\" alt=\"Logo\" width=\"120\" />\n</p>";
2034        let ctx1 = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2035        let fixed_once = rule.fix(&ctx1).unwrap();
2036        assert!(
2037            fixed_once.contains("<p"),
2038            "First pass should keep wrapper when inner HTML is still present: {fixed_once}"
2039        );
2040        assert!(
2041            fixed_once.contains("![Logo](logo.svg)"),
2042            "Inner image should be converted on first pass: {fixed_once}"
2043        );
2044
2045        let ctx2 = LintContext::new(&fixed_once, crate::config::MarkdownFlavor::Standard, None);
2046        let fixed_twice = rule.fix(&ctx2).unwrap();
2047        assert!(
2048            !fixed_twice.contains("<p"),
2049            "Second pass should strip configured wrapper: {fixed_twice}"
2050        );
2051        assert!(fixed_twice.contains("![Logo](logo.svg)"));
2052    }
2053
2054    #[test]
2055    fn test_md033_fix_relaxed_multiple_droppable_attrs() {
2056        let rule = relaxed_fix_rule();
2057        let content = "<a href=\"https://example.com\" target=\"_blank\" rel=\"noopener\" class=\"btn\">Click</a>";
2058        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2059        let fixed = rule.fix(&ctx).unwrap();
2060        assert_eq!(fixed, "[Click](https://example.com)");
2061    }
2062
2063    #[test]
2064    fn test_md033_fix_relaxed_img_multiple_droppable_attrs() {
2065        let rule = relaxed_fix_rule();
2066        let content = "<img src=\"logo.png\" alt=\"Logo\" width=\"120\" height=\"40\" style=\"border:none\" />";
2067        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2068        let fixed = rule.fix(&ctx).unwrap();
2069        assert_eq!(fixed, "![Logo](logo.png)");
2070    }
2071
2072    #[test]
2073    fn test_md033_fix_relaxed_event_handler_never_dropped() {
2074        let rule = relaxed_fix_rule();
2075        let content = "<a href=\"https://example.com\" onclick=\"track()\">Link</a>";
2076        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2077        let fixed = rule.fix(&ctx).unwrap();
2078        assert_eq!(fixed, content, "Event handler attributes must block conversion");
2079    }
2080
2081    #[test]
2082    fn test_md033_fix_relaxed_event_handler_even_with_custom_config() {
2083        // Even if someone adds on* to drop-attributes, event handlers must be rejected
2084        let config = MD033Config {
2085            fix: true,
2086            fix_mode: MD033FixMode::Relaxed,
2087            drop_attributes: vec!["on*".to_string(), "target".to_string()],
2088            ..MD033Config::default()
2089        };
2090        let rule = MD033NoInlineHtml::from_config_struct(config);
2091        let content = "<a href=\"https://example.com\" onclick=\"alert(1)\">Link</a>";
2092        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2093        let fixed = rule.fix(&ctx).unwrap();
2094        assert_eq!(fixed, content, "on* event handlers must never be dropped");
2095    }
2096
2097    #[test]
2098    fn test_md033_fix_relaxed_custom_drop_attributes() {
2099        let config = MD033Config {
2100            fix: true,
2101            fix_mode: MD033FixMode::Relaxed,
2102            drop_attributes: vec!["loading".to_string()],
2103            ..MD033Config::default()
2104        };
2105        let rule = MD033NoInlineHtml::from_config_struct(config);
2106        // "loading" is in the custom list, "width" is NOT
2107        let content = "<img src=\"x.jpg\" alt=\"\" loading=\"lazy\">";
2108        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2109        let fixed = rule.fix(&ctx).unwrap();
2110        assert_eq!(fixed, "![](x.jpg)", "Custom drop-attributes should be respected");
2111
2112        let content2 = "<img src=\"x.jpg\" alt=\"\" width=\"100\">";
2113        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
2114        let fixed2 = rule.fix(&ctx2).unwrap();
2115        assert_eq!(
2116            fixed2, content2,
2117            "Attributes not in custom list should block conversion"
2118        );
2119    }
2120
2121    #[test]
2122    fn test_md033_fix_relaxed_custom_strip_wrapper() {
2123        let config = MD033Config {
2124            fix: true,
2125            fix_mode: MD033FixMode::Relaxed,
2126            strip_wrapper_elements: vec!["div".to_string()],
2127            ..MD033Config::default()
2128        };
2129        let rule = MD033NoInlineHtml::from_config_struct(config);
2130        let content = "<div>Some text content</div>";
2131        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2132        let fixed = rule.fix(&ctx).unwrap();
2133        assert_eq!(fixed, "Some text content");
2134    }
2135
2136    #[test]
2137    fn test_md033_fix_relaxed_wrapper_with_plain_text() {
2138        let rule = relaxed_fix_rule();
2139        let content = "<p align=\"center\">Just some text</p>";
2140        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2141        let fixed = rule.fix(&ctx).unwrap();
2142        assert_eq!(fixed, "Just some text");
2143    }
2144
2145    #[test]
2146    fn test_md033_fix_relaxed_data_attr_with_wildcard() {
2147        let config = MD033Config {
2148            fix: true,
2149            fix_mode: MD033FixMode::Relaxed,
2150            drop_attributes: vec!["data-*".to_string(), "target".to_string()],
2151            ..MD033Config::default()
2152        };
2153        let rule = MD033NoInlineHtml::from_config_struct(config);
2154        let content = "<a href=\"https://example.com\" data-tracking=\"abc\" target=\"_blank\">Link</a>";
2155        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2156        let fixed = rule.fix(&ctx).unwrap();
2157        assert_eq!(fixed, "[Link](https://example.com)");
2158    }
2159
2160    #[test]
2161    fn test_md033_fix_relaxed_mixed_droppable_and_blocking_attrs() {
2162        let rule = relaxed_fix_rule();
2163        // "target" is droppable, "aria-label" is not in the default list
2164        let content = "<a href=\"https://example.com\" target=\"_blank\" aria-label=\"nav\">Link</a>";
2165        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2166        let fixed = rule.fix(&ctx).unwrap();
2167        assert_eq!(fixed, content, "Non-droppable attribute should block conversion");
2168    }
2169
2170    #[test]
2171    fn test_md033_fix_relaxed_badge_pattern() {
2172        // Common GitHub README badge pattern
2173        let rule = relaxed_fix_rule();
2174        let content = "<a href=\"https://crates.io/crates/rumdl\" target=\"_blank\"><img src=\"https://img.shields.io/crates/v/rumdl.svg\" alt=\"Crate\" width=\"120\" /></a>";
2175        let ctx1 = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2176        let fixed_once = rule.fix(&ctx1).unwrap();
2177        // First pass should convert the inner <img>
2178        assert!(
2179            fixed_once.contains("![Crate](https://img.shields.io/crates/v/rumdl.svg)"),
2180            "Inner img should be converted: {fixed_once}"
2181        );
2182
2183        // Second pass converts the <a> wrapper
2184        let ctx2 = LintContext::new(&fixed_once, crate::config::MarkdownFlavor::Standard, None);
2185        let fixed_twice = rule.fix(&ctx2).unwrap();
2186        assert!(
2187            fixed_twice
2188                .contains("[![Crate](https://img.shields.io/crates/v/rumdl.svg)](https://crates.io/crates/rumdl)"),
2189            "Badge should produce nested markdown image link: {fixed_twice}"
2190        );
2191    }
2192
2193    #[test]
2194    fn test_md033_fix_relaxed_conservative_mode_unchanged() {
2195        // Verify conservative mode (default) is unaffected by the relaxed logic
2196        let rule = MD033NoInlineHtml::with_fix(true);
2197        let content = "<a href=\"https://example.com\" target=\"_blank\">Link</a>";
2198        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2199        let fixed = rule.fix(&ctx).unwrap();
2200        assert_eq!(fixed, content, "Conservative mode should not drop target attribute");
2201    }
2202
2203    #[test]
2204    fn test_md033_fix_relaxed_img_inside_pre_not_converted() {
2205        // <img> inside <pre> must NOT be converted, even in relaxed mode
2206        let rule = relaxed_fix_rule();
2207        let content = "<pre>\n  <img src=\"diagram.png\" alt=\"d\" width=\"100\" />\n</pre>";
2208        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2209        let fixed = rule.fix(&ctx).unwrap();
2210        assert!(fixed.contains("<img"), "img inside pre must not be converted: {fixed}");
2211    }
2212
2213    #[test]
2214    fn test_md033_fix_relaxed_wrapper_nested_inside_div_not_stripped() {
2215        // <p> nested inside <div> should not be stripped
2216        let rule = relaxed_fix_rule();
2217        let content = "<div><p>text</p></div>";
2218        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2219        let fixed = rule.fix(&ctx).unwrap();
2220        assert!(
2221            fixed.contains("<p>text</p>") || fixed.contains("<p>"),
2222            "Nested <p> inside <div> should not be stripped: {fixed}"
2223        );
2224    }
2225
2226    #[test]
2227    fn test_md033_fix_relaxed_img_inside_nested_wrapper_not_converted() {
2228        // <img> inside <div><p>...</p></div> must NOT be converted because the
2229        // <p> wrapper can't be stripped (it's nested), so the markdown would be
2230        // stuck inside an HTML block where it won't render.
2231        let rule = relaxed_fix_rule();
2232        let content = "<div><p><img src=\"x.jpg\" alt=\"pic\" width=\"100\" /></p></div>";
2233        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2234        let fixed = rule.fix(&ctx).unwrap();
2235        assert!(
2236            fixed.contains("<img"),
2237            "img inside nested wrapper must not be converted: {fixed}"
2238        );
2239    }
2240
2241    #[test]
2242    fn test_md033_fix_mixed_safe_tags() {
2243        // All tags are now safe fixable (em, img, strong)
2244        let rule = MD033NoInlineHtml::with_fix(true);
2245        let content = "<em>italic</em> and <img src=\"x.jpg\"> and <strong>bold</strong>";
2246        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2247        let fixed = rule.fix(&ctx).unwrap();
2248        // All are converted
2249        assert_eq!(fixed, "*italic* and ![](x.jpg) and **bold**");
2250    }
2251
2252    #[test]
2253    fn test_md033_fix_multiple_tags_same_line() {
2254        // Multiple tags on the same line should all be fixed correctly
2255        let rule = MD033NoInlineHtml::with_fix(true);
2256        let content = "Regular text <i>italic</i> and <b>bold</b> here.";
2257        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2258        let fixed = rule.fix(&ctx).unwrap();
2259        assert_eq!(fixed, "Regular text *italic* and **bold** here.");
2260    }
2261
2262    #[test]
2263    fn test_md033_fix_multiple_em_tags_same_line() {
2264        // Multiple em/strong tags on the same line
2265        let rule = MD033NoInlineHtml::with_fix(true);
2266        let content = "<em>first</em> and <strong>second</strong> and <code>third</code>";
2267        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2268        let fixed = rule.fix(&ctx).unwrap();
2269        assert_eq!(fixed, "*first* and **second** and `third`");
2270    }
2271
2272    #[test]
2273    fn test_md033_fix_skips_tags_inside_pre() {
2274        // Tags inside <pre> blocks should NOT be fixed (would break structure)
2275        let rule = MD033NoInlineHtml::with_fix(true);
2276        let content = "<pre><code><em>VALUE</em></code></pre>";
2277        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2278        let fixed = rule.fix(&ctx).unwrap();
2279        // The <em> inside <pre><code> should NOT be converted
2280        // Only the outer structure might be changed
2281        assert!(
2282            !fixed.contains("*VALUE*"),
2283            "Tags inside <pre> should not be converted to markdown. Got: {fixed}"
2284        );
2285    }
2286
2287    #[test]
2288    fn test_md033_fix_skips_tags_inside_div() {
2289        // Tags inside HTML block elements should not be fixed
2290        let rule = MD033NoInlineHtml::with_fix(true);
2291        let content = "<div>\n<em>emphasized</em>\n</div>";
2292        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2293        let fixed = rule.fix(&ctx).unwrap();
2294        // The <em> inside <div> should not be converted to *emphasized*
2295        assert!(
2296            !fixed.contains("*emphasized*"),
2297            "Tags inside HTML blocks should not be converted. Got: {fixed}"
2298        );
2299    }
2300
2301    #[test]
2302    fn test_md033_fix_outside_html_block() {
2303        // Tags outside HTML blocks should still be fixed
2304        let rule = MD033NoInlineHtml::with_fix(true);
2305        let content = "<div>\ncontent\n</div>\n\nOutside <em>emphasized</em> text.";
2306        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2307        let fixed = rule.fix(&ctx).unwrap();
2308        // The <em> outside the div should be converted
2309        assert!(
2310            fixed.contains("*emphasized*"),
2311            "Tags outside HTML blocks should be converted. Got: {fixed}"
2312        );
2313    }
2314
2315    #[test]
2316    fn test_md033_fix_with_id_attribute() {
2317        // Tags with id attributes should not be fixed (id might be used for anchors)
2318        let rule = MD033NoInlineHtml::with_fix(true);
2319        let content = "See <em id=\"important\">this note</em> for details.";
2320        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2321        let fixed = rule.fix(&ctx).unwrap();
2322        // Should remain unchanged - id attribute matters for linking
2323        assert_eq!(fixed, content);
2324    }
2325
2326    #[test]
2327    fn test_md033_fix_with_style_attribute() {
2328        // Tags with style attributes should not be fixed
2329        let rule = MD033NoInlineHtml::with_fix(true);
2330        let content = "This is <strong style=\"color: red\">important</strong> text.";
2331        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2332        let fixed = rule.fix(&ctx).unwrap();
2333        // Should remain unchanged - style attribute provides formatting
2334        assert_eq!(fixed, content);
2335    }
2336
2337    #[test]
2338    fn test_md033_fix_mixed_with_and_without_attributes() {
2339        // Mix of tags with and without attributes
2340        let rule = MD033NoInlineHtml::with_fix(true);
2341        let content = "<em>normal</em> and <em class=\"special\">styled</em> text.";
2342        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2343        let fixed = rule.fix(&ctx).unwrap();
2344        // Only the tag without attributes should be fixed
2345        assert_eq!(fixed, "*normal* and <em class=\"special\">styled</em> text.");
2346    }
2347
2348    #[test]
2349    fn test_md033_quick_fix_tag_with_attributes_no_fix() {
2350        // Quick fix should not be provided for tags with attributes
2351        let rule = MD033NoInlineHtml::with_fix(true);
2352        let content = "<em class=\"test\">emphasized</em>";
2353        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2354        let result = rule.check(&ctx).unwrap();
2355
2356        assert_eq!(result.len(), 1, "Should find one HTML tag");
2357        // No fix should be provided for tags with attributes
2358        assert!(
2359            result[0].fix.is_none(),
2360            "Should NOT have a fix for tags with attributes"
2361        );
2362    }
2363
2364    #[test]
2365    fn test_md033_fix_skips_html_entities() {
2366        // Tags containing HTML entities should NOT be fixed
2367        // HTML entities need HTML context to render; markdown won't process them
2368        let rule = MD033NoInlineHtml::with_fix(true);
2369        let content = "<code>&vert;</code>";
2370        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2371        let fixed = rule.fix(&ctx).unwrap();
2372        // Should remain unchanged - converting would break rendering
2373        assert_eq!(fixed, content);
2374    }
2375
2376    #[test]
2377    fn test_md033_fix_skips_multiple_html_entities() {
2378        // Multiple HTML entities should also be skipped
2379        let rule = MD033NoInlineHtml::with_fix(true);
2380        let content = "<code>&lt;T&gt;</code>";
2381        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2382        let fixed = rule.fix(&ctx).unwrap();
2383        // Should remain unchanged
2384        assert_eq!(fixed, content);
2385    }
2386
2387    #[test]
2388    fn test_md033_fix_allows_ampersand_without_entity() {
2389        // Content with & but no semicolon should still be fixed
2390        let rule = MD033NoInlineHtml::with_fix(true);
2391        let content = "<code>a & b</code>";
2392        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2393        let fixed = rule.fix(&ctx).unwrap();
2394        // Should be converted since & is not part of an entity
2395        assert_eq!(fixed, "`a & b`");
2396    }
2397
2398    #[test]
2399    fn test_md033_fix_em_with_entities_skipped() {
2400        // <em> with entities should also be skipped
2401        let rule = MD033NoInlineHtml::with_fix(true);
2402        let content = "<em>&nbsp;text</em>";
2403        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2404        let fixed = rule.fix(&ctx).unwrap();
2405        // Should remain unchanged
2406        assert_eq!(fixed, content);
2407    }
2408
2409    #[test]
2410    fn test_md033_fix_skips_nested_em_in_code() {
2411        // Tags nested inside other HTML elements should NOT be fixed
2412        // e.g., <code><em>n</em></code> - the <em> should not be converted
2413        let rule = MD033NoInlineHtml::with_fix(true);
2414        let content = "<code><em>n</em></code>";
2415        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2416        let fixed = rule.fix(&ctx).unwrap();
2417        // The inner <em> should NOT be converted to *n* because it's nested
2418        // The whole structure should be left as-is (or outer code converted, but not inner)
2419        assert!(
2420            !fixed.contains("*n*"),
2421            "Nested <em> should not be converted to markdown. Got: {fixed}"
2422        );
2423    }
2424
2425    #[test]
2426    fn test_md033_fix_skips_nested_in_table() {
2427        // Tags nested in HTML structures in tables should not be fixed
2428        let rule = MD033NoInlineHtml::with_fix(true);
2429        let content = "| <code>><em>n</em></code> | description |";
2430        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2431        let fixed = rule.fix(&ctx).unwrap();
2432        // Should not convert nested <em> to *n*
2433        assert!(
2434            !fixed.contains("*n*"),
2435            "Nested tags in table should not be converted. Got: {fixed}"
2436        );
2437    }
2438
2439    #[test]
2440    fn test_md033_fix_standalone_em_still_converted() {
2441        // Standalone (non-nested) <em> should still be converted
2442        let rule = MD033NoInlineHtml::with_fix(true);
2443        let content = "This is <em>emphasized</em> text.";
2444        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2445        let fixed = rule.fix(&ctx).unwrap();
2446        assert_eq!(fixed, "This is *emphasized* text.");
2447    }
2448
2449    // ==========================================================================
2450    // Obsidian Templater Plugin Syntax Tests
2451    //
2452    // Templater is a popular Obsidian plugin that uses `<% ... %>` syntax for
2453    // template interpolation. The `<%` pattern is NOT captured by the HTML tag
2454    // parser because `%` is not a valid HTML tag name character (tags must start
2455    // with a letter). This behavior is documented here with comprehensive tests.
2456    //
2457    // Reference: https://silentvoid13.github.io/Templater/
2458    // ==========================================================================
2459
2460    #[test]
2461    fn test_md033_templater_basic_interpolation_not_flagged() {
2462        // Basic Templater interpolation: <% expr %>
2463        // Should NOT be flagged because `%` is not a valid HTML tag character
2464        let rule = MD033NoInlineHtml::default();
2465        let content = "Today is <% tp.date.now() %> which is nice.";
2466        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2467        let result = rule.check(&ctx).unwrap();
2468        assert!(
2469            result.is_empty(),
2470            "Templater basic interpolation should not be flagged as HTML. Got: {result:?}"
2471        );
2472    }
2473
2474    #[test]
2475    fn test_md033_templater_file_functions_not_flagged() {
2476        // Templater file functions: <% tp.file.* %>
2477        let rule = MD033NoInlineHtml::default();
2478        let content = "File: <% tp.file.title %>\nCreated: <% tp.file.creation_date() %>";
2479        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2480        let result = rule.check(&ctx).unwrap();
2481        assert!(
2482            result.is_empty(),
2483            "Templater file functions should not be flagged. Got: {result:?}"
2484        );
2485    }
2486
2487    #[test]
2488    fn test_md033_templater_with_arguments_not_flagged() {
2489        // Templater with function arguments
2490        let rule = MD033NoInlineHtml::default();
2491        let content = r#"Date: <% tp.date.now("YYYY-MM-DD") %>"#;
2492        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2493        let result = rule.check(&ctx).unwrap();
2494        assert!(
2495            result.is_empty(),
2496            "Templater with arguments should not be flagged. Got: {result:?}"
2497        );
2498    }
2499
2500    #[test]
2501    fn test_md033_templater_javascript_execution_not_flagged() {
2502        // Templater JavaScript execution block: <%* code %>
2503        let rule = MD033NoInlineHtml::default();
2504        let content = "<%* const today = tp.date.now(); tR += today; %>";
2505        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2506        let result = rule.check(&ctx).unwrap();
2507        assert!(
2508            result.is_empty(),
2509            "Templater JS execution block should not be flagged. Got: {result:?}"
2510        );
2511    }
2512
2513    #[test]
2514    fn test_md033_templater_dynamic_execution_not_flagged() {
2515        // Templater dynamic/preview execution: <%+ expr %>
2516        let rule = MD033NoInlineHtml::default();
2517        let content = "Dynamic: <%+ tp.date.now() %>";
2518        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2519        let result = rule.check(&ctx).unwrap();
2520        assert!(
2521            result.is_empty(),
2522            "Templater dynamic execution should not be flagged. Got: {result:?}"
2523        );
2524    }
2525
2526    #[test]
2527    fn test_md033_templater_whitespace_trim_all_not_flagged() {
2528        // Templater whitespace control - trim all: <%_ expr _%>
2529        let rule = MD033NoInlineHtml::default();
2530        let content = "<%_ tp.date.now() _%>";
2531        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2532        let result = rule.check(&ctx).unwrap();
2533        assert!(
2534            result.is_empty(),
2535            "Templater trim-all whitespace should not be flagged. Got: {result:?}"
2536        );
2537    }
2538
2539    #[test]
2540    fn test_md033_templater_whitespace_trim_newline_not_flagged() {
2541        // Templater whitespace control - trim newline: <%- expr -%>
2542        let rule = MD033NoInlineHtml::default();
2543        let content = "<%- tp.date.now() -%>";
2544        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2545        let result = rule.check(&ctx).unwrap();
2546        assert!(
2547            result.is_empty(),
2548            "Templater trim-newline should not be flagged. Got: {result:?}"
2549        );
2550    }
2551
2552    #[test]
2553    fn test_md033_templater_combined_modifiers_not_flagged() {
2554        // Templater combined whitespace and execution modifiers
2555        let rule = MD033NoInlineHtml::default();
2556        let contents = [
2557            "<%-* const x = 1; -%>",  // trim + JS execution
2558            "<%_+ tp.date.now() _%>", // trim-all + dynamic
2559            "<%- tp.file.title -%>",  // trim-newline only
2560            "<%_ tp.file.title _%>",  // trim-all only
2561        ];
2562        for content in contents {
2563            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2564            let result = rule.check(&ctx).unwrap();
2565            assert!(
2566                result.is_empty(),
2567                "Templater combined modifiers should not be flagged: {content}. Got: {result:?}"
2568            );
2569        }
2570    }
2571
2572    #[test]
2573    fn test_md033_templater_multiline_block_not_flagged() {
2574        // Multi-line Templater JavaScript block
2575        let rule = MD033NoInlineHtml::default();
2576        let content = r#"<%*
2577const x = 1;
2578const y = 2;
2579tR += x + y;
2580%>"#;
2581        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2582        let result = rule.check(&ctx).unwrap();
2583        assert!(
2584            result.is_empty(),
2585            "Templater multi-line block should not be flagged. Got: {result:?}"
2586        );
2587    }
2588
2589    #[test]
2590    fn test_md033_templater_with_angle_brackets_in_condition_not_flagged() {
2591        // Templater with angle brackets in JavaScript condition
2592        // This is a key edge case: `<` inside Templater should not trigger HTML detection
2593        let rule = MD033NoInlineHtml::default();
2594        let content = "<%* if (x < 5) { tR += 'small'; } %>";
2595        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2596        let result = rule.check(&ctx).unwrap();
2597        assert!(
2598            result.is_empty(),
2599            "Templater with angle brackets in conditions should not be flagged. Got: {result:?}"
2600        );
2601    }
2602
2603    #[test]
2604    fn test_md033_templater_mixed_with_html_only_html_flagged() {
2605        // Templater syntax mixed with actual HTML - only HTML should be flagged
2606        let rule = MD033NoInlineHtml::default();
2607        let content = "<% tp.date.now() %> is today's date. <div>This is HTML</div>";
2608        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2609        let result = rule.check(&ctx).unwrap();
2610        assert_eq!(result.len(), 1, "Should only flag the HTML div tag");
2611        assert!(
2612            result[0].message.contains("<div>"),
2613            "Should flag <div>, got: {}",
2614            result[0].message
2615        );
2616    }
2617
2618    #[test]
2619    fn test_md033_templater_in_heading_not_flagged() {
2620        // Templater in markdown heading
2621        let rule = MD033NoInlineHtml::default();
2622        let content = "# <% tp.file.title %>";
2623        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2624        let result = rule.check(&ctx).unwrap();
2625        assert!(
2626            result.is_empty(),
2627            "Templater in heading should not be flagged. Got: {result:?}"
2628        );
2629    }
2630
2631    #[test]
2632    fn test_md033_templater_multiple_on_same_line_not_flagged() {
2633        // Multiple Templater blocks on same line
2634        let rule = MD033NoInlineHtml::default();
2635        let content = "From <% tp.date.now() %> to <% tp.date.tomorrow() %> we have meetings.";
2636        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2637        let result = rule.check(&ctx).unwrap();
2638        assert!(
2639            result.is_empty(),
2640            "Multiple Templater blocks should not be flagged. Got: {result:?}"
2641        );
2642    }
2643
2644    #[test]
2645    fn test_md033_templater_in_code_block_not_flagged() {
2646        // Templater syntax in code blocks should not be flagged (code blocks are skipped)
2647        let rule = MD033NoInlineHtml::default();
2648        let content = "```\n<% tp.date.now() %>\n```";
2649        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2650        let result = rule.check(&ctx).unwrap();
2651        assert!(
2652            result.is_empty(),
2653            "Templater in code block should not be flagged. Got: {result:?}"
2654        );
2655    }
2656
2657    #[test]
2658    fn test_md033_templater_in_inline_code_not_flagged() {
2659        // Templater syntax in inline code span should not be flagged
2660        let rule = MD033NoInlineHtml::default();
2661        let content = "Use `<% tp.date.now() %>` for current date.";
2662        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2663        let result = rule.check(&ctx).unwrap();
2664        assert!(
2665            result.is_empty(),
2666            "Templater in inline code should not be flagged. Got: {result:?}"
2667        );
2668    }
2669
2670    #[test]
2671    fn test_md033_templater_also_works_in_standard_flavor() {
2672        // Templater syntax should also not be flagged in Standard flavor
2673        // because the HTML parser doesn't recognize `<%` as a valid tag
2674        let rule = MD033NoInlineHtml::default();
2675        let content = "<% tp.date.now() %> works everywhere.";
2676        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2677        let result = rule.check(&ctx).unwrap();
2678        assert!(
2679            result.is_empty(),
2680            "Templater should not be flagged even in Standard flavor. Got: {result:?}"
2681        );
2682    }
2683
2684    #[test]
2685    fn test_md033_templater_empty_tag_not_flagged() {
2686        // Empty Templater tags
2687        let rule = MD033NoInlineHtml::default();
2688        let content = "<%>";
2689        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2690        let result = rule.check(&ctx).unwrap();
2691        assert!(
2692            result.is_empty(),
2693            "Empty Templater-like tag should not be flagged. Got: {result:?}"
2694        );
2695    }
2696
2697    #[test]
2698    fn test_md033_templater_unclosed_not_flagged() {
2699        // Unclosed Templater tags - these are template errors, not HTML
2700        let rule = MD033NoInlineHtml::default();
2701        let content = "<% tp.date.now() without closing tag";
2702        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2703        let result = rule.check(&ctx).unwrap();
2704        assert!(
2705            result.is_empty(),
2706            "Unclosed Templater should not be flagged as HTML. Got: {result:?}"
2707        );
2708    }
2709
2710    #[test]
2711    fn test_md033_templater_with_newlines_inside_not_flagged() {
2712        // Templater with newlines inside the expression
2713        let rule = MD033NoInlineHtml::default();
2714        let content = r#"<% tp.date.now("YYYY") +
2715"-" +
2716tp.date.now("MM") %>"#;
2717        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2718        let result = rule.check(&ctx).unwrap();
2719        assert!(
2720            result.is_empty(),
2721            "Templater with internal newlines should not be flagged. Got: {result:?}"
2722        );
2723    }
2724
2725    #[test]
2726    fn test_md033_erb_style_tags_not_flagged() {
2727        // ERB/EJS style tags (similar to Templater) are also not HTML
2728        // This documents the general principle that `<%` is not valid HTML
2729        let rule = MD033NoInlineHtml::default();
2730        let content = "<%= variable %> and <% code %> and <%# comment %>";
2731        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2732        let result = rule.check(&ctx).unwrap();
2733        assert!(
2734            result.is_empty(),
2735            "ERB/EJS style tags should not be flagged as HTML. Got: {result:?}"
2736        );
2737    }
2738
2739    #[test]
2740    fn test_md033_templater_complex_expression_not_flagged() {
2741        // Complex Templater expression with multiple function calls
2742        let rule = MD033NoInlineHtml::default();
2743        let content = r#"<%*
2744const file = tp.file.title;
2745const date = tp.date.now("YYYY-MM-DD");
2746const folder = tp.file.folder();
2747tR += `# ${file}\n\nCreated: ${date}\nIn: ${folder}`;
2748%>"#;
2749        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2750        let result = rule.check(&ctx).unwrap();
2751        assert!(
2752            result.is_empty(),
2753            "Complex Templater expression should not be flagged. Got: {result:?}"
2754        );
2755    }
2756
2757    #[test]
2758    fn test_md033_percent_sign_variations_not_flagged() {
2759        // Various patterns starting with <% that should all be safe
2760        let rule = MD033NoInlineHtml::default();
2761        let patterns = [
2762            "<%=",  // ERB output
2763            "<%#",  // ERB comment
2764            "<%%",  // Double percent
2765            "<%!",  // Some template engines
2766            "<%@",  // JSP directive
2767            "<%--", // JSP comment
2768        ];
2769        for pattern in patterns {
2770            let content = format!("{pattern} content %>");
2771            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
2772            let result = rule.check(&ctx).unwrap();
2773            assert!(
2774                result.is_empty(),
2775                "Pattern {pattern} should not be flagged. Got: {result:?}"
2776            );
2777        }
2778    }
2779
2780    // ───── Bug #3: Bracket escaping in image-inside-link conversion ─────
2781    //
2782    // When <a> wraps already-converted markdown image text, the bracket escaping
2783    // must be skipped to produce valid [![alt](url)](href) instead of !\[\](url)
2784
2785    #[test]
2786    fn test_md033_fix_a_wrapping_markdown_image_no_escaped_brackets() {
2787        // When <a> wraps a markdown image (from a prior fix iteration),
2788        // the result should be [![](url)](href) — no escaped brackets
2789        let rule = MD033NoInlineHtml::with_fix(true);
2790        let content = r#"<a href="https://example.com">![](https://example.com/image.png)</a>"#;
2791        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2792        let fixed = rule.fix(&ctx).unwrap();
2793
2794        assert_eq!(fixed, "[![](https://example.com/image.png)](https://example.com)",);
2795        assert!(!fixed.contains(r"\["), "Must not escape brackets: {fixed}");
2796        assert!(!fixed.contains(r"\]"), "Must not escape brackets: {fixed}");
2797    }
2798
2799    #[test]
2800    fn test_md033_fix_a_wrapping_markdown_image_with_alt() {
2801        // <a> wrapping ![alt](url) preserves alt text in linked image
2802        let rule = MD033NoInlineHtml::with_fix(true);
2803        let content =
2804            r#"<a href="https://github.com/repo">![Contributors](https://contrib.rocks/image?repo=org/repo)</a>"#;
2805        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2806        let fixed = rule.fix(&ctx).unwrap();
2807
2808        assert_eq!(
2809            fixed,
2810            "[![Contributors](https://contrib.rocks/image?repo=org/repo)](https://github.com/repo)"
2811        );
2812    }
2813
2814    #[test]
2815    fn test_md033_fix_img_without_alt_produces_empty_alt() {
2816        let rule = MD033NoInlineHtml::with_fix(true);
2817        let content = r#"<img src="photo.jpg" />"#;
2818        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2819        let fixed = rule.fix(&ctx).unwrap();
2820
2821        assert_eq!(fixed, "![](photo.jpg)");
2822    }
2823
2824    #[test]
2825    fn test_md033_fix_a_with_plain_text_still_escapes_brackets() {
2826        // Plain text brackets inside <a> SHOULD be escaped
2827        let rule = MD033NoInlineHtml::with_fix(true);
2828        let content = r#"<a href="https://example.com">text with [brackets]</a>"#;
2829        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2830        let fixed = rule.fix(&ctx).unwrap();
2831
2832        assert!(
2833            fixed.contains(r"\[brackets\]"),
2834            "Plain text brackets should be escaped: {fixed}"
2835        );
2836    }
2837
2838    #[test]
2839    fn test_md033_fix_a_with_image_plus_extra_text_escapes_brackets() {
2840        // Mixed content: image followed by bracketed text — brackets must be escaped
2841        // The image detection must NOT match partial content
2842        let rule = MD033NoInlineHtml::with_fix(true);
2843        let content = r#"<a href="/link">![](img.png) see [docs]</a>"#;
2844        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2845        let fixed = rule.fix(&ctx).unwrap();
2846
2847        // "see [docs]" brackets should be escaped since inner content is mixed
2848        assert!(
2849            fixed.contains(r"\[docs\]"),
2850            "Brackets in mixed image+text content should be escaped: {fixed}"
2851        );
2852    }
2853
2854    #[test]
2855    fn test_md033_fix_img_in_a_end_to_end() {
2856        // End-to-end: verify that iterative fixing of <a><img></a>
2857        // produces the correct final result through the fix coordinator
2858        use crate::config::Config;
2859        use crate::fix_coordinator::FixCoordinator;
2860
2861        let rule = MD033NoInlineHtml::with_fix(true);
2862        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2863
2864        let mut content =
2865            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image?repo=org/repo" /></a>"#
2866                .to_string();
2867        let config = Config::default();
2868        let coordinator = FixCoordinator::new();
2869
2870        let result = coordinator
2871            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2872            .unwrap();
2873
2874        assert_eq!(
2875            content, "[![](https://contrib.rocks/image?repo=org/repo)](https://github.com/org/repo)",
2876            "End-to-end: <a><img></a> should become valid linked image"
2877        );
2878        assert!(result.converged);
2879        assert!(!content.contains(r"\["), "No escaped brackets: {content}");
2880    }
2881
2882    #[test]
2883    fn test_md033_fix_img_in_a_with_alt_end_to_end() {
2884        use crate::config::Config;
2885        use crate::fix_coordinator::FixCoordinator;
2886
2887        let rule = MD033NoInlineHtml::with_fix(true);
2888        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2889
2890        let mut content =
2891            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image" alt="Contributors" /></a>"#
2892                .to_string();
2893        let config = Config::default();
2894        let coordinator = FixCoordinator::new();
2895
2896        let result = coordinator
2897            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2898            .unwrap();
2899
2900        assert_eq!(
2901            content,
2902            "[![Contributors](https://contrib.rocks/image)](https://github.com/org/repo)",
2903        );
2904        assert!(result.converged);
2905    }
2906}