Skip to main content

rumdl_lib/rules/
md033_no_inline_html.rs

//!
//! Rule MD033: No HTML tags
//!
//! See [docs/md033.md](../../docs/md033.md) for full documentation, configuration, and examples.
5
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::regex_cache::*;
8use std::collections::HashSet;
9
10mod md033_config;
11use md033_config::{MD033Config, MD033FixMode};
12
13#[derive(Clone)]
14pub struct MD033NoInlineHtml {
15    config: MD033Config,
16    allowed: HashSet<String>,
17    disallowed: HashSet<String>,
18    drop_attributes: HashSet<String>,
19    strip_wrapper_elements: HashSet<String>,
20}
21
22impl Default for MD033NoInlineHtml {
23    fn default() -> Self {
24        let config = MD033Config::default();
25        let allowed = config.allowed_set();
26        let disallowed = config.disallowed_set();
27        let drop_attributes = config.drop_attributes_set();
28        let strip_wrapper_elements = config.strip_wrapper_elements_set();
29        Self {
30            config,
31            allowed,
32            disallowed,
33            drop_attributes,
34            strip_wrapper_elements,
35        }
36    }
37}
38
39impl MD033NoInlineHtml {
40    pub fn new() -> Self {
41        Self::default()
42    }
43
44    pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
45        let config = MD033Config {
46            allowed: allowed_vec.clone(),
47            disallowed: Vec::new(),
48            fix: false,
49            ..MD033Config::default()
50        };
51        let allowed = config.allowed_set();
52        let disallowed = config.disallowed_set();
53        let drop_attributes = config.drop_attributes_set();
54        let strip_wrapper_elements = config.strip_wrapper_elements_set();
55        Self {
56            config,
57            allowed,
58            disallowed,
59            drop_attributes,
60            strip_wrapper_elements,
61        }
62    }
63
64    pub fn with_disallowed(disallowed_vec: Vec<String>) -> Self {
65        let config = MD033Config {
66            allowed: Vec::new(),
67            disallowed: disallowed_vec.clone(),
68            fix: false,
69            ..MD033Config::default()
70        };
71        let allowed = config.allowed_set();
72        let disallowed = config.disallowed_set();
73        let drop_attributes = config.drop_attributes_set();
74        let strip_wrapper_elements = config.strip_wrapper_elements_set();
75        Self {
76            config,
77            allowed,
78            disallowed,
79            drop_attributes,
80            strip_wrapper_elements,
81        }
82    }
83
84    /// Create a new rule with auto-fix enabled
85    pub fn with_fix(fix: bool) -> Self {
86        let config = MD033Config {
87            allowed: Vec::new(),
88            disallowed: Vec::new(),
89            fix,
90            ..MD033Config::default()
91        };
92        let allowed = config.allowed_set();
93        let disallowed = config.disallowed_set();
94        let drop_attributes = config.drop_attributes_set();
95        let strip_wrapper_elements = config.strip_wrapper_elements_set();
96        Self {
97            config,
98            allowed,
99            disallowed,
100            drop_attributes,
101            strip_wrapper_elements,
102        }
103    }
104
105    pub fn from_config_struct(config: MD033Config) -> Self {
106        let allowed = config.allowed_set();
107        let disallowed = config.disallowed_set();
108        let drop_attributes = config.drop_attributes_set();
109        let strip_wrapper_elements = config.strip_wrapper_elements_set();
110        Self {
111            config,
112            allowed,
113            disallowed,
114            drop_attributes,
115            strip_wrapper_elements,
116        }
117    }
118
119    // Efficient check for allowed tags using HashSet (case-insensitive)
120    #[inline]
121    fn is_tag_allowed(&self, tag: &str) -> bool {
122        if self.allowed.is_empty() {
123            return false;
124        }
125        // Remove angle brackets and slashes, then split by whitespace or '>'
126        let tag = tag.trim_start_matches('<').trim_start_matches('/');
127        let tag_name = tag
128            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
129            .next()
130            .unwrap_or("");
131        self.allowed.contains(&tag_name.to_lowercase())
132    }
133
134    /// Check if a tag is in the disallowed set (for disallowed-only mode)
135    #[inline]
136    fn is_tag_disallowed(&self, tag: &str) -> bool {
137        if self.disallowed.is_empty() {
138            return false;
139        }
140        // Remove angle brackets and slashes, then split by whitespace or '>'
141        let tag = tag.trim_start_matches('<').trim_start_matches('/');
142        let tag_name = tag
143            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
144            .next()
145            .unwrap_or("");
146        self.disallowed.contains(&tag_name.to_lowercase())
147    }
148
149    /// Check if operating in disallowed-only mode
150    #[inline]
151    fn is_disallowed_mode(&self) -> bool {
152        self.config.is_disallowed_mode()
153    }
154
155    // Check if a tag is an HTML comment
156    #[inline]
157    fn is_html_comment(&self, tag: &str) -> bool {
158        tag.starts_with("<!--") && tag.ends_with("-->")
159    }
160
161    /// Check if a tag name is a valid HTML element or custom element.
162    /// Returns false for placeholder syntax like `<NAME>`, `<resource>`, `<actual>`.
163    ///
164    /// Per HTML spec, custom elements must contain a hyphen (e.g., `<my-component>`).
165    #[inline]
166    fn is_html_element_or_custom(tag_name: &str) -> bool {
167        const HTML_ELEMENTS: &[&str] = &[
168            // Document structure
169            "html",
170            "head",
171            "body",
172            "title",
173            "base",
174            "link",
175            "meta",
176            "style",
177            // Sections
178            "article",
179            "section",
180            "nav",
181            "aside",
182            "h1",
183            "h2",
184            "h3",
185            "h4",
186            "h5",
187            "h6",
188            "hgroup",
189            "header",
190            "footer",
191            "address",
192            "main",
193            "search",
194            // Grouping
195            "p",
196            "hr",
197            "pre",
198            "blockquote",
199            "ol",
200            "ul",
201            "menu",
202            "li",
203            "dl",
204            "dt",
205            "dd",
206            "figure",
207            "figcaption",
208            "div",
209            // Text-level
210            "a",
211            "em",
212            "strong",
213            "small",
214            "s",
215            "cite",
216            "q",
217            "dfn",
218            "abbr",
219            "ruby",
220            "rt",
221            "rp",
222            "data",
223            "time",
224            "code",
225            "var",
226            "samp",
227            "kbd",
228            "sub",
229            "sup",
230            "i",
231            "b",
232            "u",
233            "mark",
234            "bdi",
235            "bdo",
236            "span",
237            "br",
238            "wbr",
239            // Edits
240            "ins",
241            "del",
242            // Embedded
243            "picture",
244            "source",
245            "img",
246            "iframe",
247            "embed",
248            "object",
249            "param",
250            "video",
251            "audio",
252            "track",
253            "map",
254            "area",
255            "svg",
256            "math",
257            "canvas",
258            // Tables
259            "table",
260            "caption",
261            "colgroup",
262            "col",
263            "tbody",
264            "thead",
265            "tfoot",
266            "tr",
267            "td",
268            "th",
269            // Forms
270            "form",
271            "label",
272            "input",
273            "button",
274            "select",
275            "datalist",
276            "optgroup",
277            "option",
278            "textarea",
279            "output",
280            "progress",
281            "meter",
282            "fieldset",
283            "legend",
284            // Interactive
285            "details",
286            "summary",
287            "dialog",
288            // Scripting
289            "script",
290            "noscript",
291            "template",
292            "slot",
293            // Deprecated but recognized
294            "acronym",
295            "applet",
296            "basefont",
297            "big",
298            "center",
299            "dir",
300            "font",
301            "frame",
302            "frameset",
303            "isindex",
304            "marquee",
305            "noembed",
306            "noframes",
307            "plaintext",
308            "strike",
309            "tt",
310            "xmp",
311        ];
312
313        let lower = tag_name.to_ascii_lowercase();
314        if HTML_ELEMENTS.contains(&lower.as_str()) {
315            return true;
316        }
317        // Custom elements must contain a hyphen per HTML spec
318        tag_name.contains('-')
319    }
320
321    // Check if a tag is likely a programming type annotation rather than HTML
322    #[inline]
323    fn is_likely_type_annotation(&self, tag: &str) -> bool {
324        // Common programming type names that are often used in generics
325        const COMMON_TYPES: &[&str] = &[
326            "string",
327            "number",
328            "any",
329            "void",
330            "null",
331            "undefined",
332            "array",
333            "promise",
334            "function",
335            "error",
336            "date",
337            "regexp",
338            "symbol",
339            "bigint",
340            "map",
341            "set",
342            "weakmap",
343            "weakset",
344            "iterator",
345            "generator",
346            "t",
347            "u",
348            "v",
349            "k",
350            "e", // Common single-letter type parameters
351            "userdata",
352            "apiresponse",
353            "config",
354            "options",
355            "params",
356            "result",
357            "response",
358            "request",
359            "data",
360            "item",
361            "element",
362            "node",
363        ];
364
365        let tag_content = tag
366            .trim_start_matches('<')
367            .trim_end_matches('>')
368            .trim_start_matches('/');
369        let tag_name = tag_content
370            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
371            .next()
372            .unwrap_or("");
373
374        // Check if it's a simple tag (no attributes) with a common type name
375        if !tag_content.contains(' ') && !tag_content.contains('=') {
376            COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
377        } else {
378            false
379        }
380    }
381
382    // Check if a tag is actually an email address in angle brackets
383    #[inline]
384    fn is_email_address(&self, tag: &str) -> bool {
385        let content = tag.trim_start_matches('<').trim_end_matches('>');
386        // Simple email pattern: contains @ and has reasonable structure
387        content.contains('@')
388            && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
389            && content.split('@').count() == 2
390            && content.split('@').all(|part| !part.is_empty())
391    }
392
393    // Check if a tag has the markdown attribute (MkDocs/Material for MkDocs)
394    #[inline]
395    fn has_markdown_attribute(&self, tag: &str) -> bool {
396        // Check for various forms of markdown attribute
397        // Examples: <div markdown>, <div markdown="1">, <div class="result" markdown>
398        tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
399    }
400
401    /// Check if a tag contains JSX-specific attributes that indicate it's JSX, not HTML
402    /// JSX uses different attribute names than HTML:
403    /// - `className` instead of `class`
404    /// - `htmlFor` instead of `for`
405    /// - camelCase event handlers (`onClick`, `onChange`, `onSubmit`, etc.)
406    /// - JSX expression syntax `={...}` for dynamic values
407    #[inline]
408    fn has_jsx_attributes(tag: &str) -> bool {
409        // JSX-specific attribute names (HTML uses class, for, onclick, etc.)
410        tag.contains("className")
411            || tag.contains("htmlFor")
412            || tag.contains("dangerouslySetInnerHTML")
413            // camelCase event handlers (JSX uses onClick, HTML uses onclick)
414            || tag.contains("onClick")
415            || tag.contains("onChange")
416            || tag.contains("onSubmit")
417            || tag.contains("onFocus")
418            || tag.contains("onBlur")
419            || tag.contains("onKeyDown")
420            || tag.contains("onKeyUp")
421            || tag.contains("onKeyPress")
422            || tag.contains("onMouseDown")
423            || tag.contains("onMouseUp")
424            || tag.contains("onMouseEnter")
425            || tag.contains("onMouseLeave")
426            // JSX expression syntax: ={expression} or ={ expression }
427            || tag.contains("={")
428    }
429
430    // Check if a tag is actually a URL in angle brackets
431    #[inline]
432    fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
433        let content = tag.trim_start_matches('<').trim_end_matches('>');
434        // Check for common URL schemes
435        content.starts_with("http://")
436            || content.starts_with("https://")
437            || content.starts_with("ftp://")
438            || content.starts_with("ftps://")
439            || content.starts_with("mailto:")
440    }
441
442    #[inline]
443    fn is_relaxed_fix_mode(&self) -> bool {
444        self.config.fix_mode == MD033FixMode::Relaxed
445    }
446
447    #[inline]
448    fn is_droppable_attribute(&self, attr_name: &str) -> bool {
449        // Event handler attributes (onclick, onload, etc.) are never droppable
450        // because they can execute arbitrary JavaScript.
451        if attr_name.starts_with("on") && attr_name.len() > 2 {
452            return false;
453        }
454        self.drop_attributes.contains(attr_name)
455            || (attr_name.starts_with("data-")
456                && (self.drop_attributes.contains("data-*") || self.drop_attributes.contains("data-")))
457    }
458
459    #[inline]
460    fn is_strippable_wrapper(&self, tag_name: &str) -> bool {
461        self.is_relaxed_fix_mode() && self.strip_wrapper_elements.contains(tag_name)
462    }
463
464    /// Check whether `byte_offset` sits directly inside a top-level strippable
465    /// wrapper element (e.g. `<p>`).  Returns `true` only when:
466    ///  1. The nearest unclosed opening tag before the offset is a configured
467    ///     wrapper element, AND
468    ///  2. That wrapper is itself NOT nested inside another HTML element.
469    ///
470    /// Condition 2 prevents converting inner content when the wrapper cannot
471    /// be stripped (e.g. `<div><p><img/></p></div>` -- stripping `<p>` is
472    /// blocked because it is nested, so converting `<img>` would leave
473    /// markdown inside an HTML block where it won't render).
474    fn is_inside_strippable_wrapper(&self, content: &str, byte_offset: usize) -> bool {
475        if byte_offset == 0 {
476            return false;
477        }
478        let before = content[..byte_offset].trim_end();
479        if !before.ends_with('>') || before.ends_with("->") {
480            return false;
481        }
482        if let Some(last_lt) = before.rfind('<') {
483            let potential_tag = &before[last_lt..];
484            if potential_tag.starts_with("</") || potential_tag.starts_with("<!--") {
485                return false;
486            }
487            let parent_name = potential_tag
488                .trim_start_matches('<')
489                .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
490                .next()
491                .unwrap_or("")
492                .to_lowercase();
493            if !self.strip_wrapper_elements.contains(&parent_name) {
494                return false;
495            }
496            // Verify the wrapper itself is not nested inside another element.
497            let wrapper_before = before[..last_lt].trim_end();
498            if wrapper_before.ends_with('>') && !wrapper_before.ends_with("->") {
499                if let Some(outer_lt) = wrapper_before.rfind('<') {
500                    let outer_tag = &wrapper_before[outer_lt..];
501                    if !outer_tag.starts_with("</") && !outer_tag.starts_with("<!--") {
502                        return false;
503                    }
504                }
505            }
506            return true;
507        }
508        false
509    }
510
511    /// Convert paired HTML tags to their Markdown equivalents.
512    /// Returns None if the tag cannot be safely converted (has nested tags, HTML entities, etc.)
513    fn convert_to_markdown(tag_name: &str, inner_content: &str) -> Option<String> {
514        // Skip if content contains nested HTML tags
515        if inner_content.contains('<') {
516            return None;
517        }
518        // Skip if content contains HTML entities (e.g., &vert;, &amp;, &lt;)
519        // These need HTML context to render correctly; markdown won't process them
520        if inner_content.contains('&') && inner_content.contains(';') {
521            // Check for common HTML entity patterns
522            let has_entity = inner_content
523                .split('&')
524                .skip(1)
525                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
526            if has_entity {
527                return None;
528            }
529        }
530        match tag_name {
531            "em" | "i" => Some(format!("*{inner_content}*")),
532            "strong" | "b" => Some(format!("**{inner_content}**")),
533            "code" => {
534                // Handle backticks in content by using double backticks with padding
535                if inner_content.contains('`') {
536                    Some(format!("`` {inner_content} ``"))
537                } else {
538                    Some(format!("`{inner_content}`"))
539                }
540            }
541            _ => None,
542        }
543    }
544
545    /// Convert self-closing HTML tags to their Markdown equivalents.
546    fn convert_self_closing_to_markdown(&self, tag_name: &str, opening_tag: &str) -> Option<String> {
547        match tag_name {
548            "br" => match self.config.br_style {
549                md033_config::BrStyle::TrailingSpaces => Some("  \n".to_string()),
550                md033_config::BrStyle::Backslash => Some("\\\n".to_string()),
551            },
552            "hr" => Some("\n---\n".to_string()),
553            "img" => self.convert_img_to_markdown(opening_tag),
554            _ => None,
555        }
556    }
557
558    /// Parse all attributes from an HTML tag into a list of (name, value) pairs.
559    /// This provides proper attribute parsing instead of naive string matching.
560    fn parse_attributes(tag: &str) -> Vec<(String, Option<String>)> {
561        let mut attrs = Vec::new();
562
563        // Remove < and > and tag name
564        let tag_content = tag.trim_start_matches('<').trim_end_matches('>').trim_end_matches('/');
565
566        // Find first whitespace to skip tag name
567        let attr_start = tag_content
568            .find(|c: char| c.is_whitespace())
569            .map(|i| i + 1)
570            .unwrap_or(tag_content.len());
571
572        if attr_start >= tag_content.len() {
573            return attrs;
574        }
575
576        let attr_str = &tag_content[attr_start..];
577        let mut chars = attr_str.chars().peekable();
578
579        while chars.peek().is_some() {
580            // Skip whitespace
581            while chars.peek().is_some_and(|c| c.is_whitespace()) {
582                chars.next();
583            }
584
585            if chars.peek().is_none() {
586                break;
587            }
588
589            // Read attribute name
590            let mut attr_name = String::new();
591            while let Some(&c) = chars.peek() {
592                if c.is_whitespace() || c == '=' || c == '>' || c == '/' {
593                    break;
594                }
595                attr_name.push(c);
596                chars.next();
597            }
598
599            if attr_name.is_empty() {
600                break;
601            }
602
603            // Skip whitespace before =
604            while chars.peek().is_some_and(|c| c.is_whitespace()) {
605                chars.next();
606            }
607
608            // Check for = and value
609            if chars.peek() == Some(&'=') {
610                chars.next(); // consume =
611
612                // Skip whitespace after =
613                while chars.peek().is_some_and(|c| c.is_whitespace()) {
614                    chars.next();
615                }
616
617                // Read value
618                let mut value = String::new();
619                if let Some(&quote) = chars.peek() {
620                    if quote == '"' || quote == '\'' {
621                        chars.next(); // consume opening quote
622                        for c in chars.by_ref() {
623                            if c == quote {
624                                break;
625                            }
626                            value.push(c);
627                        }
628                    } else {
629                        // Unquoted value
630                        while let Some(&c) = chars.peek() {
631                            if c.is_whitespace() || c == '>' || c == '/' {
632                                break;
633                            }
634                            value.push(c);
635                            chars.next();
636                        }
637                    }
638                }
639                attrs.push((attr_name.to_ascii_lowercase(), Some(value)));
640            } else {
641                // Boolean attribute (no value)
642                attrs.push((attr_name.to_ascii_lowercase(), None));
643            }
644        }
645
646        attrs
647    }
648
649    /// Extract an HTML attribute value from a tag string.
650    /// Handles double quotes, single quotes, and unquoted values.
651    /// Returns None if the attribute is not found.
652    fn extract_attribute(tag: &str, attr_name: &str) -> Option<String> {
653        let attrs = Self::parse_attributes(tag);
654        let attr_lower = attr_name.to_ascii_lowercase();
655
656        attrs
657            .into_iter()
658            .find(|(name, _)| name == &attr_lower)
659            .and_then(|(_, value)| value)
660    }
661
662    /// Check if an HTML tag has extra attributes beyond the specified allowed ones.
663    /// Uses proper attribute parsing to avoid false positives from string matching.
664    fn has_extra_attributes(&self, tag: &str, allowed_attrs: &[&str]) -> bool {
665        let attrs = Self::parse_attributes(tag);
666
667        // All event handlers (on*) are dangerous
668        // Plus common attributes that would be lost in markdown conversion
669        const DANGEROUS_ATTR_PREFIXES: &[&str] = &["on"]; // onclick, onload, onerror, etc.
670        const DANGEROUS_ATTRS: &[&str] = &[
671            "class",
672            "id",
673            "style",
674            "target",
675            "rel",
676            "download",
677            "referrerpolicy",
678            "crossorigin",
679            "loading",
680            "decoding",
681            "fetchpriority",
682            "sizes",
683            "srcset",
684            "usemap",
685            "ismap",
686            "width",
687            "height",
688            "name",   // anchor names
689            "data-*", // data attributes (checked separately)
690        ];
691
692        for (attr_name, _) in attrs {
693            // Skip allowed attributes (list is small, linear scan is efficient)
694            if allowed_attrs.iter().any(|a| a.to_ascii_lowercase() == attr_name) {
695                continue;
696            }
697
698            if self.is_relaxed_fix_mode() {
699                if self.is_droppable_attribute(&attr_name) {
700                    continue;
701                }
702                return true;
703            }
704
705            // Check for event handlers (on*)
706            for prefix in DANGEROUS_ATTR_PREFIXES {
707                if attr_name.starts_with(prefix) && attr_name.len() > prefix.len() {
708                    return true;
709                }
710            }
711
712            // Check for data-* attributes
713            if attr_name.starts_with("data-") {
714                return true;
715            }
716
717            // Check for other dangerous attributes
718            if DANGEROUS_ATTRS.contains(&attr_name.as_str()) {
719                return true;
720            }
721        }
722
723        false
724    }
725
726    /// Convert `<a href="url">text</a>` to `[text](url)` or `[text](url "title")`
727    /// Returns None if conversion is not safe.
728    fn convert_a_to_markdown(&self, opening_tag: &str, inner_content: &str) -> Option<String> {
729        // Extract href attribute
730        let href = Self::extract_attribute(opening_tag, "href")?;
731
732        // Check URL is safe
733        if !MD033Config::is_safe_url(&href) {
734            return None;
735        }
736
737        // Check for nested HTML tags in content
738        if inner_content.contains('<') {
739            return None;
740        }
741
742        // Check for HTML entities that wouldn't render correctly in markdown
743        if inner_content.contains('&') && inner_content.contains(';') {
744            let has_entity = inner_content
745                .split('&')
746                .skip(1)
747                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
748            if has_entity {
749                return None;
750            }
751        }
752
753        // Extract optional title attribute
754        let title = Self::extract_attribute(opening_tag, "title");
755
756        // Check for extra dangerous attributes (title is allowed)
757        if self.has_extra_attributes(opening_tag, &["href", "title"]) {
758            return None;
759        }
760
761        // If inner content is exactly a markdown image (from a prior <img> fix),
762        // use it directly without bracket escaping to produce valid [![alt](src)](href).
763        // Must verify the entire content is a single image — not mixed content like
764        // "![](url) extra [text]" where trailing brackets still need escaping.
765        let trimmed_inner = inner_content.trim();
766        let is_markdown_image =
767            trimmed_inner.starts_with("![") && trimmed_inner.contains("](") && trimmed_inner.ends_with(')') && {
768                // Verify the closing ](url) accounts for the rest of the content
769                // by finding the image's ]( and checking nothing follows the final )
770                if let Some(bracket_close) = trimmed_inner.rfind("](") {
771                    let after_paren = &trimmed_inner[bracket_close + 2..];
772                    // The rest should be just "url)" — find the matching close paren
773                    after_paren.ends_with(')')
774                        && after_paren.chars().filter(|&c| c == ')').count()
775                            >= after_paren.chars().filter(|&c| c == '(').count()
776                } else {
777                    false
778                }
779            };
780        let escaped_text = if is_markdown_image {
781            trimmed_inner.to_string()
782        } else {
783            // Escape special markdown characters in link text
784            // Brackets need escaping to avoid breaking the link syntax
785            inner_content.replace('[', r"\[").replace(']', r"\]")
786        };
787
788        // Escape parentheses in URL
789        let escaped_url = href.replace('(', "%28").replace(')', "%29");
790
791        // Format with or without title
792        if let Some(title_text) = title {
793            // Escape quotes in title
794            let escaped_title = title_text.replace('"', r#"\""#);
795            Some(format!("[{escaped_text}]({escaped_url} \"{escaped_title}\")"))
796        } else {
797            Some(format!("[{escaped_text}]({escaped_url})"))
798        }
799    }
800
801    /// Convert `<img src="url" alt="text">` to `![alt](src)` or `![alt](src "title")`
802    /// Returns None if conversion is not safe.
803    fn convert_img_to_markdown(&self, tag: &str) -> Option<String> {
804        // Extract src attribute (required)
805        let src = Self::extract_attribute(tag, "src")?;
806
807        // Check URL is safe
808        if !MD033Config::is_safe_url(&src) {
809            return None;
810        }
811
812        // Extract alt attribute (optional, default to empty)
813        let alt = Self::extract_attribute(tag, "alt").unwrap_or_default();
814
815        // Extract optional title attribute
816        let title = Self::extract_attribute(tag, "title");
817
818        // Check for extra dangerous attributes (title is allowed)
819        if self.has_extra_attributes(tag, &["src", "alt", "title"]) {
820            return None;
821        }
822
823        // Escape special markdown characters in alt text
824        let escaped_alt = alt.replace('[', r"\[").replace(']', r"\]");
825
826        // Escape parentheses in URL
827        let escaped_url = src.replace('(', "%28").replace(')', "%29");
828
829        // Format with or without title
830        if let Some(title_text) = title {
831            // Escape quotes in title
832            let escaped_title = title_text.replace('"', r#"\""#);
833            Some(format!("![{escaped_alt}]({escaped_url} \"{escaped_title}\")"))
834        } else {
835            Some(format!("![{escaped_alt}]({escaped_url})"))
836        }
837    }
838
839    /// Check if an HTML tag has attributes that would make conversion unsafe
840    fn has_significant_attributes(opening_tag: &str) -> bool {
841        // Tags with just whitespace or empty are fine
842        let tag_content = opening_tag
843            .trim_start_matches('<')
844            .trim_end_matches('>')
845            .trim_end_matches('/');
846
847        // Split by whitespace; if there's more than the tag name, it has attributes
848        let parts: Vec<&str> = tag_content.split_whitespace().collect();
849        parts.len() > 1
850    }
851
852    /// Check if a tag appears to be nested inside another HTML element
853    /// by looking at the surrounding context (e.g., `<code><em>text</em></code>`)
854    fn is_nested_in_html(content: &str, tag_byte_start: usize, tag_byte_end: usize) -> bool {
855        // Check if there's a `>` immediately before this tag (indicating inside another element)
856        if tag_byte_start > 0 {
857            let before = &content[..tag_byte_start];
858            let before_trimmed = before.trim_end();
859            if before_trimmed.ends_with('>') && !before_trimmed.ends_with("->") {
860                // Check it's not a closing tag or comment
861                if let Some(last_lt) = before_trimmed.rfind('<') {
862                    let potential_tag = &before_trimmed[last_lt..];
863                    // Skip if it's a closing tag (</...>) or comment (<!--)
864                    if !potential_tag.starts_with("</") && !potential_tag.starts_with("<!--") {
865                        return true;
866                    }
867                }
868            }
869        }
870        // Check if there's a `<` immediately after the closing tag (indicating inside another element)
871        if tag_byte_end < content.len() {
872            let after = &content[tag_byte_end..];
873            let after_trimmed = after.trim_start();
874            if after_trimmed.starts_with("</") {
875                return true;
876            }
877        }
878        false
879    }
880
881    /// Calculate fix to remove HTML tags while keeping content.
882    ///
883    /// For self-closing tags like `<br/>`, returns a single fix to remove the tag.
884    /// For paired tags like `<span>text</span>`, returns the replacement text (just the content).
885    ///
886    /// Returns (range, replacement_text) where range is the bytes to replace
887    /// and replacement_text is what to put there (content without tags, or empty for self-closing).
888    ///
889    /// When `in_html_block` is true, returns None in conservative mode.  In
890    /// relaxed mode two exceptions apply:
891    /// - Strippable wrapper elements (e.g. `<p>`) bypass the block guard so
892    ///   they can be stripped even though they ARE the HTML block.
893    /// - Self-closing tags whose direct parent is a strippable wrapper also
894    ///   bypass the guard so inner content can be converted first.
895    fn calculate_fix(
896        &self,
897        content: &str,
898        opening_tag: &str,
899        tag_byte_start: usize,
900        in_html_block: bool,
901    ) -> Option<(std::ops::Range<usize>, String)> {
902        // Extract tag name from opening tag
903        let tag_name = opening_tag
904            .trim_start_matches('<')
905            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
906            .next()?
907            .to_lowercase();
908
909        // Check if it's a self-closing tag (ends with /> or is a void element like <br>)
910        let is_self_closing =
911            opening_tag.ends_with("/>") || matches!(tag_name.as_str(), "br" | "hr" | "img" | "input" | "meta" | "link");
912
913        if is_self_closing {
914            // When fix is enabled, try to convert to Markdown equivalent.
915            // Skip tags inside HTML blocks (would break structure), UNLESS we
916            // are in relaxed mode and the containing block is a strippable
917            // wrapper -- this lets the inner element be converted first so the
918            // wrapper can be stripped on a subsequent pass.
919            let block_ok = !in_html_block
920                || (self.is_relaxed_fix_mode() && self.is_inside_strippable_wrapper(content, tag_byte_start));
921            if self.config.fix
922                && MD033Config::is_safe_fixable_tag(&tag_name)
923                && block_ok
924                && let Some(markdown) = self.convert_self_closing_to_markdown(&tag_name, opening_tag)
925            {
926                return Some((tag_byte_start..tag_byte_start + opening_tag.len(), markdown));
927            }
928            // Can't convert this self-closing tag to Markdown, don't provide a fix
929            // (e.g., <input>, <meta> - these have no Markdown equivalent without the new img support)
930            return None;
931        }
932
933        // Search for the closing tag after the opening tag (case-insensitive)
934        let search_start = tag_byte_start + opening_tag.len();
935        let search_slice = &content[search_start..];
936
937        // Find closing tag case-insensitively
938        let closing_tag_lower = format!("</{tag_name}>");
939        let closing_pos = search_slice.to_ascii_lowercase().find(&closing_tag_lower);
940
941        if let Some(closing_pos) = closing_pos {
942            // Get actual closing tag from original content to get correct byte length
943            let closing_tag_len = closing_tag_lower.len();
944            let closing_byte_start = search_start + closing_pos;
945            let closing_byte_end = closing_byte_start + closing_tag_len;
946
947            // Extract the content between tags
948            let inner_content = &content[search_start..closing_byte_start];
949
950            // In relaxed mode, check wrapper stripping BEFORE the in_html_block
951            // guard because the wrapper element itself IS the HTML block. We only
952            // strip when:
953            //  - the wrapper is not nested inside another HTML element
954            //  - the inner content no longer contains HTML tags (prevents
955            //    overlapping byte-range replacements within a single fix pass)
956            if self.config.fix && self.is_strippable_wrapper(&tag_name) {
957                if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
958                    return None;
959                }
960                if inner_content.contains('<') {
961                    return None;
962                }
963                return Some((tag_byte_start..closing_byte_end, inner_content.trim().to_string()));
964            }
965
966            // Skip auto-fix if inside an HTML block (like <pre>, <div>, etc.)
967            // Converting tags inside HTML blocks would break the intended structure
968            if in_html_block {
969                return None;
970            }
971
972            // Skip auto-fix if this tag is nested inside another HTML element
973            // e.g., <code><em>text</em></code> - don't convert the inner <em>
974            if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
975                return None;
976            }
977
978            // When fix is enabled and tag is safe to convert, try markdown conversion
979            if self.config.fix && MD033Config::is_safe_fixable_tag(&tag_name) {
980                // Handle <a> tags specially - they require attribute extraction
981                if tag_name == "a" {
982                    if let Some(markdown) = self.convert_a_to_markdown(opening_tag, inner_content) {
983                        return Some((tag_byte_start..closing_byte_end, markdown));
984                    }
985                    // convert_a_to_markdown returned None - unsafe URL, nested HTML, etc.
986                    return None;
987                }
988
989                // For simple tags (em, strong, code, etc.) - no attributes allowed
990                if Self::has_significant_attributes(opening_tag) {
991                    // Don't provide a fix for tags with attributes
992                    // User may want to keep the attributes, so leave as-is
993                    return None;
994                }
995                if let Some(markdown) = Self::convert_to_markdown(&tag_name, inner_content) {
996                    return Some((tag_byte_start..closing_byte_end, markdown));
997                }
998                // convert_to_markdown returned None, meaning content has nested tags or
999                // HTML entities that shouldn't be converted - leave as-is
1000                return None;
1001            }
1002
1003            // For non-fixable tags, don't provide a fix
1004            // (e.g., <div>content</div>, <span>text</span>)
1005            return None;
1006        }
1007
1008        // If no closing tag found, don't provide a fix (malformed HTML)
1009        None
1010    }
1011}
1012
1013impl Rule for MD033NoInlineHtml {
1014    fn name(&self) -> &'static str {
1015        "MD033"
1016    }
1017
1018    fn description(&self) -> &'static str {
1019        "Inline HTML is not allowed"
1020    }
1021
1022    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
1023        let content = ctx.content;
1024
1025        // Early return: if no HTML tags at all, skip processing
1026        if content.is_empty() || !ctx.likely_has_html() {
1027            return Ok(Vec::new());
1028        }
1029
1030        // Quick check for HTML tag pattern before expensive processing
1031        if !HTML_TAG_QUICK_CHECK.is_match(content) {
1032            return Ok(Vec::new());
1033        }
1034
1035        let mut warnings = Vec::new();
1036
1037        // Use centralized HTML parser to get all HTML tags (including multiline)
1038        let html_tags = ctx.html_tags();
1039
1040        for html_tag in html_tags.iter() {
1041            // Skip closing tags (only warn on opening tags)
1042            if html_tag.is_closing {
1043                continue;
1044            }
1045
1046            let line_num = html_tag.line;
1047            let tag_byte_start = html_tag.byte_offset;
1048
1049            // Reconstruct tag string from byte offsets
1050            let tag = &content[html_tag.byte_offset..html_tag.byte_end];
1051
1052            // Skip tags in code blocks, PyMdown blocks, and block IALs
1053            if ctx
1054                .line_info(line_num)
1055                .is_some_and(|info| info.in_code_block || info.in_pymdown_block || info.is_kramdown_block_ial)
1056            {
1057                continue;
1058            }
1059
1060            // Skip HTML tags inside HTML comments
1061            if ctx.is_in_html_comment(tag_byte_start) {
1062                continue;
1063            }
1064
1065            // Skip HTML comments themselves
1066            if self.is_html_comment(tag) {
1067                continue;
1068            }
1069
1070            // Skip angle brackets inside link reference definition titles
1071            // e.g., [ref]: url "Title with <angle brackets>"
1072            if ctx.is_in_link_title(tag_byte_start) {
1073                continue;
1074            }
1075
1076            // Skip JSX components in MDX files (e.g., <Chart />, <MyComponent>)
1077            if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
1078                continue;
1079            }
1080
1081            // Skip JSX fragments in MDX files (<> and </>)
1082            if ctx.flavor.supports_jsx() && (html_tag.tag_name.is_empty() || tag == "<>" || tag == "</>") {
1083                continue;
1084            }
1085
1086            // Skip elements with JSX-specific attributes in MDX files
1087            // e.g., <div className="...">, <button onClick={handler}>
1088            if ctx.flavor.supports_jsx() && Self::has_jsx_attributes(tag) {
1089                continue;
1090            }
1091
1092            // Skip non-HTML elements (placeholder syntax like <NAME>, <resource>)
1093            if !Self::is_html_element_or_custom(&html_tag.tag_name) {
1094                continue;
1095            }
1096
1097            // Skip likely programming type annotations
1098            if self.is_likely_type_annotation(tag) {
1099                continue;
1100            }
1101
1102            // Skip email addresses in angle brackets
1103            if self.is_email_address(tag) {
1104                continue;
1105            }
1106
1107            // Skip URLs in angle brackets
1108            if self.is_url_in_angle_brackets(tag) {
1109                continue;
1110            }
1111
1112            // Skip tags inside code spans (use byte offset for reliable multi-line span detection)
1113            if ctx.is_byte_offset_in_code_span(tag_byte_start) {
1114                continue;
1115            }
1116
1117            // Determine whether to report this tag based on mode:
1118            // - Disallowed mode: only report tags in the disallowed list
1119            // - Default mode: report all tags except those in the allowed list
1120            if self.is_disallowed_mode() {
1121                // In disallowed mode, skip tags NOT in the disallowed list
1122                if !self.is_tag_disallowed(tag) {
1123                    continue;
1124                }
1125            } else {
1126                // In default mode, skip allowed tags
1127                if self.is_tag_allowed(tag) {
1128                    continue;
1129                }
1130            }
1131
1132            // Skip tags with markdown attribute in MkDocs mode
1133            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
1134                continue;
1135            }
1136
1137            // Check if we're inside an HTML block (like <pre>, <div>, etc.)
1138            let in_html_block = ctx.is_in_html_block(line_num);
1139
1140            // Calculate fix to remove HTML tags but keep content
1141            let fix = self
1142                .calculate_fix(content, tag, tag_byte_start, in_html_block)
1143                .map(|(range, replacement)| Fix { range, replacement });
1144
1145            // Calculate actual end line and column for multiline tags
1146            // Use byte_end - 1 to get the last character position of the tag
1147            let (end_line, end_col) = if html_tag.byte_end > 0 {
1148                ctx.offset_to_line_col(html_tag.byte_end - 1)
1149            } else {
1150                (line_num, html_tag.end_col + 1)
1151            };
1152
1153            // Report the HTML tag
1154            warnings.push(LintWarning {
1155                rule_name: Some(self.name().to_string()),
1156                line: line_num,
1157                column: html_tag.start_col + 1, // Convert to 1-indexed
1158                end_line,                       // Actual end line for multiline tags
1159                end_column: end_col + 1,        // Actual end column
1160                message: format!("Inline HTML found: {tag}"),
1161                severity: Severity::Warning,
1162                fix,
1163            });
1164        }
1165
1166        Ok(warnings)
1167    }
1168
1169    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
1170        // Auto-fix is opt-in: only apply if explicitly enabled in config
1171        if !self.config.fix {
1172            return Ok(ctx.content.to_string());
1173        }
1174
1175        // Get warnings with their inline fixes
1176        let warnings = self.check(ctx)?;
1177
1178        // If no warnings with fixes, return original content
1179        if warnings.is_empty() || !warnings.iter().any(|w| w.fix.is_some()) {
1180            return Ok(ctx.content.to_string());
1181        }
1182
1183        // Collect all fixes and sort by range start (descending) to apply from end to beginning
1184        let mut fixes: Vec<_> = warnings
1185            .iter()
1186            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
1187            .collect();
1188        fixes.sort_by(|a, b| b.0.cmp(&a.0));
1189
1190        // Apply fixes from end to beginning to preserve byte offsets
1191        let mut result = ctx.content.to_string();
1192        for (start, end, replacement) in fixes {
1193            if start < result.len() && end <= result.len() && start <= end {
1194                result.replace_range(start..end, replacement);
1195            }
1196        }
1197
1198        Ok(result)
1199    }
1200
1201    fn fix_capability(&self) -> crate::rule::FixCapability {
1202        if self.config.fix {
1203            crate::rule::FixCapability::FullyFixable
1204        } else {
1205            crate::rule::FixCapability::Unfixable
1206        }
1207    }
1208
1209    /// Get the category of this rule for selective processing
1210    fn category(&self) -> RuleCategory {
1211        RuleCategory::Html
1212    }
1213
1214    /// Check if this rule should be skipped
1215    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
1216        ctx.content.is_empty() || !ctx.likely_has_html()
1217    }
1218
1219    fn as_any(&self) -> &dyn std::any::Any {
1220        self
1221    }
1222
1223    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1224        let json_value = serde_json::to_value(&self.config).ok()?;
1225        Some((
1226            self.name().to_string(),
1227            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1228        ))
1229    }
1230
1231    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1232    where
1233        Self: Sized,
1234    {
1235        let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
1236        Box::new(Self::from_config_struct(rule_config))
1237    }
1238}
1239
1240#[cfg(test)]
1241mod tests {
1242    use super::*;
1243    use crate::lint_context::LintContext;
1244    use crate::rule::Rule;
1245
1246    fn relaxed_fix_rule() -> MD033NoInlineHtml {
1247        let config = MD033Config {
1248            fix: true,
1249            fix_mode: MD033FixMode::Relaxed,
1250            ..MD033Config::default()
1251        };
1252        MD033NoInlineHtml::from_config_struct(config)
1253    }
1254
1255    #[test]
1256    fn test_md033_basic_html() {
1257        let rule = MD033NoInlineHtml::default();
1258        let content = "<div>Some content</div>";
1259        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1260        let result = rule.check(&ctx).unwrap();
1261        // Only reports opening tags, not closing tags
1262        assert_eq!(result.len(), 1); // Only <div>, not </div>
1263        assert!(result[0].message.starts_with("Inline HTML found: <div>"));
1264    }
1265
1266    #[test]
1267    fn test_md033_case_insensitive() {
1268        let rule = MD033NoInlineHtml::default();
1269        let content = "<DiV>Some <B>content</B></dIv>";
1270        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1271        let result = rule.check(&ctx).unwrap();
1272        // Only reports opening tags, not closing tags
1273        assert_eq!(result.len(), 2); // <DiV>, <B> (not </B>, </dIv>)
1274        assert_eq!(result[0].message, "Inline HTML found: <DiV>");
1275        assert_eq!(result[1].message, "Inline HTML found: <B>");
1276    }
1277
1278    #[test]
1279    fn test_md033_allowed_tags() {
1280        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
1281        let content = "<div>Allowed</div><p>Not allowed</p><br/>";
1282        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1283        let result = rule.check(&ctx).unwrap();
1284        // Only warnings for non-allowed opening tags (<p> only, div and br are allowed)
1285        assert_eq!(result.len(), 1);
1286        assert_eq!(result[0].message, "Inline HTML found: <p>");
1287
1288        // Test case-insensitivity of allowed tags
1289        let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
1290        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1291        let result2 = rule.check(&ctx2).unwrap();
1292        assert_eq!(result2.len(), 1); // Only <P> flagged
1293        assert_eq!(result2[0].message, "Inline HTML found: <P>");
1294    }
1295
1296    #[test]
1297    fn test_md033_html_comments() {
1298        let rule = MD033NoInlineHtml::default();
1299        let content = "<!-- This is a comment --> <p>Not a comment</p>";
1300        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1301        let result = rule.check(&ctx).unwrap();
1302        // Should detect warnings for HTML opening tags (comments are skipped, closing tags not reported)
1303        assert_eq!(result.len(), 1); // Only <p>
1304        assert_eq!(result[0].message, "Inline HTML found: <p>");
1305    }
1306
1307    #[test]
1308    fn test_md033_tags_in_links() {
1309        let rule = MD033NoInlineHtml::default();
1310        let content = "[Link](http://example.com/<div>)";
1311        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1312        let result = rule.check(&ctx).unwrap();
1313        // The <div> in the URL should be detected as HTML (not skipped)
1314        assert_eq!(result.len(), 1);
1315        assert_eq!(result[0].message, "Inline HTML found: <div>");
1316
1317        let content2 = "[Link <a>text</a>](url)";
1318        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1319        let result2 = rule.check(&ctx2).unwrap();
1320        // Only reports opening tags
1321        assert_eq!(result2.len(), 1); // Only <a>
1322        assert_eq!(result2[0].message, "Inline HTML found: <a>");
1323    }
1324
1325    #[test]
1326    fn test_md033_fix_escaping() {
1327        let rule = MD033NoInlineHtml::default();
1328        let content = "Text with <div> and <br/> tags.";
1329        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1330        let fixed_content = rule.fix(&ctx).unwrap();
1331        // No fix for HTML tags; output should be unchanged
1332        assert_eq!(fixed_content, content);
1333    }
1334
1335    #[test]
1336    fn test_md033_in_code_blocks() {
1337        let rule = MD033NoInlineHtml::default();
1338        let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
1339        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1340        let result = rule.check(&ctx).unwrap();
1341        // Only reports opening tags outside code block
1342        assert_eq!(result.len(), 1); // Only <div> outside code block
1343        assert_eq!(result[0].message, "Inline HTML found: <div>");
1344    }
1345
1346    #[test]
1347    fn test_md033_in_code_spans() {
1348        let rule = MD033NoInlineHtml::default();
1349        let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
1350        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1351        let result = rule.check(&ctx).unwrap();
1352        // Should detect <br/> outside code span, but not tags inside code span
1353        assert_eq!(result.len(), 1);
1354        assert_eq!(result[0].message, "Inline HTML found: <br/>");
1355    }
1356
1357    #[test]
1358    fn test_md033_issue_90_code_span_with_diff_block() {
1359        // Test for issue #90: inline code span followed by diff code block
1360        let rule = MD033NoInlineHtml::default();
1361        let content = r#"# Heading
1362
1363`<env>`
1364
1365```diff
1366- this
1367+ that
1368```"#;
1369        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1370        let result = rule.check(&ctx).unwrap();
1371        // Should NOT detect <env> as HTML since it's inside backticks
1372        assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
1373    }
1374
1375    #[test]
1376    fn test_md033_multiple_code_spans_with_angle_brackets() {
1377        // Test multiple code spans on same line
1378        let rule = MD033NoInlineHtml::default();
1379        let content = "`<one>` and `<two>` and `<three>` are all code spans";
1380        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1381        let result = rule.check(&ctx).unwrap();
1382        assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
1383    }
1384
1385    #[test]
1386    fn test_md033_nested_angle_brackets_in_code_span() {
1387        // Test nested angle brackets
1388        let rule = MD033NoInlineHtml::default();
1389        let content = "Text with `<<nested>>` brackets";
1390        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1391        let result = rule.check(&ctx).unwrap();
1392        assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
1393    }
1394
1395    #[test]
1396    fn test_md033_code_span_at_end_before_code_block() {
1397        // Test code span at end of line before code block
1398        let rule = MD033NoInlineHtml::default();
1399        let content = "Testing `<test>`\n```\ncode here\n```";
1400        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1401        let result = rule.check(&ctx).unwrap();
1402        assert_eq!(result.len(), 0, "Should handle code span before code block");
1403    }
1404
1405    #[test]
1406    fn test_md033_quick_fix_inline_tag() {
1407        // Test that non-fixable tags (like <span>) do NOT get a fix
1408        // Only safe fixable tags (em, i, strong, b, code, br, hr) with fix=true get fixes
1409        let rule = MD033NoInlineHtml::default();
1410        let content = "This has <span>inline text</span> that should keep content.";
1411        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1412        let result = rule.check(&ctx).unwrap();
1413
1414        assert_eq!(result.len(), 1, "Should find one HTML tag");
1415        // <span> is NOT a safe fixable tag, so no fix should be provided
1416        assert!(
1417            result[0].fix.is_none(),
1418            "Non-fixable tags like <span> should not have a fix"
1419        );
1420    }
1421
1422    #[test]
1423    fn test_md033_quick_fix_multiline_tag() {
1424        // HTML block elements like <div> are intentionally NOT auto-fixed
1425        // Removing them would change document structure significantly
1426        let rule = MD033NoInlineHtml::default();
1427        let content = "<div>\nBlock content\n</div>";
1428        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1429        let result = rule.check(&ctx).unwrap();
1430
1431        assert_eq!(result.len(), 1, "Should find one HTML tag");
1432        // HTML block elements should NOT have auto-fix
1433        assert!(result[0].fix.is_none(), "HTML block elements should NOT have auto-fix");
1434    }
1435
1436    #[test]
1437    fn test_md033_quick_fix_self_closing_tag() {
1438        // Test that self-closing tags with fix=false (default) do NOT get a fix
1439        let rule = MD033NoInlineHtml::default();
1440        let content = "Self-closing: <br/>";
1441        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1442        let result = rule.check(&ctx).unwrap();
1443
1444        assert_eq!(result.len(), 1, "Should find one HTML tag");
1445        // Default config has fix=false, so no fix should be provided
1446        assert!(
1447            result[0].fix.is_none(),
1448            "Self-closing tags should not have a fix when fix config is false"
1449        );
1450    }
1451
1452    #[test]
1453    fn test_md033_quick_fix_multiple_tags() {
1454        // Test that multiple tags without fix=true do NOT get fixes
1455        // <span> is not a safe fixable tag, <strong> is but fix=false by default
1456        let rule = MD033NoInlineHtml::default();
1457        let content = "<span>first</span> and <strong>second</strong>";
1458        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1459        let result = rule.check(&ctx).unwrap();
1460
1461        assert_eq!(result.len(), 2, "Should find two HTML tags");
1462        // Neither should have a fix: <span> is not fixable, <strong> is but fix=false
1463        assert!(result[0].fix.is_none(), "Non-fixable <span> should not have a fix");
1464        assert!(
1465            result[1].fix.is_none(),
1466            "<strong> should not have a fix when fix config is false"
1467        );
1468    }
1469
1470    #[test]
1471    fn test_md033_skip_angle_brackets_in_link_titles() {
1472        // Angle brackets inside link reference definition titles should not be flagged as HTML
1473        let rule = MD033NoInlineHtml::default();
1474        let content = r#"# Test
1475
1476[example]: <https://example.com> "Title with <Angle Brackets> inside"
1477
1478Regular text with <div>content</div> HTML tag.
1479"#;
1480        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1481        let result = rule.check(&ctx).unwrap();
1482
1483        // Should only flag <div>, not <Angle Brackets> in the title (not a valid HTML element)
1484        // Opening tag only (markdownlint behavior)
1485        assert_eq!(result.len(), 1, "Should find opening div tag");
1486        assert!(
1487            result[0].message.contains("<div>"),
1488            "Should flag <div>, got: {}",
1489            result[0].message
1490        );
1491    }
1492
1493    #[test]
1494    fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
1495        // Test with single-quoted title
1496        let rule = MD033NoInlineHtml::default();
1497        let content = r#"[ref]: url 'Title <Help Wanted> here'
1498
1499<span>text</span> here
1500"#;
1501        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1502        let result = rule.check(&ctx).unwrap();
1503
1504        // <Help Wanted> is not a valid HTML element, so only <span> is flagged
1505        // Opening tag only (markdownlint behavior)
1506        assert_eq!(result.len(), 1, "Should find opening span tag");
1507        assert!(
1508            result[0].message.contains("<span>"),
1509            "Should flag <span>, got: {}",
1510            result[0].message
1511        );
1512    }
1513
1514    #[test]
1515    fn test_md033_multiline_tag_end_line_calculation() {
1516        // Test that multiline HTML tags report correct end_line
1517        let rule = MD033NoInlineHtml::default();
1518        let content = "<div\n  class=\"test\"\n  id=\"example\">";
1519        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1520        let result = rule.check(&ctx).unwrap();
1521
1522        assert_eq!(result.len(), 1, "Should find one HTML tag");
1523        // Tag starts on line 1
1524        assert_eq!(result[0].line, 1, "Start line should be 1");
1525        // Tag ends on line 3 (where the closing > is)
1526        assert_eq!(result[0].end_line, 3, "End line should be 3");
1527    }
1528
1529    #[test]
1530    fn test_md033_single_line_tag_same_start_end_line() {
1531        // Test that single-line HTML tags have same start and end line
1532        let rule = MD033NoInlineHtml::default();
1533        let content = "Some text <div class=\"test\"> more text";
1534        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1535        let result = rule.check(&ctx).unwrap();
1536
1537        assert_eq!(result.len(), 1, "Should find one HTML tag");
1538        assert_eq!(result[0].line, 1, "Start line should be 1");
1539        assert_eq!(result[0].end_line, 1, "End line should be 1 for single-line tag");
1540    }
1541
1542    #[test]
1543    fn test_md033_multiline_tag_with_many_attributes() {
1544        // Test multiline tag spanning multiple lines
1545        let rule = MD033NoInlineHtml::default();
1546        let content =
1547            "Text\n<div\n  data-attr1=\"value1\"\n  data-attr2=\"value2\"\n  data-attr3=\"value3\">\nMore text";
1548        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1549        let result = rule.check(&ctx).unwrap();
1550
1551        assert_eq!(result.len(), 1, "Should find one HTML tag");
1552        // Tag starts on line 2 (first line is "Text")
1553        assert_eq!(result[0].line, 2, "Start line should be 2");
1554        // Tag ends on line 5 (where the closing > is)
1555        assert_eq!(result[0].end_line, 5, "End line should be 5");
1556    }
1557
1558    #[test]
1559    fn test_md033_disallowed_mode_basic() {
1560        // Test disallowed mode: only flags tags in the disallowed list
1561        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string(), "iframe".to_string()]);
1562        let content = "<div>Safe content</div><script>alert('xss')</script>";
1563        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1564        let result = rule.check(&ctx).unwrap();
1565
1566        // Should only flag <script>, not <div>
1567        assert_eq!(result.len(), 1, "Should only flag disallowed tags");
1568        assert!(result[0].message.contains("<script>"), "Should flag script tag");
1569    }
1570
1571    #[test]
1572    fn test_md033_disallowed_gfm_security_tags() {
1573        // Test GFM security tags expansion
1574        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1575        let content = r#"
1576<div>Safe</div>
1577<title>Bad title</title>
1578<textarea>Bad textarea</textarea>
1579<style>.bad{}</style>
1580<iframe src="evil"></iframe>
1581<script>evil()</script>
1582<plaintext>old tag</plaintext>
1583<span>Safe span</span>
1584"#;
1585        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1586        let result = rule.check(&ctx).unwrap();
1587
1588        // Should flag: title, textarea, style, iframe, script, plaintext
1589        // Should NOT flag: div, span
1590        assert_eq!(result.len(), 6, "Should flag 6 GFM security tags");
1591
1592        let flagged_tags: Vec<&str> = result
1593            .iter()
1594            .filter_map(|w| w.message.split("<").nth(1))
1595            .filter_map(|s| s.split(">").next())
1596            .filter_map(|s| s.split_whitespace().next())
1597            .collect();
1598
1599        assert!(flagged_tags.contains(&"title"), "Should flag title");
1600        assert!(flagged_tags.contains(&"textarea"), "Should flag textarea");
1601        assert!(flagged_tags.contains(&"style"), "Should flag style");
1602        assert!(flagged_tags.contains(&"iframe"), "Should flag iframe");
1603        assert!(flagged_tags.contains(&"script"), "Should flag script");
1604        assert!(flagged_tags.contains(&"plaintext"), "Should flag plaintext");
1605        assert!(!flagged_tags.contains(&"div"), "Should NOT flag div");
1606        assert!(!flagged_tags.contains(&"span"), "Should NOT flag span");
1607    }
1608
1609    #[test]
1610    fn test_md033_disallowed_case_insensitive() {
1611        // Test that disallowed check is case-insensitive
1612        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string()]);
1613        let content = "<SCRIPT>alert('xss')</SCRIPT><Script>alert('xss')</Script>";
1614        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1615        let result = rule.check(&ctx).unwrap();
1616
1617        // Should flag both <SCRIPT> and <Script>
1618        assert_eq!(result.len(), 2, "Should flag both case variants");
1619    }
1620
1621    #[test]
1622    fn test_md033_disallowed_with_attributes() {
1623        // Test that disallowed mode works with tags that have attributes
1624        let rule = MD033NoInlineHtml::with_disallowed(vec!["iframe".to_string()]);
1625        let content = r#"<iframe src="https://evil.com" width="100" height="100"></iframe>"#;
1626        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1627        let result = rule.check(&ctx).unwrap();
1628
1629        assert_eq!(result.len(), 1, "Should flag iframe with attributes");
1630        assert!(result[0].message.contains("iframe"), "Should flag iframe");
1631    }
1632
1633    #[test]
1634    fn test_md033_disallowed_all_gfm_tags() {
1635        // Verify all GFM disallowed tags are covered
1636        use md033_config::GFM_DISALLOWED_TAGS;
1637        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1638
1639        for tag in GFM_DISALLOWED_TAGS {
1640            let content = format!("<{tag}>content</{tag}>");
1641            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
1642            let result = rule.check(&ctx).unwrap();
1643
1644            assert_eq!(result.len(), 1, "GFM tag <{tag}> should be flagged");
1645        }
1646    }
1647
1648    #[test]
1649    fn test_md033_disallowed_mixed_with_custom() {
1650        // Test mixing "gfm" with custom disallowed tags
1651        let rule = MD033NoInlineHtml::with_disallowed(vec![
1652            "gfm".to_string(),
1653            "marquee".to_string(), // Custom disallowed tag
1654        ]);
1655        let content = r#"<script>bad</script><marquee>annoying</marquee><div>ok</div>"#;
1656        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1657        let result = rule.check(&ctx).unwrap();
1658
1659        // Should flag script (gfm) and marquee (custom)
1660        assert_eq!(result.len(), 2, "Should flag both gfm and custom tags");
1661    }
1662
1663    #[test]
1664    fn test_md033_disallowed_empty_means_default_mode() {
1665        // Empty disallowed list means default mode (flag all HTML)
1666        let rule = MD033NoInlineHtml::with_disallowed(vec![]);
1667        let content = "<div>content</div>";
1668        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1669        let result = rule.check(&ctx).unwrap();
1670
1671        // Should flag <div> in default mode
1672        assert_eq!(result.len(), 1, "Empty disallowed = default mode");
1673    }
1674
1675    #[test]
1676    fn test_md033_jsx_fragments_in_mdx() {
1677        // JSX fragments (<> and </>) should not trigger warnings in MDX
1678        let rule = MD033NoInlineHtml::default();
1679        let content = r#"# MDX Document
1680
1681<>
1682  <Heading />
1683  <Content />
1684</>
1685
1686<div>Regular HTML should still be flagged</div>
1687"#;
1688        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1689        let result = rule.check(&ctx).unwrap();
1690
1691        // Should only flag <div>, not the fragments or JSX components
1692        assert_eq!(result.len(), 1, "Should only find one HTML tag (the div)");
1693        assert!(
1694            result[0].message.contains("<div>"),
1695            "Should flag <div>, not JSX fragments"
1696        );
1697    }
1698
1699    #[test]
1700    fn test_md033_jsx_components_in_mdx() {
1701        // JSX components (capitalized) should not trigger warnings in MDX
1702        let rule = MD033NoInlineHtml::default();
1703        let content = r#"<CustomComponent prop="value">
1704  Content
1705</CustomComponent>
1706
1707<MyButton onClick={handler}>Click</MyButton>
1708"#;
1709        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1710        let result = rule.check(&ctx).unwrap();
1711
1712        // No warnings - all are JSX components
1713        assert_eq!(result.len(), 0, "Should not flag JSX components in MDX");
1714    }
1715
1716    #[test]
1717    fn test_md033_jsx_not_skipped_in_standard_markdown() {
1718        // In standard markdown, capitalized tags should still be flagged if they're valid HTML
1719        let rule = MD033NoInlineHtml::default();
1720        let content = "<Script>alert(1)</Script>";
1721        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1722        let result = rule.check(&ctx).unwrap();
1723
1724        // Should flag <Script> in standard markdown (it's a valid HTML element)
1725        assert_eq!(result.len(), 1, "Should flag <Script> in standard markdown");
1726    }
1727
1728    #[test]
1729    fn test_md033_jsx_attributes_in_mdx() {
1730        // Elements with JSX-specific attributes should not trigger warnings in MDX
1731        let rule = MD033NoInlineHtml::default();
1732        let content = r#"# MDX with JSX Attributes
1733
1734<div className="card big">Content</div>
1735
1736<button onClick={handleClick}>Click me</button>
1737
1738<label htmlFor="input-id">Label</label>
1739
1740<input onChange={handleChange} />
1741
1742<div class="html-class">Regular HTML should be flagged</div>
1743"#;
1744        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1745        let result = rule.check(&ctx).unwrap();
1746
1747        // Should only flag the div with regular HTML "class" attribute
1748        assert_eq!(
1749            result.len(),
1750            1,
1751            "Should only flag HTML element without JSX attributes, got: {result:?}"
1752        );
1753        assert!(
1754            result[0].message.contains("<div class="),
1755            "Should flag the div with HTML class attribute"
1756        );
1757    }
1758
1759    #[test]
1760    fn test_md033_jsx_attributes_not_skipped_in_standard() {
1761        // In standard markdown, JSX attributes should still be flagged
1762        let rule = MD033NoInlineHtml::default();
1763        let content = r#"<div className="card">Content</div>"#;
1764        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1765        let result = rule.check(&ctx).unwrap();
1766
1767        // Should flag in standard markdown
1768        assert_eq!(result.len(), 1, "Should flag JSX-style elements in standard markdown");
1769    }
1770
1771    // Auto-fix tests for MD033
1772
1773    #[test]
1774    fn test_md033_fix_disabled_by_default() {
1775        // Auto-fix should be disabled by default
1776        let rule = MD033NoInlineHtml::default();
1777        assert!(!rule.config.fix, "Fix should be disabled by default");
1778        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::Unfixable);
1779    }
1780
1781    #[test]
1782    fn test_md033_fix_enabled_em_to_italic() {
1783        // When fix is enabled, <em>text</em> should convert to *text*
1784        let rule = MD033NoInlineHtml::with_fix(true);
1785        let content = "This has <em>emphasized text</em> here.";
1786        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1787        let fixed = rule.fix(&ctx).unwrap();
1788        assert_eq!(fixed, "This has *emphasized text* here.");
1789    }
1790
1791    #[test]
1792    fn test_md033_fix_enabled_i_to_italic() {
1793        // <i>text</i> should convert to *text*
1794        let rule = MD033NoInlineHtml::with_fix(true);
1795        let content = "This has <i>italic text</i> here.";
1796        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1797        let fixed = rule.fix(&ctx).unwrap();
1798        assert_eq!(fixed, "This has *italic text* here.");
1799    }
1800
1801    #[test]
1802    fn test_md033_fix_enabled_strong_to_bold() {
1803        // <strong>text</strong> should convert to **text**
1804        let rule = MD033NoInlineHtml::with_fix(true);
1805        let content = "This has <strong>bold text</strong> here.";
1806        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1807        let fixed = rule.fix(&ctx).unwrap();
1808        assert_eq!(fixed, "This has **bold text** here.");
1809    }
1810
1811    #[test]
1812    fn test_md033_fix_enabled_b_to_bold() {
1813        // <b>text</b> should convert to **text**
1814        let rule = MD033NoInlineHtml::with_fix(true);
1815        let content = "This has <b>bold text</b> here.";
1816        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1817        let fixed = rule.fix(&ctx).unwrap();
1818        assert_eq!(fixed, "This has **bold text** here.");
1819    }
1820
1821    #[test]
1822    fn test_md033_fix_enabled_code_to_backticks() {
1823        // <code>text</code> should convert to `text`
1824        let rule = MD033NoInlineHtml::with_fix(true);
1825        let content = "This has <code>inline code</code> here.";
1826        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1827        let fixed = rule.fix(&ctx).unwrap();
1828        assert_eq!(fixed, "This has `inline code` here.");
1829    }
1830
1831    #[test]
1832    fn test_md033_fix_enabled_code_with_backticks() {
1833        // <code>text with `backticks`</code> should use double backticks
1834        let rule = MD033NoInlineHtml::with_fix(true);
1835        let content = "This has <code>text with `backticks`</code> here.";
1836        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1837        let fixed = rule.fix(&ctx).unwrap();
1838        assert_eq!(fixed, "This has `` text with `backticks` `` here.");
1839    }
1840
1841    #[test]
1842    fn test_md033_fix_enabled_br_trailing_spaces() {
1843        // <br> should convert to two trailing spaces + newline (default)
1844        let rule = MD033NoInlineHtml::with_fix(true);
1845        let content = "First line<br>Second line";
1846        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1847        let fixed = rule.fix(&ctx).unwrap();
1848        assert_eq!(fixed, "First line  \nSecond line");
1849    }
1850
1851    #[test]
1852    fn test_md033_fix_enabled_br_self_closing() {
1853        // <br/> and <br /> should also convert
1854        let rule = MD033NoInlineHtml::with_fix(true);
1855        let content = "First<br/>second<br />third";
1856        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1857        let fixed = rule.fix(&ctx).unwrap();
1858        assert_eq!(fixed, "First  \nsecond  \nthird");
1859    }
1860
1861    #[test]
1862    fn test_md033_fix_enabled_br_backslash_style() {
1863        // With br_style = backslash, <br> should convert to backslash + newline
1864        let config = MD033Config {
1865            allowed: Vec::new(),
1866            disallowed: Vec::new(),
1867            fix: true,
1868            br_style: md033_config::BrStyle::Backslash,
1869            ..MD033Config::default()
1870        };
1871        let rule = MD033NoInlineHtml::from_config_struct(config);
1872        let content = "First line<br>Second line";
1873        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1874        let fixed = rule.fix(&ctx).unwrap();
1875        assert_eq!(fixed, "First line\\\nSecond line");
1876    }
1877
1878    #[test]
1879    fn test_md033_fix_enabled_hr() {
1880        // <hr> should convert to horizontal rule
1881        let rule = MD033NoInlineHtml::with_fix(true);
1882        let content = "Above<hr>Below";
1883        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1884        let fixed = rule.fix(&ctx).unwrap();
1885        assert_eq!(fixed, "Above\n---\nBelow");
1886    }
1887
1888    #[test]
1889    fn test_md033_fix_enabled_hr_self_closing() {
1890        // <hr/> should also convert
1891        let rule = MD033NoInlineHtml::with_fix(true);
1892        let content = "Above<hr/>Below";
1893        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1894        let fixed = rule.fix(&ctx).unwrap();
1895        assert_eq!(fixed, "Above\n---\nBelow");
1896    }
1897
1898    #[test]
1899    fn test_md033_fix_skips_nested_tags() {
1900        // Tags with nested HTML - outer tags may not be fully fixed due to overlapping ranges
1901        // The inner tags are processed first, which can invalidate outer tag ranges
1902        let rule = MD033NoInlineHtml::with_fix(true);
1903        let content = "This has <em>text with <strong>nested</strong> tags</em> here.";
1904        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1905        let fixed = rule.fix(&ctx).unwrap();
1906        // Inner <strong> is converted to markdown, outer <em> range becomes invalid
1907        // This is expected behavior - user should run fix multiple times for nested tags
1908        assert_eq!(fixed, "This has <em>text with **nested** tags</em> here.");
1909    }
1910
1911    #[test]
1912    fn test_md033_fix_skips_tags_with_attributes() {
1913        // Tags with attributes should NOT be fixed at all - leave as-is
1914        // User may want to keep the attributes (e.g., class="highlight" for styling)
1915        let rule = MD033NoInlineHtml::with_fix(true);
1916        let content = "This has <em class=\"highlight\">emphasized</em> text.";
1917        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1918        let fixed = rule.fix(&ctx).unwrap();
1919        // Content should remain unchanged - we don't know if attributes matter
1920        assert_eq!(fixed, content);
1921    }
1922
1923    #[test]
1924    fn test_md033_fix_disabled_no_changes() {
1925        // When fix is disabled, original content should be returned
1926        let rule = MD033NoInlineHtml::default(); // fix is false by default
1927        let content = "This has <em>emphasized text</em> here.";
1928        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1929        let fixed = rule.fix(&ctx).unwrap();
1930        assert_eq!(fixed, content, "Should return original content when fix is disabled");
1931    }
1932
1933    #[test]
1934    fn test_md033_fix_capability_enabled() {
1935        let rule = MD033NoInlineHtml::with_fix(true);
1936        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::FullyFixable);
1937    }
1938
1939    #[test]
1940    fn test_md033_fix_multiple_tags() {
1941        // Test fixing multiple HTML tags in one document
1942        let rule = MD033NoInlineHtml::with_fix(true);
1943        let content = "Here is <em>italic</em> and <strong>bold</strong> text.";
1944        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1945        let fixed = rule.fix(&ctx).unwrap();
1946        assert_eq!(fixed, "Here is *italic* and **bold** text.");
1947    }
1948
1949    #[test]
1950    fn test_md033_fix_uppercase_tags() {
1951        // HTML tags are case-insensitive
1952        let rule = MD033NoInlineHtml::with_fix(true);
1953        let content = "This has <EM>emphasized</EM> text.";
1954        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1955        let fixed = rule.fix(&ctx).unwrap();
1956        assert_eq!(fixed, "This has *emphasized* text.");
1957    }
1958
1959    #[test]
1960    fn test_md033_fix_unsafe_tags_not_modified() {
1961        // Tags without safe markdown equivalents should NOT be modified
1962        // Only safe fixable tags (em, i, strong, b, code, br, hr) get converted
1963        let rule = MD033NoInlineHtml::with_fix(true);
1964        let content = "This has <div>a div</div> content.";
1965        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1966        let fixed = rule.fix(&ctx).unwrap();
1967        // <div> is not a safe fixable tag, so content should be unchanged
1968        assert_eq!(fixed, "This has <div>a div</div> content.");
1969    }
1970
1971    #[test]
1972    fn test_md033_fix_img_tag_converted() {
1973        // <img> tags with simple src/alt attributes are converted to markdown images
1974        let rule = MD033NoInlineHtml::with_fix(true);
1975        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\">";
1976        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1977        let fixed = rule.fix(&ctx).unwrap();
1978        // <img> is converted to ![alt](src) format
1979        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
1980    }
1981
1982    #[test]
1983    fn test_md033_fix_img_tag_with_extra_attrs_not_converted() {
1984        // <img> tags with width/height/style attributes are NOT converted
1985        let rule = MD033NoInlineHtml::with_fix(true);
1986        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
1987        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1988        let fixed = rule.fix(&ctx).unwrap();
1989        // Has width attribute - not safe to convert
1990        assert_eq!(fixed, "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">");
1991    }
1992
1993    #[test]
1994    fn test_md033_fix_relaxed_a_with_target_is_converted() {
1995        let rule = relaxed_fix_rule();
1996        let content = "Link: <a href=\"https://example.com\" target=\"_blank\">Example</a>";
1997        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1998        let fixed = rule.fix(&ctx).unwrap();
1999        assert_eq!(fixed, "Link: [Example](https://example.com)");
2000    }
2001
2002    #[test]
2003    fn test_md033_fix_relaxed_img_with_width_is_converted() {
2004        let rule = relaxed_fix_rule();
2005        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
2006        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2007        let fixed = rule.fix(&ctx).unwrap();
2008        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
2009    }
2010
2011    #[test]
2012    fn test_md033_fix_relaxed_rejects_unknown_extra_attributes() {
2013        let rule = relaxed_fix_rule();
2014        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" aria-label=\"hero\">";
2015        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2016        let fixed = rule.fix(&ctx).unwrap();
2017        assert_eq!(fixed, content, "Unknown attributes should not be dropped by default");
2018    }
2019
2020    #[test]
2021    fn test_md033_fix_relaxed_still_blocks_unsafe_schemes() {
2022        let rule = relaxed_fix_rule();
2023        let content = "Link: <a href=\"javascript:alert(1)\" target=\"_blank\">Example</a>";
2024        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2025        let fixed = rule.fix(&ctx).unwrap();
2026        assert_eq!(fixed, content, "Unsafe URL schemes must never be converted");
2027    }
2028
2029    #[test]
2030    fn test_md033_fix_relaxed_wrapper_strip_requires_second_pass_for_nested_html() {
2031        let rule = relaxed_fix_rule();
2032        let content = "<p align=\"center\">\n  <img src=\"logo.svg\" alt=\"Logo\" width=\"120\" />\n</p>";
2033        let ctx1 = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2034        let fixed_once = rule.fix(&ctx1).unwrap();
2035        assert!(
2036            fixed_once.contains("<p"),
2037            "First pass should keep wrapper when inner HTML is still present: {fixed_once}"
2038        );
2039        assert!(
2040            fixed_once.contains("![Logo](logo.svg)"),
2041            "Inner image should be converted on first pass: {fixed_once}"
2042        );
2043
2044        let ctx2 = LintContext::new(&fixed_once, crate::config::MarkdownFlavor::Standard, None);
2045        let fixed_twice = rule.fix(&ctx2).unwrap();
2046        assert!(
2047            !fixed_twice.contains("<p"),
2048            "Second pass should strip configured wrapper: {fixed_twice}"
2049        );
2050        assert!(fixed_twice.contains("![Logo](logo.svg)"));
2051    }
2052
2053    #[test]
2054    fn test_md033_fix_relaxed_multiple_droppable_attrs() {
2055        let rule = relaxed_fix_rule();
2056        let content = "<a href=\"https://example.com\" target=\"_blank\" rel=\"noopener\" class=\"btn\">Click</a>";
2057        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2058        let fixed = rule.fix(&ctx).unwrap();
2059        assert_eq!(fixed, "[Click](https://example.com)");
2060    }
2061
2062    #[test]
2063    fn test_md033_fix_relaxed_img_multiple_droppable_attrs() {
2064        let rule = relaxed_fix_rule();
2065        let content = "<img src=\"logo.png\" alt=\"Logo\" width=\"120\" height=\"40\" style=\"border:none\" />";
2066        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2067        let fixed = rule.fix(&ctx).unwrap();
2068        assert_eq!(fixed, "![Logo](logo.png)");
2069    }
2070
2071    #[test]
2072    fn test_md033_fix_relaxed_event_handler_never_dropped() {
2073        let rule = relaxed_fix_rule();
2074        let content = "<a href=\"https://example.com\" onclick=\"track()\">Link</a>";
2075        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2076        let fixed = rule.fix(&ctx).unwrap();
2077        assert_eq!(fixed, content, "Event handler attributes must block conversion");
2078    }
2079
2080    #[test]
2081    fn test_md033_fix_relaxed_event_handler_even_with_custom_config() {
2082        // Even if someone adds on* to drop-attributes, event handlers must be rejected
2083        let config = MD033Config {
2084            fix: true,
2085            fix_mode: MD033FixMode::Relaxed,
2086            drop_attributes: vec!["on*".to_string(), "target".to_string()],
2087            ..MD033Config::default()
2088        };
2089        let rule = MD033NoInlineHtml::from_config_struct(config);
2090        let content = "<a href=\"https://example.com\" onclick=\"alert(1)\">Link</a>";
2091        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2092        let fixed = rule.fix(&ctx).unwrap();
2093        assert_eq!(fixed, content, "on* event handlers must never be dropped");
2094    }
2095
2096    #[test]
2097    fn test_md033_fix_relaxed_custom_drop_attributes() {
2098        let config = MD033Config {
2099            fix: true,
2100            fix_mode: MD033FixMode::Relaxed,
2101            drop_attributes: vec!["loading".to_string()],
2102            ..MD033Config::default()
2103        };
2104        let rule = MD033NoInlineHtml::from_config_struct(config);
2105        // "loading" is in the custom list, "width" is NOT
2106        let content = "<img src=\"x.jpg\" alt=\"\" loading=\"lazy\">";
2107        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2108        let fixed = rule.fix(&ctx).unwrap();
2109        assert_eq!(fixed, "![](x.jpg)", "Custom drop-attributes should be respected");
2110
2111        let content2 = "<img src=\"x.jpg\" alt=\"\" width=\"100\">";
2112        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
2113        let fixed2 = rule.fix(&ctx2).unwrap();
2114        assert_eq!(
2115            fixed2, content2,
2116            "Attributes not in custom list should block conversion"
2117        );
2118    }
2119
2120    #[test]
2121    fn test_md033_fix_relaxed_custom_strip_wrapper() {
2122        let config = MD033Config {
2123            fix: true,
2124            fix_mode: MD033FixMode::Relaxed,
2125            strip_wrapper_elements: vec!["div".to_string()],
2126            ..MD033Config::default()
2127        };
2128        let rule = MD033NoInlineHtml::from_config_struct(config);
2129        let content = "<div>Some text content</div>";
2130        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2131        let fixed = rule.fix(&ctx).unwrap();
2132        assert_eq!(fixed, "Some text content");
2133    }
2134
2135    #[test]
2136    fn test_md033_fix_relaxed_wrapper_with_plain_text() {
2137        let rule = relaxed_fix_rule();
2138        let content = "<p align=\"center\">Just some text</p>";
2139        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2140        let fixed = rule.fix(&ctx).unwrap();
2141        assert_eq!(fixed, "Just some text");
2142    }
2143
2144    #[test]
2145    fn test_md033_fix_relaxed_data_attr_with_wildcard() {
2146        let config = MD033Config {
2147            fix: true,
2148            fix_mode: MD033FixMode::Relaxed,
2149            drop_attributes: vec!["data-*".to_string(), "target".to_string()],
2150            ..MD033Config::default()
2151        };
2152        let rule = MD033NoInlineHtml::from_config_struct(config);
2153        let content = "<a href=\"https://example.com\" data-tracking=\"abc\" target=\"_blank\">Link</a>";
2154        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2155        let fixed = rule.fix(&ctx).unwrap();
2156        assert_eq!(fixed, "[Link](https://example.com)");
2157    }
2158
2159    #[test]
2160    fn test_md033_fix_relaxed_mixed_droppable_and_blocking_attrs() {
2161        let rule = relaxed_fix_rule();
2162        // "target" is droppable, "aria-label" is not in the default list
2163        let content = "<a href=\"https://example.com\" target=\"_blank\" aria-label=\"nav\">Link</a>";
2164        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2165        let fixed = rule.fix(&ctx).unwrap();
2166        assert_eq!(fixed, content, "Non-droppable attribute should block conversion");
2167    }
2168
2169    #[test]
2170    fn test_md033_fix_relaxed_badge_pattern() {
2171        // Common GitHub README badge pattern
2172        let rule = relaxed_fix_rule();
2173        let content = "<a href=\"https://crates.io/crates/rumdl\" target=\"_blank\"><img src=\"https://img.shields.io/crates/v/rumdl.svg\" alt=\"Crate\" width=\"120\" /></a>";
2174        let ctx1 = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2175        let fixed_once = rule.fix(&ctx1).unwrap();
2176        // First pass should convert the inner <img>
2177        assert!(
2178            fixed_once.contains("![Crate](https://img.shields.io/crates/v/rumdl.svg)"),
2179            "Inner img should be converted: {fixed_once}"
2180        );
2181
2182        // Second pass converts the <a> wrapper
2183        let ctx2 = LintContext::new(&fixed_once, crate::config::MarkdownFlavor::Standard, None);
2184        let fixed_twice = rule.fix(&ctx2).unwrap();
2185        assert!(
2186            fixed_twice
2187                .contains("[![Crate](https://img.shields.io/crates/v/rumdl.svg)](https://crates.io/crates/rumdl)"),
2188            "Badge should produce nested markdown image link: {fixed_twice}"
2189        );
2190    }
2191
2192    #[test]
2193    fn test_md033_fix_relaxed_conservative_mode_unchanged() {
2194        // Verify conservative mode (default) is unaffected by the relaxed logic
2195        let rule = MD033NoInlineHtml::with_fix(true);
2196        let content = "<a href=\"https://example.com\" target=\"_blank\">Link</a>";
2197        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2198        let fixed = rule.fix(&ctx).unwrap();
2199        assert_eq!(fixed, content, "Conservative mode should not drop target attribute");
2200    }
2201
2202    #[test]
2203    fn test_md033_fix_relaxed_img_inside_pre_not_converted() {
2204        // <img> inside <pre> must NOT be converted, even in relaxed mode
2205        let rule = relaxed_fix_rule();
2206        let content = "<pre>\n  <img src=\"diagram.png\" alt=\"d\" width=\"100\" />\n</pre>";
2207        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2208        let fixed = rule.fix(&ctx).unwrap();
2209        assert!(fixed.contains("<img"), "img inside pre must not be converted: {fixed}");
2210    }
2211
2212    #[test]
2213    fn test_md033_fix_relaxed_wrapper_nested_inside_div_not_stripped() {
2214        // <p> nested inside <div> should not be stripped
2215        let rule = relaxed_fix_rule();
2216        let content = "<div><p>text</p></div>";
2217        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2218        let fixed = rule.fix(&ctx).unwrap();
2219        assert!(
2220            fixed.contains("<p>text</p>") || fixed.contains("<p>"),
2221            "Nested <p> inside <div> should not be stripped: {fixed}"
2222        );
2223    }
2224
2225    #[test]
2226    fn test_md033_fix_relaxed_img_inside_nested_wrapper_not_converted() {
2227        // <img> inside <div><p>...</p></div> must NOT be converted because the
2228        // <p> wrapper can't be stripped (it's nested), so the markdown would be
2229        // stuck inside an HTML block where it won't render.
2230        let rule = relaxed_fix_rule();
2231        let content = "<div><p><img src=\"x.jpg\" alt=\"pic\" width=\"100\" /></p></div>";
2232        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2233        let fixed = rule.fix(&ctx).unwrap();
2234        assert!(
2235            fixed.contains("<img"),
2236            "img inside nested wrapper must not be converted: {fixed}"
2237        );
2238    }
2239
2240    #[test]
2241    fn test_md033_fix_mixed_safe_tags() {
2242        // All tags are now safe fixable (em, img, strong)
2243        let rule = MD033NoInlineHtml::with_fix(true);
2244        let content = "<em>italic</em> and <img src=\"x.jpg\"> and <strong>bold</strong>";
2245        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2246        let fixed = rule.fix(&ctx).unwrap();
2247        // All are converted
2248        assert_eq!(fixed, "*italic* and ![](x.jpg) and **bold**");
2249    }
2250
2251    #[test]
2252    fn test_md033_fix_multiple_tags_same_line() {
2253        // Multiple tags on the same line should all be fixed correctly
2254        let rule = MD033NoInlineHtml::with_fix(true);
2255        let content = "Regular text <i>italic</i> and <b>bold</b> here.";
2256        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2257        let fixed = rule.fix(&ctx).unwrap();
2258        assert_eq!(fixed, "Regular text *italic* and **bold** here.");
2259    }
2260
2261    #[test]
2262    fn test_md033_fix_multiple_em_tags_same_line() {
2263        // Multiple em/strong tags on the same line
2264        let rule = MD033NoInlineHtml::with_fix(true);
2265        let content = "<em>first</em> and <strong>second</strong> and <code>third</code>";
2266        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2267        let fixed = rule.fix(&ctx).unwrap();
2268        assert_eq!(fixed, "*first* and **second** and `third`");
2269    }
2270
2271    #[test]
2272    fn test_md033_fix_skips_tags_inside_pre() {
2273        // Tags inside <pre> blocks should NOT be fixed (would break structure)
2274        let rule = MD033NoInlineHtml::with_fix(true);
2275        let content = "<pre><code><em>VALUE</em></code></pre>";
2276        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2277        let fixed = rule.fix(&ctx).unwrap();
2278        // The <em> inside <pre><code> should NOT be converted
2279        // Only the outer structure might be changed
2280        assert!(
2281            !fixed.contains("*VALUE*"),
2282            "Tags inside <pre> should not be converted to markdown. Got: {fixed}"
2283        );
2284    }
2285
2286    #[test]
2287    fn test_md033_fix_skips_tags_inside_div() {
2288        // Tags inside HTML block elements should not be fixed
2289        let rule = MD033NoInlineHtml::with_fix(true);
2290        let content = "<div>\n<em>emphasized</em>\n</div>";
2291        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2292        let fixed = rule.fix(&ctx).unwrap();
2293        // The <em> inside <div> should not be converted to *emphasized*
2294        assert!(
2295            !fixed.contains("*emphasized*"),
2296            "Tags inside HTML blocks should not be converted. Got: {fixed}"
2297        );
2298    }
2299
2300    #[test]
2301    fn test_md033_fix_outside_html_block() {
2302        // Tags outside HTML blocks should still be fixed
2303        let rule = MD033NoInlineHtml::with_fix(true);
2304        let content = "<div>\ncontent\n</div>\n\nOutside <em>emphasized</em> text.";
2305        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2306        let fixed = rule.fix(&ctx).unwrap();
2307        // The <em> outside the div should be converted
2308        assert!(
2309            fixed.contains("*emphasized*"),
2310            "Tags outside HTML blocks should be converted. Got: {fixed}"
2311        );
2312    }
2313
2314    #[test]
2315    fn test_md033_fix_with_id_attribute() {
2316        // Tags with id attributes should not be fixed (id might be used for anchors)
2317        let rule = MD033NoInlineHtml::with_fix(true);
2318        let content = "See <em id=\"important\">this note</em> for details.";
2319        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2320        let fixed = rule.fix(&ctx).unwrap();
2321        // Should remain unchanged - id attribute matters for linking
2322        assert_eq!(fixed, content);
2323    }
2324
2325    #[test]
2326    fn test_md033_fix_with_style_attribute() {
2327        // Tags with style attributes should not be fixed
2328        let rule = MD033NoInlineHtml::with_fix(true);
2329        let content = "This is <strong style=\"color: red\">important</strong> text.";
2330        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2331        let fixed = rule.fix(&ctx).unwrap();
2332        // Should remain unchanged - style attribute provides formatting
2333        assert_eq!(fixed, content);
2334    }
2335
2336    #[test]
2337    fn test_md033_fix_mixed_with_and_without_attributes() {
2338        // Mix of tags with and without attributes
2339        let rule = MD033NoInlineHtml::with_fix(true);
2340        let content = "<em>normal</em> and <em class=\"special\">styled</em> text.";
2341        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2342        let fixed = rule.fix(&ctx).unwrap();
2343        // Only the tag without attributes should be fixed
2344        assert_eq!(fixed, "*normal* and <em class=\"special\">styled</em> text.");
2345    }
2346
2347    #[test]
2348    fn test_md033_quick_fix_tag_with_attributes_no_fix() {
2349        // Quick fix should not be provided for tags with attributes
2350        let rule = MD033NoInlineHtml::with_fix(true);
2351        let content = "<em class=\"test\">emphasized</em>";
2352        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2353        let result = rule.check(&ctx).unwrap();
2354
2355        assert_eq!(result.len(), 1, "Should find one HTML tag");
2356        // No fix should be provided for tags with attributes
2357        assert!(
2358            result[0].fix.is_none(),
2359            "Should NOT have a fix for tags with attributes"
2360        );
2361    }
2362
2363    #[test]
2364    fn test_md033_fix_skips_html_entities() {
2365        // Tags containing HTML entities should NOT be fixed
2366        // HTML entities need HTML context to render; markdown won't process them
2367        let rule = MD033NoInlineHtml::with_fix(true);
2368        let content = "<code>&vert;</code>";
2369        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2370        let fixed = rule.fix(&ctx).unwrap();
2371        // Should remain unchanged - converting would break rendering
2372        assert_eq!(fixed, content);
2373    }
2374
2375    #[test]
2376    fn test_md033_fix_skips_multiple_html_entities() {
2377        // Multiple HTML entities should also be skipped
2378        let rule = MD033NoInlineHtml::with_fix(true);
2379        let content = "<code>&lt;T&gt;</code>";
2380        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2381        let fixed = rule.fix(&ctx).unwrap();
2382        // Should remain unchanged
2383        assert_eq!(fixed, content);
2384    }
2385
2386    #[test]
2387    fn test_md033_fix_allows_ampersand_without_entity() {
2388        // Content with & but no semicolon should still be fixed
2389        let rule = MD033NoInlineHtml::with_fix(true);
2390        let content = "<code>a & b</code>";
2391        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2392        let fixed = rule.fix(&ctx).unwrap();
2393        // Should be converted since & is not part of an entity
2394        assert_eq!(fixed, "`a & b`");
2395    }
2396
2397    #[test]
2398    fn test_md033_fix_em_with_entities_skipped() {
2399        // <em> with entities should also be skipped
2400        let rule = MD033NoInlineHtml::with_fix(true);
2401        let content = "<em>&nbsp;text</em>";
2402        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2403        let fixed = rule.fix(&ctx).unwrap();
2404        // Should remain unchanged
2405        assert_eq!(fixed, content);
2406    }
2407
2408    #[test]
2409    fn test_md033_fix_skips_nested_em_in_code() {
2410        // Tags nested inside other HTML elements should NOT be fixed
2411        // e.g., <code><em>n</em></code> - the <em> should not be converted
2412        let rule = MD033NoInlineHtml::with_fix(true);
2413        let content = "<code><em>n</em></code>";
2414        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2415        let fixed = rule.fix(&ctx).unwrap();
2416        // The inner <em> should NOT be converted to *n* because it's nested
2417        // The whole structure should be left as-is (or outer code converted, but not inner)
2418        assert!(
2419            !fixed.contains("*n*"),
2420            "Nested <em> should not be converted to markdown. Got: {fixed}"
2421        );
2422    }
2423
2424    #[test]
2425    fn test_md033_fix_skips_nested_in_table() {
2426        // Tags nested in HTML structures in tables should not be fixed
2427        let rule = MD033NoInlineHtml::with_fix(true);
2428        let content = "| <code>><em>n</em></code> | description |";
2429        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2430        let fixed = rule.fix(&ctx).unwrap();
2431        // Should not convert nested <em> to *n*
2432        assert!(
2433            !fixed.contains("*n*"),
2434            "Nested tags in table should not be converted. Got: {fixed}"
2435        );
2436    }
2437
2438    #[test]
2439    fn test_md033_fix_standalone_em_still_converted() {
2440        // Standalone (non-nested) <em> should still be converted
2441        let rule = MD033NoInlineHtml::with_fix(true);
2442        let content = "This is <em>emphasized</em> text.";
2443        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2444        let fixed = rule.fix(&ctx).unwrap();
2445        assert_eq!(fixed, "This is *emphasized* text.");
2446    }
2447
2448    // ==========================================================================
2449    // Obsidian Templater Plugin Syntax Tests
2450    //
2451    // Templater is a popular Obsidian plugin that uses `<% ... %>` syntax for
2452    // template interpolation. The `<%` pattern is NOT captured by the HTML tag
2453    // parser because `%` is not a valid HTML tag name character (tags must start
2454    // with a letter). This behavior is documented here with comprehensive tests.
2455    //
2456    // Reference: https://silentvoid13.github.io/Templater/
2457    // ==========================================================================
2458
2459    #[test]
2460    fn test_md033_templater_basic_interpolation_not_flagged() {
2461        // Basic Templater interpolation: <% expr %>
2462        // Should NOT be flagged because `%` is not a valid HTML tag character
2463        let rule = MD033NoInlineHtml::default();
2464        let content = "Today is <% tp.date.now() %> which is nice.";
2465        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2466        let result = rule.check(&ctx).unwrap();
2467        assert!(
2468            result.is_empty(),
2469            "Templater basic interpolation should not be flagged as HTML. Got: {result:?}"
2470        );
2471    }
2472
2473    #[test]
2474    fn test_md033_templater_file_functions_not_flagged() {
2475        // Templater file functions: <% tp.file.* %>
2476        let rule = MD033NoInlineHtml::default();
2477        let content = "File: <% tp.file.title %>\nCreated: <% tp.file.creation_date() %>";
2478        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2479        let result = rule.check(&ctx).unwrap();
2480        assert!(
2481            result.is_empty(),
2482            "Templater file functions should not be flagged. Got: {result:?}"
2483        );
2484    }
2485
2486    #[test]
2487    fn test_md033_templater_with_arguments_not_flagged() {
2488        // Templater with function arguments
2489        let rule = MD033NoInlineHtml::default();
2490        let content = r#"Date: <% tp.date.now("YYYY-MM-DD") %>"#;
2491        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2492        let result = rule.check(&ctx).unwrap();
2493        assert!(
2494            result.is_empty(),
2495            "Templater with arguments should not be flagged. Got: {result:?}"
2496        );
2497    }
2498
2499    #[test]
2500    fn test_md033_templater_javascript_execution_not_flagged() {
2501        // Templater JavaScript execution block: <%* code %>
2502        let rule = MD033NoInlineHtml::default();
2503        let content = "<%* const today = tp.date.now(); tR += today; %>";
2504        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2505        let result = rule.check(&ctx).unwrap();
2506        assert!(
2507            result.is_empty(),
2508            "Templater JS execution block should not be flagged. Got: {result:?}"
2509        );
2510    }
2511
2512    #[test]
2513    fn test_md033_templater_dynamic_execution_not_flagged() {
2514        // Templater dynamic/preview execution: <%+ expr %>
2515        let rule = MD033NoInlineHtml::default();
2516        let content = "Dynamic: <%+ tp.date.now() %>";
2517        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2518        let result = rule.check(&ctx).unwrap();
2519        assert!(
2520            result.is_empty(),
2521            "Templater dynamic execution should not be flagged. Got: {result:?}"
2522        );
2523    }
2524
2525    #[test]
2526    fn test_md033_templater_whitespace_trim_all_not_flagged() {
2527        // Templater whitespace control - trim all: <%_ expr _%>
2528        let rule = MD033NoInlineHtml::default();
2529        let content = "<%_ tp.date.now() _%>";
2530        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2531        let result = rule.check(&ctx).unwrap();
2532        assert!(
2533            result.is_empty(),
2534            "Templater trim-all whitespace should not be flagged. Got: {result:?}"
2535        );
2536    }
2537
2538    #[test]
2539    fn test_md033_templater_whitespace_trim_newline_not_flagged() {
2540        // Templater whitespace control - trim newline: <%- expr -%>
2541        let rule = MD033NoInlineHtml::default();
2542        let content = "<%- tp.date.now() -%>";
2543        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2544        let result = rule.check(&ctx).unwrap();
2545        assert!(
2546            result.is_empty(),
2547            "Templater trim-newline should not be flagged. Got: {result:?}"
2548        );
2549    }
2550
2551    #[test]
2552    fn test_md033_templater_combined_modifiers_not_flagged() {
2553        // Templater combined whitespace and execution modifiers
2554        let rule = MD033NoInlineHtml::default();
2555        let contents = [
2556            "<%-* const x = 1; -%>",  // trim + JS execution
2557            "<%_+ tp.date.now() _%>", // trim-all + dynamic
2558            "<%- tp.file.title -%>",  // trim-newline only
2559            "<%_ tp.file.title _%>",  // trim-all only
2560        ];
2561        for content in contents {
2562            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2563            let result = rule.check(&ctx).unwrap();
2564            assert!(
2565                result.is_empty(),
2566                "Templater combined modifiers should not be flagged: {content}. Got: {result:?}"
2567            );
2568        }
2569    }
2570
2571    #[test]
2572    fn test_md033_templater_multiline_block_not_flagged() {
2573        // Multi-line Templater JavaScript block
2574        let rule = MD033NoInlineHtml::default();
2575        let content = r#"<%*
2576const x = 1;
2577const y = 2;
2578tR += x + y;
2579%>"#;
2580        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2581        let result = rule.check(&ctx).unwrap();
2582        assert!(
2583            result.is_empty(),
2584            "Templater multi-line block should not be flagged. Got: {result:?}"
2585        );
2586    }
2587
2588    #[test]
2589    fn test_md033_templater_with_angle_brackets_in_condition_not_flagged() {
2590        // Templater with angle brackets in JavaScript condition
2591        // This is a key edge case: `<` inside Templater should not trigger HTML detection
2592        let rule = MD033NoInlineHtml::default();
2593        let content = "<%* if (x < 5) { tR += 'small'; } %>";
2594        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2595        let result = rule.check(&ctx).unwrap();
2596        assert!(
2597            result.is_empty(),
2598            "Templater with angle brackets in conditions should not be flagged. Got: {result:?}"
2599        );
2600    }
2601
2602    #[test]
2603    fn test_md033_templater_mixed_with_html_only_html_flagged() {
2604        // Templater syntax mixed with actual HTML - only HTML should be flagged
2605        let rule = MD033NoInlineHtml::default();
2606        let content = "<% tp.date.now() %> is today's date. <div>This is HTML</div>";
2607        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2608        let result = rule.check(&ctx).unwrap();
2609        assert_eq!(result.len(), 1, "Should only flag the HTML div tag");
2610        assert!(
2611            result[0].message.contains("<div>"),
2612            "Should flag <div>, got: {}",
2613            result[0].message
2614        );
2615    }
2616
2617    #[test]
2618    fn test_md033_templater_in_heading_not_flagged() {
2619        // Templater in markdown heading
2620        let rule = MD033NoInlineHtml::default();
2621        let content = "# <% tp.file.title %>";
2622        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2623        let result = rule.check(&ctx).unwrap();
2624        assert!(
2625            result.is_empty(),
2626            "Templater in heading should not be flagged. Got: {result:?}"
2627        );
2628    }
2629
2630    #[test]
2631    fn test_md033_templater_multiple_on_same_line_not_flagged() {
2632        // Multiple Templater blocks on same line
2633        let rule = MD033NoInlineHtml::default();
2634        let content = "From <% tp.date.now() %> to <% tp.date.tomorrow() %> we have meetings.";
2635        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2636        let result = rule.check(&ctx).unwrap();
2637        assert!(
2638            result.is_empty(),
2639            "Multiple Templater blocks should not be flagged. Got: {result:?}"
2640        );
2641    }
2642
2643    #[test]
2644    fn test_md033_templater_in_code_block_not_flagged() {
2645        // Templater syntax in code blocks should not be flagged (code blocks are skipped)
2646        let rule = MD033NoInlineHtml::default();
2647        let content = "```\n<% tp.date.now() %>\n```";
2648        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2649        let result = rule.check(&ctx).unwrap();
2650        assert!(
2651            result.is_empty(),
2652            "Templater in code block should not be flagged. Got: {result:?}"
2653        );
2654    }
2655
2656    #[test]
2657    fn test_md033_templater_in_inline_code_not_flagged() {
2658        // Templater syntax in inline code span should not be flagged
2659        let rule = MD033NoInlineHtml::default();
2660        let content = "Use `<% tp.date.now() %>` for current date.";
2661        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2662        let result = rule.check(&ctx).unwrap();
2663        assert!(
2664            result.is_empty(),
2665            "Templater in inline code should not be flagged. Got: {result:?}"
2666        );
2667    }
2668
2669    #[test]
2670    fn test_md033_templater_also_works_in_standard_flavor() {
2671        // Templater syntax should also not be flagged in Standard flavor
2672        // because the HTML parser doesn't recognize `<%` as a valid tag
2673        let rule = MD033NoInlineHtml::default();
2674        let content = "<% tp.date.now() %> works everywhere.";
2675        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2676        let result = rule.check(&ctx).unwrap();
2677        assert!(
2678            result.is_empty(),
2679            "Templater should not be flagged even in Standard flavor. Got: {result:?}"
2680        );
2681    }
2682
2683    #[test]
2684    fn test_md033_templater_empty_tag_not_flagged() {
2685        // Empty Templater tags
2686        let rule = MD033NoInlineHtml::default();
2687        let content = "<%>";
2688        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2689        let result = rule.check(&ctx).unwrap();
2690        assert!(
2691            result.is_empty(),
2692            "Empty Templater-like tag should not be flagged. Got: {result:?}"
2693        );
2694    }
2695
2696    #[test]
2697    fn test_md033_templater_unclosed_not_flagged() {
2698        // Unclosed Templater tags - these are template errors, not HTML
2699        let rule = MD033NoInlineHtml::default();
2700        let content = "<% tp.date.now() without closing tag";
2701        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2702        let result = rule.check(&ctx).unwrap();
2703        assert!(
2704            result.is_empty(),
2705            "Unclosed Templater should not be flagged as HTML. Got: {result:?}"
2706        );
2707    }
2708
2709    #[test]
2710    fn test_md033_templater_with_newlines_inside_not_flagged() {
2711        // Templater with newlines inside the expression
2712        let rule = MD033NoInlineHtml::default();
2713        let content = r#"<% tp.date.now("YYYY") +
2714"-" +
2715tp.date.now("MM") %>"#;
2716        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2717        let result = rule.check(&ctx).unwrap();
2718        assert!(
2719            result.is_empty(),
2720            "Templater with internal newlines should not be flagged. Got: {result:?}"
2721        );
2722    }
2723
2724    #[test]
2725    fn test_md033_erb_style_tags_not_flagged() {
2726        // ERB/EJS style tags (similar to Templater) are also not HTML
2727        // This documents the general principle that `<%` is not valid HTML
2728        let rule = MD033NoInlineHtml::default();
2729        let content = "<%= variable %> and <% code %> and <%# comment %>";
2730        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2731        let result = rule.check(&ctx).unwrap();
2732        assert!(
2733            result.is_empty(),
2734            "ERB/EJS style tags should not be flagged as HTML. Got: {result:?}"
2735        );
2736    }
2737
2738    #[test]
2739    fn test_md033_templater_complex_expression_not_flagged() {
2740        // Complex Templater expression with multiple function calls
2741        let rule = MD033NoInlineHtml::default();
2742        let content = r#"<%*
2743const file = tp.file.title;
2744const date = tp.date.now("YYYY-MM-DD");
2745const folder = tp.file.folder();
2746tR += `# ${file}\n\nCreated: ${date}\nIn: ${folder}`;
2747%>"#;
2748        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2749        let result = rule.check(&ctx).unwrap();
2750        assert!(
2751            result.is_empty(),
2752            "Complex Templater expression should not be flagged. Got: {result:?}"
2753        );
2754    }
2755
2756    #[test]
2757    fn test_md033_percent_sign_variations_not_flagged() {
2758        // Various patterns starting with <% that should all be safe
2759        let rule = MD033NoInlineHtml::default();
2760        let patterns = [
2761            "<%=",  // ERB output
2762            "<%#",  // ERB comment
2763            "<%%",  // Double percent
2764            "<%!",  // Some template engines
2765            "<%@",  // JSP directive
2766            "<%--", // JSP comment
2767        ];
2768        for pattern in patterns {
2769            let content = format!("{pattern} content %>");
2770            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
2771            let result = rule.check(&ctx).unwrap();
2772            assert!(
2773                result.is_empty(),
2774                "Pattern {pattern} should not be flagged. Got: {result:?}"
2775            );
2776        }
2777    }
2778
2779    // ───── Bug #3: Bracket escaping in image-inside-link conversion ─────
2780    //
2781    // When <a> wraps already-converted markdown image text, the bracket escaping
2782    // must be skipped to produce valid [![alt](url)](href) instead of !\[\](url)
2783
2784    #[test]
2785    fn test_md033_fix_a_wrapping_markdown_image_no_escaped_brackets() {
2786        // When <a> wraps a markdown image (from a prior fix iteration),
2787        // the result should be [![](url)](href) — no escaped brackets
2788        let rule = MD033NoInlineHtml::with_fix(true);
2789        let content = r#"<a href="https://example.com">![](https://example.com/image.png)</a>"#;
2790        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2791        let fixed = rule.fix(&ctx).unwrap();
2792
2793        assert_eq!(fixed, "[![](https://example.com/image.png)](https://example.com)",);
2794        assert!(!fixed.contains(r"\["), "Must not escape brackets: {fixed}");
2795        assert!(!fixed.contains(r"\]"), "Must not escape brackets: {fixed}");
2796    }
2797
2798    #[test]
2799    fn test_md033_fix_a_wrapping_markdown_image_with_alt() {
2800        // <a> wrapping ![alt](url) preserves alt text in linked image
2801        let rule = MD033NoInlineHtml::with_fix(true);
2802        let content =
2803            r#"<a href="https://github.com/repo">![Contributors](https://contrib.rocks/image?repo=org/repo)</a>"#;
2804        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2805        let fixed = rule.fix(&ctx).unwrap();
2806
2807        assert_eq!(
2808            fixed,
2809            "[![Contributors](https://contrib.rocks/image?repo=org/repo)](https://github.com/repo)"
2810        );
2811    }
2812
2813    #[test]
2814    fn test_md033_fix_img_without_alt_produces_empty_alt() {
2815        let rule = MD033NoInlineHtml::with_fix(true);
2816        let content = r#"<img src="photo.jpg" />"#;
2817        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2818        let fixed = rule.fix(&ctx).unwrap();
2819
2820        assert_eq!(fixed, "![](photo.jpg)");
2821    }
2822
2823    #[test]
2824    fn test_md033_fix_a_with_plain_text_still_escapes_brackets() {
2825        // Plain text brackets inside <a> SHOULD be escaped
2826        let rule = MD033NoInlineHtml::with_fix(true);
2827        let content = r#"<a href="https://example.com">text with [brackets]</a>"#;
2828        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2829        let fixed = rule.fix(&ctx).unwrap();
2830
2831        assert!(
2832            fixed.contains(r"\[brackets\]"),
2833            "Plain text brackets should be escaped: {fixed}"
2834        );
2835    }
2836
2837    #[test]
2838    fn test_md033_fix_a_with_image_plus_extra_text_escapes_brackets() {
2839        // Mixed content: image followed by bracketed text — brackets must be escaped
2840        // The image detection must NOT match partial content
2841        let rule = MD033NoInlineHtml::with_fix(true);
2842        let content = r#"<a href="/link">![](img.png) see [docs]</a>"#;
2843        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2844        let fixed = rule.fix(&ctx).unwrap();
2845
2846        // "see [docs]" brackets should be escaped since inner content is mixed
2847        assert!(
2848            fixed.contains(r"\[docs\]"),
2849            "Brackets in mixed image+text content should be escaped: {fixed}"
2850        );
2851    }
2852
2853    #[test]
2854    fn test_md033_fix_img_in_a_end_to_end() {
2855        // End-to-end: verify that iterative fixing of <a><img></a>
2856        // produces the correct final result through the fix coordinator
2857        use crate::config::Config;
2858        use crate::fix_coordinator::FixCoordinator;
2859
2860        let rule = MD033NoInlineHtml::with_fix(true);
2861        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2862
2863        let mut content =
2864            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image?repo=org/repo" /></a>"#
2865                .to_string();
2866        let config = Config::default();
2867        let coordinator = FixCoordinator::new();
2868
2869        let result = coordinator
2870            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2871            .unwrap();
2872
2873        assert_eq!(
2874            content, "[![](https://contrib.rocks/image?repo=org/repo)](https://github.com/org/repo)",
2875            "End-to-end: <a><img></a> should become valid linked image"
2876        );
2877        assert!(result.converged);
2878        assert!(!content.contains(r"\["), "No escaped brackets: {content}");
2879    }
2880
2881    #[test]
2882    fn test_md033_fix_img_in_a_with_alt_end_to_end() {
2883        use crate::config::Config;
2884        use crate::fix_coordinator::FixCoordinator;
2885
2886        let rule = MD033NoInlineHtml::with_fix(true);
2887        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2888
2889        let mut content =
2890            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image" alt="Contributors" /></a>"#
2891                .to_string();
2892        let config = Config::default();
2893        let coordinator = FixCoordinator::new();
2894
2895        let result = coordinator
2896            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2897            .unwrap();
2898
2899        assert_eq!(
2900            content,
2901            "[![Contributors](https://contrib.rocks/image)](https://github.com/org/repo)",
2902        );
2903        assert!(result.converged);
2904    }
2905}