Skip to main content

rumdl_lib/rules/
md033_no_inline_html.rs

1//!
2//! Rule MD033: No HTML tags
3//!
4//! See [docs/md033.md](../../docs/md033.md) for full documentation, configuration, and examples.
5
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::regex_cache::*;
8use std::collections::HashSet;
9
10mod md033_config;
11use md033_config::{MD033Config, MD033FixMode};
12
/// Rule object for MD033 ("No HTML tags").
///
/// Bundles the rule configuration with lookup sets that every constructor in
/// this file derives from that configuration via its `*_set()` helpers.
13#[derive(Clone)]
14pub struct MD033NoInlineHtml {
    // Configuration this rule instance was built from.
15    config: MD033Config,
    // Result of `config.allowed_set()`; consulted by `is_tag_allowed`.
16    allowed: HashSet<String>,
    // Result of `config.disallowed_set()`; consulted by `is_tag_disallowed`.
17    disallowed: HashSet<String>,
    // Result of `config.drop_attributes_set()`; consulted by `is_droppable_attribute`.
18    drop_attributes: HashSet<String>,
    // Result of `config.strip_wrapper_elements_set()`; consulted by the strippable-wrapper checks.
19    strip_wrapper_elements: HashSet<String>,
20}
21
22impl Default for MD033NoInlineHtml {
23    fn default() -> Self {
24        let config = MD033Config::default();
25        let allowed = config.allowed_set();
26        let disallowed = config.disallowed_set();
27        let drop_attributes = config.drop_attributes_set();
28        let strip_wrapper_elements = config.strip_wrapper_elements_set();
29        Self {
30            config,
31            allowed,
32            disallowed,
33            drop_attributes,
34            strip_wrapper_elements,
35        }
36    }
37}
38
39impl MD033NoInlineHtml {
40    pub fn new() -> Self {
41        Self::default()
42    }
43
44    pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
45        let config = MD033Config {
46            allowed: allowed_vec.clone(),
47            disallowed: Vec::new(),
48            fix: false,
49            ..MD033Config::default()
50        };
51        let allowed = config.allowed_set();
52        let disallowed = config.disallowed_set();
53        let drop_attributes = config.drop_attributes_set();
54        let strip_wrapper_elements = config.strip_wrapper_elements_set();
55        Self {
56            config,
57            allowed,
58            disallowed,
59            drop_attributes,
60            strip_wrapper_elements,
61        }
62    }
63
64    pub fn with_disallowed(disallowed_vec: Vec<String>) -> Self {
65        let config = MD033Config {
66            allowed: Vec::new(),
67            disallowed: disallowed_vec.clone(),
68            fix: false,
69            ..MD033Config::default()
70        };
71        let allowed = config.allowed_set();
72        let disallowed = config.disallowed_set();
73        let drop_attributes = config.drop_attributes_set();
74        let strip_wrapper_elements = config.strip_wrapper_elements_set();
75        Self {
76            config,
77            allowed,
78            disallowed,
79            drop_attributes,
80            strip_wrapper_elements,
81        }
82    }
83
84    /// Create a new rule with auto-fix enabled
85    pub fn with_fix(fix: bool) -> Self {
86        let config = MD033Config {
87            allowed: Vec::new(),
88            disallowed: Vec::new(),
89            fix,
90            ..MD033Config::default()
91        };
92        let allowed = config.allowed_set();
93        let disallowed = config.disallowed_set();
94        let drop_attributes = config.drop_attributes_set();
95        let strip_wrapper_elements = config.strip_wrapper_elements_set();
96        Self {
97            config,
98            allowed,
99            disallowed,
100            drop_attributes,
101            strip_wrapper_elements,
102        }
103    }
104
105    pub fn from_config_struct(config: MD033Config) -> Self {
106        let allowed = config.allowed_set();
107        let disallowed = config.disallowed_set();
108        let drop_attributes = config.drop_attributes_set();
109        let strip_wrapper_elements = config.strip_wrapper_elements_set();
110        Self {
111            config,
112            allowed,
113            disallowed,
114            drop_attributes,
115            strip_wrapper_elements,
116        }
117    }
118
119    // Efficient check for allowed tags using HashSet (case-insensitive)
120    #[inline]
121    fn is_tag_allowed(&self, tag: &str) -> bool {
122        if self.allowed.is_empty() {
123            return false;
124        }
125        // Remove angle brackets and slashes, then split by whitespace or '>'
126        let tag = tag.trim_start_matches('<').trim_start_matches('/');
127        let tag_name = tag
128            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
129            .next()
130            .unwrap_or("");
131        self.allowed.contains(&tag_name.to_lowercase())
132    }
133
134    /// Check if a tag is in the disallowed set (for disallowed-only mode)
135    #[inline]
136    fn is_tag_disallowed(&self, tag: &str) -> bool {
137        if self.disallowed.is_empty() {
138            return false;
139        }
140        // Remove angle brackets and slashes, then split by whitespace or '>'
141        let tag = tag.trim_start_matches('<').trim_start_matches('/');
142        let tag_name = tag
143            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
144            .next()
145            .unwrap_or("");
146        self.disallowed.contains(&tag_name.to_lowercase())
147    }
148
149    /// Check if operating in disallowed-only mode
150    #[inline]
151    fn is_disallowed_mode(&self) -> bool {
152        self.config.is_disallowed_mode()
153    }
154
155    // Check if a tag is an HTML comment
156    #[inline]
157    fn is_html_comment(&self, tag: &str) -> bool {
158        tag.starts_with("<!--") && tag.ends_with("-->")
159    }
160
161    /// Check if a tag name is a valid HTML element or custom element.
162    /// Returns false for placeholder syntax like `<NAME>`, `<resource>`, `<actual>`.
163    ///
164    /// Per HTML spec, custom elements must contain a hyphen (e.g., `<my-component>`).
165    #[inline]
166    fn is_html_element_or_custom(tag_name: &str) -> bool {
167        const HTML_ELEMENTS: &[&str] = &[
168            // Document structure
169            "html",
170            "head",
171            "body",
172            "title",
173            "base",
174            "link",
175            "meta",
176            "style",
177            // Sections
178            "article",
179            "section",
180            "nav",
181            "aside",
182            "h1",
183            "h2",
184            "h3",
185            "h4",
186            "h5",
187            "h6",
188            "hgroup",
189            "header",
190            "footer",
191            "address",
192            "main",
193            "search",
194            // Grouping
195            "p",
196            "hr",
197            "pre",
198            "blockquote",
199            "ol",
200            "ul",
201            "menu",
202            "li",
203            "dl",
204            "dt",
205            "dd",
206            "figure",
207            "figcaption",
208            "div",
209            // Text-level
210            "a",
211            "em",
212            "strong",
213            "small",
214            "s",
215            "cite",
216            "q",
217            "dfn",
218            "abbr",
219            "ruby",
220            "rt",
221            "rp",
222            "data",
223            "time",
224            "code",
225            "var",
226            "samp",
227            "kbd",
228            "sub",
229            "sup",
230            "i",
231            "b",
232            "u",
233            "mark",
234            "bdi",
235            "bdo",
236            "span",
237            "br",
238            "wbr",
239            // Edits
240            "ins",
241            "del",
242            // Embedded
243            "picture",
244            "source",
245            "img",
246            "iframe",
247            "embed",
248            "object",
249            "param",
250            "video",
251            "audio",
252            "track",
253            "map",
254            "area",
255            "svg",
256            "math",
257            "canvas",
258            // Tables
259            "table",
260            "caption",
261            "colgroup",
262            "col",
263            "tbody",
264            "thead",
265            "tfoot",
266            "tr",
267            "td",
268            "th",
269            // Forms
270            "form",
271            "label",
272            "input",
273            "button",
274            "select",
275            "datalist",
276            "optgroup",
277            "option",
278            "textarea",
279            "output",
280            "progress",
281            "meter",
282            "fieldset",
283            "legend",
284            // Interactive
285            "details",
286            "summary",
287            "dialog",
288            // Scripting
289            "script",
290            "noscript",
291            "template",
292            "slot",
293            // Deprecated but recognized
294            "acronym",
295            "applet",
296            "basefont",
297            "big",
298            "center",
299            "dir",
300            "font",
301            "frame",
302            "frameset",
303            "isindex",
304            "marquee",
305            "noembed",
306            "noframes",
307            "plaintext",
308            "strike",
309            "tt",
310            "xmp",
311        ];
312
313        let lower = tag_name.to_ascii_lowercase();
314        if HTML_ELEMENTS.contains(&lower.as_str()) {
315            return true;
316        }
317        // Custom elements must contain a hyphen per HTML spec
318        tag_name.contains('-')
319    }
320
321    // Check if a tag is likely a programming type annotation rather than HTML
322    #[inline]
323    fn is_likely_type_annotation(&self, tag: &str) -> bool {
324        // Common programming type names that are often used in generics
325        const COMMON_TYPES: &[&str] = &[
326            "string",
327            "number",
328            "any",
329            "void",
330            "null",
331            "undefined",
332            "array",
333            "promise",
334            "function",
335            "error",
336            "date",
337            "regexp",
338            "symbol",
339            "bigint",
340            "map",
341            "set",
342            "weakmap",
343            "weakset",
344            "iterator",
345            "generator",
346            "t",
347            "u",
348            "v",
349            "k",
350            "e", // Common single-letter type parameters
351            "userdata",
352            "apiresponse",
353            "config",
354            "options",
355            "params",
356            "result",
357            "response",
358            "request",
359            "data",
360            "item",
361            "element",
362            "node",
363        ];
364
365        let tag_content = tag
366            .trim_start_matches('<')
367            .trim_end_matches('>')
368            .trim_start_matches('/');
369        let tag_name = tag_content
370            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
371            .next()
372            .unwrap_or("");
373
374        // Check if it's a simple tag (no attributes) with a common type name
375        if !tag_content.contains(' ') && !tag_content.contains('=') {
376            COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
377        } else {
378            false
379        }
380    }
381
382    // Check if a tag is actually an email address in angle brackets
383    #[inline]
384    fn is_email_address(&self, tag: &str) -> bool {
385        let content = tag.trim_start_matches('<').trim_end_matches('>');
386        // Simple email pattern: contains @ and has reasonable structure
387        content.contains('@')
388            && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
389            && content.split('@').count() == 2
390            && content.split('@').all(|part| !part.is_empty())
391    }
392
393    // Check if a tag has the markdown attribute (MkDocs/Material for MkDocs)
394    #[inline]
395    fn has_markdown_attribute(&self, tag: &str) -> bool {
396        // Check for various forms of markdown attribute
397        // Examples: <div markdown>, <div markdown="1">, <div class="result" markdown>
398        tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
399    }
400
401    /// Check if a tag contains JSX-specific attributes that indicate it's JSX, not HTML
402    /// JSX uses different attribute names than HTML:
403    /// - `className` instead of `class`
404    /// - `htmlFor` instead of `for`
405    /// - camelCase event handlers (`onClick`, `onChange`, `onSubmit`, etc.)
406    /// - JSX expression syntax `={...}` for dynamic values
407    #[inline]
408    fn has_jsx_attributes(tag: &str) -> bool {
409        // JSX-specific attribute names (HTML uses class, for, onclick, etc.)
410        tag.contains("className")
411            || tag.contains("htmlFor")
412            || tag.contains("dangerouslySetInnerHTML")
413            // camelCase event handlers (JSX uses onClick, HTML uses onclick)
414            || tag.contains("onClick")
415            || tag.contains("onChange")
416            || tag.contains("onSubmit")
417            || tag.contains("onFocus")
418            || tag.contains("onBlur")
419            || tag.contains("onKeyDown")
420            || tag.contains("onKeyUp")
421            || tag.contains("onKeyPress")
422            || tag.contains("onMouseDown")
423            || tag.contains("onMouseUp")
424            || tag.contains("onMouseEnter")
425            || tag.contains("onMouseLeave")
426            // JSX expression syntax: ={expression} or ={ expression }
427            || tag.contains("={")
428    }
429
430    // Check if a tag is actually a URL in angle brackets
431    #[inline]
432    fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
433        let content = tag.trim_start_matches('<').trim_end_matches('>');
434        // Check for common URL schemes
435        content.starts_with("http://")
436            || content.starts_with("https://")
437            || content.starts_with("ftp://")
438            || content.starts_with("ftps://")
439            || content.starts_with("mailto:")
440    }
441
442    #[inline]
443    fn is_relaxed_fix_mode(&self) -> bool {
444        self.config.fix_mode == MD033FixMode::Relaxed
445    }
446
447    #[inline]
448    fn is_droppable_attribute(&self, attr_name: &str) -> bool {
449        // Event handler attributes (onclick, onload, etc.) are never droppable
450        // because they can execute arbitrary JavaScript.
451        if attr_name.starts_with("on") && attr_name.len() > 2 {
452            return false;
453        }
454        self.drop_attributes.contains(attr_name)
455            || (attr_name.starts_with("data-")
456                && (self.drop_attributes.contains("data-*") || self.drop_attributes.contains("data-")))
457    }
458
459    #[inline]
460    fn is_strippable_wrapper(&self, tag_name: &str) -> bool {
461        self.is_relaxed_fix_mode() && self.strip_wrapper_elements.contains(tag_name)
462    }
463
464    /// Check whether `byte_offset` sits directly inside a top-level strippable
465    /// wrapper element (e.g. `<p>`).  Returns `true` only when:
466    ///  1. The nearest unclosed opening tag before the offset is a configured
467    ///     wrapper element, AND
468    ///  2. That wrapper is itself NOT nested inside another HTML element.
469    ///
470    /// Condition 2 prevents converting inner content when the wrapper cannot
471    /// be stripped (e.g. `<div><p><img/></p></div>` -- stripping `<p>` is
472    /// blocked because it is nested, so converting `<img>` would leave
473    /// markdown inside an HTML block where it won't render).
474    fn is_inside_strippable_wrapper(&self, content: &str, byte_offset: usize) -> bool {
475        if byte_offset == 0 {
476            return false;
477        }
478        let before = content[..byte_offset].trim_end();
479        if !before.ends_with('>') || before.ends_with("->") {
480            return false;
481        }
482        if let Some(last_lt) = before.rfind('<') {
483            let potential_tag = &before[last_lt..];
484            if potential_tag.starts_with("</") || potential_tag.starts_with("<!--") {
485                return false;
486            }
487            let parent_name = potential_tag
488                .trim_start_matches('<')
489                .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
490                .next()
491                .unwrap_or("")
492                .to_lowercase();
493            if !self.strip_wrapper_elements.contains(&parent_name) {
494                return false;
495            }
496            // Verify the wrapper itself is not nested inside another element.
497            let wrapper_before = before[..last_lt].trim_end();
498            if wrapper_before.ends_with('>')
499                && !wrapper_before.ends_with("->")
500                && let Some(outer_lt) = wrapper_before.rfind('<')
501                && let outer_tag = &wrapper_before[outer_lt..]
502                && !outer_tag.starts_with("</")
503                && !outer_tag.starts_with("<!--")
504            {
505                return false;
506            }
507            return true;
508        }
509        false
510    }
511
512    /// Convert paired HTML tags to their Markdown equivalents.
513    /// Returns None if the tag cannot be safely converted (has nested tags, HTML entities, etc.)
514    fn convert_to_markdown(tag_name: &str, inner_content: &str) -> Option<String> {
515        // Skip if content contains nested HTML tags
516        if inner_content.contains('<') {
517            return None;
518        }
519        // Skip if content contains HTML entities (e.g., &vert;, &amp;, &lt;)
520        // These need HTML context to render correctly; markdown won't process them
521        if inner_content.contains('&') && inner_content.contains(';') {
522            // Check for common HTML entity patterns
523            let has_entity = inner_content
524                .split('&')
525                .skip(1)
526                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
527            if has_entity {
528                return None;
529            }
530        }
531        match tag_name {
532            "em" | "i" => Some(format!("*{inner_content}*")),
533            "strong" | "b" => Some(format!("**{inner_content}**")),
534            "code" => {
535                // Handle backticks in content by using double backticks with padding
536                if inner_content.contains('`') {
537                    Some(format!("`` {inner_content} ``"))
538                } else {
539                    Some(format!("`{inner_content}`"))
540                }
541            }
542            _ => None,
543        }
544    }
545
546    /// Convert self-closing HTML tags to their Markdown equivalents.
547    fn convert_self_closing_to_markdown(&self, tag_name: &str, opening_tag: &str) -> Option<String> {
548        match tag_name {
549            "br" => match self.config.br_style {
550                md033_config::BrStyle::TrailingSpaces => Some("  \n".to_string()),
551                md033_config::BrStyle::Backslash => Some("\\\n".to_string()),
552            },
553            "hr" => Some("\n---\n".to_string()),
554            "img" => self.convert_img_to_markdown(opening_tag),
555            _ => None,
556        }
557    }
558
559    /// Parse all attributes from an HTML tag into a list of (name, value) pairs.
560    /// This provides proper attribute parsing instead of naive string matching.
561    fn parse_attributes(tag: &str) -> Vec<(String, Option<String>)> {
562        let mut attrs = Vec::new();
563
564        // Remove < and > and tag name
565        let tag_content = tag.trim_start_matches('<').trim_end_matches('>').trim_end_matches('/');
566
567        // Find first whitespace to skip tag name
568        let attr_start = tag_content
569            .find(|c: char| c.is_whitespace())
570            .map(|i| i + 1)
571            .unwrap_or(tag_content.len());
572
573        if attr_start >= tag_content.len() {
574            return attrs;
575        }
576
577        let attr_str = &tag_content[attr_start..];
578        let mut chars = attr_str.chars().peekable();
579
580        while chars.peek().is_some() {
581            // Skip whitespace
582            while chars.peek().is_some_and(|c| c.is_whitespace()) {
583                chars.next();
584            }
585
586            if chars.peek().is_none() {
587                break;
588            }
589
590            // Read attribute name
591            let mut attr_name = String::new();
592            while let Some(&c) = chars.peek() {
593                if c.is_whitespace() || c == '=' || c == '>' || c == '/' {
594                    break;
595                }
596                attr_name.push(c);
597                chars.next();
598            }
599
600            if attr_name.is_empty() {
601                break;
602            }
603
604            // Skip whitespace before =
605            while chars.peek().is_some_and(|c| c.is_whitespace()) {
606                chars.next();
607            }
608
609            // Check for = and value
610            if chars.peek() == Some(&'=') {
611                chars.next(); // consume =
612
613                // Skip whitespace after =
614                while chars.peek().is_some_and(|c| c.is_whitespace()) {
615                    chars.next();
616                }
617
618                // Read value
619                let mut value = String::new();
620                if let Some(&quote) = chars.peek() {
621                    if quote == '"' || quote == '\'' {
622                        chars.next(); // consume opening quote
623                        for c in chars.by_ref() {
624                            if c == quote {
625                                break;
626                            }
627                            value.push(c);
628                        }
629                    } else {
630                        // Unquoted value
631                        while let Some(&c) = chars.peek() {
632                            if c.is_whitespace() || c == '>' || c == '/' {
633                                break;
634                            }
635                            value.push(c);
636                            chars.next();
637                        }
638                    }
639                }
640                attrs.push((attr_name.to_ascii_lowercase(), Some(value)));
641            } else {
642                // Boolean attribute (no value)
643                attrs.push((attr_name.to_ascii_lowercase(), None));
644            }
645        }
646
647        attrs
648    }
649
650    /// Extract an HTML attribute value from a tag string.
651    /// Handles double quotes, single quotes, and unquoted values.
652    /// Returns None if the attribute is not found.
653    fn extract_attribute(tag: &str, attr_name: &str) -> Option<String> {
654        let attrs = Self::parse_attributes(tag);
655        let attr_lower = attr_name.to_ascii_lowercase();
656
657        attrs
658            .into_iter()
659            .find(|(name, _)| name == &attr_lower)
660            .and_then(|(_, value)| value)
661    }
662
663    /// Check if an HTML tag has extra attributes beyond the specified allowed ones.
664    /// Uses proper attribute parsing to avoid false positives from string matching.
665    fn has_extra_attributes(&self, tag: &str, allowed_attrs: &[&str]) -> bool {
666        let attrs = Self::parse_attributes(tag);
667
668        // All event handlers (on*) are dangerous
669        // Plus common attributes that would be lost in markdown conversion
670        const DANGEROUS_ATTR_PREFIXES: &[&str] = &["on"]; // onclick, onload, onerror, etc.
671        const DANGEROUS_ATTRS: &[&str] = &[
672            "class",
673            "id",
674            "style",
675            "target",
676            "rel",
677            "download",
678            "referrerpolicy",
679            "crossorigin",
680            "loading",
681            "decoding",
682            "fetchpriority",
683            "sizes",
684            "srcset",
685            "usemap",
686            "ismap",
687            "width",
688            "height",
689            "name",   // anchor names
690            "data-*", // data attributes (checked separately)
691        ];
692
693        for (attr_name, _) in attrs {
694            // Skip allowed attributes (list is small, linear scan is efficient)
695            if allowed_attrs.iter().any(|a| a.to_ascii_lowercase() == attr_name) {
696                continue;
697            }
698
699            if self.is_relaxed_fix_mode() {
700                if self.is_droppable_attribute(&attr_name) {
701                    continue;
702                }
703                return true;
704            }
705
706            // Check for event handlers (on*)
707            for prefix in DANGEROUS_ATTR_PREFIXES {
708                if attr_name.starts_with(prefix) && attr_name.len() > prefix.len() {
709                    return true;
710                }
711            }
712
713            // Check for data-* attributes
714            if attr_name.starts_with("data-") {
715                return true;
716            }
717
718            // Check for other dangerous attributes
719            if DANGEROUS_ATTRS.contains(&attr_name.as_str()) {
720                return true;
721            }
722        }
723
724        false
725    }
726
727    /// Convert `<a href="url">text</a>` to `[text](url)` or `[text](url "title")`
728    /// Returns None if conversion is not safe.
729    fn convert_a_to_markdown(&self, opening_tag: &str, inner_content: &str) -> Option<String> {
730        // Extract href attribute
731        let href = Self::extract_attribute(opening_tag, "href")?;
732
733        // Check URL is safe
734        if !MD033Config::is_safe_url(&href) {
735            return None;
736        }
737
738        // Check for nested HTML tags in content
739        if inner_content.contains('<') {
740            return None;
741        }
742
743        // Check for HTML entities that wouldn't render correctly in markdown
744        if inner_content.contains('&') && inner_content.contains(';') {
745            let has_entity = inner_content
746                .split('&')
747                .skip(1)
748                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
749            if has_entity {
750                return None;
751            }
752        }
753
754        // Extract optional title attribute
755        let title = Self::extract_attribute(opening_tag, "title");
756
757        // Check for extra dangerous attributes (title is allowed)
758        if self.has_extra_attributes(opening_tag, &["href", "title"]) {
759            return None;
760        }
761
762        // If inner content is exactly a markdown image (from a prior <img> fix),
763        // use it directly without bracket escaping to produce valid [![alt](src)](href).
764        // Must verify the entire content is a single image — not mixed content like
765        // "![](url) extra [text]" where trailing brackets still need escaping.
766        let trimmed_inner = inner_content.trim();
767        let is_markdown_image =
768            trimmed_inner.starts_with("![") && trimmed_inner.contains("](") && trimmed_inner.ends_with(')') && {
769                // Verify the closing ](url) accounts for the rest of the content
770                // by finding the image's ]( and checking nothing follows the final )
771                if let Some(bracket_close) = trimmed_inner.rfind("](") {
772                    let after_paren = &trimmed_inner[bracket_close + 2..];
773                    // The rest should be just "url)" — find the matching close paren
774                    after_paren.ends_with(')')
775                        && after_paren.chars().filter(|&c| c == ')').count()
776                            >= after_paren.chars().filter(|&c| c == '(').count()
777                } else {
778                    false
779                }
780            };
781        let escaped_text = if is_markdown_image {
782            trimmed_inner.to_string()
783        } else {
784            // Escape special markdown characters in link text
785            // Brackets need escaping to avoid breaking the link syntax
786            inner_content.replace('[', r"\[").replace(']', r"\]")
787        };
788
789        // Escape parentheses in URL
790        let escaped_url = href.replace('(', "%28").replace(')', "%29");
791
792        // Format with or without title
793        if let Some(title_text) = title {
794            // Escape quotes in title
795            let escaped_title = title_text.replace('"', r#"\""#);
796            Some(format!("[{escaped_text}]({escaped_url} \"{escaped_title}\")"))
797        } else {
798            Some(format!("[{escaped_text}]({escaped_url})"))
799        }
800    }
801
802    /// Convert `<img src="url" alt="text">` to `![alt](src)` or `![alt](src "title")`
803    /// Returns None if conversion is not safe.
804    fn convert_img_to_markdown(&self, tag: &str) -> Option<String> {
805        // Extract src attribute (required)
806        let src = Self::extract_attribute(tag, "src")?;
807
808        // Check URL is safe
809        if !MD033Config::is_safe_url(&src) {
810            return None;
811        }
812
813        // Extract alt attribute (optional, default to empty)
814        let alt = Self::extract_attribute(tag, "alt").unwrap_or_default();
815
816        // Extract optional title attribute
817        let title = Self::extract_attribute(tag, "title");
818
819        // Check for extra dangerous attributes (title is allowed)
820        if self.has_extra_attributes(tag, &["src", "alt", "title"]) {
821            return None;
822        }
823
824        // Escape special markdown characters in alt text
825        let escaped_alt = alt.replace('[', r"\[").replace(']', r"\]");
826
827        // Escape parentheses in URL
828        let escaped_url = src.replace('(', "%28").replace(')', "%29");
829
830        // Format with or without title
831        if let Some(title_text) = title {
832            // Escape quotes in title
833            let escaped_title = title_text.replace('"', r#"\""#);
834            Some(format!("![{escaped_alt}]({escaped_url} \"{escaped_title}\")"))
835        } else {
836            Some(format!("![{escaped_alt}]({escaped_url})"))
837        }
838    }
839
840    /// Check if an HTML tag has attributes that would make conversion unsafe
841    fn has_significant_attributes(opening_tag: &str) -> bool {
842        // Tags with just whitespace or empty are fine
843        let tag_content = opening_tag
844            .trim_start_matches('<')
845            .trim_end_matches('>')
846            .trim_end_matches('/');
847
848        // Split by whitespace; if there's more than the tag name, it has attributes
849        let parts: Vec<&str> = tag_content.split_whitespace().collect();
850        parts.len() > 1
851    }
852
853    /// Check if a tag appears to be nested inside another HTML element
854    /// by looking at the surrounding context (e.g., `<code><em>text</em></code>`)
855    fn is_nested_in_html(content: &str, tag_byte_start: usize, tag_byte_end: usize) -> bool {
856        // Check if there's a `>` immediately before this tag (indicating inside another element)
857        if tag_byte_start > 0 {
858            let before = &content[..tag_byte_start];
859            let before_trimmed = before.trim_end();
860            if before_trimmed.ends_with('>') && !before_trimmed.ends_with("->") {
861                // Check it's not a closing tag or comment
862                if let Some(last_lt) = before_trimmed.rfind('<') {
863                    let potential_tag = &before_trimmed[last_lt..];
864                    // Skip if it's a closing tag (</...>) or comment (<!--)
865                    if !potential_tag.starts_with("</") && !potential_tag.starts_with("<!--") {
866                        return true;
867                    }
868                }
869            }
870        }
871        // Check if there's a `<` immediately after the closing tag (indicating inside another element)
872        if tag_byte_end < content.len() {
873            let after = &content[tag_byte_end..];
874            let after_trimmed = after.trim_start();
875            if after_trimmed.starts_with("</") {
876                return true;
877            }
878        }
879        false
880    }
881
882    /// Calculate fix to remove HTML tags while keeping content.
883    ///
884    /// For self-closing tags like `<br/>`, returns a single fix to remove the tag.
885    /// For paired tags like `<span>text</span>`, returns the replacement text (just the content).
886    ///
887    /// Returns (range, replacement_text) where range is the bytes to replace
888    /// and replacement_text is what to put there (content without tags, or empty for self-closing).
889    ///
890    /// When `in_html_block` is true, returns None in conservative mode.  In
891    /// relaxed mode two exceptions apply:
892    /// - Strippable wrapper elements (e.g. `<p>`) bypass the block guard so
893    ///   they can be stripped even though they ARE the HTML block.
894    /// - Self-closing tags whose direct parent is a strippable wrapper also
895    ///   bypass the guard so inner content can be converted first.
896    fn calculate_fix(
897        &self,
898        content: &str,
899        opening_tag: &str,
900        tag_byte_start: usize,
901        in_html_block: bool,
902    ) -> Option<(std::ops::Range<usize>, String)> {
903        // Extract tag name from opening tag
904        let tag_name = opening_tag
905            .trim_start_matches('<')
906            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
907            .next()?
908            .to_lowercase();
909
910        // Check if it's a self-closing tag (ends with /> or is a void element like <br>)
911        let is_self_closing =
912            opening_tag.ends_with("/>") || matches!(tag_name.as_str(), "br" | "hr" | "img" | "input" | "meta" | "link");
913
914        if is_self_closing {
915            // When fix is enabled, try to convert to Markdown equivalent.
916            // Skip tags inside HTML blocks (would break structure), UNLESS we
917            // are in relaxed mode and the containing block is a strippable
918            // wrapper -- this lets the inner element be converted first so the
919            // wrapper can be stripped on a subsequent pass.
920            let block_ok = !in_html_block
921                || (self.is_relaxed_fix_mode() && self.is_inside_strippable_wrapper(content, tag_byte_start));
922            if self.config.fix
923                && MD033Config::is_safe_fixable_tag(&tag_name)
924                && block_ok
925                && let Some(markdown) = self.convert_self_closing_to_markdown(&tag_name, opening_tag)
926            {
927                return Some((tag_byte_start..tag_byte_start + opening_tag.len(), markdown));
928            }
929            // Can't convert this self-closing tag to Markdown, don't provide a fix
930            // (e.g., <input>, <meta> - these have no Markdown equivalent without the new img support)
931            return None;
932        }
933
934        // Search for the closing tag after the opening tag (case-insensitive)
935        let search_start = tag_byte_start + opening_tag.len();
936        let search_slice = &content[search_start..];
937
938        // Find closing tag case-insensitively
939        let closing_tag_lower = format!("</{tag_name}>");
940        let closing_pos = search_slice.to_ascii_lowercase().find(&closing_tag_lower);
941
942        if let Some(closing_pos) = closing_pos {
943            // Get actual closing tag from original content to get correct byte length
944            let closing_tag_len = closing_tag_lower.len();
945            let closing_byte_start = search_start + closing_pos;
946            let closing_byte_end = closing_byte_start + closing_tag_len;
947
948            // Extract the content between tags
949            let inner_content = &content[search_start..closing_byte_start];
950
951            // In relaxed mode, check wrapper stripping BEFORE the in_html_block
952            // guard because the wrapper element itself IS the HTML block. We only
953            // strip when:
954            //  - the wrapper is not nested inside another HTML element
955            //  - the inner content no longer contains HTML tags (prevents
956            //    overlapping byte-range replacements within a single fix pass)
957            if self.config.fix && self.is_strippable_wrapper(&tag_name) {
958                if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
959                    return None;
960                }
961                if inner_content.contains('<') {
962                    return None;
963                }
964                return Some((tag_byte_start..closing_byte_end, inner_content.trim().to_string()));
965            }
966
967            // Skip auto-fix if inside an HTML block (like <pre>, <div>, etc.)
968            // Converting tags inside HTML blocks would break the intended structure
969            if in_html_block {
970                return None;
971            }
972
973            // Skip auto-fix if this tag is nested inside another HTML element
974            // e.g., <code><em>text</em></code> - don't convert the inner <em>
975            if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
976                return None;
977            }
978
979            // When fix is enabled and tag is safe to convert, try markdown conversion
980            if self.config.fix && MD033Config::is_safe_fixable_tag(&tag_name) {
981                // Handle <a> tags specially - they require attribute extraction
982                if tag_name == "a" {
983                    if let Some(markdown) = self.convert_a_to_markdown(opening_tag, inner_content) {
984                        return Some((tag_byte_start..closing_byte_end, markdown));
985                    }
986                    // convert_a_to_markdown returned None - unsafe URL, nested HTML, etc.
987                    return None;
988                }
989
990                // For simple tags (em, strong, code, etc.) - no attributes allowed
991                if Self::has_significant_attributes(opening_tag) {
992                    // Don't provide a fix for tags with attributes
993                    // User may want to keep the attributes, so leave as-is
994                    return None;
995                }
996                if let Some(markdown) = Self::convert_to_markdown(&tag_name, inner_content) {
997                    return Some((tag_byte_start..closing_byte_end, markdown));
998                }
999                // convert_to_markdown returned None, meaning content has nested tags or
1000                // HTML entities that shouldn't be converted - leave as-is
1001                return None;
1002            }
1003
1004            // For non-fixable tags, don't provide a fix
1005            // (e.g., <div>content</div>, <span>text</span>)
1006            return None;
1007        }
1008
1009        // If no closing tag found, don't provide a fix (malformed HTML)
1010        None
1011    }
1012}
1013
1014impl Rule for MD033NoInlineHtml {
1015    fn name(&self) -> &'static str {
1016        "MD033"
1017    }
1018
1019    fn description(&self) -> &'static str {
1020        "Inline HTML is not allowed"
1021    }
1022
1023    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
1024        let content = ctx.content;
1025
1026        // Early return: if no HTML tags at all, skip processing
1027        if content.is_empty() || !ctx.likely_has_html() {
1028            return Ok(Vec::new());
1029        }
1030
1031        // Quick check for HTML tag pattern before expensive processing
1032        if !HTML_TAG_QUICK_CHECK.is_match(content) {
1033            return Ok(Vec::new());
1034        }
1035
1036        let mut warnings = Vec::new();
1037
1038        // Use centralized HTML parser to get all HTML tags (including multiline)
1039        let html_tags = ctx.html_tags();
1040
1041        for html_tag in html_tags.iter() {
1042            // Skip closing tags (only warn on opening tags)
1043            if html_tag.is_closing {
1044                continue;
1045            }
1046
1047            let line_num = html_tag.line;
1048            let tag_byte_start = html_tag.byte_offset;
1049
1050            // Reconstruct tag string from byte offsets
1051            let tag = &content[html_tag.byte_offset..html_tag.byte_end];
1052
1053            // Skip tags in code blocks, PyMdown blocks, and block IALs
1054            if ctx
1055                .line_info(line_num)
1056                .is_some_and(|info| info.in_code_block || info.in_pymdown_block || info.is_kramdown_block_ial)
1057            {
1058                continue;
1059            }
1060
1061            // Skip HTML tags inside HTML comments
1062            if ctx.is_in_html_comment(tag_byte_start) {
1063                continue;
1064            }
1065
1066            // Skip HTML comments themselves
1067            if self.is_html_comment(tag) {
1068                continue;
1069            }
1070
1071            // Skip angle brackets inside link reference definition titles
1072            // e.g., [ref]: url "Title with <angle brackets>"
1073            if ctx.is_in_link_title(tag_byte_start) {
1074                continue;
1075            }
1076
1077            // Skip JSX components in MDX files (e.g., <Chart />, <MyComponent>)
1078            if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
1079                continue;
1080            }
1081
1082            // Skip JSX fragments in MDX files (<> and </>)
1083            if ctx.flavor.supports_jsx() && (html_tag.tag_name.is_empty() || tag == "<>" || tag == "</>") {
1084                continue;
1085            }
1086
1087            // Skip elements with JSX-specific attributes in MDX files
1088            // e.g., <div className="...">, <button onClick={handler}>
1089            if ctx.flavor.supports_jsx() && Self::has_jsx_attributes(tag) {
1090                continue;
1091            }
1092
1093            // Skip non-HTML elements (placeholder syntax like <NAME>, <resource>)
1094            if !Self::is_html_element_or_custom(&html_tag.tag_name) {
1095                continue;
1096            }
1097
1098            // Skip likely programming type annotations
1099            if self.is_likely_type_annotation(tag) {
1100                continue;
1101            }
1102
1103            // Skip email addresses in angle brackets
1104            if self.is_email_address(tag) {
1105                continue;
1106            }
1107
1108            // Skip URLs in angle brackets
1109            if self.is_url_in_angle_brackets(tag) {
1110                continue;
1111            }
1112
1113            // Skip tags inside code spans (use byte offset for reliable multi-line span detection)
1114            if ctx.is_byte_offset_in_code_span(tag_byte_start) {
1115                continue;
1116            }
1117
1118            // Determine whether to report this tag based on mode:
1119            // - Disallowed mode: only report tags in the disallowed list
1120            // - Default mode: report all tags except those in the allowed list
1121            if self.is_disallowed_mode() {
1122                // In disallowed mode, skip tags NOT in the disallowed list
1123                if !self.is_tag_disallowed(tag) {
1124                    continue;
1125                }
1126            } else {
1127                // In default mode, skip allowed tags
1128                if self.is_tag_allowed(tag) {
1129                    continue;
1130                }
1131            }
1132
1133            // Skip tags with markdown attribute in MkDocs mode
1134            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
1135                continue;
1136            }
1137
1138            // Check if we're inside an HTML block (like <pre>, <div>, etc.)
1139            let in_html_block = ctx.is_in_html_block(line_num);
1140
1141            // Calculate fix to remove HTML tags but keep content
1142            let fix = self
1143                .calculate_fix(content, tag, tag_byte_start, in_html_block)
1144                .map(|(range, replacement)| Fix { range, replacement });
1145
1146            // Calculate actual end line and column for multiline tags
1147            // Use byte_end - 1 to get the last character position of the tag
1148            let (end_line, end_col) = if html_tag.byte_end > 0 {
1149                ctx.offset_to_line_col(html_tag.byte_end - 1)
1150            } else {
1151                (line_num, html_tag.end_col + 1)
1152            };
1153
1154            // Report the HTML tag
1155            warnings.push(LintWarning {
1156                rule_name: Some(self.name().to_string()),
1157                line: line_num,
1158                column: html_tag.start_col + 1, // Convert to 1-indexed
1159                end_line,                       // Actual end line for multiline tags
1160                end_column: end_col + 1,        // Actual end column
1161                message: format!("Inline HTML found: {tag}"),
1162                severity: Severity::Warning,
1163                fix,
1164            });
1165        }
1166
1167        Ok(warnings)
1168    }
1169
1170    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
1171        // Auto-fix is opt-in: only apply if explicitly enabled in config
1172        if !self.config.fix {
1173            return Ok(ctx.content.to_string());
1174        }
1175
1176        // Get warnings with their inline fixes
1177        let warnings = self.check(ctx)?;
1178        let warnings =
1179            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
1180
1181        // If no warnings with fixes, return original content
1182        if warnings.is_empty() || !warnings.iter().any(|w| w.fix.is_some()) {
1183            return Ok(ctx.content.to_string());
1184        }
1185
1186        // Collect all fixes and sort by range start (descending) to apply from end to beginning
1187        let mut fixes: Vec<_> = warnings
1188            .iter()
1189            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
1190            .collect();
1191        fixes.sort_by(|a, b| b.0.cmp(&a.0));
1192
1193        // Apply fixes from end to beginning to preserve byte offsets
1194        let mut result = ctx.content.to_string();
1195        for (start, end, replacement) in fixes {
1196            if start < result.len() && end <= result.len() && start <= end {
1197                result.replace_range(start..end, replacement);
1198            }
1199        }
1200
1201        Ok(result)
1202    }
1203
1204    fn fix_capability(&self) -> crate::rule::FixCapability {
1205        if self.config.fix {
1206            crate::rule::FixCapability::FullyFixable
1207        } else {
1208            crate::rule::FixCapability::Unfixable
1209        }
1210    }
1211
1212    /// Get the category of this rule for selective processing
1213    fn category(&self) -> RuleCategory {
1214        RuleCategory::Html
1215    }
1216
1217    /// Check if this rule should be skipped
1218    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
1219        ctx.content.is_empty() || !ctx.likely_has_html()
1220    }
1221
1222    fn as_any(&self) -> &dyn std::any::Any {
1223        self
1224    }
1225
1226    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1227        let json_value = serde_json::to_value(&self.config).ok()?;
1228        Some((
1229            self.name().to_string(),
1230            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1231        ))
1232    }
1233
1234    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
1235        let mut aliases = std::collections::HashMap::new();
1236        // Shorthand aliases for allowed-elements/disallowed-elements
1237        aliases.insert("allowed".to_string(), "allowed-elements".to_string());
1238        aliases.insert("disallowed".to_string(), "disallowed-elements".to_string());
1239        Some(aliases)
1240    }
1241
1242    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1243    where
1244        Self: Sized,
1245    {
1246        let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
1247        Box::new(Self::from_config_struct(rule_config))
1248    }
1249}
1250
1251#[cfg(test)]
1252mod tests {
1253    use super::*;
1254    use crate::lint_context::LintContext;
1255    use crate::rule::Rule;
1256
1257    fn relaxed_fix_rule() -> MD033NoInlineHtml {
1258        let config = MD033Config {
1259            fix: true,
1260            fix_mode: MD033FixMode::Relaxed,
1261            ..MD033Config::default()
1262        };
1263        MD033NoInlineHtml::from_config_struct(config)
1264    }
1265
1266    #[test]
1267    fn test_md033_basic_html() {
1268        let rule = MD033NoInlineHtml::default();
1269        let content = "<div>Some content</div>";
1270        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1271        let result = rule.check(&ctx).unwrap();
1272        // Only reports opening tags, not closing tags
1273        assert_eq!(result.len(), 1); // Only <div>, not </div>
1274        assert!(result[0].message.starts_with("Inline HTML found: <div>"));
1275    }
1276
1277    #[test]
1278    fn test_md033_case_insensitive() {
1279        let rule = MD033NoInlineHtml::default();
1280        let content = "<DiV>Some <B>content</B></dIv>";
1281        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1282        let result = rule.check(&ctx).unwrap();
1283        // Only reports opening tags, not closing tags
1284        assert_eq!(result.len(), 2); // <DiV>, <B> (not </B>, </dIv>)
1285        assert_eq!(result[0].message, "Inline HTML found: <DiV>");
1286        assert_eq!(result[1].message, "Inline HTML found: <B>");
1287    }
1288
1289    #[test]
1290    fn test_md033_allowed_tags() {
1291        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
1292        let content = "<div>Allowed</div><p>Not allowed</p><br/>";
1293        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1294        let result = rule.check(&ctx).unwrap();
1295        // Only warnings for non-allowed opening tags (<p> only, div and br are allowed)
1296        assert_eq!(result.len(), 1);
1297        assert_eq!(result[0].message, "Inline HTML found: <p>");
1298
1299        // Test case-insensitivity of allowed tags
1300        let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
1301        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1302        let result2 = rule.check(&ctx2).unwrap();
1303        assert_eq!(result2.len(), 1); // Only <P> flagged
1304        assert_eq!(result2[0].message, "Inline HTML found: <P>");
1305    }
1306
1307    #[test]
1308    fn test_md033_html_comments() {
1309        let rule = MD033NoInlineHtml::default();
1310        let content = "<!-- This is a comment --> <p>Not a comment</p>";
1311        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1312        let result = rule.check(&ctx).unwrap();
1313        // Should detect warnings for HTML opening tags (comments are skipped, closing tags not reported)
1314        assert_eq!(result.len(), 1); // Only <p>
1315        assert_eq!(result[0].message, "Inline HTML found: <p>");
1316    }
1317
1318    #[test]
1319    fn test_md033_tags_in_links() {
1320        let rule = MD033NoInlineHtml::default();
1321        let content = "[Link](http://example.com/<div>)";
1322        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1323        let result = rule.check(&ctx).unwrap();
1324        // The <div> in the URL should be detected as HTML (not skipped)
1325        assert_eq!(result.len(), 1);
1326        assert_eq!(result[0].message, "Inline HTML found: <div>");
1327
1328        let content2 = "[Link <a>text</a>](url)";
1329        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1330        let result2 = rule.check(&ctx2).unwrap();
1331        // Only reports opening tags
1332        assert_eq!(result2.len(), 1); // Only <a>
1333        assert_eq!(result2[0].message, "Inline HTML found: <a>");
1334    }
1335
1336    #[test]
1337    fn test_md033_fix_escaping() {
1338        let rule = MD033NoInlineHtml::default();
1339        let content = "Text with <div> and <br/> tags.";
1340        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1341        let fixed_content = rule.fix(&ctx).unwrap();
1342        // No fix for HTML tags; output should be unchanged
1343        assert_eq!(fixed_content, content);
1344    }
1345
1346    #[test]
1347    fn test_md033_in_code_blocks() {
1348        let rule = MD033NoInlineHtml::default();
1349        let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
1350        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1351        let result = rule.check(&ctx).unwrap();
1352        // Only reports opening tags outside code block
1353        assert_eq!(result.len(), 1); // Only <div> outside code block
1354        assert_eq!(result[0].message, "Inline HTML found: <div>");
1355    }
1356
1357    #[test]
1358    fn test_md033_in_code_spans() {
1359        let rule = MD033NoInlineHtml::default();
1360        let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
1361        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1362        let result = rule.check(&ctx).unwrap();
1363        // Should detect <br/> outside code span, but not tags inside code span
1364        assert_eq!(result.len(), 1);
1365        assert_eq!(result[0].message, "Inline HTML found: <br/>");
1366    }
1367
1368    #[test]
1369    fn test_md033_issue_90_code_span_with_diff_block() {
1370        // Test for issue #90: inline code span followed by diff code block
1371        let rule = MD033NoInlineHtml::default();
1372        let content = r#"# Heading
1373
1374`<env>`
1375
1376```diff
1377- this
1378+ that
1379```"#;
1380        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1381        let result = rule.check(&ctx).unwrap();
1382        // Should NOT detect <env> as HTML since it's inside backticks
1383        assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
1384    }
1385
1386    #[test]
1387    fn test_md033_multiple_code_spans_with_angle_brackets() {
1388        // Test multiple code spans on same line
1389        let rule = MD033NoInlineHtml::default();
1390        let content = "`<one>` and `<two>` and `<three>` are all code spans";
1391        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1392        let result = rule.check(&ctx).unwrap();
1393        assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
1394    }
1395
1396    #[test]
1397    fn test_md033_nested_angle_brackets_in_code_span() {
1398        // Test nested angle brackets
1399        let rule = MD033NoInlineHtml::default();
1400        let content = "Text with `<<nested>>` brackets";
1401        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1402        let result = rule.check(&ctx).unwrap();
1403        assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
1404    }
1405
1406    #[test]
1407    fn test_md033_code_span_at_end_before_code_block() {
1408        // Test code span at end of line before code block
1409        let rule = MD033NoInlineHtml::default();
1410        let content = "Testing `<test>`\n```\ncode here\n```";
1411        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1412        let result = rule.check(&ctx).unwrap();
1413        assert_eq!(result.len(), 0, "Should handle code span before code block");
1414    }
1415
1416    #[test]
1417    fn test_md033_quick_fix_inline_tag() {
1418        // Test that non-fixable tags (like <span>) do NOT get a fix
1419        // Only safe fixable tags (em, i, strong, b, code, br, hr) with fix=true get fixes
1420        let rule = MD033NoInlineHtml::default();
1421        let content = "This has <span>inline text</span> that should keep content.";
1422        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1423        let result = rule.check(&ctx).unwrap();
1424
1425        assert_eq!(result.len(), 1, "Should find one HTML tag");
1426        // <span> is NOT a safe fixable tag, so no fix should be provided
1427        assert!(
1428            result[0].fix.is_none(),
1429            "Non-fixable tags like <span> should not have a fix"
1430        );
1431    }
1432
1433    #[test]
1434    fn test_md033_quick_fix_multiline_tag() {
1435        // HTML block elements like <div> are intentionally NOT auto-fixed
1436        // Removing them would change document structure significantly
1437        let rule = MD033NoInlineHtml::default();
1438        let content = "<div>\nBlock content\n</div>";
1439        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1440        let result = rule.check(&ctx).unwrap();
1441
1442        assert_eq!(result.len(), 1, "Should find one HTML tag");
1443        // HTML block elements should NOT have auto-fix
1444        assert!(result[0].fix.is_none(), "HTML block elements should NOT have auto-fix");
1445    }
1446
1447    #[test]
1448    fn test_md033_quick_fix_self_closing_tag() {
1449        // Test that self-closing tags with fix=false (default) do NOT get a fix
1450        let rule = MD033NoInlineHtml::default();
1451        let content = "Self-closing: <br/>";
1452        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1453        let result = rule.check(&ctx).unwrap();
1454
1455        assert_eq!(result.len(), 1, "Should find one HTML tag");
1456        // Default config has fix=false, so no fix should be provided
1457        assert!(
1458            result[0].fix.is_none(),
1459            "Self-closing tags should not have a fix when fix config is false"
1460        );
1461    }
1462
1463    #[test]
1464    fn test_md033_quick_fix_multiple_tags() {
1465        // Test that multiple tags without fix=true do NOT get fixes
1466        // <span> is not a safe fixable tag, <strong> is but fix=false by default
1467        let rule = MD033NoInlineHtml::default();
1468        let content = "<span>first</span> and <strong>second</strong>";
1469        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1470        let result = rule.check(&ctx).unwrap();
1471
1472        assert_eq!(result.len(), 2, "Should find two HTML tags");
1473        // Neither should have a fix: <span> is not fixable, <strong> is but fix=false
1474        assert!(result[0].fix.is_none(), "Non-fixable <span> should not have a fix");
1475        assert!(
1476            result[1].fix.is_none(),
1477            "<strong> should not have a fix when fix config is false"
1478        );
1479    }
1480
1481    #[test]
1482    fn test_md033_skip_angle_brackets_in_link_titles() {
1483        // Angle brackets inside link reference definition titles should not be flagged as HTML
1484        let rule = MD033NoInlineHtml::default();
1485        let content = r#"# Test
1486
1487[example]: <https://example.com> "Title with <Angle Brackets> inside"
1488
1489Regular text with <div>content</div> HTML tag.
1490"#;
1491        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1492        let result = rule.check(&ctx).unwrap();
1493
1494        // Should only flag <div>, not <Angle Brackets> in the title (not a valid HTML element)
1495        // Opening tag only (markdownlint behavior)
1496        assert_eq!(result.len(), 1, "Should find opening div tag");
1497        assert!(
1498            result[0].message.contains("<div>"),
1499            "Should flag <div>, got: {}",
1500            result[0].message
1501        );
1502    }
1503
1504    #[test]
1505    fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
1506        // Test with single-quoted title
1507        let rule = MD033NoInlineHtml::default();
1508        let content = r#"[ref]: url 'Title <Help Wanted> here'
1509
1510<span>text</span> here
1511"#;
1512        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1513        let result = rule.check(&ctx).unwrap();
1514
1515        // <Help Wanted> is not a valid HTML element, so only <span> is flagged
1516        // Opening tag only (markdownlint behavior)
1517        assert_eq!(result.len(), 1, "Should find opening span tag");
1518        assert!(
1519            result[0].message.contains("<span>"),
1520            "Should flag <span>, got: {}",
1521            result[0].message
1522        );
1523    }
1524
1525    #[test]
1526    fn test_md033_multiline_tag_end_line_calculation() {
1527        // Test that multiline HTML tags report correct end_line
1528        let rule = MD033NoInlineHtml::default();
1529        let content = "<div\n  class=\"test\"\n  id=\"example\">";
1530        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1531        let result = rule.check(&ctx).unwrap();
1532
1533        assert_eq!(result.len(), 1, "Should find one HTML tag");
1534        // Tag starts on line 1
1535        assert_eq!(result[0].line, 1, "Start line should be 1");
1536        // Tag ends on line 3 (where the closing > is)
1537        assert_eq!(result[0].end_line, 3, "End line should be 3");
1538    }
1539
1540    #[test]
1541    fn test_md033_single_line_tag_same_start_end_line() {
1542        // Test that single-line HTML tags have same start and end line
1543        let rule = MD033NoInlineHtml::default();
1544        let content = "Some text <div class=\"test\"> more text";
1545        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1546        let result = rule.check(&ctx).unwrap();
1547
1548        assert_eq!(result.len(), 1, "Should find one HTML tag");
1549        assert_eq!(result[0].line, 1, "Start line should be 1");
1550        assert_eq!(result[0].end_line, 1, "End line should be 1 for single-line tag");
1551    }
1552
1553    #[test]
1554    fn test_md033_multiline_tag_with_many_attributes() {
1555        // Test multiline tag spanning multiple lines
1556        let rule = MD033NoInlineHtml::default();
1557        let content =
1558            "Text\n<div\n  data-attr1=\"value1\"\n  data-attr2=\"value2\"\n  data-attr3=\"value3\">\nMore text";
1559        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1560        let result = rule.check(&ctx).unwrap();
1561
1562        assert_eq!(result.len(), 1, "Should find one HTML tag");
1563        // Tag starts on line 2 (first line is "Text")
1564        assert_eq!(result[0].line, 2, "Start line should be 2");
1565        // Tag ends on line 5 (where the closing > is)
1566        assert_eq!(result[0].end_line, 5, "End line should be 5");
1567    }
1568
1569    #[test]
1570    fn test_md033_disallowed_mode_basic() {
1571        // Test disallowed mode: only flags tags in the disallowed list
1572        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string(), "iframe".to_string()]);
1573        let content = "<div>Safe content</div><script>alert('xss')</script>";
1574        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1575        let result = rule.check(&ctx).unwrap();
1576
1577        // Should only flag <script>, not <div>
1578        assert_eq!(result.len(), 1, "Should only flag disallowed tags");
1579        assert!(result[0].message.contains("<script>"), "Should flag script tag");
1580    }
1581
1582    #[test]
1583    fn test_md033_disallowed_gfm_security_tags() {
1584        // Test GFM security tags expansion
1585        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1586        let content = r#"
1587<div>Safe</div>
1588<title>Bad title</title>
1589<textarea>Bad textarea</textarea>
1590<style>.bad{}</style>
1591<iframe src="evil"></iframe>
1592<script>evil()</script>
1593<plaintext>old tag</plaintext>
1594<span>Safe span</span>
1595"#;
1596        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1597        let result = rule.check(&ctx).unwrap();
1598
1599        // Should flag: title, textarea, style, iframe, script, plaintext
1600        // Should NOT flag: div, span
1601        assert_eq!(result.len(), 6, "Should flag 6 GFM security tags");
1602
1603        let flagged_tags: Vec<&str> = result
1604            .iter()
1605            .filter_map(|w| w.message.split("<").nth(1))
1606            .filter_map(|s| s.split(">").next())
1607            .filter_map(|s| s.split_whitespace().next())
1608            .collect();
1609
1610        assert!(flagged_tags.contains(&"title"), "Should flag title");
1611        assert!(flagged_tags.contains(&"textarea"), "Should flag textarea");
1612        assert!(flagged_tags.contains(&"style"), "Should flag style");
1613        assert!(flagged_tags.contains(&"iframe"), "Should flag iframe");
1614        assert!(flagged_tags.contains(&"script"), "Should flag script");
1615        assert!(flagged_tags.contains(&"plaintext"), "Should flag plaintext");
1616        assert!(!flagged_tags.contains(&"div"), "Should NOT flag div");
1617        assert!(!flagged_tags.contains(&"span"), "Should NOT flag span");
1618    }
1619
1620    #[test]
1621    fn test_md033_disallowed_case_insensitive() {
1622        // Test that disallowed check is case-insensitive
1623        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string()]);
1624        let content = "<SCRIPT>alert('xss')</SCRIPT><Script>alert('xss')</Script>";
1625        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1626        let result = rule.check(&ctx).unwrap();
1627
1628        // Should flag both <SCRIPT> and <Script>
1629        assert_eq!(result.len(), 2, "Should flag both case variants");
1630    }
1631
1632    #[test]
1633    fn test_md033_disallowed_with_attributes() {
1634        // Test that disallowed mode works with tags that have attributes
1635        let rule = MD033NoInlineHtml::with_disallowed(vec!["iframe".to_string()]);
1636        let content = r#"<iframe src="https://evil.com" width="100" height="100"></iframe>"#;
1637        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1638        let result = rule.check(&ctx).unwrap();
1639
1640        assert_eq!(result.len(), 1, "Should flag iframe with attributes");
1641        assert!(result[0].message.contains("iframe"), "Should flag iframe");
1642    }
1643
1644    #[test]
1645    fn test_md033_disallowed_all_gfm_tags() {
1646        // Verify all GFM disallowed tags are covered
1647        use md033_config::GFM_DISALLOWED_TAGS;
1648        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1649
1650        for tag in GFM_DISALLOWED_TAGS {
1651            let content = format!("<{tag}>content</{tag}>");
1652            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
1653            let result = rule.check(&ctx).unwrap();
1654
1655            assert_eq!(result.len(), 1, "GFM tag <{tag}> should be flagged");
1656        }
1657    }
1658
1659    #[test]
1660    fn test_md033_disallowed_mixed_with_custom() {
1661        // Test mixing "gfm" with custom disallowed tags
1662        let rule = MD033NoInlineHtml::with_disallowed(vec![
1663            "gfm".to_string(),
1664            "marquee".to_string(), // Custom disallowed tag
1665        ]);
1666        let content = r#"<script>bad</script><marquee>annoying</marquee><div>ok</div>"#;
1667        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1668        let result = rule.check(&ctx).unwrap();
1669
1670        // Should flag script (gfm) and marquee (custom)
1671        assert_eq!(result.len(), 2, "Should flag both gfm and custom tags");
1672    }
1673
1674    #[test]
1675    fn test_md033_disallowed_empty_means_default_mode() {
1676        // Empty disallowed list means default mode (flag all HTML)
1677        let rule = MD033NoInlineHtml::with_disallowed(vec![]);
1678        let content = "<div>content</div>";
1679        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1680        let result = rule.check(&ctx).unwrap();
1681
1682        // Should flag <div> in default mode
1683        assert_eq!(result.len(), 1, "Empty disallowed = default mode");
1684    }
1685
1686    #[test]
1687    fn test_md033_jsx_fragments_in_mdx() {
1688        // JSX fragments (<> and </>) should not trigger warnings in MDX
1689        let rule = MD033NoInlineHtml::default();
1690        let content = r#"# MDX Document
1691
1692<>
1693  <Heading />
1694  <Content />
1695</>
1696
1697<div>Regular HTML should still be flagged</div>
1698"#;
1699        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1700        let result = rule.check(&ctx).unwrap();
1701
1702        // Should only flag <div>, not the fragments or JSX components
1703        assert_eq!(result.len(), 1, "Should only find one HTML tag (the div)");
1704        assert!(
1705            result[0].message.contains("<div>"),
1706            "Should flag <div>, not JSX fragments"
1707        );
1708    }
1709
1710    #[test]
1711    fn test_md033_jsx_components_in_mdx() {
1712        // JSX components (capitalized) should not trigger warnings in MDX
1713        let rule = MD033NoInlineHtml::default();
1714        let content = r#"<CustomComponent prop="value">
1715  Content
1716</CustomComponent>
1717
1718<MyButton onClick={handler}>Click</MyButton>
1719"#;
1720        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1721        let result = rule.check(&ctx).unwrap();
1722
1723        // No warnings - all are JSX components
1724        assert_eq!(result.len(), 0, "Should not flag JSX components in MDX");
1725    }
1726
1727    #[test]
1728    fn test_md033_jsx_not_skipped_in_standard_markdown() {
1729        // In standard markdown, capitalized tags should still be flagged if they're valid HTML
1730        let rule = MD033NoInlineHtml::default();
1731        let content = "<Script>alert(1)</Script>";
1732        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1733        let result = rule.check(&ctx).unwrap();
1734
1735        // Should flag <Script> in standard markdown (it's a valid HTML element)
1736        assert_eq!(result.len(), 1, "Should flag <Script> in standard markdown");
1737    }
1738
1739    #[test]
1740    fn test_md033_jsx_attributes_in_mdx() {
1741        // Elements with JSX-specific attributes should not trigger warnings in MDX
1742        let rule = MD033NoInlineHtml::default();
1743        let content = r#"# MDX with JSX Attributes
1744
1745<div className="card big">Content</div>
1746
1747<button onClick={handleClick}>Click me</button>
1748
1749<label htmlFor="input-id">Label</label>
1750
1751<input onChange={handleChange} />
1752
1753<div class="html-class">Regular HTML should be flagged</div>
1754"#;
1755        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1756        let result = rule.check(&ctx).unwrap();
1757
1758        // Should only flag the div with regular HTML "class" attribute
1759        assert_eq!(
1760            result.len(),
1761            1,
1762            "Should only flag HTML element without JSX attributes, got: {result:?}"
1763        );
1764        assert!(
1765            result[0].message.contains("<div class="),
1766            "Should flag the div with HTML class attribute"
1767        );
1768    }
1769
1770    #[test]
1771    fn test_md033_jsx_attributes_not_skipped_in_standard() {
1772        // In standard markdown, JSX attributes should still be flagged
1773        let rule = MD033NoInlineHtml::default();
1774        let content = r#"<div className="card">Content</div>"#;
1775        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1776        let result = rule.check(&ctx).unwrap();
1777
1778        // Should flag in standard markdown
1779        assert_eq!(result.len(), 1, "Should flag JSX-style elements in standard markdown");
1780    }
1781
1782    // Auto-fix tests for MD033
1783
1784    #[test]
1785    fn test_md033_fix_disabled_by_default() {
1786        // Auto-fix should be disabled by default
1787        let rule = MD033NoInlineHtml::default();
1788        assert!(!rule.config.fix, "Fix should be disabled by default");
1789        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::Unfixable);
1790    }
1791
1792    #[test]
1793    fn test_md033_fix_enabled_em_to_italic() {
1794        // When fix is enabled, <em>text</em> should convert to *text*
1795        let rule = MD033NoInlineHtml::with_fix(true);
1796        let content = "This has <em>emphasized text</em> here.";
1797        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1798        let fixed = rule.fix(&ctx).unwrap();
1799        assert_eq!(fixed, "This has *emphasized text* here.");
1800    }
1801
1802    #[test]
1803    fn test_md033_fix_enabled_i_to_italic() {
1804        // <i>text</i> should convert to *text*
1805        let rule = MD033NoInlineHtml::with_fix(true);
1806        let content = "This has <i>italic text</i> here.";
1807        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1808        let fixed = rule.fix(&ctx).unwrap();
1809        assert_eq!(fixed, "This has *italic text* here.");
1810    }
1811
1812    #[test]
1813    fn test_md033_fix_enabled_strong_to_bold() {
1814        // <strong>text</strong> should convert to **text**
1815        let rule = MD033NoInlineHtml::with_fix(true);
1816        let content = "This has <strong>bold text</strong> here.";
1817        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1818        let fixed = rule.fix(&ctx).unwrap();
1819        assert_eq!(fixed, "This has **bold text** here.");
1820    }
1821
1822    #[test]
1823    fn test_md033_fix_enabled_b_to_bold() {
1824        // <b>text</b> should convert to **text**
1825        let rule = MD033NoInlineHtml::with_fix(true);
1826        let content = "This has <b>bold text</b> here.";
1827        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1828        let fixed = rule.fix(&ctx).unwrap();
1829        assert_eq!(fixed, "This has **bold text** here.");
1830    }
1831
1832    #[test]
1833    fn test_md033_fix_enabled_code_to_backticks() {
1834        // <code>text</code> should convert to `text`
1835        let rule = MD033NoInlineHtml::with_fix(true);
1836        let content = "This has <code>inline code</code> here.";
1837        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1838        let fixed = rule.fix(&ctx).unwrap();
1839        assert_eq!(fixed, "This has `inline code` here.");
1840    }
1841
1842    #[test]
1843    fn test_md033_fix_enabled_code_with_backticks() {
1844        // <code>text with `backticks`</code> should use double backticks
1845        let rule = MD033NoInlineHtml::with_fix(true);
1846        let content = "This has <code>text with `backticks`</code> here.";
1847        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1848        let fixed = rule.fix(&ctx).unwrap();
1849        assert_eq!(fixed, "This has `` text with `backticks` `` here.");
1850    }
1851
1852    #[test]
1853    fn test_md033_fix_enabled_br_trailing_spaces() {
1854        // <br> should convert to two trailing spaces + newline (default)
1855        let rule = MD033NoInlineHtml::with_fix(true);
1856        let content = "First line<br>Second line";
1857        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1858        let fixed = rule.fix(&ctx).unwrap();
1859        assert_eq!(fixed, "First line  \nSecond line");
1860    }
1861
1862    #[test]
1863    fn test_md033_fix_enabled_br_self_closing() {
1864        // <br/> and <br /> should also convert
1865        let rule = MD033NoInlineHtml::with_fix(true);
1866        let content = "First<br/>second<br />third";
1867        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1868        let fixed = rule.fix(&ctx).unwrap();
1869        assert_eq!(fixed, "First  \nsecond  \nthird");
1870    }
1871
1872    #[test]
1873    fn test_md033_fix_enabled_br_backslash_style() {
1874        // With br_style = backslash, <br> should convert to backslash + newline
1875        let config = MD033Config {
1876            allowed: Vec::new(),
1877            disallowed: Vec::new(),
1878            fix: true,
1879            br_style: md033_config::BrStyle::Backslash,
1880            ..MD033Config::default()
1881        };
1882        let rule = MD033NoInlineHtml::from_config_struct(config);
1883        let content = "First line<br>Second line";
1884        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1885        let fixed = rule.fix(&ctx).unwrap();
1886        assert_eq!(fixed, "First line\\\nSecond line");
1887    }
1888
1889    #[test]
1890    fn test_md033_fix_enabled_hr() {
1891        // <hr> should convert to horizontal rule
1892        let rule = MD033NoInlineHtml::with_fix(true);
1893        let content = "Above<hr>Below";
1894        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1895        let fixed = rule.fix(&ctx).unwrap();
1896        assert_eq!(fixed, "Above\n---\nBelow");
1897    }
1898
1899    #[test]
1900    fn test_md033_fix_enabled_hr_self_closing() {
1901        // <hr/> should also convert
1902        let rule = MD033NoInlineHtml::with_fix(true);
1903        let content = "Above<hr/>Below";
1904        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1905        let fixed = rule.fix(&ctx).unwrap();
1906        assert_eq!(fixed, "Above\n---\nBelow");
1907    }
1908
1909    #[test]
1910    fn test_md033_fix_skips_nested_tags() {
1911        // Tags with nested HTML - outer tags may not be fully fixed due to overlapping ranges
1912        // The inner tags are processed first, which can invalidate outer tag ranges
1913        let rule = MD033NoInlineHtml::with_fix(true);
1914        let content = "This has <em>text with <strong>nested</strong> tags</em> here.";
1915        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1916        let fixed = rule.fix(&ctx).unwrap();
1917        // Inner <strong> is converted to markdown, outer <em> range becomes invalid
1918        // This is expected behavior - user should run fix multiple times for nested tags
1919        assert_eq!(fixed, "This has <em>text with **nested** tags</em> here.");
1920    }
1921
1922    #[test]
1923    fn test_md033_fix_skips_tags_with_attributes() {
1924        // Tags with attributes should NOT be fixed at all - leave as-is
1925        // User may want to keep the attributes (e.g., class="highlight" for styling)
1926        let rule = MD033NoInlineHtml::with_fix(true);
1927        let content = "This has <em class=\"highlight\">emphasized</em> text.";
1928        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1929        let fixed = rule.fix(&ctx).unwrap();
1930        // Content should remain unchanged - we don't know if attributes matter
1931        assert_eq!(fixed, content);
1932    }
1933
1934    #[test]
1935    fn test_md033_fix_disabled_no_changes() {
1936        // When fix is disabled, original content should be returned
1937        let rule = MD033NoInlineHtml::default(); // fix is false by default
1938        let content = "This has <em>emphasized text</em> here.";
1939        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1940        let fixed = rule.fix(&ctx).unwrap();
1941        assert_eq!(fixed, content, "Should return original content when fix is disabled");
1942    }
1943
1944    #[test]
1945    fn test_md033_fix_capability_enabled() {
1946        let rule = MD033NoInlineHtml::with_fix(true);
1947        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::FullyFixable);
1948    }
1949
1950    #[test]
1951    fn test_md033_fix_multiple_tags() {
1952        // Test fixing multiple HTML tags in one document
1953        let rule = MD033NoInlineHtml::with_fix(true);
1954        let content = "Here is <em>italic</em> and <strong>bold</strong> text.";
1955        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1956        let fixed = rule.fix(&ctx).unwrap();
1957        assert_eq!(fixed, "Here is *italic* and **bold** text.");
1958    }
1959
1960    #[test]
1961    fn test_md033_fix_uppercase_tags() {
1962        // HTML tags are case-insensitive
1963        let rule = MD033NoInlineHtml::with_fix(true);
1964        let content = "This has <EM>emphasized</EM> text.";
1965        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1966        let fixed = rule.fix(&ctx).unwrap();
1967        assert_eq!(fixed, "This has *emphasized* text.");
1968    }
1969
1970    #[test]
1971    fn test_md033_fix_unsafe_tags_not_modified() {
1972        // Tags without safe markdown equivalents should NOT be modified
1973        // Only safe fixable tags (em, i, strong, b, code, br, hr) get converted
1974        let rule = MD033NoInlineHtml::with_fix(true);
1975        let content = "This has <div>a div</div> content.";
1976        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1977        let fixed = rule.fix(&ctx).unwrap();
1978        // <div> is not a safe fixable tag, so content should be unchanged
1979        assert_eq!(fixed, "This has <div>a div</div> content.");
1980    }
1981
1982    #[test]
1983    fn test_md033_fix_img_tag_converted() {
1984        // <img> tags with simple src/alt attributes are converted to markdown images
1985        let rule = MD033NoInlineHtml::with_fix(true);
1986        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\">";
1987        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1988        let fixed = rule.fix(&ctx).unwrap();
1989        // <img> is converted to ![alt](src) format
1990        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
1991    }
1992
1993    #[test]
1994    fn test_md033_fix_img_tag_with_extra_attrs_not_converted() {
1995        // <img> tags with width/height/style attributes are NOT converted
1996        let rule = MD033NoInlineHtml::with_fix(true);
1997        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
1998        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1999        let fixed = rule.fix(&ctx).unwrap();
2000        // Has width attribute - not safe to convert
2001        assert_eq!(fixed, "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">");
2002    }
2003
2004    #[test]
2005    fn test_md033_fix_relaxed_a_with_target_is_converted() {
2006        let rule = relaxed_fix_rule();
2007        let content = "Link: <a href=\"https://example.com\" target=\"_blank\">Example</a>";
2008        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2009        let fixed = rule.fix(&ctx).unwrap();
2010        assert_eq!(fixed, "Link: [Example](https://example.com)");
2011    }
2012
2013    #[test]
2014    fn test_md033_fix_relaxed_img_with_width_is_converted() {
2015        let rule = relaxed_fix_rule();
2016        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
2017        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2018        let fixed = rule.fix(&ctx).unwrap();
2019        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
2020    }
2021
2022    #[test]
2023    fn test_md033_fix_relaxed_rejects_unknown_extra_attributes() {
2024        let rule = relaxed_fix_rule();
2025        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" aria-label=\"hero\">";
2026        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2027        let fixed = rule.fix(&ctx).unwrap();
2028        assert_eq!(fixed, content, "Unknown attributes should not be dropped by default");
2029    }
2030
2031    #[test]
2032    fn test_md033_fix_relaxed_still_blocks_unsafe_schemes() {
2033        let rule = relaxed_fix_rule();
2034        let content = "Link: <a href=\"javascript:alert(1)\" target=\"_blank\">Example</a>";
2035        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2036        let fixed = rule.fix(&ctx).unwrap();
2037        assert_eq!(fixed, content, "Unsafe URL schemes must never be converted");
2038    }
2039
2040    #[test]
2041    fn test_md033_fix_relaxed_wrapper_strip_requires_second_pass_for_nested_html() {
2042        let rule = relaxed_fix_rule();
2043        let content = "<p align=\"center\">\n  <img src=\"logo.svg\" alt=\"Logo\" width=\"120\" />\n</p>";
2044        let ctx1 = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2045        let fixed_once = rule.fix(&ctx1).unwrap();
2046        assert!(
2047            fixed_once.contains("<p"),
2048            "First pass should keep wrapper when inner HTML is still present: {fixed_once}"
2049        );
2050        assert!(
2051            fixed_once.contains("![Logo](logo.svg)"),
2052            "Inner image should be converted on first pass: {fixed_once}"
2053        );
2054
2055        let ctx2 = LintContext::new(&fixed_once, crate::config::MarkdownFlavor::Standard, None);
2056        let fixed_twice = rule.fix(&ctx2).unwrap();
2057        assert!(
2058            !fixed_twice.contains("<p"),
2059            "Second pass should strip configured wrapper: {fixed_twice}"
2060        );
2061        assert!(fixed_twice.contains("![Logo](logo.svg)"));
2062    }
2063
2064    #[test]
2065    fn test_md033_fix_relaxed_multiple_droppable_attrs() {
2066        let rule = relaxed_fix_rule();
2067        let content = "<a href=\"https://example.com\" target=\"_blank\" rel=\"noopener\" class=\"btn\">Click</a>";
2068        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2069        let fixed = rule.fix(&ctx).unwrap();
2070        assert_eq!(fixed, "[Click](https://example.com)");
2071    }
2072
2073    #[test]
2074    fn test_md033_fix_relaxed_img_multiple_droppable_attrs() {
2075        let rule = relaxed_fix_rule();
2076        let content = "<img src=\"logo.png\" alt=\"Logo\" width=\"120\" height=\"40\" style=\"border:none\" />";
2077        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2078        let fixed = rule.fix(&ctx).unwrap();
2079        assert_eq!(fixed, "![Logo](logo.png)");
2080    }
2081
2082    #[test]
2083    fn test_md033_fix_relaxed_event_handler_never_dropped() {
2084        let rule = relaxed_fix_rule();
2085        let content = "<a href=\"https://example.com\" onclick=\"track()\">Link</a>";
2086        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2087        let fixed = rule.fix(&ctx).unwrap();
2088        assert_eq!(fixed, content, "Event handler attributes must block conversion");
2089    }
2090
2091    #[test]
2092    fn test_md033_fix_relaxed_event_handler_even_with_custom_config() {
2093        // Even if someone adds on* to drop-attributes, event handlers must be rejected
2094        let config = MD033Config {
2095            fix: true,
2096            fix_mode: MD033FixMode::Relaxed,
2097            drop_attributes: vec!["on*".to_string(), "target".to_string()],
2098            ..MD033Config::default()
2099        };
2100        let rule = MD033NoInlineHtml::from_config_struct(config);
2101        let content = "<a href=\"https://example.com\" onclick=\"alert(1)\">Link</a>";
2102        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2103        let fixed = rule.fix(&ctx).unwrap();
2104        assert_eq!(fixed, content, "on* event handlers must never be dropped");
2105    }
2106
2107    #[test]
2108    fn test_md033_fix_relaxed_custom_drop_attributes() {
2109        let config = MD033Config {
2110            fix: true,
2111            fix_mode: MD033FixMode::Relaxed,
2112            drop_attributes: vec!["loading".to_string()],
2113            ..MD033Config::default()
2114        };
2115        let rule = MD033NoInlineHtml::from_config_struct(config);
2116        // "loading" is in the custom list, "width" is NOT
2117        let content = "<img src=\"x.jpg\" alt=\"\" loading=\"lazy\">";
2118        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2119        let fixed = rule.fix(&ctx).unwrap();
2120        assert_eq!(fixed, "![](x.jpg)", "Custom drop-attributes should be respected");
2121
2122        let content2 = "<img src=\"x.jpg\" alt=\"\" width=\"100\">";
2123        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
2124        let fixed2 = rule.fix(&ctx2).unwrap();
2125        assert_eq!(
2126            fixed2, content2,
2127            "Attributes not in custom list should block conversion"
2128        );
2129    }
2130
2131    #[test]
2132    fn test_md033_fix_relaxed_custom_strip_wrapper() {
2133        let config = MD033Config {
2134            fix: true,
2135            fix_mode: MD033FixMode::Relaxed,
2136            strip_wrapper_elements: vec!["div".to_string()],
2137            ..MD033Config::default()
2138        };
2139        let rule = MD033NoInlineHtml::from_config_struct(config);
2140        let content = "<div>Some text content</div>";
2141        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2142        let fixed = rule.fix(&ctx).unwrap();
2143        assert_eq!(fixed, "Some text content");
2144    }
2145
2146    #[test]
2147    fn test_md033_fix_relaxed_wrapper_with_plain_text() {
2148        let rule = relaxed_fix_rule();
2149        let content = "<p align=\"center\">Just some text</p>";
2150        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2151        let fixed = rule.fix(&ctx).unwrap();
2152        assert_eq!(fixed, "Just some text");
2153    }
2154
2155    #[test]
2156    fn test_md033_fix_relaxed_data_attr_with_wildcard() {
2157        let config = MD033Config {
2158            fix: true,
2159            fix_mode: MD033FixMode::Relaxed,
2160            drop_attributes: vec!["data-*".to_string(), "target".to_string()],
2161            ..MD033Config::default()
2162        };
2163        let rule = MD033NoInlineHtml::from_config_struct(config);
2164        let content = "<a href=\"https://example.com\" data-tracking=\"abc\" target=\"_blank\">Link</a>";
2165        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2166        let fixed = rule.fix(&ctx).unwrap();
2167        assert_eq!(fixed, "[Link](https://example.com)");
2168    }
2169
2170    #[test]
2171    fn test_md033_fix_relaxed_mixed_droppable_and_blocking_attrs() {
2172        let rule = relaxed_fix_rule();
2173        // "target" is droppable, "aria-label" is not in the default list
2174        let content = "<a href=\"https://example.com\" target=\"_blank\" aria-label=\"nav\">Link</a>";
2175        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2176        let fixed = rule.fix(&ctx).unwrap();
2177        assert_eq!(fixed, content, "Non-droppable attribute should block conversion");
2178    }
2179
2180    #[test]
2181    fn test_md033_fix_relaxed_badge_pattern() {
2182        // Common GitHub README badge pattern
2183        let rule = relaxed_fix_rule();
2184        let content = "<a href=\"https://crates.io/crates/rumdl\" target=\"_blank\"><img src=\"https://img.shields.io/crates/v/rumdl.svg\" alt=\"Crate\" width=\"120\" /></a>";
2185        let ctx1 = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2186        let fixed_once = rule.fix(&ctx1).unwrap();
2187        // First pass should convert the inner <img>
2188        assert!(
2189            fixed_once.contains("![Crate](https://img.shields.io/crates/v/rumdl.svg)"),
2190            "Inner img should be converted: {fixed_once}"
2191        );
2192
2193        // Second pass converts the <a> wrapper
2194        let ctx2 = LintContext::new(&fixed_once, crate::config::MarkdownFlavor::Standard, None);
2195        let fixed_twice = rule.fix(&ctx2).unwrap();
2196        assert!(
2197            fixed_twice
2198                .contains("[![Crate](https://img.shields.io/crates/v/rumdl.svg)](https://crates.io/crates/rumdl)"),
2199            "Badge should produce nested markdown image link: {fixed_twice}"
2200        );
2201    }
2202
2203    #[test]
2204    fn test_md033_fix_relaxed_conservative_mode_unchanged() {
2205        // Verify conservative mode (default) is unaffected by the relaxed logic
2206        let rule = MD033NoInlineHtml::with_fix(true);
2207        let content = "<a href=\"https://example.com\" target=\"_blank\">Link</a>";
2208        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2209        let fixed = rule.fix(&ctx).unwrap();
2210        assert_eq!(fixed, content, "Conservative mode should not drop target attribute");
2211    }
2212
2213    #[test]
2214    fn test_md033_fix_relaxed_img_inside_pre_not_converted() {
2215        // <img> inside <pre> must NOT be converted, even in relaxed mode
2216        let rule = relaxed_fix_rule();
2217        let content = "<pre>\n  <img src=\"diagram.png\" alt=\"d\" width=\"100\" />\n</pre>";
2218        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2219        let fixed = rule.fix(&ctx).unwrap();
2220        assert!(fixed.contains("<img"), "img inside pre must not be converted: {fixed}");
2221    }
2222
2223    #[test]
2224    fn test_md033_fix_relaxed_wrapper_nested_inside_div_not_stripped() {
2225        // <p> nested inside <div> should not be stripped
2226        let rule = relaxed_fix_rule();
2227        let content = "<div><p>text</p></div>";
2228        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2229        let fixed = rule.fix(&ctx).unwrap();
2230        assert!(
2231            fixed.contains("<p>text</p>") || fixed.contains("<p>"),
2232            "Nested <p> inside <div> should not be stripped: {fixed}"
2233        );
2234    }
2235
2236    #[test]
2237    fn test_md033_fix_relaxed_img_inside_nested_wrapper_not_converted() {
2238        // <img> inside <div><p>...</p></div> must NOT be converted because the
2239        // <p> wrapper can't be stripped (it's nested), so the markdown would be
2240        // stuck inside an HTML block where it won't render.
2241        let rule = relaxed_fix_rule();
2242        let content = "<div><p><img src=\"x.jpg\" alt=\"pic\" width=\"100\" /></p></div>";
2243        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2244        let fixed = rule.fix(&ctx).unwrap();
2245        assert!(
2246            fixed.contains("<img"),
2247            "img inside nested wrapper must not be converted: {fixed}"
2248        );
2249    }
2250
2251    #[test]
2252    fn test_md033_fix_mixed_safe_tags() {
2253        // All tags are now safe fixable (em, img, strong)
2254        let rule = MD033NoInlineHtml::with_fix(true);
2255        let content = "<em>italic</em> and <img src=\"x.jpg\"> and <strong>bold</strong>";
2256        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2257        let fixed = rule.fix(&ctx).unwrap();
2258        // All are converted
2259        assert_eq!(fixed, "*italic* and ![](x.jpg) and **bold**");
2260    }
2261
2262    #[test]
2263    fn test_md033_fix_multiple_tags_same_line() {
2264        // Multiple tags on the same line should all be fixed correctly
2265        let rule = MD033NoInlineHtml::with_fix(true);
2266        let content = "Regular text <i>italic</i> and <b>bold</b> here.";
2267        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2268        let fixed = rule.fix(&ctx).unwrap();
2269        assert_eq!(fixed, "Regular text *italic* and **bold** here.");
2270    }
2271
2272    #[test]
2273    fn test_md033_fix_multiple_em_tags_same_line() {
2274        // Multiple em/strong tags on the same line
2275        let rule = MD033NoInlineHtml::with_fix(true);
2276        let content = "<em>first</em> and <strong>second</strong> and <code>third</code>";
2277        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2278        let fixed = rule.fix(&ctx).unwrap();
2279        assert_eq!(fixed, "*first* and **second** and `third`");
2280    }
2281
2282    #[test]
2283    fn test_md033_fix_skips_tags_inside_pre() {
2284        // Tags inside <pre> blocks should NOT be fixed (would break structure)
2285        let rule = MD033NoInlineHtml::with_fix(true);
2286        let content = "<pre><code><em>VALUE</em></code></pre>";
2287        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2288        let fixed = rule.fix(&ctx).unwrap();
2289        // The <em> inside <pre><code> should NOT be converted
2290        // Only the outer structure might be changed
2291        assert!(
2292            !fixed.contains("*VALUE*"),
2293            "Tags inside <pre> should not be converted to markdown. Got: {fixed}"
2294        );
2295    }
2296
2297    #[test]
2298    fn test_md033_fix_skips_tags_inside_div() {
2299        // Tags inside HTML block elements should not be fixed
2300        let rule = MD033NoInlineHtml::with_fix(true);
2301        let content = "<div>\n<em>emphasized</em>\n</div>";
2302        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2303        let fixed = rule.fix(&ctx).unwrap();
2304        // The <em> inside <div> should not be converted to *emphasized*
2305        assert!(
2306            !fixed.contains("*emphasized*"),
2307            "Tags inside HTML blocks should not be converted. Got: {fixed}"
2308        );
2309    }
2310
2311    #[test]
2312    fn test_md033_fix_outside_html_block() {
2313        // Tags outside HTML blocks should still be fixed
2314        let rule = MD033NoInlineHtml::with_fix(true);
2315        let content = "<div>\ncontent\n</div>\n\nOutside <em>emphasized</em> text.";
2316        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2317        let fixed = rule.fix(&ctx).unwrap();
2318        // The <em> outside the div should be converted
2319        assert!(
2320            fixed.contains("*emphasized*"),
2321            "Tags outside HTML blocks should be converted. Got: {fixed}"
2322        );
2323    }
2324
2325    #[test]
2326    fn test_md033_fix_with_id_attribute() {
2327        // Tags with id attributes should not be fixed (id might be used for anchors)
2328        let rule = MD033NoInlineHtml::with_fix(true);
2329        let content = "See <em id=\"important\">this note</em> for details.";
2330        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2331        let fixed = rule.fix(&ctx).unwrap();
2332        // Should remain unchanged - id attribute matters for linking
2333        assert_eq!(fixed, content);
2334    }
2335
2336    #[test]
2337    fn test_md033_fix_with_style_attribute() {
2338        // Tags with style attributes should not be fixed
2339        let rule = MD033NoInlineHtml::with_fix(true);
2340        let content = "This is <strong style=\"color: red\">important</strong> text.";
2341        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2342        let fixed = rule.fix(&ctx).unwrap();
2343        // Should remain unchanged - style attribute provides formatting
2344        assert_eq!(fixed, content);
2345    }
2346
2347    #[test]
2348    fn test_md033_fix_mixed_with_and_without_attributes() {
2349        // Mix of tags with and without attributes
2350        let rule = MD033NoInlineHtml::with_fix(true);
2351        let content = "<em>normal</em> and <em class=\"special\">styled</em> text.";
2352        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2353        let fixed = rule.fix(&ctx).unwrap();
2354        // Only the tag without attributes should be fixed
2355        assert_eq!(fixed, "*normal* and <em class=\"special\">styled</em> text.");
2356    }
2357
2358    #[test]
2359    fn test_md033_quick_fix_tag_with_attributes_no_fix() {
2360        // Quick fix should not be provided for tags with attributes
2361        let rule = MD033NoInlineHtml::with_fix(true);
2362        let content = "<em class=\"test\">emphasized</em>";
2363        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2364        let result = rule.check(&ctx).unwrap();
2365
2366        assert_eq!(result.len(), 1, "Should find one HTML tag");
2367        // No fix should be provided for tags with attributes
2368        assert!(
2369            result[0].fix.is_none(),
2370            "Should NOT have a fix for tags with attributes"
2371        );
2372    }
2373
2374    #[test]
2375    fn test_md033_fix_skips_html_entities() {
2376        // Tags containing HTML entities should NOT be fixed
2377        // HTML entities need HTML context to render; markdown won't process them
2378        let rule = MD033NoInlineHtml::with_fix(true);
2379        let content = "<code>&vert;</code>";
2380        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2381        let fixed = rule.fix(&ctx).unwrap();
2382        // Should remain unchanged - converting would break rendering
2383        assert_eq!(fixed, content);
2384    }
2385
2386    #[test]
2387    fn test_md033_fix_skips_multiple_html_entities() {
2388        // Multiple HTML entities should also be skipped
2389        let rule = MD033NoInlineHtml::with_fix(true);
2390        let content = "<code>&lt;T&gt;</code>";
2391        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2392        let fixed = rule.fix(&ctx).unwrap();
2393        // Should remain unchanged
2394        assert_eq!(fixed, content);
2395    }
2396
2397    #[test]
2398    fn test_md033_fix_allows_ampersand_without_entity() {
2399        // Content with & but no semicolon should still be fixed
2400        let rule = MD033NoInlineHtml::with_fix(true);
2401        let content = "<code>a & b</code>";
2402        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2403        let fixed = rule.fix(&ctx).unwrap();
2404        // Should be converted since & is not part of an entity
2405        assert_eq!(fixed, "`a & b`");
2406    }
2407
2408    #[test]
2409    fn test_md033_fix_em_with_entities_skipped() {
2410        // <em> with entities should also be skipped
2411        let rule = MD033NoInlineHtml::with_fix(true);
2412        let content = "<em>&nbsp;text</em>";
2413        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2414        let fixed = rule.fix(&ctx).unwrap();
2415        // Should remain unchanged
2416        assert_eq!(fixed, content);
2417    }
2418
2419    #[test]
2420    fn test_md033_fix_skips_nested_em_in_code() {
2421        // Tags nested inside other HTML elements should NOT be fixed
2422        // e.g., <code><em>n</em></code> - the <em> should not be converted
2423        let rule = MD033NoInlineHtml::with_fix(true);
2424        let content = "<code><em>n</em></code>";
2425        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2426        let fixed = rule.fix(&ctx).unwrap();
2427        // The inner <em> should NOT be converted to *n* because it's nested
2428        // The whole structure should be left as-is (or outer code converted, but not inner)
2429        assert!(
2430            !fixed.contains("*n*"),
2431            "Nested <em> should not be converted to markdown. Got: {fixed}"
2432        );
2433    }
2434
2435    #[test]
2436    fn test_md033_fix_skips_nested_in_table() {
2437        // Tags nested in HTML structures in tables should not be fixed
2438        let rule = MD033NoInlineHtml::with_fix(true);
2439        let content = "| <code>><em>n</em></code> | description |";
2440        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2441        let fixed = rule.fix(&ctx).unwrap();
2442        // Should not convert nested <em> to *n*
2443        assert!(
2444            !fixed.contains("*n*"),
2445            "Nested tags in table should not be converted. Got: {fixed}"
2446        );
2447    }
2448
2449    #[test]
2450    fn test_md033_fix_standalone_em_still_converted() {
2451        // Standalone (non-nested) <em> should still be converted
2452        let rule = MD033NoInlineHtml::with_fix(true);
2453        let content = "This is <em>emphasized</em> text.";
2454        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2455        let fixed = rule.fix(&ctx).unwrap();
2456        assert_eq!(fixed, "This is *emphasized* text.");
2457    }
2458
2459    // ==========================================================================
2460    // Obsidian Templater Plugin Syntax Tests
2461    //
2462    // Templater is a popular Obsidian plugin that uses `<% ... %>` syntax for
2463    // template interpolation. The `<%` pattern is NOT captured by the HTML tag
2464    // parser because `%` is not a valid HTML tag name character (tags must start
2465    // with a letter). This behavior is documented here with comprehensive tests.
2466    //
2467    // Reference: https://silentvoid13.github.io/Templater/
2468    // ==========================================================================
2469
2470    #[test]
2471    fn test_md033_templater_basic_interpolation_not_flagged() {
2472        // Basic Templater interpolation: <% expr %>
2473        // Should NOT be flagged because `%` is not a valid HTML tag character
2474        let rule = MD033NoInlineHtml::default();
2475        let content = "Today is <% tp.date.now() %> which is nice.";
2476        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2477        let result = rule.check(&ctx).unwrap();
2478        assert!(
2479            result.is_empty(),
2480            "Templater basic interpolation should not be flagged as HTML. Got: {result:?}"
2481        );
2482    }
2483
2484    #[test]
2485    fn test_md033_templater_file_functions_not_flagged() {
2486        // Templater file functions: <% tp.file.* %>
2487        let rule = MD033NoInlineHtml::default();
2488        let content = "File: <% tp.file.title %>\nCreated: <% tp.file.creation_date() %>";
2489        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2490        let result = rule.check(&ctx).unwrap();
2491        assert!(
2492            result.is_empty(),
2493            "Templater file functions should not be flagged. Got: {result:?}"
2494        );
2495    }
2496
2497    #[test]
2498    fn test_md033_templater_with_arguments_not_flagged() {
2499        // Templater with function arguments
2500        let rule = MD033NoInlineHtml::default();
2501        let content = r#"Date: <% tp.date.now("YYYY-MM-DD") %>"#;
2502        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2503        let result = rule.check(&ctx).unwrap();
2504        assert!(
2505            result.is_empty(),
2506            "Templater with arguments should not be flagged. Got: {result:?}"
2507        );
2508    }
2509
2510    #[test]
2511    fn test_md033_templater_javascript_execution_not_flagged() {
2512        // Templater JavaScript execution block: <%* code %>
2513        let rule = MD033NoInlineHtml::default();
2514        let content = "<%* const today = tp.date.now(); tR += today; %>";
2515        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2516        let result = rule.check(&ctx).unwrap();
2517        assert!(
2518            result.is_empty(),
2519            "Templater JS execution block should not be flagged. Got: {result:?}"
2520        );
2521    }
2522
2523    #[test]
2524    fn test_md033_templater_dynamic_execution_not_flagged() {
2525        // Templater dynamic/preview execution: <%+ expr %>
2526        let rule = MD033NoInlineHtml::default();
2527        let content = "Dynamic: <%+ tp.date.now() %>";
2528        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2529        let result = rule.check(&ctx).unwrap();
2530        assert!(
2531            result.is_empty(),
2532            "Templater dynamic execution should not be flagged. Got: {result:?}"
2533        );
2534    }
2535
2536    #[test]
2537    fn test_md033_templater_whitespace_trim_all_not_flagged() {
2538        // Templater whitespace control - trim all: <%_ expr _%>
2539        let rule = MD033NoInlineHtml::default();
2540        let content = "<%_ tp.date.now() _%>";
2541        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2542        let result = rule.check(&ctx).unwrap();
2543        assert!(
2544            result.is_empty(),
2545            "Templater trim-all whitespace should not be flagged. Got: {result:?}"
2546        );
2547    }
2548
2549    #[test]
2550    fn test_md033_templater_whitespace_trim_newline_not_flagged() {
2551        // Templater whitespace control - trim newline: <%- expr -%>
2552        let rule = MD033NoInlineHtml::default();
2553        let content = "<%- tp.date.now() -%>";
2554        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2555        let result = rule.check(&ctx).unwrap();
2556        assert!(
2557            result.is_empty(),
2558            "Templater trim-newline should not be flagged. Got: {result:?}"
2559        );
2560    }
2561
2562    #[test]
2563    fn test_md033_templater_combined_modifiers_not_flagged() {
2564        // Templater combined whitespace and execution modifiers
2565        let rule = MD033NoInlineHtml::default();
2566        let contents = [
2567            "<%-* const x = 1; -%>",  // trim + JS execution
2568            "<%_+ tp.date.now() _%>", // trim-all + dynamic
2569            "<%- tp.file.title -%>",  // trim-newline only
2570            "<%_ tp.file.title _%>",  // trim-all only
2571        ];
2572        for content in contents {
2573            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2574            let result = rule.check(&ctx).unwrap();
2575            assert!(
2576                result.is_empty(),
2577                "Templater combined modifiers should not be flagged: {content}. Got: {result:?}"
2578            );
2579        }
2580    }
2581
2582    #[test]
2583    fn test_md033_templater_multiline_block_not_flagged() {
2584        // Multi-line Templater JavaScript block
2585        let rule = MD033NoInlineHtml::default();
2586        let content = r#"<%*
2587const x = 1;
2588const y = 2;
2589tR += x + y;
2590%>"#;
2591        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2592        let result = rule.check(&ctx).unwrap();
2593        assert!(
2594            result.is_empty(),
2595            "Templater multi-line block should not be flagged. Got: {result:?}"
2596        );
2597    }
2598
2599    #[test]
2600    fn test_md033_templater_with_angle_brackets_in_condition_not_flagged() {
2601        // Templater with angle brackets in JavaScript condition
2602        // This is a key edge case: `<` inside Templater should not trigger HTML detection
2603        let rule = MD033NoInlineHtml::default();
2604        let content = "<%* if (x < 5) { tR += 'small'; } %>";
2605        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2606        let result = rule.check(&ctx).unwrap();
2607        assert!(
2608            result.is_empty(),
2609            "Templater with angle brackets in conditions should not be flagged. Got: {result:?}"
2610        );
2611    }
2612
2613    #[test]
2614    fn test_md033_templater_mixed_with_html_only_html_flagged() {
2615        // Templater syntax mixed with actual HTML - only HTML should be flagged
2616        let rule = MD033NoInlineHtml::default();
2617        let content = "<% tp.date.now() %> is today's date. <div>This is HTML</div>";
2618        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2619        let result = rule.check(&ctx).unwrap();
2620        assert_eq!(result.len(), 1, "Should only flag the HTML div tag");
2621        assert!(
2622            result[0].message.contains("<div>"),
2623            "Should flag <div>, got: {}",
2624            result[0].message
2625        );
2626    }
2627
2628    #[test]
2629    fn test_md033_templater_in_heading_not_flagged() {
2630        // Templater in markdown heading
2631        let rule = MD033NoInlineHtml::default();
2632        let content = "# <% tp.file.title %>";
2633        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2634        let result = rule.check(&ctx).unwrap();
2635        assert!(
2636            result.is_empty(),
2637            "Templater in heading should not be flagged. Got: {result:?}"
2638        );
2639    }
2640
2641    #[test]
2642    fn test_md033_templater_multiple_on_same_line_not_flagged() {
2643        // Multiple Templater blocks on same line
2644        let rule = MD033NoInlineHtml::default();
2645        let content = "From <% tp.date.now() %> to <% tp.date.tomorrow() %> we have meetings.";
2646        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2647        let result = rule.check(&ctx).unwrap();
2648        assert!(
2649            result.is_empty(),
2650            "Multiple Templater blocks should not be flagged. Got: {result:?}"
2651        );
2652    }
2653
2654    #[test]
2655    fn test_md033_templater_in_code_block_not_flagged() {
2656        // Templater syntax in code blocks should not be flagged (code blocks are skipped)
2657        let rule = MD033NoInlineHtml::default();
2658        let content = "```\n<% tp.date.now() %>\n```";
2659        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2660        let result = rule.check(&ctx).unwrap();
2661        assert!(
2662            result.is_empty(),
2663            "Templater in code block should not be flagged. Got: {result:?}"
2664        );
2665    }
2666
2667    #[test]
2668    fn test_md033_templater_in_inline_code_not_flagged() {
2669        // Templater syntax in inline code span should not be flagged
2670        let rule = MD033NoInlineHtml::default();
2671        let content = "Use `<% tp.date.now() %>` for current date.";
2672        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2673        let result = rule.check(&ctx).unwrap();
2674        assert!(
2675            result.is_empty(),
2676            "Templater in inline code should not be flagged. Got: {result:?}"
2677        );
2678    }
2679
2680    #[test]
2681    fn test_md033_templater_also_works_in_standard_flavor() {
2682        // Templater syntax should also not be flagged in Standard flavor
2683        // because the HTML parser doesn't recognize `<%` as a valid tag
2684        let rule = MD033NoInlineHtml::default();
2685        let content = "<% tp.date.now() %> works everywhere.";
2686        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2687        let result = rule.check(&ctx).unwrap();
2688        assert!(
2689            result.is_empty(),
2690            "Templater should not be flagged even in Standard flavor. Got: {result:?}"
2691        );
2692    }
2693
2694    #[test]
2695    fn test_md033_templater_empty_tag_not_flagged() {
2696        // Empty Templater tags
2697        let rule = MD033NoInlineHtml::default();
2698        let content = "<%>";
2699        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2700        let result = rule.check(&ctx).unwrap();
2701        assert!(
2702            result.is_empty(),
2703            "Empty Templater-like tag should not be flagged. Got: {result:?}"
2704        );
2705    }
2706
2707    #[test]
2708    fn test_md033_templater_unclosed_not_flagged() {
2709        // Unclosed Templater tags - these are template errors, not HTML
2710        let rule = MD033NoInlineHtml::default();
2711        let content = "<% tp.date.now() without closing tag";
2712        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2713        let result = rule.check(&ctx).unwrap();
2714        assert!(
2715            result.is_empty(),
2716            "Unclosed Templater should not be flagged as HTML. Got: {result:?}"
2717        );
2718    }
2719
2720    #[test]
2721    fn test_md033_templater_with_newlines_inside_not_flagged() {
2722        // Templater with newlines inside the expression
2723        let rule = MD033NoInlineHtml::default();
2724        let content = r#"<% tp.date.now("YYYY") +
2725"-" +
2726tp.date.now("MM") %>"#;
2727        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2728        let result = rule.check(&ctx).unwrap();
2729        assert!(
2730            result.is_empty(),
2731            "Templater with internal newlines should not be flagged. Got: {result:?}"
2732        );
2733    }
2734
2735    #[test]
2736    fn test_md033_erb_style_tags_not_flagged() {
2737        // ERB/EJS style tags (similar to Templater) are also not HTML
2738        // This documents the general principle that `<%` is not valid HTML
2739        let rule = MD033NoInlineHtml::default();
2740        let content = "<%= variable %> and <% code %> and <%# comment %>";
2741        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2742        let result = rule.check(&ctx).unwrap();
2743        assert!(
2744            result.is_empty(),
2745            "ERB/EJS style tags should not be flagged as HTML. Got: {result:?}"
2746        );
2747    }
2748
2749    #[test]
2750    fn test_md033_templater_complex_expression_not_flagged() {
2751        // Complex Templater expression with multiple function calls
2752        let rule = MD033NoInlineHtml::default();
2753        let content = r#"<%*
2754const file = tp.file.title;
2755const date = tp.date.now("YYYY-MM-DD");
2756const folder = tp.file.folder();
2757tR += `# ${file}\n\nCreated: ${date}\nIn: ${folder}`;
2758%>"#;
2759        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2760        let result = rule.check(&ctx).unwrap();
2761        assert!(
2762            result.is_empty(),
2763            "Complex Templater expression should not be flagged. Got: {result:?}"
2764        );
2765    }
2766
2767    #[test]
2768    fn test_md033_percent_sign_variations_not_flagged() {
2769        // Various patterns starting with <% that should all be safe
2770        let rule = MD033NoInlineHtml::default();
2771        let patterns = [
2772            "<%=",  // ERB output
2773            "<%#",  // ERB comment
2774            "<%%",  // Double percent
2775            "<%!",  // Some template engines
2776            "<%@",  // JSP directive
2777            "<%--", // JSP comment
2778        ];
2779        for pattern in patterns {
2780            let content = format!("{pattern} content %>");
2781            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
2782            let result = rule.check(&ctx).unwrap();
2783            assert!(
2784                result.is_empty(),
2785                "Pattern {pattern} should not be flagged. Got: {result:?}"
2786            );
2787        }
2788    }
2789
2790    // ───── Bug #3: Bracket escaping in image-inside-link conversion ─────
2791    //
2792    // When <a> wraps already-converted markdown image text, the bracket escaping
2793    // must be skipped to produce valid [![alt](url)](href) instead of !\[\](url)
2794
2795    #[test]
2796    fn test_md033_fix_a_wrapping_markdown_image_no_escaped_brackets() {
2797        // When <a> wraps a markdown image (from a prior fix iteration),
2798        // the result should be [![](url)](href) — no escaped brackets
2799        let rule = MD033NoInlineHtml::with_fix(true);
2800        let content = r#"<a href="https://example.com">![](https://example.com/image.png)</a>"#;
2801        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2802        let fixed = rule.fix(&ctx).unwrap();
2803
2804        assert_eq!(fixed, "[![](https://example.com/image.png)](https://example.com)",);
2805        assert!(!fixed.contains(r"\["), "Must not escape brackets: {fixed}");
2806        assert!(!fixed.contains(r"\]"), "Must not escape brackets: {fixed}");
2807    }
2808
2809    #[test]
2810    fn test_md033_fix_a_wrapping_markdown_image_with_alt() {
2811        // <a> wrapping ![alt](url) preserves alt text in linked image
2812        let rule = MD033NoInlineHtml::with_fix(true);
2813        let content =
2814            r#"<a href="https://github.com/repo">![Contributors](https://contrib.rocks/image?repo=org/repo)</a>"#;
2815        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2816        let fixed = rule.fix(&ctx).unwrap();
2817
2818        assert_eq!(
2819            fixed,
2820            "[![Contributors](https://contrib.rocks/image?repo=org/repo)](https://github.com/repo)"
2821        );
2822    }
2823
2824    #[test]
2825    fn test_md033_fix_img_without_alt_produces_empty_alt() {
2826        let rule = MD033NoInlineHtml::with_fix(true);
2827        let content = r#"<img src="photo.jpg" />"#;
2828        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2829        let fixed = rule.fix(&ctx).unwrap();
2830
2831        assert_eq!(fixed, "![](photo.jpg)");
2832    }
2833
2834    #[test]
2835    fn test_md033_fix_a_with_plain_text_still_escapes_brackets() {
2836        // Plain text brackets inside <a> SHOULD be escaped
2837        let rule = MD033NoInlineHtml::with_fix(true);
2838        let content = r#"<a href="https://example.com">text with [brackets]</a>"#;
2839        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2840        let fixed = rule.fix(&ctx).unwrap();
2841
2842        assert!(
2843            fixed.contains(r"\[brackets\]"),
2844            "Plain text brackets should be escaped: {fixed}"
2845        );
2846    }
2847
2848    #[test]
2849    fn test_md033_fix_a_with_image_plus_extra_text_escapes_brackets() {
2850        // Mixed content: image followed by bracketed text — brackets must be escaped
2851        // The image detection must NOT match partial content
2852        let rule = MD033NoInlineHtml::with_fix(true);
2853        let content = r#"<a href="/link">![](img.png) see [docs]</a>"#;
2854        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2855        let fixed = rule.fix(&ctx).unwrap();
2856
2857        // "see [docs]" brackets should be escaped since inner content is mixed
2858        assert!(
2859            fixed.contains(r"\[docs\]"),
2860            "Brackets in mixed image+text content should be escaped: {fixed}"
2861        );
2862    }
2863
2864    #[test]
2865    fn test_md033_fix_img_in_a_end_to_end() {
2866        // End-to-end: verify that iterative fixing of <a><img></a>
2867        // produces the correct final result through the fix coordinator
2868        use crate::config::Config;
2869        use crate::fix_coordinator::FixCoordinator;
2870
2871        let rule = MD033NoInlineHtml::with_fix(true);
2872        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2873
2874        let mut content =
2875            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image?repo=org/repo" /></a>"#
2876                .to_string();
2877        let config = Config::default();
2878        let coordinator = FixCoordinator::new();
2879
2880        let result = coordinator
2881            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2882            .unwrap();
2883
2884        assert_eq!(
2885            content, "[![](https://contrib.rocks/image?repo=org/repo)](https://github.com/org/repo)",
2886            "End-to-end: <a><img></a> should become valid linked image"
2887        );
2888        assert!(result.converged);
2889        assert!(!content.contains(r"\["), "No escaped brackets: {content}");
2890    }
2891
2892    #[test]
2893    fn test_md033_fix_img_in_a_with_alt_end_to_end() {
2894        use crate::config::Config;
2895        use crate::fix_coordinator::FixCoordinator;
2896
2897        let rule = MD033NoInlineHtml::with_fix(true);
2898        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2899
2900        let mut content =
2901            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image" alt="Contributors" /></a>"#
2902                .to_string();
2903        let config = Config::default();
2904        let coordinator = FixCoordinator::new();
2905
2906        let result = coordinator
2907            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2908            .unwrap();
2909
2910        assert_eq!(
2911            content,
2912            "[![Contributors](https://contrib.rocks/image)](https://github.com/org/repo)",
2913        );
2914        assert!(result.converged);
2915    }
2916}