Skip to main content

rumdl_lib/rules/
md033_no_inline_html.rs

1//!
2//! Rule MD033: No HTML tags
3//!
4//! See [docs/md033.md](../../docs/md033.md) for full documentation, configuration, and examples.
5
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::regex_cache::*;
8use std::collections::HashSet;
9
10mod md033_config;
11use md033_config::{MD033Config, MD033FixMode};
12
/// Rule MD033: no inline HTML.
///
/// Stores the rule configuration together with set views of its list-valued
/// options. Each set is produced once, at construction time, by the matching
/// `*_set()` helper on `MD033Config`.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    /// The configuration this rule instance was built from.
    config: MD033Config,
    /// Result of `config.allowed_set()`; consulted by `is_tag_allowed`.
    allowed: HashSet<String>,
    /// Result of `config.disallowed_set()`; consulted by `is_tag_disallowed`.
    disallowed: HashSet<String>,
    /// Result of `config.drop_attributes_set()`; consulted by `is_droppable_attribute`.
    drop_attributes: HashSet<String>,
    /// Result of `config.strip_wrapper_elements_set()`; consulted by the
    /// strippable-wrapper checks.
    strip_wrapper_elements: HashSet<String>,
}
21
22impl Default for MD033NoInlineHtml {
23    fn default() -> Self {
24        let config = MD033Config::default();
25        let allowed = config.allowed_set();
26        let disallowed = config.disallowed_set();
27        let drop_attributes = config.drop_attributes_set();
28        let strip_wrapper_elements = config.strip_wrapper_elements_set();
29        Self {
30            config,
31            allowed,
32            disallowed,
33            drop_attributes,
34            strip_wrapper_elements,
35        }
36    }
37}
38
39impl MD033NoInlineHtml {
40    pub fn new() -> Self {
41        Self::default()
42    }
43
44    pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
45        let config = MD033Config {
46            allowed: allowed_vec.clone(),
47            disallowed: Vec::new(),
48            fix: false,
49            ..MD033Config::default()
50        };
51        let allowed = config.allowed_set();
52        let disallowed = config.disallowed_set();
53        let drop_attributes = config.drop_attributes_set();
54        let strip_wrapper_elements = config.strip_wrapper_elements_set();
55        Self {
56            config,
57            allowed,
58            disallowed,
59            drop_attributes,
60            strip_wrapper_elements,
61        }
62    }
63
64    pub fn with_disallowed(disallowed_vec: Vec<String>) -> Self {
65        let config = MD033Config {
66            allowed: Vec::new(),
67            disallowed: disallowed_vec.clone(),
68            fix: false,
69            ..MD033Config::default()
70        };
71        let allowed = config.allowed_set();
72        let disallowed = config.disallowed_set();
73        let drop_attributes = config.drop_attributes_set();
74        let strip_wrapper_elements = config.strip_wrapper_elements_set();
75        Self {
76            config,
77            allowed,
78            disallowed,
79            drop_attributes,
80            strip_wrapper_elements,
81        }
82    }
83
84    /// Create a new rule with auto-fix enabled
85    pub fn with_fix(fix: bool) -> Self {
86        let config = MD033Config {
87            allowed: Vec::new(),
88            disallowed: Vec::new(),
89            fix,
90            ..MD033Config::default()
91        };
92        let allowed = config.allowed_set();
93        let disallowed = config.disallowed_set();
94        let drop_attributes = config.drop_attributes_set();
95        let strip_wrapper_elements = config.strip_wrapper_elements_set();
96        Self {
97            config,
98            allowed,
99            disallowed,
100            drop_attributes,
101            strip_wrapper_elements,
102        }
103    }
104
105    pub fn from_config_struct(config: MD033Config) -> Self {
106        let allowed = config.allowed_set();
107        let disallowed = config.disallowed_set();
108        let drop_attributes = config.drop_attributes_set();
109        let strip_wrapper_elements = config.strip_wrapper_elements_set();
110        Self {
111            config,
112            allowed,
113            disallowed,
114            drop_attributes,
115            strip_wrapper_elements,
116        }
117    }
118
119    // Efficient check for allowed tags using HashSet (case-insensitive)
120    #[inline]
121    fn is_tag_allowed(&self, tag: &str) -> bool {
122        if self.allowed.is_empty() {
123            return false;
124        }
125        // Remove angle brackets and slashes, then split by whitespace or '>'
126        let tag = tag.trim_start_matches('<').trim_start_matches('/');
127        let tag_name = tag
128            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
129            .next()
130            .unwrap_or("");
131        self.allowed.contains(&tag_name.to_lowercase())
132    }
133
134    /// Check if a tag is in the disallowed set (for disallowed-only mode)
135    #[inline]
136    fn is_tag_disallowed(&self, tag: &str) -> bool {
137        if self.disallowed.is_empty() {
138            return false;
139        }
140        // Remove angle brackets and slashes, then split by whitespace or '>'
141        let tag = tag.trim_start_matches('<').trim_start_matches('/');
142        let tag_name = tag
143            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
144            .next()
145            .unwrap_or("");
146        self.disallowed.contains(&tag_name.to_lowercase())
147    }
148
    /// Report whether the rule is operating in disallowed-only mode.
    ///
    /// Delegates to `is_disallowed_mode` on the stored `MD033Config`.
    #[inline]
    fn is_disallowed_mode(&self) -> bool {
        self.config.is_disallowed_mode()
    }
154
155    // Check if a tag is an HTML comment
156    #[inline]
157    fn is_html_comment(&self, tag: &str) -> bool {
158        tag.starts_with("<!--") && tag.ends_with("-->")
159    }
160
161    /// Check if a tag name is a valid HTML element or custom element.
162    /// Returns false for placeholder syntax like `<NAME>`, `<resource>`, `<actual>`.
163    ///
164    /// Per HTML spec, custom elements must contain a hyphen (e.g., `<my-component>`).
165    #[inline]
166    fn is_html_element_or_custom(tag_name: &str) -> bool {
167        // Sorted for binary search — must remain sorted when adding elements
168        const HTML_ELEMENTS: &[&str] = &[
169            "a",
170            "abbr",
171            "acronym",
172            "address",
173            "applet",
174            "area",
175            "article",
176            "aside",
177            "audio",
178            "b",
179            "base",
180            "basefont",
181            "bdi",
182            "bdo",
183            "big",
184            "blockquote",
185            "body",
186            "br",
187            "button",
188            "canvas",
189            "caption",
190            "center",
191            "cite",
192            "code",
193            "col",
194            "colgroup",
195            "data",
196            "datalist",
197            "dd",
198            "del",
199            "details",
200            "dfn",
201            "dialog",
202            "dir",
203            "div",
204            "dl",
205            "dt",
206            "em",
207            "embed",
208            "fieldset",
209            "figcaption",
210            "figure",
211            "font",
212            "footer",
213            "form",
214            "frame",
215            "frameset",
216            "h1",
217            "h2",
218            "h3",
219            "h4",
220            "h5",
221            "h6",
222            "head",
223            "header",
224            "hgroup",
225            "hr",
226            "html",
227            "i",
228            "iframe",
229            "img",
230            "input",
231            "ins",
232            "isindex",
233            "kbd",
234            "label",
235            "legend",
236            "li",
237            "link",
238            "main",
239            "map",
240            "mark",
241            "marquee",
242            "math",
243            "menu",
244            "meta",
245            "meter",
246            "nav",
247            "noembed",
248            "noframes",
249            "noscript",
250            "object",
251            "ol",
252            "optgroup",
253            "option",
254            "output",
255            "p",
256            "param",
257            "picture",
258            "plaintext",
259            "pre",
260            "progress",
261            "q",
262            "rp",
263            "rt",
264            "ruby",
265            "s",
266            "samp",
267            "script",
268            "search",
269            "section",
270            "select",
271            "slot",
272            "small",
273            "source",
274            "span",
275            "strike",
276            "strong",
277            "style",
278            "sub",
279            "summary",
280            "sup",
281            "svg",
282            "table",
283            "tbody",
284            "td",
285            "template",
286            "textarea",
287            "tfoot",
288            "th",
289            "thead",
290            "time",
291            "title",
292            "tr",
293            "track",
294            "tt",
295            "u",
296            "ul",
297            "var",
298            "video",
299            "wbr",
300            "xmp",
301        ];
302
303        let lower = tag_name.to_ascii_lowercase();
304        if HTML_ELEMENTS.binary_search(&lower.as_str()).is_ok() {
305            return true;
306        }
307        // Custom elements must contain a hyphen per HTML spec
308        tag_name.contains('-')
309    }
310
311    // Check if a tag is likely a programming type annotation rather than HTML
312    #[inline]
313    fn is_likely_type_annotation(&self, tag: &str) -> bool {
314        // Sorted for binary search — must remain sorted when adding elements
315        const COMMON_TYPES: &[&str] = &[
316            "any",
317            "apiresponse",
318            "array",
319            "bigint",
320            "config",
321            "data",
322            "date",
323            "e",
324            "element",
325            "error",
326            "function",
327            "generator",
328            "item",
329            "iterator",
330            "k",
331            "map",
332            "node",
333            "null",
334            "number",
335            "options",
336            "params",
337            "promise",
338            "regexp",
339            "request",
340            "response",
341            "result",
342            "set",
343            "string",
344            "symbol",
345            "t",
346            "u",
347            "undefined",
348            "userdata",
349            "v",
350            "void",
351            "weakmap",
352            "weakset",
353        ];
354
355        let tag_content = tag
356            .trim_start_matches('<')
357            .trim_end_matches('>')
358            .trim_start_matches('/');
359        let tag_name = tag_content
360            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
361            .next()
362            .unwrap_or("");
363
364        // Check if it's a simple tag (no attributes) with a common type name
365        if !tag_content.contains(' ') && !tag_content.contains('=') {
366            let lower = tag_name.to_ascii_lowercase();
367            COMMON_TYPES.binary_search(&lower.as_str()).is_ok()
368        } else {
369            false
370        }
371    }
372
373    // Check if a tag is actually an email address in angle brackets
374    #[inline]
375    fn is_email_address(&self, tag: &str) -> bool {
376        let content = tag.trim_start_matches('<').trim_end_matches('>');
377        // Simple email pattern: contains @ and has reasonable structure
378        content.contains('@')
379            && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
380            && content.split('@').count() == 2
381            && content.split('@').all(|part| !part.is_empty())
382    }
383
384    // Check if a tag has the markdown attribute (MkDocs/Material for MkDocs)
385    #[inline]
386    fn has_markdown_attribute(&self, tag: &str) -> bool {
387        // Check for various forms of markdown attribute
388        // Examples: <div markdown>, <div markdown="1">, <div class="result" markdown>
389        tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
390    }
391
392    /// Check if a tag contains JSX-specific attributes that indicate it's JSX, not HTML
393    /// JSX uses different attribute names than HTML:
394    /// - `className` instead of `class`
395    /// - `htmlFor` instead of `for`
396    /// - camelCase event handlers (`onClick`, `onChange`, `onSubmit`, etc.)
397    /// - JSX expression syntax `={...}` for dynamic values
398    #[inline]
399    fn has_jsx_attributes(tag: &str) -> bool {
400        // JSX-specific attribute names (HTML uses class, for, onclick, etc.)
401        tag.contains("className")
402            || tag.contains("htmlFor")
403            || tag.contains("dangerouslySetInnerHTML")
404            // camelCase event handlers (JSX uses onClick, HTML uses onclick)
405            || tag.contains("onClick")
406            || tag.contains("onChange")
407            || tag.contains("onSubmit")
408            || tag.contains("onFocus")
409            || tag.contains("onBlur")
410            || tag.contains("onKeyDown")
411            || tag.contains("onKeyUp")
412            || tag.contains("onKeyPress")
413            || tag.contains("onMouseDown")
414            || tag.contains("onMouseUp")
415            || tag.contains("onMouseEnter")
416            || tag.contains("onMouseLeave")
417            // JSX expression syntax: ={expression} or ={ expression }
418            || tag.contains("={")
419    }
420
421    // Check if a tag is actually a URL in angle brackets
422    #[inline]
423    fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
424        let content = tag.trim_start_matches('<').trim_end_matches('>');
425        // Check for common URL schemes
426        content.starts_with("http://")
427            || content.starts_with("https://")
428            || content.starts_with("ftp://")
429            || content.starts_with("ftps://")
430            || content.starts_with("mailto:")
431    }
432
433    #[inline]
434    fn is_relaxed_fix_mode(&self) -> bool {
435        self.config.fix_mode == MD033FixMode::Relaxed
436    }
437
438    #[inline]
439    fn is_droppable_attribute(&self, attr_name: &str) -> bool {
440        // Event handler attributes (onclick, onload, etc.) are never droppable
441        // because they can execute arbitrary JavaScript.
442        if attr_name.starts_with("on") && attr_name.len() > 2 {
443            return false;
444        }
445        self.drop_attributes.contains(attr_name)
446            || (attr_name.starts_with("data-")
447                && (self.drop_attributes.contains("data-*") || self.drop_attributes.contains("data-")))
448    }
449
450    #[inline]
451    fn is_strippable_wrapper(&self, tag_name: &str) -> bool {
452        self.is_relaxed_fix_mode() && self.strip_wrapper_elements.contains(tag_name)
453    }
454
455    /// Check whether `byte_offset` sits directly inside a top-level strippable
456    /// wrapper element (e.g. `<p>`).  Returns `true` only when:
457    ///  1. The nearest unclosed opening tag before the offset is a configured
458    ///     wrapper element, AND
459    ///  2. That wrapper is itself NOT nested inside another HTML element.
460    ///
461    /// Condition 2 prevents converting inner content when the wrapper cannot
462    /// be stripped (e.g. `<div><p><img/></p></div>` -- stripping `<p>` is
463    /// blocked because it is nested, so converting `<img>` would leave
464    /// markdown inside an HTML block where it won't render).
465    fn is_inside_strippable_wrapper(&self, content: &str, byte_offset: usize) -> bool {
466        if byte_offset == 0 {
467            return false;
468        }
469        let before = content[..byte_offset].trim_end();
470        if !before.ends_with('>') || before.ends_with("->") {
471            return false;
472        }
473        if let Some(last_lt) = before.rfind('<') {
474            let potential_tag = &before[last_lt..];
475            if potential_tag.starts_with("</") || potential_tag.starts_with("<!--") {
476                return false;
477            }
478            let parent_name = potential_tag
479                .trim_start_matches('<')
480                .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
481                .next()
482                .unwrap_or("")
483                .to_lowercase();
484            if !self.strip_wrapper_elements.contains(&parent_name) {
485                return false;
486            }
487            // Verify the wrapper itself is not nested inside another element.
488            let wrapper_before = before[..last_lt].trim_end();
489            if wrapper_before.ends_with('>')
490                && !wrapper_before.ends_with("->")
491                && let Some(outer_lt) = wrapper_before.rfind('<')
492                && let outer_tag = &wrapper_before[outer_lt..]
493                && !outer_tag.starts_with("</")
494                && !outer_tag.starts_with("<!--")
495            {
496                return false;
497            }
498            return true;
499        }
500        false
501    }
502
503    /// Convert paired HTML tags to their Markdown equivalents.
504    /// Returns None if the tag cannot be safely converted (has nested tags, HTML entities, etc.)
505    fn convert_to_markdown(tag_name: &str, inner_content: &str) -> Option<String> {
506        // Skip if content contains nested HTML tags
507        if inner_content.contains('<') {
508            return None;
509        }
510        // Skip if content contains HTML entities (e.g., &vert;, &amp;, &lt;)
511        // These need HTML context to render correctly; markdown won't process them
512        if inner_content.contains('&') && inner_content.contains(';') {
513            // Check for common HTML entity patterns
514            let has_entity = inner_content
515                .split('&')
516                .skip(1)
517                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
518            if has_entity {
519                return None;
520            }
521        }
522        match tag_name {
523            "em" | "i" => Some(format!("*{inner_content}*")),
524            "strong" | "b" => Some(format!("**{inner_content}**")),
525            "code" => {
526                // Handle backticks in content by using double backticks with padding
527                if inner_content.contains('`') {
528                    Some(format!("`` {inner_content} ``"))
529                } else {
530                    Some(format!("`{inner_content}`"))
531                }
532            }
533            _ => None,
534        }
535    }
536
537    /// Convert self-closing HTML tags to their Markdown equivalents.
538    fn convert_self_closing_to_markdown(&self, tag_name: &str, opening_tag: &str) -> Option<String> {
539        match tag_name {
540            "br" => match self.config.br_style {
541                md033_config::BrStyle::TrailingSpaces => Some("  \n".to_string()),
542                md033_config::BrStyle::Backslash => Some("\\\n".to_string()),
543            },
544            "hr" => Some("\n---\n".to_string()),
545            "img" => self.convert_img_to_markdown(opening_tag),
546            _ => None,
547        }
548    }
549
550    /// Parse all attributes from an HTML tag into a list of (name, value) pairs.
551    /// This provides proper attribute parsing instead of naive string matching.
552    fn parse_attributes(tag: &str) -> Vec<(String, Option<String>)> {
553        let mut attrs = Vec::new();
554
555        // Remove < and > and tag name
556        let tag_content = tag.trim_start_matches('<').trim_end_matches('>').trim_end_matches('/');
557
558        // Find first whitespace to skip tag name
559        let attr_start = tag_content
560            .find(|c: char| c.is_whitespace())
561            .map(|i| i + 1)
562            .unwrap_or(tag_content.len());
563
564        if attr_start >= tag_content.len() {
565            return attrs;
566        }
567
568        let attr_str = &tag_content[attr_start..];
569        let mut chars = attr_str.chars().peekable();
570
571        while chars.peek().is_some() {
572            // Skip whitespace
573            while chars.peek().is_some_and(|c| c.is_whitespace()) {
574                chars.next();
575            }
576
577            if chars.peek().is_none() {
578                break;
579            }
580
581            // Read attribute name
582            let mut attr_name = String::new();
583            while let Some(&c) = chars.peek() {
584                if c.is_whitespace() || c == '=' || c == '>' || c == '/' {
585                    break;
586                }
587                attr_name.push(c);
588                chars.next();
589            }
590
591            if attr_name.is_empty() {
592                break;
593            }
594
595            // Skip whitespace before =
596            while chars.peek().is_some_and(|c| c.is_whitespace()) {
597                chars.next();
598            }
599
600            // Check for = and value
601            if chars.peek() == Some(&'=') {
602                chars.next(); // consume =
603
604                // Skip whitespace after =
605                while chars.peek().is_some_and(|c| c.is_whitespace()) {
606                    chars.next();
607                }
608
609                // Read value
610                let mut value = String::new();
611                if let Some(&quote) = chars.peek() {
612                    if quote == '"' || quote == '\'' {
613                        chars.next(); // consume opening quote
614                        for c in chars.by_ref() {
615                            if c == quote {
616                                break;
617                            }
618                            value.push(c);
619                        }
620                    } else {
621                        // Unquoted value
622                        while let Some(&c) = chars.peek() {
623                            if c.is_whitespace() || c == '>' || c == '/' {
624                                break;
625                            }
626                            value.push(c);
627                            chars.next();
628                        }
629                    }
630                }
631                attrs.push((attr_name.to_ascii_lowercase(), Some(value)));
632            } else {
633                // Boolean attribute (no value)
634                attrs.push((attr_name.to_ascii_lowercase(), None));
635            }
636        }
637
638        attrs
639    }
640
641    /// Extract an HTML attribute value from a tag string.
642    /// Handles double quotes, single quotes, and unquoted values.
643    /// Returns None if the attribute is not found.
644    fn extract_attribute(tag: &str, attr_name: &str) -> Option<String> {
645        let attrs = Self::parse_attributes(tag);
646        let attr_lower = attr_name.to_ascii_lowercase();
647
648        attrs
649            .into_iter()
650            .find(|(name, _)| name == &attr_lower)
651            .and_then(|(_, value)| value)
652    }
653
654    /// Check if an HTML tag has extra attributes beyond the specified allowed ones.
655    /// Uses proper attribute parsing to avoid false positives from string matching.
656    fn has_extra_attributes(&self, tag: &str, allowed_attrs: &[&str]) -> bool {
657        let attrs = Self::parse_attributes(tag);
658
659        // All event handlers (on*) are dangerous
660        // Plus common attributes that would be lost in markdown conversion
661        const DANGEROUS_ATTR_PREFIXES: &[&str] = &["on"]; // onclick, onload, onerror, etc.
662        const DANGEROUS_ATTRS: &[&str] = &[
663            "class",
664            "id",
665            "style",
666            "target",
667            "rel",
668            "download",
669            "referrerpolicy",
670            "crossorigin",
671            "loading",
672            "decoding",
673            "fetchpriority",
674            "sizes",
675            "srcset",
676            "usemap",
677            "ismap",
678            "width",
679            "height",
680            "name",   // anchor names
681            "data-*", // data attributes (checked separately)
682        ];
683
684        for (attr_name, _) in attrs {
685            // Skip allowed attributes (list is small, linear scan is efficient)
686            if allowed_attrs.iter().any(|a| a.to_ascii_lowercase() == attr_name) {
687                continue;
688            }
689
690            if self.is_relaxed_fix_mode() {
691                if self.is_droppable_attribute(&attr_name) {
692                    continue;
693                }
694                return true;
695            }
696
697            // Check for event handlers (on*)
698            for prefix in DANGEROUS_ATTR_PREFIXES {
699                if attr_name.starts_with(prefix) && attr_name.len() > prefix.len() {
700                    return true;
701                }
702            }
703
704            // Check for data-* attributes
705            if attr_name.starts_with("data-") {
706                return true;
707            }
708
709            // Check for other dangerous attributes
710            if DANGEROUS_ATTRS.contains(&attr_name.as_str()) {
711                return true;
712            }
713        }
714
715        false
716    }
717
718    /// Convert `<a href="url">text</a>` to `[text](url)` or `[text](url "title")`
719    /// Returns None if conversion is not safe.
720    fn convert_a_to_markdown(&self, opening_tag: &str, inner_content: &str) -> Option<String> {
721        // Extract href attribute
722        let href = Self::extract_attribute(opening_tag, "href")?;
723
724        // Check URL is safe
725        if !MD033Config::is_safe_url(&href) {
726            return None;
727        }
728
729        // Check for nested HTML tags in content
730        if inner_content.contains('<') {
731            return None;
732        }
733
734        // Check for HTML entities that wouldn't render correctly in markdown
735        if inner_content.contains('&') && inner_content.contains(';') {
736            let has_entity = inner_content
737                .split('&')
738                .skip(1)
739                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
740            if has_entity {
741                return None;
742            }
743        }
744
745        // Extract optional title attribute
746        let title = Self::extract_attribute(opening_tag, "title");
747
748        // Check for extra dangerous attributes (title is allowed)
749        if self.has_extra_attributes(opening_tag, &["href", "title"]) {
750            return None;
751        }
752
753        // If inner content is exactly a markdown image (from a prior <img> fix),
754        // use it directly without bracket escaping to produce valid [![alt](src)](href).
755        // Must verify the entire content is a single image — not mixed content like
756        // "![](url) extra [text]" where trailing brackets still need escaping.
757        let trimmed_inner = inner_content.trim();
758        let is_markdown_image =
759            trimmed_inner.starts_with("![") && trimmed_inner.contains("](") && trimmed_inner.ends_with(')') && {
760                // Verify the closing ](url) accounts for the rest of the content
761                // by finding the image's ]( and checking nothing follows the final )
762                if let Some(bracket_close) = trimmed_inner.rfind("](") {
763                    let after_paren = &trimmed_inner[bracket_close + 2..];
764                    // The rest should be just "url)" — find the matching close paren
765                    after_paren.ends_with(')')
766                        && after_paren.chars().filter(|&c| c == ')').count()
767                            >= after_paren.chars().filter(|&c| c == '(').count()
768                } else {
769                    false
770                }
771            };
772        let escaped_text = if is_markdown_image {
773            trimmed_inner.to_string()
774        } else {
775            // Escape special markdown characters in link text
776            // Brackets need escaping to avoid breaking the link syntax
777            inner_content.replace('[', r"\[").replace(']', r"\]")
778        };
779
780        // Escape parentheses in URL
781        let escaped_url = href.replace('(', "%28").replace(')', "%29");
782
783        // Format with or without title
784        if let Some(title_text) = title {
785            // Escape quotes in title
786            let escaped_title = title_text.replace('"', r#"\""#);
787            Some(format!("[{escaped_text}]({escaped_url} \"{escaped_title}\")"))
788        } else {
789            Some(format!("[{escaped_text}]({escaped_url})"))
790        }
791    }
792
793    /// Convert `<img src="url" alt="text">` to `![alt](src)` or `![alt](src "title")`
794    /// Returns None if conversion is not safe.
795    fn convert_img_to_markdown(&self, tag: &str) -> Option<String> {
796        // Extract src attribute (required)
797        let src = Self::extract_attribute(tag, "src")?;
798
799        // Check URL is safe
800        if !MD033Config::is_safe_url(&src) {
801            return None;
802        }
803
804        // Extract alt attribute (optional, default to empty)
805        let alt = Self::extract_attribute(tag, "alt").unwrap_or_default();
806
807        // Extract optional title attribute
808        let title = Self::extract_attribute(tag, "title");
809
810        // Check for extra dangerous attributes (title is allowed)
811        if self.has_extra_attributes(tag, &["src", "alt", "title"]) {
812            return None;
813        }
814
815        // Escape special markdown characters in alt text
816        let escaped_alt = alt.replace('[', r"\[").replace(']', r"\]");
817
818        // Escape parentheses in URL
819        let escaped_url = src.replace('(', "%28").replace(')', "%29");
820
821        // Format with or without title
822        if let Some(title_text) = title {
823            // Escape quotes in title
824            let escaped_title = title_text.replace('"', r#"\""#);
825            Some(format!("![{escaped_alt}]({escaped_url} \"{escaped_title}\")"))
826        } else {
827            Some(format!("![{escaped_alt}]({escaped_url})"))
828        }
829    }
830
831    /// Check if an HTML tag has attributes that would make conversion unsafe
832    fn has_significant_attributes(opening_tag: &str) -> bool {
833        // Tags with just whitespace or empty are fine
834        let tag_content = opening_tag
835            .trim_start_matches('<')
836            .trim_end_matches('>')
837            .trim_end_matches('/');
838
839        // Split by whitespace; if there's more than the tag name, it has attributes
840        let parts: Vec<&str> = tag_content.split_whitespace().collect();
841        parts.len() > 1
842    }
843
844    /// Check if a tag appears to be nested inside another HTML element
845    /// by looking at the surrounding context (e.g., `<code><em>text</em></code>`)
846    fn is_nested_in_html(content: &str, tag_byte_start: usize, tag_byte_end: usize) -> bool {
847        // Check if there's a `>` immediately before this tag (indicating inside another element)
848        if tag_byte_start > 0 {
849            let before = &content[..tag_byte_start];
850            let before_trimmed = before.trim_end();
851            if before_trimmed.ends_with('>') && !before_trimmed.ends_with("->") {
852                // Check it's not a closing tag or comment
853                if let Some(last_lt) = before_trimmed.rfind('<') {
854                    let potential_tag = &before_trimmed[last_lt..];
855                    // Skip if it's a closing tag (</...>) or comment (<!--)
856                    if !potential_tag.starts_with("</") && !potential_tag.starts_with("<!--") {
857                        return true;
858                    }
859                }
860            }
861        }
862        // Check if there's a `<` immediately after the closing tag (indicating inside another element)
863        if tag_byte_end < content.len() {
864            let after = &content[tag_byte_end..];
865            let after_trimmed = after.trim_start();
866            if after_trimmed.starts_with("</") {
867                return true;
868            }
869        }
870        false
871    }
872
873    /// Calculate fix to remove HTML tags while keeping content.
874    ///
875    /// For self-closing tags like `<br/>`, returns a single fix to remove the tag.
876    /// For paired tags like `<span>text</span>`, returns the replacement text (just the content).
877    ///
878    /// Returns (range, replacement_text) where range is the bytes to replace
879    /// and replacement_text is what to put there (content without tags, or empty for self-closing).
880    ///
881    /// When `in_html_block` is true, returns None in conservative mode.  In
882    /// relaxed mode two exceptions apply:
883    /// - Strippable wrapper elements (e.g. `<p>`) bypass the block guard so
884    ///   they can be stripped even though they ARE the HTML block.
885    /// - Self-closing tags whose direct parent is a strippable wrapper also
886    ///   bypass the guard so inner content can be converted first.
887    fn calculate_fix(
888        &self,
889        content: &str,
890        opening_tag: &str,
891        tag_byte_start: usize,
892        in_html_block: bool,
893    ) -> Option<(std::ops::Range<usize>, String)> {
894        // Extract tag name from opening tag
895        let tag_name = opening_tag
896            .trim_start_matches('<')
897            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
898            .next()?
899            .to_lowercase();
900
901        // Check if it's a self-closing tag (ends with /> or is a void element like <br>)
902        let is_self_closing =
903            opening_tag.ends_with("/>") || matches!(tag_name.as_str(), "br" | "hr" | "img" | "input" | "meta" | "link");
904
905        if is_self_closing {
906            // When fix is enabled, try to convert to Markdown equivalent.
907            // Skip tags inside HTML blocks (would break structure), UNLESS we
908            // are in relaxed mode and the containing block is a strippable
909            // wrapper -- this lets the inner element be converted first so the
910            // wrapper can be stripped on a subsequent pass.
911            let block_ok = !in_html_block
912                || (self.is_relaxed_fix_mode() && self.is_inside_strippable_wrapper(content, tag_byte_start));
913            if self.config.fix
914                && MD033Config::is_safe_fixable_tag(&tag_name)
915                && block_ok
916                && let Some(markdown) = self.convert_self_closing_to_markdown(&tag_name, opening_tag)
917            {
918                return Some((tag_byte_start..tag_byte_start + opening_tag.len(), markdown));
919            }
920            // Can't convert this self-closing tag to Markdown, don't provide a fix
921            // (e.g., <input>, <meta> - these have no Markdown equivalent without the new img support)
922            return None;
923        }
924
925        // Search for the closing tag after the opening tag (case-insensitive)
926        let search_start = tag_byte_start + opening_tag.len();
927        let search_slice = &content[search_start..];
928
929        // Find closing tag case-insensitively
930        let closing_tag_lower = format!("</{tag_name}>");
931        let closing_pos = search_slice.to_ascii_lowercase().find(&closing_tag_lower);
932
933        if let Some(closing_pos) = closing_pos {
934            // Get actual closing tag from original content to get correct byte length
935            let closing_tag_len = closing_tag_lower.len();
936            let closing_byte_start = search_start + closing_pos;
937            let closing_byte_end = closing_byte_start + closing_tag_len;
938
939            // Extract the content between tags
940            let inner_content = &content[search_start..closing_byte_start];
941
942            // In relaxed mode, check wrapper stripping BEFORE the in_html_block
943            // guard because the wrapper element itself IS the HTML block. We only
944            // strip when:
945            //  - the wrapper is not nested inside another HTML element
946            //  - the inner content no longer contains HTML tags (prevents
947            //    overlapping byte-range replacements within a single fix pass)
948            if self.config.fix && self.is_strippable_wrapper(&tag_name) {
949                if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
950                    return None;
951                }
952                if inner_content.contains('<') {
953                    return None;
954                }
955                return Some((tag_byte_start..closing_byte_end, inner_content.trim().to_string()));
956            }
957
958            // Skip auto-fix if inside an HTML block (like <pre>, <div>, etc.)
959            // Converting tags inside HTML blocks would break the intended structure
960            if in_html_block {
961                return None;
962            }
963
964            // Skip auto-fix if this tag is nested inside another HTML element
965            // e.g., <code><em>text</em></code> - don't convert the inner <em>
966            if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
967                return None;
968            }
969
970            // When fix is enabled and tag is safe to convert, try markdown conversion
971            if self.config.fix && MD033Config::is_safe_fixable_tag(&tag_name) {
972                // Handle <a> tags specially - they require attribute extraction
973                if tag_name == "a" {
974                    if let Some(markdown) = self.convert_a_to_markdown(opening_tag, inner_content) {
975                        return Some((tag_byte_start..closing_byte_end, markdown));
976                    }
977                    // convert_a_to_markdown returned None - unsafe URL, nested HTML, etc.
978                    return None;
979                }
980
981                // For simple tags (em, strong, code, etc.) - no attributes allowed
982                if Self::has_significant_attributes(opening_tag) {
983                    // Don't provide a fix for tags with attributes
984                    // User may want to keep the attributes, so leave as-is
985                    return None;
986                }
987                if let Some(markdown) = Self::convert_to_markdown(&tag_name, inner_content) {
988                    return Some((tag_byte_start..closing_byte_end, markdown));
989                }
990                // convert_to_markdown returned None, meaning content has nested tags or
991                // HTML entities that shouldn't be converted - leave as-is
992                return None;
993            }
994
995            // For non-fixable tags, don't provide a fix
996            // (e.g., <div>content</div>, <span>text</span>)
997            return None;
998        }
999
1000        // If no closing tag found, don't provide a fix (malformed HTML)
1001        None
1002    }
1003}
1004
1005impl Rule for MD033NoInlineHtml {
    /// Identifier of this rule; `check` stamps it on every warning and
    /// `default_config_section` uses it as the section key.
    fn name(&self) -> &'static str {
        "MD033"
    }
1009
    /// One-line human-readable summary of what the rule forbids.
    fn description(&self) -> &'static str {
        "Inline HTML is not allowed"
    }
1013
1014    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
1015        let content = ctx.content;
1016
1017        // Early return: if no HTML tags at all, skip processing
1018        if content.is_empty() || !ctx.likely_has_html() {
1019            return Ok(Vec::new());
1020        }
1021
1022        // Quick check for HTML tag pattern before expensive processing
1023        if !HTML_TAG_QUICK_CHECK.is_match(content) {
1024            return Ok(Vec::new());
1025        }
1026
1027        let mut warnings = Vec::new();
1028
1029        // Use centralized HTML parser to get all HTML tags (including multiline)
1030        let html_tags = ctx.html_tags();
1031
1032        for html_tag in html_tags.iter() {
1033            // Skip closing tags (only warn on opening tags)
1034            if html_tag.is_closing {
1035                continue;
1036            }
1037
1038            let line_num = html_tag.line;
1039            let tag_byte_start = html_tag.byte_offset;
1040
1041            // Reconstruct tag string from byte offsets
1042            let tag = &content[html_tag.byte_offset..html_tag.byte_end];
1043
1044            // Skip tags in code blocks, PyMdown blocks, and block IALs
1045            if ctx
1046                .line_info(line_num)
1047                .is_some_and(|info| info.in_code_block || info.in_pymdown_block || info.is_kramdown_block_ial)
1048            {
1049                continue;
1050            }
1051
1052            // Skip HTML tags inside HTML comments
1053            if ctx.is_in_html_comment(tag_byte_start) {
1054                continue;
1055            }
1056
1057            // Skip HTML comments themselves
1058            if self.is_html_comment(tag) {
1059                continue;
1060            }
1061
1062            // Skip angle brackets inside link reference definition titles
1063            // e.g., [ref]: url "Title with <angle brackets>"
1064            if ctx.is_in_link_title(tag_byte_start) {
1065                continue;
1066            }
1067
1068            // Skip JSX components in MDX files (e.g., <Chart />, <MyComponent>)
1069            if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
1070                continue;
1071            }
1072
1073            // Skip JSX fragments in MDX files (<> and </>)
1074            if ctx.flavor.supports_jsx() && (html_tag.tag_name.is_empty() || tag == "<>" || tag == "</>") {
1075                continue;
1076            }
1077
1078            // Skip elements with JSX-specific attributes in MDX files
1079            // e.g., <div className="...">, <button onClick={handler}>
1080            if ctx.flavor.supports_jsx() && Self::has_jsx_attributes(tag) {
1081                continue;
1082            }
1083
1084            // Skip non-HTML elements (placeholder syntax like <NAME>, <resource>)
1085            if !Self::is_html_element_or_custom(&html_tag.tag_name) {
1086                continue;
1087            }
1088
1089            // Skip likely programming type annotations
1090            if self.is_likely_type_annotation(tag) {
1091                continue;
1092            }
1093
1094            // Skip email addresses in angle brackets
1095            if self.is_email_address(tag) {
1096                continue;
1097            }
1098
1099            // Skip URLs in angle brackets
1100            if self.is_url_in_angle_brackets(tag) {
1101                continue;
1102            }
1103
1104            // Skip tags inside code spans (use byte offset for reliable multi-line span detection)
1105            if ctx.is_byte_offset_in_code_span(tag_byte_start) {
1106                continue;
1107            }
1108
1109            // Determine whether to report this tag based on mode:
1110            // - Disallowed mode: only report tags in the disallowed list
1111            // - Default mode: report all tags except those in the allowed list
1112            if self.is_disallowed_mode() {
1113                // In disallowed mode, skip tags NOT in the disallowed list
1114                if !self.is_tag_disallowed(tag) {
1115                    continue;
1116                }
1117            } else {
1118                // In default mode, skip allowed tags
1119                if self.is_tag_allowed(tag) {
1120                    continue;
1121                }
1122            }
1123
1124            // Skip tags with markdown attribute in MkDocs mode
1125            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
1126                continue;
1127            }
1128
1129            // Check if we're inside an HTML block (like <pre>, <div>, etc.)
1130            let in_html_block = ctx.is_in_html_block(line_num);
1131
1132            // Calculate fix to remove HTML tags but keep content
1133            let fix = self
1134                .calculate_fix(content, tag, tag_byte_start, in_html_block)
1135                .map(|(range, replacement)| Fix { range, replacement });
1136
1137            // Calculate actual end line and column for multiline tags
1138            // Use byte_end - 1 to get the last character position of the tag
1139            let (end_line, end_col) = if html_tag.byte_end > 0 {
1140                ctx.offset_to_line_col(html_tag.byte_end - 1)
1141            } else {
1142                (line_num, html_tag.end_col + 1)
1143            };
1144
1145            // Report the HTML tag
1146            warnings.push(LintWarning {
1147                rule_name: Some(self.name().to_string()),
1148                line: line_num,
1149                column: html_tag.start_col + 1, // Convert to 1-indexed
1150                end_line,                       // Actual end line for multiline tags
1151                end_column: end_col + 1,        // Actual end column
1152                message: format!("Inline HTML found: {tag}"),
1153                severity: Severity::Warning,
1154                fix,
1155            });
1156        }
1157
1158        Ok(warnings)
1159    }
1160
1161    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
1162        // Auto-fix is opt-in: only apply if explicitly enabled in config
1163        if !self.config.fix {
1164            return Ok(ctx.content.to_string());
1165        }
1166
1167        // Get warnings with their inline fixes
1168        let warnings = self.check(ctx)?;
1169        let warnings =
1170            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
1171
1172        // If no warnings with fixes, return original content
1173        if warnings.is_empty() || !warnings.iter().any(|w| w.fix.is_some()) {
1174            return Ok(ctx.content.to_string());
1175        }
1176
1177        // Collect all fixes and sort by range start (descending) to apply from end to beginning
1178        let mut fixes: Vec<_> = warnings
1179            .iter()
1180            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
1181            .collect();
1182        fixes.sort_by(|a, b| b.0.cmp(&a.0));
1183
1184        // Apply fixes from end to beginning to preserve byte offsets
1185        let mut result = ctx.content.to_string();
1186        for (start, end, replacement) in fixes {
1187            if start < result.len() && end <= result.len() && start <= end {
1188                result.replace_range(start..end, replacement);
1189            }
1190        }
1191
1192        Ok(result)
1193    }
1194
1195    fn fix_capability(&self) -> crate::rule::FixCapability {
1196        if self.config.fix {
1197            crate::rule::FixCapability::FullyFixable
1198        } else {
1199            crate::rule::FixCapability::Unfixable
1200        }
1201    }
1202
    /// Category of this rule, always `RuleCategory::Html`.
    ///
    /// NOTE(review): presumably consumed by the runner for selective
    /// processing of rule groups — confirm against the `Rule` trait docs.
    fn category(&self) -> RuleCategory {
        RuleCategory::Html
    }
1207
1208    /// Check if this rule should be skipped
1209    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
1210        ctx.content.is_empty() || !ctx.likely_has_html()
1211    }
1212
    /// Expose the rule as `&dyn Any`.
    ///
    /// NOTE(review): presumably so callers holding a `dyn Rule` can downcast
    /// to `MD033NoInlineHtml` — confirm against the `Rule` trait docs.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
1216
1217    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1218        let json_value = serde_json::to_value(&self.config).ok()?;
1219        Some((
1220            self.name().to_string(),
1221            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1222        ))
1223    }
1224
1225    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
1226        let mut aliases = std::collections::HashMap::new();
1227        // Shorthand aliases for allowed-elements/disallowed-elements
1228        aliases.insert("allowed".to_string(), "allowed-elements".to_string());
1229        aliases.insert("disallowed".to_string(), "disallowed-elements".to_string());
1230        Some(aliases)
1231    }
1232
1233    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1234    where
1235        Self: Sized,
1236    {
1237        let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
1238        Box::new(Self::from_config_struct(rule_config))
1239    }
1240}
1241
1242#[cfg(test)]
1243mod tests {
1244    use super::*;
1245    use crate::lint_context::LintContext;
1246    use crate::rule::Rule;
1247
1248    fn relaxed_fix_rule() -> MD033NoInlineHtml {
1249        let config = MD033Config {
1250            fix: true,
1251            fix_mode: MD033FixMode::Relaxed,
1252            ..MD033Config::default()
1253        };
1254        MD033NoInlineHtml::from_config_struct(config)
1255    }
1256
1257    #[test]
1258    fn test_md033_basic_html() {
1259        let rule = MD033NoInlineHtml::default();
1260        let content = "<div>Some content</div>";
1261        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1262        let result = rule.check(&ctx).unwrap();
1263        // Only reports opening tags, not closing tags
1264        assert_eq!(result.len(), 1); // Only <div>, not </div>
1265        assert!(result[0].message.starts_with("Inline HTML found: <div>"));
1266    }
1267
1268    #[test]
1269    fn test_md033_case_insensitive() {
1270        let rule = MD033NoInlineHtml::default();
1271        let content = "<DiV>Some <B>content</B></dIv>";
1272        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1273        let result = rule.check(&ctx).unwrap();
1274        // Only reports opening tags, not closing tags
1275        assert_eq!(result.len(), 2); // <DiV>, <B> (not </B>, </dIv>)
1276        assert_eq!(result[0].message, "Inline HTML found: <DiV>");
1277        assert_eq!(result[1].message, "Inline HTML found: <B>");
1278    }
1279
1280    #[test]
1281    fn test_md033_allowed_tags() {
1282        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
1283        let content = "<div>Allowed</div><p>Not allowed</p><br/>";
1284        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1285        let result = rule.check(&ctx).unwrap();
1286        // Only warnings for non-allowed opening tags (<p> only, div and br are allowed)
1287        assert_eq!(result.len(), 1);
1288        assert_eq!(result[0].message, "Inline HTML found: <p>");
1289
1290        // Test case-insensitivity of allowed tags
1291        let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
1292        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1293        let result2 = rule.check(&ctx2).unwrap();
1294        assert_eq!(result2.len(), 1); // Only <P> flagged
1295        assert_eq!(result2[0].message, "Inline HTML found: <P>");
1296    }
1297
1298    #[test]
1299    fn test_md033_html_comments() {
1300        let rule = MD033NoInlineHtml::default();
1301        let content = "<!-- This is a comment --> <p>Not a comment</p>";
1302        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1303        let result = rule.check(&ctx).unwrap();
1304        // Should detect warnings for HTML opening tags (comments are skipped, closing tags not reported)
1305        assert_eq!(result.len(), 1); // Only <p>
1306        assert_eq!(result[0].message, "Inline HTML found: <p>");
1307    }
1308
1309    #[test]
1310    fn test_md033_tags_in_links() {
1311        let rule = MD033NoInlineHtml::default();
1312        let content = "[Link](http://example.com/<div>)";
1313        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1314        let result = rule.check(&ctx).unwrap();
1315        // The <div> in the URL should be detected as HTML (not skipped)
1316        assert_eq!(result.len(), 1);
1317        assert_eq!(result[0].message, "Inline HTML found: <div>");
1318
1319        let content2 = "[Link <a>text</a>](url)";
1320        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1321        let result2 = rule.check(&ctx2).unwrap();
1322        // Only reports opening tags
1323        assert_eq!(result2.len(), 1); // Only <a>
1324        assert_eq!(result2[0].message, "Inline HTML found: <a>");
1325    }
1326
1327    #[test]
1328    fn test_md033_fix_escaping() {
1329        let rule = MD033NoInlineHtml::default();
1330        let content = "Text with <div> and <br/> tags.";
1331        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1332        let fixed_content = rule.fix(&ctx).unwrap();
1333        // No fix for HTML tags; output should be unchanged
1334        assert_eq!(fixed_content, content);
1335    }
1336
1337    #[test]
1338    fn test_md033_in_code_blocks() {
1339        let rule = MD033NoInlineHtml::default();
1340        let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
1341        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1342        let result = rule.check(&ctx).unwrap();
1343        // Only reports opening tags outside code block
1344        assert_eq!(result.len(), 1); // Only <div> outside code block
1345        assert_eq!(result[0].message, "Inline HTML found: <div>");
1346    }
1347
1348    #[test]
1349    fn test_md033_in_code_spans() {
1350        let rule = MD033NoInlineHtml::default();
1351        let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
1352        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1353        let result = rule.check(&ctx).unwrap();
1354        // Should detect <br/> outside code span, but not tags inside code span
1355        assert_eq!(result.len(), 1);
1356        assert_eq!(result[0].message, "Inline HTML found: <br/>");
1357    }
1358
1359    #[test]
1360    fn test_md033_issue_90_code_span_with_diff_block() {
1361        // Test for issue #90: inline code span followed by diff code block
1362        let rule = MD033NoInlineHtml::default();
1363        let content = r#"# Heading
1364
1365`<env>`
1366
1367```diff
1368- this
1369+ that
1370```"#;
1371        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1372        let result = rule.check(&ctx).unwrap();
1373        // Should NOT detect <env> as HTML since it's inside backticks
1374        assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
1375    }
1376
1377    #[test]
1378    fn test_md033_multiple_code_spans_with_angle_brackets() {
1379        // Test multiple code spans on same line
1380        let rule = MD033NoInlineHtml::default();
1381        let content = "`<one>` and `<two>` and `<three>` are all code spans";
1382        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1383        let result = rule.check(&ctx).unwrap();
1384        assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
1385    }
1386
1387    #[test]
1388    fn test_md033_nested_angle_brackets_in_code_span() {
1389        // Test nested angle brackets
1390        let rule = MD033NoInlineHtml::default();
1391        let content = "Text with `<<nested>>` brackets";
1392        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1393        let result = rule.check(&ctx).unwrap();
1394        assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
1395    }
1396
1397    #[test]
1398    fn test_md033_code_span_at_end_before_code_block() {
1399        // Test code span at end of line before code block
1400        let rule = MD033NoInlineHtml::default();
1401        let content = "Testing `<test>`\n```\ncode here\n```";
1402        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1403        let result = rule.check(&ctx).unwrap();
1404        assert_eq!(result.len(), 0, "Should handle code span before code block");
1405    }
1406
1407    #[test]
1408    fn test_md033_quick_fix_inline_tag() {
1409        // Test that non-fixable tags (like <span>) do NOT get a fix
1410        // Only safe fixable tags (em, i, strong, b, code, br, hr) with fix=true get fixes
1411        let rule = MD033NoInlineHtml::default();
1412        let content = "This has <span>inline text</span> that should keep content.";
1413        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1414        let result = rule.check(&ctx).unwrap();
1415
1416        assert_eq!(result.len(), 1, "Should find one HTML tag");
1417        // <span> is NOT a safe fixable tag, so no fix should be provided
1418        assert!(
1419            result[0].fix.is_none(),
1420            "Non-fixable tags like <span> should not have a fix"
1421        );
1422    }
1423
1424    #[test]
1425    fn test_md033_quick_fix_multiline_tag() {
1426        // HTML block elements like <div> are intentionally NOT auto-fixed
1427        // Removing them would change document structure significantly
1428        let rule = MD033NoInlineHtml::default();
1429        let content = "<div>\nBlock content\n</div>";
1430        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1431        let result = rule.check(&ctx).unwrap();
1432
1433        assert_eq!(result.len(), 1, "Should find one HTML tag");
1434        // HTML block elements should NOT have auto-fix
1435        assert!(result[0].fix.is_none(), "HTML block elements should NOT have auto-fix");
1436    }
1437
1438    #[test]
1439    fn test_md033_quick_fix_self_closing_tag() {
1440        // Test that self-closing tags with fix=false (default) do NOT get a fix
1441        let rule = MD033NoInlineHtml::default();
1442        let content = "Self-closing: <br/>";
1443        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1444        let result = rule.check(&ctx).unwrap();
1445
1446        assert_eq!(result.len(), 1, "Should find one HTML tag");
1447        // Default config has fix=false, so no fix should be provided
1448        assert!(
1449            result[0].fix.is_none(),
1450            "Self-closing tags should not have a fix when fix config is false"
1451        );
1452    }
1453
1454    #[test]
1455    fn test_md033_quick_fix_multiple_tags() {
1456        // Test that multiple tags without fix=true do NOT get fixes
1457        // <span> is not a safe fixable tag, <strong> is but fix=false by default
1458        let rule = MD033NoInlineHtml::default();
1459        let content = "<span>first</span> and <strong>second</strong>";
1460        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1461        let result = rule.check(&ctx).unwrap();
1462
1463        assert_eq!(result.len(), 2, "Should find two HTML tags");
1464        // Neither should have a fix: <span> is not fixable, <strong> is but fix=false
1465        assert!(result[0].fix.is_none(), "Non-fixable <span> should not have a fix");
1466        assert!(
1467            result[1].fix.is_none(),
1468            "<strong> should not have a fix when fix config is false"
1469        );
1470    }
1471
1472    #[test]
1473    fn test_md033_skip_angle_brackets_in_link_titles() {
1474        // Angle brackets inside link reference definition titles should not be flagged as HTML
1475        let rule = MD033NoInlineHtml::default();
1476        let content = r#"# Test
1477
1478[example]: <https://example.com> "Title with <Angle Brackets> inside"
1479
1480Regular text with <div>content</div> HTML tag.
1481"#;
1482        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1483        let result = rule.check(&ctx).unwrap();
1484
1485        // Should only flag <div>, not <Angle Brackets> in the title (not a valid HTML element)
1486        // Opening tag only (markdownlint behavior)
1487        assert_eq!(result.len(), 1, "Should find opening div tag");
1488        assert!(
1489            result[0].message.contains("<div>"),
1490            "Should flag <div>, got: {}",
1491            result[0].message
1492        );
1493    }
1494
1495    #[test]
1496    fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
1497        // Test with single-quoted title
1498        let rule = MD033NoInlineHtml::default();
1499        let content = r#"[ref]: url 'Title <Help Wanted> here'
1500
1501<span>text</span> here
1502"#;
1503        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1504        let result = rule.check(&ctx).unwrap();
1505
1506        // <Help Wanted> is not a valid HTML element, so only <span> is flagged
1507        // Opening tag only (markdownlint behavior)
1508        assert_eq!(result.len(), 1, "Should find opening span tag");
1509        assert!(
1510            result[0].message.contains("<span>"),
1511            "Should flag <span>, got: {}",
1512            result[0].message
1513        );
1514    }
1515
1516    #[test]
1517    fn test_md033_multiline_tag_end_line_calculation() {
1518        // Test that multiline HTML tags report correct end_line
1519        let rule = MD033NoInlineHtml::default();
1520        let content = "<div\n  class=\"test\"\n  id=\"example\">";
1521        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1522        let result = rule.check(&ctx).unwrap();
1523
1524        assert_eq!(result.len(), 1, "Should find one HTML tag");
1525        // Tag starts on line 1
1526        assert_eq!(result[0].line, 1, "Start line should be 1");
1527        // Tag ends on line 3 (where the closing > is)
1528        assert_eq!(result[0].end_line, 3, "End line should be 3");
1529    }
1530
1531    #[test]
1532    fn test_md033_single_line_tag_same_start_end_line() {
1533        // Test that single-line HTML tags have same start and end line
1534        let rule = MD033NoInlineHtml::default();
1535        let content = "Some text <div class=\"test\"> more text";
1536        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1537        let result = rule.check(&ctx).unwrap();
1538
1539        assert_eq!(result.len(), 1, "Should find one HTML tag");
1540        assert_eq!(result[0].line, 1, "Start line should be 1");
1541        assert_eq!(result[0].end_line, 1, "End line should be 1 for single-line tag");
1542    }
1543
1544    #[test]
1545    fn test_md033_multiline_tag_with_many_attributes() {
1546        // Test multiline tag spanning multiple lines
1547        let rule = MD033NoInlineHtml::default();
1548        let content =
1549            "Text\n<div\n  data-attr1=\"value1\"\n  data-attr2=\"value2\"\n  data-attr3=\"value3\">\nMore text";
1550        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1551        let result = rule.check(&ctx).unwrap();
1552
1553        assert_eq!(result.len(), 1, "Should find one HTML tag");
1554        // Tag starts on line 2 (first line is "Text")
1555        assert_eq!(result[0].line, 2, "Start line should be 2");
1556        // Tag ends on line 5 (where the closing > is)
1557        assert_eq!(result[0].end_line, 5, "End line should be 5");
1558    }
1559
1560    #[test]
1561    fn test_md033_disallowed_mode_basic() {
1562        // Test disallowed mode: only flags tags in the disallowed list
1563        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string(), "iframe".to_string()]);
1564        let content = "<div>Safe content</div><script>alert('xss')</script>";
1565        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1566        let result = rule.check(&ctx).unwrap();
1567
1568        // Should only flag <script>, not <div>
1569        assert_eq!(result.len(), 1, "Should only flag disallowed tags");
1570        assert!(result[0].message.contains("<script>"), "Should flag script tag");
1571    }
1572
1573    #[test]
1574    fn test_md033_disallowed_gfm_security_tags() {
1575        // Test GFM security tags expansion
1576        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1577        let content = r#"
1578<div>Safe</div>
1579<title>Bad title</title>
1580<textarea>Bad textarea</textarea>
1581<style>.bad{}</style>
1582<iframe src="evil"></iframe>
1583<script>evil()</script>
1584<plaintext>old tag</plaintext>
1585<span>Safe span</span>
1586"#;
1587        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1588        let result = rule.check(&ctx).unwrap();
1589
1590        // Should flag: title, textarea, style, iframe, script, plaintext
1591        // Should NOT flag: div, span
1592        assert_eq!(result.len(), 6, "Should flag 6 GFM security tags");
1593
1594        let flagged_tags: Vec<&str> = result
1595            .iter()
1596            .filter_map(|w| w.message.split("<").nth(1))
1597            .filter_map(|s| s.split(">").next())
1598            .filter_map(|s| s.split_whitespace().next())
1599            .collect();
1600
1601        assert!(flagged_tags.contains(&"title"), "Should flag title");
1602        assert!(flagged_tags.contains(&"textarea"), "Should flag textarea");
1603        assert!(flagged_tags.contains(&"style"), "Should flag style");
1604        assert!(flagged_tags.contains(&"iframe"), "Should flag iframe");
1605        assert!(flagged_tags.contains(&"script"), "Should flag script");
1606        assert!(flagged_tags.contains(&"plaintext"), "Should flag plaintext");
1607        assert!(!flagged_tags.contains(&"div"), "Should NOT flag div");
1608        assert!(!flagged_tags.contains(&"span"), "Should NOT flag span");
1609    }
1610
1611    #[test]
1612    fn test_md033_disallowed_case_insensitive() {
1613        // Test that disallowed check is case-insensitive
1614        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string()]);
1615        let content = "<SCRIPT>alert('xss')</SCRIPT><Script>alert('xss')</Script>";
1616        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1617        let result = rule.check(&ctx).unwrap();
1618
1619        // Should flag both <SCRIPT> and <Script>
1620        assert_eq!(result.len(), 2, "Should flag both case variants");
1621    }
1622
1623    #[test]
1624    fn test_md033_disallowed_with_attributes() {
1625        // Test that disallowed mode works with tags that have attributes
1626        let rule = MD033NoInlineHtml::with_disallowed(vec!["iframe".to_string()]);
1627        let content = r#"<iframe src="https://evil.com" width="100" height="100"></iframe>"#;
1628        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1629        let result = rule.check(&ctx).unwrap();
1630
1631        assert_eq!(result.len(), 1, "Should flag iframe with attributes");
1632        assert!(result[0].message.contains("iframe"), "Should flag iframe");
1633    }
1634
1635    #[test]
1636    fn test_md033_disallowed_all_gfm_tags() {
1637        // Verify all GFM disallowed tags are covered
1638        use md033_config::GFM_DISALLOWED_TAGS;
1639        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1640
1641        for tag in GFM_DISALLOWED_TAGS {
1642            let content = format!("<{tag}>content</{tag}>");
1643            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
1644            let result = rule.check(&ctx).unwrap();
1645
1646            assert_eq!(result.len(), 1, "GFM tag <{tag}> should be flagged");
1647        }
1648    }
1649
1650    #[test]
1651    fn test_md033_disallowed_mixed_with_custom() {
1652        // Test mixing "gfm" with custom disallowed tags
1653        let rule = MD033NoInlineHtml::with_disallowed(vec![
1654            "gfm".to_string(),
1655            "marquee".to_string(), // Custom disallowed tag
1656        ]);
1657        let content = r#"<script>bad</script><marquee>annoying</marquee><div>ok</div>"#;
1658        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1659        let result = rule.check(&ctx).unwrap();
1660
1661        // Should flag script (gfm) and marquee (custom)
1662        assert_eq!(result.len(), 2, "Should flag both gfm and custom tags");
1663    }
1664
1665    #[test]
1666    fn test_md033_disallowed_empty_means_default_mode() {
1667        // Empty disallowed list means default mode (flag all HTML)
1668        let rule = MD033NoInlineHtml::with_disallowed(vec![]);
1669        let content = "<div>content</div>";
1670        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1671        let result = rule.check(&ctx).unwrap();
1672
1673        // Should flag <div> in default mode
1674        assert_eq!(result.len(), 1, "Empty disallowed = default mode");
1675    }
1676
1677    #[test]
1678    fn test_md033_jsx_fragments_in_mdx() {
1679        // JSX fragments (<> and </>) should not trigger warnings in MDX
1680        let rule = MD033NoInlineHtml::default();
1681        let content = r#"# MDX Document
1682
1683<>
1684  <Heading />
1685  <Content />
1686</>
1687
1688<div>Regular HTML should still be flagged</div>
1689"#;
1690        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1691        let result = rule.check(&ctx).unwrap();
1692
1693        // Should only flag <div>, not the fragments or JSX components
1694        assert_eq!(result.len(), 1, "Should only find one HTML tag (the div)");
1695        assert!(
1696            result[0].message.contains("<div>"),
1697            "Should flag <div>, not JSX fragments"
1698        );
1699    }
1700
1701    #[test]
1702    fn test_md033_jsx_components_in_mdx() {
1703        // JSX components (capitalized) should not trigger warnings in MDX
1704        let rule = MD033NoInlineHtml::default();
1705        let content = r#"<CustomComponent prop="value">
1706  Content
1707</CustomComponent>
1708
1709<MyButton onClick={handler}>Click</MyButton>
1710"#;
1711        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1712        let result = rule.check(&ctx).unwrap();
1713
1714        // No warnings - all are JSX components
1715        assert_eq!(result.len(), 0, "Should not flag JSX components in MDX");
1716    }
1717
1718    #[test]
1719    fn test_md033_jsx_not_skipped_in_standard_markdown() {
1720        // In standard markdown, capitalized tags should still be flagged if they're valid HTML
1721        let rule = MD033NoInlineHtml::default();
1722        let content = "<Script>alert(1)</Script>";
1723        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1724        let result = rule.check(&ctx).unwrap();
1725
1726        // Should flag <Script> in standard markdown (it's a valid HTML element)
1727        assert_eq!(result.len(), 1, "Should flag <Script> in standard markdown");
1728    }
1729
1730    #[test]
1731    fn test_md033_jsx_attributes_in_mdx() {
1732        // Elements with JSX-specific attributes should not trigger warnings in MDX
1733        let rule = MD033NoInlineHtml::default();
1734        let content = r#"# MDX with JSX Attributes
1735
1736<div className="card big">Content</div>
1737
1738<button onClick={handleClick}>Click me</button>
1739
1740<label htmlFor="input-id">Label</label>
1741
1742<input onChange={handleChange} />
1743
1744<div class="html-class">Regular HTML should be flagged</div>
1745"#;
1746        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1747        let result = rule.check(&ctx).unwrap();
1748
1749        // Should only flag the div with regular HTML "class" attribute
1750        assert_eq!(
1751            result.len(),
1752            1,
1753            "Should only flag HTML element without JSX attributes, got: {result:?}"
1754        );
1755        assert!(
1756            result[0].message.contains("<div class="),
1757            "Should flag the div with HTML class attribute"
1758        );
1759    }
1760
1761    #[test]
1762    fn test_md033_jsx_attributes_not_skipped_in_standard() {
1763        // In standard markdown, JSX attributes should still be flagged
1764        let rule = MD033NoInlineHtml::default();
1765        let content = r#"<div className="card">Content</div>"#;
1766        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1767        let result = rule.check(&ctx).unwrap();
1768
1769        // Should flag in standard markdown
1770        assert_eq!(result.len(), 1, "Should flag JSX-style elements in standard markdown");
1771    }
1772
1773    // Auto-fix tests for MD033
1774
1775    #[test]
1776    fn test_md033_fix_disabled_by_default() {
1777        // Auto-fix should be disabled by default
1778        let rule = MD033NoInlineHtml::default();
1779        assert!(!rule.config.fix, "Fix should be disabled by default");
1780        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::Unfixable);
1781    }
1782
1783    #[test]
1784    fn test_md033_fix_enabled_em_to_italic() {
1785        // When fix is enabled, <em>text</em> should convert to *text*
1786        let rule = MD033NoInlineHtml::with_fix(true);
1787        let content = "This has <em>emphasized text</em> here.";
1788        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1789        let fixed = rule.fix(&ctx).unwrap();
1790        assert_eq!(fixed, "This has *emphasized text* here.");
1791    }
1792
1793    #[test]
1794    fn test_md033_fix_enabled_i_to_italic() {
1795        // <i>text</i> should convert to *text*
1796        let rule = MD033NoInlineHtml::with_fix(true);
1797        let content = "This has <i>italic text</i> here.";
1798        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1799        let fixed = rule.fix(&ctx).unwrap();
1800        assert_eq!(fixed, "This has *italic text* here.");
1801    }
1802
1803    #[test]
1804    fn test_md033_fix_enabled_strong_to_bold() {
1805        // <strong>text</strong> should convert to **text**
1806        let rule = MD033NoInlineHtml::with_fix(true);
1807        let content = "This has <strong>bold text</strong> here.";
1808        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1809        let fixed = rule.fix(&ctx).unwrap();
1810        assert_eq!(fixed, "This has **bold text** here.");
1811    }
1812
1813    #[test]
1814    fn test_md033_fix_enabled_b_to_bold() {
1815        // <b>text</b> should convert to **text**
1816        let rule = MD033NoInlineHtml::with_fix(true);
1817        let content = "This has <b>bold text</b> here.";
1818        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1819        let fixed = rule.fix(&ctx).unwrap();
1820        assert_eq!(fixed, "This has **bold text** here.");
1821    }
1822
1823    #[test]
1824    fn test_md033_fix_enabled_code_to_backticks() {
1825        // <code>text</code> should convert to `text`
1826        let rule = MD033NoInlineHtml::with_fix(true);
1827        let content = "This has <code>inline code</code> here.";
1828        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1829        let fixed = rule.fix(&ctx).unwrap();
1830        assert_eq!(fixed, "This has `inline code` here.");
1831    }
1832
1833    #[test]
1834    fn test_md033_fix_enabled_code_with_backticks() {
1835        // <code>text with `backticks`</code> should use double backticks
1836        let rule = MD033NoInlineHtml::with_fix(true);
1837        let content = "This has <code>text with `backticks`</code> here.";
1838        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1839        let fixed = rule.fix(&ctx).unwrap();
1840        assert_eq!(fixed, "This has `` text with `backticks` `` here.");
1841    }
1842
1843    #[test]
1844    fn test_md033_fix_enabled_br_trailing_spaces() {
1845        // <br> should convert to two trailing spaces + newline (default)
1846        let rule = MD033NoInlineHtml::with_fix(true);
1847        let content = "First line<br>Second line";
1848        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1849        let fixed = rule.fix(&ctx).unwrap();
1850        assert_eq!(fixed, "First line  \nSecond line");
1851    }
1852
1853    #[test]
1854    fn test_md033_fix_enabled_br_self_closing() {
1855        // <br/> and <br /> should also convert
1856        let rule = MD033NoInlineHtml::with_fix(true);
1857        let content = "First<br/>second<br />third";
1858        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1859        let fixed = rule.fix(&ctx).unwrap();
1860        assert_eq!(fixed, "First  \nsecond  \nthird");
1861    }
1862
1863    #[test]
1864    fn test_md033_fix_enabled_br_backslash_style() {
1865        // With br_style = backslash, <br> should convert to backslash + newline
1866        let config = MD033Config {
1867            allowed: Vec::new(),
1868            disallowed: Vec::new(),
1869            fix: true,
1870            br_style: md033_config::BrStyle::Backslash,
1871            ..MD033Config::default()
1872        };
1873        let rule = MD033NoInlineHtml::from_config_struct(config);
1874        let content = "First line<br>Second line";
1875        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1876        let fixed = rule.fix(&ctx).unwrap();
1877        assert_eq!(fixed, "First line\\\nSecond line");
1878    }
1879
1880    #[test]
1881    fn test_md033_fix_enabled_hr() {
1882        // <hr> should convert to horizontal rule
1883        let rule = MD033NoInlineHtml::with_fix(true);
1884        let content = "Above<hr>Below";
1885        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1886        let fixed = rule.fix(&ctx).unwrap();
1887        assert_eq!(fixed, "Above\n---\nBelow");
1888    }
1889
1890    #[test]
1891    fn test_md033_fix_enabled_hr_self_closing() {
1892        // <hr/> should also convert
1893        let rule = MD033NoInlineHtml::with_fix(true);
1894        let content = "Above<hr/>Below";
1895        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1896        let fixed = rule.fix(&ctx).unwrap();
1897        assert_eq!(fixed, "Above\n---\nBelow");
1898    }
1899
1900    #[test]
1901    fn test_md033_fix_skips_nested_tags() {
1902        // Tags with nested HTML - outer tags may not be fully fixed due to overlapping ranges
1903        // The inner tags are processed first, which can invalidate outer tag ranges
1904        let rule = MD033NoInlineHtml::with_fix(true);
1905        let content = "This has <em>text with <strong>nested</strong> tags</em> here.";
1906        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1907        let fixed = rule.fix(&ctx).unwrap();
1908        // Inner <strong> is converted to markdown, outer <em> range becomes invalid
1909        // This is expected behavior - user should run fix multiple times for nested tags
1910        assert_eq!(fixed, "This has <em>text with **nested** tags</em> here.");
1911    }
1912
1913    #[test]
1914    fn test_md033_fix_skips_tags_with_attributes() {
1915        // Tags with attributes should NOT be fixed at all - leave as-is
1916        // User may want to keep the attributes (e.g., class="highlight" for styling)
1917        let rule = MD033NoInlineHtml::with_fix(true);
1918        let content = "This has <em class=\"highlight\">emphasized</em> text.";
1919        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1920        let fixed = rule.fix(&ctx).unwrap();
1921        // Content should remain unchanged - we don't know if attributes matter
1922        assert_eq!(fixed, content);
1923    }
1924
1925    #[test]
1926    fn test_md033_fix_disabled_no_changes() {
1927        // When fix is disabled, original content should be returned
1928        let rule = MD033NoInlineHtml::default(); // fix is false by default
1929        let content = "This has <em>emphasized text</em> here.";
1930        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1931        let fixed = rule.fix(&ctx).unwrap();
1932        assert_eq!(fixed, content, "Should return original content when fix is disabled");
1933    }
1934
1935    #[test]
1936    fn test_md033_fix_capability_enabled() {
1937        let rule = MD033NoInlineHtml::with_fix(true);
1938        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::FullyFixable);
1939    }
1940
1941    #[test]
1942    fn test_md033_fix_multiple_tags() {
1943        // Test fixing multiple HTML tags in one document
1944        let rule = MD033NoInlineHtml::with_fix(true);
1945        let content = "Here is <em>italic</em> and <strong>bold</strong> text.";
1946        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1947        let fixed = rule.fix(&ctx).unwrap();
1948        assert_eq!(fixed, "Here is *italic* and **bold** text.");
1949    }
1950
1951    #[test]
1952    fn test_md033_fix_uppercase_tags() {
1953        // HTML tags are case-insensitive
1954        let rule = MD033NoInlineHtml::with_fix(true);
1955        let content = "This has <EM>emphasized</EM> text.";
1956        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1957        let fixed = rule.fix(&ctx).unwrap();
1958        assert_eq!(fixed, "This has *emphasized* text.");
1959    }
1960
1961    #[test]
1962    fn test_md033_fix_unsafe_tags_not_modified() {
1963        // Tags without safe markdown equivalents should NOT be modified
1964        // Only safe fixable tags (em, i, strong, b, code, br, hr) get converted
1965        let rule = MD033NoInlineHtml::with_fix(true);
1966        let content = "This has <div>a div</div> content.";
1967        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1968        let fixed = rule.fix(&ctx).unwrap();
1969        // <div> is not a safe fixable tag, so content should be unchanged
1970        assert_eq!(fixed, "This has <div>a div</div> content.");
1971    }
1972
1973    #[test]
1974    fn test_md033_fix_img_tag_converted() {
1975        // <img> tags with simple src/alt attributes are converted to markdown images
1976        let rule = MD033NoInlineHtml::with_fix(true);
1977        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\">";
1978        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1979        let fixed = rule.fix(&ctx).unwrap();
1980        // <img> is converted to ![alt](src) format
1981        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
1982    }
1983
1984    #[test]
1985    fn test_md033_fix_img_tag_with_extra_attrs_not_converted() {
1986        // <img> tags with width/height/style attributes are NOT converted
1987        let rule = MD033NoInlineHtml::with_fix(true);
1988        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
1989        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1990        let fixed = rule.fix(&ctx).unwrap();
1991        // Has width attribute - not safe to convert
1992        assert_eq!(fixed, "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">");
1993    }
1994
1995    #[test]
1996    fn test_md033_fix_relaxed_a_with_target_is_converted() {
1997        let rule = relaxed_fix_rule();
1998        let content = "Link: <a href=\"https://example.com\" target=\"_blank\">Example</a>";
1999        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2000        let fixed = rule.fix(&ctx).unwrap();
2001        assert_eq!(fixed, "Link: [Example](https://example.com)");
2002    }
2003
2004    #[test]
2005    fn test_md033_fix_relaxed_img_with_width_is_converted() {
2006        let rule = relaxed_fix_rule();
2007        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
2008        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2009        let fixed = rule.fix(&ctx).unwrap();
2010        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
2011    }
2012
2013    #[test]
2014    fn test_md033_fix_relaxed_rejects_unknown_extra_attributes() {
2015        let rule = relaxed_fix_rule();
2016        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" aria-label=\"hero\">";
2017        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2018        let fixed = rule.fix(&ctx).unwrap();
2019        assert_eq!(fixed, content, "Unknown attributes should not be dropped by default");
2020    }
2021
2022    #[test]
2023    fn test_md033_fix_relaxed_still_blocks_unsafe_schemes() {
2024        let rule = relaxed_fix_rule();
2025        let content = "Link: <a href=\"javascript:alert(1)\" target=\"_blank\">Example</a>";
2026        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2027        let fixed = rule.fix(&ctx).unwrap();
2028        assert_eq!(fixed, content, "Unsafe URL schemes must never be converted");
2029    }
2030
2031    #[test]
2032    fn test_md033_fix_relaxed_wrapper_strip_requires_second_pass_for_nested_html() {
2033        let rule = relaxed_fix_rule();
2034        let content = "<p align=\"center\">\n  <img src=\"logo.svg\" alt=\"Logo\" width=\"120\" />\n</p>";
2035        let ctx1 = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2036        let fixed_once = rule.fix(&ctx1).unwrap();
2037        assert!(
2038            fixed_once.contains("<p"),
2039            "First pass should keep wrapper when inner HTML is still present: {fixed_once}"
2040        );
2041        assert!(
2042            fixed_once.contains("![Logo](logo.svg)"),
2043            "Inner image should be converted on first pass: {fixed_once}"
2044        );
2045
2046        let ctx2 = LintContext::new(&fixed_once, crate::config::MarkdownFlavor::Standard, None);
2047        let fixed_twice = rule.fix(&ctx2).unwrap();
2048        assert!(
2049            !fixed_twice.contains("<p"),
2050            "Second pass should strip configured wrapper: {fixed_twice}"
2051        );
2052        assert!(fixed_twice.contains("![Logo](logo.svg)"));
2053    }
2054
2055    #[test]
2056    fn test_md033_fix_relaxed_multiple_droppable_attrs() {
2057        let rule = relaxed_fix_rule();
2058        let content = "<a href=\"https://example.com\" target=\"_blank\" rel=\"noopener\" class=\"btn\">Click</a>";
2059        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2060        let fixed = rule.fix(&ctx).unwrap();
2061        assert_eq!(fixed, "[Click](https://example.com)");
2062    }
2063
2064    #[test]
2065    fn test_md033_fix_relaxed_img_multiple_droppable_attrs() {
2066        let rule = relaxed_fix_rule();
2067        let content = "<img src=\"logo.png\" alt=\"Logo\" width=\"120\" height=\"40\" style=\"border:none\" />";
2068        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2069        let fixed = rule.fix(&ctx).unwrap();
2070        assert_eq!(fixed, "![Logo](logo.png)");
2071    }
2072
2073    #[test]
2074    fn test_md033_fix_relaxed_event_handler_never_dropped() {
2075        let rule = relaxed_fix_rule();
2076        let content = "<a href=\"https://example.com\" onclick=\"track()\">Link</a>";
2077        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2078        let fixed = rule.fix(&ctx).unwrap();
2079        assert_eq!(fixed, content, "Event handler attributes must block conversion");
2080    }
2081
2082    #[test]
2083    fn test_md033_fix_relaxed_event_handler_even_with_custom_config() {
2084        // Even if someone adds on* to drop-attributes, event handlers must be rejected
2085        let config = MD033Config {
2086            fix: true,
2087            fix_mode: MD033FixMode::Relaxed,
2088            drop_attributes: vec!["on*".to_string(), "target".to_string()],
2089            ..MD033Config::default()
2090        };
2091        let rule = MD033NoInlineHtml::from_config_struct(config);
2092        let content = "<a href=\"https://example.com\" onclick=\"alert(1)\">Link</a>";
2093        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2094        let fixed = rule.fix(&ctx).unwrap();
2095        assert_eq!(fixed, content, "on* event handlers must never be dropped");
2096    }
2097
2098    #[test]
2099    fn test_md033_fix_relaxed_custom_drop_attributes() {
2100        let config = MD033Config {
2101            fix: true,
2102            fix_mode: MD033FixMode::Relaxed,
2103            drop_attributes: vec!["loading".to_string()],
2104            ..MD033Config::default()
2105        };
2106        let rule = MD033NoInlineHtml::from_config_struct(config);
2107        // "loading" is in the custom list, "width" is NOT
2108        let content = "<img src=\"x.jpg\" alt=\"\" loading=\"lazy\">";
2109        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2110        let fixed = rule.fix(&ctx).unwrap();
2111        assert_eq!(fixed, "![](x.jpg)", "Custom drop-attributes should be respected");
2112
2113        let content2 = "<img src=\"x.jpg\" alt=\"\" width=\"100\">";
2114        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
2115        let fixed2 = rule.fix(&ctx2).unwrap();
2116        assert_eq!(
2117            fixed2, content2,
2118            "Attributes not in custom list should block conversion"
2119        );
2120    }
2121
2122    #[test]
2123    fn test_md033_fix_relaxed_custom_strip_wrapper() {
2124        let config = MD033Config {
2125            fix: true,
2126            fix_mode: MD033FixMode::Relaxed,
2127            strip_wrapper_elements: vec!["div".to_string()],
2128            ..MD033Config::default()
2129        };
2130        let rule = MD033NoInlineHtml::from_config_struct(config);
2131        let content = "<div>Some text content</div>";
2132        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2133        let fixed = rule.fix(&ctx).unwrap();
2134        assert_eq!(fixed, "Some text content");
2135    }
2136
2137    #[test]
2138    fn test_md033_fix_relaxed_wrapper_with_plain_text() {
2139        let rule = relaxed_fix_rule();
2140        let content = "<p align=\"center\">Just some text</p>";
2141        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2142        let fixed = rule.fix(&ctx).unwrap();
2143        assert_eq!(fixed, "Just some text");
2144    }
2145
2146    #[test]
2147    fn test_md033_fix_relaxed_data_attr_with_wildcard() {
2148        let config = MD033Config {
2149            fix: true,
2150            fix_mode: MD033FixMode::Relaxed,
2151            drop_attributes: vec!["data-*".to_string(), "target".to_string()],
2152            ..MD033Config::default()
2153        };
2154        let rule = MD033NoInlineHtml::from_config_struct(config);
2155        let content = "<a href=\"https://example.com\" data-tracking=\"abc\" target=\"_blank\">Link</a>";
2156        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2157        let fixed = rule.fix(&ctx).unwrap();
2158        assert_eq!(fixed, "[Link](https://example.com)");
2159    }
2160
2161    #[test]
2162    fn test_md033_fix_relaxed_mixed_droppable_and_blocking_attrs() {
2163        let rule = relaxed_fix_rule();
2164        // "target" is droppable, "aria-label" is not in the default list
2165        let content = "<a href=\"https://example.com\" target=\"_blank\" aria-label=\"nav\">Link</a>";
2166        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2167        let fixed = rule.fix(&ctx).unwrap();
2168        assert_eq!(fixed, content, "Non-droppable attribute should block conversion");
2169    }
2170
2171    #[test]
2172    fn test_md033_fix_relaxed_badge_pattern() {
2173        // Common GitHub README badge pattern
2174        let rule = relaxed_fix_rule();
2175        let content = "<a href=\"https://crates.io/crates/rumdl\" target=\"_blank\"><img src=\"https://img.shields.io/crates/v/rumdl.svg\" alt=\"Crate\" width=\"120\" /></a>";
2176        let ctx1 = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2177        let fixed_once = rule.fix(&ctx1).unwrap();
2178        // First pass should convert the inner <img>
2179        assert!(
2180            fixed_once.contains("![Crate](https://img.shields.io/crates/v/rumdl.svg)"),
2181            "Inner img should be converted: {fixed_once}"
2182        );
2183
2184        // Second pass converts the <a> wrapper
2185        let ctx2 = LintContext::new(&fixed_once, crate::config::MarkdownFlavor::Standard, None);
2186        let fixed_twice = rule.fix(&ctx2).unwrap();
2187        assert!(
2188            fixed_twice
2189                .contains("[![Crate](https://img.shields.io/crates/v/rumdl.svg)](https://crates.io/crates/rumdl)"),
2190            "Badge should produce nested markdown image link: {fixed_twice}"
2191        );
2192    }
2193
2194    #[test]
2195    fn test_md033_fix_relaxed_conservative_mode_unchanged() {
2196        // Verify conservative mode (default) is unaffected by the relaxed logic
2197        let rule = MD033NoInlineHtml::with_fix(true);
2198        let content = "<a href=\"https://example.com\" target=\"_blank\">Link</a>";
2199        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2200        let fixed = rule.fix(&ctx).unwrap();
2201        assert_eq!(fixed, content, "Conservative mode should not drop target attribute");
2202    }
2203
2204    #[test]
2205    fn test_md033_fix_relaxed_img_inside_pre_not_converted() {
2206        // <img> inside <pre> must NOT be converted, even in relaxed mode
2207        let rule = relaxed_fix_rule();
2208        let content = "<pre>\n  <img src=\"diagram.png\" alt=\"d\" width=\"100\" />\n</pre>";
2209        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2210        let fixed = rule.fix(&ctx).unwrap();
2211        assert!(fixed.contains("<img"), "img inside pre must not be converted: {fixed}");
2212    }
2213
2214    #[test]
2215    fn test_md033_fix_relaxed_wrapper_nested_inside_div_not_stripped() {
2216        // <p> nested inside <div> should not be stripped
2217        let rule = relaxed_fix_rule();
2218        let content = "<div><p>text</p></div>";
2219        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2220        let fixed = rule.fix(&ctx).unwrap();
2221        assert!(
2222            fixed.contains("<p>text</p>") || fixed.contains("<p>"),
2223            "Nested <p> inside <div> should not be stripped: {fixed}"
2224        );
2225    }
2226
2227    #[test]
2228    fn test_md033_fix_relaxed_img_inside_nested_wrapper_not_converted() {
2229        // <img> inside <div><p>...</p></div> must NOT be converted because the
2230        // <p> wrapper can't be stripped (it's nested), so the markdown would be
2231        // stuck inside an HTML block where it won't render.
2232        let rule = relaxed_fix_rule();
2233        let content = "<div><p><img src=\"x.jpg\" alt=\"pic\" width=\"100\" /></p></div>";
2234        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2235        let fixed = rule.fix(&ctx).unwrap();
2236        assert!(
2237            fixed.contains("<img"),
2238            "img inside nested wrapper must not be converted: {fixed}"
2239        );
2240    }
2241
2242    #[test]
2243    fn test_md033_fix_mixed_safe_tags() {
2244        // All tags are now safe fixable (em, img, strong)
2245        let rule = MD033NoInlineHtml::with_fix(true);
2246        let content = "<em>italic</em> and <img src=\"x.jpg\"> and <strong>bold</strong>";
2247        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2248        let fixed = rule.fix(&ctx).unwrap();
2249        // All are converted
2250        assert_eq!(fixed, "*italic* and ![](x.jpg) and **bold**");
2251    }
2252
2253    #[test]
2254    fn test_md033_fix_multiple_tags_same_line() {
2255        // Multiple tags on the same line should all be fixed correctly
2256        let rule = MD033NoInlineHtml::with_fix(true);
2257        let content = "Regular text <i>italic</i> and <b>bold</b> here.";
2258        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2259        let fixed = rule.fix(&ctx).unwrap();
2260        assert_eq!(fixed, "Regular text *italic* and **bold** here.");
2261    }
2262
2263    #[test]
2264    fn test_md033_fix_multiple_em_tags_same_line() {
2265        // Multiple em/strong tags on the same line
2266        let rule = MD033NoInlineHtml::with_fix(true);
2267        let content = "<em>first</em> and <strong>second</strong> and <code>third</code>";
2268        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2269        let fixed = rule.fix(&ctx).unwrap();
2270        assert_eq!(fixed, "*first* and **second** and `third`");
2271    }
2272
2273    #[test]
2274    fn test_md033_fix_skips_tags_inside_pre() {
2275        // Tags inside <pre> blocks should NOT be fixed (would break structure)
2276        let rule = MD033NoInlineHtml::with_fix(true);
2277        let content = "<pre><code><em>VALUE</em></code></pre>";
2278        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2279        let fixed = rule.fix(&ctx).unwrap();
2280        // The <em> inside <pre><code> should NOT be converted
2281        // Only the outer structure might be changed
2282        assert!(
2283            !fixed.contains("*VALUE*"),
2284            "Tags inside <pre> should not be converted to markdown. Got: {fixed}"
2285        );
2286    }
2287
2288    #[test]
2289    fn test_md033_fix_skips_tags_inside_div() {
2290        // Tags inside HTML block elements should not be fixed
2291        let rule = MD033NoInlineHtml::with_fix(true);
2292        let content = "<div>\n<em>emphasized</em>\n</div>";
2293        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2294        let fixed = rule.fix(&ctx).unwrap();
2295        // The <em> inside <div> should not be converted to *emphasized*
2296        assert!(
2297            !fixed.contains("*emphasized*"),
2298            "Tags inside HTML blocks should not be converted. Got: {fixed}"
2299        );
2300    }
2301
2302    #[test]
2303    fn test_md033_fix_outside_html_block() {
2304        // Tags outside HTML blocks should still be fixed
2305        let rule = MD033NoInlineHtml::with_fix(true);
2306        let content = "<div>\ncontent\n</div>\n\nOutside <em>emphasized</em> text.";
2307        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2308        let fixed = rule.fix(&ctx).unwrap();
2309        // The <em> outside the div should be converted
2310        assert!(
2311            fixed.contains("*emphasized*"),
2312            "Tags outside HTML blocks should be converted. Got: {fixed}"
2313        );
2314    }
2315
2316    #[test]
2317    fn test_md033_fix_with_id_attribute() {
2318        // Tags with id attributes should not be fixed (id might be used for anchors)
2319        let rule = MD033NoInlineHtml::with_fix(true);
2320        let content = "See <em id=\"important\">this note</em> for details.";
2321        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2322        let fixed = rule.fix(&ctx).unwrap();
2323        // Should remain unchanged - id attribute matters for linking
2324        assert_eq!(fixed, content);
2325    }
2326
2327    #[test]
2328    fn test_md033_fix_with_style_attribute() {
2329        // Tags with style attributes should not be fixed
2330        let rule = MD033NoInlineHtml::with_fix(true);
2331        let content = "This is <strong style=\"color: red\">important</strong> text.";
2332        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2333        let fixed = rule.fix(&ctx).unwrap();
2334        // Should remain unchanged - style attribute provides formatting
2335        assert_eq!(fixed, content);
2336    }
2337
2338    #[test]
2339    fn test_md033_fix_mixed_with_and_without_attributes() {
2340        // Mix of tags with and without attributes
2341        let rule = MD033NoInlineHtml::with_fix(true);
2342        let content = "<em>normal</em> and <em class=\"special\">styled</em> text.";
2343        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2344        let fixed = rule.fix(&ctx).unwrap();
2345        // Only the tag without attributes should be fixed
2346        assert_eq!(fixed, "*normal* and <em class=\"special\">styled</em> text.");
2347    }
2348
2349    #[test]
2350    fn test_md033_quick_fix_tag_with_attributes_no_fix() {
2351        // Quick fix should not be provided for tags with attributes
2352        let rule = MD033NoInlineHtml::with_fix(true);
2353        let content = "<em class=\"test\">emphasized</em>";
2354        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2355        let result = rule.check(&ctx).unwrap();
2356
2357        assert_eq!(result.len(), 1, "Should find one HTML tag");
2358        // No fix should be provided for tags with attributes
2359        assert!(
2360            result[0].fix.is_none(),
2361            "Should NOT have a fix for tags with attributes"
2362        );
2363    }
2364
2365    #[test]
2366    fn test_md033_fix_skips_html_entities() {
2367        // Tags containing HTML entities should NOT be fixed
2368        // HTML entities need HTML context to render; markdown won't process them
2369        let rule = MD033NoInlineHtml::with_fix(true);
2370        let content = "<code>&vert;</code>";
2371        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2372        let fixed = rule.fix(&ctx).unwrap();
2373        // Should remain unchanged - converting would break rendering
2374        assert_eq!(fixed, content);
2375    }
2376
2377    #[test]
2378    fn test_md033_fix_skips_multiple_html_entities() {
2379        // Multiple HTML entities should also be skipped
2380        let rule = MD033NoInlineHtml::with_fix(true);
2381        let content = "<code>&lt;T&gt;</code>";
2382        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2383        let fixed = rule.fix(&ctx).unwrap();
2384        // Should remain unchanged
2385        assert_eq!(fixed, content);
2386    }
2387
2388    #[test]
2389    fn test_md033_fix_allows_ampersand_without_entity() {
2390        // Content with & but no semicolon should still be fixed
2391        let rule = MD033NoInlineHtml::with_fix(true);
2392        let content = "<code>a & b</code>";
2393        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2394        let fixed = rule.fix(&ctx).unwrap();
2395        // Should be converted since & is not part of an entity
2396        assert_eq!(fixed, "`a & b`");
2397    }
2398
2399    #[test]
2400    fn test_md033_fix_em_with_entities_skipped() {
2401        // <em> with entities should also be skipped
2402        let rule = MD033NoInlineHtml::with_fix(true);
2403        let content = "<em>&nbsp;text</em>";
2404        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2405        let fixed = rule.fix(&ctx).unwrap();
2406        // Should remain unchanged
2407        assert_eq!(fixed, content);
2408    }
2409
2410    #[test]
2411    fn test_md033_fix_skips_nested_em_in_code() {
2412        // Tags nested inside other HTML elements should NOT be fixed
2413        // e.g., <code><em>n</em></code> - the <em> should not be converted
2414        let rule = MD033NoInlineHtml::with_fix(true);
2415        let content = "<code><em>n</em></code>";
2416        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2417        let fixed = rule.fix(&ctx).unwrap();
2418        // The inner <em> should NOT be converted to *n* because it's nested
2419        // The whole structure should be left as-is (or outer code converted, but not inner)
2420        assert!(
2421            !fixed.contains("*n*"),
2422            "Nested <em> should not be converted to markdown. Got: {fixed}"
2423        );
2424    }
2425
2426    #[test]
2427    fn test_md033_fix_skips_nested_in_table() {
2428        // Tags nested in HTML structures in tables should not be fixed
2429        let rule = MD033NoInlineHtml::with_fix(true);
2430        let content = "| <code>><em>n</em></code> | description |";
2431        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2432        let fixed = rule.fix(&ctx).unwrap();
2433        // Should not convert nested <em> to *n*
2434        assert!(
2435            !fixed.contains("*n*"),
2436            "Nested tags in table should not be converted. Got: {fixed}"
2437        );
2438    }
2439
2440    #[test]
2441    fn test_md033_fix_standalone_em_still_converted() {
2442        // Standalone (non-nested) <em> should still be converted
2443        let rule = MD033NoInlineHtml::with_fix(true);
2444        let content = "This is <em>emphasized</em> text.";
2445        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2446        let fixed = rule.fix(&ctx).unwrap();
2447        assert_eq!(fixed, "This is *emphasized* text.");
2448    }
2449
2450    // ==========================================================================
2451    // Obsidian Templater Plugin Syntax Tests
2452    //
2453    // Templater is a popular Obsidian plugin that uses `<% ... %>` syntax for
2454    // template interpolation. The `<%` pattern is NOT captured by the HTML tag
2455    // parser because `%` is not a valid HTML tag name character (tags must start
2456    // with a letter). This behavior is documented here with comprehensive tests.
2457    //
2458    // Reference: https://silentvoid13.github.io/Templater/
2459    // ==========================================================================
2460
2461    #[test]
2462    fn test_md033_templater_basic_interpolation_not_flagged() {
2463        // Basic Templater interpolation: <% expr %>
2464        // Should NOT be flagged because `%` is not a valid HTML tag character
2465        let rule = MD033NoInlineHtml::default();
2466        let content = "Today is <% tp.date.now() %> which is nice.";
2467        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2468        let result = rule.check(&ctx).unwrap();
2469        assert!(
2470            result.is_empty(),
2471            "Templater basic interpolation should not be flagged as HTML. Got: {result:?}"
2472        );
2473    }
2474
2475    #[test]
2476    fn test_md033_templater_file_functions_not_flagged() {
2477        // Templater file functions: <% tp.file.* %>
2478        let rule = MD033NoInlineHtml::default();
2479        let content = "File: <% tp.file.title %>\nCreated: <% tp.file.creation_date() %>";
2480        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2481        let result = rule.check(&ctx).unwrap();
2482        assert!(
2483            result.is_empty(),
2484            "Templater file functions should not be flagged. Got: {result:?}"
2485        );
2486    }
2487
2488    #[test]
2489    fn test_md033_templater_with_arguments_not_flagged() {
2490        // Templater with function arguments
2491        let rule = MD033NoInlineHtml::default();
2492        let content = r#"Date: <% tp.date.now("YYYY-MM-DD") %>"#;
2493        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2494        let result = rule.check(&ctx).unwrap();
2495        assert!(
2496            result.is_empty(),
2497            "Templater with arguments should not be flagged. Got: {result:?}"
2498        );
2499    }
2500
2501    #[test]
2502    fn test_md033_templater_javascript_execution_not_flagged() {
2503        // Templater JavaScript execution block: <%* code %>
2504        let rule = MD033NoInlineHtml::default();
2505        let content = "<%* const today = tp.date.now(); tR += today; %>";
2506        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2507        let result = rule.check(&ctx).unwrap();
2508        assert!(
2509            result.is_empty(),
2510            "Templater JS execution block should not be flagged. Got: {result:?}"
2511        );
2512    }
2513
2514    #[test]
2515    fn test_md033_templater_dynamic_execution_not_flagged() {
2516        // Templater dynamic/preview execution: <%+ expr %>
2517        let rule = MD033NoInlineHtml::default();
2518        let content = "Dynamic: <%+ tp.date.now() %>";
2519        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2520        let result = rule.check(&ctx).unwrap();
2521        assert!(
2522            result.is_empty(),
2523            "Templater dynamic execution should not be flagged. Got: {result:?}"
2524        );
2525    }
2526
2527    #[test]
2528    fn test_md033_templater_whitespace_trim_all_not_flagged() {
2529        // Templater whitespace control - trim all: <%_ expr _%>
2530        let rule = MD033NoInlineHtml::default();
2531        let content = "<%_ tp.date.now() _%>";
2532        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2533        let result = rule.check(&ctx).unwrap();
2534        assert!(
2535            result.is_empty(),
2536            "Templater trim-all whitespace should not be flagged. Got: {result:?}"
2537        );
2538    }
2539
2540    #[test]
2541    fn test_md033_templater_whitespace_trim_newline_not_flagged() {
2542        // Templater whitespace control - trim newline: <%- expr -%>
2543        let rule = MD033NoInlineHtml::default();
2544        let content = "<%- tp.date.now() -%>";
2545        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2546        let result = rule.check(&ctx).unwrap();
2547        assert!(
2548            result.is_empty(),
2549            "Templater trim-newline should not be flagged. Got: {result:?}"
2550        );
2551    }
2552
2553    #[test]
2554    fn test_md033_templater_combined_modifiers_not_flagged() {
2555        // Templater combined whitespace and execution modifiers
2556        let rule = MD033NoInlineHtml::default();
2557        let contents = [
2558            "<%-* const x = 1; -%>",  // trim + JS execution
2559            "<%_+ tp.date.now() _%>", // trim-all + dynamic
2560            "<%- tp.file.title -%>",  // trim-newline only
2561            "<%_ tp.file.title _%>",  // trim-all only
2562        ];
2563        for content in contents {
2564            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2565            let result = rule.check(&ctx).unwrap();
2566            assert!(
2567                result.is_empty(),
2568                "Templater combined modifiers should not be flagged: {content}. Got: {result:?}"
2569            );
2570        }
2571    }
2572
2573    #[test]
2574    fn test_md033_templater_multiline_block_not_flagged() {
2575        // Multi-line Templater JavaScript block
2576        let rule = MD033NoInlineHtml::default();
2577        let content = r#"<%*
2578const x = 1;
2579const y = 2;
2580tR += x + y;
2581%>"#;
2582        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2583        let result = rule.check(&ctx).unwrap();
2584        assert!(
2585            result.is_empty(),
2586            "Templater multi-line block should not be flagged. Got: {result:?}"
2587        );
2588    }
2589
2590    #[test]
2591    fn test_md033_templater_with_angle_brackets_in_condition_not_flagged() {
2592        // Templater with angle brackets in JavaScript condition
2593        // This is a key edge case: `<` inside Templater should not trigger HTML detection
2594        let rule = MD033NoInlineHtml::default();
2595        let content = "<%* if (x < 5) { tR += 'small'; } %>";
2596        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2597        let result = rule.check(&ctx).unwrap();
2598        assert!(
2599            result.is_empty(),
2600            "Templater with angle brackets in conditions should not be flagged. Got: {result:?}"
2601        );
2602    }
2603
2604    #[test]
2605    fn test_md033_templater_mixed_with_html_only_html_flagged() {
2606        // Templater syntax mixed with actual HTML - only HTML should be flagged
2607        let rule = MD033NoInlineHtml::default();
2608        let content = "<% tp.date.now() %> is today's date. <div>This is HTML</div>";
2609        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2610        let result = rule.check(&ctx).unwrap();
2611        assert_eq!(result.len(), 1, "Should only flag the HTML div tag");
2612        assert!(
2613            result[0].message.contains("<div>"),
2614            "Should flag <div>, got: {}",
2615            result[0].message
2616        );
2617    }
2618
2619    #[test]
2620    fn test_md033_templater_in_heading_not_flagged() {
2621        // Templater in markdown heading
2622        let rule = MD033NoInlineHtml::default();
2623        let content = "# <% tp.file.title %>";
2624        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2625        let result = rule.check(&ctx).unwrap();
2626        assert!(
2627            result.is_empty(),
2628            "Templater in heading should not be flagged. Got: {result:?}"
2629        );
2630    }
2631
2632    #[test]
2633    fn test_md033_templater_multiple_on_same_line_not_flagged() {
2634        // Multiple Templater blocks on same line
2635        let rule = MD033NoInlineHtml::default();
2636        let content = "From <% tp.date.now() %> to <% tp.date.tomorrow() %> we have meetings.";
2637        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2638        let result = rule.check(&ctx).unwrap();
2639        assert!(
2640            result.is_empty(),
2641            "Multiple Templater blocks should not be flagged. Got: {result:?}"
2642        );
2643    }
2644
2645    #[test]
2646    fn test_md033_templater_in_code_block_not_flagged() {
2647        // Templater syntax in code blocks should not be flagged (code blocks are skipped)
2648        let rule = MD033NoInlineHtml::default();
2649        let content = "```\n<% tp.date.now() %>\n```";
2650        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2651        let result = rule.check(&ctx).unwrap();
2652        assert!(
2653            result.is_empty(),
2654            "Templater in code block should not be flagged. Got: {result:?}"
2655        );
2656    }
2657
2658    #[test]
2659    fn test_md033_templater_in_inline_code_not_flagged() {
2660        // Templater syntax in inline code span should not be flagged
2661        let rule = MD033NoInlineHtml::default();
2662        let content = "Use `<% tp.date.now() %>` for current date.";
2663        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2664        let result = rule.check(&ctx).unwrap();
2665        assert!(
2666            result.is_empty(),
2667            "Templater in inline code should not be flagged. Got: {result:?}"
2668        );
2669    }
2670
2671    #[test]
2672    fn test_md033_templater_also_works_in_standard_flavor() {
2673        // Templater syntax should also not be flagged in Standard flavor
2674        // because the HTML parser doesn't recognize `<%` as a valid tag
2675        let rule = MD033NoInlineHtml::default();
2676        let content = "<% tp.date.now() %> works everywhere.";
2677        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2678        let result = rule.check(&ctx).unwrap();
2679        assert!(
2680            result.is_empty(),
2681            "Templater should not be flagged even in Standard flavor. Got: {result:?}"
2682        );
2683    }
2684
2685    #[test]
2686    fn test_md033_templater_empty_tag_not_flagged() {
2687        // Empty Templater tags
2688        let rule = MD033NoInlineHtml::default();
2689        let content = "<%>";
2690        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2691        let result = rule.check(&ctx).unwrap();
2692        assert!(
2693            result.is_empty(),
2694            "Empty Templater-like tag should not be flagged. Got: {result:?}"
2695        );
2696    }
2697
2698    #[test]
2699    fn test_md033_templater_unclosed_not_flagged() {
2700        // Unclosed Templater tags - these are template errors, not HTML
2701        let rule = MD033NoInlineHtml::default();
2702        let content = "<% tp.date.now() without closing tag";
2703        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2704        let result = rule.check(&ctx).unwrap();
2705        assert!(
2706            result.is_empty(),
2707            "Unclosed Templater should not be flagged as HTML. Got: {result:?}"
2708        );
2709    }
2710
2711    #[test]
2712    fn test_md033_templater_with_newlines_inside_not_flagged() {
2713        // Templater with newlines inside the expression
2714        let rule = MD033NoInlineHtml::default();
2715        let content = r#"<% tp.date.now("YYYY") +
2716"-" +
2717tp.date.now("MM") %>"#;
2718        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2719        let result = rule.check(&ctx).unwrap();
2720        assert!(
2721            result.is_empty(),
2722            "Templater with internal newlines should not be flagged. Got: {result:?}"
2723        );
2724    }
2725
2726    #[test]
2727    fn test_md033_erb_style_tags_not_flagged() {
2728        // ERB/EJS style tags (similar to Templater) are also not HTML
2729        // This documents the general principle that `<%` is not valid HTML
2730        let rule = MD033NoInlineHtml::default();
2731        let content = "<%= variable %> and <% code %> and <%# comment %>";
2732        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2733        let result = rule.check(&ctx).unwrap();
2734        assert!(
2735            result.is_empty(),
2736            "ERB/EJS style tags should not be flagged as HTML. Got: {result:?}"
2737        );
2738    }
2739
2740    #[test]
2741    fn test_md033_templater_complex_expression_not_flagged() {
2742        // Complex Templater expression with multiple function calls
2743        let rule = MD033NoInlineHtml::default();
2744        let content = r#"<%*
2745const file = tp.file.title;
2746const date = tp.date.now("YYYY-MM-DD");
2747const folder = tp.file.folder();
2748tR += `# ${file}\n\nCreated: ${date}\nIn: ${folder}`;
2749%>"#;
2750        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2751        let result = rule.check(&ctx).unwrap();
2752        assert!(
2753            result.is_empty(),
2754            "Complex Templater expression should not be flagged. Got: {result:?}"
2755        );
2756    }
2757
2758    #[test]
2759    fn test_md033_percent_sign_variations_not_flagged() {
2760        // Various patterns starting with <% that should all be safe
2761        let rule = MD033NoInlineHtml::default();
2762        let patterns = [
2763            "<%=",  // ERB output
2764            "<%#",  // ERB comment
2765            "<%%",  // Double percent
2766            "<%!",  // Some template engines
2767            "<%@",  // JSP directive
2768            "<%--", // JSP comment
2769        ];
2770        for pattern in patterns {
2771            let content = format!("{pattern} content %>");
2772            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
2773            let result = rule.check(&ctx).unwrap();
2774            assert!(
2775                result.is_empty(),
2776                "Pattern {pattern} should not be flagged. Got: {result:?}"
2777            );
2778        }
2779    }
2780
2781    // ───── Bug #3: Bracket escaping in image-inside-link conversion ─────
2782    //
2783    // When <a> wraps already-converted markdown image text, the bracket escaping
2784    // must be skipped to produce valid [![alt](url)](href) instead of !\[\](url)
2785
2786    #[test]
2787    fn test_md033_fix_a_wrapping_markdown_image_no_escaped_brackets() {
2788        // When <a> wraps a markdown image (from a prior fix iteration),
2789        // the result should be [![](url)](href) — no escaped brackets
2790        let rule = MD033NoInlineHtml::with_fix(true);
2791        let content = r#"<a href="https://example.com">![](https://example.com/image.png)</a>"#;
2792        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2793        let fixed = rule.fix(&ctx).unwrap();
2794
2795        assert_eq!(fixed, "[![](https://example.com/image.png)](https://example.com)",);
2796        assert!(!fixed.contains(r"\["), "Must not escape brackets: {fixed}");
2797        assert!(!fixed.contains(r"\]"), "Must not escape brackets: {fixed}");
2798    }
2799
2800    #[test]
2801    fn test_md033_fix_a_wrapping_markdown_image_with_alt() {
2802        // <a> wrapping ![alt](url) preserves alt text in linked image
2803        let rule = MD033NoInlineHtml::with_fix(true);
2804        let content =
2805            r#"<a href="https://github.com/repo">![Contributors](https://contrib.rocks/image?repo=org/repo)</a>"#;
2806        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2807        let fixed = rule.fix(&ctx).unwrap();
2808
2809        assert_eq!(
2810            fixed,
2811            "[![Contributors](https://contrib.rocks/image?repo=org/repo)](https://github.com/repo)"
2812        );
2813    }
2814
2815    #[test]
2816    fn test_md033_fix_img_without_alt_produces_empty_alt() {
2817        let rule = MD033NoInlineHtml::with_fix(true);
2818        let content = r#"<img src="photo.jpg" />"#;
2819        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2820        let fixed = rule.fix(&ctx).unwrap();
2821
2822        assert_eq!(fixed, "![](photo.jpg)");
2823    }
2824
2825    #[test]
2826    fn test_md033_fix_a_with_plain_text_still_escapes_brackets() {
2827        // Plain text brackets inside <a> SHOULD be escaped
2828        let rule = MD033NoInlineHtml::with_fix(true);
2829        let content = r#"<a href="https://example.com">text with [brackets]</a>"#;
2830        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2831        let fixed = rule.fix(&ctx).unwrap();
2832
2833        assert!(
2834            fixed.contains(r"\[brackets\]"),
2835            "Plain text brackets should be escaped: {fixed}"
2836        );
2837    }
2838
2839    #[test]
2840    fn test_md033_fix_a_with_image_plus_extra_text_escapes_brackets() {
2841        // Mixed content: image followed by bracketed text — brackets must be escaped
2842        // The image detection must NOT match partial content
2843        let rule = MD033NoInlineHtml::with_fix(true);
2844        let content = r#"<a href="/link">![](img.png) see [docs]</a>"#;
2845        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2846        let fixed = rule.fix(&ctx).unwrap();
2847
2848        // "see [docs]" brackets should be escaped since inner content is mixed
2849        assert!(
2850            fixed.contains(r"\[docs\]"),
2851            "Brackets in mixed image+text content should be escaped: {fixed}"
2852        );
2853    }
2854
2855    #[test]
2856    fn test_md033_fix_img_in_a_end_to_end() {
2857        // End-to-end: verify that iterative fixing of <a><img></a>
2858        // produces the correct final result through the fix coordinator
2859        use crate::config::Config;
2860        use crate::fix_coordinator::FixCoordinator;
2861
2862        let rule = MD033NoInlineHtml::with_fix(true);
2863        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2864
2865        let mut content =
2866            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image?repo=org/repo" /></a>"#
2867                .to_string();
2868        let config = Config::default();
2869        let coordinator = FixCoordinator::new();
2870
2871        let result = coordinator
2872            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2873            .unwrap();
2874
2875        assert_eq!(
2876            content, "[![](https://contrib.rocks/image?repo=org/repo)](https://github.com/org/repo)",
2877            "End-to-end: <a><img></a> should become valid linked image"
2878        );
2879        assert!(result.converged);
2880        assert!(!content.contains(r"\["), "No escaped brackets: {content}");
2881    }
2882
2883    #[test]
2884    fn test_md033_fix_img_in_a_with_alt_end_to_end() {
2885        use crate::config::Config;
2886        use crate::fix_coordinator::FixCoordinator;
2887
2888        let rule = MD033NoInlineHtml::with_fix(true);
2889        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2890
2891        let mut content =
2892            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image" alt="Contributors" /></a>"#
2893                .to_string();
2894        let config = Config::default();
2895        let coordinator = FixCoordinator::new();
2896
2897        let result = coordinator
2898            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2899            .unwrap();
2900
2901        assert_eq!(
2902            content,
2903            "[![Contributors](https://contrib.rocks/image)](https://github.com/org/repo)",
2904        );
2905        assert!(result.converged);
2906    }
2907}