Skip to main content

rumdl_lib/rules/
md033_no_inline_html.rs

1//!
2//! Rule MD033: No HTML tags
3//!
4//! See [docs/md033.md](../../docs/md033.md) for full documentation, configuration, and examples.
5
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::regex_cache::*;
8use std::collections::HashSet;
9
10mod md033_config;
11use md033_config::{MD033Config, MD033FixMode};
12
13#[derive(Clone)]
14pub struct MD033NoInlineHtml {
15    config: MD033Config,
16    allowed: HashSet<String>,
17    disallowed: HashSet<String>,
18    drop_attributes: HashSet<String>,
19    strip_wrapper_elements: HashSet<String>,
20}
21
22impl Default for MD033NoInlineHtml {
23    fn default() -> Self {
24        let config = MD033Config::default();
25        let allowed = config.allowed_set();
26        let disallowed = config.disallowed_set();
27        let drop_attributes = config.drop_attributes_set();
28        let strip_wrapper_elements = config.strip_wrapper_elements_set();
29        Self {
30            config,
31            allowed,
32            disallowed,
33            drop_attributes,
34            strip_wrapper_elements,
35        }
36    }
37}
38
39impl MD033NoInlineHtml {
40    pub fn new() -> Self {
41        Self::default()
42    }
43
44    pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
45        let config = MD033Config {
46            allowed: allowed_vec,
47            disallowed: Vec::new(),
48            fix: false,
49            ..MD033Config::default()
50        };
51        let allowed = config.allowed_set();
52        let disallowed = config.disallowed_set();
53        let drop_attributes = config.drop_attributes_set();
54        let strip_wrapper_elements = config.strip_wrapper_elements_set();
55        Self {
56            config,
57            allowed,
58            disallowed,
59            drop_attributes,
60            strip_wrapper_elements,
61        }
62    }
63
64    pub fn with_disallowed(disallowed_vec: Vec<String>) -> Self {
65        let config = MD033Config {
66            allowed: Vec::new(),
67            disallowed: disallowed_vec,
68            fix: false,
69            ..MD033Config::default()
70        };
71        let allowed = config.allowed_set();
72        let disallowed = config.disallowed_set();
73        let drop_attributes = config.drop_attributes_set();
74        let strip_wrapper_elements = config.strip_wrapper_elements_set();
75        Self {
76            config,
77            allowed,
78            disallowed,
79            drop_attributes,
80            strip_wrapper_elements,
81        }
82    }
83
84    /// Create a new rule with auto-fix enabled
85    pub fn with_fix(fix: bool) -> Self {
86        let config = MD033Config {
87            allowed: Vec::new(),
88            disallowed: Vec::new(),
89            fix,
90            ..MD033Config::default()
91        };
92        let allowed = config.allowed_set();
93        let disallowed = config.disallowed_set();
94        let drop_attributes = config.drop_attributes_set();
95        let strip_wrapper_elements = config.strip_wrapper_elements_set();
96        Self {
97            config,
98            allowed,
99            disallowed,
100            drop_attributes,
101            strip_wrapper_elements,
102        }
103    }
104
105    pub fn from_config_struct(config: MD033Config) -> Self {
106        let allowed = config.allowed_set();
107        let disallowed = config.disallowed_set();
108        let drop_attributes = config.drop_attributes_set();
109        let strip_wrapper_elements = config.strip_wrapper_elements_set();
110        Self {
111            config,
112            allowed,
113            disallowed,
114            drop_attributes,
115            strip_wrapper_elements,
116        }
117    }
118
119    // Efficient check for allowed tags using HashSet (case-insensitive)
120    #[inline]
121    fn is_tag_allowed(&self, tag: &str) -> bool {
122        if self.allowed.is_empty() {
123            return false;
124        }
125        // Remove angle brackets and slashes, then split by whitespace or '>'
126        let tag = tag.trim_start_matches('<').trim_start_matches('/');
127        let tag_name = tag
128            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
129            .next()
130            .unwrap_or("");
131        self.allowed.contains(&tag_name.to_lowercase())
132    }
133
134    /// Check if a tag is in the disallowed set (for disallowed-only mode)
135    #[inline]
136    fn is_tag_disallowed(&self, tag: &str) -> bool {
137        if self.disallowed.is_empty() {
138            return false;
139        }
140        // Remove angle brackets and slashes, then split by whitespace or '>'
141        let tag = tag.trim_start_matches('<').trim_start_matches('/');
142        let tag_name = tag
143            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
144            .next()
145            .unwrap_or("");
146        self.disallowed.contains(&tag_name.to_lowercase())
147    }
148
149    /// Check if operating in disallowed-only mode
150    #[inline]
151    fn is_disallowed_mode(&self) -> bool {
152        self.config.is_disallowed_mode()
153    }
154
155    // Check if a tag is an HTML comment
156    #[inline]
157    fn is_html_comment(&self, tag: &str) -> bool {
158        tag.starts_with("<!--") && tag.ends_with("-->")
159    }
160
161    /// Check if a tag name is a valid HTML element or custom element.
162    /// Returns false for placeholder syntax like `<NAME>`, `<resource>`, `<actual>`.
163    ///
164    /// Per HTML spec, custom elements must contain a hyphen (e.g., `<my-component>`).
165    #[inline]
166    fn is_html_element_or_custom(tag_name: &str) -> bool {
167        // Sorted for binary search — must remain sorted when adding elements
168        const HTML_ELEMENTS: &[&str] = &[
169            "a",
170            "abbr",
171            "acronym",
172            "address",
173            "applet",
174            "area",
175            "article",
176            "aside",
177            "audio",
178            "b",
179            "base",
180            "basefont",
181            "bdi",
182            "bdo",
183            "big",
184            "blockquote",
185            "body",
186            "br",
187            "button",
188            "canvas",
189            "caption",
190            "center",
191            "cite",
192            "code",
193            "col",
194            "colgroup",
195            "data",
196            "datalist",
197            "dd",
198            "del",
199            "details",
200            "dfn",
201            "dialog",
202            "dir",
203            "div",
204            "dl",
205            "dt",
206            "em",
207            "embed",
208            "fieldset",
209            "figcaption",
210            "figure",
211            "font",
212            "footer",
213            "form",
214            "frame",
215            "frameset",
216            "h1",
217            "h2",
218            "h3",
219            "h4",
220            "h5",
221            "h6",
222            "head",
223            "header",
224            "hgroup",
225            "hr",
226            "html",
227            "i",
228            "iframe",
229            "img",
230            "input",
231            "ins",
232            "isindex",
233            "kbd",
234            "label",
235            "legend",
236            "li",
237            "link",
238            "main",
239            "map",
240            "mark",
241            "marquee",
242            "math",
243            "menu",
244            "meta",
245            "meter",
246            "nav",
247            "noembed",
248            "noframes",
249            "noscript",
250            "object",
251            "ol",
252            "optgroup",
253            "option",
254            "output",
255            "p",
256            "param",
257            "picture",
258            "plaintext",
259            "pre",
260            "progress",
261            "q",
262            "rp",
263            "rt",
264            "ruby",
265            "s",
266            "samp",
267            "script",
268            "search",
269            "section",
270            "select",
271            "slot",
272            "small",
273            "source",
274            "span",
275            "strike",
276            "strong",
277            "style",
278            "sub",
279            "summary",
280            "sup",
281            "svg",
282            "table",
283            "tbody",
284            "td",
285            "template",
286            "textarea",
287            "tfoot",
288            "th",
289            "thead",
290            "time",
291            "title",
292            "tr",
293            "track",
294            "tt",
295            "u",
296            "ul",
297            "var",
298            "video",
299            "wbr",
300            "xmp",
301        ];
302
303        let lower = tag_name.to_ascii_lowercase();
304        if HTML_ELEMENTS.binary_search(&lower.as_str()).is_ok() {
305            return true;
306        }
307        // Custom elements must contain a hyphen per HTML spec
308        tag_name.contains('-')
309    }
310
311    // Check if a tag is likely a programming type annotation rather than HTML
312    #[inline]
313    fn is_likely_type_annotation(&self, tag: &str) -> bool {
314        // Sorted for binary search — must remain sorted when adding elements
315        const COMMON_TYPES: &[&str] = &[
316            "any",
317            "apiresponse",
318            "array",
319            "bigint",
320            "config",
321            "data",
322            "date",
323            "e",
324            "element",
325            "error",
326            "function",
327            "generator",
328            "item",
329            "iterator",
330            "k",
331            "map",
332            "node",
333            "null",
334            "number",
335            "options",
336            "params",
337            "promise",
338            "regexp",
339            "request",
340            "response",
341            "result",
342            "set",
343            "string",
344            "symbol",
345            "t",
346            "u",
347            "undefined",
348            "userdata",
349            "v",
350            "void",
351            "weakmap",
352            "weakset",
353        ];
354
355        let tag_content = tag
356            .trim_start_matches('<')
357            .trim_end_matches('>')
358            .trim_start_matches('/');
359        let tag_name = tag_content
360            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
361            .next()
362            .unwrap_or("");
363
364        // Check if it's a simple tag (no attributes) with a common type name
365        if !tag_content.contains(' ') && !tag_content.contains('=') {
366            let lower = tag_name.to_ascii_lowercase();
367            COMMON_TYPES.binary_search(&lower.as_str()).is_ok()
368        } else {
369            false
370        }
371    }
372
373    // Check if a tag is actually an email address in angle brackets
374    #[inline]
375    fn is_email_address(&self, tag: &str) -> bool {
376        let content = tag.trim_start_matches('<').trim_end_matches('>');
377        // Simple email pattern: contains @ and has reasonable structure
378        content.contains('@')
379            && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
380            && content.split('@').count() == 2
381            && content.split('@').all(|part| !part.is_empty())
382    }
383
384    // Check if a tag has the markdown attribute (MkDocs/Material for MkDocs)
385    #[inline]
386    fn has_markdown_attribute(&self, tag: &str) -> bool {
387        // Check for various forms of markdown attribute
388        // Examples: <div markdown>, <div markdown="1">, <div class="result" markdown>
389        tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
390    }
391
392    /// Check if a tag contains JSX-specific attributes that indicate it's JSX, not HTML
393    /// JSX uses different attribute names than HTML:
394    /// - `className` instead of `class`
395    /// - `htmlFor` instead of `for`
396    /// - camelCase event handlers (`onClick`, `onChange`, `onSubmit`, etc.)
397    /// - JSX expression syntax `={...}` for dynamic values
398    #[inline]
399    fn has_jsx_attributes(tag: &str) -> bool {
400        // JSX-specific attribute names (HTML uses class, for, onclick, etc.)
401        tag.contains("className")
402            || tag.contains("htmlFor")
403            || tag.contains("dangerouslySetInnerHTML")
404            // camelCase event handlers (JSX uses onClick, HTML uses onclick)
405            || tag.contains("onClick")
406            || tag.contains("onChange")
407            || tag.contains("onSubmit")
408            || tag.contains("onFocus")
409            || tag.contains("onBlur")
410            || tag.contains("onKeyDown")
411            || tag.contains("onKeyUp")
412            || tag.contains("onKeyPress")
413            || tag.contains("onMouseDown")
414            || tag.contains("onMouseUp")
415            || tag.contains("onMouseEnter")
416            || tag.contains("onMouseLeave")
417            // JSX expression syntax: ={expression} or ={ expression }
418            || tag.contains("={")
419    }
420
421    // Check if a tag is actually a URL in angle brackets
422    #[inline]
423    fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
424        let content = tag.trim_start_matches('<').trim_end_matches('>');
425        // Check for common URL schemes
426        content.starts_with("http://")
427            || content.starts_with("https://")
428            || content.starts_with("ftp://")
429            || content.starts_with("ftps://")
430            || content.starts_with("mailto:")
431    }
432
433    #[inline]
434    fn is_relaxed_fix_mode(&self) -> bool {
435        self.config.fix_mode == MD033FixMode::Relaxed
436    }
437
438    #[inline]
439    fn is_droppable_attribute(&self, attr_name: &str) -> bool {
440        // Event handler attributes (onclick, onload, etc.) are never droppable
441        // because they can execute arbitrary JavaScript.
442        if attr_name.starts_with("on") && attr_name.len() > 2 {
443            return false;
444        }
445        self.drop_attributes.contains(attr_name)
446            || (attr_name.starts_with("data-")
447                && (self.drop_attributes.contains("data-*") || self.drop_attributes.contains("data-")))
448    }
449
450    #[inline]
451    fn is_strippable_wrapper(&self, tag_name: &str) -> bool {
452        self.is_relaxed_fix_mode() && self.strip_wrapper_elements.contains(tag_name)
453    }
454
455    /// Check whether `byte_offset` sits directly inside a top-level strippable
456    /// wrapper element (e.g. `<p>`).  Returns `true` only when:
457    ///  1. The nearest unclosed opening tag before the offset is a configured
458    ///     wrapper element, AND
459    ///  2. That wrapper is itself NOT nested inside another HTML element.
460    ///
461    /// Condition 2 prevents converting inner content when the wrapper cannot
462    /// be stripped (e.g. `<div><p><img/></p></div>` -- stripping `<p>` is
463    /// blocked because it is nested, so converting `<img>` would leave
464    /// markdown inside an HTML block where it won't render).
465    fn is_inside_strippable_wrapper(&self, content: &str, byte_offset: usize) -> bool {
466        if byte_offset == 0 {
467            return false;
468        }
469        let before = content[..byte_offset].trim_end();
470        if !before.ends_with('>') || before.ends_with("->") {
471            return false;
472        }
473        if let Some(last_lt) = before.rfind('<') {
474            let potential_tag = &before[last_lt..];
475            if potential_tag.starts_with("</") || potential_tag.starts_with("<!--") {
476                return false;
477            }
478            let parent_name = potential_tag
479                .trim_start_matches('<')
480                .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
481                .next()
482                .unwrap_or("")
483                .to_lowercase();
484            if !self.strip_wrapper_elements.contains(&parent_name) {
485                return false;
486            }
487            // Verify the wrapper itself is not nested inside another element.
488            let wrapper_before = before[..last_lt].trim_end();
489            if wrapper_before.ends_with('>')
490                && !wrapper_before.ends_with("->")
491                && let Some(outer_lt) = wrapper_before.rfind('<')
492                && let outer_tag = &wrapper_before[outer_lt..]
493                && !outer_tag.starts_with("</")
494                && !outer_tag.starts_with("<!--")
495            {
496                return false;
497            }
498            return true;
499        }
500        false
501    }
502
503    /// Convert paired HTML tags to their Markdown equivalents.
504    /// Returns None if the tag cannot be safely converted (has nested tags, HTML entities, etc.)
505    fn convert_to_markdown(tag_name: &str, inner_content: &str) -> Option<String> {
506        // Skip if content contains nested HTML tags
507        if inner_content.contains('<') {
508            return None;
509        }
510        // Skip if content contains HTML entities (e.g., &vert;, &amp;, &lt;)
511        // These need HTML context to render correctly; markdown won't process them
512        if inner_content.contains('&') && inner_content.contains(';') {
513            // Check for common HTML entity patterns
514            let has_entity = inner_content
515                .split('&')
516                .skip(1)
517                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
518            if has_entity {
519                return None;
520            }
521        }
522        match tag_name {
523            "em" | "i" => Some(format!("*{inner_content}*")),
524            "strong" | "b" => Some(format!("**{inner_content}**")),
525            "code" => {
526                // Handle backticks in content by using double backticks with padding
527                if inner_content.contains('`') {
528                    Some(format!("`` {inner_content} ``"))
529                } else {
530                    Some(format!("`{inner_content}`"))
531                }
532            }
533            _ => None,
534        }
535    }
536
537    /// Convert self-closing HTML tags to their Markdown equivalents.
538    fn convert_self_closing_to_markdown(&self, tag_name: &str, opening_tag: &str) -> Option<String> {
539        match tag_name {
540            "br" => match self.config.br_style {
541                md033_config::BrStyle::TrailingSpaces => Some("  \n".to_string()),
542                md033_config::BrStyle::Backslash => Some("\\\n".to_string()),
543            },
544            "hr" => Some("\n---\n".to_string()),
545            "img" => self.convert_img_to_markdown(opening_tag),
546            _ => None,
547        }
548    }
549
550    /// Parse all attributes from an HTML tag into a list of (name, value) pairs.
551    /// This provides proper attribute parsing instead of naive string matching.
552    fn parse_attributes(tag: &str) -> Vec<(String, Option<String>)> {
553        let mut attrs = Vec::new();
554
555        // Remove < and > and tag name
556        let tag_content = tag.trim_start_matches('<').trim_end_matches('>').trim_end_matches('/');
557
558        // Find first whitespace to skip tag name
559        let attr_start = tag_content
560            .find(|c: char| c.is_whitespace())
561            .map_or(tag_content.len(), |i| i + 1);
562
563        if attr_start >= tag_content.len() {
564            return attrs;
565        }
566
567        let attr_str = &tag_content[attr_start..];
568        let mut chars = attr_str.chars().peekable();
569
570        while chars.peek().is_some() {
571            // Skip whitespace
572            while chars.peek().is_some_and(|c| c.is_whitespace()) {
573                chars.next();
574            }
575
576            if chars.peek().is_none() {
577                break;
578            }
579
580            // Read attribute name
581            let mut attr_name = String::new();
582            while let Some(&c) = chars.peek() {
583                if c.is_whitespace() || c == '=' || c == '>' || c == '/' {
584                    break;
585                }
586                attr_name.push(c);
587                chars.next();
588            }
589
590            if attr_name.is_empty() {
591                break;
592            }
593
594            // Skip whitespace before =
595            while chars.peek().is_some_and(|c| c.is_whitespace()) {
596                chars.next();
597            }
598
599            // Check for = and value
600            if chars.peek() == Some(&'=') {
601                chars.next(); // consume =
602
603                // Skip whitespace after =
604                while chars.peek().is_some_and(|c| c.is_whitespace()) {
605                    chars.next();
606                }
607
608                // Read value
609                let mut value = String::new();
610                if let Some(&quote) = chars.peek() {
611                    if quote == '"' || quote == '\'' {
612                        chars.next(); // consume opening quote
613                        for c in chars.by_ref() {
614                            if c == quote {
615                                break;
616                            }
617                            value.push(c);
618                        }
619                    } else {
620                        // Unquoted value
621                        while let Some(&c) = chars.peek() {
622                            if c.is_whitespace() || c == '>' || c == '/' {
623                                break;
624                            }
625                            value.push(c);
626                            chars.next();
627                        }
628                    }
629                }
630                attrs.push((attr_name.to_ascii_lowercase(), Some(value)));
631            } else {
632                // Boolean attribute (no value)
633                attrs.push((attr_name.to_ascii_lowercase(), None));
634            }
635        }
636
637        attrs
638    }
639
640    /// Extract an HTML attribute value from a tag string.
641    /// Handles double quotes, single quotes, and unquoted values.
642    /// Returns None if the attribute is not found.
643    fn extract_attribute(tag: &str, attr_name: &str) -> Option<String> {
644        let attrs = Self::parse_attributes(tag);
645        let attr_lower = attr_name.to_ascii_lowercase();
646
647        attrs
648            .into_iter()
649            .find(|(name, _)| name == &attr_lower)
650            .and_then(|(_, value)| value)
651    }
652
653    /// Check if an HTML tag has extra attributes beyond the specified allowed ones.
654    /// Uses proper attribute parsing to avoid false positives from string matching.
655    fn has_extra_attributes(&self, tag: &str, allowed_attrs: &[&str]) -> bool {
656        let attrs = Self::parse_attributes(tag);
657
658        // All event handlers (on*) are dangerous
659        // Plus common attributes that would be lost in markdown conversion
660        const DANGEROUS_ATTR_PREFIXES: &[&str] = &["on"]; // onclick, onload, onerror, etc.
661        const DANGEROUS_ATTRS: &[&str] = &[
662            "class",
663            "id",
664            "style",
665            "target",
666            "rel",
667            "download",
668            "referrerpolicy",
669            "crossorigin",
670            "loading",
671            "decoding",
672            "fetchpriority",
673            "sizes",
674            "srcset",
675            "usemap",
676            "ismap",
677            "width",
678            "height",
679            "name",   // anchor names
680            "data-*", // data attributes (checked separately)
681        ];
682
683        for (attr_name, _) in attrs {
684            // Skip allowed attributes (list is small, linear scan is efficient)
685            if allowed_attrs.iter().any(|a| a.to_ascii_lowercase() == attr_name) {
686                continue;
687            }
688
689            if self.is_relaxed_fix_mode() {
690                if self.is_droppable_attribute(&attr_name) {
691                    continue;
692                }
693                return true;
694            }
695
696            // Check for event handlers (on*)
697            for prefix in DANGEROUS_ATTR_PREFIXES {
698                if attr_name.starts_with(prefix) && attr_name.len() > prefix.len() {
699                    return true;
700                }
701            }
702
703            // Check for data-* attributes
704            if attr_name.starts_with("data-") {
705                return true;
706            }
707
708            // Check for other dangerous attributes
709            if DANGEROUS_ATTRS.contains(&attr_name.as_str()) {
710                return true;
711            }
712        }
713
714        false
715    }
716
717    /// Convert `<a href="url">text</a>` to `[text](url)` or `[text](url "title")`
718    /// Returns None if conversion is not safe.
719    fn convert_a_to_markdown(&self, opening_tag: &str, inner_content: &str) -> Option<String> {
720        // Extract href attribute
721        let href = Self::extract_attribute(opening_tag, "href")?;
722
723        // Check URL is safe
724        if !MD033Config::is_safe_url(&href) {
725            return None;
726        }
727
728        // Check for nested HTML tags in content
729        if inner_content.contains('<') {
730            return None;
731        }
732
733        // Check for HTML entities that wouldn't render correctly in markdown
734        if inner_content.contains('&') && inner_content.contains(';') {
735            let has_entity = inner_content
736                .split('&')
737                .skip(1)
738                .any(|part| part.split(';').next().is_some_and(|e| !e.is_empty() && e.len() < 10));
739            if has_entity {
740                return None;
741            }
742        }
743
744        // Extract optional title attribute
745        let title = Self::extract_attribute(opening_tag, "title");
746
747        // Check for extra dangerous attributes (title is allowed)
748        if self.has_extra_attributes(opening_tag, &["href", "title"]) {
749            return None;
750        }
751
752        // If inner content is exactly a markdown image (from a prior <img> fix),
753        // use it directly without bracket escaping to produce valid [![alt](src)](href).
754        // Must verify the entire content is a single image — not mixed content like
755        // "![](url) extra [text]" where trailing brackets still need escaping.
756        let trimmed_inner = inner_content.trim();
757        let is_markdown_image =
758            trimmed_inner.starts_with("![") && trimmed_inner.contains("](") && trimmed_inner.ends_with(')') && {
759                // Verify the closing ](url) accounts for the rest of the content
760                // by finding the image's ]( and checking nothing follows the final )
761                if let Some(bracket_close) = trimmed_inner.rfind("](") {
762                    let after_paren = &trimmed_inner[bracket_close + 2..];
763                    // The rest should be just "url)" — find the matching close paren
764                    after_paren.ends_with(')')
765                        && after_paren.chars().filter(|&c| c == ')').count()
766                            >= after_paren.chars().filter(|&c| c == '(').count()
767                } else {
768                    false
769                }
770            };
771        let escaped_text = if is_markdown_image {
772            trimmed_inner.to_string()
773        } else {
774            // Escape special markdown characters in link text
775            // Brackets need escaping to avoid breaking the link syntax
776            inner_content.replace('[', r"\[").replace(']', r"\]")
777        };
778
779        // Escape parentheses in URL
780        let escaped_url = href.replace('(', "%28").replace(')', "%29");
781
782        // Format with or without title
783        if let Some(title_text) = title {
784            // Escape quotes in title
785            let escaped_title = title_text.replace('"', r#"\""#);
786            Some(format!("[{escaped_text}]({escaped_url} \"{escaped_title}\")"))
787        } else {
788            Some(format!("[{escaped_text}]({escaped_url})"))
789        }
790    }
791
792    /// Convert `<img src="url" alt="text">` to `![alt](src)` or `![alt](src "title")`
793    /// Returns None if conversion is not safe.
794    fn convert_img_to_markdown(&self, tag: &str) -> Option<String> {
795        // Extract src attribute (required)
796        let src = Self::extract_attribute(tag, "src")?;
797
798        // Check URL is safe
799        if !MD033Config::is_safe_url(&src) {
800            return None;
801        }
802
803        // Extract alt attribute (optional, default to empty)
804        let alt = Self::extract_attribute(tag, "alt").unwrap_or_default();
805
806        // Extract optional title attribute
807        let title = Self::extract_attribute(tag, "title");
808
809        // Check for extra dangerous attributes (title is allowed)
810        if self.has_extra_attributes(tag, &["src", "alt", "title"]) {
811            return None;
812        }
813
814        // Escape special markdown characters in alt text
815        let escaped_alt = alt.replace('[', r"\[").replace(']', r"\]");
816
817        // Escape parentheses in URL
818        let escaped_url = src.replace('(', "%28").replace(')', "%29");
819
820        // Format with or without title
821        if let Some(title_text) = title {
822            // Escape quotes in title
823            let escaped_title = title_text.replace('"', r#"\""#);
824            Some(format!("![{escaped_alt}]({escaped_url} \"{escaped_title}\")"))
825        } else {
826            Some(format!("![{escaped_alt}]({escaped_url})"))
827        }
828    }
829
830    /// Check if an HTML tag has attributes that would make conversion unsafe
831    fn has_significant_attributes(opening_tag: &str) -> bool {
832        // Tags with just whitespace or empty are fine
833        let tag_content = opening_tag
834            .trim_start_matches('<')
835            .trim_end_matches('>')
836            .trim_end_matches('/');
837
838        // Split by whitespace; if there's more than the tag name, it has attributes
839        let parts: Vec<&str> = tag_content.split_whitespace().collect();
840        parts.len() > 1
841    }
842
843    /// Check if a tag appears to be nested inside another HTML element
844    /// by looking at the surrounding context (e.g., `<code><em>text</em></code>`)
845    fn is_nested_in_html(content: &str, tag_byte_start: usize, tag_byte_end: usize) -> bool {
846        // Check if there's a `>` immediately before this tag (indicating inside another element)
847        if tag_byte_start > 0 {
848            let before = &content[..tag_byte_start];
849            let before_trimmed = before.trim_end();
850            if before_trimmed.ends_with('>') && !before_trimmed.ends_with("->") {
851                // Check it's not a closing tag or comment
852                if let Some(last_lt) = before_trimmed.rfind('<') {
853                    let potential_tag = &before_trimmed[last_lt..];
854                    // Skip if it's a closing tag (</...>) or comment (<!--)
855                    if !potential_tag.starts_with("</") && !potential_tag.starts_with("<!--") {
856                        return true;
857                    }
858                }
859            }
860        }
861        // Check if there's a `<` immediately after the closing tag (indicating inside another element)
862        if tag_byte_end < content.len() {
863            let after = &content[tag_byte_end..];
864            let after_trimmed = after.trim_start();
865            if after_trimmed.starts_with("</") {
866                return true;
867            }
868        }
869        false
870    }
871
872    /// Calculate fix to remove HTML tags while keeping content.
873    ///
874    /// For self-closing tags like `<br/>`, returns a single fix to remove the tag.
875    /// For paired tags like `<span>text</span>`, returns the replacement text (just the content).
876    ///
877    /// Returns (range, replacement_text) where range is the bytes to replace
878    /// and replacement_text is what to put there (content without tags, or empty for self-closing).
879    ///
880    /// When `in_html_block` is true, returns None in conservative mode.  In
881    /// relaxed mode two exceptions apply:
882    /// - Strippable wrapper elements (e.g. `<p>`) bypass the block guard so
883    ///   they can be stripped even though they ARE the HTML block.
884    /// - Self-closing tags whose direct parent is a strippable wrapper also
885    ///   bypass the guard so inner content can be converted first.
886    fn calculate_fix(
887        &self,
888        content: &str,
889        opening_tag: &str,
890        tag_byte_start: usize,
891        in_html_block: bool,
892    ) -> Option<(std::ops::Range<usize>, String)> {
893        // Extract tag name from opening tag
894        let tag_name = opening_tag
895            .trim_start_matches('<')
896            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
897            .next()?
898            .to_lowercase();
899
900        // Check if it's a self-closing tag (ends with /> or is a void element like <br>)
901        let is_self_closing =
902            opening_tag.ends_with("/>") || matches!(tag_name.as_str(), "br" | "hr" | "img" | "input" | "meta" | "link");
903
904        if is_self_closing {
905            // When fix is enabled, try to convert to Markdown equivalent.
906            // Skip tags inside HTML blocks (would break structure), UNLESS we
907            // are in relaxed mode and the containing block is a strippable
908            // wrapper -- this lets the inner element be converted first so the
909            // wrapper can be stripped on a subsequent pass.
910            let block_ok = !in_html_block
911                || (self.is_relaxed_fix_mode() && self.is_inside_strippable_wrapper(content, tag_byte_start));
912            if self.config.fix
913                && MD033Config::is_safe_fixable_tag(&tag_name)
914                && block_ok
915                && let Some(markdown) = self.convert_self_closing_to_markdown(&tag_name, opening_tag)
916            {
917                return Some((tag_byte_start..tag_byte_start + opening_tag.len(), markdown));
918            }
919            // Can't convert this self-closing tag to Markdown, don't provide a fix
920            // (e.g., <input>, <meta> - these have no Markdown equivalent without the new img support)
921            return None;
922        }
923
924        // Search for the closing tag after the opening tag (case-insensitive)
925        let search_start = tag_byte_start + opening_tag.len();
926        let search_slice = &content[search_start..];
927
928        // Find closing tag case-insensitively
929        let closing_tag_lower = format!("</{tag_name}>");
930        let closing_pos = search_slice.to_ascii_lowercase().find(&closing_tag_lower);
931
932        if let Some(closing_pos) = closing_pos {
933            // Get actual closing tag from original content to get correct byte length
934            let closing_tag_len = closing_tag_lower.len();
935            let closing_byte_start = search_start + closing_pos;
936            let closing_byte_end = closing_byte_start + closing_tag_len;
937
938            // Extract the content between tags
939            let inner_content = &content[search_start..closing_byte_start];
940
941            // In relaxed mode, check wrapper stripping BEFORE the in_html_block
942            // guard because the wrapper element itself IS the HTML block. We only
943            // strip when:
944            //  - the wrapper is not nested inside another HTML element
945            //  - the inner content no longer contains HTML tags (prevents
946            //    overlapping byte-range replacements within a single fix pass)
947            if self.config.fix && self.is_strippable_wrapper(&tag_name) {
948                if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
949                    return None;
950                }
951                if inner_content.contains('<') {
952                    return None;
953                }
954                return Some((tag_byte_start..closing_byte_end, inner_content.trim().to_string()));
955            }
956
957            // Skip auto-fix if inside an HTML block (like <pre>, <div>, etc.)
958            // Converting tags inside HTML blocks would break the intended structure
959            if in_html_block {
960                return None;
961            }
962
963            // Skip auto-fix if this tag is nested inside another HTML element
964            // e.g., <code><em>text</em></code> - don't convert the inner <em>
965            if Self::is_nested_in_html(content, tag_byte_start, closing_byte_end) {
966                return None;
967            }
968
969            // When fix is enabled and tag is safe to convert, try markdown conversion
970            if self.config.fix && MD033Config::is_safe_fixable_tag(&tag_name) {
971                // Handle <a> tags specially - they require attribute extraction
972                if tag_name == "a" {
973                    if let Some(markdown) = self.convert_a_to_markdown(opening_tag, inner_content) {
974                        return Some((tag_byte_start..closing_byte_end, markdown));
975                    }
976                    // convert_a_to_markdown returned None - unsafe URL, nested HTML, etc.
977                    return None;
978                }
979
980                // For simple tags (em, strong, code, etc.) - no attributes allowed
981                if Self::has_significant_attributes(opening_tag) {
982                    // Don't provide a fix for tags with attributes
983                    // User may want to keep the attributes, so leave as-is
984                    return None;
985                }
986                if let Some(markdown) = Self::convert_to_markdown(&tag_name, inner_content) {
987                    return Some((tag_byte_start..closing_byte_end, markdown));
988                }
989                // convert_to_markdown returned None, meaning content has nested tags or
990                // HTML entities that shouldn't be converted - leave as-is
991                return None;
992            }
993
994            // For non-fixable tags, don't provide a fix
995            // (e.g., <div>content</div>, <span>text</span>)
996            return None;
997        }
998
999        // If no closing tag found, don't provide a fix (malformed HTML)
1000        None
1001    }
1002}
1003
1004impl Rule for MD033NoInlineHtml {
1005    fn name(&self) -> &'static str {
1006        "MD033"
1007    }
1008
1009    fn description(&self) -> &'static str {
1010        "Inline HTML is not allowed"
1011    }
1012
1013    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
1014        let content = ctx.content;
1015
1016        // Early return: if no HTML tags at all, skip processing
1017        if content.is_empty() || !ctx.likely_has_html() {
1018            return Ok(Vec::new());
1019        }
1020
1021        // Quick check for HTML tag pattern before expensive processing
1022        if !HTML_TAG_QUICK_CHECK.is_match(content) {
1023            return Ok(Vec::new());
1024        }
1025
1026        let mut warnings = Vec::new();
1027
1028        // Use centralized HTML parser to get all HTML tags (including multiline)
1029        let html_tags = ctx.html_tags();
1030
1031        for html_tag in html_tags.iter() {
1032            // Skip closing tags (only warn on opening tags)
1033            if html_tag.is_closing {
1034                continue;
1035            }
1036
1037            let line_num = html_tag.line;
1038            let tag_byte_start = html_tag.byte_offset;
1039
1040            // Reconstruct tag string from byte offsets
1041            let tag = &content[html_tag.byte_offset..html_tag.byte_end];
1042
1043            // Skip tags in code blocks, PyMdown blocks, and block IALs
1044            if ctx
1045                .line_info(line_num)
1046                .is_some_and(|info| info.in_code_block || info.in_pymdown_block || info.is_kramdown_block_ial)
1047            {
1048                continue;
1049            }
1050
1051            // Skip HTML tags inside HTML comments
1052            if ctx.is_in_html_comment(tag_byte_start) || ctx.is_in_mdx_comment(tag_byte_start) {
1053                continue;
1054            }
1055
1056            // Skip HTML comments themselves
1057            if self.is_html_comment(tag) {
1058                continue;
1059            }
1060
1061            // Skip angle brackets inside link reference definition titles
1062            // e.g., [ref]: url "Title with <angle brackets>"
1063            if ctx.is_in_link_title(tag_byte_start) {
1064                continue;
1065            }
1066
1067            // Skip JSX components in MDX files (e.g., <Chart />, <MyComponent>)
1068            if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(char::is_uppercase) {
1069                continue;
1070            }
1071
1072            // Skip JSX fragments in MDX files (<> and </>)
1073            if ctx.flavor.supports_jsx() && (html_tag.tag_name.is_empty() || tag == "<>" || tag == "</>") {
1074                continue;
1075            }
1076
1077            // Skip elements with JSX-specific attributes in MDX files
1078            // e.g., <div className="...">, <button onClick={handler}>
1079            if ctx.flavor.supports_jsx() && Self::has_jsx_attributes(tag) {
1080                continue;
1081            }
1082
1083            // Skip non-HTML elements (placeholder syntax like <NAME>, <resource>)
1084            if !Self::is_html_element_or_custom(&html_tag.tag_name) {
1085                continue;
1086            }
1087
1088            // Skip likely programming type annotations
1089            if self.is_likely_type_annotation(tag) {
1090                continue;
1091            }
1092
1093            // Skip email addresses in angle brackets
1094            if self.is_email_address(tag) {
1095                continue;
1096            }
1097
1098            // Skip URLs in angle brackets
1099            if self.is_url_in_angle_brackets(tag) {
1100                continue;
1101            }
1102
1103            // Skip tags inside code spans (use byte offset for reliable multi-line span detection)
1104            if ctx.is_byte_offset_in_code_span(tag_byte_start) {
1105                continue;
1106            }
1107
1108            // Determine whether to report this tag based on mode:
1109            // - Disallowed mode: only report tags in the disallowed list
1110            // - Default mode: report all tags except those in the allowed list
1111            if self.is_disallowed_mode() {
1112                // In disallowed mode, skip tags NOT in the disallowed list
1113                if !self.is_tag_disallowed(tag) {
1114                    continue;
1115                }
1116            } else {
1117                // In default mode, skip allowed tags
1118                if self.is_tag_allowed(tag) {
1119                    continue;
1120                }
1121            }
1122
1123            // Skip tags with markdown attribute in MkDocs mode
1124            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
1125                continue;
1126            }
1127
1128            // Check if we're inside an HTML block (like <pre>, <div>, etc.)
1129            let in_html_block = ctx.is_in_html_block(line_num);
1130
1131            // Calculate fix to remove HTML tags but keep content
1132            let fix = self
1133                .calculate_fix(content, tag, tag_byte_start, in_html_block)
1134                .map(|(range, replacement)| Fix { range, replacement });
1135
1136            // Calculate actual end line and column for multiline tags
1137            // Use byte_end - 1 to get the last character position of the tag
1138            let (end_line, end_col) = if html_tag.byte_end > 0 {
1139                ctx.offset_to_line_col(html_tag.byte_end - 1)
1140            } else {
1141                (line_num, html_tag.end_col + 1)
1142            };
1143
1144            // Report the HTML tag
1145            warnings.push(LintWarning {
1146                rule_name: Some(self.name().to_string()),
1147                line: line_num,
1148                column: html_tag.start_col + 1, // Convert to 1-indexed
1149                end_line,                       // Actual end line for multiline tags
1150                end_column: end_col + 1,        // Actual end column
1151                message: format!("Inline HTML found: {tag}"),
1152                severity: Severity::Warning,
1153                fix,
1154            });
1155        }
1156
1157        Ok(warnings)
1158    }
1159
1160    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
1161        // Auto-fix is opt-in: only apply if explicitly enabled in config
1162        if !self.config.fix {
1163            return Ok(ctx.content.to_string());
1164        }
1165
1166        // Get warnings with their inline fixes
1167        let warnings = self.check(ctx)?;
1168        let warnings =
1169            crate::utils::fix_utils::filter_warnings_by_inline_config(warnings, ctx.inline_config(), self.name());
1170
1171        // If no warnings with fixes, return original content
1172        if warnings.is_empty() || !warnings.iter().any(|w| w.fix.is_some()) {
1173            return Ok(ctx.content.to_string());
1174        }
1175
1176        // Collect all fixes and sort by range start (descending) to apply from end to beginning
1177        let mut fixes: Vec<_> = warnings
1178            .iter()
1179            .filter_map(|w| w.fix.as_ref().map(|f| (f.range.start, f.range.end, &f.replacement)))
1180            .collect();
1181        fixes.sort_by(|a, b| b.0.cmp(&a.0));
1182
1183        // Apply fixes from end to beginning to preserve byte offsets
1184        let mut result = ctx.content.to_string();
1185        for (start, end, replacement) in fixes {
1186            if start < result.len() && end <= result.len() && start <= end {
1187                result.replace_range(start..end, replacement);
1188            }
1189        }
1190
1191        Ok(result)
1192    }
1193
1194    fn fix_capability(&self) -> crate::rule::FixCapability {
1195        if self.config.fix {
1196            crate::rule::FixCapability::FullyFixable
1197        } else {
1198            crate::rule::FixCapability::Unfixable
1199        }
1200    }
1201
1202    /// Get the category of this rule for selective processing
1203    fn category(&self) -> RuleCategory {
1204        RuleCategory::Html
1205    }
1206
1207    /// Check if this rule should be skipped
1208    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
1209        ctx.content.is_empty() || !ctx.likely_has_html()
1210    }
1211
1212    fn as_any(&self) -> &dyn std::any::Any {
1213        self
1214    }
1215
1216    fn default_config_section(&self) -> Option<(String, toml::Value)> {
1217        let json_value = serde_json::to_value(&self.config).ok()?;
1218        Some((
1219            self.name().to_string(),
1220            crate::rule_config_serde::json_to_toml_value(&json_value)?,
1221        ))
1222    }
1223
1224    fn config_aliases(&self) -> Option<std::collections::HashMap<String, String>> {
1225        let mut aliases = std::collections::HashMap::new();
1226        // Shorthand aliases for allowed-elements/disallowed-elements
1227        aliases.insert("allowed".to_string(), "allowed-elements".to_string());
1228        aliases.insert("disallowed".to_string(), "disallowed-elements".to_string());
1229        Some(aliases)
1230    }
1231
1232    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
1233    where
1234        Self: Sized,
1235    {
1236        let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
1237        Box::new(Self::from_config_struct(rule_config))
1238    }
1239}
1240
1241#[cfg(test)]
1242mod tests {
1243    use super::*;
1244    use crate::lint_context::LintContext;
1245    use crate::rule::Rule;
1246
1247    fn relaxed_fix_rule() -> MD033NoInlineHtml {
1248        let config = MD033Config {
1249            fix: true,
1250            fix_mode: MD033FixMode::Relaxed,
1251            ..MD033Config::default()
1252        };
1253        MD033NoInlineHtml::from_config_struct(config)
1254    }
1255
1256    #[test]
1257    fn test_md033_basic_html() {
1258        let rule = MD033NoInlineHtml::default();
1259        let content = "<div>Some content</div>";
1260        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1261        let result = rule.check(&ctx).unwrap();
1262        // Only reports opening tags, not closing tags
1263        assert_eq!(result.len(), 1); // Only <div>, not </div>
1264        assert!(result[0].message.starts_with("Inline HTML found: <div>"));
1265    }
1266
1267    #[test]
1268    fn test_md033_case_insensitive() {
1269        let rule = MD033NoInlineHtml::default();
1270        let content = "<DiV>Some <B>content</B></dIv>";
1271        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1272        let result = rule.check(&ctx).unwrap();
1273        // Only reports opening tags, not closing tags
1274        assert_eq!(result.len(), 2); // <DiV>, <B> (not </B>, </dIv>)
1275        assert_eq!(result[0].message, "Inline HTML found: <DiV>");
1276        assert_eq!(result[1].message, "Inline HTML found: <B>");
1277    }
1278
1279    #[test]
1280    fn test_md033_allowed_tags() {
1281        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
1282        let content = "<div>Allowed</div><p>Not allowed</p><br/>";
1283        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1284        let result = rule.check(&ctx).unwrap();
1285        // Only warnings for non-allowed opening tags (<p> only, div and br are allowed)
1286        assert_eq!(result.len(), 1);
1287        assert_eq!(result[0].message, "Inline HTML found: <p>");
1288
1289        // Test case-insensitivity of allowed tags
1290        let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
1291        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1292        let result2 = rule.check(&ctx2).unwrap();
1293        assert_eq!(result2.len(), 1); // Only <P> flagged
1294        assert_eq!(result2[0].message, "Inline HTML found: <P>");
1295    }
1296
1297    #[test]
1298    fn test_md033_html_comments() {
1299        let rule = MD033NoInlineHtml::default();
1300        let content = "<!-- This is a comment --> <p>Not a comment</p>";
1301        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1302        let result = rule.check(&ctx).unwrap();
1303        // Should detect warnings for HTML opening tags (comments are skipped, closing tags not reported)
1304        assert_eq!(result.len(), 1); // Only <p>
1305        assert_eq!(result[0].message, "Inline HTML found: <p>");
1306    }
1307
1308    #[test]
1309    fn test_md033_tags_in_links() {
1310        let rule = MD033NoInlineHtml::default();
1311        let content = "[Link](http://example.com/<div>)";
1312        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1313        let result = rule.check(&ctx).unwrap();
1314        // The <div> in the URL should be detected as HTML (not skipped)
1315        assert_eq!(result.len(), 1);
1316        assert_eq!(result[0].message, "Inline HTML found: <div>");
1317
1318        let content2 = "[Link <a>text</a>](url)";
1319        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
1320        let result2 = rule.check(&ctx2).unwrap();
1321        // Only reports opening tags
1322        assert_eq!(result2.len(), 1); // Only <a>
1323        assert_eq!(result2[0].message, "Inline HTML found: <a>");
1324    }
1325
1326    #[test]
1327    fn test_md033_fix_escaping() {
1328        let rule = MD033NoInlineHtml::default();
1329        let content = "Text with <div> and <br/> tags.";
1330        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1331        let fixed_content = rule.fix(&ctx).unwrap();
1332        // No fix for HTML tags; output should be unchanged
1333        assert_eq!(fixed_content, content);
1334    }
1335
1336    #[test]
1337    fn test_md033_in_code_blocks() {
1338        let rule = MD033NoInlineHtml::default();
1339        let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
1340        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1341        let result = rule.check(&ctx).unwrap();
1342        // Only reports opening tags outside code block
1343        assert_eq!(result.len(), 1); // Only <div> outside code block
1344        assert_eq!(result[0].message, "Inline HTML found: <div>");
1345    }
1346
1347    #[test]
1348    fn test_md033_in_code_spans() {
1349        let rule = MD033NoInlineHtml::default();
1350        let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
1351        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1352        let result = rule.check(&ctx).unwrap();
1353        // Should detect <br/> outside code span, but not tags inside code span
1354        assert_eq!(result.len(), 1);
1355        assert_eq!(result[0].message, "Inline HTML found: <br/>");
1356    }
1357
1358    #[test]
1359    fn test_md033_issue_90_code_span_with_diff_block() {
1360        // Test for issue #90: inline code span followed by diff code block
1361        let rule = MD033NoInlineHtml::default();
1362        let content = r#"# Heading
1363
1364`<env>`
1365
1366```diff
1367- this
1368+ that
1369```"#;
1370        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1371        let result = rule.check(&ctx).unwrap();
1372        // Should NOT detect <env> as HTML since it's inside backticks
1373        assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
1374    }
1375
1376    #[test]
1377    fn test_md033_multiple_code_spans_with_angle_brackets() {
1378        // Test multiple code spans on same line
1379        let rule = MD033NoInlineHtml::default();
1380        let content = "`<one>` and `<two>` and `<three>` are all code spans";
1381        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1382        let result = rule.check(&ctx).unwrap();
1383        assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
1384    }
1385
1386    #[test]
1387    fn test_md033_nested_angle_brackets_in_code_span() {
1388        // Test nested angle brackets
1389        let rule = MD033NoInlineHtml::default();
1390        let content = "Text with `<<nested>>` brackets";
1391        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1392        let result = rule.check(&ctx).unwrap();
1393        assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
1394    }
1395
1396    #[test]
1397    fn test_md033_code_span_at_end_before_code_block() {
1398        // Test code span at end of line before code block
1399        let rule = MD033NoInlineHtml::default();
1400        let content = "Testing `<test>`\n```\ncode here\n```";
1401        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1402        let result = rule.check(&ctx).unwrap();
1403        assert_eq!(result.len(), 0, "Should handle code span before code block");
1404    }
1405
1406    #[test]
1407    fn test_md033_quick_fix_inline_tag() {
1408        // Test that non-fixable tags (like <span>) do NOT get a fix
1409        // Only safe fixable tags (em, i, strong, b, code, br, hr) with fix=true get fixes
1410        let rule = MD033NoInlineHtml::default();
1411        let content = "This has <span>inline text</span> that should keep content.";
1412        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1413        let result = rule.check(&ctx).unwrap();
1414
1415        assert_eq!(result.len(), 1, "Should find one HTML tag");
1416        // <span> is NOT a safe fixable tag, so no fix should be provided
1417        assert!(
1418            result[0].fix.is_none(),
1419            "Non-fixable tags like <span> should not have a fix"
1420        );
1421    }
1422
1423    #[test]
1424    fn test_md033_quick_fix_multiline_tag() {
1425        // HTML block elements like <div> are intentionally NOT auto-fixed
1426        // Removing them would change document structure significantly
1427        let rule = MD033NoInlineHtml::default();
1428        let content = "<div>\nBlock content\n</div>";
1429        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1430        let result = rule.check(&ctx).unwrap();
1431
1432        assert_eq!(result.len(), 1, "Should find one HTML tag");
1433        // HTML block elements should NOT have auto-fix
1434        assert!(result[0].fix.is_none(), "HTML block elements should NOT have auto-fix");
1435    }
1436
1437    #[test]
1438    fn test_md033_quick_fix_self_closing_tag() {
1439        // Test that self-closing tags with fix=false (default) do NOT get a fix
1440        let rule = MD033NoInlineHtml::default();
1441        let content = "Self-closing: <br/>";
1442        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1443        let result = rule.check(&ctx).unwrap();
1444
1445        assert_eq!(result.len(), 1, "Should find one HTML tag");
1446        // Default config has fix=false, so no fix should be provided
1447        assert!(
1448            result[0].fix.is_none(),
1449            "Self-closing tags should not have a fix when fix config is false"
1450        );
1451    }
1452
1453    #[test]
1454    fn test_md033_quick_fix_multiple_tags() {
1455        // Test that multiple tags without fix=true do NOT get fixes
1456        // <span> is not a safe fixable tag, <strong> is but fix=false by default
1457        let rule = MD033NoInlineHtml::default();
1458        let content = "<span>first</span> and <strong>second</strong>";
1459        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1460        let result = rule.check(&ctx).unwrap();
1461
1462        assert_eq!(result.len(), 2, "Should find two HTML tags");
1463        // Neither should have a fix: <span> is not fixable, <strong> is but fix=false
1464        assert!(result[0].fix.is_none(), "Non-fixable <span> should not have a fix");
1465        assert!(
1466            result[1].fix.is_none(),
1467            "<strong> should not have a fix when fix config is false"
1468        );
1469    }
1470
1471    #[test]
1472    fn test_md033_skip_angle_brackets_in_link_titles() {
1473        // Angle brackets inside link reference definition titles should not be flagged as HTML
1474        let rule = MD033NoInlineHtml::default();
1475        let content = r#"# Test
1476
1477[example]: <https://example.com> "Title with <Angle Brackets> inside"
1478
1479Regular text with <div>content</div> HTML tag.
1480"#;
1481        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1482        let result = rule.check(&ctx).unwrap();
1483
1484        // Should only flag <div>, not <Angle Brackets> in the title (not a valid HTML element)
1485        // Opening tag only (markdownlint behavior)
1486        assert_eq!(result.len(), 1, "Should find opening div tag");
1487        assert!(
1488            result[0].message.contains("<div>"),
1489            "Should flag <div>, got: {}",
1490            result[0].message
1491        );
1492    }
1493
1494    #[test]
1495    fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
1496        // Test with single-quoted title
1497        let rule = MD033NoInlineHtml::default();
1498        let content = r#"[ref]: url 'Title <Help Wanted> here'
1499
1500<span>text</span> here
1501"#;
1502        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1503        let result = rule.check(&ctx).unwrap();
1504
1505        // <Help Wanted> is not a valid HTML element, so only <span> is flagged
1506        // Opening tag only (markdownlint behavior)
1507        assert_eq!(result.len(), 1, "Should find opening span tag");
1508        assert!(
1509            result[0].message.contains("<span>"),
1510            "Should flag <span>, got: {}",
1511            result[0].message
1512        );
1513    }
1514
1515    #[test]
1516    fn test_md033_multiline_tag_end_line_calculation() {
1517        // Test that multiline HTML tags report correct end_line
1518        let rule = MD033NoInlineHtml::default();
1519        let content = "<div\n  class=\"test\"\n  id=\"example\">";
1520        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1521        let result = rule.check(&ctx).unwrap();
1522
1523        assert_eq!(result.len(), 1, "Should find one HTML tag");
1524        // Tag starts on line 1
1525        assert_eq!(result[0].line, 1, "Start line should be 1");
1526        // Tag ends on line 3 (where the closing > is)
1527        assert_eq!(result[0].end_line, 3, "End line should be 3");
1528    }
1529
1530    #[test]
1531    fn test_md033_single_line_tag_same_start_end_line() {
1532        // Test that single-line HTML tags have same start and end line
1533        let rule = MD033NoInlineHtml::default();
1534        let content = "Some text <div class=\"test\"> more text";
1535        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1536        let result = rule.check(&ctx).unwrap();
1537
1538        assert_eq!(result.len(), 1, "Should find one HTML tag");
1539        assert_eq!(result[0].line, 1, "Start line should be 1");
1540        assert_eq!(result[0].end_line, 1, "End line should be 1 for single-line tag");
1541    }
1542
1543    #[test]
1544    fn test_md033_multiline_tag_with_many_attributes() {
1545        // Test multiline tag spanning multiple lines
1546        let rule = MD033NoInlineHtml::default();
1547        let content =
1548            "Text\n<div\n  data-attr1=\"value1\"\n  data-attr2=\"value2\"\n  data-attr3=\"value3\">\nMore text";
1549        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1550        let result = rule.check(&ctx).unwrap();
1551
1552        assert_eq!(result.len(), 1, "Should find one HTML tag");
1553        // Tag starts on line 2 (first line is "Text")
1554        assert_eq!(result[0].line, 2, "Start line should be 2");
1555        // Tag ends on line 5 (where the closing > is)
1556        assert_eq!(result[0].end_line, 5, "End line should be 5");
1557    }
1558
1559    #[test]
1560    fn test_md033_disallowed_mode_basic() {
1561        // Test disallowed mode: only flags tags in the disallowed list
1562        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string(), "iframe".to_string()]);
1563        let content = "<div>Safe content</div><script>alert('xss')</script>";
1564        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1565        let result = rule.check(&ctx).unwrap();
1566
1567        // Should only flag <script>, not <div>
1568        assert_eq!(result.len(), 1, "Should only flag disallowed tags");
1569        assert!(result[0].message.contains("<script>"), "Should flag script tag");
1570    }
1571
1572    #[test]
1573    fn test_md033_disallowed_gfm_security_tags() {
1574        // Test GFM security tags expansion
1575        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1576        let content = r#"
1577<div>Safe</div>
1578<title>Bad title</title>
1579<textarea>Bad textarea</textarea>
1580<style>.bad{}</style>
1581<iframe src="evil"></iframe>
1582<script>evil()</script>
1583<plaintext>old tag</plaintext>
1584<span>Safe span</span>
1585"#;
1586        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1587        let result = rule.check(&ctx).unwrap();
1588
1589        // Should flag: title, textarea, style, iframe, script, plaintext
1590        // Should NOT flag: div, span
1591        assert_eq!(result.len(), 6, "Should flag 6 GFM security tags");
1592
1593        let flagged_tags: Vec<&str> = result
1594            .iter()
1595            .filter_map(|w| w.message.split('<').nth(1))
1596            .filter_map(|s| s.split('>').next())
1597            .filter_map(|s| s.split_whitespace().next())
1598            .collect();
1599
1600        assert!(flagged_tags.contains(&"title"), "Should flag title");
1601        assert!(flagged_tags.contains(&"textarea"), "Should flag textarea");
1602        assert!(flagged_tags.contains(&"style"), "Should flag style");
1603        assert!(flagged_tags.contains(&"iframe"), "Should flag iframe");
1604        assert!(flagged_tags.contains(&"script"), "Should flag script");
1605        assert!(flagged_tags.contains(&"plaintext"), "Should flag plaintext");
1606        assert!(!flagged_tags.contains(&"div"), "Should NOT flag div");
1607        assert!(!flagged_tags.contains(&"span"), "Should NOT flag span");
1608    }
1609
1610    #[test]
1611    fn test_md033_disallowed_case_insensitive() {
1612        // Test that disallowed check is case-insensitive
1613        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string()]);
1614        let content = "<SCRIPT>alert('xss')</SCRIPT><Script>alert('xss')</Script>";
1615        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1616        let result = rule.check(&ctx).unwrap();
1617
1618        // Should flag both <SCRIPT> and <Script>
1619        assert_eq!(result.len(), 2, "Should flag both case variants");
1620    }
1621
1622    #[test]
1623    fn test_md033_disallowed_with_attributes() {
1624        // Test that disallowed mode works with tags that have attributes
1625        let rule = MD033NoInlineHtml::with_disallowed(vec!["iframe".to_string()]);
1626        let content = r#"<iframe src="https://evil.com" width="100" height="100"></iframe>"#;
1627        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1628        let result = rule.check(&ctx).unwrap();
1629
1630        assert_eq!(result.len(), 1, "Should flag iframe with attributes");
1631        assert!(result[0].message.contains("iframe"), "Should flag iframe");
1632    }
1633
1634    #[test]
1635    fn test_md033_disallowed_all_gfm_tags() {
1636        // Verify all GFM disallowed tags are covered
1637        use md033_config::GFM_DISALLOWED_TAGS;
1638        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1639
1640        for tag in GFM_DISALLOWED_TAGS {
1641            let content = format!("<{tag}>content</{tag}>");
1642            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
1643            let result = rule.check(&ctx).unwrap();
1644
1645            assert_eq!(result.len(), 1, "GFM tag <{tag}> should be flagged");
1646        }
1647    }
1648
1649    #[test]
1650    fn test_md033_disallowed_mixed_with_custom() {
1651        // Test mixing "gfm" with custom disallowed tags
1652        let rule = MD033NoInlineHtml::with_disallowed(vec![
1653            "gfm".to_string(),
1654            "marquee".to_string(), // Custom disallowed tag
1655        ]);
1656        let content = r#"<script>bad</script><marquee>annoying</marquee><div>ok</div>"#;
1657        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1658        let result = rule.check(&ctx).unwrap();
1659
1660        // Should flag script (gfm) and marquee (custom)
1661        assert_eq!(result.len(), 2, "Should flag both gfm and custom tags");
1662    }
1663
1664    #[test]
1665    fn test_md033_disallowed_empty_means_default_mode() {
1666        // Empty disallowed list means default mode (flag all HTML)
1667        let rule = MD033NoInlineHtml::with_disallowed(vec![]);
1668        let content = "<div>content</div>";
1669        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1670        let result = rule.check(&ctx).unwrap();
1671
1672        // Should flag <div> in default mode
1673        assert_eq!(result.len(), 1, "Empty disallowed = default mode");
1674    }
1675
1676    #[test]
1677    fn test_md033_jsx_fragments_in_mdx() {
1678        // JSX fragments (<> and </>) should not trigger warnings in MDX
1679        let rule = MD033NoInlineHtml::default();
1680        let content = r#"# MDX Document
1681
1682<>
1683  <Heading />
1684  <Content />
1685</>
1686
1687<div>Regular HTML should still be flagged</div>
1688"#;
1689        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1690        let result = rule.check(&ctx).unwrap();
1691
1692        // Should only flag <div>, not the fragments or JSX components
1693        assert_eq!(result.len(), 1, "Should only find one HTML tag (the div)");
1694        assert!(
1695            result[0].message.contains("<div>"),
1696            "Should flag <div>, not JSX fragments"
1697        );
1698    }
1699
1700    #[test]
1701    fn test_md033_jsx_components_in_mdx() {
1702        // JSX components (capitalized) should not trigger warnings in MDX
1703        let rule = MD033NoInlineHtml::default();
1704        let content = r#"<CustomComponent prop="value">
1705  Content
1706</CustomComponent>
1707
1708<MyButton onClick={handler}>Click</MyButton>
1709"#;
1710        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1711        let result = rule.check(&ctx).unwrap();
1712
1713        // No warnings - all are JSX components
1714        assert_eq!(result.len(), 0, "Should not flag JSX components in MDX");
1715    }
1716
1717    #[test]
1718    fn test_md033_jsx_not_skipped_in_standard_markdown() {
1719        // In standard markdown, capitalized tags should still be flagged if they're valid HTML
1720        let rule = MD033NoInlineHtml::default();
1721        let content = "<Script>alert(1)</Script>";
1722        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1723        let result = rule.check(&ctx).unwrap();
1724
1725        // Should flag <Script> in standard markdown (it's a valid HTML element)
1726        assert_eq!(result.len(), 1, "Should flag <Script> in standard markdown");
1727    }
1728
1729    #[test]
1730    fn test_md033_jsx_attributes_in_mdx() {
1731        // Elements with JSX-specific attributes should not trigger warnings in MDX
1732        let rule = MD033NoInlineHtml::default();
1733        let content = r#"# MDX with JSX Attributes
1734
1735<div className="card big">Content</div>
1736
1737<button onClick={handleClick}>Click me</button>
1738
1739<label htmlFor="input-id">Label</label>
1740
1741<input onChange={handleChange} />
1742
1743<div class="html-class">Regular HTML should be flagged</div>
1744"#;
1745        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1746        let result = rule.check(&ctx).unwrap();
1747
1748        // Should only flag the div with regular HTML "class" attribute
1749        assert_eq!(
1750            result.len(),
1751            1,
1752            "Should only flag HTML element without JSX attributes, got: {result:?}"
1753        );
1754        assert!(
1755            result[0].message.contains("<div class="),
1756            "Should flag the div with HTML class attribute"
1757        );
1758    }
1759
1760    #[test]
1761    fn test_md033_jsx_attributes_not_skipped_in_standard() {
1762        // In standard markdown, JSX attributes should still be flagged
1763        let rule = MD033NoInlineHtml::default();
1764        let content = r#"<div className="card">Content</div>"#;
1765        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1766        let result = rule.check(&ctx).unwrap();
1767
1768        // Should flag in standard markdown
1769        assert_eq!(result.len(), 1, "Should flag JSX-style elements in standard markdown");
1770    }
1771
1772    // Auto-fix tests for MD033
1773
1774    #[test]
1775    fn test_md033_fix_disabled_by_default() {
1776        // Auto-fix should be disabled by default
1777        let rule = MD033NoInlineHtml::default();
1778        assert!(!rule.config.fix, "Fix should be disabled by default");
1779        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::Unfixable);
1780    }
1781
1782    #[test]
1783    fn test_md033_fix_enabled_em_to_italic() {
1784        // When fix is enabled, <em>text</em> should convert to *text*
1785        let rule = MD033NoInlineHtml::with_fix(true);
1786        let content = "This has <em>emphasized text</em> here.";
1787        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1788        let fixed = rule.fix(&ctx).unwrap();
1789        assert_eq!(fixed, "This has *emphasized text* here.");
1790    }
1791
1792    #[test]
1793    fn test_md033_fix_enabled_i_to_italic() {
1794        // <i>text</i> should convert to *text*
1795        let rule = MD033NoInlineHtml::with_fix(true);
1796        let content = "This has <i>italic text</i> here.";
1797        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1798        let fixed = rule.fix(&ctx).unwrap();
1799        assert_eq!(fixed, "This has *italic text* here.");
1800    }
1801
1802    #[test]
1803    fn test_md033_fix_enabled_strong_to_bold() {
1804        // <strong>text</strong> should convert to **text**
1805        let rule = MD033NoInlineHtml::with_fix(true);
1806        let content = "This has <strong>bold text</strong> here.";
1807        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1808        let fixed = rule.fix(&ctx).unwrap();
1809        assert_eq!(fixed, "This has **bold text** here.");
1810    }
1811
1812    #[test]
1813    fn test_md033_fix_enabled_b_to_bold() {
1814        // <b>text</b> should convert to **text**
1815        let rule = MD033NoInlineHtml::with_fix(true);
1816        let content = "This has <b>bold text</b> here.";
1817        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1818        let fixed = rule.fix(&ctx).unwrap();
1819        assert_eq!(fixed, "This has **bold text** here.");
1820    }
1821
1822    #[test]
1823    fn test_md033_fix_enabled_code_to_backticks() {
1824        // <code>text</code> should convert to `text`
1825        let rule = MD033NoInlineHtml::with_fix(true);
1826        let content = "This has <code>inline code</code> here.";
1827        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1828        let fixed = rule.fix(&ctx).unwrap();
1829        assert_eq!(fixed, "This has `inline code` here.");
1830    }
1831
1832    #[test]
1833    fn test_md033_fix_enabled_code_with_backticks() {
1834        // <code>text with `backticks`</code> should use double backticks
1835        let rule = MD033NoInlineHtml::with_fix(true);
1836        let content = "This has <code>text with `backticks`</code> here.";
1837        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1838        let fixed = rule.fix(&ctx).unwrap();
1839        assert_eq!(fixed, "This has `` text with `backticks` `` here.");
1840    }
1841
1842    #[test]
1843    fn test_md033_fix_enabled_br_trailing_spaces() {
1844        // <br> should convert to two trailing spaces + newline (default)
1845        let rule = MD033NoInlineHtml::with_fix(true);
1846        let content = "First line<br>Second line";
1847        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1848        let fixed = rule.fix(&ctx).unwrap();
1849        assert_eq!(fixed, "First line  \nSecond line");
1850    }
1851
1852    #[test]
1853    fn test_md033_fix_enabled_br_self_closing() {
1854        // <br/> and <br /> should also convert
1855        let rule = MD033NoInlineHtml::with_fix(true);
1856        let content = "First<br/>second<br />third";
1857        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1858        let fixed = rule.fix(&ctx).unwrap();
1859        assert_eq!(fixed, "First  \nsecond  \nthird");
1860    }
1861
1862    #[test]
1863    fn test_md033_fix_enabled_br_backslash_style() {
1864        // With br_style = backslash, <br> should convert to backslash + newline
1865        let config = MD033Config {
1866            allowed: Vec::new(),
1867            disallowed: Vec::new(),
1868            fix: true,
1869            br_style: md033_config::BrStyle::Backslash,
1870            ..MD033Config::default()
1871        };
1872        let rule = MD033NoInlineHtml::from_config_struct(config);
1873        let content = "First line<br>Second line";
1874        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1875        let fixed = rule.fix(&ctx).unwrap();
1876        assert_eq!(fixed, "First line\\\nSecond line");
1877    }
1878
1879    #[test]
1880    fn test_md033_fix_enabled_hr() {
1881        // <hr> should convert to horizontal rule
1882        let rule = MD033NoInlineHtml::with_fix(true);
1883        let content = "Above<hr>Below";
1884        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1885        let fixed = rule.fix(&ctx).unwrap();
1886        assert_eq!(fixed, "Above\n---\nBelow");
1887    }
1888
1889    #[test]
1890    fn test_md033_fix_enabled_hr_self_closing() {
1891        // <hr/> should also convert
1892        let rule = MD033NoInlineHtml::with_fix(true);
1893        let content = "Above<hr/>Below";
1894        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1895        let fixed = rule.fix(&ctx).unwrap();
1896        assert_eq!(fixed, "Above\n---\nBelow");
1897    }
1898
1899    #[test]
1900    fn test_md033_fix_skips_nested_tags() {
1901        // Tags with nested HTML - outer tags may not be fully fixed due to overlapping ranges
1902        // The inner tags are processed first, which can invalidate outer tag ranges
1903        let rule = MD033NoInlineHtml::with_fix(true);
1904        let content = "This has <em>text with <strong>nested</strong> tags</em> here.";
1905        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1906        let fixed = rule.fix(&ctx).unwrap();
1907        // Inner <strong> is converted to markdown, outer <em> range becomes invalid
1908        // This is expected behavior - user should run fix multiple times for nested tags
1909        assert_eq!(fixed, "This has <em>text with **nested** tags</em> here.");
1910    }
1911
1912    #[test]
1913    fn test_md033_fix_skips_tags_with_attributes() {
1914        // Tags with attributes should NOT be fixed at all - leave as-is
1915        // User may want to keep the attributes (e.g., class="highlight" for styling)
1916        let rule = MD033NoInlineHtml::with_fix(true);
1917        let content = "This has <em class=\"highlight\">emphasized</em> text.";
1918        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1919        let fixed = rule.fix(&ctx).unwrap();
1920        // Content should remain unchanged - we don't know if attributes matter
1921        assert_eq!(fixed, content);
1922    }
1923
1924    #[test]
1925    fn test_md033_fix_disabled_no_changes() {
1926        // When fix is disabled, original content should be returned
1927        let rule = MD033NoInlineHtml::default(); // fix is false by default
1928        let content = "This has <em>emphasized text</em> here.";
1929        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1930        let fixed = rule.fix(&ctx).unwrap();
1931        assert_eq!(fixed, content, "Should return original content when fix is disabled");
1932    }
1933
1934    #[test]
1935    fn test_md033_fix_capability_enabled() {
1936        let rule = MD033NoInlineHtml::with_fix(true);
1937        assert_eq!(rule.fix_capability(), crate::rule::FixCapability::FullyFixable);
1938    }
1939
1940    #[test]
1941    fn test_md033_fix_multiple_tags() {
1942        // Test fixing multiple HTML tags in one document
1943        let rule = MD033NoInlineHtml::with_fix(true);
1944        let content = "Here is <em>italic</em> and <strong>bold</strong> text.";
1945        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1946        let fixed = rule.fix(&ctx).unwrap();
1947        assert_eq!(fixed, "Here is *italic* and **bold** text.");
1948    }
1949
1950    #[test]
1951    fn test_md033_fix_uppercase_tags() {
1952        // HTML tags are case-insensitive
1953        let rule = MD033NoInlineHtml::with_fix(true);
1954        let content = "This has <EM>emphasized</EM> text.";
1955        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1956        let fixed = rule.fix(&ctx).unwrap();
1957        assert_eq!(fixed, "This has *emphasized* text.");
1958    }
1959
1960    #[test]
1961    fn test_md033_fix_unsafe_tags_not_modified() {
1962        // Tags without safe markdown equivalents should NOT be modified
1963        // Only safe fixable tags (em, i, strong, b, code, br, hr) get converted
1964        let rule = MD033NoInlineHtml::with_fix(true);
1965        let content = "This has <div>a div</div> content.";
1966        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1967        let fixed = rule.fix(&ctx).unwrap();
1968        // <div> is not a safe fixable tag, so content should be unchanged
1969        assert_eq!(fixed, "This has <div>a div</div> content.");
1970    }
1971
1972    #[test]
1973    fn test_md033_fix_img_tag_converted() {
1974        // <img> tags with simple src/alt attributes are converted to markdown images
1975        let rule = MD033NoInlineHtml::with_fix(true);
1976        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\">";
1977        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1978        let fixed = rule.fix(&ctx).unwrap();
1979        // <img> is converted to ![alt](src) format
1980        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
1981    }
1982
1983    #[test]
1984    fn test_md033_fix_img_tag_with_extra_attrs_not_converted() {
1985        // <img> tags with width/height/style attributes are NOT converted
1986        let rule = MD033NoInlineHtml::with_fix(true);
1987        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
1988        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1989        let fixed = rule.fix(&ctx).unwrap();
1990        // Has width attribute - not safe to convert
1991        assert_eq!(fixed, "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">");
1992    }
1993
1994    #[test]
1995    fn test_md033_fix_relaxed_a_with_target_is_converted() {
1996        let rule = relaxed_fix_rule();
1997        let content = "Link: <a href=\"https://example.com\" target=\"_blank\">Example</a>";
1998        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1999        let fixed = rule.fix(&ctx).unwrap();
2000        assert_eq!(fixed, "Link: [Example](https://example.com)");
2001    }
2002
2003    #[test]
2004    fn test_md033_fix_relaxed_img_with_width_is_converted() {
2005        let rule = relaxed_fix_rule();
2006        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" width=\"100\">";
2007        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2008        let fixed = rule.fix(&ctx).unwrap();
2009        assert_eq!(fixed, "Image: ![My Photo](photo.jpg)");
2010    }
2011
2012    #[test]
2013    fn test_md033_fix_relaxed_rejects_unknown_extra_attributes() {
2014        let rule = relaxed_fix_rule();
2015        let content = "Image: <img src=\"photo.jpg\" alt=\"My Photo\" aria-label=\"hero\">";
2016        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2017        let fixed = rule.fix(&ctx).unwrap();
2018        assert_eq!(fixed, content, "Unknown attributes should not be dropped by default");
2019    }
2020
2021    #[test]
2022    fn test_md033_fix_relaxed_still_blocks_unsafe_schemes() {
2023        let rule = relaxed_fix_rule();
2024        let content = "Link: <a href=\"javascript:alert(1)\" target=\"_blank\">Example</a>";
2025        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2026        let fixed = rule.fix(&ctx).unwrap();
2027        assert_eq!(fixed, content, "Unsafe URL schemes must never be converted");
2028    }
2029
2030    #[test]
2031    fn test_md033_fix_relaxed_wrapper_strip_requires_second_pass_for_nested_html() {
2032        let rule = relaxed_fix_rule();
2033        let content = "<p align=\"center\">\n  <img src=\"logo.svg\" alt=\"Logo\" width=\"120\" />\n</p>";
2034        let ctx1 = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2035        let fixed_once = rule.fix(&ctx1).unwrap();
2036        assert!(
2037            fixed_once.contains("<p"),
2038            "First pass should keep wrapper when inner HTML is still present: {fixed_once}"
2039        );
2040        assert!(
2041            fixed_once.contains("![Logo](logo.svg)"),
2042            "Inner image should be converted on first pass: {fixed_once}"
2043        );
2044
2045        let ctx2 = LintContext::new(&fixed_once, crate::config::MarkdownFlavor::Standard, None);
2046        let fixed_twice = rule.fix(&ctx2).unwrap();
2047        assert!(
2048            !fixed_twice.contains("<p"),
2049            "Second pass should strip configured wrapper: {fixed_twice}"
2050        );
2051        assert!(fixed_twice.contains("![Logo](logo.svg)"));
2052    }
2053
2054    #[test]
2055    fn test_md033_fix_relaxed_multiple_droppable_attrs() {
2056        let rule = relaxed_fix_rule();
2057        let content = "<a href=\"https://example.com\" target=\"_blank\" rel=\"noopener\" class=\"btn\">Click</a>";
2058        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2059        let fixed = rule.fix(&ctx).unwrap();
2060        assert_eq!(fixed, "[Click](https://example.com)");
2061    }
2062
2063    #[test]
2064    fn test_md033_fix_relaxed_img_multiple_droppable_attrs() {
2065        let rule = relaxed_fix_rule();
2066        let content = "<img src=\"logo.png\" alt=\"Logo\" width=\"120\" height=\"40\" style=\"border:none\" />";
2067        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2068        let fixed = rule.fix(&ctx).unwrap();
2069        assert_eq!(fixed, "![Logo](logo.png)");
2070    }
2071
2072    #[test]
2073    fn test_md033_fix_relaxed_event_handler_never_dropped() {
2074        let rule = relaxed_fix_rule();
2075        let content = "<a href=\"https://example.com\" onclick=\"track()\">Link</a>";
2076        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2077        let fixed = rule.fix(&ctx).unwrap();
2078        assert_eq!(fixed, content, "Event handler attributes must block conversion");
2079    }
2080
2081    #[test]
2082    fn test_md033_fix_relaxed_event_handler_even_with_custom_config() {
2083        // Even if someone adds on* to drop-attributes, event handlers must be rejected
2084        let config = MD033Config {
2085            fix: true,
2086            fix_mode: MD033FixMode::Relaxed,
2087            drop_attributes: vec!["on*".to_string(), "target".to_string()],
2088            ..MD033Config::default()
2089        };
2090        let rule = MD033NoInlineHtml::from_config_struct(config);
2091        let content = "<a href=\"https://example.com\" onclick=\"alert(1)\">Link</a>";
2092        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2093        let fixed = rule.fix(&ctx).unwrap();
2094        assert_eq!(fixed, content, "on* event handlers must never be dropped");
2095    }
2096
2097    #[test]
2098    fn test_md033_fix_relaxed_custom_drop_attributes() {
2099        let config = MD033Config {
2100            fix: true,
2101            fix_mode: MD033FixMode::Relaxed,
2102            drop_attributes: vec!["loading".to_string()],
2103            ..MD033Config::default()
2104        };
2105        let rule = MD033NoInlineHtml::from_config_struct(config);
2106        // "loading" is in the custom list, "width" is NOT
2107        let content = "<img src=\"x.jpg\" alt=\"\" loading=\"lazy\">";
2108        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2109        let fixed = rule.fix(&ctx).unwrap();
2110        assert_eq!(fixed, "![](x.jpg)", "Custom drop-attributes should be respected");
2111
2112        let content2 = "<img src=\"x.jpg\" alt=\"\" width=\"100\">";
2113        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
2114        let fixed2 = rule.fix(&ctx2).unwrap();
2115        assert_eq!(
2116            fixed2, content2,
2117            "Attributes not in custom list should block conversion"
2118        );
2119    }
2120
2121    #[test]
2122    fn test_md033_fix_relaxed_custom_strip_wrapper() {
2123        let config = MD033Config {
2124            fix: true,
2125            fix_mode: MD033FixMode::Relaxed,
2126            strip_wrapper_elements: vec!["div".to_string()],
2127            ..MD033Config::default()
2128        };
2129        let rule = MD033NoInlineHtml::from_config_struct(config);
2130        let content = "<div>Some text content</div>";
2131        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2132        let fixed = rule.fix(&ctx).unwrap();
2133        assert_eq!(fixed, "Some text content");
2134    }
2135
2136    #[test]
2137    fn test_md033_fix_relaxed_wrapper_with_plain_text() {
2138        let rule = relaxed_fix_rule();
2139        let content = "<p align=\"center\">Just some text</p>";
2140        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2141        let fixed = rule.fix(&ctx).unwrap();
2142        assert_eq!(fixed, "Just some text");
2143    }
2144
2145    #[test]
2146    fn test_md033_fix_relaxed_data_attr_with_wildcard() {
2147        let config = MD033Config {
2148            fix: true,
2149            fix_mode: MD033FixMode::Relaxed,
2150            drop_attributes: vec!["data-*".to_string(), "target".to_string()],
2151            ..MD033Config::default()
2152        };
2153        let rule = MD033NoInlineHtml::from_config_struct(config);
2154        let content = "<a href=\"https://example.com\" data-tracking=\"abc\" target=\"_blank\">Link</a>";
2155        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2156        let fixed = rule.fix(&ctx).unwrap();
2157        assert_eq!(fixed, "[Link](https://example.com)");
2158    }
2159
2160    #[test]
2161    fn test_md033_fix_relaxed_mixed_droppable_and_blocking_attrs() {
2162        let rule = relaxed_fix_rule();
2163        // "target" is droppable, "aria-label" is not in the default list
2164        let content = "<a href=\"https://example.com\" target=\"_blank\" aria-label=\"nav\">Link</a>";
2165        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2166        let fixed = rule.fix(&ctx).unwrap();
2167        assert_eq!(fixed, content, "Non-droppable attribute should block conversion");
2168    }
2169
2170    #[test]
2171    fn test_md033_fix_relaxed_badge_pattern() {
2172        // Common GitHub README badge pattern
2173        let rule = relaxed_fix_rule();
2174        let content = "<a href=\"https://crates.io/crates/rumdl\" target=\"_blank\"><img src=\"https://img.shields.io/crates/v/rumdl.svg\" alt=\"Crate\" width=\"120\" /></a>";
2175        let ctx1 = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2176        let fixed_once = rule.fix(&ctx1).unwrap();
2177        // First pass should convert the inner <img>
2178        assert!(
2179            fixed_once.contains("![Crate](https://img.shields.io/crates/v/rumdl.svg)"),
2180            "Inner img should be converted: {fixed_once}"
2181        );
2182
2183        // Second pass converts the <a> wrapper
2184        let ctx2 = LintContext::new(&fixed_once, crate::config::MarkdownFlavor::Standard, None);
2185        let fixed_twice = rule.fix(&ctx2).unwrap();
2186        assert!(
2187            fixed_twice
2188                .contains("[![Crate](https://img.shields.io/crates/v/rumdl.svg)](https://crates.io/crates/rumdl)"),
2189            "Badge should produce nested markdown image link: {fixed_twice}"
2190        );
2191    }
2192
2193    #[test]
2194    fn test_md033_fix_relaxed_conservative_mode_unchanged() {
2195        // Verify conservative mode (default) is unaffected by the relaxed logic
2196        let rule = MD033NoInlineHtml::with_fix(true);
2197        let content = "<a href=\"https://example.com\" target=\"_blank\">Link</a>";
2198        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2199        let fixed = rule.fix(&ctx).unwrap();
2200        assert_eq!(fixed, content, "Conservative mode should not drop target attribute");
2201    }
2202
2203    #[test]
2204    fn test_md033_fix_relaxed_img_inside_pre_not_converted() {
2205        // <img> inside <pre> must NOT be converted, even in relaxed mode
2206        let rule = relaxed_fix_rule();
2207        let content = "<pre>\n  <img src=\"diagram.png\" alt=\"d\" width=\"100\" />\n</pre>";
2208        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2209        let fixed = rule.fix(&ctx).unwrap();
2210        assert!(fixed.contains("<img"), "img inside pre must not be converted: {fixed}");
2211    }
2212
2213    #[test]
2214    fn test_md033_fix_relaxed_wrapper_nested_inside_div_not_stripped() {
2215        // <p> nested inside <div> should not be stripped
2216        let rule = relaxed_fix_rule();
2217        let content = "<div><p>text</p></div>";
2218        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2219        let fixed = rule.fix(&ctx).unwrap();
2220        assert!(
2221            fixed.contains("<p>text</p>") || fixed.contains("<p>"),
2222            "Nested <p> inside <div> should not be stripped: {fixed}"
2223        );
2224    }
2225
2226    #[test]
2227    fn test_md033_fix_relaxed_img_inside_nested_wrapper_not_converted() {
2228        // <img> inside <div><p>...</p></div> must NOT be converted because the
2229        // <p> wrapper can't be stripped (it's nested), so the markdown would be
2230        // stuck inside an HTML block where it won't render.
2231        let rule = relaxed_fix_rule();
2232        let content = "<div><p><img src=\"x.jpg\" alt=\"pic\" width=\"100\" /></p></div>";
2233        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2234        let fixed = rule.fix(&ctx).unwrap();
2235        assert!(
2236            fixed.contains("<img"),
2237            "img inside nested wrapper must not be converted: {fixed}"
2238        );
2239    }
2240
2241    #[test]
2242    fn test_md033_fix_mixed_safe_tags() {
2243        // All tags are now safe fixable (em, img, strong)
2244        let rule = MD033NoInlineHtml::with_fix(true);
2245        let content = "<em>italic</em> and <img src=\"x.jpg\"> and <strong>bold</strong>";
2246        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2247        let fixed = rule.fix(&ctx).unwrap();
2248        // All are converted
2249        assert_eq!(fixed, "*italic* and ![](x.jpg) and **bold**");
2250    }
2251
2252    #[test]
2253    fn test_md033_fix_multiple_tags_same_line() {
2254        // Multiple tags on the same line should all be fixed correctly
2255        let rule = MD033NoInlineHtml::with_fix(true);
2256        let content = "Regular text <i>italic</i> and <b>bold</b> here.";
2257        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2258        let fixed = rule.fix(&ctx).unwrap();
2259        assert_eq!(fixed, "Regular text *italic* and **bold** here.");
2260    }
2261
2262    #[test]
2263    fn test_md033_fix_multiple_em_tags_same_line() {
2264        // Multiple em/strong tags on the same line
2265        let rule = MD033NoInlineHtml::with_fix(true);
2266        let content = "<em>first</em> and <strong>second</strong> and <code>third</code>";
2267        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2268        let fixed = rule.fix(&ctx).unwrap();
2269        assert_eq!(fixed, "*first* and **second** and `third`");
2270    }
2271
2272    #[test]
2273    fn test_md033_fix_skips_tags_inside_pre() {
2274        // Tags inside <pre> blocks should NOT be fixed (would break structure)
2275        let rule = MD033NoInlineHtml::with_fix(true);
2276        let content = "<pre><code><em>VALUE</em></code></pre>";
2277        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2278        let fixed = rule.fix(&ctx).unwrap();
2279        // The <em> inside <pre><code> should NOT be converted
2280        // Only the outer structure might be changed
2281        assert!(
2282            !fixed.contains("*VALUE*"),
2283            "Tags inside <pre> should not be converted to markdown. Got: {fixed}"
2284        );
2285    }
2286
2287    #[test]
2288    fn test_md033_fix_skips_tags_inside_div() {
2289        // Tags inside HTML block elements should not be fixed
2290        let rule = MD033NoInlineHtml::with_fix(true);
2291        let content = "<div>\n<em>emphasized</em>\n</div>";
2292        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2293        let fixed = rule.fix(&ctx).unwrap();
2294        // The <em> inside <div> should not be converted to *emphasized*
2295        assert!(
2296            !fixed.contains("*emphasized*"),
2297            "Tags inside HTML blocks should not be converted. Got: {fixed}"
2298        );
2299    }
2300
2301    #[test]
2302    fn test_md033_fix_outside_html_block() {
2303        // Tags outside HTML blocks should still be fixed
2304        let rule = MD033NoInlineHtml::with_fix(true);
2305        let content = "<div>\ncontent\n</div>\n\nOutside <em>emphasized</em> text.";
2306        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2307        let fixed = rule.fix(&ctx).unwrap();
2308        // The <em> outside the div should be converted
2309        assert!(
2310            fixed.contains("*emphasized*"),
2311            "Tags outside HTML blocks should be converted. Got: {fixed}"
2312        );
2313    }
2314
2315    #[test]
2316    fn test_md033_fix_with_id_attribute() {
2317        // Tags with id attributes should not be fixed (id might be used for anchors)
2318        let rule = MD033NoInlineHtml::with_fix(true);
2319        let content = "See <em id=\"important\">this note</em> for details.";
2320        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2321        let fixed = rule.fix(&ctx).unwrap();
2322        // Should remain unchanged - id attribute matters for linking
2323        assert_eq!(fixed, content);
2324    }
2325
2326    #[test]
2327    fn test_md033_fix_with_style_attribute() {
2328        // Tags with style attributes should not be fixed
2329        let rule = MD033NoInlineHtml::with_fix(true);
2330        let content = "This is <strong style=\"color: red\">important</strong> text.";
2331        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2332        let fixed = rule.fix(&ctx).unwrap();
2333        // Should remain unchanged - style attribute provides formatting
2334        assert_eq!(fixed, content);
2335    }
2336
2337    #[test]
2338    fn test_md033_fix_mixed_with_and_without_attributes() {
2339        // Mix of tags with and without attributes
2340        let rule = MD033NoInlineHtml::with_fix(true);
2341        let content = "<em>normal</em> and <em class=\"special\">styled</em> text.";
2342        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2343        let fixed = rule.fix(&ctx).unwrap();
2344        // Only the tag without attributes should be fixed
2345        assert_eq!(fixed, "*normal* and <em class=\"special\">styled</em> text.");
2346    }
2347
2348    #[test]
2349    fn test_md033_quick_fix_tag_with_attributes_no_fix() {
2350        // Quick fix should not be provided for tags with attributes
2351        let rule = MD033NoInlineHtml::with_fix(true);
2352        let content = "<em class=\"test\">emphasized</em>";
2353        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2354        let result = rule.check(&ctx).unwrap();
2355
2356        assert_eq!(result.len(), 1, "Should find one HTML tag");
2357        // No fix should be provided for tags with attributes
2358        assert!(
2359            result[0].fix.is_none(),
2360            "Should NOT have a fix for tags with attributes"
2361        );
2362    }
2363
2364    #[test]
2365    fn test_md033_fix_skips_html_entities() {
2366        // Tags containing HTML entities should NOT be fixed
2367        // HTML entities need HTML context to render; markdown won't process them
2368        let rule = MD033NoInlineHtml::with_fix(true);
2369        let content = "<code>&vert;</code>";
2370        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2371        let fixed = rule.fix(&ctx).unwrap();
2372        // Should remain unchanged - converting would break rendering
2373        assert_eq!(fixed, content);
2374    }
2375
2376    #[test]
2377    fn test_md033_fix_skips_multiple_html_entities() {
2378        // Multiple HTML entities should also be skipped
2379        let rule = MD033NoInlineHtml::with_fix(true);
2380        let content = "<code>&lt;T&gt;</code>";
2381        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2382        let fixed = rule.fix(&ctx).unwrap();
2383        // Should remain unchanged
2384        assert_eq!(fixed, content);
2385    }
2386
2387    #[test]
2388    fn test_md033_fix_allows_ampersand_without_entity() {
2389        // Content with & but no semicolon should still be fixed
2390        let rule = MD033NoInlineHtml::with_fix(true);
2391        let content = "<code>a & b</code>";
2392        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2393        let fixed = rule.fix(&ctx).unwrap();
2394        // Should be converted since & is not part of an entity
2395        assert_eq!(fixed, "`a & b`");
2396    }
2397
2398    #[test]
2399    fn test_md033_fix_em_with_entities_skipped() {
2400        // <em> with entities should also be skipped
2401        let rule = MD033NoInlineHtml::with_fix(true);
2402        let content = "<em>&nbsp;text</em>";
2403        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2404        let fixed = rule.fix(&ctx).unwrap();
2405        // Should remain unchanged
2406        assert_eq!(fixed, content);
2407    }
2408
2409    #[test]
2410    fn test_md033_fix_skips_nested_em_in_code() {
2411        // Tags nested inside other HTML elements should NOT be fixed
2412        // e.g., <code><em>n</em></code> - the <em> should not be converted
2413        let rule = MD033NoInlineHtml::with_fix(true);
2414        let content = "<code><em>n</em></code>";
2415        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2416        let fixed = rule.fix(&ctx).unwrap();
2417        // The inner <em> should NOT be converted to *n* because it's nested
2418        // The whole structure should be left as-is (or outer code converted, but not inner)
2419        assert!(
2420            !fixed.contains("*n*"),
2421            "Nested <em> should not be converted to markdown. Got: {fixed}"
2422        );
2423    }
2424
2425    #[test]
2426    fn test_md033_fix_skips_nested_in_table() {
2427        // Tags nested in HTML structures in tables should not be fixed
2428        let rule = MD033NoInlineHtml::with_fix(true);
2429        let content = "| <code>><em>n</em></code> | description |";
2430        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2431        let fixed = rule.fix(&ctx).unwrap();
2432        // Should not convert nested <em> to *n*
2433        assert!(
2434            !fixed.contains("*n*"),
2435            "Nested tags in table should not be converted. Got: {fixed}"
2436        );
2437    }
2438
2439    #[test]
2440    fn test_md033_fix_standalone_em_still_converted() {
2441        // Standalone (non-nested) <em> should still be converted
2442        let rule = MD033NoInlineHtml::with_fix(true);
2443        let content = "This is <em>emphasized</em> text.";
2444        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2445        let fixed = rule.fix(&ctx).unwrap();
2446        assert_eq!(fixed, "This is *emphasized* text.");
2447    }
2448
2449    // ==========================================================================
2450    // Obsidian Templater Plugin Syntax Tests
2451    //
2452    // Templater is a popular Obsidian plugin that uses `<% ... %>` syntax for
2453    // template interpolation. The `<%` pattern is NOT captured by the HTML tag
2454    // parser because `%` is not a valid HTML tag name character (tags must start
2455    // with a letter). This behavior is documented here with comprehensive tests.
2456    //
2457    // Reference: https://silentvoid13.github.io/Templater/
2458    // ==========================================================================
2459
2460    #[test]
2461    fn test_md033_templater_basic_interpolation_not_flagged() {
2462        // Basic Templater interpolation: <% expr %>
2463        // Should NOT be flagged because `%` is not a valid HTML tag character
2464        let rule = MD033NoInlineHtml::default();
2465        let content = "Today is <% tp.date.now() %> which is nice.";
2466        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2467        let result = rule.check(&ctx).unwrap();
2468        assert!(
2469            result.is_empty(),
2470            "Templater basic interpolation should not be flagged as HTML. Got: {result:?}"
2471        );
2472    }
2473
2474    #[test]
2475    fn test_md033_templater_file_functions_not_flagged() {
2476        // Templater file functions: <% tp.file.* %>
2477        let rule = MD033NoInlineHtml::default();
2478        let content = "File: <% tp.file.title %>\nCreated: <% tp.file.creation_date() %>";
2479        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2480        let result = rule.check(&ctx).unwrap();
2481        assert!(
2482            result.is_empty(),
2483            "Templater file functions should not be flagged. Got: {result:?}"
2484        );
2485    }
2486
2487    #[test]
2488    fn test_md033_templater_with_arguments_not_flagged() {
2489        // Templater with function arguments
2490        let rule = MD033NoInlineHtml::default();
2491        let content = r#"Date: <% tp.date.now("YYYY-MM-DD") %>"#;
2492        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2493        let result = rule.check(&ctx).unwrap();
2494        assert!(
2495            result.is_empty(),
2496            "Templater with arguments should not be flagged. Got: {result:?}"
2497        );
2498    }
2499
2500    #[test]
2501    fn test_md033_templater_javascript_execution_not_flagged() {
2502        // Templater JavaScript execution block: <%* code %>
2503        let rule = MD033NoInlineHtml::default();
2504        let content = "<%* const today = tp.date.now(); tR += today; %>";
2505        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2506        let result = rule.check(&ctx).unwrap();
2507        assert!(
2508            result.is_empty(),
2509            "Templater JS execution block should not be flagged. Got: {result:?}"
2510        );
2511    }
2512
2513    #[test]
2514    fn test_md033_templater_dynamic_execution_not_flagged() {
2515        // Templater dynamic/preview execution: <%+ expr %>
2516        let rule = MD033NoInlineHtml::default();
2517        let content = "Dynamic: <%+ tp.date.now() %>";
2518        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2519        let result = rule.check(&ctx).unwrap();
2520        assert!(
2521            result.is_empty(),
2522            "Templater dynamic execution should not be flagged. Got: {result:?}"
2523        );
2524    }
2525
2526    #[test]
2527    fn test_md033_templater_whitespace_trim_all_not_flagged() {
2528        // Templater whitespace control - trim all: <%_ expr _%>
2529        let rule = MD033NoInlineHtml::default();
2530        let content = "<%_ tp.date.now() _%>";
2531        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2532        let result = rule.check(&ctx).unwrap();
2533        assert!(
2534            result.is_empty(),
2535            "Templater trim-all whitespace should not be flagged. Got: {result:?}"
2536        );
2537    }
2538
2539    #[test]
2540    fn test_md033_templater_whitespace_trim_newline_not_flagged() {
2541        // Templater whitespace control - trim newline: <%- expr -%>
2542        let rule = MD033NoInlineHtml::default();
2543        let content = "<%- tp.date.now() -%>";
2544        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2545        let result = rule.check(&ctx).unwrap();
2546        assert!(
2547            result.is_empty(),
2548            "Templater trim-newline should not be flagged. Got: {result:?}"
2549        );
2550    }
2551
2552    #[test]
2553    fn test_md033_templater_combined_modifiers_not_flagged() {
2554        // Templater combined whitespace and execution modifiers
2555        let rule = MD033NoInlineHtml::default();
2556        let contents = [
2557            "<%-* const x = 1; -%>",  // trim + JS execution
2558            "<%_+ tp.date.now() _%>", // trim-all + dynamic
2559            "<%- tp.file.title -%>",  // trim-newline only
2560            "<%_ tp.file.title _%>",  // trim-all only
2561        ];
2562        for content in contents {
2563            let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2564            let result = rule.check(&ctx).unwrap();
2565            assert!(
2566                result.is_empty(),
2567                "Templater combined modifiers should not be flagged: {content}. Got: {result:?}"
2568            );
2569        }
2570    }
2571
2572    #[test]
2573    fn test_md033_templater_multiline_block_not_flagged() {
2574        // Multi-line Templater JavaScript block
2575        let rule = MD033NoInlineHtml::default();
2576        let content = r#"<%*
2577const x = 1;
2578const y = 2;
2579tR += x + y;
2580%>"#;
2581        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2582        let result = rule.check(&ctx).unwrap();
2583        assert!(
2584            result.is_empty(),
2585            "Templater multi-line block should not be flagged. Got: {result:?}"
2586        );
2587    }
2588
2589    #[test]
2590    fn test_md033_templater_with_angle_brackets_in_condition_not_flagged() {
2591        // Templater with angle brackets in JavaScript condition
2592        // This is a key edge case: `<` inside Templater should not trigger HTML detection
2593        let rule = MD033NoInlineHtml::default();
2594        let content = "<%* if (x < 5) { tR += 'small'; } %>";
2595        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2596        let result = rule.check(&ctx).unwrap();
2597        assert!(
2598            result.is_empty(),
2599            "Templater with angle brackets in conditions should not be flagged. Got: {result:?}"
2600        );
2601    }
2602
2603    #[test]
2604    fn test_md033_templater_mixed_with_html_only_html_flagged() {
2605        // Templater syntax mixed with actual HTML - only HTML should be flagged
2606        let rule = MD033NoInlineHtml::default();
2607        let content = "<% tp.date.now() %> is today's date. <div>This is HTML</div>";
2608        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2609        let result = rule.check(&ctx).unwrap();
2610        assert_eq!(result.len(), 1, "Should only flag the HTML div tag");
2611        assert!(
2612            result[0].message.contains("<div>"),
2613            "Should flag <div>, got: {}",
2614            result[0].message
2615        );
2616    }
2617
2618    #[test]
2619    fn test_md033_templater_in_heading_not_flagged() {
2620        // Templater in markdown heading
2621        let rule = MD033NoInlineHtml::default();
2622        let content = "# <% tp.file.title %>";
2623        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2624        let result = rule.check(&ctx).unwrap();
2625        assert!(
2626            result.is_empty(),
2627            "Templater in heading should not be flagged. Got: {result:?}"
2628        );
2629    }
2630
2631    #[test]
2632    fn test_md033_templater_multiple_on_same_line_not_flagged() {
2633        // Multiple Templater blocks on same line
2634        let rule = MD033NoInlineHtml::default();
2635        let content = "From <% tp.date.now() %> to <% tp.date.tomorrow() %> we have meetings.";
2636        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2637        let result = rule.check(&ctx).unwrap();
2638        assert!(
2639            result.is_empty(),
2640            "Multiple Templater blocks should not be flagged. Got: {result:?}"
2641        );
2642    }
2643
2644    #[test]
2645    fn test_md033_templater_in_code_block_not_flagged() {
2646        // Templater syntax in code blocks should not be flagged (code blocks are skipped)
2647        let rule = MD033NoInlineHtml::default();
2648        let content = "```\n<% tp.date.now() %>\n```";
2649        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2650        let result = rule.check(&ctx).unwrap();
2651        assert!(
2652            result.is_empty(),
2653            "Templater in code block should not be flagged. Got: {result:?}"
2654        );
2655    }
2656
2657    #[test]
2658    fn test_md033_templater_in_inline_code_not_flagged() {
2659        // Templater syntax in inline code span should not be flagged
2660        let rule = MD033NoInlineHtml::default();
2661        let content = "Use `<% tp.date.now() %>` for current date.";
2662        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2663        let result = rule.check(&ctx).unwrap();
2664        assert!(
2665            result.is_empty(),
2666            "Templater in inline code should not be flagged. Got: {result:?}"
2667        );
2668    }
2669
2670    #[test]
2671    fn test_md033_templater_also_works_in_standard_flavor() {
2672        // Templater syntax should also not be flagged in Standard flavor
2673        // because the HTML parser doesn't recognize `<%` as a valid tag
2674        let rule = MD033NoInlineHtml::default();
2675        let content = "<% tp.date.now() %> works everywhere.";
2676        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2677        let result = rule.check(&ctx).unwrap();
2678        assert!(
2679            result.is_empty(),
2680            "Templater should not be flagged even in Standard flavor. Got: {result:?}"
2681        );
2682    }
2683
2684    #[test]
2685    fn test_md033_templater_empty_tag_not_flagged() {
2686        // Empty Templater tags
2687        let rule = MD033NoInlineHtml::default();
2688        let content = "<%>";
2689        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2690        let result = rule.check(&ctx).unwrap();
2691        assert!(
2692            result.is_empty(),
2693            "Empty Templater-like tag should not be flagged. Got: {result:?}"
2694        );
2695    }
2696
2697    #[test]
2698    fn test_md033_templater_unclosed_not_flagged() {
2699        // Unclosed Templater tags - these are template errors, not HTML
2700        let rule = MD033NoInlineHtml::default();
2701        let content = "<% tp.date.now() without closing tag";
2702        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2703        let result = rule.check(&ctx).unwrap();
2704        assert!(
2705            result.is_empty(),
2706            "Unclosed Templater should not be flagged as HTML. Got: {result:?}"
2707        );
2708    }
2709
2710    #[test]
2711    fn test_md033_templater_with_newlines_inside_not_flagged() {
2712        // Templater with newlines inside the expression
2713        let rule = MD033NoInlineHtml::default();
2714        let content = r#"<% tp.date.now("YYYY") +
2715"-" +
2716tp.date.now("MM") %>"#;
2717        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2718        let result = rule.check(&ctx).unwrap();
2719        assert!(
2720            result.is_empty(),
2721            "Templater with internal newlines should not be flagged. Got: {result:?}"
2722        );
2723    }
2724
2725    #[test]
2726    fn test_md033_erb_style_tags_not_flagged() {
2727        // ERB/EJS style tags (similar to Templater) are also not HTML
2728        // This documents the general principle that `<%` is not valid HTML
2729        let rule = MD033NoInlineHtml::default();
2730        let content = "<%= variable %> and <% code %> and <%# comment %>";
2731        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2732        let result = rule.check(&ctx).unwrap();
2733        assert!(
2734            result.is_empty(),
2735            "ERB/EJS style tags should not be flagged as HTML. Got: {result:?}"
2736        );
2737    }
2738
2739    #[test]
2740    fn test_md033_templater_complex_expression_not_flagged() {
2741        // Complex Templater expression with multiple function calls
2742        let rule = MD033NoInlineHtml::default();
2743        let content = r#"<%*
2744const file = tp.file.title;
2745const date = tp.date.now("YYYY-MM-DD");
2746const folder = tp.file.folder();
2747tR += `# ${file}\n\nCreated: ${date}\nIn: ${folder}`;
2748%>"#;
2749        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Obsidian, None);
2750        let result = rule.check(&ctx).unwrap();
2751        assert!(
2752            result.is_empty(),
2753            "Complex Templater expression should not be flagged. Got: {result:?}"
2754        );
2755    }
2756
2757    #[test]
2758    fn test_md033_percent_sign_variations_not_flagged() {
2759        // Various patterns starting with <% that should all be safe
2760        let rule = MD033NoInlineHtml::default();
2761        let patterns = [
2762            "<%=",  // ERB output
2763            "<%#",  // ERB comment
2764            "<%%",  // Double percent
2765            "<%!",  // Some template engines
2766            "<%@",  // JSP directive
2767            "<%--", // JSP comment
2768        ];
2769        for pattern in patterns {
2770            let content = format!("{pattern} content %>");
2771            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
2772            let result = rule.check(&ctx).unwrap();
2773            assert!(
2774                result.is_empty(),
2775                "Pattern {pattern} should not be flagged. Got: {result:?}"
2776            );
2777        }
2778    }
2779
2780    // ───── Bug #3: Bracket escaping in image-inside-link conversion ─────
2781    //
2782    // When <a> wraps already-converted markdown image text, the bracket escaping
2783    // must be skipped to produce valid [![alt](url)](href) instead of !\[\](url)
2784
2785    #[test]
2786    fn test_md033_fix_a_wrapping_markdown_image_no_escaped_brackets() {
2787        // When <a> wraps a markdown image (from a prior fix iteration),
2788        // the result should be [![](url)](href) — no escaped brackets
2789        let rule = MD033NoInlineHtml::with_fix(true);
2790        let content = r#"<a href="https://example.com">![](https://example.com/image.png)</a>"#;
2791        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2792        let fixed = rule.fix(&ctx).unwrap();
2793
2794        assert_eq!(fixed, "[![](https://example.com/image.png)](https://example.com)",);
2795        assert!(!fixed.contains(r"\["), "Must not escape brackets: {fixed}");
2796        assert!(!fixed.contains(r"\]"), "Must not escape brackets: {fixed}");
2797    }
2798
2799    #[test]
2800    fn test_md033_fix_a_wrapping_markdown_image_with_alt() {
2801        // <a> wrapping ![alt](url) preserves alt text in linked image
2802        let rule = MD033NoInlineHtml::with_fix(true);
2803        let content =
2804            r#"<a href="https://github.com/repo">![Contributors](https://contrib.rocks/image?repo=org/repo)</a>"#;
2805        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2806        let fixed = rule.fix(&ctx).unwrap();
2807
2808        assert_eq!(
2809            fixed,
2810            "[![Contributors](https://contrib.rocks/image?repo=org/repo)](https://github.com/repo)"
2811        );
2812    }
2813
2814    #[test]
2815    fn test_md033_fix_img_without_alt_produces_empty_alt() {
2816        let rule = MD033NoInlineHtml::with_fix(true);
2817        let content = r#"<img src="photo.jpg" />"#;
2818        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2819        let fixed = rule.fix(&ctx).unwrap();
2820
2821        assert_eq!(fixed, "![](photo.jpg)");
2822    }
2823
2824    #[test]
2825    fn test_md033_fix_a_with_plain_text_still_escapes_brackets() {
2826        // Plain text brackets inside <a> SHOULD be escaped
2827        let rule = MD033NoInlineHtml::with_fix(true);
2828        let content = r#"<a href="https://example.com">text with [brackets]</a>"#;
2829        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2830        let fixed = rule.fix(&ctx).unwrap();
2831
2832        assert!(
2833            fixed.contains(r"\[brackets\]"),
2834            "Plain text brackets should be escaped: {fixed}"
2835        );
2836    }
2837
2838    #[test]
2839    fn test_md033_fix_a_with_image_plus_extra_text_escapes_brackets() {
2840        // Mixed content: image followed by bracketed text — brackets must be escaped
2841        // The image detection must NOT match partial content
2842        let rule = MD033NoInlineHtml::with_fix(true);
2843        let content = r#"<a href="/link">![](img.png) see [docs]</a>"#;
2844        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
2845        let fixed = rule.fix(&ctx).unwrap();
2846
2847        // "see [docs]" brackets should be escaped since inner content is mixed
2848        assert!(
2849            fixed.contains(r"\[docs\]"),
2850            "Brackets in mixed image+text content should be escaped: {fixed}"
2851        );
2852    }
2853
2854    #[test]
2855    fn test_md033_fix_img_in_a_end_to_end() {
2856        // End-to-end: verify that iterative fixing of <a><img></a>
2857        // produces the correct final result through the fix coordinator
2858        use crate::config::Config;
2859        use crate::fix_coordinator::FixCoordinator;
2860
2861        let rule = MD033NoInlineHtml::with_fix(true);
2862        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2863
2864        let mut content =
2865            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image?repo=org/repo" /></a>"#
2866                .to_string();
2867        let config = Config::default();
2868        let coordinator = FixCoordinator::new();
2869
2870        let result = coordinator
2871            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2872            .unwrap();
2873
2874        assert_eq!(
2875            content, "[![](https://contrib.rocks/image?repo=org/repo)](https://github.com/org/repo)",
2876            "End-to-end: <a><img></a> should become valid linked image"
2877        );
2878        assert!(result.converged);
2879        assert!(!content.contains(r"\["), "No escaped brackets: {content}");
2880    }
2881
2882    #[test]
2883    fn test_md033_fix_img_in_a_with_alt_end_to_end() {
2884        use crate::config::Config;
2885        use crate::fix_coordinator::FixCoordinator;
2886
2887        let rule = MD033NoInlineHtml::with_fix(true);
2888        let rules: Vec<Box<dyn crate::rule::Rule>> = vec![Box::new(rule)];
2889
2890        let mut content =
2891            r#"<a href="https://github.com/org/repo"><img src="https://contrib.rocks/image" alt="Contributors" /></a>"#
2892                .to_string();
2893        let config = Config::default();
2894        let coordinator = FixCoordinator::new();
2895
2896        let result = coordinator
2897            .apply_fixes_iterative(&rules, &[], &mut content, &config, 10, None)
2898            .unwrap();
2899
2900        assert_eq!(
2901            content,
2902            "[![Contributors](https://contrib.rocks/image)](https://github.com/org/repo)",
2903        );
2904        assert!(result.converged);
2905    }
2906}