rumdl_lib/rules/
md033_no_inline_html.rs

1//!
2//! Rule MD033: No HTML tags
3//!
4//! See [docs/md033.md](../../docs/md033.md) for full documentation, configuration, and examples.
5
6use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::regex_cache::*;
9use std::collections::HashSet;
10
11mod md033_config;
12use md033_config::MD033Config;
13
/// Lint rule MD033: reports inline HTML tags found in Markdown content.
///
/// The two lookup sets are derived from `config` once, at construction time
/// (see `from_config_struct`), so per-tag checks only need a set lookup.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    /// Rule configuration; also the value serialized by `default_config_section`.
    config: MD033Config,
    /// Result of `config.allowed_set()`. Tag names are lowercased before lookup.
    allowed: HashSet<String>,
    /// Result of `config.disallowed_set()`. Tag names are lowercased before lookup.
    disallowed: HashSet<String>,
}
20
21impl Default for MD033NoInlineHtml {
22    fn default() -> Self {
23        let config = MD033Config::default();
24        let allowed = config.allowed_set();
25        let disallowed = config.disallowed_set();
26        Self {
27            config,
28            allowed,
29            disallowed,
30        }
31    }
32}
33
34impl MD033NoInlineHtml {
    /// Creates the rule with the default configuration; same as `Self::default()`.
    pub fn new() -> Self {
        Self::default()
    }
38
39    pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
40        let config = MD033Config {
41            allowed: allowed_vec.clone(),
42            disallowed: Vec::new(),
43        };
44        let allowed = config.allowed_set();
45        let disallowed = config.disallowed_set();
46        Self {
47            config,
48            allowed,
49            disallowed,
50        }
51    }
52
53    pub fn with_disallowed(disallowed_vec: Vec<String>) -> Self {
54        let config = MD033Config {
55            allowed: Vec::new(),
56            disallowed: disallowed_vec.clone(),
57        };
58        let allowed = config.allowed_set();
59        let disallowed = config.disallowed_set();
60        Self {
61            config,
62            allowed,
63            disallowed,
64        }
65    }
66
67    pub fn from_config_struct(config: MD033Config) -> Self {
68        let allowed = config.allowed_set();
69        let disallowed = config.disallowed_set();
70        Self {
71            config,
72            allowed,
73            disallowed,
74        }
75    }
76
77    // Efficient check for allowed tags using HashSet (case-insensitive)
78    #[inline]
79    fn is_tag_allowed(&self, tag: &str) -> bool {
80        if self.allowed.is_empty() {
81            return false;
82        }
83        // Remove angle brackets and slashes, then split by whitespace or '>'
84        let tag = tag.trim_start_matches('<').trim_start_matches('/');
85        let tag_name = tag
86            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
87            .next()
88            .unwrap_or("");
89        self.allowed.contains(&tag_name.to_lowercase())
90    }
91
92    /// Check if a tag is in the disallowed set (for disallowed-only mode)
93    #[inline]
94    fn is_tag_disallowed(&self, tag: &str) -> bool {
95        if self.disallowed.is_empty() {
96            return false;
97        }
98        // Remove angle brackets and slashes, then split by whitespace or '>'
99        let tag = tag.trim_start_matches('<').trim_start_matches('/');
100        let tag_name = tag
101            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
102            .next()
103            .unwrap_or("");
104        self.disallowed.contains(&tag_name.to_lowercase())
105    }
106
    /// Check if operating in disallowed-only mode.
    ///
    /// Thin wrapper over `MD033Config::is_disallowed_mode`. `check` uses the
    /// answer to choose which list to consult: in disallowed mode only tags in
    /// the disallowed list are reported, otherwise every tag not in the allowed
    /// list is reported.
    #[inline]
    fn is_disallowed_mode(&self) -> bool {
        self.config.is_disallowed_mode()
    }
112
113    // Check if a tag is an HTML comment
114    #[inline]
115    fn is_html_comment(&self, tag: &str) -> bool {
116        tag.starts_with("<!--") && tag.ends_with("-->")
117    }
118
119    /// Check if a tag name is a valid HTML element or custom element.
120    /// Returns false for placeholder syntax like `<NAME>`, `<resource>`, `<actual>`.
121    ///
122    /// Per HTML spec, custom elements must contain a hyphen (e.g., `<my-component>`).
123    #[inline]
124    fn is_html_element_or_custom(tag_name: &str) -> bool {
125        const HTML_ELEMENTS: &[&str] = &[
126            // Document structure
127            "html",
128            "head",
129            "body",
130            "title",
131            "base",
132            "link",
133            "meta",
134            "style",
135            // Sections
136            "article",
137            "section",
138            "nav",
139            "aside",
140            "h1",
141            "h2",
142            "h3",
143            "h4",
144            "h5",
145            "h6",
146            "hgroup",
147            "header",
148            "footer",
149            "address",
150            "main",
151            "search",
152            // Grouping
153            "p",
154            "hr",
155            "pre",
156            "blockquote",
157            "ol",
158            "ul",
159            "menu",
160            "li",
161            "dl",
162            "dt",
163            "dd",
164            "figure",
165            "figcaption",
166            "div",
167            // Text-level
168            "a",
169            "em",
170            "strong",
171            "small",
172            "s",
173            "cite",
174            "q",
175            "dfn",
176            "abbr",
177            "ruby",
178            "rt",
179            "rp",
180            "data",
181            "time",
182            "code",
183            "var",
184            "samp",
185            "kbd",
186            "sub",
187            "sup",
188            "i",
189            "b",
190            "u",
191            "mark",
192            "bdi",
193            "bdo",
194            "span",
195            "br",
196            "wbr",
197            // Edits
198            "ins",
199            "del",
200            // Embedded
201            "picture",
202            "source",
203            "img",
204            "iframe",
205            "embed",
206            "object",
207            "param",
208            "video",
209            "audio",
210            "track",
211            "map",
212            "area",
213            "svg",
214            "math",
215            "canvas",
216            // Tables
217            "table",
218            "caption",
219            "colgroup",
220            "col",
221            "tbody",
222            "thead",
223            "tfoot",
224            "tr",
225            "td",
226            "th",
227            // Forms
228            "form",
229            "label",
230            "input",
231            "button",
232            "select",
233            "datalist",
234            "optgroup",
235            "option",
236            "textarea",
237            "output",
238            "progress",
239            "meter",
240            "fieldset",
241            "legend",
242            // Interactive
243            "details",
244            "summary",
245            "dialog",
246            // Scripting
247            "script",
248            "noscript",
249            "template",
250            "slot",
251            // Deprecated but recognized
252            "acronym",
253            "applet",
254            "basefont",
255            "big",
256            "center",
257            "dir",
258            "font",
259            "frame",
260            "frameset",
261            "isindex",
262            "marquee",
263            "noembed",
264            "noframes",
265            "plaintext",
266            "strike",
267            "tt",
268            "xmp",
269        ];
270
271        let lower = tag_name.to_ascii_lowercase();
272        if HTML_ELEMENTS.contains(&lower.as_str()) {
273            return true;
274        }
275        // Custom elements must contain a hyphen per HTML spec
276        tag_name.contains('-')
277    }
278
279    // Check if a tag is likely a programming type annotation rather than HTML
280    #[inline]
281    fn is_likely_type_annotation(&self, tag: &str) -> bool {
282        // Common programming type names that are often used in generics
283        const COMMON_TYPES: &[&str] = &[
284            "string",
285            "number",
286            "any",
287            "void",
288            "null",
289            "undefined",
290            "array",
291            "promise",
292            "function",
293            "error",
294            "date",
295            "regexp",
296            "symbol",
297            "bigint",
298            "map",
299            "set",
300            "weakmap",
301            "weakset",
302            "iterator",
303            "generator",
304            "t",
305            "u",
306            "v",
307            "k",
308            "e", // Common single-letter type parameters
309            "userdata",
310            "apiresponse",
311            "config",
312            "options",
313            "params",
314            "result",
315            "response",
316            "request",
317            "data",
318            "item",
319            "element",
320            "node",
321        ];
322
323        let tag_content = tag
324            .trim_start_matches('<')
325            .trim_end_matches('>')
326            .trim_start_matches('/');
327        let tag_name = tag_content
328            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
329            .next()
330            .unwrap_or("");
331
332        // Check if it's a simple tag (no attributes) with a common type name
333        if !tag_content.contains(' ') && !tag_content.contains('=') {
334            COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
335        } else {
336            false
337        }
338    }
339
340    // Check if a tag is actually an email address in angle brackets
341    #[inline]
342    fn is_email_address(&self, tag: &str) -> bool {
343        let content = tag.trim_start_matches('<').trim_end_matches('>');
344        // Simple email pattern: contains @ and has reasonable structure
345        content.contains('@')
346            && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
347            && content.split('@').count() == 2
348            && content.split('@').all(|part| !part.is_empty())
349    }
350
351    // Check if a tag has the markdown attribute (MkDocs/Material for MkDocs)
352    #[inline]
353    fn has_markdown_attribute(&self, tag: &str) -> bool {
354        // Check for various forms of markdown attribute
355        // Examples: <div markdown>, <div markdown="1">, <div class="result" markdown>
356        tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
357    }
358
359    /// Check if a tag contains JSX-specific attributes that indicate it's JSX, not HTML
360    /// JSX uses different attribute names than HTML:
361    /// - `className` instead of `class`
362    /// - `htmlFor` instead of `for`
363    /// - camelCase event handlers (`onClick`, `onChange`, `onSubmit`, etc.)
364    /// - JSX expression syntax `={...}` for dynamic values
365    #[inline]
366    fn has_jsx_attributes(tag: &str) -> bool {
367        // JSX-specific attribute names (HTML uses class, for, onclick, etc.)
368        tag.contains("className")
369            || tag.contains("htmlFor")
370            || tag.contains("dangerouslySetInnerHTML")
371            // camelCase event handlers (JSX uses onClick, HTML uses onclick)
372            || tag.contains("onClick")
373            || tag.contains("onChange")
374            || tag.contains("onSubmit")
375            || tag.contains("onFocus")
376            || tag.contains("onBlur")
377            || tag.contains("onKeyDown")
378            || tag.contains("onKeyUp")
379            || tag.contains("onKeyPress")
380            || tag.contains("onMouseDown")
381            || tag.contains("onMouseUp")
382            || tag.contains("onMouseEnter")
383            || tag.contains("onMouseLeave")
384            // JSX expression syntax: ={expression} or ={ expression }
385            || tag.contains("={")
386    }
387
388    // Check if a tag is actually a URL in angle brackets
389    #[inline]
390    fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
391        let content = tag.trim_start_matches('<').trim_end_matches('>');
392        // Check for common URL schemes
393        content.starts_with("http://")
394            || content.starts_with("https://")
395            || content.starts_with("ftp://")
396            || content.starts_with("ftps://")
397            || content.starts_with("mailto:")
398    }
399
400    /// Calculate fix to remove HTML tags while keeping content
401    ///
402    /// For self-closing tags like `<br/>`, returns a single fix to remove the tag.
403    /// For paired tags like `<span>text</span>`, returns the replacement text (just the content).
404    ///
405    /// Returns (range, replacement_text) where range is the bytes to replace
406    /// and replacement_text is what to put there (content without tags, or empty for self-closing).
407    fn calculate_fix(
408        &self,
409        content: &str,
410        opening_tag: &str,
411        tag_byte_start: usize,
412    ) -> Option<(std::ops::Range<usize>, String)> {
413        // Check if it's a self-closing tag (ends with />)
414        if opening_tag.ends_with("/>") {
415            return Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()));
416        }
417
418        // Extract tag name from opening tag (e.g., "<div>" -> "div", "<span class='x'>" -> "span")
419        let tag_name = opening_tag
420            .trim_start_matches('<')
421            .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
422            .next()?
423            .to_lowercase();
424
425        // Build the closing tag pattern
426        let closing_tag = format!("</{tag_name}>");
427
428        // Search for the closing tag after the opening tag
429        let search_start = tag_byte_start + opening_tag.len();
430        if let Some(closing_pos) = content[search_start..].find(&closing_tag) {
431            let closing_byte_start = search_start + closing_pos;
432            let closing_byte_end = closing_byte_start + closing_tag.len();
433
434            // Extract the content between tags
435            let inner_content = &content[search_start..closing_byte_start];
436
437            return Some((tag_byte_start..closing_byte_end, inner_content.to_string()));
438        }
439
440        // If no closing tag found, just remove the opening tag
441        Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()))
442    }
443}
444
445impl Rule for MD033NoInlineHtml {
    /// Returns the rule identifier, `MD033`.
    fn name(&self) -> &'static str {
        "MD033"
    }
449
    /// Returns the one-line summary of what this rule flags.
    fn description(&self) -> &'static str {
        "Inline HTML is not allowed"
    }
453
454    fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
455        let content = ctx.content;
456
457        // Early return: if no HTML tags at all, skip processing
458        if content.is_empty() || !ctx.likely_has_html() {
459            return Ok(Vec::new());
460        }
461
462        // Quick check for HTML tag pattern before expensive processing
463        if !HTML_TAG_QUICK_CHECK.is_match(content) {
464            return Ok(Vec::new());
465        }
466
467        let mut warnings = Vec::new();
468        let lines: Vec<&str> = content.lines().collect();
469
470        // Track nomarkdown and comment blocks (Kramdown extension)
471        let mut in_nomarkdown = false;
472        let mut in_comment = false;
473        let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
474        let mut nomarkdown_start = 0;
475        let mut comment_start = 0;
476
477        for (i, line) in lines.iter().enumerate() {
478            let line_num = i + 1;
479
480            // Check for nomarkdown start
481            if line.trim() == "{::nomarkdown}" {
482                in_nomarkdown = true;
483                nomarkdown_start = line_num;
484            } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
485                in_nomarkdown = false;
486                nomarkdown_ranges.push((nomarkdown_start, line_num));
487            }
488
489            // Check for comment blocks
490            if line.trim() == "{::comment}" {
491                in_comment = true;
492                comment_start = line_num;
493            } else if line.trim() == "{:/comment}" && in_comment {
494                in_comment = false;
495                nomarkdown_ranges.push((comment_start, line_num));
496            }
497        }
498
499        // Use centralized HTML parser to get all HTML tags (including multiline)
500        let html_tags = ctx.html_tags();
501
502        for html_tag in html_tags.iter() {
503            // Skip closing tags (only warn on opening tags)
504            if html_tag.is_closing {
505                continue;
506            }
507
508            let line_num = html_tag.line;
509            let tag_byte_start = html_tag.byte_offset;
510
511            // Reconstruct tag string from byte offsets
512            let tag = &content[html_tag.byte_offset..html_tag.byte_end];
513
514            // Skip tags in code blocks (uses proper code block detection from LintContext)
515            if ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
516                continue;
517            }
518
519            // Skip Kramdown extensions and block attributes
520            if let Some(line) = lines.get(line_num.saturating_sub(1))
521                && (is_kramdown_extension(line) || is_kramdown_block_attribute(line))
522            {
523                continue;
524            }
525
526            // Skip lines inside nomarkdown blocks
527            if nomarkdown_ranges
528                .iter()
529                .any(|(start, end)| line_num >= *start && line_num <= *end)
530            {
531                continue;
532            }
533
534            // Skip HTML tags inside HTML comments
535            if ctx.is_in_html_comment(tag_byte_start) {
536                continue;
537            }
538
539            // Skip HTML comments themselves
540            if self.is_html_comment(tag) {
541                continue;
542            }
543
544            // Skip angle brackets inside link reference definition titles
545            // e.g., [ref]: url "Title with <angle brackets>"
546            if ctx.is_in_link_title(tag_byte_start) {
547                continue;
548            }
549
550            // Skip JSX components in MDX files (e.g., <Chart />, <MyComponent>)
551            if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
552                continue;
553            }
554
555            // Skip JSX fragments in MDX files (<> and </>)
556            if ctx.flavor.supports_jsx() && (html_tag.tag_name.is_empty() || tag == "<>" || tag == "</>") {
557                continue;
558            }
559
560            // Skip elements with JSX-specific attributes in MDX files
561            // e.g., <div className="...">, <button onClick={handler}>
562            if ctx.flavor.supports_jsx() && Self::has_jsx_attributes(tag) {
563                continue;
564            }
565
566            // Skip non-HTML elements (placeholder syntax like <NAME>, <resource>)
567            if !Self::is_html_element_or_custom(&html_tag.tag_name) {
568                continue;
569            }
570
571            // Skip likely programming type annotations
572            if self.is_likely_type_annotation(tag) {
573                continue;
574            }
575
576            // Skip email addresses in angle brackets
577            if self.is_email_address(tag) {
578                continue;
579            }
580
581            // Skip URLs in angle brackets
582            if self.is_url_in_angle_brackets(tag) {
583                continue;
584            }
585
586            // Skip tags inside code spans (use byte offset for reliable multi-line span detection)
587            if ctx.is_byte_offset_in_code_span(tag_byte_start) {
588                continue;
589            }
590
591            // Determine whether to report this tag based on mode:
592            // - Disallowed mode: only report tags in the disallowed list
593            // - Default mode: report all tags except those in the allowed list
594            if self.is_disallowed_mode() {
595                // In disallowed mode, skip tags NOT in the disallowed list
596                if !self.is_tag_disallowed(tag) {
597                    continue;
598                }
599            } else {
600                // In default mode, skip allowed tags
601                if self.is_tag_allowed(tag) {
602                    continue;
603                }
604            }
605
606            // Skip tags with markdown attribute in MkDocs mode
607            if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
608                continue;
609            }
610
611            // Calculate fix to remove HTML tags but keep content
612            let fix = self
613                .calculate_fix(content, tag, tag_byte_start)
614                .map(|(range, replacement)| Fix { range, replacement });
615
616            // Calculate actual end line and column for multiline tags
617            // Use byte_end - 1 to get the last character position of the tag
618            let (end_line, end_col) = if html_tag.byte_end > 0 {
619                ctx.offset_to_line_col(html_tag.byte_end - 1)
620            } else {
621                (line_num, html_tag.end_col + 1)
622            };
623
624            // Report the HTML tag
625            warnings.push(LintWarning {
626                rule_name: Some(self.name().to_string()),
627                line: line_num,
628                column: html_tag.start_col + 1, // Convert to 1-indexed
629                end_line,                       // Actual end line for multiline tags
630                end_column: end_col + 1,        // Actual end column
631                message: format!("Inline HTML found: {tag}"),
632                severity: Severity::Warning,
633                fix,
634            });
635        }
636
637        Ok(warnings)
638    }
639
    /// Returns the document content unchanged.
    ///
    /// This rule does not rewrite HTML in bulk. The per-warning fixes that
    /// `check` attaches to its warnings are not applied by this method.
    fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
        // No fix for MD033: do not remove or alter HTML, just return the input unchanged
        Ok(ctx.content.to_string())
    }
644
    /// Declares the rule unfixable, in line with `fix`, which returns the
    /// content untouched.
    fn fix_capability(&self) -> crate::rule::FixCapability {
        crate::rule::FixCapability::Unfixable
    }
648
    /// Get the category of this rule for selective processing.
    ///
    /// Always `RuleCategory::Html`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Html
    }
653
654    /// Check if this rule should be skipped
655    fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
656        ctx.content.is_empty() || !ctx.likely_has_html()
657    }
658
    /// Exposes the rule as `&dyn Any`, which permits downcasting to the
    /// concrete rule type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
662
663    fn default_config_section(&self) -> Option<(String, toml::Value)> {
664        let json_value = serde_json::to_value(&self.config).ok()?;
665        Some((
666            self.name().to_string(),
667            crate::rule_config_serde::json_to_toml_value(&json_value)?,
668        ))
669    }
670
671    fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
672    where
673        Self: Sized,
674    {
675        let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
676        Box::new(Self::from_config_struct(rule_config))
677    }
678}
679
680#[cfg(test)]
681mod tests {
682    use super::*;
683    use crate::lint_context::LintContext;
684    use crate::rule::Rule;
685
686    #[test]
687    fn test_md033_basic_html() {
688        let rule = MD033NoInlineHtml::default();
689        let content = "<div>Some content</div>";
690        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
691        let result = rule.check(&ctx).unwrap();
692        // Only reports opening tags, not closing tags
693        assert_eq!(result.len(), 1); // Only <div>, not </div>
694        assert!(result[0].message.starts_with("Inline HTML found: <div>"));
695    }
696
697    #[test]
698    fn test_md033_case_insensitive() {
699        let rule = MD033NoInlineHtml::default();
700        let content = "<DiV>Some <B>content</B></dIv>";
701        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
702        let result = rule.check(&ctx).unwrap();
703        // Only reports opening tags, not closing tags
704        assert_eq!(result.len(), 2); // <DiV>, <B> (not </B>, </dIv>)
705        assert_eq!(result[0].message, "Inline HTML found: <DiV>");
706        assert_eq!(result[1].message, "Inline HTML found: <B>");
707    }
708
709    #[test]
710    fn test_md033_allowed_tags() {
711        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
712        let content = "<div>Allowed</div><p>Not allowed</p><br/>";
713        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
714        let result = rule.check(&ctx).unwrap();
715        // Only warnings for non-allowed opening tags (<p> only, div and br are allowed)
716        assert_eq!(result.len(), 1);
717        assert_eq!(result[0].message, "Inline HTML found: <p>");
718
719        // Test case-insensitivity of allowed tags
720        let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
721        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
722        let result2 = rule.check(&ctx2).unwrap();
723        assert_eq!(result2.len(), 1); // Only <P> flagged
724        assert_eq!(result2[0].message, "Inline HTML found: <P>");
725    }
726
727    #[test]
728    fn test_md033_html_comments() {
729        let rule = MD033NoInlineHtml::default();
730        let content = "<!-- This is a comment --> <p>Not a comment</p>";
731        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
732        let result = rule.check(&ctx).unwrap();
733        // Should detect warnings for HTML opening tags (comments are skipped, closing tags not reported)
734        assert_eq!(result.len(), 1); // Only <p>
735        assert_eq!(result[0].message, "Inline HTML found: <p>");
736    }
737
738    #[test]
739    fn test_md033_tags_in_links() {
740        let rule = MD033NoInlineHtml::default();
741        let content = "[Link](http://example.com/<div>)";
742        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
743        let result = rule.check(&ctx).unwrap();
744        // The <div> in the URL should be detected as HTML (not skipped)
745        assert_eq!(result.len(), 1);
746        assert_eq!(result[0].message, "Inline HTML found: <div>");
747
748        let content2 = "[Link <a>text</a>](url)";
749        let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
750        let result2 = rule.check(&ctx2).unwrap();
751        // Only reports opening tags
752        assert_eq!(result2.len(), 1); // Only <a>
753        assert_eq!(result2[0].message, "Inline HTML found: <a>");
754    }
755
756    #[test]
757    fn test_md033_fix_escaping() {
758        let rule = MD033NoInlineHtml::default();
759        let content = "Text with <div> and <br/> tags.";
760        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
761        let fixed_content = rule.fix(&ctx).unwrap();
762        // No fix for HTML tags; output should be unchanged
763        assert_eq!(fixed_content, content);
764    }
765
766    #[test]
767    fn test_md033_in_code_blocks() {
768        let rule = MD033NoInlineHtml::default();
769        let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
770        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
771        let result = rule.check(&ctx).unwrap();
772        // Only reports opening tags outside code block
773        assert_eq!(result.len(), 1); // Only <div> outside code block
774        assert_eq!(result[0].message, "Inline HTML found: <div>");
775    }
776
777    #[test]
778    fn test_md033_in_code_spans() {
779        let rule = MD033NoInlineHtml::default();
780        let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
781        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
782        let result = rule.check(&ctx).unwrap();
783        // Should detect <br/> outside code span, but not tags inside code span
784        assert_eq!(result.len(), 1);
785        assert_eq!(result[0].message, "Inline HTML found: <br/>");
786    }
787
788    #[test]
789    fn test_md033_issue_90_code_span_with_diff_block() {
790        // Test for issue #90: inline code span followed by diff code block
791        let rule = MD033NoInlineHtml::default();
792        let content = r#"# Heading
793
794`<env>`
795
796```diff
797- this
798+ that
799```"#;
800        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
801        let result = rule.check(&ctx).unwrap();
802        // Should NOT detect <env> as HTML since it's inside backticks
803        assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
804    }
805
806    #[test]
807    fn test_md033_multiple_code_spans_with_angle_brackets() {
808        // Test multiple code spans on same line
809        let rule = MD033NoInlineHtml::default();
810        let content = "`<one>` and `<two>` and `<three>` are all code spans";
811        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
812        let result = rule.check(&ctx).unwrap();
813        assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
814    }
815
816    #[test]
817    fn test_md033_nested_angle_brackets_in_code_span() {
818        // Test nested angle brackets
819        let rule = MD033NoInlineHtml::default();
820        let content = "Text with `<<nested>>` brackets";
821        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
822        let result = rule.check(&ctx).unwrap();
823        assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
824    }
825
826    #[test]
827    fn test_md033_code_span_at_end_before_code_block() {
828        // Test code span at end of line before code block
829        let rule = MD033NoInlineHtml::default();
830        let content = "Testing `<test>`\n```\ncode here\n```";
831        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
832        let result = rule.check(&ctx).unwrap();
833        assert_eq!(result.len(), 0, "Should handle code span before code block");
834    }
835
836    #[test]
837    fn test_md033_quick_fix_inline_tag() {
838        // Test Quick Fix for inline HTML tags - keeps content, removes tags
839        let rule = MD033NoInlineHtml::default();
840        let content = "This has <span>inline text</span> that should keep content.";
841        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
842        let result = rule.check(&ctx).unwrap();
843
844        assert_eq!(result.len(), 1, "Should find one HTML tag");
845        assert!(result[0].fix.is_some(), "Should have a fix");
846
847        let fix = result[0].fix.as_ref().unwrap();
848        assert_eq!(&content[fix.range.clone()], "<span>inline text</span>");
849        assert_eq!(fix.replacement, "inline text");
850    }
851
852    #[test]
853    fn test_md033_quick_fix_multiline_tag() {
854        // Test Quick Fix for multiline HTML tags - keeps content
855        let rule = MD033NoInlineHtml::default();
856        let content = "<div>\nBlock content\n</div>";
857        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
858        let result = rule.check(&ctx).unwrap();
859
860        assert_eq!(result.len(), 1, "Should find one HTML tag");
861        assert!(result[0].fix.is_some(), "Should have a fix");
862
863        let fix = result[0].fix.as_ref().unwrap();
864        assert_eq!(&content[fix.range.clone()], "<div>\nBlock content\n</div>");
865        assert_eq!(fix.replacement, "\nBlock content\n");
866    }
867
868    #[test]
869    fn test_md033_quick_fix_self_closing_tag() {
870        // Test Quick Fix for self-closing tags - removes tag (no content)
871        let rule = MD033NoInlineHtml::default();
872        let content = "Self-closing: <br/>";
873        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
874        let result = rule.check(&ctx).unwrap();
875
876        assert_eq!(result.len(), 1, "Should find one HTML tag");
877        assert!(result[0].fix.is_some(), "Should have a fix");
878
879        let fix = result[0].fix.as_ref().unwrap();
880        assert_eq!(&content[fix.range.clone()], "<br/>");
881        assert_eq!(fix.replacement, "");
882    }
883
884    #[test]
885    fn test_md033_quick_fix_multiple_tags() {
886        // Test Quick Fix with multiple HTML tags - keeps content for both
887        let rule = MD033NoInlineHtml::default();
888        let content = "<span>first</span> and <strong>second</strong>";
889        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
890        let result = rule.check(&ctx).unwrap();
891
892        assert_eq!(result.len(), 2, "Should find two HTML tags");
893        assert!(result[0].fix.is_some(), "First tag should have a fix");
894        assert!(result[1].fix.is_some(), "Second tag should have a fix");
895
896        let fix1 = result[0].fix.as_ref().unwrap();
897        assert_eq!(&content[fix1.range.clone()], "<span>first</span>");
898        assert_eq!(fix1.replacement, "first");
899
900        let fix2 = result[1].fix.as_ref().unwrap();
901        assert_eq!(&content[fix2.range.clone()], "<strong>second</strong>");
902        assert_eq!(fix2.replacement, "second");
903    }
904
905    #[test]
906    fn test_md033_skip_angle_brackets_in_link_titles() {
907        // Angle brackets inside link reference definition titles should not be flagged as HTML
908        let rule = MD033NoInlineHtml::default();
909        let content = r#"# Test
910
911[example]: <https://example.com> "Title with <Angle Brackets> inside"
912
913Regular text with <div>content</div> HTML tag.
914"#;
915        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
916        let result = rule.check(&ctx).unwrap();
917
918        // Should only flag <div>, not <Angle Brackets> in the title (not a valid HTML element)
919        // Opening tag only (markdownlint behavior)
920        assert_eq!(result.len(), 1, "Should find opening div tag");
921        assert!(
922            result[0].message.contains("<div>"),
923            "Should flag <div>, got: {}",
924            result[0].message
925        );
926    }
927
928    #[test]
929    fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
930        // Test with single-quoted title
931        let rule = MD033NoInlineHtml::default();
932        let content = r#"[ref]: url 'Title <Help Wanted> here'
933
934<span>text</span> here
935"#;
936        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
937        let result = rule.check(&ctx).unwrap();
938
939        // <Help Wanted> is not a valid HTML element, so only <span> is flagged
940        // Opening tag only (markdownlint behavior)
941        assert_eq!(result.len(), 1, "Should find opening span tag");
942        assert!(
943            result[0].message.contains("<span>"),
944            "Should flag <span>, got: {}",
945            result[0].message
946        );
947    }
948
949    #[test]
950    fn test_md033_multiline_tag_end_line_calculation() {
951        // Test that multiline HTML tags report correct end_line
952        let rule = MD033NoInlineHtml::default();
953        let content = "<div\n  class=\"test\"\n  id=\"example\">";
954        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
955        let result = rule.check(&ctx).unwrap();
956
957        assert_eq!(result.len(), 1, "Should find one HTML tag");
958        // Tag starts on line 1
959        assert_eq!(result[0].line, 1, "Start line should be 1");
960        // Tag ends on line 3 (where the closing > is)
961        assert_eq!(result[0].end_line, 3, "End line should be 3");
962    }
963
964    #[test]
965    fn test_md033_single_line_tag_same_start_end_line() {
966        // Test that single-line HTML tags have same start and end line
967        let rule = MD033NoInlineHtml::default();
968        let content = "Some text <div class=\"test\"> more text";
969        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
970        let result = rule.check(&ctx).unwrap();
971
972        assert_eq!(result.len(), 1, "Should find one HTML tag");
973        assert_eq!(result[0].line, 1, "Start line should be 1");
974        assert_eq!(result[0].end_line, 1, "End line should be 1 for single-line tag");
975    }
976
977    #[test]
978    fn test_md033_multiline_tag_with_many_attributes() {
979        // Test multiline tag spanning multiple lines
980        let rule = MD033NoInlineHtml::default();
981        let content =
982            "Text\n<div\n  data-attr1=\"value1\"\n  data-attr2=\"value2\"\n  data-attr3=\"value3\">\nMore text";
983        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
984        let result = rule.check(&ctx).unwrap();
985
986        assert_eq!(result.len(), 1, "Should find one HTML tag");
987        // Tag starts on line 2 (first line is "Text")
988        assert_eq!(result[0].line, 2, "Start line should be 2");
989        // Tag ends on line 5 (where the closing > is)
990        assert_eq!(result[0].end_line, 5, "End line should be 5");
991    }
992
993    #[test]
994    fn test_md033_disallowed_mode_basic() {
995        // Test disallowed mode: only flags tags in the disallowed list
996        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string(), "iframe".to_string()]);
997        let content = "<div>Safe content</div><script>alert('xss')</script>";
998        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
999        let result = rule.check(&ctx).unwrap();
1000
1001        // Should only flag <script>, not <div>
1002        assert_eq!(result.len(), 1, "Should only flag disallowed tags");
1003        assert!(result[0].message.contains("<script>"), "Should flag script tag");
1004    }
1005
1006    #[test]
1007    fn test_md033_disallowed_gfm_security_tags() {
1008        // Test GFM security tags expansion
1009        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1010        let content = r#"
1011<div>Safe</div>
1012<title>Bad title</title>
1013<textarea>Bad textarea</textarea>
1014<style>.bad{}</style>
1015<iframe src="evil"></iframe>
1016<script>evil()</script>
1017<plaintext>old tag</plaintext>
1018<span>Safe span</span>
1019"#;
1020        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1021        let result = rule.check(&ctx).unwrap();
1022
1023        // Should flag: title, textarea, style, iframe, script, plaintext
1024        // Should NOT flag: div, span
1025        assert_eq!(result.len(), 6, "Should flag 6 GFM security tags");
1026
1027        let flagged_tags: Vec<&str> = result
1028            .iter()
1029            .filter_map(|w| w.message.split("<").nth(1))
1030            .filter_map(|s| s.split(">").next())
1031            .filter_map(|s| s.split_whitespace().next())
1032            .collect();
1033
1034        assert!(flagged_tags.contains(&"title"), "Should flag title");
1035        assert!(flagged_tags.contains(&"textarea"), "Should flag textarea");
1036        assert!(flagged_tags.contains(&"style"), "Should flag style");
1037        assert!(flagged_tags.contains(&"iframe"), "Should flag iframe");
1038        assert!(flagged_tags.contains(&"script"), "Should flag script");
1039        assert!(flagged_tags.contains(&"plaintext"), "Should flag plaintext");
1040        assert!(!flagged_tags.contains(&"div"), "Should NOT flag div");
1041        assert!(!flagged_tags.contains(&"span"), "Should NOT flag span");
1042    }
1043
1044    #[test]
1045    fn test_md033_disallowed_case_insensitive() {
1046        // Test that disallowed check is case-insensitive
1047        let rule = MD033NoInlineHtml::with_disallowed(vec!["script".to_string()]);
1048        let content = "<SCRIPT>alert('xss')</SCRIPT><Script>alert('xss')</Script>";
1049        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1050        let result = rule.check(&ctx).unwrap();
1051
1052        // Should flag both <SCRIPT> and <Script>
1053        assert_eq!(result.len(), 2, "Should flag both case variants");
1054    }
1055
1056    #[test]
1057    fn test_md033_disallowed_with_attributes() {
1058        // Test that disallowed mode works with tags that have attributes
1059        let rule = MD033NoInlineHtml::with_disallowed(vec!["iframe".to_string()]);
1060        let content = r#"<iframe src="https://evil.com" width="100" height="100"></iframe>"#;
1061        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1062        let result = rule.check(&ctx).unwrap();
1063
1064        assert_eq!(result.len(), 1, "Should flag iframe with attributes");
1065        assert!(result[0].message.contains("iframe"), "Should flag iframe");
1066    }
1067
1068    #[test]
1069    fn test_md033_disallowed_all_gfm_tags() {
1070        // Verify all GFM disallowed tags are covered
1071        use md033_config::GFM_DISALLOWED_TAGS;
1072        let rule = MD033NoInlineHtml::with_disallowed(vec!["gfm".to_string()]);
1073
1074        for tag in GFM_DISALLOWED_TAGS {
1075            let content = format!("<{tag}>content</{tag}>");
1076            let ctx = LintContext::new(&content, crate::config::MarkdownFlavor::Standard, None);
1077            let result = rule.check(&ctx).unwrap();
1078
1079            assert_eq!(result.len(), 1, "GFM tag <{tag}> should be flagged");
1080        }
1081    }
1082
1083    #[test]
1084    fn test_md033_disallowed_mixed_with_custom() {
1085        // Test mixing "gfm" with custom disallowed tags
1086        let rule = MD033NoInlineHtml::with_disallowed(vec![
1087            "gfm".to_string(),
1088            "marquee".to_string(), // Custom disallowed tag
1089        ]);
1090        let content = r#"<script>bad</script><marquee>annoying</marquee><div>ok</div>"#;
1091        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1092        let result = rule.check(&ctx).unwrap();
1093
1094        // Should flag script (gfm) and marquee (custom)
1095        assert_eq!(result.len(), 2, "Should flag both gfm and custom tags");
1096    }
1097
1098    #[test]
1099    fn test_md033_disallowed_empty_means_default_mode() {
1100        // Empty disallowed list means default mode (flag all HTML)
1101        let rule = MD033NoInlineHtml::with_disallowed(vec![]);
1102        let content = "<div>content</div>";
1103        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1104        let result = rule.check(&ctx).unwrap();
1105
1106        // Should flag <div> in default mode
1107        assert_eq!(result.len(), 1, "Empty disallowed = default mode");
1108    }
1109
1110    #[test]
1111    fn test_md033_jsx_fragments_in_mdx() {
1112        // JSX fragments (<> and </>) should not trigger warnings in MDX
1113        let rule = MD033NoInlineHtml::default();
1114        let content = r#"# MDX Document
1115
1116<>
1117  <Heading />
1118  <Content />
1119</>
1120
1121<div>Regular HTML should still be flagged</div>
1122"#;
1123        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1124        let result = rule.check(&ctx).unwrap();
1125
1126        // Should only flag <div>, not the fragments or JSX components
1127        assert_eq!(result.len(), 1, "Should only find one HTML tag (the div)");
1128        assert!(
1129            result[0].message.contains("<div>"),
1130            "Should flag <div>, not JSX fragments"
1131        );
1132    }
1133
1134    #[test]
1135    fn test_md033_jsx_components_in_mdx() {
1136        // JSX components (capitalized) should not trigger warnings in MDX
1137        let rule = MD033NoInlineHtml::default();
1138        let content = r#"<CustomComponent prop="value">
1139  Content
1140</CustomComponent>
1141
1142<MyButton onClick={handler}>Click</MyButton>
1143"#;
1144        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1145        let result = rule.check(&ctx).unwrap();
1146
1147        // No warnings - all are JSX components
1148        assert_eq!(result.len(), 0, "Should not flag JSX components in MDX");
1149    }
1150
1151    #[test]
1152    fn test_md033_jsx_not_skipped_in_standard_markdown() {
1153        // In standard markdown, capitalized tags should still be flagged if they're valid HTML
1154        let rule = MD033NoInlineHtml::default();
1155        let content = "<Script>alert(1)</Script>";
1156        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1157        let result = rule.check(&ctx).unwrap();
1158
1159        // Should flag <Script> in standard markdown (it's a valid HTML element)
1160        assert_eq!(result.len(), 1, "Should flag <Script> in standard markdown");
1161    }
1162
1163    #[test]
1164    fn test_md033_jsx_attributes_in_mdx() {
1165        // Elements with JSX-specific attributes should not trigger warnings in MDX
1166        let rule = MD033NoInlineHtml::default();
1167        let content = r#"# MDX with JSX Attributes
1168
1169<div className="card big">Content</div>
1170
1171<button onClick={handleClick}>Click me</button>
1172
1173<label htmlFor="input-id">Label</label>
1174
1175<input onChange={handleChange} />
1176
1177<div class="html-class">Regular HTML should be flagged</div>
1178"#;
1179        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::MDX, None);
1180        let result = rule.check(&ctx).unwrap();
1181
1182        // Should only flag the div with regular HTML "class" attribute
1183        assert_eq!(
1184            result.len(),
1185            1,
1186            "Should only flag HTML element without JSX attributes, got: {result:?}"
1187        );
1188        assert!(
1189            result[0].message.contains("<div class="),
1190            "Should flag the div with HTML class attribute"
1191        );
1192    }
1193
1194    #[test]
1195    fn test_md033_jsx_attributes_not_skipped_in_standard() {
1196        // In standard markdown, JSX attributes should still be flagged
1197        let rule = MD033NoInlineHtml::default();
1198        let content = r#"<div className="card">Content</div>"#;
1199        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
1200        let result = rule.check(&ctx).unwrap();
1201
1202        // Should flag in standard markdown
1203        assert_eq!(result.len(), 1, "Should flag JSX-style elements in standard markdown");
1204    }
1205}