1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::regex_cache::*;
9use std::collections::HashSet;
10
11mod md033_config;
12use md033_config::MD033Config;
13
/// Lint rule MD033: reports inline HTML tags found in Markdown content.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    /// Rule configuration this instance was built from.
    config: MD033Config,
    /// Lookup set produced by `MD033Config::allowed_set`; tags whose name is in
    /// this set are not reported.
    allowed: HashSet<String>,
}
19
20impl Default for MD033NoInlineHtml {
21 fn default() -> Self {
22 let config = MD033Config::default();
23 let allowed = config.allowed_set();
24 Self { config, allowed }
25 }
26}
27
28impl MD033NoInlineHtml {
29 pub fn new() -> Self {
30 Self::default()
31 }
32
33 pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
34 let config = MD033Config {
35 allowed: allowed_vec.clone(),
36 };
37 let allowed = config.allowed_set();
38 Self { config, allowed }
39 }
40
41 pub fn from_config_struct(config: MD033Config) -> Self {
42 let allowed = config.allowed_set();
43 Self { config, allowed }
44 }
45
46 #[inline]
48 fn is_tag_allowed(&self, tag: &str) -> bool {
49 if self.allowed.is_empty() {
50 return false;
51 }
52 let tag = tag.trim_start_matches('<').trim_start_matches('/');
54 let tag_name = tag
55 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
56 .next()
57 .unwrap_or("");
58 self.allowed.contains(&tag_name.to_lowercase())
59 }
60
61 #[inline]
63 fn is_html_comment(&self, tag: &str) -> bool {
64 tag.starts_with("<!--") && tag.ends_with("-->")
65 }
66
/// Returns `true` when `tag_name` is one of the HTML element names listed below
/// (compared ASCII case-insensitively) or contains a hyphen, which is treated
/// as a custom element name.
#[inline]
fn is_html_element_or_custom(tag_name: &str) -> bool {
    const HTML_ELEMENTS: &[&str] = &[
        // Document structure and metadata
        "html", "head", "body", "title", "base", "link", "meta", "style",
        // Sections
        "article", "section", "nav", "aside", "h1", "h2", "h3", "h4", "h5", "h6", "hgroup",
        "header", "footer", "address", "main", "search",
        // Grouping content
        "p", "hr", "pre", "blockquote", "ol", "ul", "menu", "li", "dl", "dt", "dd", "figure",
        "figcaption", "div",
        // Text-level semantics
        "a", "em", "strong", "small", "s", "cite", "q", "dfn", "abbr", "ruby", "rt", "rp",
        "data", "time", "code", "var", "samp", "kbd", "sub", "sup", "i", "b", "u", "mark",
        "bdi", "bdo", "span", "br", "wbr",
        // Edits
        "ins", "del",
        // Embedded content
        "picture", "source", "img", "iframe", "embed", "object", "param", "video", "audio",
        "track", "map", "area", "svg", "math", "canvas",
        // Tables
        "table", "caption", "colgroup", "col", "tbody", "thead", "tfoot", "tr", "td", "th",
        // Forms
        "form", "label", "input", "button", "select", "datalist", "optgroup", "option",
        "textarea", "output", "progress", "meter", "fieldset", "legend",
        // Interactive elements
        "details", "summary", "dialog",
        // Scripting
        "script", "noscript", "template", "slot",
        // Legacy elements
        "acronym", "applet", "basefont", "big", "center", "dir", "font", "frame", "frameset",
        "isindex", "noframes", "strike", "tt",
    ];

    tag_name.contains('-')
        || HTML_ELEMENTS
            .iter()
            .any(|known| known.eq_ignore_ascii_case(tag_name))
}
222
223 #[inline]
225 fn is_likely_type_annotation(&self, tag: &str) -> bool {
226 const COMMON_TYPES: &[&str] = &[
228 "string",
229 "number",
230 "any",
231 "void",
232 "null",
233 "undefined",
234 "array",
235 "promise",
236 "function",
237 "error",
238 "date",
239 "regexp",
240 "symbol",
241 "bigint",
242 "map",
243 "set",
244 "weakmap",
245 "weakset",
246 "iterator",
247 "generator",
248 "t",
249 "u",
250 "v",
251 "k",
252 "e", "userdata",
254 "apiresponse",
255 "config",
256 "options",
257 "params",
258 "result",
259 "response",
260 "request",
261 "data",
262 "item",
263 "element",
264 "node",
265 ];
266
267 let tag_content = tag
268 .trim_start_matches('<')
269 .trim_end_matches('>')
270 .trim_start_matches('/');
271 let tag_name = tag_content
272 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
273 .next()
274 .unwrap_or("");
275
276 if !tag_content.contains(' ') && !tag_content.contains('=') {
278 COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
279 } else {
280 false
281 }
282 }
283
284 #[inline]
286 fn is_email_address(&self, tag: &str) -> bool {
287 let content = tag.trim_start_matches('<').trim_end_matches('>');
288 content.contains('@')
290 && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
291 && content.split('@').count() == 2
292 && content.split('@').all(|part| !part.is_empty())
293 }
294
295 #[inline]
297 fn has_markdown_attribute(&self, tag: &str) -> bool {
298 tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
301 }
302
303 #[inline]
305 fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
306 let content = tag.trim_start_matches('<').trim_end_matches('>');
307 content.starts_with("http://")
309 || content.starts_with("https://")
310 || content.starts_with("ftp://")
311 || content.starts_with("ftps://")
312 || content.starts_with("mailto:")
313 }
314
315 fn calculate_fix(
323 &self,
324 content: &str,
325 opening_tag: &str,
326 tag_byte_start: usize,
327 ) -> Option<(std::ops::Range<usize>, String)> {
328 if opening_tag.ends_with("/>") {
330 return Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()));
331 }
332
333 let tag_name = opening_tag
335 .trim_start_matches('<')
336 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
337 .next()?
338 .to_lowercase();
339
340 let closing_tag = format!("</{tag_name}>");
342
343 let search_start = tag_byte_start + opening_tag.len();
345 if let Some(closing_pos) = content[search_start..].find(&closing_tag) {
346 let closing_byte_start = search_start + closing_pos;
347 let closing_byte_end = closing_byte_start + closing_tag.len();
348
349 let inner_content = &content[search_start..closing_byte_start];
351
352 return Some((tag_byte_start..closing_byte_end, inner_content.to_string()));
353 }
354
355 Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()))
357 }
358}
359
360impl Rule for MD033NoInlineHtml {
/// Returns the rule identifier, `"MD033"`.
fn name(&self) -> &'static str {
    "MD033"
}
364
/// Returns the one-line, human-readable description of the rule.
fn description(&self) -> &'static str {
    "Inline HTML is not allowed"
}
368
369 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
370 let content = ctx.content;
371
372 if content.is_empty() || !ctx.likely_has_html() {
374 return Ok(Vec::new());
375 }
376
377 if !HTML_TAG_QUICK_CHECK.is_match(content) {
379 return Ok(Vec::new());
380 }
381
382 let mut warnings = Vec::new();
383 let lines: Vec<&str> = content.lines().collect();
384
385 let mut in_nomarkdown = false;
387 let mut in_comment = false;
388 let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
389 let mut nomarkdown_start = 0;
390 let mut comment_start = 0;
391
392 for (i, line) in lines.iter().enumerate() {
393 let line_num = i + 1;
394
395 if line.trim() == "{::nomarkdown}" {
397 in_nomarkdown = true;
398 nomarkdown_start = line_num;
399 } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
400 in_nomarkdown = false;
401 nomarkdown_ranges.push((nomarkdown_start, line_num));
402 }
403
404 if line.trim() == "{::comment}" {
406 in_comment = true;
407 comment_start = line_num;
408 } else if line.trim() == "{:/comment}" && in_comment {
409 in_comment = false;
410 nomarkdown_ranges.push((comment_start, line_num));
411 }
412 }
413
414 let html_tags = ctx.html_tags();
416
417 for html_tag in html_tags.iter() {
418 if html_tag.is_closing {
420 continue;
421 }
422
423 let line_num = html_tag.line;
424 let tag_byte_start = html_tag.byte_offset;
425
426 let tag = &content[html_tag.byte_offset..html_tag.byte_end];
428
429 if ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
431 continue;
432 }
433
434 if let Some(line) = lines.get(line_num.saturating_sub(1))
436 && (is_kramdown_extension(line) || is_kramdown_block_attribute(line))
437 {
438 continue;
439 }
440
441 if nomarkdown_ranges
443 .iter()
444 .any(|(start, end)| line_num >= *start && line_num <= *end)
445 {
446 continue;
447 }
448
449 if ctx.is_in_html_comment(tag_byte_start) {
451 continue;
452 }
453
454 if self.is_html_comment(tag) {
456 continue;
457 }
458
459 if ctx.is_in_link_title(tag_byte_start) {
462 continue;
463 }
464
465 if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
467 continue;
468 }
469
470 if !Self::is_html_element_or_custom(&html_tag.tag_name) {
472 continue;
473 }
474
475 if self.is_likely_type_annotation(tag) {
477 continue;
478 }
479
480 if self.is_email_address(tag) {
482 continue;
483 }
484
485 if self.is_url_in_angle_brackets(tag) {
487 continue;
488 }
489
490 if ctx.is_byte_offset_in_code_span(tag_byte_start) {
492 continue;
493 }
494
495 if self.is_tag_allowed(tag) {
497 continue;
498 }
499
500 if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
502 continue;
503 }
504
505 let fix = self
507 .calculate_fix(content, tag, tag_byte_start)
508 .map(|(range, replacement)| Fix { range, replacement });
509
510 warnings.push(LintWarning {
512 rule_name: Some(self.name().to_string()),
513 line: line_num,
514 column: html_tag.start_col + 1, end_line: line_num, end_column: html_tag.end_col + 1, message: format!("Inline HTML found: {tag}"),
518 severity: Severity::Warning,
519 fix,
520 });
521 }
522
523 Ok(warnings)
524 }
525
526 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
527 Ok(ctx.content.to_string())
529 }
530
531 fn fix_capability(&self) -> crate::rule::FixCapability {
532 crate::rule::FixCapability::Unfixable
533 }
534
/// Returns the rule's category, `RuleCategory::Html`.
fn category(&self) -> RuleCategory {
    RuleCategory::Html
}
539
540 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
542 ctx.content.is_empty() || !ctx.likely_has_html()
543 }
544
/// Returns `self` as `&dyn Any`.
fn as_any(&self) -> &dyn std::any::Any {
    self
}
548
549 fn default_config_section(&self) -> Option<(String, toml::Value)> {
550 let json_value = serde_json::to_value(&self.config).ok()?;
551 Some((
552 self.name().to_string(),
553 crate::rule_config_serde::json_to_toml_value(&json_value)?,
554 ))
555 }
556
557 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
558 where
559 Self: Sized,
560 {
561 let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
562 Box::new(Self::from_config_struct(rule_config))
563 }
564}
565
566#[cfg(test)]
567mod tests {
568 use super::*;
569 use crate::lint_context::LintContext;
570 use crate::rule::Rule;
571
572 #[test]
573 fn test_md033_basic_html() {
574 let rule = MD033NoInlineHtml::default();
575 let content = "<div>Some content</div>";
576 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
577 let result = rule.check(&ctx).unwrap();
578 assert_eq!(result.len(), 1); assert!(result[0].message.starts_with("Inline HTML found: <div>"));
581 }
582
583 #[test]
584 fn test_md033_case_insensitive() {
585 let rule = MD033NoInlineHtml::default();
586 let content = "<DiV>Some <B>content</B></dIv>";
587 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
588 let result = rule.check(&ctx).unwrap();
589 assert_eq!(result.len(), 2); assert_eq!(result[0].message, "Inline HTML found: <DiV>");
592 assert_eq!(result[1].message, "Inline HTML found: <B>");
593 }
594
595 #[test]
596 fn test_md033_allowed_tags() {
597 let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
598 let content = "<div>Allowed</div><p>Not allowed</p><br/>";
599 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
600 let result = rule.check(&ctx).unwrap();
601 assert_eq!(result.len(), 1);
603 assert_eq!(result[0].message, "Inline HTML found: <p>");
604
605 let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
607 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
608 let result2 = rule.check(&ctx2).unwrap();
609 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <P>");
611 }
612
613 #[test]
614 fn test_md033_html_comments() {
615 let rule = MD033NoInlineHtml::default();
616 let content = "<!-- This is a comment --> <p>Not a comment</p>";
617 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
618 let result = rule.check(&ctx).unwrap();
619 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <p>");
622 }
623
624 #[test]
625 fn test_md033_tags_in_links() {
626 let rule = MD033NoInlineHtml::default();
627 let content = "[Link](http://example.com/<div>)";
628 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
629 let result = rule.check(&ctx).unwrap();
630 assert_eq!(result.len(), 1);
632 assert_eq!(result[0].message, "Inline HTML found: <div>");
633
634 let content2 = "[Link <a>text</a>](url)";
635 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
636 let result2 = rule.check(&ctx2).unwrap();
637 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <a>");
640 }
641
642 #[test]
643 fn test_md033_fix_escaping() {
644 let rule = MD033NoInlineHtml::default();
645 let content = "Text with <div> and <br/> tags.";
646 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
647 let fixed_content = rule.fix(&ctx).unwrap();
648 assert_eq!(fixed_content, content);
650 }
651
652 #[test]
653 fn test_md033_in_code_blocks() {
654 let rule = MD033NoInlineHtml::default();
655 let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
656 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
657 let result = rule.check(&ctx).unwrap();
658 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <div>");
661 }
662
663 #[test]
664 fn test_md033_in_code_spans() {
665 let rule = MD033NoInlineHtml::default();
666 let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
667 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
668 let result = rule.check(&ctx).unwrap();
669 assert_eq!(result.len(), 1);
671 assert_eq!(result[0].message, "Inline HTML found: <br/>");
672 }
673
674 #[test]
675 fn test_md033_issue_90_code_span_with_diff_block() {
676 let rule = MD033NoInlineHtml::default();
678 let content = r#"# Heading
679
680`<env>`
681
682```diff
683- this
684+ that
685```"#;
686 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
687 let result = rule.check(&ctx).unwrap();
688 assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
690 }
691
692 #[test]
693 fn test_md033_multiple_code_spans_with_angle_brackets() {
694 let rule = MD033NoInlineHtml::default();
696 let content = "`<one>` and `<two>` and `<three>` are all code spans";
697 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
698 let result = rule.check(&ctx).unwrap();
699 assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
700 }
701
702 #[test]
703 fn test_md033_nested_angle_brackets_in_code_span() {
704 let rule = MD033NoInlineHtml::default();
706 let content = "Text with `<<nested>>` brackets";
707 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
708 let result = rule.check(&ctx).unwrap();
709 assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
710 }
711
712 #[test]
713 fn test_md033_code_span_at_end_before_code_block() {
714 let rule = MD033NoInlineHtml::default();
716 let content = "Testing `<test>`\n```\ncode here\n```";
717 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
718 let result = rule.check(&ctx).unwrap();
719 assert_eq!(result.len(), 0, "Should handle code span before code block");
720 }
721
722 #[test]
723 fn test_md033_quick_fix_inline_tag() {
724 let rule = MD033NoInlineHtml::default();
726 let content = "This has <span>inline text</span> that should keep content.";
727 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
728 let result = rule.check(&ctx).unwrap();
729
730 assert_eq!(result.len(), 1, "Should find one HTML tag");
731 assert!(result[0].fix.is_some(), "Should have a fix");
732
733 let fix = result[0].fix.as_ref().unwrap();
734 assert_eq!(&content[fix.range.clone()], "<span>inline text</span>");
735 assert_eq!(fix.replacement, "inline text");
736 }
737
738 #[test]
739 fn test_md033_quick_fix_multiline_tag() {
740 let rule = MD033NoInlineHtml::default();
742 let content = "<div>\nBlock content\n</div>";
743 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
744 let result = rule.check(&ctx).unwrap();
745
746 assert_eq!(result.len(), 1, "Should find one HTML tag");
747 assert!(result[0].fix.is_some(), "Should have a fix");
748
749 let fix = result[0].fix.as_ref().unwrap();
750 assert_eq!(&content[fix.range.clone()], "<div>\nBlock content\n</div>");
751 assert_eq!(fix.replacement, "\nBlock content\n");
752 }
753
754 #[test]
755 fn test_md033_quick_fix_self_closing_tag() {
756 let rule = MD033NoInlineHtml::default();
758 let content = "Self-closing: <br/>";
759 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
760 let result = rule.check(&ctx).unwrap();
761
762 assert_eq!(result.len(), 1, "Should find one HTML tag");
763 assert!(result[0].fix.is_some(), "Should have a fix");
764
765 let fix = result[0].fix.as_ref().unwrap();
766 assert_eq!(&content[fix.range.clone()], "<br/>");
767 assert_eq!(fix.replacement, "");
768 }
769
770 #[test]
771 fn test_md033_quick_fix_multiple_tags() {
772 let rule = MD033NoInlineHtml::default();
774 let content = "<span>first</span> and <strong>second</strong>";
775 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
776 let result = rule.check(&ctx).unwrap();
777
778 assert_eq!(result.len(), 2, "Should find two HTML tags");
779 assert!(result[0].fix.is_some(), "First tag should have a fix");
780 assert!(result[1].fix.is_some(), "Second tag should have a fix");
781
782 let fix1 = result[0].fix.as_ref().unwrap();
783 assert_eq!(&content[fix1.range.clone()], "<span>first</span>");
784 assert_eq!(fix1.replacement, "first");
785
786 let fix2 = result[1].fix.as_ref().unwrap();
787 assert_eq!(&content[fix2.range.clone()], "<strong>second</strong>");
788 assert_eq!(fix2.replacement, "second");
789 }
790
791 #[test]
792 fn test_md033_skip_angle_brackets_in_link_titles() {
793 let rule = MD033NoInlineHtml::default();
795 let content = r#"# Test
796
797[example]: <https://example.com> "Title with <Angle Brackets> inside"
798
799Regular text with <div>content</div> HTML tag.
800"#;
801 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
802 let result = rule.check(&ctx).unwrap();
803
804 assert_eq!(result.len(), 1, "Should find opening div tag");
807 assert!(
808 result[0].message.contains("<div>"),
809 "Should flag <div>, got: {}",
810 result[0].message
811 );
812 }
813
814 #[test]
815 fn test_md033_skip_angle_brackets_in_link_title_single_quotes() {
816 let rule = MD033NoInlineHtml::default();
818 let content = r#"[ref]: url 'Title <Help Wanted> here'
819
820<span>text</span> here
821"#;
822 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
823 let result = rule.check(&ctx).unwrap();
824
825 assert_eq!(result.len(), 1, "Should find opening span tag");
828 assert!(
829 result[0].message.contains("<span>"),
830 "Should flag <span>, got: {}",
831 result[0].message
832 );
833 }
834}