1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::range_utils::calculate_html_tag_range;
9use crate::utils::regex_cache::*;
10use std::collections::HashSet;
11use std::sync::LazyLock;
12
13mod md033_config;
14use md033_config::MD033Config;
15
/// Lint rule MD033: reports inline HTML tags found in Markdown content.
///
/// Tags whose names appear in the configured allow-list are not reported.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    // Rule configuration; also serialized by `default_config_section`.
    config: MD033Config,
    // Allow-list derived from `config.allowed_set()`; consulted by `is_tag_allowed`.
    allowed: HashSet<String>,
}
21
22impl Default for MD033NoInlineHtml {
23 fn default() -> Self {
24 let config = MD033Config::default();
25 let allowed = config.allowed_set();
26 Self { config, allowed }
27 }
28}
29
30impl MD033NoInlineHtml {
    /// Creates the rule with its default configuration (same as `Default::default`).
    pub fn new() -> Self {
        Self::default()
    }
34
35 pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
36 let config = MD033Config {
37 allowed: allowed_vec.clone(),
38 };
39 let allowed = config.allowed_set();
40 Self { config, allowed }
41 }
42
43 pub fn from_config_struct(config: MD033Config) -> Self {
44 let allowed = config.allowed_set();
45 Self { config, allowed }
46 }
47
48 #[inline]
50 fn is_tag_allowed(&self, tag: &str) -> bool {
51 if self.allowed.is_empty() {
52 return false;
53 }
54 let tag = tag.trim_start_matches('<').trim_start_matches('/');
56 let tag_name = tag
57 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
58 .next()
59 .unwrap_or("");
60 self.allowed.contains(&tag_name.to_lowercase())
61 }
62
63 #[inline]
65 fn is_html_comment(&self, tag: &str) -> bool {
66 tag.starts_with("<!--") && tag.ends_with("-->")
67 }
68
69 #[inline]
71 fn is_likely_type_annotation(&self, tag: &str) -> bool {
72 const COMMON_TYPES: &[&str] = &[
74 "string",
75 "number",
76 "any",
77 "void",
78 "null",
79 "undefined",
80 "array",
81 "promise",
82 "function",
83 "error",
84 "date",
85 "regexp",
86 "symbol",
87 "bigint",
88 "map",
89 "set",
90 "weakmap",
91 "weakset",
92 "iterator",
93 "generator",
94 "t",
95 "u",
96 "v",
97 "k",
98 "e", "userdata",
100 "apiresponse",
101 "config",
102 "options",
103 "params",
104 "result",
105 "response",
106 "request",
107 "data",
108 "item",
109 "element",
110 "node",
111 ];
112
113 let tag_content = tag
114 .trim_start_matches('<')
115 .trim_end_matches('>')
116 .trim_start_matches('/');
117 let tag_name = tag_content
118 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
119 .next()
120 .unwrap_or("");
121
122 if !tag_content.contains(' ') && !tag_content.contains('=') {
124 COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
125 } else {
126 false
127 }
128 }
129
130 #[inline]
132 fn is_email_address(&self, tag: &str) -> bool {
133 let content = tag.trim_start_matches('<').trim_end_matches('>');
134 content.contains('@')
136 && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
137 && content.split('@').count() == 2
138 && content.split('@').all(|part| !part.is_empty())
139 }
140
141 #[inline]
143 fn has_markdown_attribute(&self, tag: &str) -> bool {
144 tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
147 }
148
149 #[inline]
151 fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
152 let content = tag.trim_start_matches('<').trim_end_matches('>');
153 content.starts_with("http://")
155 || content.starts_with("https://")
156 || content.starts_with("ftp://")
157 || content.starts_with("ftps://")
158 || content.starts_with("mailto:")
159 }
160
161 fn calculate_fix(
169 &self,
170 content: &str,
171 opening_tag: &str,
172 tag_byte_start: usize,
173 ) -> Option<(std::ops::Range<usize>, String)> {
174 if opening_tag.ends_with("/>") {
176 return Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()));
177 }
178
179 let tag_name = opening_tag
181 .trim_start_matches('<')
182 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
183 .next()?
184 .to_lowercase();
185
186 let closing_tag = format!("</{tag_name}>");
188
189 let search_start = tag_byte_start + opening_tag.len();
191 if let Some(closing_pos) = content[search_start..].find(&closing_tag) {
192 let closing_byte_start = search_start + closing_pos;
193 let closing_byte_end = closing_byte_start + closing_tag.len();
194
195 let inner_content = &content[search_start..closing_byte_start];
197
198 return Some((tag_byte_start..closing_byte_end, inner_content.to_string()));
199 }
200
201 Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()))
203 }
204
205 fn find_multiline_html_tags(
207 &self,
208 ctx: &crate::lint_context::LintContext,
209 content: &str,
210 nomarkdown_ranges: &[(usize, usize)],
211 warnings: &mut Vec<LintWarning>,
212 ) {
213 if !content.contains('<') || !content.lines().any(|line| line.trim_end().ends_with('<')) {
215 return;
216 }
217
218 static INCOMPLETE_TAG_START: LazyLock<regex::Regex> =
220 LazyLock::new(|| regex::Regex::new(r"(?i)<[a-zA-Z][^>]*$").unwrap());
221
222 let lines: Vec<&str> = content.lines().collect();
223
224 for (i, line) in lines.iter().enumerate() {
225 let line_num = i + 1;
226
227 if line.trim().is_empty() || ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
229 continue;
230 }
231
232 if nomarkdown_ranges
234 .iter()
235 .any(|(start, end)| line_num >= *start && line_num <= *end)
236 {
237 continue;
238 }
239
240 if !line.contains('<') {
242 continue;
243 }
244
245 if let Some(incomplete_match) = INCOMPLETE_TAG_START.find(line) {
247 let start_column = incomplete_match.start() + 1; let mut complete_tag = incomplete_match.as_str().to_string();
251 let mut found_end = false;
252
253 for (j, next_line) in lines.iter().enumerate().skip(i + 1).take(10) {
255 let next_line_num = j + 1;
256
257 if ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block) {
259 break;
260 }
261
262 complete_tag.push(' '); complete_tag.push_str(next_line.trim());
264
265 if next_line.contains('>') {
266 found_end = true;
267 break;
268 }
269 }
270
271 if found_end {
272 if let Some(end_pos) = complete_tag.find('>') {
274 let final_tag = &complete_tag[0..=end_pos];
275
276 let skip_mkdocs_markdown = ctx.flavor == crate::config::MarkdownFlavor::MkDocs
278 && self.has_markdown_attribute(final_tag);
279
280 if !self.is_html_comment(final_tag)
281 && !self.is_likely_type_annotation(final_tag)
282 && !self.is_email_address(final_tag)
283 && !self.is_url_in_angle_brackets(final_tag)
284 && !self.is_tag_allowed(final_tag)
285 && !skip_mkdocs_markdown
286 && HTML_OPENING_TAG_FINDER.is_match(final_tag)
287 {
288 let already_warned =
290 warnings.iter().any(|w| w.line == line_num && w.column == start_column);
291
292 if !already_warned {
293 let (start_line, start_col, end_line, end_col) = calculate_html_tag_range(
294 line_num,
295 line,
296 incomplete_match.start(),
297 incomplete_match.len(),
298 );
299 warnings.push(LintWarning {
300 rule_name: Some(self.name().to_string()),
301 line: start_line,
302 column: start_col,
303 end_line,
304 end_column: end_col,
305 message: format!("HTML tag found: {final_tag}"),
306 severity: Severity::Warning,
307 fix: None,
308 });
309 }
310 }
311 }
312 }
313 }
314 }
315 }
316}
317
318impl Rule for MD033NoInlineHtml {
    /// Returns the rule identifier, `"MD033"`.
    fn name(&self) -> &'static str {
        "MD033"
    }
322
    /// Returns the one-line human-readable description of the rule.
    fn description(&self) -> &'static str {
        "Inline HTML is not allowed"
    }
326
327 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
328 let content = ctx.content;
329
330 if content.is_empty() || !ctx.likely_has_html() {
332 return Ok(Vec::new());
333 }
334
335 if !HTML_TAG_QUICK_CHECK.is_match(content) {
337 return Ok(Vec::new());
338 }
339
340 let mut warnings = Vec::new();
341 let lines: Vec<&str> = content.lines().collect();
342
343 let mut in_nomarkdown = false;
345 let mut in_comment = false;
346 let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
347 let mut nomarkdown_start = 0;
348 let mut comment_start = 0;
349
350 for (i, line) in lines.iter().enumerate() {
352 let line_num = i + 1;
353
354 if line.trim() == "{::nomarkdown}" {
356 in_nomarkdown = true;
357 nomarkdown_start = line_num;
358 } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
359 in_nomarkdown = false;
360 nomarkdown_ranges.push((nomarkdown_start, line_num));
361 }
362
363 if line.trim() == "{::comment}" {
365 in_comment = true;
366 comment_start = line_num;
367 } else if line.trim() == "{:/comment}" && in_comment {
368 in_comment = false;
369 nomarkdown_ranges.push((comment_start, line_num));
370 }
371 }
372
373 for (i, line) in lines.iter().enumerate() {
376 let line_num = i + 1;
377
378 if line.trim().is_empty() {
379 continue;
380 }
381 if ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
382 continue;
383 }
384 if line.starts_with(" ") || line.starts_with('\t') {
387 continue;
388 }
389
390 if nomarkdown_ranges
392 .iter()
393 .any(|(start, end)| line_num >= *start && line_num <= *end)
394 {
395 continue;
396 }
397
398 if is_kramdown_extension(line) || is_kramdown_block_attribute(line) {
400 continue;
401 }
402
403 for tag_match in HTML_OPENING_TAG_FINDER.find_iter(line) {
405 let tag = tag_match.as_str();
406
407 let line_byte_offset: usize = content
409 .lines()
410 .take(line_num - 1)
411 .map(|l| l.len() + 1) .sum();
413 let tag_byte_start = line_byte_offset + tag_match.start();
414
415 if ctx.is_in_html_comment(tag_byte_start) {
417 continue;
418 }
419
420 if self.is_html_comment(tag) {
422 continue;
423 }
424
425 if ctx.flavor.supports_jsx() {
428 let tag_clean = tag.trim_start_matches('<').trim_start_matches('/');
430 let tag_name = tag_clean
431 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
432 .next()
433 .unwrap_or("");
434
435 if tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
436 continue;
437 }
438 }
439
440 if self.is_likely_type_annotation(tag) {
442 continue;
443 }
444
445 if self.is_email_address(tag) {
447 continue;
448 }
449
450 if self.is_url_in_angle_brackets(tag) {
452 continue;
453 }
454
455 let tag_start_col = tag_match.start() + 1; if ctx.is_in_code_span(line_num, tag_start_col) {
458 continue;
459 }
460
461 if self.is_tag_allowed(tag) {
463 continue;
464 }
465
466 if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
468 continue;
469 }
470
471 let (start_line, start_col, end_line, end_col) =
473 calculate_html_tag_range(line_num, line, tag_match.start(), tag_match.len());
474
475 let fix = self
477 .calculate_fix(content, tag, tag_byte_start)
478 .map(|(range, replacement)| Fix { range, replacement });
479
480 warnings.push(LintWarning {
481 rule_name: Some(self.name().to_string()),
482 line: start_line,
483 column: start_col,
484 end_line,
485 end_column: end_col,
486 message: format!("Inline HTML found: {tag}"),
487 severity: Severity::Warning,
488 fix,
489 });
490 }
491 }
492
493 self.find_multiline_html_tags(ctx, ctx.content, &nomarkdown_ranges, &mut warnings);
495
496 Ok(warnings)
497 }
498
499 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
500 Ok(ctx.content.to_string())
502 }
503
    /// Reports this rule as `Unfixable`, consistent with `fix` returning the
    /// content unchanged.
    fn fix_capability(&self) -> crate::rule::FixCapability {
        crate::rule::FixCapability::Unfixable
    }
507
    /// Places this rule in the `Html` category.
    fn category(&self) -> RuleCategory {
        RuleCategory::Html
    }
512
513 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
515 ctx.content.is_empty() || !ctx.likely_has_html()
516 }
517
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
521
522 fn default_config_section(&self) -> Option<(String, toml::Value)> {
523 let json_value = serde_json::to_value(&self.config).ok()?;
524 Some((
525 self.name().to_string(),
526 crate::rule_config_serde::json_to_toml_value(&json_value)?,
527 ))
528 }
529
530 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
531 where
532 Self: Sized,
533 {
534 let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
535 Box::new(Self::from_config_struct(rule_config))
536 }
537}
538
539#[cfg(test)]
540mod tests {
541 use super::*;
542 use crate::lint_context::LintContext;
543 use crate::rule::Rule;
544
545 #[test]
546 fn test_md033_basic_html() {
547 let rule = MD033NoInlineHtml::default();
548 let content = "<div>Some content</div>";
549 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
550 let result = rule.check(&ctx).unwrap();
551 assert_eq!(result.len(), 1); assert!(result[0].message.starts_with("Inline HTML found: <div>"));
554 }
555
556 #[test]
557 fn test_md033_case_insensitive() {
558 let rule = MD033NoInlineHtml::default();
559 let content = "<DiV>Some <B>content</B></dIv>";
560 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
561 let result = rule.check(&ctx).unwrap();
562 assert_eq!(result.len(), 2); assert_eq!(result[0].message, "Inline HTML found: <DiV>");
565 assert_eq!(result[1].message, "Inline HTML found: <B>");
566 }
567
568 #[test]
569 fn test_md033_allowed_tags() {
570 let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
571 let content = "<div>Allowed</div><p>Not allowed</p><br/>";
572 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
573 let result = rule.check(&ctx).unwrap();
574 assert_eq!(result.len(), 1);
576 assert_eq!(result[0].message, "Inline HTML found: <p>");
577
578 let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
580 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
581 let result2 = rule.check(&ctx2).unwrap();
582 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <P>");
584 }
585
586 #[test]
587 fn test_md033_html_comments() {
588 let rule = MD033NoInlineHtml::default();
589 let content = "<!-- This is a comment --> <p>Not a comment</p>";
590 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
591 let result = rule.check(&ctx).unwrap();
592 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <p>");
595 }
596
597 #[test]
598 fn test_md033_tags_in_links() {
599 let rule = MD033NoInlineHtml::default();
600 let content = "[Link](http://example.com/<div>)";
601 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
602 let result = rule.check(&ctx).unwrap();
603 assert_eq!(result.len(), 1);
605 assert_eq!(result[0].message, "Inline HTML found: <div>");
606
607 let content2 = "[Link <a>text</a>](url)";
608 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
609 let result2 = rule.check(&ctx2).unwrap();
610 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <a>");
613 }
614
615 #[test]
616 fn test_md033_fix_escaping() {
617 let rule = MD033NoInlineHtml::default();
618 let content = "Text with <div> and <br/> tags.";
619 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
620 let fixed_content = rule.fix(&ctx).unwrap();
621 assert_eq!(fixed_content, content);
623 }
624
625 #[test]
626 fn test_md033_in_code_blocks() {
627 let rule = MD033NoInlineHtml::default();
628 let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
629 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
630 let result = rule.check(&ctx).unwrap();
631 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <div>");
634 }
635
636 #[test]
637 fn test_md033_in_code_spans() {
638 let rule = MD033NoInlineHtml::default();
639 let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
640 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
641 let result = rule.check(&ctx).unwrap();
642 assert_eq!(result.len(), 1);
644 assert_eq!(result[0].message, "Inline HTML found: <br/>");
645 }
646
647 #[test]
648 fn test_md033_issue_90_code_span_with_diff_block() {
649 let rule = MD033NoInlineHtml::default();
651 let content = r#"# Heading
652
653`<env>`
654
655```diff
656- this
657+ that
658```"#;
659 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
660 let result = rule.check(&ctx).unwrap();
661 assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
663 }
664
665 #[test]
666 fn test_md033_multiple_code_spans_with_angle_brackets() {
667 let rule = MD033NoInlineHtml::default();
669 let content = "`<one>` and `<two>` and `<three>` are all code spans";
670 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
671 let result = rule.check(&ctx).unwrap();
672 assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
673 }
674
675 #[test]
676 fn test_md033_nested_angle_brackets_in_code_span() {
677 let rule = MD033NoInlineHtml::default();
679 let content = "Text with `<<nested>>` brackets";
680 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
681 let result = rule.check(&ctx).unwrap();
682 assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
683 }
684
685 #[test]
686 fn test_md033_code_span_at_end_before_code_block() {
687 let rule = MD033NoInlineHtml::default();
689 let content = "Testing `<test>`\n```\ncode here\n```";
690 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
691 let result = rule.check(&ctx).unwrap();
692 assert_eq!(result.len(), 0, "Should handle code span before code block");
693 }
694
695 #[test]
696 fn test_md033_quick_fix_inline_tag() {
697 let rule = MD033NoInlineHtml::default();
699 let content = "This has <span>inline text</span> that should keep content.";
700 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
701 let result = rule.check(&ctx).unwrap();
702
703 assert_eq!(result.len(), 1, "Should find one HTML tag");
704 assert!(result[0].fix.is_some(), "Should have a fix");
705
706 let fix = result[0].fix.as_ref().unwrap();
707 assert_eq!(&content[fix.range.clone()], "<span>inline text</span>");
708 assert_eq!(fix.replacement, "inline text");
709 }
710
711 #[test]
712 fn test_md033_quick_fix_multiline_tag() {
713 let rule = MD033NoInlineHtml::default();
715 let content = "<div>\nBlock content\n</div>";
716 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
717 let result = rule.check(&ctx).unwrap();
718
719 assert_eq!(result.len(), 1, "Should find one HTML tag");
720 assert!(result[0].fix.is_some(), "Should have a fix");
721
722 let fix = result[0].fix.as_ref().unwrap();
723 assert_eq!(&content[fix.range.clone()], "<div>\nBlock content\n</div>");
724 assert_eq!(fix.replacement, "\nBlock content\n");
725 }
726
727 #[test]
728 fn test_md033_quick_fix_self_closing_tag() {
729 let rule = MD033NoInlineHtml::default();
731 let content = "Self-closing: <br/>";
732 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
733 let result = rule.check(&ctx).unwrap();
734
735 assert_eq!(result.len(), 1, "Should find one HTML tag");
736 assert!(result[0].fix.is_some(), "Should have a fix");
737
738 let fix = result[0].fix.as_ref().unwrap();
739 assert_eq!(&content[fix.range.clone()], "<br/>");
740 assert_eq!(fix.replacement, "");
741 }
742
743 #[test]
744 fn test_md033_quick_fix_multiple_tags() {
745 let rule = MD033NoInlineHtml::default();
747 let content = "<span>first</span> and <strong>second</strong>";
748 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
749 let result = rule.check(&ctx).unwrap();
750
751 assert_eq!(result.len(), 2, "Should find two HTML tags");
752 assert!(result[0].fix.is_some(), "First tag should have a fix");
753 assert!(result[1].fix.is_some(), "Second tag should have a fix");
754
755 let fix1 = result[0].fix.as_ref().unwrap();
756 assert_eq!(&content[fix1.range.clone()], "<span>first</span>");
757 assert_eq!(fix1.replacement, "first");
758
759 let fix2 = result[1].fix.as_ref().unwrap();
760 assert_eq!(&content[fix2.range.clone()], "<strong>second</strong>");
761 assert_eq!(fix2.replacement, "second");
762 }
763}