1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::range_utils::calculate_html_tag_range;
9use crate::utils::regex_cache::*;
10use std::collections::HashSet;
11
12mod md033_config;
13use md033_config::MD033Config;
14
15#[derive(Clone)]
16pub struct MD033NoInlineHtml {
17 config: MD033Config,
18 allowed: HashSet<String>,
19}
20
21impl Default for MD033NoInlineHtml {
22 fn default() -> Self {
23 let config = MD033Config::default();
24 let allowed = config.allowed_set();
25 Self { config, allowed }
26 }
27}
28
29impl MD033NoInlineHtml {
30 pub fn new() -> Self {
31 Self::default()
32 }
33
34 pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
35 let config = MD033Config {
36 allowed: allowed_vec.clone(),
37 };
38 let allowed = config.allowed_set();
39 Self { config, allowed }
40 }
41
42 pub fn from_config_struct(config: MD033Config) -> Self {
43 let allowed = config.allowed_set();
44 Self { config, allowed }
45 }
46
47 #[inline]
49 fn is_tag_allowed(&self, tag: &str) -> bool {
50 if self.allowed.is_empty() {
51 return false;
52 }
53 let tag = tag.trim_start_matches('<').trim_start_matches('/');
55 let tag_name = tag
56 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
57 .next()
58 .unwrap_or("");
59 self.allowed.contains(&tag_name.to_lowercase())
60 }
61
62 #[inline]
64 fn is_html_comment(&self, tag: &str) -> bool {
65 tag.starts_with("<!--") && tag.ends_with("-->")
66 }
67
68 #[inline]
70 fn is_likely_type_annotation(&self, tag: &str) -> bool {
71 const COMMON_TYPES: &[&str] = &[
73 "string",
74 "number",
75 "any",
76 "void",
77 "null",
78 "undefined",
79 "array",
80 "promise",
81 "function",
82 "error",
83 "date",
84 "regexp",
85 "symbol",
86 "bigint",
87 "map",
88 "set",
89 "weakmap",
90 "weakset",
91 "iterator",
92 "generator",
93 "t",
94 "u",
95 "v",
96 "k",
97 "e", "userdata",
99 "apiresponse",
100 "config",
101 "options",
102 "params",
103 "result",
104 "response",
105 "request",
106 "data",
107 "item",
108 "element",
109 "node",
110 ];
111
112 let tag_content = tag
113 .trim_start_matches('<')
114 .trim_end_matches('>')
115 .trim_start_matches('/');
116 let tag_name = tag_content
117 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
118 .next()
119 .unwrap_or("");
120
121 if !tag_content.contains(' ') && !tag_content.contains('=') {
123 COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
124 } else {
125 false
126 }
127 }
128
129 #[inline]
131 fn is_email_address(&self, tag: &str) -> bool {
132 let content = tag.trim_start_matches('<').trim_end_matches('>');
133 content.contains('@')
135 && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
136 && content.split('@').count() == 2
137 && content.split('@').all(|part| !part.is_empty())
138 }
139
140 #[inline]
142 fn has_markdown_attribute(&self, tag: &str) -> bool {
143 tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
146 }
147
148 #[inline]
150 fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
151 let content = tag.trim_start_matches('<').trim_end_matches('>');
152 content.starts_with("http://")
154 || content.starts_with("https://")
155 || content.starts_with("ftp://")
156 || content.starts_with("ftps://")
157 || content.starts_with("mailto:")
158 }
159
160 fn calculate_fix(
168 &self,
169 content: &str,
170 opening_tag: &str,
171 tag_byte_start: usize,
172 ) -> Option<(std::ops::Range<usize>, String)> {
173 if opening_tag.ends_with("/>") {
175 return Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()));
176 }
177
178 let tag_name = opening_tag
180 .trim_start_matches('<')
181 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
182 .next()?
183 .to_lowercase();
184
185 let closing_tag = format!("</{tag_name}>");
187
188 let search_start = tag_byte_start + opening_tag.len();
190 if let Some(closing_pos) = content[search_start..].find(&closing_tag) {
191 let closing_byte_start = search_start + closing_pos;
192 let closing_byte_end = closing_byte_start + closing_tag.len();
193
194 let inner_content = &content[search_start..closing_byte_start];
196
197 return Some((tag_byte_start..closing_byte_end, inner_content.to_string()));
198 }
199
200 Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()))
202 }
203
204 fn find_multiline_html_tags(
206 &self,
207 ctx: &crate::lint_context::LintContext,
208 content: &str,
209 nomarkdown_ranges: &[(usize, usize)],
210 warnings: &mut Vec<LintWarning>,
211 ) {
212 if !content.contains('<') || !content.lines().any(|line| line.trim_end().ends_with('<')) {
214 return;
215 }
216
217 lazy_static::lazy_static! {
219 static ref INCOMPLETE_TAG_START: regex::Regex = regex::Regex::new(r"(?i)<[a-zA-Z][^>]*$").unwrap();
220 }
221
222 let lines: Vec<&str> = content.lines().collect();
223
224 for (i, line) in lines.iter().enumerate() {
225 let line_num = i + 1;
226
227 if line.trim().is_empty() || ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
229 continue;
230 }
231
232 if nomarkdown_ranges
234 .iter()
235 .any(|(start, end)| line_num >= *start && line_num <= *end)
236 {
237 continue;
238 }
239
240 if !line.contains('<') {
242 continue;
243 }
244
245 if let Some(incomplete_match) = INCOMPLETE_TAG_START.find(line) {
247 let start_column = incomplete_match.start() + 1; let mut complete_tag = incomplete_match.as_str().to_string();
251 let mut found_end = false;
252
253 for (j, next_line) in lines.iter().enumerate().skip(i + 1).take(10) {
255 let next_line_num = j + 1;
256
257 if ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block) {
259 break;
260 }
261
262 complete_tag.push(' '); complete_tag.push_str(next_line.trim());
264
265 if next_line.contains('>') {
266 found_end = true;
267 break;
268 }
269 }
270
271 if found_end {
272 if let Some(end_pos) = complete_tag.find('>') {
274 let final_tag = &complete_tag[0..=end_pos];
275
276 let skip_mkdocs_markdown = ctx.flavor == crate::config::MarkdownFlavor::MkDocs
278 && self.has_markdown_attribute(final_tag);
279
280 if !self.is_html_comment(final_tag)
281 && !self.is_likely_type_annotation(final_tag)
282 && !self.is_email_address(final_tag)
283 && !self.is_url_in_angle_brackets(final_tag)
284 && !self.is_tag_allowed(final_tag)
285 && !skip_mkdocs_markdown
286 && HTML_OPENING_TAG_FINDER.is_match(final_tag)
287 {
288 let already_warned =
290 warnings.iter().any(|w| w.line == line_num && w.column == start_column);
291
292 if !already_warned {
293 let (start_line, start_col, end_line, end_col) = calculate_html_tag_range(
294 line_num,
295 line,
296 incomplete_match.start(),
297 incomplete_match.len(),
298 );
299 warnings.push(LintWarning {
300 rule_name: Some(self.name().to_string()),
301 line: start_line,
302 column: start_col,
303 end_line,
304 end_column: end_col,
305 message: format!("HTML tag found: {final_tag}"),
306 severity: Severity::Warning,
307 fix: None,
308 });
309 }
310 }
311 }
312 }
313 }
314 }
315 }
316}
317
318impl Rule for MD033NoInlineHtml {
319 fn name(&self) -> &'static str {
320 "MD033"
321 }
322
323 fn description(&self) -> &'static str {
324 "Inline HTML is not allowed"
325 }
326
327 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
328 let content = ctx.content;
329
330 if content.is_empty() || !ctx.likely_has_html() {
332 return Ok(Vec::new());
333 }
334
335 if !HTML_TAG_QUICK_CHECK.is_match(content) {
337 return Ok(Vec::new());
338 }
339
340 let mut warnings = Vec::new();
341 let lines: Vec<&str> = content.lines().collect();
342
343 let mut in_nomarkdown = false;
345 let mut in_comment = false;
346 let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
347 let mut nomarkdown_start = 0;
348 let mut comment_start = 0;
349
350 for (i, line) in lines.iter().enumerate() {
352 let line_num = i + 1;
353
354 if line.trim() == "{::nomarkdown}" {
356 in_nomarkdown = true;
357 nomarkdown_start = line_num;
358 } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
359 in_nomarkdown = false;
360 nomarkdown_ranges.push((nomarkdown_start, line_num));
361 }
362
363 if line.trim() == "{::comment}" {
365 in_comment = true;
366 comment_start = line_num;
367 } else if line.trim() == "{:/comment}" && in_comment {
368 in_comment = false;
369 nomarkdown_ranges.push((comment_start, line_num));
370 }
371 }
372
373 for (i, line) in lines.iter().enumerate() {
376 let line_num = i + 1;
377
378 if line.trim().is_empty() {
379 continue;
380 }
381 if ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
382 continue;
383 }
384 if line.starts_with(" ") || line.starts_with('\t') {
387 continue;
388 }
389
390 if nomarkdown_ranges
392 .iter()
393 .any(|(start, end)| line_num >= *start && line_num <= *end)
394 {
395 continue;
396 }
397
398 if is_kramdown_extension(line) || is_kramdown_block_attribute(line) {
400 continue;
401 }
402
403 for tag_match in HTML_OPENING_TAG_FINDER.find_iter(line) {
405 let tag = tag_match.as_str();
406
407 if self.is_html_comment(tag) {
409 continue;
410 }
411
412 if ctx.flavor.supports_jsx() {
415 let tag_clean = tag.trim_start_matches('<').trim_start_matches('/');
417 let tag_name = tag_clean
418 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
419 .next()
420 .unwrap_or("");
421
422 if tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
423 continue;
424 }
425 }
426
427 if self.is_likely_type_annotation(tag) {
429 continue;
430 }
431
432 if self.is_email_address(tag) {
434 continue;
435 }
436
437 if self.is_url_in_angle_brackets(tag) {
439 continue;
440 }
441
442 let tag_start_col = tag_match.start() + 1; if ctx.is_in_code_span(line_num, tag_start_col) {
445 continue;
446 }
447
448 if self.is_tag_allowed(tag) {
450 continue;
451 }
452
453 if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
455 continue;
456 }
457
458 let (start_line, start_col, end_line, end_col) =
460 calculate_html_tag_range(line_num, line, tag_match.start(), tag_match.len());
461
462 let line_byte_offset: usize = content
464 .lines()
465 .take(line_num - 1)
466 .map(|l| l.len() + 1) .sum();
468 let tag_byte_start = line_byte_offset + tag_match.start();
469
470 let fix = self
472 .calculate_fix(content, tag, tag_byte_start)
473 .map(|(range, replacement)| Fix { range, replacement });
474
475 warnings.push(LintWarning {
476 rule_name: Some(self.name().to_string()),
477 line: start_line,
478 column: start_col,
479 end_line,
480 end_column: end_col,
481 message: format!("Inline HTML found: {tag}"),
482 severity: Severity::Warning,
483 fix,
484 });
485 }
486 }
487
488 self.find_multiline_html_tags(ctx, ctx.content, &nomarkdown_ranges, &mut warnings);
490
491 Ok(warnings)
492 }
493
494 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
495 Ok(ctx.content.to_string())
497 }
498
499 fn fix_capability(&self) -> crate::rule::FixCapability {
500 crate::rule::FixCapability::Unfixable
501 }
502
503 fn category(&self) -> RuleCategory {
505 RuleCategory::Html
506 }
507
508 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
510 ctx.content.is_empty() || !ctx.likely_has_html()
511 }
512
513 fn as_any(&self) -> &dyn std::any::Any {
514 self
515 }
516
517 fn default_config_section(&self) -> Option<(String, toml::Value)> {
518 let json_value = serde_json::to_value(&self.config).ok()?;
519 Some((
520 self.name().to_string(),
521 crate::rule_config_serde::json_to_toml_value(&json_value)?,
522 ))
523 }
524
525 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
526 where
527 Self: Sized,
528 {
529 let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
530 Box::new(Self::from_config_struct(rule_config))
531 }
532}
533
534#[cfg(test)]
535mod tests {
536 use super::*;
537 use crate::lint_context::LintContext;
538 use crate::rule::Rule;
539
540 #[test]
541 fn test_md033_basic_html() {
542 let rule = MD033NoInlineHtml::default();
543 let content = "<div>Some content</div>";
544 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
545 let result = rule.check(&ctx).unwrap();
546 assert_eq!(result.len(), 1); assert!(result[0].message.starts_with("Inline HTML found: <div>"));
549 }
550
551 #[test]
552 fn test_md033_case_insensitive() {
553 let rule = MD033NoInlineHtml::default();
554 let content = "<DiV>Some <B>content</B></dIv>";
555 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
556 let result = rule.check(&ctx).unwrap();
557 assert_eq!(result.len(), 2); assert_eq!(result[0].message, "Inline HTML found: <DiV>");
560 assert_eq!(result[1].message, "Inline HTML found: <B>");
561 }
562
563 #[test]
564 fn test_md033_allowed_tags() {
565 let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
566 let content = "<div>Allowed</div><p>Not allowed</p><br/>";
567 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
568 let result = rule.check(&ctx).unwrap();
569 assert_eq!(result.len(), 1);
571 assert_eq!(result[0].message, "Inline HTML found: <p>");
572
573 let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
575 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
576 let result2 = rule.check(&ctx2).unwrap();
577 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <P>");
579 }
580
581 #[test]
582 fn test_md033_html_comments() {
583 let rule = MD033NoInlineHtml::default();
584 let content = "<!-- This is a comment --> <p>Not a comment</p>";
585 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
586 let result = rule.check(&ctx).unwrap();
587 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <p>");
590 }
591
592 #[test]
593 fn test_md033_tags_in_links() {
594 let rule = MD033NoInlineHtml::default();
595 let content = "[Link](http://example.com/<div>)";
596 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
597 let result = rule.check(&ctx).unwrap();
598 assert_eq!(result.len(), 1);
600 assert_eq!(result[0].message, "Inline HTML found: <div>");
601
602 let content2 = "[Link <a>text</a>](url)";
603 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
604 let result2 = rule.check(&ctx2).unwrap();
605 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <a>");
608 }
609
610 #[test]
611 fn test_md033_fix_escaping() {
612 let rule = MD033NoInlineHtml::default();
613 let content = "Text with <div> and <br/> tags.";
614 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
615 let fixed_content = rule.fix(&ctx).unwrap();
616 assert_eq!(fixed_content, content);
618 }
619
620 #[test]
621 fn test_md033_in_code_blocks() {
622 let rule = MD033NoInlineHtml::default();
623 let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
624 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
625 let result = rule.check(&ctx).unwrap();
626 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <div>");
629 }
630
631 #[test]
632 fn test_md033_in_code_spans() {
633 let rule = MD033NoInlineHtml::default();
634 let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
635 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
636 let result = rule.check(&ctx).unwrap();
637 assert_eq!(result.len(), 1);
639 assert_eq!(result[0].message, "Inline HTML found: <br/>");
640 }
641
642 #[test]
643 fn test_md033_issue_90_code_span_with_diff_block() {
644 let rule = MD033NoInlineHtml::default();
646 let content = r#"# Heading
647
648`<env>`
649
650```diff
651- this
652+ that
653```"#;
654 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
655 let result = rule.check(&ctx).unwrap();
656 assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
658 }
659
660 #[test]
661 fn test_md033_multiple_code_spans_with_angle_brackets() {
662 let rule = MD033NoInlineHtml::default();
664 let content = "`<one>` and `<two>` and `<three>` are all code spans";
665 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
666 let result = rule.check(&ctx).unwrap();
667 assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
668 }
669
670 #[test]
671 fn test_md033_nested_angle_brackets_in_code_span() {
672 let rule = MD033NoInlineHtml::default();
674 let content = "Text with `<<nested>>` brackets";
675 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
676 let result = rule.check(&ctx).unwrap();
677 assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
678 }
679
680 #[test]
681 fn test_md033_code_span_at_end_before_code_block() {
682 let rule = MD033NoInlineHtml::default();
684 let content = "Testing `<test>`\n```\ncode here\n```";
685 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
686 let result = rule.check(&ctx).unwrap();
687 assert_eq!(result.len(), 0, "Should handle code span before code block");
688 }
689
690 #[test]
691 fn test_md033_quick_fix_inline_tag() {
692 let rule = MD033NoInlineHtml::default();
694 let content = "This has <span>inline text</span> that should keep content.";
695 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
696 let result = rule.check(&ctx).unwrap();
697
698 assert_eq!(result.len(), 1, "Should find one HTML tag");
699 assert!(result[0].fix.is_some(), "Should have a fix");
700
701 let fix = result[0].fix.as_ref().unwrap();
702 assert_eq!(&content[fix.range.clone()], "<span>inline text</span>");
703 assert_eq!(fix.replacement, "inline text");
704 }
705
706 #[test]
707 fn test_md033_quick_fix_multiline_tag() {
708 let rule = MD033NoInlineHtml::default();
710 let content = "<div>\nBlock content\n</div>";
711 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
712 let result = rule.check(&ctx).unwrap();
713
714 assert_eq!(result.len(), 1, "Should find one HTML tag");
715 assert!(result[0].fix.is_some(), "Should have a fix");
716
717 let fix = result[0].fix.as_ref().unwrap();
718 assert_eq!(&content[fix.range.clone()], "<div>\nBlock content\n</div>");
719 assert_eq!(fix.replacement, "\nBlock content\n");
720 }
721
722 #[test]
723 fn test_md033_quick_fix_self_closing_tag() {
724 let rule = MD033NoInlineHtml::default();
726 let content = "Self-closing: <br/>";
727 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
728 let result = rule.check(&ctx).unwrap();
729
730 assert_eq!(result.len(), 1, "Should find one HTML tag");
731 assert!(result[0].fix.is_some(), "Should have a fix");
732
733 let fix = result[0].fix.as_ref().unwrap();
734 assert_eq!(&content[fix.range.clone()], "<br/>");
735 assert_eq!(fix.replacement, "");
736 }
737
738 #[test]
739 fn test_md033_quick_fix_multiple_tags() {
740 let rule = MD033NoInlineHtml::default();
742 let content = "<span>first</span> and <strong>second</strong>";
743 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
744 let result = rule.check(&ctx).unwrap();
745
746 assert_eq!(result.len(), 2, "Should find two HTML tags");
747 assert!(result[0].fix.is_some(), "First tag should have a fix");
748 assert!(result[1].fix.is_some(), "Second tag should have a fix");
749
750 let fix1 = result[0].fix.as_ref().unwrap();
751 assert_eq!(&content[fix1.range.clone()], "<span>first</span>");
752 assert_eq!(fix1.replacement, "first");
753
754 let fix2 = result[1].fix.as_ref().unwrap();
755 assert_eq!(&content[fix2.range.clone()], "<strong>second</strong>");
756 assert_eq!(fix2.replacement, "second");
757 }
758}