1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::range_utils::calculate_html_tag_range;
9use crate::utils::regex_cache::*;
10use std::collections::HashSet;
11
12mod md033_config;
13use md033_config::MD033Config;
14
15#[derive(Clone)]
16pub struct MD033NoInlineHtml {
17 config: MD033Config,
18 allowed: HashSet<String>,
19}
20
21impl Default for MD033NoInlineHtml {
22 fn default() -> Self {
23 let config = MD033Config::default();
24 let allowed = config.allowed_set();
25 Self { config, allowed }
26 }
27}
28
29impl MD033NoInlineHtml {
30 pub fn new() -> Self {
31 Self::default()
32 }
33
34 pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
35 let config = MD033Config {
36 allowed: allowed_vec.clone(),
37 };
38 let allowed = config.allowed_set();
39 Self { config, allowed }
40 }
41
42 pub fn from_config_struct(config: MD033Config) -> Self {
43 let allowed = config.allowed_set();
44 Self { config, allowed }
45 }
46
47 #[inline]
49 fn is_tag_allowed(&self, tag: &str) -> bool {
50 if self.allowed.is_empty() {
51 return false;
52 }
53 let tag = tag.trim_start_matches('<').trim_start_matches('/');
55 let tag_name = tag
56 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
57 .next()
58 .unwrap_or("");
59 self.allowed.contains(&tag_name.to_lowercase())
60 }
61
62 #[inline]
64 fn is_html_comment(&self, tag: &str) -> bool {
65 tag.starts_with("<!--") && tag.ends_with("-->")
66 }
67
68 #[inline]
70 fn is_likely_type_annotation(&self, tag: &str) -> bool {
71 const COMMON_TYPES: &[&str] = &[
73 "string",
74 "number",
75 "any",
76 "void",
77 "null",
78 "undefined",
79 "array",
80 "promise",
81 "function",
82 "error",
83 "date",
84 "regexp",
85 "symbol",
86 "bigint",
87 "map",
88 "set",
89 "weakmap",
90 "weakset",
91 "iterator",
92 "generator",
93 "t",
94 "u",
95 "v",
96 "k",
97 "e", "userdata",
99 "apiresponse",
100 "config",
101 "options",
102 "params",
103 "result",
104 "response",
105 "request",
106 "data",
107 "item",
108 "element",
109 "node",
110 ];
111
112 let tag_content = tag
113 .trim_start_matches('<')
114 .trim_end_matches('>')
115 .trim_start_matches('/');
116 let tag_name = tag_content
117 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
118 .next()
119 .unwrap_or("");
120
121 if !tag_content.contains(' ') && !tag_content.contains('=') {
123 COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
124 } else {
125 false
126 }
127 }
128
129 #[inline]
131 fn is_email_address(&self, tag: &str) -> bool {
132 let content = tag.trim_start_matches('<').trim_end_matches('>');
133 content.contains('@')
135 && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
136 && content.split('@').count() == 2
137 && content.split('@').all(|part| !part.is_empty())
138 }
139
140 #[inline]
142 fn has_markdown_attribute(&self, tag: &str) -> bool {
143 tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
146 }
147
148 #[inline]
150 fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
151 let content = tag.trim_start_matches('<').trim_end_matches('>');
152 content.starts_with("http://")
154 || content.starts_with("https://")
155 || content.starts_with("ftp://")
156 || content.starts_with("ftps://")
157 || content.starts_with("mailto:")
158 }
159
160 fn calculate_fix(
168 &self,
169 content: &str,
170 opening_tag: &str,
171 tag_byte_start: usize,
172 ) -> Option<(std::ops::Range<usize>, String)> {
173 if opening_tag.ends_with("/>") {
175 return Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()));
176 }
177
178 let tag_name = opening_tag
180 .trim_start_matches('<')
181 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
182 .next()?
183 .to_lowercase();
184
185 let closing_tag = format!("</{tag_name}>");
187
188 let search_start = tag_byte_start + opening_tag.len();
190 if let Some(closing_pos) = content[search_start..].find(&closing_tag) {
191 let closing_byte_start = search_start + closing_pos;
192 let closing_byte_end = closing_byte_start + closing_tag.len();
193
194 let inner_content = &content[search_start..closing_byte_start];
196
197 return Some((tag_byte_start..closing_byte_end, inner_content.to_string()));
198 }
199
200 Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()))
202 }
203
204 fn find_multiline_html_tags(
206 &self,
207 ctx: &crate::lint_context::LintContext,
208 content: &str,
209 nomarkdown_ranges: &[(usize, usize)],
210 warnings: &mut Vec<LintWarning>,
211 ) {
212 if !content.contains('<') || !content.lines().any(|line| line.trim_end().ends_with('<')) {
214 return;
215 }
216
217 lazy_static::lazy_static! {
219 static ref INCOMPLETE_TAG_START: regex::Regex = regex::Regex::new(r"(?i)<[a-zA-Z][^>]*$").unwrap();
220 }
221
222 let lines: Vec<&str> = content.lines().collect();
223
224 for (i, line) in lines.iter().enumerate() {
225 let line_num = i + 1;
226
227 if line.trim().is_empty() || ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
229 continue;
230 }
231
232 if nomarkdown_ranges
234 .iter()
235 .any(|(start, end)| line_num >= *start && line_num <= *end)
236 {
237 continue;
238 }
239
240 if !line.contains('<') {
242 continue;
243 }
244
245 if let Some(incomplete_match) = INCOMPLETE_TAG_START.find(line) {
247 let start_column = incomplete_match.start() + 1; let mut complete_tag = incomplete_match.as_str().to_string();
251 let mut found_end = false;
252
253 for (j, next_line) in lines.iter().enumerate().skip(i + 1).take(10) {
255 let next_line_num = j + 1;
256
257 if ctx.line_info(next_line_num).is_some_and(|info| info.in_code_block) {
259 break;
260 }
261
262 complete_tag.push(' '); complete_tag.push_str(next_line.trim());
264
265 if next_line.contains('>') {
266 found_end = true;
267 break;
268 }
269 }
270
271 if found_end {
272 if let Some(end_pos) = complete_tag.find('>') {
274 let final_tag = &complete_tag[0..=end_pos];
275
276 let skip_mkdocs_markdown = ctx.flavor == crate::config::MarkdownFlavor::MkDocs
278 && self.has_markdown_attribute(final_tag);
279
280 if !self.is_html_comment(final_tag)
281 && !self.is_likely_type_annotation(final_tag)
282 && !self.is_email_address(final_tag)
283 && !self.is_url_in_angle_brackets(final_tag)
284 && !self.is_tag_allowed(final_tag)
285 && !skip_mkdocs_markdown
286 && HTML_OPENING_TAG_FINDER.is_match(final_tag)
287 {
288 let already_warned =
290 warnings.iter().any(|w| w.line == line_num && w.column == start_column);
291
292 if !already_warned {
293 let (start_line, start_col, end_line, end_col) = calculate_html_tag_range(
294 line_num,
295 line,
296 incomplete_match.start(),
297 incomplete_match.len(),
298 );
299 warnings.push(LintWarning {
300 rule_name: Some(self.name().to_string()),
301 line: start_line,
302 column: start_col,
303 end_line,
304 end_column: end_col,
305 message: format!("HTML tag found: {final_tag}"),
306 severity: Severity::Warning,
307 fix: None,
308 });
309 }
310 }
311 }
312 }
313 }
314 }
315 }
316}
317
318impl Rule for MD033NoInlineHtml {
319 fn name(&self) -> &'static str {
320 "MD033"
321 }
322
323 fn description(&self) -> &'static str {
324 "Inline HTML is not allowed"
325 }
326
327 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
328 let content = ctx.content;
329
330 if content.is_empty() || !ctx.likely_has_html() {
332 return Ok(Vec::new());
333 }
334
335 if !HTML_TAG_QUICK_CHECK.is_match(content) {
337 return Ok(Vec::new());
338 }
339
340 let mut warnings = Vec::new();
341 let lines: Vec<&str> = content.lines().collect();
342
343 let mut in_nomarkdown = false;
345 let mut in_comment = false;
346 let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
347 let mut nomarkdown_start = 0;
348 let mut comment_start = 0;
349
350 for (i, line) in lines.iter().enumerate() {
352 let line_num = i + 1;
353
354 if line.trim() == "{::nomarkdown}" {
356 in_nomarkdown = true;
357 nomarkdown_start = line_num;
358 } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
359 in_nomarkdown = false;
360 nomarkdown_ranges.push((nomarkdown_start, line_num));
361 }
362
363 if line.trim() == "{::comment}" {
365 in_comment = true;
366 comment_start = line_num;
367 } else if line.trim() == "{:/comment}" && in_comment {
368 in_comment = false;
369 nomarkdown_ranges.push((comment_start, line_num));
370 }
371 }
372
373 for (i, line) in lines.iter().enumerate() {
376 let line_num = i + 1;
377
378 if line.trim().is_empty() {
379 continue;
380 }
381 if ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
382 continue;
383 }
384 if line.starts_with(" ") || line.starts_with('\t') {
387 continue;
388 }
389
390 if nomarkdown_ranges
392 .iter()
393 .any(|(start, end)| line_num >= *start && line_num <= *end)
394 {
395 continue;
396 }
397
398 if is_kramdown_extension(line) || is_kramdown_block_attribute(line) {
400 continue;
401 }
402
403 for tag_match in HTML_OPENING_TAG_FINDER.find_iter(line) {
405 let tag = tag_match.as_str();
406
407 if self.is_html_comment(tag) {
409 continue;
410 }
411
412 if self.is_likely_type_annotation(tag) {
414 continue;
415 }
416
417 if self.is_email_address(tag) {
419 continue;
420 }
421
422 if self.is_url_in_angle_brackets(tag) {
424 continue;
425 }
426
427 let tag_start_col = tag_match.start() + 1; if ctx.is_in_code_span(line_num, tag_start_col) {
430 continue;
431 }
432
433 if self.is_tag_allowed(tag) {
435 continue;
436 }
437
438 if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
440 continue;
441 }
442
443 let (start_line, start_col, end_line, end_col) =
445 calculate_html_tag_range(line_num, line, tag_match.start(), tag_match.len());
446
447 let line_byte_offset: usize = content
449 .lines()
450 .take(line_num - 1)
451 .map(|l| l.len() + 1) .sum();
453 let tag_byte_start = line_byte_offset + tag_match.start();
454
455 let fix = self
457 .calculate_fix(content, tag, tag_byte_start)
458 .map(|(range, replacement)| Fix { range, replacement });
459
460 warnings.push(LintWarning {
461 rule_name: Some(self.name().to_string()),
462 line: start_line,
463 column: start_col,
464 end_line,
465 end_column: end_col,
466 message: format!("Inline HTML found: {tag}"),
467 severity: Severity::Warning,
468 fix,
469 });
470 }
471 }
472
473 self.find_multiline_html_tags(ctx, ctx.content, &nomarkdown_ranges, &mut warnings);
475
476 Ok(warnings)
477 }
478
479 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
480 Ok(ctx.content.to_string())
482 }
483
484 fn fix_capability(&self) -> crate::rule::FixCapability {
485 crate::rule::FixCapability::Unfixable
486 }
487
488 fn category(&self) -> RuleCategory {
490 RuleCategory::Html
491 }
492
493 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
495 ctx.content.is_empty() || !ctx.likely_has_html()
496 }
497
498 fn as_any(&self) -> &dyn std::any::Any {
499 self
500 }
501
502 fn default_config_section(&self) -> Option<(String, toml::Value)> {
503 let json_value = serde_json::to_value(&self.config).ok()?;
504 Some((
505 self.name().to_string(),
506 crate::rule_config_serde::json_to_toml_value(&json_value)?,
507 ))
508 }
509
510 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
511 where
512 Self: Sized,
513 {
514 let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
515 Box::new(Self::from_config_struct(rule_config))
516 }
517}
518
519#[cfg(test)]
520mod tests {
521 use super::*;
522 use crate::lint_context::LintContext;
523 use crate::rule::Rule;
524
525 #[test]
526 fn test_md033_basic_html() {
527 let rule = MD033NoInlineHtml::default();
528 let content = "<div>Some content</div>";
529 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
530 let result = rule.check(&ctx).unwrap();
531 assert_eq!(result.len(), 1); assert!(result[0].message.starts_with("Inline HTML found: <div>"));
534 }
535
536 #[test]
537 fn test_md033_case_insensitive() {
538 let rule = MD033NoInlineHtml::default();
539 let content = "<DiV>Some <B>content</B></dIv>";
540 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
541 let result = rule.check(&ctx).unwrap();
542 assert_eq!(result.len(), 2); assert_eq!(result[0].message, "Inline HTML found: <DiV>");
545 assert_eq!(result[1].message, "Inline HTML found: <B>");
546 }
547
548 #[test]
549 fn test_md033_allowed_tags() {
550 let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
551 let content = "<div>Allowed</div><p>Not allowed</p><br/>";
552 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
553 let result = rule.check(&ctx).unwrap();
554 assert_eq!(result.len(), 1);
556 assert_eq!(result[0].message, "Inline HTML found: <p>");
557
558 let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
560 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
561 let result2 = rule.check(&ctx2).unwrap();
562 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <P>");
564 }
565
566 #[test]
567 fn test_md033_html_comments() {
568 let rule = MD033NoInlineHtml::default();
569 let content = "<!-- This is a comment --> <p>Not a comment</p>";
570 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
571 let result = rule.check(&ctx).unwrap();
572 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <p>");
575 }
576
577 #[test]
578 fn test_md033_tags_in_links() {
579 let rule = MD033NoInlineHtml::default();
580 let content = "[Link](http://example.com/<div>)";
581 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
582 let result = rule.check(&ctx).unwrap();
583 assert_eq!(result.len(), 1);
585 assert_eq!(result[0].message, "Inline HTML found: <div>");
586
587 let content2 = "[Link <a>text</a>](url)";
588 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
589 let result2 = rule.check(&ctx2).unwrap();
590 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <a>");
593 }
594
595 #[test]
596 fn test_md033_fix_escaping() {
597 let rule = MD033NoInlineHtml::default();
598 let content = "Text with <div> and <br/> tags.";
599 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
600 let fixed_content = rule.fix(&ctx).unwrap();
601 assert_eq!(fixed_content, content);
603 }
604
605 #[test]
606 fn test_md033_in_code_blocks() {
607 let rule = MD033NoInlineHtml::default();
608 let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
609 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
610 let result = rule.check(&ctx).unwrap();
611 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <div>");
614 }
615
616 #[test]
617 fn test_md033_in_code_spans() {
618 let rule = MD033NoInlineHtml::default();
619 let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
620 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
621 let result = rule.check(&ctx).unwrap();
622 assert_eq!(result.len(), 1);
624 assert_eq!(result[0].message, "Inline HTML found: <br/>");
625 }
626
627 #[test]
628 fn test_md033_issue_90_code_span_with_diff_block() {
629 let rule = MD033NoInlineHtml::default();
631 let content = r#"# Heading
632
633`<env>`
634
635```diff
636- this
637+ that
638```"#;
639 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
640 let result = rule.check(&ctx).unwrap();
641 assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
643 }
644
645 #[test]
646 fn test_md033_multiple_code_spans_with_angle_brackets() {
647 let rule = MD033NoInlineHtml::default();
649 let content = "`<one>` and `<two>` and `<three>` are all code spans";
650 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
651 let result = rule.check(&ctx).unwrap();
652 assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
653 }
654
655 #[test]
656 fn test_md033_nested_angle_brackets_in_code_span() {
657 let rule = MD033NoInlineHtml::default();
659 let content = "Text with `<<nested>>` brackets";
660 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
661 let result = rule.check(&ctx).unwrap();
662 assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
663 }
664
665 #[test]
666 fn test_md033_code_span_at_end_before_code_block() {
667 let rule = MD033NoInlineHtml::default();
669 let content = "Testing `<test>`\n```\ncode here\n```";
670 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
671 let result = rule.check(&ctx).unwrap();
672 assert_eq!(result.len(), 0, "Should handle code span before code block");
673 }
674
675 #[test]
676 fn test_md033_quick_fix_inline_tag() {
677 let rule = MD033NoInlineHtml::default();
679 let content = "This has <span>inline text</span> that should keep content.";
680 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
681 let result = rule.check(&ctx).unwrap();
682
683 assert_eq!(result.len(), 1, "Should find one HTML tag");
684 assert!(result[0].fix.is_some(), "Should have a fix");
685
686 let fix = result[0].fix.as_ref().unwrap();
687 assert_eq!(&content[fix.range.clone()], "<span>inline text</span>");
688 assert_eq!(fix.replacement, "inline text");
689 }
690
691 #[test]
692 fn test_md033_quick_fix_multiline_tag() {
693 let rule = MD033NoInlineHtml::default();
695 let content = "<div>\nBlock content\n</div>";
696 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
697 let result = rule.check(&ctx).unwrap();
698
699 assert_eq!(result.len(), 1, "Should find one HTML tag");
700 assert!(result[0].fix.is_some(), "Should have a fix");
701
702 let fix = result[0].fix.as_ref().unwrap();
703 assert_eq!(&content[fix.range.clone()], "<div>\nBlock content\n</div>");
704 assert_eq!(fix.replacement, "\nBlock content\n");
705 }
706
707 #[test]
708 fn test_md033_quick_fix_self_closing_tag() {
709 let rule = MD033NoInlineHtml::default();
711 let content = "Self-closing: <br/>";
712 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
713 let result = rule.check(&ctx).unwrap();
714
715 assert_eq!(result.len(), 1, "Should find one HTML tag");
716 assert!(result[0].fix.is_some(), "Should have a fix");
717
718 let fix = result[0].fix.as_ref().unwrap();
719 assert_eq!(&content[fix.range.clone()], "<br/>");
720 assert_eq!(fix.replacement, "");
721 }
722
723 #[test]
724 fn test_md033_quick_fix_multiple_tags() {
725 let rule = MD033NoInlineHtml::default();
727 let content = "<span>first</span> and <strong>second</strong>";
728 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
729 let result = rule.check(&ctx).unwrap();
730
731 assert_eq!(result.len(), 2, "Should find two HTML tags");
732 assert!(result[0].fix.is_some(), "First tag should have a fix");
733 assert!(result[1].fix.is_some(), "Second tag should have a fix");
734
735 let fix1 = result[0].fix.as_ref().unwrap();
736 assert_eq!(&content[fix1.range.clone()], "<span>first</span>");
737 assert_eq!(fix1.replacement, "first");
738
739 let fix2 = result[1].fix.as_ref().unwrap();
740 assert_eq!(&content[fix2.range.clone()], "<strong>second</strong>");
741 assert_eq!(fix2.replacement, "second");
742 }
743}