1use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::range_utils::calculate_html_tag_range;
9use crate::utils::regex_cache::*;
10use std::collections::HashSet;
11
12mod md033_config;
13use md033_config::MD033Config;
14
15#[derive(Clone)]
16pub struct MD033NoInlineHtml {
17 config: MD033Config,
18 allowed: HashSet<String>,
19}
20
21impl Default for MD033NoInlineHtml {
22 fn default() -> Self {
23 let config = MD033Config::default();
24 let allowed = config.allowed_set();
25 Self { config, allowed }
26 }
27}
28
29impl MD033NoInlineHtml {
30 pub fn new() -> Self {
31 Self::default()
32 }
33
34 pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
35 let config = MD033Config {
36 allowed: allowed_vec.clone(),
37 };
38 let allowed = config.allowed_set();
39 Self { config, allowed }
40 }
41
42 pub fn from_config_struct(config: MD033Config) -> Self {
43 let allowed = config.allowed_set();
44 Self { config, allowed }
45 }
46
47 #[inline]
49 fn is_tag_allowed(&self, tag: &str) -> bool {
50 if self.allowed.is_empty() {
51 return false;
52 }
53 let tag = tag.trim_start_matches('<').trim_start_matches('/');
55 let tag_name = tag
56 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
57 .next()
58 .unwrap_or("");
59 self.allowed.contains(&tag_name.to_lowercase())
60 }
61
62 #[inline]
64 fn is_html_comment(&self, tag: &str) -> bool {
65 tag.starts_with("<!--") && tag.ends_with("-->")
66 }
67
68 #[inline]
70 fn is_likely_type_annotation(&self, tag: &str) -> bool {
71 const COMMON_TYPES: &[&str] = &[
73 "string",
74 "number",
75 "any",
76 "void",
77 "null",
78 "undefined",
79 "array",
80 "promise",
81 "function",
82 "error",
83 "date",
84 "regexp",
85 "symbol",
86 "bigint",
87 "map",
88 "set",
89 "weakmap",
90 "weakset",
91 "iterator",
92 "generator",
93 "t",
94 "u",
95 "v",
96 "k",
97 "e", "userdata",
99 "apiresponse",
100 "config",
101 "options",
102 "params",
103 "result",
104 "response",
105 "request",
106 "data",
107 "item",
108 "element",
109 "node",
110 ];
111
112 let tag_content = tag
113 .trim_start_matches('<')
114 .trim_end_matches('>')
115 .trim_start_matches('/');
116 let tag_name = tag_content
117 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
118 .next()
119 .unwrap_or("");
120
121 if !tag_content.contains(' ') && !tag_content.contains('=') {
123 COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
124 } else {
125 false
126 }
127 }
128
129 #[inline]
131 fn is_email_address(&self, tag: &str) -> bool {
132 let content = tag.trim_start_matches('<').trim_end_matches('>');
133 content.contains('@')
135 && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
136 && content.split('@').count() == 2
137 && content.split('@').all(|part| !part.is_empty())
138 }
139
140 #[inline]
142 fn has_markdown_attribute(&self, tag: &str) -> bool {
143 tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
146 }
147
148 #[inline]
150 fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
151 let content = tag.trim_start_matches('<').trim_end_matches('>');
152 content.starts_with("http://")
154 || content.starts_with("https://")
155 || content.starts_with("ftp://")
156 || content.starts_with("ftps://")
157 || content.starts_with("mailto:")
158 }
159
160 fn find_multiline_html_tags(
162 &self,
163 ctx: &crate::lint_context::LintContext,
164 content: &str,
165 nomarkdown_ranges: &[(usize, usize)],
166 warnings: &mut Vec<LintWarning>,
167 ) {
168 if !content.contains('<') || !content.lines().any(|line| line.trim_end().ends_with('<')) {
170 return;
171 }
172
173 lazy_static::lazy_static! {
175 static ref INCOMPLETE_TAG_START: regex::Regex = regex::Regex::new(r"(?i)<[a-zA-Z][^>]*$").unwrap();
176 }
177
178 let lines: Vec<&str> = content.lines().collect();
179
180 for (i, line) in lines.iter().enumerate() {
181 let line_num = i + 1;
182
183 if line.trim().is_empty() || ctx.is_in_code_block(line_num) {
185 continue;
186 }
187
188 if nomarkdown_ranges
190 .iter()
191 .any(|(start, end)| line_num >= *start && line_num <= *end)
192 {
193 continue;
194 }
195
196 if !line.contains('<') {
198 continue;
199 }
200
201 if let Some(incomplete_match) = INCOMPLETE_TAG_START.find(line) {
203 let start_column = incomplete_match.start() + 1; let mut complete_tag = incomplete_match.as_str().to_string();
207 let mut found_end = false;
208
209 for (j, next_line) in lines.iter().enumerate().skip(i + 1).take(10) {
211 let next_line_num = j + 1;
212
213 if ctx.is_in_code_block(next_line_num) {
215 break;
216 }
217
218 complete_tag.push(' '); complete_tag.push_str(next_line.trim());
220
221 if next_line.contains('>') {
222 found_end = true;
223 break;
224 }
225 }
226
227 if found_end {
228 if let Some(end_pos) = complete_tag.find('>') {
230 let final_tag = &complete_tag[0..=end_pos];
231
232 let skip_mkdocs_markdown = ctx.flavor == crate::config::MarkdownFlavor::MkDocs
234 && self.has_markdown_attribute(final_tag);
235
236 if !self.is_html_comment(final_tag)
237 && !self.is_likely_type_annotation(final_tag)
238 && !self.is_email_address(final_tag)
239 && !self.is_url_in_angle_brackets(final_tag)
240 && !self.is_tag_allowed(final_tag)
241 && !skip_mkdocs_markdown
242 && HTML_TAG_FINDER.is_match(final_tag)
243 {
244 let already_warned =
246 warnings.iter().any(|w| w.line == line_num && w.column == start_column);
247
248 if !already_warned {
249 let (start_line, start_col, end_line, end_col) = calculate_html_tag_range(
250 line_num,
251 line,
252 incomplete_match.start(),
253 incomplete_match.len(),
254 );
255 warnings.push(LintWarning {
256 rule_name: Some(self.name()),
257 line: start_line,
258 column: start_col,
259 end_line,
260 end_column: end_col,
261 message: format!("HTML tag found: {final_tag} (use Markdown syntax instead)"),
262 severity: Severity::Warning,
263 fix: None,
264 });
265 }
266 }
267 }
268 }
269 }
270 }
271 }
272}
273
274impl Rule for MD033NoInlineHtml {
275 fn name(&self) -> &'static str {
276 "MD033"
277 }
278
279 fn description(&self) -> &'static str {
280 "Inline HTML is not allowed"
281 }
282
283 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
284 let content = ctx.content;
285
286 if content.is_empty() || !ctx.likely_has_html() {
288 return Ok(Vec::new());
289 }
290
291 if !HTML_TAG_QUICK_CHECK.is_match(content) {
293 return Ok(Vec::new());
294 }
295
296 let mut warnings = Vec::new();
297 let lines: Vec<&str> = content.lines().collect();
298
299 let mut in_nomarkdown = false;
301 let mut in_comment = false;
302 let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
303 let mut nomarkdown_start = 0;
304 let mut comment_start = 0;
305
306 for (i, line) in lines.iter().enumerate() {
308 let line_num = i + 1;
309
310 if line.trim() == "{::nomarkdown}" {
312 in_nomarkdown = true;
313 nomarkdown_start = line_num;
314 } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
315 in_nomarkdown = false;
316 nomarkdown_ranges.push((nomarkdown_start, line_num));
317 }
318
319 if line.trim() == "{::comment}" {
321 in_comment = true;
322 comment_start = line_num;
323 } else if line.trim() == "{:/comment}" && in_comment {
324 in_comment = false;
325 nomarkdown_ranges.push((comment_start, line_num));
326 }
327 }
328
329 for (i, line) in lines.iter().enumerate() {
332 let line_num = i + 1;
333
334 if line.trim().is_empty() {
335 continue;
336 }
337 if ctx.is_in_code_block(line_num) {
338 continue;
339 }
340 if line.starts_with(" ") || line.starts_with('\t') {
343 continue;
344 }
345
346 if nomarkdown_ranges
348 .iter()
349 .any(|(start, end)| line_num >= *start && line_num <= *end)
350 {
351 continue;
352 }
353
354 if is_kramdown_extension(line) || is_kramdown_block_attribute(line) {
356 continue;
357 }
358
359 for tag_match in HTML_TAG_FINDER.find_iter(line) {
361 let tag = tag_match.as_str();
362
363 if self.is_html_comment(tag) {
365 continue;
366 }
367
368 if self.is_likely_type_annotation(tag) {
370 continue;
371 }
372
373 if self.is_email_address(tag) {
375 continue;
376 }
377
378 if self.is_url_in_angle_brackets(tag) {
380 continue;
381 }
382
383 let tag_start_col = tag_match.start() + 1; if ctx.is_in_code_span(line_num, tag_start_col) {
386 continue;
387 }
388
389 if self.is_tag_allowed(tag) {
391 continue;
392 }
393
394 if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
396 continue;
397 }
398
399 let (start_line, start_col, end_line, end_col) =
401 calculate_html_tag_range(line_num, line, tag_match.start(), tag_match.len());
402 warnings.push(LintWarning {
403 rule_name: Some(self.name()),
404 line: start_line,
405 column: start_col,
406 end_line,
407 end_column: end_col,
408 message: format!("Inline HTML found: {tag} (use Markdown syntax instead)"),
409 severity: Severity::Warning,
410 fix: None,
411 });
412 }
413 }
414
415 self.find_multiline_html_tags(ctx, ctx.content, &nomarkdown_ranges, &mut warnings);
417
418 Ok(warnings)
419 }
420
421 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
422 Ok(ctx.content.to_string())
424 }
425
426 fn fix_capability(&self) -> crate::rule::FixCapability {
427 crate::rule::FixCapability::Unfixable
428 }
429
430 fn category(&self) -> RuleCategory {
432 RuleCategory::Html
433 }
434
435 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
437 ctx.content.is_empty() || !ctx.likely_has_html()
438 }
439
440 fn as_any(&self) -> &dyn std::any::Any {
441 self
442 }
443
444 fn default_config_section(&self) -> Option<(String, toml::Value)> {
445 let json_value = serde_json::to_value(&self.config).ok()?;
446 Some((
447 self.name().to_string(),
448 crate::rule_config_serde::json_to_toml_value(&json_value)?,
449 ))
450 }
451
452 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
453 where
454 Self: Sized,
455 {
456 let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
457 Box::new(Self::from_config_struct(rule_config))
458 }
459}
460
461#[cfg(test)]
462mod tests {
463 use super::*;
464 use crate::lint_context::LintContext;
465 use crate::rule::Rule;
466
467 #[test]
468 fn test_md033_basic_html() {
469 let rule = MD033NoInlineHtml::default();
470 let content = "<div>Some content</div>";
471 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
472 let result = rule.check(&ctx).unwrap();
473 assert_eq!(result.len(), 2); assert!(result[0].message.starts_with("Inline HTML found: <div>"));
476 assert!(result[1].message.starts_with("Inline HTML found: </div>"));
477 }
478
479 #[test]
480 fn test_md033_case_insensitive() {
481 let rule = MD033NoInlineHtml::default();
482 let content = "<DiV>Some <B>content</B></dIv>";
483 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
484 let result = rule.check(&ctx).unwrap();
485 assert_eq!(result.len(), 4); assert_eq!(
488 result[0].message,
489 "Inline HTML found: <DiV> (use Markdown syntax instead)"
490 );
491 assert_eq!(
492 result[1].message,
493 "Inline HTML found: <B> (use Markdown syntax instead)"
494 );
495 assert_eq!(
496 result[2].message,
497 "Inline HTML found: </B> (use Markdown syntax instead)"
498 );
499 assert_eq!(
500 result[3].message,
501 "Inline HTML found: </dIv> (use Markdown syntax instead)"
502 );
503 }
504
505 #[test]
506 fn test_md033_allowed_tags() {
507 let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
508 let content = "<div>Allowed</div><p>Not allowed</p><br/>";
509 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
510 let result = rule.check(&ctx).unwrap();
511 assert_eq!(result.len(), 2);
513 assert_eq!(
514 result[0].message,
515 "Inline HTML found: <p> (use Markdown syntax instead)"
516 );
517 assert_eq!(
518 result[1].message,
519 "Inline HTML found: </p> (use Markdown syntax instead)"
520 );
521
522 let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
524 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
525 let result2 = rule.check(&ctx2).unwrap();
526 assert_eq!(result2.len(), 2); assert_eq!(
528 result2[0].message,
529 "Inline HTML found: <P> (use Markdown syntax instead)"
530 );
531 assert_eq!(
532 result2[1].message,
533 "Inline HTML found: </P> (use Markdown syntax instead)"
534 );
535 }
536
537 #[test]
538 fn test_md033_html_comments() {
539 let rule = MD033NoInlineHtml::default();
540 let content = "<!-- This is a comment --> <p>Not a comment</p>";
541 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
542 let result = rule.check(&ctx).unwrap();
543 assert_eq!(result.len(), 2); assert_eq!(
546 result[0].message,
547 "Inline HTML found: <p> (use Markdown syntax instead)"
548 );
549 assert_eq!(
550 result[1].message,
551 "Inline HTML found: </p> (use Markdown syntax instead)"
552 );
553 }
554
555 #[test]
556 fn test_md033_tags_in_links() {
557 let rule = MD033NoInlineHtml::default();
558 let content = "[Link](http://example.com/<div>)";
559 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
560 let result = rule.check(&ctx).unwrap();
561 assert_eq!(result.len(), 1);
563 assert_eq!(
564 result[0].message,
565 "Inline HTML found: <div> (use Markdown syntax instead)"
566 );
567
568 let content2 = "[Link <a>text</a>](url)";
569 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
570 let result2 = rule.check(&ctx2).unwrap();
571 assert_eq!(result2.len(), 2); assert_eq!(
574 result2[0].message,
575 "Inline HTML found: <a> (use Markdown syntax instead)"
576 );
577 assert_eq!(
578 result2[1].message,
579 "Inline HTML found: </a> (use Markdown syntax instead)"
580 );
581 }
582
583 #[test]
584 fn test_md033_fix_escaping() {
585 let rule = MD033NoInlineHtml::default();
586 let content = "Text with <div> and <br/> tags.";
587 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
588 let fixed_content = rule.fix(&ctx).unwrap();
589 assert_eq!(fixed_content, content);
591 }
592
593 #[test]
594 fn test_md033_in_code_blocks() {
595 let rule = MD033NoInlineHtml::default();
596 let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
597 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
598 let result = rule.check(&ctx).unwrap();
599 assert_eq!(result.len(), 2); assert_eq!(
602 result[0].message,
603 "Inline HTML found: <div> (use Markdown syntax instead)"
604 );
605 assert_eq!(
606 result[1].message,
607 "Inline HTML found: </div> (use Markdown syntax instead)"
608 );
609 }
610
611 #[test]
612 fn test_md033_in_code_spans() {
613 let rule = MD033NoInlineHtml::default();
614 let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
615 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
616 let result = rule.check(&ctx).unwrap();
617 assert_eq!(result.len(), 1);
619 assert_eq!(
620 result[0].message,
621 "Inline HTML found: <br/> (use Markdown syntax instead)"
622 );
623 }
624
625 #[test]
626 fn test_md033_issue_90_code_span_with_diff_block() {
627 let rule = MD033NoInlineHtml::default();
629 let content = r#"# Heading
630
631`<env>`
632
633```diff
634- this
635+ that
636```"#;
637 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
638 let result = rule.check(&ctx).unwrap();
639 assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
641 }
642
643 #[test]
644 fn test_md033_multiple_code_spans_with_angle_brackets() {
645 let rule = MD033NoInlineHtml::default();
647 let content = "`<one>` and `<two>` and `<three>` are all code spans";
648 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
649 let result = rule.check(&ctx).unwrap();
650 assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
651 }
652
653 #[test]
654 fn test_md033_nested_angle_brackets_in_code_span() {
655 let rule = MD033NoInlineHtml::default();
657 let content = "Text with `<<nested>>` brackets";
658 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
659 let result = rule.check(&ctx).unwrap();
660 assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
661 }
662
663 #[test]
664 fn test_md033_code_span_at_end_before_code_block() {
665 let rule = MD033NoInlineHtml::default();
667 let content = "Testing `<test>`\n```\ncode here\n```";
668 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
669 let result = rule.check(&ctx).unwrap();
670 assert_eq!(result.len(), 0, "Should handle code span before code block");
671 }
672}