1use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::range_utils::calculate_html_tag_range;
9use crate::utils::regex_cache::*;
10use std::collections::HashSet;
11
12mod md033_config;
13use md033_config::MD033Config;
14
/// Lint rule MD033: reports HTML tags found in Markdown content.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    /// Rule configuration; also serialized by `default_config_section`.
    config: MD033Config,
    /// Set obtained from `config.allowed_set()`; tags whose lower-cased name is
    /// in this set are not reported.
    allowed: HashSet<String>,
}
20
21impl Default for MD033NoInlineHtml {
22 fn default() -> Self {
23 let config = MD033Config::default();
24 let allowed = config.allowed_set();
25 Self { config, allowed }
26 }
27}
28
29impl MD033NoInlineHtml {
30 pub fn new() -> Self {
31 Self::default()
32 }
33
34 pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
35 let config = MD033Config {
36 allowed: allowed_vec.clone(),
37 };
38 let allowed = config.allowed_set();
39 Self { config, allowed }
40 }
41
42 pub fn from_config_struct(config: MD033Config) -> Self {
43 let allowed = config.allowed_set();
44 Self { config, allowed }
45 }
46
47 #[inline]
49 fn is_tag_allowed(&self, tag: &str) -> bool {
50 if self.allowed.is_empty() {
51 return false;
52 }
53 let tag = tag.trim_start_matches('<').trim_start_matches('/');
55 let tag_name = tag
56 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
57 .next()
58 .unwrap_or("");
59 self.allowed.contains(&tag_name.to_lowercase())
60 }
61
62 #[inline]
64 fn is_html_comment(&self, tag: &str) -> bool {
65 tag.starts_with("<!--") && tag.ends_with("-->")
66 }
67
68 #[inline]
70 fn is_likely_type_annotation(&self, tag: &str) -> bool {
71 const COMMON_TYPES: &[&str] = &[
73 "string",
74 "number",
75 "any",
76 "void",
77 "null",
78 "undefined",
79 "array",
80 "promise",
81 "function",
82 "error",
83 "date",
84 "regexp",
85 "symbol",
86 "bigint",
87 "map",
88 "set",
89 "weakmap",
90 "weakset",
91 "iterator",
92 "generator",
93 "t",
94 "u",
95 "v",
96 "k",
97 "e", "userdata",
99 "apiresponse",
100 "config",
101 "options",
102 "params",
103 "result",
104 "response",
105 "request",
106 "data",
107 "item",
108 "element",
109 "node",
110 ];
111
112 let tag_content = tag
113 .trim_start_matches('<')
114 .trim_end_matches('>')
115 .trim_start_matches('/');
116 let tag_name = tag_content
117 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
118 .next()
119 .unwrap_or("");
120
121 if !tag_content.contains(' ') && !tag_content.contains('=') {
123 COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
124 } else {
125 false
126 }
127 }
128
129 #[inline]
131 fn is_email_address(&self, tag: &str) -> bool {
132 let content = tag.trim_start_matches('<').trim_end_matches('>');
133 content.contains('@')
135 && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
136 && content.split('@').count() == 2
137 && content.split('@').all(|part| !part.is_empty())
138 }
139
140 #[inline]
142 fn has_markdown_attribute(&self, tag: &str) -> bool {
143 tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
146 }
147
148 #[inline]
150 fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
151 let content = tag.trim_start_matches('<').trim_end_matches('>');
152 content.starts_with("http://")
154 || content.starts_with("https://")
155 || content.starts_with("ftp://")
156 || content.starts_with("ftps://")
157 || content.starts_with("mailto:")
158 }
159
160 fn find_multiline_html_tags(
162 &self,
163 ctx: &crate::lint_context::LintContext,
164 content: &str,
165 nomarkdown_ranges: &[(usize, usize)],
166 warnings: &mut Vec<LintWarning>,
167 ) {
168 if !content.contains('<') || !content.lines().any(|line| line.trim_end().ends_with('<')) {
170 return;
171 }
172
173 lazy_static::lazy_static! {
175 static ref INCOMPLETE_TAG_START: regex::Regex = regex::Regex::new(r"(?i)<[a-zA-Z][^>]*$").unwrap();
176 }
177
178 let lines: Vec<&str> = content.lines().collect();
179
180 for (i, line) in lines.iter().enumerate() {
181 let line_num = i + 1;
182
183 if line.trim().is_empty() || ctx.is_in_code_block(line_num) {
185 continue;
186 }
187
188 if nomarkdown_ranges
190 .iter()
191 .any(|(start, end)| line_num >= *start && line_num <= *end)
192 {
193 continue;
194 }
195
196 if !line.contains('<') {
198 continue;
199 }
200
201 if let Some(incomplete_match) = INCOMPLETE_TAG_START.find(line) {
203 let start_column = incomplete_match.start() + 1; let mut complete_tag = incomplete_match.as_str().to_string();
207 let mut found_end = false;
208
209 for (j, next_line) in lines.iter().enumerate().skip(i + 1).take(10) {
211 let next_line_num = j + 1;
212
213 if ctx.is_in_code_block(next_line_num) {
215 break;
216 }
217
218 complete_tag.push(' '); complete_tag.push_str(next_line.trim());
220
221 if next_line.contains('>') {
222 found_end = true;
223 break;
224 }
225 }
226
227 if found_end {
228 if let Some(end_pos) = complete_tag.find('>') {
230 let final_tag = &complete_tag[0..=end_pos];
231
232 let skip_mkdocs_markdown = ctx.flavor == crate::config::MarkdownFlavor::MkDocs
234 && self.has_markdown_attribute(final_tag);
235
236 if !self.is_html_comment(final_tag)
237 && !self.is_likely_type_annotation(final_tag)
238 && !self.is_email_address(final_tag)
239 && !self.is_url_in_angle_brackets(final_tag)
240 && !self.is_tag_allowed(final_tag)
241 && !skip_mkdocs_markdown
242 && HTML_TAG_FINDER.is_match(final_tag)
243 {
244 let already_warned =
246 warnings.iter().any(|w| w.line == line_num && w.column == start_column);
247
248 if !already_warned {
249 let (start_line, start_col, end_line, end_col) = calculate_html_tag_range(
250 line_num,
251 line,
252 incomplete_match.start(),
253 incomplete_match.len(),
254 );
255 warnings.push(LintWarning {
256 rule_name: Some(self.name()),
257 line: start_line,
258 column: start_col,
259 end_line,
260 end_column: end_col,
261 message: format!("HTML tag found: {final_tag} (use Markdown syntax instead)"),
262 severity: Severity::Warning,
263 fix: None,
264 });
265 }
266 }
267 }
268 }
269 }
270 }
271 }
272}
273
274impl Rule for MD033NoInlineHtml {
275 fn name(&self) -> &'static str {
276 "MD033"
277 }
278
279 fn description(&self) -> &'static str {
280 "Inline HTML is not allowed"
281 }
282
283 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
284 let content = ctx.content;
285
286 if content.is_empty() || !has_html_tags(content) {
288 return Ok(Vec::new());
289 }
290
291 if !HTML_TAG_QUICK_CHECK.is_match(content) {
293 return Ok(Vec::new());
294 }
295
296 let mut warnings = Vec::new();
297 let lines: Vec<&str> = content.lines().collect();
298
299 let mut in_nomarkdown = false;
301 let mut in_comment = false;
302 let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
303 let mut nomarkdown_start = 0;
304 let mut comment_start = 0;
305
306 for (i, line) in lines.iter().enumerate() {
308 let line_num = i + 1;
309
310 if line.trim() == "{::nomarkdown}" {
312 in_nomarkdown = true;
313 nomarkdown_start = line_num;
314 } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
315 in_nomarkdown = false;
316 nomarkdown_ranges.push((nomarkdown_start, line_num));
317 }
318
319 if line.trim() == "{::comment}" {
321 in_comment = true;
322 comment_start = line_num;
323 } else if line.trim() == "{:/comment}" && in_comment {
324 in_comment = false;
325 nomarkdown_ranges.push((comment_start, line_num));
326 }
327 }
328
329 for (i, line) in lines.iter().enumerate() {
332 let line_num = i + 1;
333
334 if line.trim().is_empty() {
335 continue;
336 }
337 if ctx.is_in_code_block(line_num) {
338 continue;
339 }
340 if line.starts_with(" ") || line.starts_with('\t') {
343 continue;
344 }
345
346 if nomarkdown_ranges
348 .iter()
349 .any(|(start, end)| line_num >= *start && line_num <= *end)
350 {
351 continue;
352 }
353
354 if is_kramdown_extension(line) || is_kramdown_block_attribute(line) {
356 continue;
357 }
358
359 for tag_match in HTML_TAG_FINDER.find_iter(line) {
361 let tag = tag_match.as_str();
362
363 if self.is_html_comment(tag) {
365 continue;
366 }
367
368 if self.is_likely_type_annotation(tag) {
370 continue;
371 }
372
373 if self.is_email_address(tag) {
375 continue;
376 }
377
378 if self.is_url_in_angle_brackets(tag) {
380 continue;
381 }
382
383 let tag_start_col = tag_match.start() + 1; if ctx.is_in_code_span(line_num, tag_start_col) {
386 continue;
387 }
388
389 if self.is_tag_allowed(tag) {
391 continue;
392 }
393
394 if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
396 continue;
397 }
398
399 let (start_line, start_col, end_line, end_col) =
401 calculate_html_tag_range(line_num, line, tag_match.start(), tag_match.len());
402 warnings.push(LintWarning {
403 rule_name: Some(self.name()),
404 line: start_line,
405 column: start_col,
406 end_line,
407 end_column: end_col,
408 message: format!("Inline HTML found: {tag} (use Markdown syntax instead)"),
409 severity: Severity::Warning,
410 fix: None,
411 });
412 }
413 }
414
415 self.find_multiline_html_tags(ctx, ctx.content, &nomarkdown_ranges, &mut warnings);
417
418 Ok(warnings)
419 }
420
421 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
422 Ok(ctx.content.to_string())
424 }
425
426 fn fix_capability(&self) -> crate::rule::FixCapability {
427 crate::rule::FixCapability::Unfixable
428 }
429
430 fn category(&self) -> RuleCategory {
432 RuleCategory::Html
433 }
434
435 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
437 let content = ctx.content;
438 content.is_empty() || !has_html_tags(content)
439 }
440
441 fn as_any(&self) -> &dyn std::any::Any {
442 self
443 }
444
445 fn default_config_section(&self) -> Option<(String, toml::Value)> {
446 let json_value = serde_json::to_value(&self.config).ok()?;
447 Some((
448 self.name().to_string(),
449 crate::rule_config_serde::json_to_toml_value(&json_value)?,
450 ))
451 }
452
453 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
454 where
455 Self: Sized,
456 {
457 let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
458 Box::new(Self::from_config_struct(rule_config))
459 }
460}
461
462#[cfg(test)]
463mod tests {
464 use super::*;
465 use crate::lint_context::LintContext;
466 use crate::rule::Rule;
467
468 #[test]
469 fn test_md033_basic_html() {
470 let rule = MD033NoInlineHtml::default();
471 let content = "<div>Some content</div>";
472 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
473 let result = rule.check(&ctx).unwrap();
474 assert_eq!(result.len(), 2); assert!(result[0].message.starts_with("Inline HTML found: <div>"));
477 assert!(result[1].message.starts_with("Inline HTML found: </div>"));
478 }
479
480 #[test]
481 fn test_md033_case_insensitive() {
482 let rule = MD033NoInlineHtml::default();
483 let content = "<DiV>Some <B>content</B></dIv>";
484 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
485 let result = rule.check(&ctx).unwrap();
486 assert_eq!(result.len(), 4); assert_eq!(
489 result[0].message,
490 "Inline HTML found: <DiV> (use Markdown syntax instead)"
491 );
492 assert_eq!(
493 result[1].message,
494 "Inline HTML found: <B> (use Markdown syntax instead)"
495 );
496 assert_eq!(
497 result[2].message,
498 "Inline HTML found: </B> (use Markdown syntax instead)"
499 );
500 assert_eq!(
501 result[3].message,
502 "Inline HTML found: </dIv> (use Markdown syntax instead)"
503 );
504 }
505
506 #[test]
507 fn test_md033_allowed_tags() {
508 let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
509 let content = "<div>Allowed</div><p>Not allowed</p><br/>";
510 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
511 let result = rule.check(&ctx).unwrap();
512 assert_eq!(result.len(), 2);
514 assert_eq!(
515 result[0].message,
516 "Inline HTML found: <p> (use Markdown syntax instead)"
517 );
518 assert_eq!(
519 result[1].message,
520 "Inline HTML found: </p> (use Markdown syntax instead)"
521 );
522
523 let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
525 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
526 let result2 = rule.check(&ctx2).unwrap();
527 assert_eq!(result2.len(), 2); assert_eq!(
529 result2[0].message,
530 "Inline HTML found: <P> (use Markdown syntax instead)"
531 );
532 assert_eq!(
533 result2[1].message,
534 "Inline HTML found: </P> (use Markdown syntax instead)"
535 );
536 }
537
538 #[test]
539 fn test_md033_html_comments() {
540 let rule = MD033NoInlineHtml::default();
541 let content = "<!-- This is a comment --> <p>Not a comment</p>";
542 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
543 let result = rule.check(&ctx).unwrap();
544 assert_eq!(result.len(), 2); assert_eq!(
547 result[0].message,
548 "Inline HTML found: <p> (use Markdown syntax instead)"
549 );
550 assert_eq!(
551 result[1].message,
552 "Inline HTML found: </p> (use Markdown syntax instead)"
553 );
554 }
555
556 #[test]
557 fn test_md033_tags_in_links() {
558 let rule = MD033NoInlineHtml::default();
559 let content = "[Link](http://example.com/<div>)";
560 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
561 let result = rule.check(&ctx).unwrap();
562 assert_eq!(result.len(), 1);
564 assert_eq!(
565 result[0].message,
566 "Inline HTML found: <div> (use Markdown syntax instead)"
567 );
568
569 let content2 = "[Link <a>text</a>](url)";
570 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
571 let result2 = rule.check(&ctx2).unwrap();
572 assert_eq!(result2.len(), 2); assert_eq!(
575 result2[0].message,
576 "Inline HTML found: <a> (use Markdown syntax instead)"
577 );
578 assert_eq!(
579 result2[1].message,
580 "Inline HTML found: </a> (use Markdown syntax instead)"
581 );
582 }
583
584 #[test]
585 fn test_md033_fix_escaping() {
586 let rule = MD033NoInlineHtml::default();
587 let content = "Text with <div> and <br/> tags.";
588 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
589 let fixed_content = rule.fix(&ctx).unwrap();
590 assert_eq!(fixed_content, content);
592 }
593
594 #[test]
595 fn test_md033_in_code_blocks() {
596 let rule = MD033NoInlineHtml::default();
597 let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
598 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
599 let result = rule.check(&ctx).unwrap();
600 assert_eq!(result.len(), 2); assert_eq!(
603 result[0].message,
604 "Inline HTML found: <div> (use Markdown syntax instead)"
605 );
606 assert_eq!(
607 result[1].message,
608 "Inline HTML found: </div> (use Markdown syntax instead)"
609 );
610 }
611
612 #[test]
613 fn test_md033_in_code_spans() {
614 let rule = MD033NoInlineHtml::default();
615 let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
616 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
617 let result = rule.check(&ctx).unwrap();
618 assert_eq!(result.len(), 1);
620 assert_eq!(
621 result[0].message,
622 "Inline HTML found: <br/> (use Markdown syntax instead)"
623 );
624 }
625
626 #[test]
627 fn test_md033_issue_90_code_span_with_diff_block() {
628 let rule = MD033NoInlineHtml::default();
630 let content = r#"# Heading
631
632`<env>`
633
634```diff
635- this
636+ that
637```"#;
638 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
639 let result = rule.check(&ctx).unwrap();
640 assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
642 }
643
644 #[test]
645 fn test_md033_multiple_code_spans_with_angle_brackets() {
646 let rule = MD033NoInlineHtml::default();
648 let content = "`<one>` and `<two>` and `<three>` are all code spans";
649 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
650 let result = rule.check(&ctx).unwrap();
651 assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
652 }
653
654 #[test]
655 fn test_md033_nested_angle_brackets_in_code_span() {
656 let rule = MD033NoInlineHtml::default();
658 let content = "Text with `<<nested>>` brackets";
659 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
660 let result = rule.check(&ctx).unwrap();
661 assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
662 }
663
664 #[test]
665 fn test_md033_code_span_at_end_before_code_block() {
666 let rule = MD033NoInlineHtml::default();
668 let content = "Testing `<test>`\n```\ncode here\n```";
669 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
670 let result = rule.check(&ctx).unwrap();
671 assert_eq!(result.len(), 0, "Should handle code span before code block");
672 }
673}