1use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
8use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
9use crate::utils::range_utils::calculate_html_tag_range;
10use crate::utils::regex_cache::*;
11use lazy_static::lazy_static;
12use regex::Regex;
13use std::collections::HashSet;
14
15mod md033_config;
16use md033_config::MD033Config;
17
lazy_static! {
    // Matches an HTML comment that opens and closes on the same line (`.` does not
    // match a newline by default). The lazy `.*?` stops at the first `-->`, so two
    // comments on one line are matched separately.
    // NOTE(review): nothing in this file appears to reference this pattern —
    // `is_html_comment` uses plain prefix/suffix checks instead. Confirm before removing.
    static ref HTML_COMMENT_PATTERN: Regex = Regex::new(r"<!--.*?-->").unwrap();
}
22
/// Rule MD033: reports inline HTML found in Markdown content.
///
/// Tags whose name is present in the allow-list are not reported.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    /// The rule configuration this instance was built from; it is what
    /// `default_config_section` serializes.
    config: MD033Config,
    /// Allowed tag names, obtained from `config.allowed_set()`. Lookups are done
    /// with the lowercased tag name.
    allowed: HashSet<String>,
}
28
29impl Default for MD033NoInlineHtml {
30 fn default() -> Self {
31 let config = MD033Config::default();
32 let allowed = config.allowed_set();
33 Self { config, allowed }
34 }
35}
36
37impl MD033NoInlineHtml {
    /// Creates the rule with its default configuration (same as `Default::default()`).
    pub fn new() -> Self {
        Self::default()
    }
41
42 pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
43 let config = MD033Config {
44 allowed: allowed_vec.clone(),
45 };
46 let allowed = config.allowed_set();
47 Self { config, allowed }
48 }
49
50 pub fn from_config_struct(config: MD033Config) -> Self {
51 let allowed = config.allowed_set();
52 Self { config, allowed }
53 }
54
55 #[inline]
57 fn is_tag_allowed(&self, tag: &str) -> bool {
58 if self.allowed.is_empty() {
59 return false;
60 }
61 let tag = tag.trim_start_matches('<').trim_start_matches('/');
63 let tag_name = tag
64 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
65 .next()
66 .unwrap_or("");
67 self.allowed.contains(&tag_name.to_lowercase())
68 }
69
    /// Returns `true` when `tag` begins with `<!--` and ends with `-->`.
    ///
    /// The two checks are independent, so the prefix and suffix may overlap
    /// (for example `<!-->` satisfies both).
    #[inline]
    fn is_html_comment(&self, tag: &str) -> bool {
        tag.starts_with("<!--") && tag.ends_with("-->")
    }
75
76 #[inline]
78 fn is_likely_type_annotation(&self, tag: &str) -> bool {
79 const COMMON_TYPES: &[&str] = &[
81 "string",
82 "number",
83 "any",
84 "void",
85 "null",
86 "undefined",
87 "array",
88 "promise",
89 "function",
90 "error",
91 "date",
92 "regexp",
93 "symbol",
94 "bigint",
95 "map",
96 "set",
97 "weakmap",
98 "weakset",
99 "iterator",
100 "generator",
101 "t",
102 "u",
103 "v",
104 "k",
105 "e", "userdata",
107 "apiresponse",
108 "config",
109 "options",
110 "params",
111 "result",
112 "response",
113 "request",
114 "data",
115 "item",
116 "element",
117 "node",
118 ];
119
120 let tag_content = tag
121 .trim_start_matches('<')
122 .trim_end_matches('>')
123 .trim_start_matches('/');
124 let tag_name = tag_content
125 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
126 .next()
127 .unwrap_or("");
128
129 if !tag_content.contains(' ') && !tag_content.contains('=') {
131 COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
132 } else {
133 false
134 }
135 }
136
137 #[inline]
139 fn is_email_address(&self, tag: &str) -> bool {
140 let content = tag.trim_start_matches('<').trim_end_matches('>');
141 content.contains('@')
143 && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
144 && content.split('@').count() == 2
145 && content.split('@').all(|part| !part.is_empty())
146 }
147
148 #[inline]
150 fn has_markdown_attribute(&self, tag: &str) -> bool {
151 tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
154 }
155
156 #[inline]
158 fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
159 let content = tag.trim_start_matches('<').trim_end_matches('>');
160 content.starts_with("http://")
162 || content.starts_with("https://")
163 || content.starts_with("ftp://")
164 || content.starts_with("ftps://")
165 || content.starts_with("mailto:")
166 }
167
168 fn find_multiline_html_tags(
170 &self,
171 ctx: &crate::lint_context::LintContext,
172 content: &str,
173 structure: &DocumentStructure,
174 nomarkdown_ranges: &[(usize, usize)],
175 warnings: &mut Vec<LintWarning>,
176 ) {
177 if !content.contains('<') || !content.lines().any(|line| line.trim_end().ends_with('<')) {
179 return;
180 }
181
182 lazy_static::lazy_static! {
184 static ref INCOMPLETE_TAG_START: regex::Regex = regex::Regex::new(r"(?i)<[a-zA-Z][^>]*$").unwrap();
185 }
186
187 let lines: Vec<&str> = content.lines().collect();
188
189 for (i, line) in lines.iter().enumerate() {
190 let line_num = i + 1;
191
192 if line.trim().is_empty() || structure.is_in_code_block(line_num) {
194 continue;
195 }
196
197 if nomarkdown_ranges
199 .iter()
200 .any(|(start, end)| line_num >= *start && line_num <= *end)
201 {
202 continue;
203 }
204
205 if !line.contains('<') {
207 continue;
208 }
209
210 if let Some(incomplete_match) = INCOMPLETE_TAG_START.find(line) {
212 let start_column = incomplete_match.start() + 1; let mut complete_tag = incomplete_match.as_str().to_string();
216 let mut found_end = false;
217
218 for (j, next_line) in lines.iter().enumerate().skip(i + 1).take(10) {
220 let next_line_num = j + 1;
221
222 if structure.is_in_code_block(next_line_num) {
224 break;
225 }
226
227 complete_tag.push(' '); complete_tag.push_str(next_line.trim());
229
230 if next_line.contains('>') {
231 found_end = true;
232 break;
233 }
234 }
235
236 if found_end {
237 if let Some(end_pos) = complete_tag.find('>') {
239 let final_tag = &complete_tag[0..=end_pos];
240
241 let skip_mkdocs_markdown = ctx.flavor == crate::config::MarkdownFlavor::MkDocs
243 && self.has_markdown_attribute(final_tag);
244
245 if !self.is_html_comment(final_tag)
246 && !self.is_likely_type_annotation(final_tag)
247 && !self.is_email_address(final_tag)
248 && !self.is_url_in_angle_brackets(final_tag)
249 && !self.is_tag_allowed(final_tag)
250 && !skip_mkdocs_markdown
251 && HTML_TAG_FINDER.is_match(final_tag)
252 {
253 let already_warned =
255 warnings.iter().any(|w| w.line == line_num && w.column == start_column);
256
257 if !already_warned {
258 let (start_line, start_col, end_line, end_col) = calculate_html_tag_range(
259 line_num,
260 line,
261 incomplete_match.start(),
262 incomplete_match.len(),
263 );
264 warnings.push(LintWarning {
265 rule_name: Some(self.name()),
266 line: start_line,
267 column: start_col,
268 end_line,
269 end_column: end_col,
270 message: format!("HTML tag found: {final_tag} (use Markdown syntax instead)"),
271 severity: Severity::Warning,
272 fix: None,
273 });
274 }
275 }
276 }
277 }
278 }
279 }
280 }
281}
282
283impl Rule for MD033NoInlineHtml {
    /// Rule identifier; it is attached to every warning as `rule_name` and used
    /// as the key of the rule's configuration section.
    fn name(&self) -> &'static str {
        "MD033"
    }
287
    /// One-line, human-readable summary of what the rule enforces.
    fn description(&self) -> &'static str {
        "Inline HTML is not allowed"
    }
291
292 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
293 let content = ctx.content;
294 let structure = DocumentStructure::new(content);
295 self.check_with_structure(ctx, &structure)
296 }
297
298 fn check_with_structure(
300 &self,
301 ctx: &crate::lint_context::LintContext,
302 structure: &DocumentStructure,
303 ) -> LintResult {
304 let content = ctx.content;
305
306 if content.is_empty() || !has_html_tags(content) {
308 return Ok(Vec::new());
309 }
310
311 if !HTML_TAG_QUICK_CHECK.is_match(content) {
313 return Ok(Vec::new());
314 }
315
316 let mut warnings = Vec::new();
317 let lines: Vec<&str> = content.lines().collect();
318
319 let mut in_nomarkdown = false;
321 let mut in_comment = false;
322 let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
323 let mut nomarkdown_start = 0;
324 let mut comment_start = 0;
325
326 for (i, line) in lines.iter().enumerate() {
328 let line_num = i + 1;
329
330 if line.trim() == "{::nomarkdown}" {
332 in_nomarkdown = true;
333 nomarkdown_start = line_num;
334 } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
335 in_nomarkdown = false;
336 nomarkdown_ranges.push((nomarkdown_start, line_num));
337 }
338
339 if line.trim() == "{::comment}" {
341 in_comment = true;
342 comment_start = line_num;
343 } else if line.trim() == "{:/comment}" && in_comment {
344 in_comment = false;
345 nomarkdown_ranges.push((comment_start, line_num));
346 }
347 }
348
349 for (i, line) in lines.iter().enumerate() {
352 let line_num = i + 1;
353
354 if line.trim().is_empty() {
355 continue;
356 }
357 if structure.is_in_code_block(line_num) {
358 continue;
359 }
360 if line.starts_with(" ") || line.starts_with('\t') {
363 continue;
364 }
365
366 if nomarkdown_ranges
368 .iter()
369 .any(|(start, end)| line_num >= *start && line_num <= *end)
370 {
371 continue;
372 }
373
374 if is_kramdown_extension(line) || is_kramdown_block_attribute(line) {
376 continue;
377 }
378
379 for tag_match in HTML_TAG_FINDER.find_iter(line) {
381 let tag = tag_match.as_str();
382
383 if self.is_html_comment(tag) {
385 continue;
386 }
387
388 if self.is_likely_type_annotation(tag) {
390 continue;
391 }
392
393 if self.is_email_address(tag) {
395 continue;
396 }
397
398 if self.is_url_in_angle_brackets(tag) {
400 continue;
401 }
402
403 let tag_start_col = tag_match.start() + 1; if structure.is_in_code_span(line_num, tag_start_col) {
406 continue;
407 }
408
409 if self.is_tag_allowed(tag) {
411 continue;
412 }
413
414 if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
416 continue;
417 }
418
419 let (start_line, start_col, end_line, end_col) =
421 calculate_html_tag_range(line_num, line, tag_match.start(), tag_match.len());
422 warnings.push(LintWarning {
423 rule_name: Some(self.name()),
424 line: start_line,
425 column: start_col,
426 end_line,
427 end_column: end_col,
428 message: format!("Inline HTML found: {tag} (use Markdown syntax instead)"),
429 severity: Severity::Warning,
430 fix: None,
431 });
432 }
433 }
434
435 self.find_multiline_html_tags(ctx, ctx.content, structure, &nomarkdown_ranges, &mut warnings);
437
438 Ok(warnings)
439 }
440
441 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
442 Ok(ctx.content.to_string())
444 }
445
    /// Declares that this rule offers no automatic fix (`fix` returns the input
    /// unchanged).
    fn fix_capability(&self) -> crate::rule::FixCapability {
        crate::rule::FixCapability::Unfixable
    }
449
    /// Category this rule belongs to: HTML.
    fn category(&self) -> RuleCategory {
        RuleCategory::Html
    }
454
455 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
457 let content = ctx.content;
458 content.is_empty() || !has_html_tags(content)
459 }
460
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
464
    /// Always returns `Some(self)`, exposing this rule through the
    /// `MaybeDocumentStructure` trait object.
    fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
        Some(self)
    }
468
469 fn default_config_section(&self) -> Option<(String, toml::Value)> {
470 let json_value = serde_json::to_value(&self.config).ok()?;
471 Some((
472 self.name().to_string(),
473 crate::rule_config_serde::json_to_toml_value(&json_value)?,
474 ))
475 }
476
477 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
478 where
479 Self: Sized,
480 {
481 let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
482 Box::new(Self::from_config_struct(rule_config))
483 }
484}
485
486impl DocumentStructureExtensions for MD033NoInlineHtml {
487 fn has_relevant_elements(
488 &self,
489 ctx: &crate::lint_context::LintContext,
490 _doc_structure: &DocumentStructure,
491 ) -> bool {
492 ctx.content.contains('<') && ctx.content.contains('>')
494 }
495}
496
497#[cfg(test)]
498mod tests {
499 use super::*;
500 use crate::lint_context::LintContext;
501 use crate::rule::Rule;
502
503 #[test]
504 fn test_md033_basic_html() {
505 let rule = MD033NoInlineHtml::default();
506 let content = "<div>Some content</div>";
507 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
508 let result = rule.check(&ctx).unwrap();
509 assert_eq!(result.len(), 2); assert!(result[0].message.starts_with("Inline HTML found: <div>"));
512 assert!(result[1].message.starts_with("Inline HTML found: </div>"));
513 }
514
515 #[test]
516 fn test_md033_case_insensitive() {
517 let rule = MD033NoInlineHtml::default();
518 let content = "<DiV>Some <B>content</B></dIv>";
519 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
520 let result = rule.check(&ctx).unwrap();
521 assert_eq!(result.len(), 4); assert_eq!(
524 result[0].message,
525 "Inline HTML found: <DiV> (use Markdown syntax instead)"
526 );
527 assert_eq!(
528 result[1].message,
529 "Inline HTML found: <B> (use Markdown syntax instead)"
530 );
531 assert_eq!(
532 result[2].message,
533 "Inline HTML found: </B> (use Markdown syntax instead)"
534 );
535 assert_eq!(
536 result[3].message,
537 "Inline HTML found: </dIv> (use Markdown syntax instead)"
538 );
539 }
540
541 #[test]
542 fn test_md033_allowed_tags() {
543 let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
544 let content = "<div>Allowed</div><p>Not allowed</p><br/>";
545 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
546 let result = rule.check(&ctx).unwrap();
547 assert_eq!(result.len(), 2);
549 assert_eq!(
550 result[0].message,
551 "Inline HTML found: <p> (use Markdown syntax instead)"
552 );
553 assert_eq!(
554 result[1].message,
555 "Inline HTML found: </p> (use Markdown syntax instead)"
556 );
557
558 let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
560 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
561 let result2 = rule.check(&ctx2).unwrap();
562 assert_eq!(result2.len(), 2); assert_eq!(
564 result2[0].message,
565 "Inline HTML found: <P> (use Markdown syntax instead)"
566 );
567 assert_eq!(
568 result2[1].message,
569 "Inline HTML found: </P> (use Markdown syntax instead)"
570 );
571 }
572
573 #[test]
574 fn test_md033_html_comments() {
575 let rule = MD033NoInlineHtml::default();
576 let content = "<!-- This is a comment --> <p>Not a comment</p>";
577 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
578 let result = rule.check(&ctx).unwrap();
579 assert_eq!(result.len(), 2); assert_eq!(
582 result[0].message,
583 "Inline HTML found: <p> (use Markdown syntax instead)"
584 );
585 assert_eq!(
586 result[1].message,
587 "Inline HTML found: </p> (use Markdown syntax instead)"
588 );
589 }
590
591 #[test]
592 fn test_md033_tags_in_links() {
593 let rule = MD033NoInlineHtml::default();
594 let content = "[Link](http://example.com/<div>)";
595 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
596 let result = rule.check(&ctx).unwrap();
597 assert_eq!(result.len(), 1);
599 assert_eq!(
600 result[0].message,
601 "Inline HTML found: <div> (use Markdown syntax instead)"
602 );
603
604 let content2 = "[Link <a>text</a>](url)";
605 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
606 let result2 = rule.check(&ctx2).unwrap();
607 assert_eq!(result2.len(), 2); assert_eq!(
610 result2[0].message,
611 "Inline HTML found: <a> (use Markdown syntax instead)"
612 );
613 assert_eq!(
614 result2[1].message,
615 "Inline HTML found: </a> (use Markdown syntax instead)"
616 );
617 }
618
619 #[test]
620 fn test_md033_fix_escaping() {
621 let rule = MD033NoInlineHtml::default();
622 let content = "Text with <div> and <br/> tags.";
623 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
624 let fixed_content = rule.fix(&ctx).unwrap();
625 assert_eq!(fixed_content, content);
627 }
628
629 #[test]
630 fn test_md033_in_code_blocks() {
631 let rule = MD033NoInlineHtml::default();
632 let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
633 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
634 let result = rule.check(&ctx).unwrap();
635 assert_eq!(result.len(), 2); assert_eq!(
638 result[0].message,
639 "Inline HTML found: <div> (use Markdown syntax instead)"
640 );
641 assert_eq!(
642 result[1].message,
643 "Inline HTML found: </div> (use Markdown syntax instead)"
644 );
645 }
646
647 #[test]
648 fn test_md033_in_code_spans() {
649 let rule = MD033NoInlineHtml::default();
650 let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
651 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
652 let result = rule.check(&ctx).unwrap();
653 assert_eq!(result.len(), 1);
655 assert_eq!(
656 result[0].message,
657 "Inline HTML found: <br/> (use Markdown syntax instead)"
658 );
659 }
660}