1use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::range_utils::calculate_html_tag_range;
9use crate::utils::regex_cache::*;
10use lazy_static::lazy_static;
11use regex::Regex;
12use std::collections::HashSet;
13
14mod md033_config;
15use md033_config::MD033Config;
16
lazy_static! {
    /// Matches an HTML comment (`<!-- ... -->`) lazily, i.e. up to the first `-->`.
    /// `.` does not match a newline in this pattern, so only comments that open
    /// and close on the same line are matched.
    static ref HTML_COMMENT_PATTERN: Regex = Regex::new(r"<!--.*?-->").unwrap();
}
21
/// Lint rule MD033: reports HTML tags that appear in Markdown content.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    /// Configuration this rule instance was built from.
    config: MD033Config,
    /// Tag names that are never reported; produced by `config.allowed_set()`
    /// and queried with the lowercased tag name.
    allowed: HashSet<String>,
}
27
28impl Default for MD033NoInlineHtml {
29 fn default() -> Self {
30 let config = MD033Config::default();
31 let allowed = config.allowed_set();
32 Self { config, allowed }
33 }
34}
35
36impl MD033NoInlineHtml {
37 pub fn new() -> Self {
38 Self::default()
39 }
40
41 pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
42 let config = MD033Config {
43 allowed: allowed_vec.clone(),
44 };
45 let allowed = config.allowed_set();
46 Self { config, allowed }
47 }
48
49 pub fn from_config_struct(config: MD033Config) -> Self {
50 let allowed = config.allowed_set();
51 Self { config, allowed }
52 }
53
54 #[inline]
56 fn is_tag_allowed(&self, tag: &str) -> bool {
57 if self.allowed.is_empty() {
58 return false;
59 }
60 let tag = tag.trim_start_matches('<').trim_start_matches('/');
62 let tag_name = tag
63 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
64 .next()
65 .unwrap_or("");
66 self.allowed.contains(&tag_name.to_lowercase())
67 }
68
69 #[inline]
71 fn is_html_comment(&self, tag: &str) -> bool {
72 tag.starts_with("<!--") && tag.ends_with("-->")
73 }
74
75 #[inline]
77 fn is_likely_type_annotation(&self, tag: &str) -> bool {
78 const COMMON_TYPES: &[&str] = &[
80 "string",
81 "number",
82 "any",
83 "void",
84 "null",
85 "undefined",
86 "array",
87 "promise",
88 "function",
89 "error",
90 "date",
91 "regexp",
92 "symbol",
93 "bigint",
94 "map",
95 "set",
96 "weakmap",
97 "weakset",
98 "iterator",
99 "generator",
100 "t",
101 "u",
102 "v",
103 "k",
104 "e", "userdata",
106 "apiresponse",
107 "config",
108 "options",
109 "params",
110 "result",
111 "response",
112 "request",
113 "data",
114 "item",
115 "element",
116 "node",
117 ];
118
119 let tag_content = tag
120 .trim_start_matches('<')
121 .trim_end_matches('>')
122 .trim_start_matches('/');
123 let tag_name = tag_content
124 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
125 .next()
126 .unwrap_or("");
127
128 if !tag_content.contains(' ') && !tag_content.contains('=') {
130 COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
131 } else {
132 false
133 }
134 }
135
136 #[inline]
138 fn is_email_address(&self, tag: &str) -> bool {
139 let content = tag.trim_start_matches('<').trim_end_matches('>');
140 content.contains('@')
142 && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
143 && content.split('@').count() == 2
144 && content.split('@').all(|part| !part.is_empty())
145 }
146
147 #[inline]
149 fn has_markdown_attribute(&self, tag: &str) -> bool {
150 tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
153 }
154
155 #[inline]
157 fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
158 let content = tag.trim_start_matches('<').trim_end_matches('>');
159 content.starts_with("http://")
161 || content.starts_with("https://")
162 || content.starts_with("ftp://")
163 || content.starts_with("ftps://")
164 || content.starts_with("mailto:")
165 }
166
167 fn find_multiline_html_tags(
169 &self,
170 ctx: &crate::lint_context::LintContext,
171 content: &str,
172 nomarkdown_ranges: &[(usize, usize)],
173 warnings: &mut Vec<LintWarning>,
174 ) {
175 if !content.contains('<') || !content.lines().any(|line| line.trim_end().ends_with('<')) {
177 return;
178 }
179
180 lazy_static::lazy_static! {
182 static ref INCOMPLETE_TAG_START: regex::Regex = regex::Regex::new(r"(?i)<[a-zA-Z][^>]*$").unwrap();
183 }
184
185 let lines: Vec<&str> = content.lines().collect();
186
187 for (i, line) in lines.iter().enumerate() {
188 let line_num = i + 1;
189
190 if line.trim().is_empty() || ctx.is_in_code_block(line_num) {
192 continue;
193 }
194
195 if nomarkdown_ranges
197 .iter()
198 .any(|(start, end)| line_num >= *start && line_num <= *end)
199 {
200 continue;
201 }
202
203 if !line.contains('<') {
205 continue;
206 }
207
208 if let Some(incomplete_match) = INCOMPLETE_TAG_START.find(line) {
210 let start_column = incomplete_match.start() + 1; let mut complete_tag = incomplete_match.as_str().to_string();
214 let mut found_end = false;
215
216 for (j, next_line) in lines.iter().enumerate().skip(i + 1).take(10) {
218 let next_line_num = j + 1;
219
220 if ctx.is_in_code_block(next_line_num) {
222 break;
223 }
224
225 complete_tag.push(' '); complete_tag.push_str(next_line.trim());
227
228 if next_line.contains('>') {
229 found_end = true;
230 break;
231 }
232 }
233
234 if found_end {
235 if let Some(end_pos) = complete_tag.find('>') {
237 let final_tag = &complete_tag[0..=end_pos];
238
239 let skip_mkdocs_markdown = ctx.flavor == crate::config::MarkdownFlavor::MkDocs
241 && self.has_markdown_attribute(final_tag);
242
243 if !self.is_html_comment(final_tag)
244 && !self.is_likely_type_annotation(final_tag)
245 && !self.is_email_address(final_tag)
246 && !self.is_url_in_angle_brackets(final_tag)
247 && !self.is_tag_allowed(final_tag)
248 && !skip_mkdocs_markdown
249 && HTML_TAG_FINDER.is_match(final_tag)
250 {
251 let already_warned =
253 warnings.iter().any(|w| w.line == line_num && w.column == start_column);
254
255 if !already_warned {
256 let (start_line, start_col, end_line, end_col) = calculate_html_tag_range(
257 line_num,
258 line,
259 incomplete_match.start(),
260 incomplete_match.len(),
261 );
262 warnings.push(LintWarning {
263 rule_name: Some(self.name()),
264 line: start_line,
265 column: start_col,
266 end_line,
267 end_column: end_col,
268 message: format!("HTML tag found: {final_tag} (use Markdown syntax instead)"),
269 severity: Severity::Warning,
270 fix: None,
271 });
272 }
273 }
274 }
275 }
276 }
277 }
278 }
279}
280
281impl Rule for MD033NoInlineHtml {
    /// Returns the rule identifier, `"MD033"`.
    fn name(&self) -> &'static str {
        "MD033"
    }
285
    /// Returns the one-line, human-readable description of this rule.
    fn description(&self) -> &'static str {
        "Inline HTML is not allowed"
    }
289
290 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
291 let content = ctx.content;
292
293 if content.is_empty() || !has_html_tags(content) {
295 return Ok(Vec::new());
296 }
297
298 if !HTML_TAG_QUICK_CHECK.is_match(content) {
300 return Ok(Vec::new());
301 }
302
303 let mut warnings = Vec::new();
304 let lines: Vec<&str> = content.lines().collect();
305
306 let mut in_nomarkdown = false;
308 let mut in_comment = false;
309 let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
310 let mut nomarkdown_start = 0;
311 let mut comment_start = 0;
312
313 for (i, line) in lines.iter().enumerate() {
315 let line_num = i + 1;
316
317 if line.trim() == "{::nomarkdown}" {
319 in_nomarkdown = true;
320 nomarkdown_start = line_num;
321 } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
322 in_nomarkdown = false;
323 nomarkdown_ranges.push((nomarkdown_start, line_num));
324 }
325
326 if line.trim() == "{::comment}" {
328 in_comment = true;
329 comment_start = line_num;
330 } else if line.trim() == "{:/comment}" && in_comment {
331 in_comment = false;
332 nomarkdown_ranges.push((comment_start, line_num));
333 }
334 }
335
336 for (i, line) in lines.iter().enumerate() {
339 let line_num = i + 1;
340
341 if line.trim().is_empty() {
342 continue;
343 }
344 if ctx.is_in_code_block(line_num) {
345 continue;
346 }
347 if line.starts_with(" ") || line.starts_with('\t') {
350 continue;
351 }
352
353 if nomarkdown_ranges
355 .iter()
356 .any(|(start, end)| line_num >= *start && line_num <= *end)
357 {
358 continue;
359 }
360
361 if is_kramdown_extension(line) || is_kramdown_block_attribute(line) {
363 continue;
364 }
365
366 for tag_match in HTML_TAG_FINDER.find_iter(line) {
368 let tag = tag_match.as_str();
369
370 if self.is_html_comment(tag) {
372 continue;
373 }
374
375 if self.is_likely_type_annotation(tag) {
377 continue;
378 }
379
380 if self.is_email_address(tag) {
382 continue;
383 }
384
385 if self.is_url_in_angle_brackets(tag) {
387 continue;
388 }
389
390 let tag_start_col = tag_match.start() + 1; if ctx.is_in_code_span(line_num, tag_start_col) {
393 continue;
394 }
395
396 if self.is_tag_allowed(tag) {
398 continue;
399 }
400
401 if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
403 continue;
404 }
405
406 let (start_line, start_col, end_line, end_col) =
408 calculate_html_tag_range(line_num, line, tag_match.start(), tag_match.len());
409 warnings.push(LintWarning {
410 rule_name: Some(self.name()),
411 line: start_line,
412 column: start_col,
413 end_line,
414 end_column: end_col,
415 message: format!("Inline HTML found: {tag} (use Markdown syntax instead)"),
416 severity: Severity::Warning,
417 fix: None,
418 });
419 }
420 }
421
422 self.find_multiline_html_tags(ctx, ctx.content, &nomarkdown_ranges, &mut warnings);
424
425 Ok(warnings)
426 }
427
428 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
429 Ok(ctx.content.to_string())
431 }
432
    /// Declares this rule as unfixable; `fix` returns the content as-is.
    fn fix_capability(&self) -> crate::rule::FixCapability {
        crate::rule::FixCapability::Unfixable
    }
436
    /// Returns the category this rule is filed under: `RuleCategory::Html`.
    fn category(&self) -> RuleCategory {
        RuleCategory::Html
    }
441
442 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
444 let content = ctx.content;
445 content.is_empty() || !has_html_tags(content)
446 }
447
    /// Exposes the rule as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
451
452 fn default_config_section(&self) -> Option<(String, toml::Value)> {
453 let json_value = serde_json::to_value(&self.config).ok()?;
454 Some((
455 self.name().to_string(),
456 crate::rule_config_serde::json_to_toml_value(&json_value)?,
457 ))
458 }
459
460 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
461 where
462 Self: Sized,
463 {
464 let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
465 Box::new(Self::from_config_struct(rule_config))
466 }
467}
468
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;
    use crate::rule::Rule;

    /// Lints `content` with the Standard flavor and returns the warning
    /// messages in the order they were reported.
    fn lint_messages(rule: &MD033NoInlineHtml, content: &str) -> Vec<String> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx)
            .unwrap()
            .into_iter()
            .map(|warning| warning.message)
            .collect()
    }

    /// Full message expected for a tag reported by the single-line pass.
    fn inline_html(tag: &str) -> String {
        format!("Inline HTML found: {tag} (use Markdown syntax instead)")
    }

    #[test]
    fn test_md033_basic_html() {
        let messages = lint_messages(&MD033NoInlineHtml::default(), "<div>Some content</div>");

        // Opening and closing tags are reported separately.
        assert_eq!(messages.len(), 2);
        assert!(messages[0].starts_with("Inline HTML found: <div>"));
        assert!(messages[1].starts_with("Inline HTML found: </div>"));
    }

    #[test]
    fn test_md033_case_insensitive() {
        let messages = lint_messages(&MD033NoInlineHtml::default(), "<DiV>Some <B>content</B></dIv>");

        // Tags are detected regardless of case and echoed as written.
        assert_eq!(
            messages,
            vec![
                inline_html("<DiV>"),
                inline_html("<B>"),
                inline_html("</B>"),
                inline_html("</dIv>"),
            ]
        );
    }

    #[test]
    fn test_md033_allowed_tags() {
        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);

        // Only the tags outside the allow-list are reported.
        let lowercase = lint_messages(&rule, "<div>Allowed</div><p>Not allowed</p><br/>");
        assert_eq!(lowercase, vec![inline_html("<p>"), inline_html("</p>")]);

        // The allow-list applies regardless of the tag's case.
        let uppercase = lint_messages(&rule, "<DIV>Allowed</DIV><P>Not allowed</P><BR/>");
        assert_eq!(uppercase, vec![inline_html("<P>"), inline_html("</P>")]);
    }

    #[test]
    fn test_md033_html_comments() {
        let messages = lint_messages(
            &MD033NoInlineHtml::default(),
            "<!-- This is a comment --> <p>Not a comment</p>",
        );

        // The comment is ignored; the paragraph tags are not.
        assert_eq!(messages, vec![inline_html("<p>"), inline_html("</p>")]);
    }

    #[test]
    fn test_md033_tags_in_links() {
        let rule = MD033NoInlineHtml::default();

        // A tag inside a link destination is still reported.
        let in_destination = lint_messages(&rule, "[Link](http://example.com/<div>)");
        assert_eq!(in_destination, vec![inline_html("<div>")]);

        // So are tags inside the link text.
        let in_text = lint_messages(&rule, "[Link <a>text</a>](url)");
        assert_eq!(in_text, vec![inline_html("<a>"), inline_html("</a>")]);
    }

    #[test]
    fn test_md033_fix_escaping() {
        let rule = MD033NoInlineHtml::default();
        let content = "Text with <div> and <br/> tags.";
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);

        // `fix` leaves the content untouched.
        let fixed_content = rule.fix(&ctx).unwrap();
        assert_eq!(fixed_content, content);
    }

    #[test]
    fn test_md033_in_code_blocks() {
        let messages = lint_messages(
            &MD033NoInlineHtml::default(),
            "```html\n<div>Code</div>\n```\n<div>Not code</div>",
        );

        // Only the tags outside the fenced block are reported.
        assert_eq!(messages, vec![inline_html("<div>"), inline_html("</div>")]);
    }

    #[test]
    fn test_md033_in_code_spans() {
        let messages = lint_messages(
            &MD033NoInlineHtml::default(),
            "Text with `<p>in code</p>` span. <br/> Not in span.",
        );

        // Tags inside the code span are ignored.
        assert_eq!(messages, vec![inline_html("<br/>")]);
    }
}