1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::regex_cache::*;
9use std::collections::HashSet;
10
11mod md033_config;
12use md033_config::MD033Config;
13
14#[derive(Clone)]
15pub struct MD033NoInlineHtml {
16 config: MD033Config,
17 allowed: HashSet<String>,
18}
19
20impl Default for MD033NoInlineHtml {
21 fn default() -> Self {
22 let config = MD033Config::default();
23 let allowed = config.allowed_set();
24 Self { config, allowed }
25 }
26}
27
28impl MD033NoInlineHtml {
29 pub fn new() -> Self {
30 Self::default()
31 }
32
33 pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
34 let config = MD033Config {
35 allowed: allowed_vec.clone(),
36 };
37 let allowed = config.allowed_set();
38 Self { config, allowed }
39 }
40
41 pub fn from_config_struct(config: MD033Config) -> Self {
42 let allowed = config.allowed_set();
43 Self { config, allowed }
44 }
45
46 #[inline]
48 fn is_tag_allowed(&self, tag: &str) -> bool {
49 if self.allowed.is_empty() {
50 return false;
51 }
52 let tag = tag.trim_start_matches('<').trim_start_matches('/');
54 let tag_name = tag
55 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
56 .next()
57 .unwrap_or("");
58 self.allowed.contains(&tag_name.to_lowercase())
59 }
60
61 #[inline]
63 fn is_html_comment(&self, tag: &str) -> bool {
64 tag.starts_with("<!--") && tag.ends_with("-->")
65 }
66
67 #[inline]
69 fn is_likely_type_annotation(&self, tag: &str) -> bool {
70 const COMMON_TYPES: &[&str] = &[
72 "string",
73 "number",
74 "any",
75 "void",
76 "null",
77 "undefined",
78 "array",
79 "promise",
80 "function",
81 "error",
82 "date",
83 "regexp",
84 "symbol",
85 "bigint",
86 "map",
87 "set",
88 "weakmap",
89 "weakset",
90 "iterator",
91 "generator",
92 "t",
93 "u",
94 "v",
95 "k",
96 "e", "userdata",
98 "apiresponse",
99 "config",
100 "options",
101 "params",
102 "result",
103 "response",
104 "request",
105 "data",
106 "item",
107 "element",
108 "node",
109 ];
110
111 let tag_content = tag
112 .trim_start_matches('<')
113 .trim_end_matches('>')
114 .trim_start_matches('/');
115 let tag_name = tag_content
116 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
117 .next()
118 .unwrap_or("");
119
120 if !tag_content.contains(' ') && !tag_content.contains('=') {
122 COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
123 } else {
124 false
125 }
126 }
127
128 #[inline]
130 fn is_email_address(&self, tag: &str) -> bool {
131 let content = tag.trim_start_matches('<').trim_end_matches('>');
132 content.contains('@')
134 && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
135 && content.split('@').count() == 2
136 && content.split('@').all(|part| !part.is_empty())
137 }
138
139 #[inline]
141 fn has_markdown_attribute(&self, tag: &str) -> bool {
142 tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
145 }
146
147 #[inline]
149 fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
150 let content = tag.trim_start_matches('<').trim_end_matches('>');
151 content.starts_with("http://")
153 || content.starts_with("https://")
154 || content.starts_with("ftp://")
155 || content.starts_with("ftps://")
156 || content.starts_with("mailto:")
157 }
158
159 fn calculate_fix(
167 &self,
168 content: &str,
169 opening_tag: &str,
170 tag_byte_start: usize,
171 ) -> Option<(std::ops::Range<usize>, String)> {
172 if opening_tag.ends_with("/>") {
174 return Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()));
175 }
176
177 let tag_name = opening_tag
179 .trim_start_matches('<')
180 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
181 .next()?
182 .to_lowercase();
183
184 let closing_tag = format!("</{tag_name}>");
186
187 let search_start = tag_byte_start + opening_tag.len();
189 if let Some(closing_pos) = content[search_start..].find(&closing_tag) {
190 let closing_byte_start = search_start + closing_pos;
191 let closing_byte_end = closing_byte_start + closing_tag.len();
192
193 let inner_content = &content[search_start..closing_byte_start];
195
196 return Some((tag_byte_start..closing_byte_end, inner_content.to_string()));
197 }
198
199 Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()))
201 }
202}
203
204impl Rule for MD033NoInlineHtml {
205 fn name(&self) -> &'static str {
206 "MD033"
207 }
208
209 fn description(&self) -> &'static str {
210 "Inline HTML is not allowed"
211 }
212
213 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
214 let content = ctx.content;
215
216 if content.is_empty() || !ctx.likely_has_html() {
218 return Ok(Vec::new());
219 }
220
221 if !HTML_TAG_QUICK_CHECK.is_match(content) {
223 return Ok(Vec::new());
224 }
225
226 let mut warnings = Vec::new();
227 let lines: Vec<&str> = content.lines().collect();
228
229 let mut in_nomarkdown = false;
231 let mut in_comment = false;
232 let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
233 let mut nomarkdown_start = 0;
234 let mut comment_start = 0;
235
236 for (i, line) in lines.iter().enumerate() {
237 let line_num = i + 1;
238
239 if line.trim() == "{::nomarkdown}" {
241 in_nomarkdown = true;
242 nomarkdown_start = line_num;
243 } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
244 in_nomarkdown = false;
245 nomarkdown_ranges.push((nomarkdown_start, line_num));
246 }
247
248 if line.trim() == "{::comment}" {
250 in_comment = true;
251 comment_start = line_num;
252 } else if line.trim() == "{:/comment}" && in_comment {
253 in_comment = false;
254 nomarkdown_ranges.push((comment_start, line_num));
255 }
256 }
257
258 let html_tags = ctx.html_tags();
260
261 for html_tag in html_tags.iter() {
262 if html_tag.is_closing {
264 continue;
265 }
266
267 let line_num = html_tag.line;
268 let tag_byte_start = html_tag.byte_offset;
269
270 let tag = &content[html_tag.byte_offset..html_tag.byte_end];
272
273 if ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
275 continue;
276 }
277
278 if let Some(line) = lines.get(line_num.saturating_sub(1)) {
280 if line.starts_with(" ") || line.starts_with('\t') {
281 continue;
282 }
283
284 if is_kramdown_extension(line) || is_kramdown_block_attribute(line) {
286 continue;
287 }
288 }
289
290 if nomarkdown_ranges
292 .iter()
293 .any(|(start, end)| line_num >= *start && line_num <= *end)
294 {
295 continue;
296 }
297
298 if ctx.is_in_html_comment(tag_byte_start) {
300 continue;
301 }
302
303 if self.is_html_comment(tag) {
305 continue;
306 }
307
308 if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
310 continue;
311 }
312
313 if self.is_likely_type_annotation(tag) {
315 continue;
316 }
317
318 if self.is_email_address(tag) {
320 continue;
321 }
322
323 if self.is_url_in_angle_brackets(tag) {
325 continue;
326 }
327
328 let tag_start_col = html_tag.start_col + 1; if ctx.is_in_code_span(line_num, tag_start_col) {
331 continue;
332 }
333
334 if self.is_tag_allowed(tag) {
336 continue;
337 }
338
339 if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
341 continue;
342 }
343
344 let fix = self
346 .calculate_fix(content, tag, tag_byte_start)
347 .map(|(range, replacement)| Fix { range, replacement });
348
349 warnings.push(LintWarning {
351 rule_name: Some(self.name().to_string()),
352 line: line_num,
353 column: html_tag.start_col + 1, end_line: line_num, end_column: html_tag.end_col + 1, message: format!("Inline HTML found: {tag}"),
357 severity: Severity::Warning,
358 fix,
359 });
360 }
361
362 Ok(warnings)
363 }
364
365 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
366 Ok(ctx.content.to_string())
368 }
369
370 fn fix_capability(&self) -> crate::rule::FixCapability {
371 crate::rule::FixCapability::Unfixable
372 }
373
374 fn category(&self) -> RuleCategory {
376 RuleCategory::Html
377 }
378
379 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
381 ctx.content.is_empty() || !ctx.likely_has_html()
382 }
383
384 fn as_any(&self) -> &dyn std::any::Any {
385 self
386 }
387
388 fn default_config_section(&self) -> Option<(String, toml::Value)> {
389 let json_value = serde_json::to_value(&self.config).ok()?;
390 Some((
391 self.name().to_string(),
392 crate::rule_config_serde::json_to_toml_value(&json_value)?,
393 ))
394 }
395
396 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
397 where
398 Self: Sized,
399 {
400 let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
401 Box::new(Self::from_config_struct(rule_config))
402 }
403}
404
405#[cfg(test)]
406mod tests {
407 use super::*;
408 use crate::lint_context::LintContext;
409 use crate::rule::Rule;
410
411 #[test]
412 fn test_md033_basic_html() {
413 let rule = MD033NoInlineHtml::default();
414 let content = "<div>Some content</div>";
415 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
416 let result = rule.check(&ctx).unwrap();
417 assert_eq!(result.len(), 1); assert!(result[0].message.starts_with("Inline HTML found: <div>"));
420 }
421
422 #[test]
423 fn test_md033_case_insensitive() {
424 let rule = MD033NoInlineHtml::default();
425 let content = "<DiV>Some <B>content</B></dIv>";
426 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
427 let result = rule.check(&ctx).unwrap();
428 assert_eq!(result.len(), 2); assert_eq!(result[0].message, "Inline HTML found: <DiV>");
431 assert_eq!(result[1].message, "Inline HTML found: <B>");
432 }
433
434 #[test]
435 fn test_md033_allowed_tags() {
436 let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
437 let content = "<div>Allowed</div><p>Not allowed</p><br/>";
438 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
439 let result = rule.check(&ctx).unwrap();
440 assert_eq!(result.len(), 1);
442 assert_eq!(result[0].message, "Inline HTML found: <p>");
443
444 let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
446 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
447 let result2 = rule.check(&ctx2).unwrap();
448 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <P>");
450 }
451
452 #[test]
453 fn test_md033_html_comments() {
454 let rule = MD033NoInlineHtml::default();
455 let content = "<!-- This is a comment --> <p>Not a comment</p>";
456 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
457 let result = rule.check(&ctx).unwrap();
458 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <p>");
461 }
462
463 #[test]
464 fn test_md033_tags_in_links() {
465 let rule = MD033NoInlineHtml::default();
466 let content = "[Link](http://example.com/<div>)";
467 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
468 let result = rule.check(&ctx).unwrap();
469 assert_eq!(result.len(), 1);
471 assert_eq!(result[0].message, "Inline HTML found: <div>");
472
473 let content2 = "[Link <a>text</a>](url)";
474 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard);
475 let result2 = rule.check(&ctx2).unwrap();
476 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <a>");
479 }
480
481 #[test]
482 fn test_md033_fix_escaping() {
483 let rule = MD033NoInlineHtml::default();
484 let content = "Text with <div> and <br/> tags.";
485 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
486 let fixed_content = rule.fix(&ctx).unwrap();
487 assert_eq!(fixed_content, content);
489 }
490
491 #[test]
492 fn test_md033_in_code_blocks() {
493 let rule = MD033NoInlineHtml::default();
494 let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
495 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
496 let result = rule.check(&ctx).unwrap();
497 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <div>");
500 }
501
502 #[test]
503 fn test_md033_in_code_spans() {
504 let rule = MD033NoInlineHtml::default();
505 let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
506 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
507 let result = rule.check(&ctx).unwrap();
508 assert_eq!(result.len(), 1);
510 assert_eq!(result[0].message, "Inline HTML found: <br/>");
511 }
512
513 #[test]
514 fn test_md033_issue_90_code_span_with_diff_block() {
515 let rule = MD033NoInlineHtml::default();
517 let content = r#"# Heading
518
519`<env>`
520
521```diff
522- this
523+ that
524```"#;
525 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
526 let result = rule.check(&ctx).unwrap();
527 assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
529 }
530
531 #[test]
532 fn test_md033_multiple_code_spans_with_angle_brackets() {
533 let rule = MD033NoInlineHtml::default();
535 let content = "`<one>` and `<two>` and `<three>` are all code spans";
536 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
537 let result = rule.check(&ctx).unwrap();
538 assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
539 }
540
541 #[test]
542 fn test_md033_nested_angle_brackets_in_code_span() {
543 let rule = MD033NoInlineHtml::default();
545 let content = "Text with `<<nested>>` brackets";
546 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
547 let result = rule.check(&ctx).unwrap();
548 assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
549 }
550
551 #[test]
552 fn test_md033_code_span_at_end_before_code_block() {
553 let rule = MD033NoInlineHtml::default();
555 let content = "Testing `<test>`\n```\ncode here\n```";
556 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
557 let result = rule.check(&ctx).unwrap();
558 assert_eq!(result.len(), 0, "Should handle code span before code block");
559 }
560
561 #[test]
562 fn test_md033_quick_fix_inline_tag() {
563 let rule = MD033NoInlineHtml::default();
565 let content = "This has <span>inline text</span> that should keep content.";
566 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
567 let result = rule.check(&ctx).unwrap();
568
569 assert_eq!(result.len(), 1, "Should find one HTML tag");
570 assert!(result[0].fix.is_some(), "Should have a fix");
571
572 let fix = result[0].fix.as_ref().unwrap();
573 assert_eq!(&content[fix.range.clone()], "<span>inline text</span>");
574 assert_eq!(fix.replacement, "inline text");
575 }
576
577 #[test]
578 fn test_md033_quick_fix_multiline_tag() {
579 let rule = MD033NoInlineHtml::default();
581 let content = "<div>\nBlock content\n</div>";
582 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
583 let result = rule.check(&ctx).unwrap();
584
585 assert_eq!(result.len(), 1, "Should find one HTML tag");
586 assert!(result[0].fix.is_some(), "Should have a fix");
587
588 let fix = result[0].fix.as_ref().unwrap();
589 assert_eq!(&content[fix.range.clone()], "<div>\nBlock content\n</div>");
590 assert_eq!(fix.replacement, "\nBlock content\n");
591 }
592
593 #[test]
594 fn test_md033_quick_fix_self_closing_tag() {
595 let rule = MD033NoInlineHtml::default();
597 let content = "Self-closing: <br/>";
598 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
599 let result = rule.check(&ctx).unwrap();
600
601 assert_eq!(result.len(), 1, "Should find one HTML tag");
602 assert!(result[0].fix.is_some(), "Should have a fix");
603
604 let fix = result[0].fix.as_ref().unwrap();
605 assert_eq!(&content[fix.range.clone()], "<br/>");
606 assert_eq!(fix.replacement, "");
607 }
608
609 #[test]
610 fn test_md033_quick_fix_multiple_tags() {
611 let rule = MD033NoInlineHtml::default();
613 let content = "<span>first</span> and <strong>second</strong>";
614 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
615 let result = rule.check(&ctx).unwrap();
616
617 assert_eq!(result.len(), 2, "Should find two HTML tags");
618 assert!(result[0].fix.is_some(), "First tag should have a fix");
619 assert!(result[1].fix.is_some(), "Second tag should have a fix");
620
621 let fix1 = result[0].fix.as_ref().unwrap();
622 assert_eq!(&content[fix1.range.clone()], "<span>first</span>");
623 assert_eq!(fix1.replacement, "first");
624
625 let fix2 = result[1].fix.as_ref().unwrap();
626 assert_eq!(&content[fix2.range.clone()], "<strong>second</strong>");
627 assert_eq!(fix2.replacement, "second");
628 }
629}