1use crate::rule::{Fix, LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
8use crate::utils::regex_cache::*;
9use std::collections::HashSet;
10
11mod md033_config;
12use md033_config::MD033Config;
13
/// Rule MD033: reports inline HTML tags found in Markdown content.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    /// Configuration this rule instance was built from.
    config: MD033Config,
    /// Set obtained from `config.allowed_set()` at construction time; a tag whose
    /// lowercased name is contained in it is not reported.
    allowed: HashSet<String>,
}
19
20impl Default for MD033NoInlineHtml {
21 fn default() -> Self {
22 let config = MD033Config::default();
23 let allowed = config.allowed_set();
24 Self { config, allowed }
25 }
26}
27
28impl MD033NoInlineHtml {
29 pub fn new() -> Self {
30 Self::default()
31 }
32
33 pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
34 let config = MD033Config {
35 allowed: allowed_vec.clone(),
36 };
37 let allowed = config.allowed_set();
38 Self { config, allowed }
39 }
40
41 pub fn from_config_struct(config: MD033Config) -> Self {
42 let allowed = config.allowed_set();
43 Self { config, allowed }
44 }
45
46 #[inline]
48 fn is_tag_allowed(&self, tag: &str) -> bool {
49 if self.allowed.is_empty() {
50 return false;
51 }
52 let tag = tag.trim_start_matches('<').trim_start_matches('/');
54 let tag_name = tag
55 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
56 .next()
57 .unwrap_or("");
58 self.allowed.contains(&tag_name.to_lowercase())
59 }
60
61 #[inline]
63 fn is_html_comment(&self, tag: &str) -> bool {
64 tag.starts_with("<!--") && tag.ends_with("-->")
65 }
66
67 #[inline]
69 fn is_likely_type_annotation(&self, tag: &str) -> bool {
70 const COMMON_TYPES: &[&str] = &[
72 "string",
73 "number",
74 "any",
75 "void",
76 "null",
77 "undefined",
78 "array",
79 "promise",
80 "function",
81 "error",
82 "date",
83 "regexp",
84 "symbol",
85 "bigint",
86 "map",
87 "set",
88 "weakmap",
89 "weakset",
90 "iterator",
91 "generator",
92 "t",
93 "u",
94 "v",
95 "k",
96 "e", "userdata",
98 "apiresponse",
99 "config",
100 "options",
101 "params",
102 "result",
103 "response",
104 "request",
105 "data",
106 "item",
107 "element",
108 "node",
109 ];
110
111 let tag_content = tag
112 .trim_start_matches('<')
113 .trim_end_matches('>')
114 .trim_start_matches('/');
115 let tag_name = tag_content
116 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
117 .next()
118 .unwrap_or("");
119
120 if !tag_content.contains(' ') && !tag_content.contains('=') {
122 COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
123 } else {
124 false
125 }
126 }
127
128 #[inline]
130 fn is_email_address(&self, tag: &str) -> bool {
131 let content = tag.trim_start_matches('<').trim_end_matches('>');
132 content.contains('@')
134 && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
135 && content.split('@').count() == 2
136 && content.split('@').all(|part| !part.is_empty())
137 }
138
139 #[inline]
141 fn has_markdown_attribute(&self, tag: &str) -> bool {
142 tag.contains(" markdown>") || tag.contains(" markdown=") || tag.contains(" markdown ")
145 }
146
147 #[inline]
149 fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
150 let content = tag.trim_start_matches('<').trim_end_matches('>');
151 content.starts_with("http://")
153 || content.starts_with("https://")
154 || content.starts_with("ftp://")
155 || content.starts_with("ftps://")
156 || content.starts_with("mailto:")
157 }
158
159 fn calculate_fix(
167 &self,
168 content: &str,
169 opening_tag: &str,
170 tag_byte_start: usize,
171 ) -> Option<(std::ops::Range<usize>, String)> {
172 if opening_tag.ends_with("/>") {
174 return Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()));
175 }
176
177 let tag_name = opening_tag
179 .trim_start_matches('<')
180 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
181 .next()?
182 .to_lowercase();
183
184 let closing_tag = format!("</{tag_name}>");
186
187 let search_start = tag_byte_start + opening_tag.len();
189 if let Some(closing_pos) = content[search_start..].find(&closing_tag) {
190 let closing_byte_start = search_start + closing_pos;
191 let closing_byte_end = closing_byte_start + closing_tag.len();
192
193 let inner_content = &content[search_start..closing_byte_start];
195
196 return Some((tag_byte_start..closing_byte_end, inner_content.to_string()));
197 }
198
199 Some((tag_byte_start..tag_byte_start + opening_tag.len(), String::new()))
201 }
202}
203
204impl Rule for MD033NoInlineHtml {
205 fn name(&self) -> &'static str {
206 "MD033"
207 }
208
209 fn description(&self) -> &'static str {
210 "Inline HTML is not allowed"
211 }
212
213 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
214 let content = ctx.content;
215
216 if content.is_empty() || !ctx.likely_has_html() {
218 return Ok(Vec::new());
219 }
220
221 if !HTML_TAG_QUICK_CHECK.is_match(content) {
223 return Ok(Vec::new());
224 }
225
226 let mut warnings = Vec::new();
227 let lines: Vec<&str> = content.lines().collect();
228
229 let mut in_nomarkdown = false;
231 let mut in_comment = false;
232 let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
233 let mut nomarkdown_start = 0;
234 let mut comment_start = 0;
235
236 for (i, line) in lines.iter().enumerate() {
237 let line_num = i + 1;
238
239 if line.trim() == "{::nomarkdown}" {
241 in_nomarkdown = true;
242 nomarkdown_start = line_num;
243 } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
244 in_nomarkdown = false;
245 nomarkdown_ranges.push((nomarkdown_start, line_num));
246 }
247
248 if line.trim() == "{::comment}" {
250 in_comment = true;
251 comment_start = line_num;
252 } else if line.trim() == "{:/comment}" && in_comment {
253 in_comment = false;
254 nomarkdown_ranges.push((comment_start, line_num));
255 }
256 }
257
258 let html_tags = ctx.html_tags();
260
261 for html_tag in html_tags.iter() {
262 if html_tag.is_closing {
264 continue;
265 }
266
267 let line_num = html_tag.line;
268 let tag_byte_start = html_tag.byte_offset;
269
270 let tag = &content[html_tag.byte_offset..html_tag.byte_end];
272
273 if ctx.line_info(line_num).is_some_and(|info| info.in_code_block) {
275 continue;
276 }
277
278 if let Some(line) = lines.get(line_num.saturating_sub(1)) {
280 if line.starts_with(" ") || line.starts_with('\t') {
281 continue;
282 }
283
284 if is_kramdown_extension(line) || is_kramdown_block_attribute(line) {
286 continue;
287 }
288 }
289
290 if nomarkdown_ranges
292 .iter()
293 .any(|(start, end)| line_num >= *start && line_num <= *end)
294 {
295 continue;
296 }
297
298 if ctx.is_in_html_comment(tag_byte_start) {
300 continue;
301 }
302
303 if self.is_html_comment(tag) {
305 continue;
306 }
307
308 if ctx.flavor.supports_jsx() && html_tag.tag_name.chars().next().is_some_and(|c| c.is_uppercase()) {
310 continue;
311 }
312
313 if self.is_likely_type_annotation(tag) {
315 continue;
316 }
317
318 if self.is_email_address(tag) {
320 continue;
321 }
322
323 if self.is_url_in_angle_brackets(tag) {
325 continue;
326 }
327
328 if ctx.is_byte_offset_in_code_span(tag_byte_start) {
330 continue;
331 }
332
333 if self.is_tag_allowed(tag) {
335 continue;
336 }
337
338 if ctx.flavor == crate::config::MarkdownFlavor::MkDocs && self.has_markdown_attribute(tag) {
340 continue;
341 }
342
343 let fix = self
345 .calculate_fix(content, tag, tag_byte_start)
346 .map(|(range, replacement)| Fix { range, replacement });
347
348 warnings.push(LintWarning {
350 rule_name: Some(self.name().to_string()),
351 line: line_num,
352 column: html_tag.start_col + 1, end_line: line_num, end_column: html_tag.end_col + 1, message: format!("Inline HTML found: {tag}"),
356 severity: Severity::Warning,
357 fix,
358 });
359 }
360
361 Ok(warnings)
362 }
363
364 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
365 Ok(ctx.content.to_string())
367 }
368
369 fn fix_capability(&self) -> crate::rule::FixCapability {
370 crate::rule::FixCapability::Unfixable
371 }
372
373 fn category(&self) -> RuleCategory {
375 RuleCategory::Html
376 }
377
378 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
380 ctx.content.is_empty() || !ctx.likely_has_html()
381 }
382
383 fn as_any(&self) -> &dyn std::any::Any {
384 self
385 }
386
387 fn default_config_section(&self) -> Option<(String, toml::Value)> {
388 let json_value = serde_json::to_value(&self.config).ok()?;
389 Some((
390 self.name().to_string(),
391 crate::rule_config_serde::json_to_toml_value(&json_value)?,
392 ))
393 }
394
395 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
396 where
397 Self: Sized,
398 {
399 let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
400 Box::new(Self::from_config_struct(rule_config))
401 }
402}
403
404#[cfg(test)]
405mod tests {
406 use super::*;
407 use crate::lint_context::LintContext;
408 use crate::rule::Rule;
409
410 #[test]
411 fn test_md033_basic_html() {
412 let rule = MD033NoInlineHtml::default();
413 let content = "<div>Some content</div>";
414 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
415 let result = rule.check(&ctx).unwrap();
416 assert_eq!(result.len(), 1); assert!(result[0].message.starts_with("Inline HTML found: <div>"));
419 }
420
421 #[test]
422 fn test_md033_case_insensitive() {
423 let rule = MD033NoInlineHtml::default();
424 let content = "<DiV>Some <B>content</B></dIv>";
425 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
426 let result = rule.check(&ctx).unwrap();
427 assert_eq!(result.len(), 2); assert_eq!(result[0].message, "Inline HTML found: <DiV>");
430 assert_eq!(result[1].message, "Inline HTML found: <B>");
431 }
432
433 #[test]
434 fn test_md033_allowed_tags() {
435 let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);
436 let content = "<div>Allowed</div><p>Not allowed</p><br/>";
437 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
438 let result = rule.check(&ctx).unwrap();
439 assert_eq!(result.len(), 1);
441 assert_eq!(result[0].message, "Inline HTML found: <p>");
442
443 let content2 = "<DIV>Allowed</DIV><P>Not allowed</P><BR/>";
445 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
446 let result2 = rule.check(&ctx2).unwrap();
447 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <P>");
449 }
450
451 #[test]
452 fn test_md033_html_comments() {
453 let rule = MD033NoInlineHtml::default();
454 let content = "<!-- This is a comment --> <p>Not a comment</p>";
455 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
456 let result = rule.check(&ctx).unwrap();
457 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <p>");
460 }
461
462 #[test]
463 fn test_md033_tags_in_links() {
464 let rule = MD033NoInlineHtml::default();
465 let content = "[Link](http://example.com/<div>)";
466 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
467 let result = rule.check(&ctx).unwrap();
468 assert_eq!(result.len(), 1);
470 assert_eq!(result[0].message, "Inline HTML found: <div>");
471
472 let content2 = "[Link <a>text</a>](url)";
473 let ctx2 = LintContext::new(content2, crate::config::MarkdownFlavor::Standard, None);
474 let result2 = rule.check(&ctx2).unwrap();
475 assert_eq!(result2.len(), 1); assert_eq!(result2[0].message, "Inline HTML found: <a>");
478 }
479
480 #[test]
481 fn test_md033_fix_escaping() {
482 let rule = MD033NoInlineHtml::default();
483 let content = "Text with <div> and <br/> tags.";
484 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
485 let fixed_content = rule.fix(&ctx).unwrap();
486 assert_eq!(fixed_content, content);
488 }
489
490 #[test]
491 fn test_md033_in_code_blocks() {
492 let rule = MD033NoInlineHtml::default();
493 let content = "```html\n<div>Code</div>\n```\n<div>Not code</div>";
494 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
495 let result = rule.check(&ctx).unwrap();
496 assert_eq!(result.len(), 1); assert_eq!(result[0].message, "Inline HTML found: <div>");
499 }
500
501 #[test]
502 fn test_md033_in_code_spans() {
503 let rule = MD033NoInlineHtml::default();
504 let content = "Text with `<p>in code</p>` span. <br/> Not in span.";
505 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
506 let result = rule.check(&ctx).unwrap();
507 assert_eq!(result.len(), 1);
509 assert_eq!(result[0].message, "Inline HTML found: <br/>");
510 }
511
512 #[test]
513 fn test_md033_issue_90_code_span_with_diff_block() {
514 let rule = MD033NoInlineHtml::default();
516 let content = r#"# Heading
517
518`<env>`
519
520```diff
521- this
522+ that
523```"#;
524 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
525 let result = rule.check(&ctx).unwrap();
526 assert_eq!(result.len(), 0, "Should not report HTML tags inside code spans");
528 }
529
530 #[test]
531 fn test_md033_multiple_code_spans_with_angle_brackets() {
532 let rule = MD033NoInlineHtml::default();
534 let content = "`<one>` and `<two>` and `<three>` are all code spans";
535 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
536 let result = rule.check(&ctx).unwrap();
537 assert_eq!(result.len(), 0, "Should not report HTML tags inside any code spans");
538 }
539
540 #[test]
541 fn test_md033_nested_angle_brackets_in_code_span() {
542 let rule = MD033NoInlineHtml::default();
544 let content = "Text with `<<nested>>` brackets";
545 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
546 let result = rule.check(&ctx).unwrap();
547 assert_eq!(result.len(), 0, "Should handle nested angle brackets in code spans");
548 }
549
550 #[test]
551 fn test_md033_code_span_at_end_before_code_block() {
552 let rule = MD033NoInlineHtml::default();
554 let content = "Testing `<test>`\n```\ncode here\n```";
555 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
556 let result = rule.check(&ctx).unwrap();
557 assert_eq!(result.len(), 0, "Should handle code span before code block");
558 }
559
560 #[test]
561 fn test_md033_quick_fix_inline_tag() {
562 let rule = MD033NoInlineHtml::default();
564 let content = "This has <span>inline text</span> that should keep content.";
565 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
566 let result = rule.check(&ctx).unwrap();
567
568 assert_eq!(result.len(), 1, "Should find one HTML tag");
569 assert!(result[0].fix.is_some(), "Should have a fix");
570
571 let fix = result[0].fix.as_ref().unwrap();
572 assert_eq!(&content[fix.range.clone()], "<span>inline text</span>");
573 assert_eq!(fix.replacement, "inline text");
574 }
575
576 #[test]
577 fn test_md033_quick_fix_multiline_tag() {
578 let rule = MD033NoInlineHtml::default();
580 let content = "<div>\nBlock content\n</div>";
581 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
582 let result = rule.check(&ctx).unwrap();
583
584 assert_eq!(result.len(), 1, "Should find one HTML tag");
585 assert!(result[0].fix.is_some(), "Should have a fix");
586
587 let fix = result[0].fix.as_ref().unwrap();
588 assert_eq!(&content[fix.range.clone()], "<div>\nBlock content\n</div>");
589 assert_eq!(fix.replacement, "\nBlock content\n");
590 }
591
592 #[test]
593 fn test_md033_quick_fix_self_closing_tag() {
594 let rule = MD033NoInlineHtml::default();
596 let content = "Self-closing: <br/>";
597 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
598 let result = rule.check(&ctx).unwrap();
599
600 assert_eq!(result.len(), 1, "Should find one HTML tag");
601 assert!(result[0].fix.is_some(), "Should have a fix");
602
603 let fix = result[0].fix.as_ref().unwrap();
604 assert_eq!(&content[fix.range.clone()], "<br/>");
605 assert_eq!(fix.replacement, "");
606 }
607
608 #[test]
609 fn test_md033_quick_fix_multiple_tags() {
610 let rule = MD033NoInlineHtml::default();
612 let content = "<span>first</span> and <strong>second</strong>";
613 let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
614 let result = rule.check(&ctx).unwrap();
615
616 assert_eq!(result.len(), 2, "Should find two HTML tags");
617 assert!(result[0].fix.is_some(), "First tag should have a fix");
618 assert!(result[1].fix.is_some(), "Second tag should have a fix");
619
620 let fix1 = result[0].fix.as_ref().unwrap();
621 assert_eq!(&content[fix1.range.clone()], "<span>first</span>");
622 assert_eq!(fix1.replacement, "first");
623
624 let fix2 = result[1].fix.as_ref().unwrap();
625 assert_eq!(&content[fix2.range.clone()], "<strong>second</strong>");
626 assert_eq!(fix2.replacement, "second");
627 }
628}