1use crate::rule::{LintError, LintResult, LintWarning, Rule, RuleCategory, Severity};
7use crate::utils::document_structure::{DocumentStructure, DocumentStructureExtensions};
8use crate::utils::kramdown_utils::{is_kramdown_block_attribute, is_kramdown_extension};
9use crate::utils::range_utils::calculate_html_tag_range;
10use crate::utils::regex_cache::*;
11use lazy_static::lazy_static;
12use regex::Regex;
13use std::collections::HashSet;
14
15mod md033_config;
16use md033_config::MD033Config;
17
// Lazily compiled pattern for an HTML comment (`<!-- ... -->`).
// The `.*?` is non-greedy, so each match stops at the first `-->`. `.` does not match a
// line break by default in the `regex` crate, so a comment that spans lines is not matched.
lazy_static! {
    static ref HTML_COMMENT_PATTERN: Regex = Regex::new(r"<!--.*?-->").unwrap();
}
22
/// Lint rule MD033: reports HTML tags found in Markdown content.
///
/// Tags whose name is in the allowed set are not reported.
#[derive(Clone)]
pub struct MD033NoInlineHtml {
    /// Configuration this rule was built from; also what `default_config_section` serializes.
    config: MD033Config,
    /// Lookup set obtained from `config.allowed_set()` when the rule is constructed.
    /// Tag names are lowercased before being looked up here.
    allowed: HashSet<String>,
}
28
29impl Default for MD033NoInlineHtml {
30 fn default() -> Self {
31 let config = MD033Config::default();
32 let allowed = config.allowed_set();
33 Self { config, allowed }
34 }
35}
36
37impl MD033NoInlineHtml {
38 pub fn new() -> Self {
39 Self::default()
40 }
41
42 pub fn with_allowed(allowed_vec: Vec<String>) -> Self {
43 let config = MD033Config {
44 allowed: allowed_vec.clone(),
45 };
46 let allowed = config.allowed_set();
47 Self { config, allowed }
48 }
49
50 pub fn from_config_struct(config: MD033Config) -> Self {
51 let allowed = config.allowed_set();
52 Self { config, allowed }
53 }
54
55 #[inline]
57 fn is_tag_allowed(&self, tag: &str) -> bool {
58 if self.allowed.is_empty() {
59 return false;
60 }
61 let tag = tag.trim_start_matches('<').trim_start_matches('/');
63 let tag_name = tag
64 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
65 .next()
66 .unwrap_or("");
67 self.allowed.contains(&tag_name.to_lowercase())
68 }
69
70 #[inline]
72 fn is_html_comment(&self, tag: &str) -> bool {
73 tag.starts_with("<!--") && tag.ends_with("-->")
74 }
75
76 #[inline]
78 fn is_likely_type_annotation(&self, tag: &str) -> bool {
79 const COMMON_TYPES: &[&str] = &[
81 "string",
82 "number",
83 "any",
84 "void",
85 "null",
86 "undefined",
87 "array",
88 "promise",
89 "function",
90 "error",
91 "date",
92 "regexp",
93 "symbol",
94 "bigint",
95 "map",
96 "set",
97 "weakmap",
98 "weakset",
99 "iterator",
100 "generator",
101 "t",
102 "u",
103 "v",
104 "k",
105 "e", "userdata",
107 "apiresponse",
108 "config",
109 "options",
110 "params",
111 "result",
112 "response",
113 "request",
114 "data",
115 "item",
116 "element",
117 "node",
118 ];
119
120 let tag_content = tag
121 .trim_start_matches('<')
122 .trim_end_matches('>')
123 .trim_start_matches('/');
124 let tag_name = tag_content
125 .split(|c: char| c.is_whitespace() || c == '>' || c == '/')
126 .next()
127 .unwrap_or("");
128
129 if !tag_content.contains(' ') && !tag_content.contains('=') {
131 COMMON_TYPES.contains(&tag_name.to_ascii_lowercase().as_str())
132 } else {
133 false
134 }
135 }
136
137 #[inline]
139 fn is_email_address(&self, tag: &str) -> bool {
140 let content = tag.trim_start_matches('<').trim_end_matches('>');
141 content.contains('@')
143 && content.chars().all(|c| c.is_alphanumeric() || "@.-_+".contains(c))
144 && content.split('@').count() == 2
145 && content.split('@').all(|part| !part.is_empty())
146 }
147
148 #[inline]
150 fn is_url_in_angle_brackets(&self, tag: &str) -> bool {
151 let content = tag.trim_start_matches('<').trim_end_matches('>');
152 content.starts_with("http://")
154 || content.starts_with("https://")
155 || content.starts_with("ftp://")
156 || content.starts_with("ftps://")
157 || content.starts_with("mailto:")
158 }
159
160 fn find_multiline_html_tags(
162 &self,
163 content: &str,
164 structure: &DocumentStructure,
165 nomarkdown_ranges: &[(usize, usize)],
166 warnings: &mut Vec<LintWarning>,
167 ) {
168 if !content.contains('<') || !content.lines().any(|line| line.trim_end().ends_with('<')) {
170 return;
171 }
172
173 lazy_static::lazy_static! {
175 static ref INCOMPLETE_TAG_START: regex::Regex = regex::Regex::new(r"(?i)<[a-zA-Z][^>]*$").unwrap();
176 }
177
178 let lines: Vec<&str> = content.lines().collect();
179
180 for (i, line) in lines.iter().enumerate() {
181 let line_num = i + 1;
182
183 if line.trim().is_empty() || structure.is_in_code_block(line_num) {
185 continue;
186 }
187
188 if nomarkdown_ranges
190 .iter()
191 .any(|(start, end)| line_num >= *start && line_num <= *end)
192 {
193 continue;
194 }
195
196 if !line.contains('<') {
198 continue;
199 }
200
201 if let Some(incomplete_match) = INCOMPLETE_TAG_START.find(line) {
203 let start_column = incomplete_match.start() + 1; let mut complete_tag = incomplete_match.as_str().to_string();
207 let mut found_end = false;
208
209 for (j, next_line) in lines.iter().enumerate().skip(i + 1).take(10) {
211 let next_line_num = j + 1;
212
213 if structure.is_in_code_block(next_line_num) {
215 break;
216 }
217
218 complete_tag.push(' '); complete_tag.push_str(next_line.trim());
220
221 if next_line.contains('>') {
222 found_end = true;
223 break;
224 }
225 }
226
227 if found_end {
228 if let Some(end_pos) = complete_tag.find('>') {
230 let final_tag = &complete_tag[0..=end_pos];
231
232 if !self.is_html_comment(final_tag)
234 && !self.is_likely_type_annotation(final_tag)
235 && !self.is_email_address(final_tag)
236 && !self.is_url_in_angle_brackets(final_tag)
237 && !self.is_tag_allowed(final_tag)
238 && HTML_TAG_FINDER.is_match(final_tag)
239 {
240 let already_warned =
242 warnings.iter().any(|w| w.line == line_num && w.column == start_column);
243
244 if !already_warned {
245 let (start_line, start_col, end_line, end_col) = calculate_html_tag_range(
246 line_num,
247 line,
248 incomplete_match.start(),
249 incomplete_match.len(),
250 );
251 warnings.push(LintWarning {
252 rule_name: Some(self.name()),
253 line: start_line,
254 column: start_col,
255 end_line,
256 end_column: end_col,
257 message: format!("HTML tag found: {final_tag} (use Markdown syntax instead)"),
258 severity: Severity::Warning,
259 fix: None,
260 });
261 }
262 }
263 }
264 }
265 }
266 }
267 }
268}
269
270impl Rule for MD033NoInlineHtml {
271 fn name(&self) -> &'static str {
272 "MD033"
273 }
274
275 fn description(&self) -> &'static str {
276 "Inline HTML is not allowed"
277 }
278
279 fn check(&self, ctx: &crate::lint_context::LintContext) -> LintResult {
280 let content = ctx.content;
281 let structure = DocumentStructure::new(content);
282 self.check_with_structure(ctx, &structure)
283 }
284
285 fn check_with_structure(
287 &self,
288 ctx: &crate::lint_context::LintContext,
289 structure: &DocumentStructure,
290 ) -> LintResult {
291 let content = ctx.content;
292
293 if content.is_empty() || !has_html_tags(content) {
295 return Ok(Vec::new());
296 }
297
298 if !HTML_TAG_QUICK_CHECK.is_match(content) {
300 return Ok(Vec::new());
301 }
302
303 let mut warnings = Vec::new();
304 let lines: Vec<&str> = content.lines().collect();
305
306 let mut in_nomarkdown = false;
308 let mut in_comment = false;
309 let mut nomarkdown_ranges: Vec<(usize, usize)> = Vec::new();
310 let mut nomarkdown_start = 0;
311 let mut comment_start = 0;
312
313 for (i, line) in lines.iter().enumerate() {
315 let line_num = i + 1;
316
317 if line.trim() == "{::nomarkdown}" {
319 in_nomarkdown = true;
320 nomarkdown_start = line_num;
321 } else if line.trim() == "{:/nomarkdown}" && in_nomarkdown {
322 in_nomarkdown = false;
323 nomarkdown_ranges.push((nomarkdown_start, line_num));
324 }
325
326 if line.trim() == "{::comment}" {
328 in_comment = true;
329 comment_start = line_num;
330 } else if line.trim() == "{:/comment}" && in_comment {
331 in_comment = false;
332 nomarkdown_ranges.push((comment_start, line_num));
333 }
334 }
335
336 for (i, line) in lines.iter().enumerate() {
339 let line_num = i + 1;
340
341 if line.trim().is_empty() {
342 continue;
343 }
344 if structure.is_in_code_block(line_num) {
345 continue;
346 }
347 if line.starts_with(" ") || line.starts_with('\t') {
350 continue;
351 }
352
353 if nomarkdown_ranges
355 .iter()
356 .any(|(start, end)| line_num >= *start && line_num <= *end)
357 {
358 continue;
359 }
360
361 if is_kramdown_extension(line) || is_kramdown_block_attribute(line) {
363 continue;
364 }
365
366 for tag_match in HTML_TAG_FINDER.find_iter(line) {
368 let tag = tag_match.as_str();
369
370 if self.is_html_comment(tag) {
372 continue;
373 }
374
375 if self.is_likely_type_annotation(tag) {
377 continue;
378 }
379
380 if self.is_email_address(tag) {
382 continue;
383 }
384
385 if self.is_url_in_angle_brackets(tag) {
387 continue;
388 }
389
390 let tag_start_col = tag_match.start() + 1; if structure.is_in_code_span(line_num, tag_start_col) {
393 continue;
394 }
395
396 if self.is_tag_allowed(tag) {
398 continue;
399 }
400
401 let (start_line, start_col, end_line, end_col) =
403 calculate_html_tag_range(line_num, line, tag_match.start(), tag_match.len());
404 warnings.push(LintWarning {
405 rule_name: Some(self.name()),
406 line: start_line,
407 column: start_col,
408 end_line,
409 end_column: end_col,
410 message: format!("Inline HTML found: {tag} (use Markdown syntax instead)"),
411 severity: Severity::Warning,
412 fix: None,
413 });
414 }
415 }
416
417 self.find_multiline_html_tags(ctx.content, structure, &nomarkdown_ranges, &mut warnings);
419
420 Ok(warnings)
421 }
422
423 fn fix(&self, ctx: &crate::lint_context::LintContext) -> Result<String, LintError> {
424 Ok(ctx.content.to_string())
426 }
427
428 fn fix_capability(&self) -> crate::rule::FixCapability {
429 crate::rule::FixCapability::Unfixable
430 }
431
432 fn category(&self) -> RuleCategory {
434 RuleCategory::Html
435 }
436
437 fn should_skip(&self, ctx: &crate::lint_context::LintContext) -> bool {
439 let content = ctx.content;
440 content.is_empty() || !has_html_tags(content)
441 }
442
443 fn as_any(&self) -> &dyn std::any::Any {
444 self
445 }
446
447 fn as_maybe_document_structure(&self) -> Option<&dyn crate::rule::MaybeDocumentStructure> {
448 Some(self)
449 }
450
451 fn default_config_section(&self) -> Option<(String, toml::Value)> {
452 let json_value = serde_json::to_value(&self.config).ok()?;
453 Some((
454 self.name().to_string(),
455 crate::rule_config_serde::json_to_toml_value(&json_value)?,
456 ))
457 }
458
459 fn from_config(config: &crate::config::Config) -> Box<dyn Rule>
460 where
461 Self: Sized,
462 {
463 let rule_config = crate::rule_config_serde::load_rule_config::<MD033Config>(config);
464 Box::new(Self::from_config_struct(rule_config))
465 }
466}
467
468impl DocumentStructureExtensions for MD033NoInlineHtml {
469 fn has_relevant_elements(
470 &self,
471 ctx: &crate::lint_context::LintContext,
472 _doc_structure: &DocumentStructure,
473 ) -> bool {
474 ctx.content.contains('<') && ctx.content.contains('>')
476 }
477}
478
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lint_context::LintContext;
    use crate::rule::Rule;

    /// Runs `rule` on `content` (standard flavor) and returns the warning messages
    /// in the order they were reported.
    fn messages(rule: &MD033NoInlineHtml, content: &str) -> Vec<String> {
        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
        rule.check(&ctx)
            .unwrap()
            .into_iter()
            .map(|warning| warning.message)
            .collect()
    }

    #[test]
    fn test_md033_basic_html() {
        // Opening and closing tags are reported separately.
        let found = messages(&MD033NoInlineHtml::default(), "<div>Some content</div>");
        assert_eq!(found.len(), 2);
        assert!(found[0].starts_with("Inline HTML found: <div>"));
        assert!(found[1].starts_with("Inline HTML found: </div>"));
    }

    #[test]
    fn test_md033_case_insensitive() {
        // Tags are detected regardless of case and echoed back as written.
        let found = messages(
            &MD033NoInlineHtml::default(),
            "<DiV>Some <B>content</B></dIv>",
        );
        assert_eq!(
            found,
            [
                "Inline HTML found: <DiV> (use Markdown syntax instead)",
                "Inline HTML found: <B> (use Markdown syntax instead)",
                "Inline HTML found: </B> (use Markdown syntax instead)",
                "Inline HTML found: </dIv> (use Markdown syntax instead)",
            ]
        );
    }

    #[test]
    fn test_md033_allowed_tags() {
        let rule = MD033NoInlineHtml::with_allowed(vec!["div".to_string(), "br".to_string()]);

        // Only the tags outside the allowed list are reported.
        let lower = messages(&rule, "<div>Allowed</div><p>Not allowed</p><br/>");
        assert_eq!(
            lower,
            [
                "Inline HTML found: <p> (use Markdown syntax instead)",
                "Inline HTML found: </p> (use Markdown syntax instead)",
            ]
        );

        // The allowed list also applies to upper-case tags.
        let upper = messages(&rule, "<DIV>Allowed</DIV><P>Not allowed</P><BR/>");
        assert_eq!(
            upper,
            [
                "Inline HTML found: <P> (use Markdown syntax instead)",
                "Inline HTML found: </P> (use Markdown syntax instead)",
            ]
        );
    }

    #[test]
    fn test_md033_html_comments() {
        // The comment is ignored; the paragraph tags are not.
        let found = messages(
            &MD033NoInlineHtml::default(),
            "<!-- This is a comment --> <p>Not a comment</p>",
        );
        assert_eq!(
            found,
            [
                "Inline HTML found: <p> (use Markdown syntax instead)",
                "Inline HTML found: </p> (use Markdown syntax instead)",
            ]
        );
    }

    #[test]
    fn test_md033_tags_in_links() {
        let rule = MD033NoInlineHtml::default();

        // A tag inside a link destination is still reported.
        let in_url = messages(&rule, "[Link](http://example.com/<div>)");
        assert_eq!(
            in_url,
            ["Inline HTML found: <div> (use Markdown syntax instead)"]
        );

        // Tags inside link text are reported too.
        let in_text = messages(&rule, "[Link <a>text</a>](url)");
        assert_eq!(
            in_text,
            [
                "Inline HTML found: <a> (use Markdown syntax instead)",
                "Inline HTML found: </a> (use Markdown syntax instead)",
            ]
        );
    }

    #[test]
    fn test_md033_fix_escaping() {
        // `fix` leaves the content exactly as it was.
        let original = "Text with <div> and <br/> tags.";
        let ctx = LintContext::new(original, crate::config::MarkdownFlavor::Standard);
        let fixed = MD033NoInlineHtml::default().fix(&ctx).unwrap();
        assert_eq!(fixed, original);
    }

    #[test]
    fn test_md033_in_code_blocks() {
        // Only the tags after the fenced block are reported.
        let found = messages(
            &MD033NoInlineHtml::default(),
            "```html\n<div>Code</div>\n```\n<div>Not code</div>",
        );
        assert_eq!(
            found,
            [
                "Inline HTML found: <div> (use Markdown syntax instead)",
                "Inline HTML found: </div> (use Markdown syntax instead)",
            ]
        );
    }

    #[test]
    fn test_md033_in_code_spans() {
        // Tags inside the backtick span are ignored.
        let found = messages(
            &MD033NoInlineHtml::default(),
            "Text with `<p>in code</p>` span. <br/> Not in span.",
        );
        assert_eq!(
            found,
            ["Inline HTML found: <br/> (use Markdown syntax instead)"]
        );
    }
}