mdbook_lint_core/rules/standard/
md033.rs1use crate::error::Result;
7use crate::rule::{AstRule, RuleCategory, RuleMetadata};
8use crate::{
9 Document,
10 violation::{Severity, Violation},
11};
12
/// MD033 (`no-inline-html`): reports raw HTML elements and HTML comments that
/// appear in a Markdown document outside fenced code blocks and inline code.
///
/// The rule carries no state, so it is a unit struct; the derives make it
/// printable in diagnostics and trivially constructible and copyable.
#[derive(Debug, Clone, Copy, Default)]
pub struct MD033;
15
16impl AstRule for MD033 {
17 fn id(&self) -> &'static str {
18 "MD033"
19 }
20
21 fn name(&self) -> &'static str {
22 "no-inline-html"
23 }
24
25 fn description(&self) -> &'static str {
26 "Inline HTML should be avoided"
27 }
28
29 fn metadata(&self) -> RuleMetadata {
30 RuleMetadata::stable(RuleCategory::Content).introduced_in("mdbook-lint v0.1.0")
31 }
32
33 fn check_ast<'a>(
34 &self,
35 document: &Document,
36 _ast: &'a comrak::nodes::AstNode<'a>,
37 ) -> Result<Vec<Violation>> {
38 let mut violations = Vec::new();
39 let lines = &document.lines;
40
41 let mut in_code_block = false;
42
43 for (line_idx, line) in lines.iter().enumerate() {
44 let line_num = line_idx + 1;
45
46 if line.trim_start().starts_with("```") || line.trim_start().starts_with("~~~") {
48 in_code_block = !in_code_block;
49 continue;
50 }
51
52 if in_code_block {
54 continue;
55 }
56
57 violations.extend(self.check_line_for_html(line, line_num));
59 }
60
61 Ok(violations)
62 }
63}
64
65impl MD033 {
66 fn check_line_for_html(&self, line: &str, line_num: usize) -> Vec<Violation> {
68 let mut violations = Vec::new();
69 let mut chars = line.char_indices().peekable();
70 let mut in_backticks = false;
71
72 while let Some((i, ch)) = chars.next() {
73 match ch {
74 '`' => {
75 in_backticks = !in_backticks;
76 }
77 '<' if !in_backticks => {
78 let remaining = &line[i..];
80
81 if remaining.starts_with("<!--") {
82 if let Some(end) = remaining.find("-->") {
84 let comment = &remaining[..end + 3];
85 violations.push(self.create_violation(
86 format!("Inline HTML element found: {comment}"),
87 line_num,
88 i + 1,
89 Severity::Warning,
90 ));
91 for _ in 0..end + 2 {
93 chars.next();
94 }
95 }
96 } else if let Some(tag_end) = remaining.find('>') {
97 let potential_tag = &remaining[..tag_end + 1];
98 if self.is_html_tag(potential_tag) {
99 violations.push(self.create_violation(
100 format!("Inline HTML element found: {potential_tag}"),
101 line_num,
102 i + 1,
103 Severity::Warning,
104 ));
105 for _ in 0..tag_end {
107 chars.next();
108 }
109 }
110 }
111 }
112 _ => {}
113 }
114 }
115
116 violations
117 }
118
119 fn is_html_tag(&self, s: &str) -> bool {
121 if !s.starts_with('<') || !s.ends_with('>') {
122 return false;
123 }
124
125 let content = &s[1..s.len() - 1];
126 if content.is_empty() {
127 return false;
128 }
129
130 let tag_name = if let Some(stripped) = content.strip_prefix('/') {
132 stripped
133 } else {
134 content
135 }
136 .split_whitespace()
137 .next()
138 .unwrap_or("");
139
140 let html_tags = [
142 "a",
143 "abbr",
144 "b",
145 "br",
146 "cite",
147 "code",
148 "em",
149 "i",
150 "img",
151 "kbd",
152 "mark",
153 "q",
154 "s",
155 "samp",
156 "small",
157 "span",
158 "strong",
159 "sub",
160 "sup",
161 "time",
162 "u",
163 "var",
164 "wbr",
165 "h1",
166 "h2",
167 "h3",
168 "h4",
169 "h5",
170 "h6",
171 "p",
172 "div",
173 "section",
174 "article",
175 "header",
176 "footer",
177 "nav",
178 "aside",
179 "main",
180 "figure",
181 "figcaption",
182 "blockquote",
183 "pre",
184 "ul",
185 "ol",
186 "li",
187 "dl",
188 "dt",
189 "dd",
190 "table",
191 "thead",
192 "tbody",
193 "tfoot",
194 "tr",
195 "th",
196 "td",
197 "form",
198 "input",
199 "button",
200 "select",
201 "option",
202 "textarea",
203 "label",
204 "fieldset",
205 "legend",
206 ];
207
208 html_tags.contains(&tag_name.to_lowercase().as_str())
209 }
210}
211
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Document;
    use crate::rule::Rule;
    use std::path::PathBuf;

    /// Lints `content` as `test.md` with MD033 and asserts that exactly
    /// `expected.len()` violations are reported and that the n-th violation's
    /// message contains the n-th expected fragment.
    fn assert_flagged(content: &str, expected: &[&str]) {
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD033;
        let violations = rule.check(&document).unwrap();

        assert_eq!(violations.len(), expected.len());
        for (violation, fragment) in violations.iter().zip(expected) {
            assert!(violation.message.contains(*fragment));
        }
    }

    /// Plain Markdown, inline code and fenced HTML produce no violations.
    #[test]
    fn test_md033_no_violations() {
        let content = r#"# Valid Markdown

This document contains only valid Markdown:

**Bold text** and *italic text*.

`code spans` are fine.

```html
<p>HTML in code blocks is fine</p>
<div class="example">
  <span>This is ignored</span>
</div>
```

[Links](https://example.com) are good.

> Blockquotes are fine

- List items
- More items

## Another heading

Regular paragraphs without HTML.
"#;

        assert_flagged(content, &[]);
    }

    /// Every opening and closing tag in regular text is reported, in order.
    #[test]
    fn test_md033_html_violations() {
        let content = r#"# Document with HTML

This paragraph has <strong>inline HTML</strong>.

<p>This is a paragraph tag.</p>

Some text with <em>emphasis</em> and <code>code</code> tags.

<div class="container">
Block level HTML
</div>

More content with <span class="highlight">spans</span>.
"#;

        assert_flagged(
            content,
            &[
                "<strong>",
                "</strong>",
                "<p>",
                "</p>",
                "<em>",
                "</em>",
                "<code>",
                "</code>",
                "<div",
                "</div>",
                "<span",
                "</span>",
            ],
        );
    }

    /// HTML comments are reported whether inline or on their own line.
    #[test]
    fn test_md033_html_comments() {
        let content = r#"# Document with HTML Comments

This has <!-- a comment --> in it.

Regular text here.

<!-- Another comment -->
"#;

        assert_flagged(
            content,
            &["<!-- a comment -->", "<!-- Another comment -->"],
        );
    }

    /// HTML inside backtick and tilde fences is skipped; HTML between the
    /// fences is still reported.
    #[test]
    fn test_md033_code_blocks_ignored() {
        let content = r#"# Code Blocks Should Be Ignored

```html
<div class="example">
  <p>This HTML should be ignored</p>
  <span>Even this</span>
</div>
```

But this <strong>should be detected</strong>.

```javascript
const html = '<div>This is in JS code</div>';
```

And this <em>should also be detected</em>.

~~~html
<article>
  <header>More HTML to ignore</header>
</article>
~~~
"#;

        assert_flagged(content, &["<strong>", "</strong>", "<em>", "</em>"]);
    }

    /// Tags inside inline code spans are skipped; tags outside are reported.
    #[test]
    fn test_md033_inline_code_ignored() {
        let content = r#"# Inline Code Should Be Ignored

This `<span>HTML in backticks</span>` should be ignored.

But this <div>should be detected</div>.

Use `<strong>` tags for bold text, but don't use <strong>actual tags</strong>.

Multiple `<code>` spans with `<em>emphasis</em>` should be ignored.
"#;

        assert_flagged(content, &["<div>", "</div>", "<strong>", "</strong>"]);
    }

    /// Fenced blocks, inline code and real HTML mixed in one document.
    #[test]
    fn test_md033_mixed_content() {
        let content = r#"# Mixed Content

Regular text with <b>bold HTML</b> tag.

```html
<p>This should be ignored</p>
```

Back to regular content with <i>italic</i>.

The `<em>` tag is mentioned in code, but <em>this usage</em> is flagged.

More `<span class="test">code examples</span>` that should be ignored.

Final <strong>HTML usage</strong> to detect.
"#;

        assert_flagged(
            content,
            &[
                "<b>",
                "</b>",
                "<i>",
                "</i>",
                "<em>",
                "</em>",
                "<strong>",
                "</strong>",
            ],
        );
    }
}