mdbook_lint_core/rules/standard/
md034.rs1use crate::error::Result;
6use crate::rule::{AstRule, RuleCategory, RuleMetadata};
7use crate::{
8 Document,
9 violation::{Severity, Violation},
10};
11use comrak::nodes::AstNode;
12
/// Rule MD034 (`no-bare-urls`): reports URLs that appear as plain text
/// instead of being wrapped in link or angle-bracket syntax.
pub struct MD034;
15
16impl AstRule for MD034 {
17 fn id(&self) -> &'static str {
18 "MD034"
19 }
20
21 fn name(&self) -> &'static str {
22 "no-bare-urls"
23 }
24
25 fn description(&self) -> &'static str {
26 "Bare URL used"
27 }
28
29 fn metadata(&self) -> RuleMetadata {
30 RuleMetadata::stable(RuleCategory::Content).introduced_in("mdbook-lint v0.1.0")
31 }
32
33 fn check_ast<'a>(&self, document: &Document, _ast: &'a AstNode<'a>) -> Result<Vec<Violation>> {
34 let mut violations = Vec::new();
35 let mut in_code_block = false;
36
37 for (line_number, line) in document.lines.iter().enumerate() {
38 if line.trim_start().starts_with("```") {
40 in_code_block = !in_code_block;
41 continue;
42 }
43
44 if in_code_block {
46 continue;
47 }
48
49 let chars: Vec<char> = line.chars().collect();
51 let mut i = 0;
52
53 while i < chars.len() {
54 if chars[i] == '`' {
56 i += 1;
57 while i < chars.len() && chars[i] != '`' {
59 i += 1;
60 }
61 if i < chars.len() {
62 i += 1; }
64 continue;
65 }
66
67 if chars[i] == '[' {
69 while i < chars.len() && chars[i] != ']' {
71 i += 1;
72 }
73 if i < chars.len() {
74 i += 1; }
76 if i < chars.len() && chars[i] == '(' {
78 while i < chars.len() && chars[i] != ')' {
79 i += 1;
80 }
81 if i < chars.len() {
82 i += 1; }
84 }
85 continue;
86 }
87
88 if chars[i] == '<' {
90 while i < chars.len() && chars[i] != '>' {
91 i += 1;
92 }
93 if i < chars.len() {
94 i += 1; }
96 continue;
97 }
98
99 if i + 7 < chars.len() && self.starts_with_url_scheme(&chars, i) {
101 let start_pos = i;
102 let url = self.extract_url(&chars, i);
103
104 if !url.is_empty() {
105 violations.push(self.create_violation(
106 format!(
107 "Bare URL used: {url}. Consider wrapping in angle brackets: <{url}>"
108 ),
109 line_number + 1, start_pos + 1, Severity::Warning,
112 ));
113 i = start_pos + url.len();
114 } else {
115 i += 1;
116 }
117 } else {
118 i += 1;
119 }
120 }
121 }
122
123 Ok(violations)
124 }
125}
126
127impl MD034 {
128 fn starts_with_url_scheme(&self, chars: &[char], pos: usize) -> bool {
130 let schemes = ["http://", "https://", "ftp://", "mailto:"];
131
132 for scheme in &schemes {
133 let scheme_chars: Vec<char> = scheme.chars().collect();
134 if pos + scheme_chars.len() <= chars.len() {
135 let mut matches = true;
136 for (j, &expected_char) in scheme_chars.iter().enumerate() {
137 if chars[pos + j] != expected_char {
138 matches = false;
139 break;
140 }
141 }
142 if matches {
143 return true;
144 }
145 }
146 }
147 false
148 }
149
150 fn extract_url(&self, chars: &[char], start: usize) -> String {
152 let mut url = String::new();
153 let mut i = start;
154
155 while i < chars.len() {
157 let ch = chars[i];
158 if ch.is_whitespace() || ch == ')' || ch == ']' || ch == '>' || ch == '"' || ch == '\''
159 {
160 break;
161 }
162 url.push(ch);
163 i += 1;
164 }
165
166 while let Some(last_char) = url.chars().last() {
168 if last_char == '.'
169 || last_char == ','
170 || last_char == ';'
171 || last_char == ':'
172 || last_char == '!'
173 || last_char == '?'
174 {
175 url.pop();
176 } else {
177 break;
178 }
179 }
180
181 url
182 }
183}
184
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Document;
    use crate::rule::Rule;
    use std::path::PathBuf;

    /// Builds a document named `test.md` from `content` and returns the
    /// violations MD034 reports for it.
    fn lint(content: &str) -> Vec<Violation> {
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        let rule = MD034;
        rule.check(&document).unwrap()
    }

    /// Asserts that there is exactly one violation per expected fragment and
    /// that each violation's message contains its fragment, in order.
    fn assert_messages(violations: &[Violation], expected: &[&str]) {
        assert_eq!(violations.len(), expected.len());
        for (violation, fragment) in violations.iter().zip(expected.iter()) {
            assert!(violation.message.contains(fragment));
        }
    }

    /// Markdown links and angle-bracket URLs are not reported.
    #[test]
    fn test_md034_no_violations() {
        let violations = lint(
            r#"# Valid URLs

These URLs are properly formatted and should not trigger violations:

- Link: [Google](https://google.com)
- Angle brackets: <https://example.com>
- Email: <mailto:test@example.com>
- Another link: [Local](./page.md)

Text with <https://wrapped-url.com> in angle brackets.
"#,
        );

        assert_eq!(violations.len(), 0);
    }

    /// A single bare URL yields one violation with the expected message and line.
    #[test]
    fn test_md034_bare_url_violation() {
        let violations = lint(
            r#"# Document with Bare URL

This has a bare URL: https://example.com that should be wrapped.

Some content here.
"#,
        );

        assert_eq!(violations.len(), 1);
        let only = &violations[0];
        for fragment in [
            "Bare URL used",
            "https://example.com",
            "Consider wrapping in angle brackets",
        ]
        .iter()
        {
            assert!(only.message.contains(fragment));
        }
        assert_eq!(only.line, 3);
    }

    /// Each bare URL on its own line is reported with its own line number.
    #[test]
    fn test_md034_multiple_bare_urls() {
        let violations = lint(
            r#"# Multiple Bare URLs

First URL: https://first.com here.
Second URL: http://second.com there.
And an email: mailto:test@example.com end.
"#,
        );

        assert_messages(
            &violations,
            &[
                "https://first.com",
                "http://second.com",
                "mailto:test@example.com",
            ],
        );
        assert_eq!(violations[0].line, 3);
        assert_eq!(violations[1].line, 4);
        assert_eq!(violations[2].line, 5);
    }

    /// Only the unwrapped URL is reported when links and wrapped URLs are mixed in.
    #[test]
    fn test_md034_ignores_links_and_wrapped_urls() {
        let violations = lint(
            r#"# Mixed URLs

This [valid link](https://good.com) is fine.
This <https://wrapped.com> is also fine.
But this https://bare.com is not.
Another [link](mailto:test@example.com) is good.
"#,
        );

        assert_messages(&violations, &["https://bare.com"]);
        assert_eq!(violations[0].line, 5);
    }

    /// URLs inside fenced code blocks and inline code are not reported.
    #[test]
    fn test_md034_code_blocks_ignored() {
        let violations = lint(
            r#"# Code Examples

This https://bare-url.com should be detected.

```
This https://code-example.com should be ignored.
```

`This https://inline-code.com should be ignored.`

Another https://bare-url2.com should be detected.
"#,
        );

        assert_eq!(violations.len(), 2);
        assert_eq!(violations[0].line, 3);
        assert_eq!(violations[1].line, 11);
    }

    /// URLs followed by sentence punctuation are still reported.
    #[test]
    fn test_md034_url_with_trailing_punctuation() {
        let violations = lint(
            r#"# URLs with Punctuation

Visit https://example.com. for more info.
Check out https://test.com, it's great.
See https://other.com; it has details.
The URL is https://final.com: very useful.
"#,
        );

        assert_messages(
            &violations,
            &[
                "https://example.com",
                "https://test.com",
                "https://other.com",
                "https://final.com",
            ],
        );
    }

    /// Query strings, fragments and FTP URLs are captured in full.
    #[test]
    fn test_md034_complex_urls() {
        let violations = lint(
            r#"# Complex URLs

This https://example.com/path?param=value&other=test#anchor is complex.
This ftp://files.example.com/path/file.txt is an FTP URL.
"#,
        );

        assert_messages(
            &violations,
            &[
                "https://example.com/path?param=value&other=test#anchor",
                "ftp://files.example.com/path/file.txt",
            ],
        );
    }

    /// Text that mentions protocols without a full scheme prefix is not reported.
    #[test]
    fn test_md034_no_false_positives() {
        let violations = lint(
            r#"# No False Positives

This text mentions http but not as a URL: "The HTTP protocol is important."
This talks about https: "HTTPS encryption is secure."
This is not a URL: http:something or https:other

Normal text without URLs should be fine.
"#,
        );

        assert_eq!(violations.len(), 0);
    }
}