mdbook_lint_core/rules/standard/
md034.rs

1//! MD034: Bare URL without angle brackets
2//!
3//! This rule checks for bare URLs that should be enclosed in angle brackets.
4
5use crate::error::Result;
6use crate::rule::{AstRule, RuleCategory, RuleMetadata};
7use crate::{
8    Document,
9    violation::{Severity, Violation},
10};
11use comrak::nodes::AstNode;
12
/// Rule MD034 (`no-bare-urls`): reports URLs that appear as plain text
/// instead of being enclosed in angle brackets.
///
/// The rule carries no state, so it is a unit struct that is free to copy and
/// can be created with `MD034` or `MD034::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct MD034;
15
16impl AstRule for MD034 {
17    fn id(&self) -> &'static str {
18        "MD034"
19    }
20
21    fn name(&self) -> &'static str {
22        "no-bare-urls"
23    }
24
25    fn description(&self) -> &'static str {
26        "Bare URL used"
27    }
28
29    fn metadata(&self) -> RuleMetadata {
30        RuleMetadata::stable(RuleCategory::Content).introduced_in("mdbook-lint v0.1.0")
31    }
32
33    fn check_ast<'a>(&self, document: &Document, _ast: &'a AstNode<'a>) -> Result<Vec<Violation>> {
34        let mut violations = Vec::new();
35        let mut in_code_block = false;
36
37        for (line_number, line) in document.lines.iter().enumerate() {
38            // Track code block state
39            if line.trim_start().starts_with("```") {
40                in_code_block = !in_code_block;
41                continue;
42            }
43
44            // Skip lines inside code blocks
45            if in_code_block {
46                continue;
47            }
48
49            // Parse the line character by character looking for bare URLs
50            let chars: Vec<char> = line.chars().collect();
51            let mut i = 0;
52
53            while i < chars.len() {
54                // Skip inline code spans
55                if chars[i] == '`' {
56                    i += 1;
57                    // Find the closing backtick
58                    while i < chars.len() && chars[i] != '`' {
59                        i += 1;
60                    }
61                    if i < chars.len() {
62                        i += 1; // Skip closing backtick
63                    }
64                    continue;
65                }
66
67                // Skip content inside links [text](url) or <url>
68                if chars[i] == '[' {
69                    // Skip to end of link
70                    while i < chars.len() && chars[i] != ']' {
71                        i += 1;
72                    }
73                    if i < chars.len() {
74                        i += 1; // Skip ']'
75                    }
76                    // Skip the (url) part if it exists
77                    if i < chars.len() && chars[i] == '(' {
78                        while i < chars.len() && chars[i] != ')' {
79                            i += 1;
80                        }
81                        if i < chars.len() {
82                            i += 1; // Skip ')'
83                        }
84                    }
85                    continue;
86                }
87
88                // Skip URLs already in angle brackets
89                if chars[i] == '<' {
90                    while i < chars.len() && chars[i] != '>' {
91                        i += 1;
92                    }
93                    if i < chars.len() {
94                        i += 1; // Skip '>'
95                    }
96                    continue;
97                }
98
99                // Look for URLs starting with http:// or https://
100                if i + 7 < chars.len() && self.starts_with_url_scheme(&chars, i) {
101                    let start_pos = i;
102                    let url = self.extract_url(&chars, i);
103
104                    if !url.is_empty() {
105                        violations.push(self.create_violation(
106                            format!(
107                                "Bare URL used: {url}. Consider wrapping in angle brackets: <{url}>"
108                            ),
109                            line_number + 1, // 1-based line numbers
110                            start_pos + 1,   // 1-based column
111                            Severity::Warning,
112                        ));
113                        i = start_pos + url.len();
114                    } else {
115                        i += 1;
116                    }
117                } else {
118                    i += 1;
119                }
120            }
121        }
122
123        Ok(violations)
124    }
125}
126
127impl MD034 {
128    /// Check if the character sequence starts with a URL scheme
129    fn starts_with_url_scheme(&self, chars: &[char], pos: usize) -> bool {
130        let schemes = ["http://", "https://", "ftp://", "mailto:"];
131
132        for scheme in &schemes {
133            let scheme_chars: Vec<char> = scheme.chars().collect();
134            if pos + scheme_chars.len() <= chars.len() {
135                let mut matches = true;
136                for (j, &expected_char) in scheme_chars.iter().enumerate() {
137                    if chars[pos + j] != expected_char {
138                        matches = false;
139                        break;
140                    }
141                }
142                if matches {
143                    return true;
144                }
145            }
146        }
147        false
148    }
149
150    /// Extract a complete URL starting at the given position
151    fn extract_url(&self, chars: &[char], start: usize) -> String {
152        let mut url = String::new();
153        let mut i = start;
154
155        // Extract until we hit whitespace or certain delimiters
156        while i < chars.len() {
157            let ch = chars[i];
158            if ch.is_whitespace() || ch == ')' || ch == ']' || ch == '>' || ch == '"' || ch == '\''
159            {
160                break;
161            }
162            url.push(ch);
163            i += 1;
164        }
165
166        // Remove common trailing punctuation that's probably sentence punctuation
167        while let Some(last_char) = url.chars().last() {
168            if last_char == '.'
169                || last_char == ','
170                || last_char == ';'
171                || last_char == ':'
172                || last_char == '!'
173                || last_char == '?'
174            {
175                url.pop();
176            } else {
177                break;
178            }
179        }
180
181        url
182    }
183}
184
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Document;
    use crate::rule::Rule;
    use std::path::PathBuf;

    /// Runs MD034 over `content`, presented as `test.md`, and returns the
    /// violations it reports.
    fn lint(content: &str) -> Vec<crate::violation::Violation> {
        let document = Document::new(content.to_string(), PathBuf::from("test.md")).unwrap();
        MD034.check(&document).unwrap()
    }

    #[test]
    fn test_md034_no_violations() {
        let violations = lint(
            r#"# Valid URLs

These URLs are properly formatted and should not trigger violations:

- Link: [Google](https://google.com)
- Angle brackets: <https://example.com>
- Email: <mailto:test@example.com>
- Another link: [Local](./page.md)

Text with <https://wrapped-url.com> in angle brackets.
"#,
        );

        assert_eq!(violations.len(), 0);
    }

    #[test]
    fn test_md034_bare_url_violation() {
        let violations = lint(
            r#"# Document with Bare URL

This has a bare URL: https://example.com that should be wrapped.

Some content here.
"#,
        );

        assert_eq!(violations.len(), 1);
        let only = &violations[0];
        for fragment in [
            "Bare URL used",
            "https://example.com",
            "Consider wrapping in angle brackets",
        ] {
            assert!(only.message.contains(fragment));
        }
        assert_eq!(only.line, 3);
    }

    #[test]
    fn test_md034_multiple_bare_urls() {
        let violations = lint(
            r#"# Multiple Bare URLs

First URL: https://first.com here.
Second URL: http://second.com there.
And an email: mailto:test@example.com end.
"#,
        );

        // Expected URL and 1-based line of each violation, in report order.
        let expected = [
            ("https://first.com", 3),
            ("http://second.com", 4),
            ("mailto:test@example.com", 5),
        ];
        assert_eq!(violations.len(), 3);
        for (violation, (url, line)) in violations.iter().zip(expected) {
            assert!(violation.message.contains(url));
            assert_eq!(violation.line, line);
        }
    }

    #[test]
    fn test_md034_ignores_links_and_wrapped_urls() {
        let violations = lint(
            r#"# Mixed URLs

This [valid link](https://good.com) is fine.
This <https://wrapped.com> is also fine.
But this https://bare.com is not.
Another [link](mailto:test@example.com) is good.
"#,
        );

        assert_eq!(violations.len(), 1);
        let only = &violations[0];
        assert!(only.message.contains("https://bare.com"));
        assert_eq!(only.line, 5);
    }

    #[test]
    fn test_md034_code_blocks_ignored() {
        let violations = lint(
            r#"# Code Examples

This https://bare-url.com should be detected.

```
This https://code-example.com should be ignored.
```

`This https://inline-code.com should be ignored.`

Another https://bare-url2.com should be detected.
"#,
        );

        // Only the two URLs outside fenced and inline code are reported.
        assert_eq!(violations.len(), 2);
        assert_eq!(violations[0].line, 3);
        assert_eq!(violations[1].line, 11);
    }

    #[test]
    fn test_md034_url_with_trailing_punctuation() {
        let violations = lint(
            r#"# URLs with Punctuation

Visit https://example.com. for more info.
Check out https://test.com, it's great.
See https://other.com; it has details.
The URL is https://final.com: very useful.
"#,
        );

        assert_eq!(violations.len(), 4);
        // Only checks that each URL is detected; the exact handling of the
        // trailing punctuation is deliberately not pinned down here.
        let expected_urls = [
            "https://example.com",
            "https://test.com",
            "https://other.com",
            "https://final.com",
        ];
        for (violation, url) in violations.iter().zip(expected_urls) {
            assert!(violation.message.contains(url));
        }
    }

    #[test]
    fn test_md034_complex_urls() {
        let violations = lint(
            r#"# Complex URLs

This https://example.com/path?param=value&other=test#anchor is complex.
This ftp://files.example.com/path/file.txt is an FTP URL.
"#,
        );

        assert_eq!(violations.len(), 2);
        let expected_urls = [
            "https://example.com/path?param=value&other=test#anchor",
            "ftp://files.example.com/path/file.txt",
        ];
        for (violation, url) in violations.iter().zip(expected_urls) {
            assert!(violation.message.contains(url));
        }
    }

    #[test]
    fn test_md034_no_false_positives() {
        let violations = lint(
            r#"# No False Positives

This text mentions http but not as a URL: "The HTTP protocol is important."
This talks about https: "HTTPS encryption is secure."
This is not a URL: http:something or https:other

Normal text without URLs should be fine.
"#,
        );

        assert_eq!(violations.len(), 0);
    }
}