1use std::rc::Rc;
2
3use linkify::{LinkFinder, LinkKind};
4use tree_sitter::Node;
5
6use crate::{
7 linter::{range_from_tree_sitter, RuleViolation},
8 rules::{Context, Rule, RuleLinter, RuleType},
9};
10
/// Linter state for rule MD034 (bare URLs and bare e-mail addresses).
pub(crate) struct MD034Linter {
    /// Shared lint context; this linter reads the document content and the
    /// file path from it.
    context: Rc<Context>,
    /// Violations collected while nodes are fed in; handed out by `finalize`.
    violations: Vec<RuleViolation>,
}
15
16impl MD034Linter {
17 pub fn new(context: Rc<Context>) -> Self {
18 Self {
19 context,
20 violations: Vec::new(),
21 }
22 }
23}
24
25impl RuleLinter for MD034Linter {
26 fn feed(&mut self, node: &Node) {
27 if node.kind() == "paragraph" {
29 let content = self.context.document_content.borrow();
30 let text = node.utf8_text(content.as_bytes()).unwrap_or("").to_string();
31 let node_range = node.range();
32 drop(content); self.check_for_bare_urls_in_text(&text, &node_range);
35 }
36 }
37
38 fn finalize(&mut self) -> Vec<RuleViolation> {
39 std::mem::take(&mut self.violations)
40 }
41}
42
43impl MD034Linter {
44 fn check_for_bare_urls_in_text(&mut self, text: &str, paragraph_range: &tree_sitter::Range) {
45 let finder = LinkFinder::new();
46
47 for link in finder.links(text) {
48 let link_start = link.start();
49 let link_end = link.end();
50 let link_text = link.as_str();
51
52 if !self.is_link_properly_formatted(text, link_start, link_text, link.kind()) {
54 let violation_range = tree_sitter::Range {
55 start_byte: paragraph_range.start_byte + link_start,
56 end_byte: paragraph_range.start_byte + link_end,
57 start_point: tree_sitter::Point {
58 row: paragraph_range.start_point.row,
59 column: paragraph_range.start_point.column + link_start,
60 },
61 end_point: tree_sitter::Point {
62 row: paragraph_range.start_point.row,
63 column: paragraph_range.start_point.column + link_end,
64 },
65 };
66
67 self.violations.push(RuleViolation::new(
68 &MD034,
69 format!("{} [Context: \"{}\"]", MD034.description, link_text),
70 self.context.file_path.clone(),
71 range_from_tree_sitter(&violation_range),
72 ));
73 }
74 }
75 }
76
77 fn is_link_properly_formatted(
78 &self,
79 text: &str,
80 link_start: usize,
81 link_text: &str,
82 link_kind: &LinkKind,
83 ) -> bool {
84 match link_kind {
85 LinkKind::Url => self.is_url_properly_formatted(text, link_start, link_text),
86 LinkKind::Email => self.is_email_properly_formatted(text, link_start, link_text),
87 _ => true, }
89 }
90
91 fn is_url_properly_formatted(&self, text: &str, url_start: usize, url_text: &str) -> bool {
92 if url_text.starts_with('`') {
94 return true;
96 }
97
98 if url_start > 0 && text.chars().nth(url_start - 1) == Some('<') {
100 let url_end = url_start + url_text.len();
101 if url_end < text.len() && text.chars().nth(url_end) == Some('>') {
102 return true;
103 }
104 }
105
106 if let Some(link_start) = text[..url_start].rfind("](") {
108 if url_start == link_start + 2 {
109 return true; }
111 let after_paren = link_start + 2;
113 let prefix_text = &text[after_paren..url_start];
114 if prefix_text.chars().all(|c| c.is_alphabetic() || c == ':') {
115 return true; }
117 }
118
119 if let Some(bracket_start) = text[..url_start].rfind('[') {
121 let url_end = url_start + url_text.len();
123 if let Some(_bracket_end) = text[url_end..].find("](") {
124 let link_text = &text[bracket_start + 1..url_start];
126 if !link_text.contains('[') && !link_text.contains(']') {
127 return true; }
129 }
130 }
131
132 if let Some(attr_start) = text[..url_start].rfind("href=\"") {
134 if url_start == attr_start + 6 {
135 return true;
136 }
137 }
138 if let Some(attr_start) = text[..url_start].rfind("href='") {
139 if url_start == attr_start + 6 {
140 return true;
141 }
142 }
143
144 let before_url = &text[..url_start];
146 let after_url = &text[url_start + url_text.len()..];
147
148 let backticks_before = before_url.matches('`').count();
149 if backticks_before % 2 == 1 {
150 if after_url.contains('`') {
153 return true;
154 }
155 }
156
157 false
158 }
159
160 fn is_email_properly_formatted(
161 &self,
162 text: &str,
163 email_start: usize,
164 email_text: &str,
165 ) -> bool {
166 if email_text.starts_with('`') {
168 return true;
170 }
171
172 if let Some(link_start) = text[..email_start].rfind("](") {
174 let after_paren = link_start + 2;
176 if email_start == after_paren {
177 return true; }
179 let prefix_text = &text[after_paren..email_start];
180 if prefix_text.chars().all(|c| c.is_alphabetic() || c == ':') {
181 return true; }
183 }
184
185 let mut check_start = email_start;
187
188 while check_start > 0 {
190 let char_at = text.chars().nth(check_start - 1);
191 if char_at == Some('<') {
192 let email_end = email_start + email_text.len();
193 if email_end < text.len() && text.chars().nth(email_end) == Some('>') {
194 return true;
195 }
196 break;
197 } else if char_at
198 .map(|c| c.is_alphabetic() || c == ':')
199 .unwrap_or(false)
200 {
201 check_start -= 1;
203 } else {
204 break;
205 }
206 }
207
208 if let Some(bracket_start) = text[..email_start].rfind('[') {
210 let email_end = email_start + email_text.len();
212 if let Some(_bracket_end) = text[email_end..].find("](") {
213 let link_text = &text[bracket_start + 1..email_start];
215 if !link_text.contains('[') && !link_text.contains(']') {
216 return true; }
218 }
219 }
220
221 let before_email = &text[..email_start];
223 let after_email = &text[email_start + email_text.len()..];
224
225 let backticks_before = before_email.matches('`').count();
227 if backticks_before % 2 == 1 {
228 if after_email.contains('`') {
231 return true;
232 }
233 }
234
235 false
236 }
237}
238
/// Rule descriptor for MD034 (`no-bare-urls`), reported as "Bare URL used".
pub const MD034: Rule = Rule {
    id: "MD034",
    alias: "no-bare-urls",
    tags: &["links", "url"],
    description: "Bare URL used",
    rule_type: RuleType::Token,
    // NOTE(review): `MD034Linter::feed` only acts on "paragraph" nodes while
    // this lists "text" — confirm how `required_nodes` is consumed and whether
    // the two should agree.
    required_nodes: &["text"],
    // Factory that builds the linter for a given context.
    new_linter: |context| Box::new(MD034Linter::new(context)),
};
248
#[cfg(test)]
mod test {
    use std::path::PathBuf;

    use crate::config::RuleSeverity;
    use crate::linter::MultiRuleLinter;
    use crate::test_utils::test_helpers::test_config_with_rules;

    /// Marker that opens the quoted link inside a violation message.
    const CONTEXT_OPEN: &str = "[Context: \"";
    /// Marker that closes the quoted link inside a violation message.
    const CONTEXT_CLOSE: &str = "\"]";

    /// Configuration with only `no-bare-urls` enabled.
    fn test_config() -> crate::config::QuickmarkConfig {
        test_config_with_rules(vec![
            ("no-bare-urls", RuleSeverity::Error),
            ("heading-increment", RuleSeverity::Off),
            ("heading-style", RuleSeverity::Off),
            ("line-length", RuleSeverity::Off),
        ])
    }

    /// Lints `input` as `test.md` and returns `(rule id, message)` for each
    /// reported violation, in reporting order.
    fn lint(input: &str) -> Vec<(String, String)> {
        let mut linter =
            MultiRuleLinter::new_for_document(PathBuf::from("test.md"), test_config(), input);
        let violations = linter.analyze();
        let found: Vec<(String, String)> = violations
            .iter()
            .map(|v| (v.rule().id.to_string(), v.message().to_string()))
            .collect();
        found
    }

    /// Extracts the quoted link from each violation message.
    fn contexts(found: &[(String, String)]) -> Vec<String> {
        found
            .iter()
            .map(|(_, message)| {
                let start = message.find(CONTEXT_OPEN).unwrap() + CONTEXT_OPEN.len();
                let end = message.find(CONTEXT_CLOSE).unwrap();
                message[start..end].to_string()
            })
            .collect()
    }

    #[test]
    fn test_bare_url_detection() {
        let found = lint("Visit https://example.com for more info.");

        assert_eq!(1, found.len());
        let (rule_id, message) = &found[0];
        assert_eq!("MD034", rule_id.as_str());
        assert!(message.contains("Bare URL used"));
        assert!(message.contains("https://example.com"));
    }

    #[test]
    fn test_bare_email_detection() {
        let found = lint("Email me at user@example.com for questions.");

        assert_eq!(1, found.len());
        let (rule_id, message) = &found[0];
        assert_eq!("MD034", rule_id.as_str());
        assert!(message.contains("user@example.com"));
    }

    #[test]
    fn test_angle_bracket_urls_no_violation() {
        let found = lint("Visit <https://example.com> for more info.");

        assert_eq!(0, found.len());
    }

    #[test]
    fn test_angle_bracket_emails_no_violation() {
        let found = lint("Email me at <user@example.com> for questions.");

        assert_eq!(0, found.len());
    }

    #[test]
    fn test_code_span_urls_no_violation() {
        let found = lint("Not a link: `https://example.com`");

        assert_eq!(0, found.len());
    }

    #[test]
    fn test_markdown_link_urls_no_violation() {
        let found = lint("Visit [the site](https://example.com) for more info.");

        assert_eq!(0, found.len());
    }

    #[test]
    fn test_html_tag_urls_no_violation() {
        let found = lint("<a href='https://example.com'>Link text</a>");

        assert_eq!(0, found.len());
    }

    #[test]
    fn test_multiple_bare_urls() {
        let found = lint("Visit https://first.com and https://second.com and email admin@site.com");

        assert_eq!(3, found.len());
    }

    #[test]
    fn test_mixed_urls_and_proper_links() {
        let found = lint(
            "Visit https://bare.com and [proper link](https://proper.com) and <https://formatted.com>",
        );

        assert_eq!(1, found.len());
        assert!(found[0].1.contains("https://bare.com"));
    }

    #[test]
    fn test_mailto_urls_in_markdown_links_no_violation() {
        let found = lint("Email [support](mailto:user@example.com) for help.");

        assert_eq!(0, found.len());
    }

    #[test]
    fn test_urls_in_markdown_link_text_no_violation() {
        let found = lint("[link text with https://example.com in it](https://proper-target.com)");

        assert_eq!(0, found.len());
    }

    #[test]
    fn test_emails_in_markdown_link_text_no_violation() {
        let found = lint("[contact user@example.com for support](https://contact-form.com)");

        assert_eq!(0, found.len());
    }

    #[test]
    fn test_scheme_prefixes_in_markdown_links_no_violation() {
        let found = lint(
            "Try [FTP site](ftp://files.example.com) and [secure site](https://secure.example.com).",
        );

        assert_eq!(0, found.len());
    }

    #[test]
    fn test_nested_markdown_scenarios() {
        let found = lint(
            "Links bind to the innermost [link that https://example.com link](https://target.com) but https://bare.com should trigger.",
        );

        assert_eq!(1, found.len());
        assert!(found[0].1.contains("https://bare.com"));
    }

    #[test]
    fn test_complex_mixed_scenarios() {
        let input = r#"
Visit https://bare.com for info.
Email [support](mailto:help@example.com) or bare.email@example.com.
Check [site with https://url-in-text.com info](https://real-target.com).
Use <https://angle-bracketed.com> or `https://code-span.com`.
"#;

        let found = lint(input);

        assert_eq!(2, found.len());

        let reported = contexts(&found);
        assert!(reported.contains(&"https://bare.com".to_string()));
        assert!(reported.contains(&"bare.email@example.com".to_string()));
    }

    #[test]
    fn test_international_domains_and_emails() {
        let found = lint("Visit https://müller.example and email ünser@müller.example for info.");

        assert_eq!(2, found.len());

        let reported = contexts(&found);
        assert!(reported.contains(&"https://müller.example".to_string()));
        assert!(reported.contains(&"ünser@müller.example".to_string()));
    }
}