1mod emails;
2#[cfg(all(test, feature = "golden-tests"))]
3mod golden_test;
4mod host;
5mod junk_data;
6mod urls;
7
8pub use emails::find_emails;
9pub use urls::find_urls;
10
/// Settings shared by the e-mail and URL detectors.
///
/// A reference to this struct is passed to [`find_emails`] and [`find_urls`].
/// Use [`Default`] for the stock limits and struct-update syntax to override
/// individual fields:
///
/// ```ignore
/// let config = DetectionConfig { max_emails: 2, ..Default::default() };
/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DetectionConfig {
    /// Upper bound on the number of matches [`find_emails`] returns.
    pub max_emails: usize,
    /// Upper bound on the number of matches [`find_urls`] returns.
    pub max_urls: usize,
    /// NOTE(review): presumably suppresses repeated matches when `true`;
    /// confirm against the detector implementations.
    pub unique: bool,
}

impl Default for DetectionConfig {
    /// Returns the stock configuration: up to 50 e-mails, up to 50 URLs, and
    /// `unique` enabled.
    fn default() -> Self {
        Self {
            max_emails: 50,
            max_urls: 50,
            unique: true,
        }
    }
}
27
#[cfg(test)]
mod tests {
    use super::{DetectionConfig, find_emails, find_urls};

    /// `max_emails` limits how many matches come back; the first match keeps
    /// its text and is reported on line 1.
    #[test]
    fn test_find_emails_threshold() {
        let limited = DetectionConfig {
            max_emails: 2,
            ..DetectionConfig::default()
        };

        let found = find_emails("a@b.com\nc@d.com\ne@f.com\n", &limited);

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].email, "a@b.com");
        assert_eq!(found[0].start_line, 1);
    }

    /// `max_urls` limits how many matches come back, and the surviving URLs
    /// are the first two in input order, each with a trailing slash.
    #[test]
    fn test_find_urls_threshold() {
        let limited = DetectionConfig {
            max_urls: 2,
            ..DetectionConfig::default()
        };

        let found = find_urls("http://a.com\nhttp://b.com\nhttp://c.com\n", &limited);

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].url, "http://a.com/");
        assert_eq!(found[1].url, "http://b.com/");
    }

    /// An address on a `.local` machine host is dropped; the public one stays.
    #[test]
    fn test_find_emails_filters_local_machine_domains() {
        let defaults = DetectionConfig::default();

        let found = find_emails(
            "admin@rust-lang.org\ngeisse@shopgates-mac-mini-3.local\n",
            &defaults,
        );

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].email, "admin@rust-lang.org");
    }

    /// An e-mail address whose local part starts with `ftp.` must not be
    /// reported as a URL.
    #[test]
    fn test_find_urls_ignores_email_like_ftp_token() {
        let defaults = DetectionConfig::default();

        let urls = find_urls("See ftp.mtuci@gmail.com for details.", &defaults);

        assert!(urls.is_empty(), "urls: {urls:#?}");
    }

    /// A bare `ftp.` hostname with a path is still detected and is reported
    /// with an `http://` scheme.
    #[test]
    fn test_find_urls_keeps_plain_ftp_hostname() {
        let defaults = DetectionConfig::default();

        let urls = find_urls("Mirror: ftp.gnu.org/gnu/tar/", &defaults);

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "http://ftp.gnu.org/gnu/tar/");
    }

    /// Two URLs joined by a literal backslash-`n` sequence (not a real
    /// newline) are reported as two separate URLs.
    #[test]
    fn test_find_urls_splits_literal_escaped_newline_separated_urls() {
        let input = "https://docs.celeryq.dev/en/latest/userguide/workers.html#concurrency\\nhttps://docs.celeryq.dev/en/latest/userguide/concurrency/eventlet.html";
        let defaults = DetectionConfig::default();

        let found: Vec<_> = find_urls(input, &defaults)
            .into_iter()
            .map(|hit| hit.url)
            .collect();

        let expected = vec![
            "https://docs.celeryq.dev/en/latest/userguide/workers.html#concurrency".to_string(),
            "https://docs.celeryq.dev/en/latest/userguide/concurrency/eventlet.html".to_string(),
        ];
        assert_eq!(found, expected);
    }

    /// Templated `user:{TOKEN}@` credentials are removed from the reported URL.
    #[test]
    fn test_find_urls_strips_template_credentials_from_git_urls() {
        let defaults = DetectionConfig::default();

        let urls = find_urls(
            "Repo: https://user:{ACCESS_TOKEN}@github.com/apache/airflow.git",
            &defaults,
        );

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/apache/airflow.git");
    }
}