1mod emails;
2#[cfg(all(test, feature = "golden-tests"))]
3mod golden_test;
4mod host;
5mod junk_data;
6mod urls;
7
8pub use emails::find_emails;
9pub use urls::find_urls;
10
/// Tunable limits handed to [`find_emails`] and [`find_urls`].
///
/// Use [`DetectionConfig::default`] for the stock values and struct-update
/// syntax (`DetectionConfig { max_urls: 2, ..Default::default() }`) to
/// override individual fields.
#[derive(Clone, Debug)]
pub struct DetectionConfig {
    /// Upper bound on how many email detections `find_emails` hands back.
    pub max_emails: usize,
    /// Upper bound on how many URL detections `find_urls` hands back.
    pub max_urls: usize,
    /// NOTE(review): presumably toggles de-duplication of detections; the
    /// consuming code is not in this file — confirm in `emails` / `urls`.
    pub unique: bool,
}
17
18impl Default for DetectionConfig {
19 fn default() -> Self {
20 Self {
21 max_emails: 50,
22 max_urls: 50,
23 unique: true,
24 }
25 }
26}
27
#[cfg(test)]
mod tests {
    use super::{DetectionConfig, find_emails, find_urls};
    use crate::models::LineNumber;

    /// Three addresses with `max_emails = 2` produce exactly two results; the
    /// first is `a@b.com` starting at `LineNumber::ONE`.
    #[test]
    fn test_find_emails_threshold() {
        let capped = DetectionConfig {
            max_emails: 2,
            ..DetectionConfig::default()
        };
        let emails = find_emails("a@b.com\nc@d.com\ne@f.com\n", &capped);

        assert_eq!(emails.len(), 2);
        let first = &emails[0];
        assert_eq!(first.email, "a@b.com");
        assert_eq!(first.start_line, LineNumber::ONE);
    }

    /// Three URLs with `max_urls = 2` produce exactly the first two, each
    /// reported with a trailing slash.
    #[test]
    fn test_find_urls_threshold() {
        let capped = DetectionConfig {
            max_urls: 2,
            ..DetectionConfig::default()
        };
        let urls = find_urls("http://a.com\nhttp://b.com\nhttp://c.com\n", &capped);

        assert_eq!(urls.len(), 2);
        assert_eq!(urls[0].url, "http://a.com/");
        assert_eq!(urls[1].url, "http://b.com/");
    }

    /// An address on a `.local` machine hostname is dropped while a regular
    /// address on the neighbouring line is kept.
    #[test]
    fn test_find_emails_filters_local_machine_domains() {
        let input = "admin@rust-lang.org\ngeisse@shopgates-mac-mini-3.local\n";
        let emails = find_emails(input, &DetectionConfig::default());

        assert_eq!(emails.len(), 1);
        assert_eq!(emails[0].email, "admin@rust-lang.org");
    }

    /// An email whose local part begins with `ftp.` must not surface as a URL.
    #[test]
    fn test_find_urls_ignores_email_like_ftp_token() {
        let urls = find_urls(
            "See ftp.mtuci@gmail.com for details.",
            &DetectionConfig::default(),
        );

        assert!(urls.is_empty(), "urls: {urls:#?}");
    }

    /// A scheme-less `ftp.` hostname with a path is reported once, with an
    /// `http://` prefix.
    #[test]
    fn test_find_urls_keeps_plain_ftp_hostname() {
        let urls = find_urls("Mirror: ftp.gnu.org/gnu/tar/", &DetectionConfig::default());

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "http://ftp.gnu.org/gnu/tar/");
    }

    /// Two URLs joined by a literal backslash-`n` (not a real newline) come
    /// back as two separate entries, in input order.
    #[test]
    fn test_find_urls_splits_literal_escaped_newline_separated_urls() {
        let input = "https://docs.celeryq.dev/en/latest/userguide/workers.html#concurrency\\nhttps://docs.celeryq.dev/en/latest/userguide/concurrency/eventlet.html";
        let detected = find_urls(input, &DetectionConfig::default());

        let expected = vec![
            "https://docs.celeryq.dev/en/latest/userguide/workers.html#concurrency".to_string(),
            "https://docs.celeryq.dev/en/latest/userguide/concurrency/eventlet.html".to_string(),
        ];
        let actual: Vec<_> = detected.into_iter().map(|hit| hit.url).collect();
        assert_eq!(actual, expected);
    }

    /// A `user:{ACCESS_TOKEN}@` credential placeholder is removed from the
    /// reported URL.
    #[test]
    fn test_find_urls_strips_template_credentials_from_git_urls() {
        let input = "Repo: https://user:{ACCESS_TOKEN}@github.com/example/project.git";
        let urls = find_urls(input, &DetectionConfig::default());

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }

    /// Same as the brace-template case, but with the braces percent-encoded
    /// as `%7B` / `%7D`.
    #[test]
    fn test_find_urls_strips_percent_encoded_template_credentials_from_git_urls() {
        let input = "Repo: https://user:%7BACCESS_TOKEN%7D@github.com/example/project.git";
        let urls = find_urls(input, &DetectionConfig::default());

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }

    /// A plain URL and its credential-templated twin yield a single result
    /// under the default configuration.
    #[test]
    fn test_find_urls_dedupes_plain_and_templated_git_urls_after_sanitization() {
        let input = concat!(
            "https://github.com/example/project.git\n",
            "https://user:%7BACCESS_TOKEN%7D@github.com/example/project.git\n",
        );
        let urls = find_urls(input, &DetectionConfig::default());

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }

    /// Backticks trailing a URL are not part of the reported value.
    #[test]
    fn test_find_urls_strips_trailing_backticks() {
        let input = "Docs: https://github.com/example/project.git``";
        let urls = find_urls(input, &DetectionConfig::default());

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }
}