1mod emails;
2#[cfg(all(test, feature = "golden-tests"))]
3mod golden_test;
4mod host;
5mod junk_data;
6mod urls;
7
8pub use emails::find_emails;
9pub use urls::find_urls;
10
/// Tuning knobs passed by reference to [`find_emails`] and [`find_urls`].
///
/// Use [`Default::default`] for the stock settings and struct-update syntax to
/// override individual fields:
///
/// ```ignore
/// let config = DetectionConfig { max_emails: 2, ..Default::default() };
/// ```
///
/// The type is comparable (`PartialEq`/`Eq`) so configurations can be checked
/// with `==` and `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DetectionConfig {
    /// Upper bound on the number of detections `find_emails` returns; the unit
    /// tests in this module rely on a scan with more addresses than this being
    /// truncated to exactly this many.
    // NOTE(review): whether `0` means "none" or "unlimited" is decided in the
    // `emails` module — confirm there before relying on it.
    pub max_emails: usize,
    /// Upper bound on the number of detections `find_urls` returns; the unit
    /// tests in this module rely on the same truncation behaviour as for emails.
    // NOTE(review): the meaning of `0` is decided in the `urls` module — confirm.
    pub max_urls: usize,
    /// De-duplication switch.
    // NOTE(review): presumably suppresses repeated detections of the same
    // value; the logic is not in this file — confirm in `emails` / `urls`.
    pub unique: bool,
}
17
18impl Default for DetectionConfig {
19 fn default() -> Self {
20 Self {
21 max_emails: 50,
22 max_urls: 50,
23 unique: true,
24 }
25 }
26}
27
#[cfg(test)]
mod tests {
    use super::{DetectionConfig, find_emails, find_urls};
    use crate::models::LineNumber;

    /// Three addresses with `max_emails: 2` must yield exactly two detections.
    #[test]
    fn test_find_emails_threshold() {
        let limits = DetectionConfig {
            max_emails: 2,
            ..DetectionConfig::default()
        };
        let found = find_emails("a@b.com\nc@d.com\ne@f.com\n", &limits);

        assert_eq!(found.len(), 2);
        let first = &found[0];
        assert_eq!(first.email, "a@b.com");
        assert_eq!(first.start_line, LineNumber::ONE);
    }

    /// Three URLs with `max_urls: 2` must yield the first two, in input order.
    #[test]
    fn test_find_urls_threshold() {
        let limits = DetectionConfig {
            max_urls: 2,
            ..DetectionConfig::default()
        };
        let found = find_urls("http://a.com\nhttp://b.com\nhttp://c.com\n", &limits);

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].url, "http://a.com/");
        assert_eq!(found[1].url, "http://b.com/");
    }

    /// An address on a `.local` machine host is dropped; the public one stays.
    #[test]
    fn test_find_emails_filters_local_machine_domains() {
        let defaults = DetectionConfig::default();
        let found = find_emails(
            "admin@rust-lang.org\ngeisse@shopgates-mac-mini-3.local\n",
            &defaults,
        );

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].email, "admin@rust-lang.org");
    }

    /// Literal `\n` sequences followed by decorator-like tokens must not be
    /// reported as (or glued onto) email addresses.
    #[test]
    fn test_find_emails_ignores_literal_escaped_newline_code_artifacts() {
        let input = r#"email": "global_writer@email.com\n@app.route\n@csrf.exempt\nuser5@email.com"#;
        let defaults = DetectionConfig::default();

        let addresses: Vec<_> = find_emails(input, &defaults)
            .into_iter()
            .map(|hit| hit.email)
            .collect();

        assert_eq!(
            addresses,
            vec![
                String::from("global_writer@email.com"),
                String::from("user5@email.com"),
            ]
        );
    }

    /// A token that is really an email address must not be reported as a URL.
    #[test]
    fn test_find_urls_ignores_email_like_ftp_token() {
        let defaults = DetectionConfig::default();
        let urls = find_urls("See ftp.mtuci@gmail.com for details.", &defaults);

        assert!(urls.is_empty(), "urls: {urls:#?}");
    }

    /// A bare `ftp.` hostname with a path is still reported, with an `http://` scheme.
    #[test]
    fn test_find_urls_keeps_plain_ftp_hostname() {
        let defaults = DetectionConfig::default();
        let urls = find_urls("Mirror: ftp.gnu.org/gnu/tar/", &defaults);

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "http://ftp.gnu.org/gnu/tar/");
    }

    /// Two URLs joined by a literal backslash-n must come back as two separate URLs.
    #[test]
    fn test_find_urls_splits_literal_escaped_newline_separated_urls() {
        let input = "https://docs.celeryq.dev/en/latest/userguide/workers.html#concurrency\\nhttps://docs.celeryq.dev/en/latest/userguide/concurrency/eventlet.html";
        let defaults = DetectionConfig::default();

        let links: Vec<_> = find_urls(input, &defaults)
            .into_iter()
            .map(|hit| hit.url)
            .collect();

        assert_eq!(
            links,
            vec![
                String::from("https://docs.celeryq.dev/en/latest/userguide/workers.html#concurrency"),
                String::from(
                    "https://docs.celeryq.dev/en/latest/userguide/concurrency/eventlet.html"
                ),
            ]
        );
    }

    /// `user:{TOKEN}@` template credentials are removed from the reported URL.
    #[test]
    fn test_find_urls_strips_template_credentials_from_git_urls() {
        let defaults = DetectionConfig::default();
        let urls = find_urls(
            "Repo: https://user:{ACCESS_TOKEN}@github.com/example/project.git",
            &defaults,
        );

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }

    /// The percent-encoded form of the template credentials is removed as well.
    #[test]
    fn test_find_urls_strips_percent_encoded_template_credentials_from_git_urls() {
        let defaults = DetectionConfig::default();
        let urls = find_urls(
            "Repo: https://user:%7BACCESS_TOKEN%7D@github.com/example/project.git",
            &defaults,
        );

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }

    /// A plain URL and its credentialed twin collapse into a single detection
    /// under the default configuration.
    #[test]
    fn test_find_urls_dedupes_plain_and_templated_git_urls_after_sanitization() {
        let input = concat!(
            "https://github.com/example/project.git\n",
            "https://user:%7BACCESS_TOKEN%7D@github.com/example/project.git\n",
        );
        let defaults = DetectionConfig::default();
        let urls = find_urls(input, &defaults);

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }

    /// Markdown backticks trailing a URL are not part of the reported URL.
    #[test]
    fn test_find_urls_strips_trailing_backticks() {
        let defaults = DetectionConfig::default();
        let urls = find_urls(
            "Docs: https://github.com/example/project.git``",
            &defaults,
        );

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }

    /// A hostname containing markdown emphasis markers yields no detection.
    #[test]
    fn test_find_urls_ignores_markdown_emphasis_inside_hostname() {
        let defaults = DetectionConfig::default();
        let urls = find_urls(
            "Use https://**yourcompany**.atlassian.net for Jira Cloud.",
            &defaults,
        );

        assert!(urls.is_empty(), "urls: {urls:#?}");
    }

    /// A "host" that is actually a code expression yields no detection.
    #[test]
    fn test_find_urls_filters_code_variable_host_artifacts() {
        let defaults = DetectionConfig::default();
        let urls = find_urls(
            "loginUrl = \"http://os.environ['DD_BASE_URL']/login\"",
            &defaults,
        );

        assert!(urls.is_empty(), "urls: {urls:#?}");
    }
}