// provenant/finder/mod.rs

1mod emails;
2#[cfg(all(test, feature = "golden-tests"))]
3mod golden_test;
4mod host;
5mod junk_data;
6mod urls;
7
8pub use emails::find_emails;
9pub use urls::find_urls;
10
/// Settings handed to `find_emails` and `find_urls`.
///
/// Build one with struct-update syntax to override a single field:
/// `DetectionConfig { max_urls: 2, ..Default::default() }`.
#[derive(Debug, Clone)]
pub struct DetectionConfig {
    /// Limit on email detections; the tests in this module expect
    /// `find_emails` to return two results for three candidates when this is 2.
    pub max_emails: usize,
    /// Limit on URL detections; the tests in this module expect
    /// `find_urls` to return two results for three candidates when this is 2.
    pub max_urls: usize,
    /// NOTE(review): presumably requests de-duplication of detections when
    /// `true` — confirm against the `emails` / `urls` implementations.
    pub unique: bool,
}

impl Default for DetectionConfig {
    /// Returns the default configuration: 50 emails, 50 URLs, `unique` enabled.
    fn default() -> Self {
        Self {
            max_emails: 50,
            max_urls: 50,
            unique: true,
        }
    }
}
27
#[cfg(test)]
mod tests {
    use super::{DetectionConfig, find_emails, find_urls};

    /// Three addresses on separate lines with `max_emails: 2` must yield
    /// exactly two results, the first being `a@b.com` reported on line 1.
    #[test]
    fn test_find_emails_threshold() {
        let text = "a@b.com\nc@d.com\ne@f.com\n";
        let config = DetectionConfig {
            max_emails: 2,
            ..Default::default()
        };
        let emails = find_emails(text, &config);
        assert_eq!(emails.len(), 2);
        assert_eq!(emails[0].email, "a@b.com");
        assert_eq!(emails[0].start_line, 1);
    }

    /// Three URLs on separate lines with `max_urls: 2` must yield exactly the
    /// first two. The expected values carry a trailing `/` that the input
    /// text does not have.
    #[test]
    fn test_find_urls_threshold() {
        let text = "http://a.com\nhttp://b.com\nhttp://c.com\n";
        let config = DetectionConfig {
            max_urls: 2,
            ..Default::default()
        };
        let urls = find_urls(text, &config);
        assert_eq!(urls.len(), 2);
        assert_eq!(urls[0].url, "http://a.com/");
        assert_eq!(urls[1].url, "http://b.com/");
    }
}