// provenant-cli 0.0.15
//
// Rust-based ScanCode-compatible scanner for licenses, package metadata, SBOMs, and provenance data.
// Documentation
mod emails;
#[cfg(all(test, feature = "golden-tests"))]
mod golden_test;
mod host;
mod junk_data;
mod urls;

pub use emails::find_emails;
pub use urls::find_urls;

/// Settings handed to [`find_emails`] and [`find_urls`].
///
/// The tests in this module show the two limits capping the number of
/// results returned; the implementation that consumes these fields lives in
/// the `emails` and `urls` submodules.
#[derive(Clone, Debug)]
pub struct DetectionConfig {
    /// Limit on how many emails `find_emails` reports (a value of 2 trims
    /// three candidates down to two in the threshold test).
    // NOTE(review): how a limit of 0 is treated is not visible here — confirm.
    pub max_emails: usize,
    /// Limit on how many URLs `find_urls` reports (same shape as
    /// `max_emails`, exercised by the URL threshold test).
    pub max_urls: usize,
    /// Presumably asks the finders to drop duplicate matches.
    // NOTE(review): the flag is only read outside this file — confirm semantics.
    pub unique: bool,
}

impl Default for DetectionConfig {
    fn default() -> Self {
        Self {
            max_emails: 50,
            max_urls: 50,
            unique: true,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::{DetectionConfig, find_emails, find_urls};
    use crate::models::LineNumber;

    /// Three email candidates with `max_emails: 2` must yield two results,
    /// the first being the first address on line one.
    #[test]
    fn test_find_emails_threshold() {
        let limits = DetectionConfig {
            max_emails: 2,
            ..DetectionConfig::default()
        };
        let input = "a@b.com\nc@d.com\ne@f.com\n";
        let found = find_emails(input, &limits);

        assert_eq!(found.len(), 2);
        let first = &found[0];
        assert_eq!(first.email, "a@b.com");
        assert_eq!(first.start_line, LineNumber::ONE);
    }

    /// Three URL candidates with `max_urls: 2` must yield the first two,
    /// each reported with a trailing slash.
    #[test]
    fn test_find_urls_threshold() {
        let limits = DetectionConfig {
            max_urls: 2,
            ..DetectionConfig::default()
        };
        let input = "http://a.com\nhttp://b.com\nhttp://c.com\n";
        let found = find_urls(input, &limits);

        assert_eq!(found.len(), 2);
        assert_eq!(found[0].url, "http://a.com/");
        assert_eq!(found[1].url, "http://b.com/");
    }

    /// An address on a `.local` machine hostname is dropped while a regular
    /// address on the preceding line is kept.
    #[test]
    fn test_find_emails_filters_local_machine_domains() {
        let defaults = DetectionConfig::default();
        let input = "admin@rust-lang.org\ngeisse@shopgates-mac-mini-3.local\n";
        let found = find_emails(input, &defaults);

        assert_eq!(found.len(), 1);
        assert_eq!(found[0].email, "admin@rust-lang.org");
    }

    /// An email whose local part starts with `ftp.` must not be reported as
    /// a URL.
    #[test]
    fn test_find_urls_ignores_email_like_ftp_token() {
        let defaults = DetectionConfig::default();
        let input = "See ftp.mtuci@gmail.com for details.";
        let urls = find_urls(input, &defaults);

        assert!(urls.is_empty(), "urls: {urls:#?}");
    }

    /// A scheme-less `ftp.` hostname with a path is still detected and is
    /// reported with an `http://` prefix.
    #[test]
    fn test_find_urls_keeps_plain_ftp_hostname() {
        let defaults = DetectionConfig::default();
        let input = "Mirror: ftp.gnu.org/gnu/tar/";
        let urls = find_urls(input, &defaults);

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "http://ftp.gnu.org/gnu/tar/");
    }

    /// Two URLs joined by a literal backslash-`n` (two characters, not a
    /// real newline) must come back as two separate URLs.
    #[test]
    fn test_find_urls_splits_literal_escaped_newline_separated_urls() {
        let defaults = DetectionConfig::default();
        let input = "https://docs.celeryq.dev/en/latest/userguide/workers.html#concurrency\\nhttps://docs.celeryq.dev/en/latest/userguide/concurrency/eventlet.html";

        let values: Vec<_> = find_urls(input, &defaults)
            .into_iter()
            .map(|hit| hit.url)
            .collect();
        let expected = vec![
            String::from("https://docs.celeryq.dev/en/latest/userguide/workers.html#concurrency"),
            String::from("https://docs.celeryq.dev/en/latest/userguide/concurrency/eventlet.html"),
        ];

        assert_eq!(values, expected);
    }

    /// Userinfo carrying a `{ACCESS_TOKEN}` placeholder is removed from the
    /// reported git URL.
    #[test]
    fn test_find_urls_strips_template_credentials_from_git_urls() {
        let defaults = DetectionConfig::default();
        let input = "Repo: https://user:{ACCESS_TOKEN}@github.com/example/project.git";
        let urls = find_urls(input, &defaults);

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }

    /// Same as the placeholder case, but with the braces percent-encoded
    /// (`%7B` / `%7D`).
    #[test]
    fn test_find_urls_strips_percent_encoded_template_credentials_from_git_urls() {
        let defaults = DetectionConfig::default();
        let input = "Repo: https://user:%7BACCESS_TOKEN%7D@github.com/example/project.git";
        let urls = find_urls(input, &defaults);

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }

    /// A plain URL and its credential-templated twin must collapse into a
    /// single result under the default configuration.
    #[test]
    fn test_find_urls_dedupes_plain_and_templated_git_urls_after_sanitization() {
        let defaults = DetectionConfig::default();
        let input = concat!(
            "https://github.com/example/project.git\n",
            "https://user:%7BACCESS_TOKEN%7D@github.com/example/project.git\n",
        );
        let urls = find_urls(input, &defaults);

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }

    /// Backticks trailing a URL are not part of the reported value.
    #[test]
    fn test_find_urls_strips_trailing_backticks() {
        let defaults = DetectionConfig::default();
        let input = "Docs: https://github.com/example/project.git``";
        let urls = find_urls(input, &defaults);

        assert_eq!(urls.len(), 1, "urls: {urls:#?}");
        assert_eq!(urls[0].url, "https://github.com/example/project.git");
    }
}