use chrono::Utc;
use std::collections::HashMap;
use super::test_utils::{dir, file};
use super::*;
use crate::assembly;
use crate::license_detection::index::LicenseIndex;
use crate::license_detection::models::{License as RuntimeLicense, Rule, RuleKind};
use crate::models::{Copyright, Holder, Match, Package, PackageData, PackageType, Tallies};
use crate::scan_result_shaping::normalize_paths;
use serde_json::json;
fn sample_runtime_license(key: &str, name: &str, spdx_license_key: Option<&str>) -> RuntimeLicense {
RuntimeLicense {
key: key.to_string(),
short_name: Some(name.to_string()),
name: name.to_string(),
language: Some("en".to_string()),
spdx_license_key: spdx_license_key.map(str::to_string),
other_spdx_license_keys: vec![],
category: Some("Permissive".to_string()),
owner: Some("Example Owner".to_string()),
homepage_url: Some("https://example.com/license".to_string()),
text: format!("{name} text"),
reference_urls: vec!["https://example.com/license".to_string()],
osi_license_key: spdx_license_key.map(str::to_string),
text_urls: vec!["https://example.com/license.txt".to_string()],
osi_url: Some("https://opensource.org/licenses/example".to_string()),
faq_url: Some("https://example.com/faq".to_string()),
other_urls: vec!["https://example.com/other".to_string()],
notes: None,
is_deprecated: false,
is_exception: false,
is_unknown: false,
is_generic: false,
replaced_by: vec![],
minimum_coverage: None,
standard_notice: Some("Standard notice".to_string()),
ignorable_copyrights: None,
ignorable_holders: None,
ignorable_authors: None,
ignorable_urls: None,
ignorable_emails: None,
}
}
fn sample_rule(identifier: &str, expression: &str, rule_kind: RuleKind) -> Rule {
Rule {
identifier: identifier.to_string(),
license_expression: expression.to_string(),
text: format!("{identifier} text"),
tokens: vec![],
rule_kind,
is_false_positive: false,
is_required_phrase: false,
is_from_license: false,
relevance: 100,
minimum_coverage: None,
has_stored_minimum_coverage: false,
is_continuous: true,
required_phrase_spans: vec![],
stopwords_by_pos: HashMap::new(),
referenced_filenames: None,
ignorable_urls: None,
ignorable_emails: None,
ignorable_copyrights: None,
ignorable_holders: None,
ignorable_authors: None,
language: None,
notes: None,
length_unique: 0,
high_length_unique: 0,
high_length: 0,
min_matched_length: 0,
min_high_matched_length: 0,
min_matched_length_unique: 0,
min_high_matched_length_unique: 0,
is_small: false,
is_tiny: false,
starts_with_license: false,
ends_with_license: false,
is_deprecated: false,
spdx_license_key: None,
other_spdx_license_keys: vec![],
}
}
#[test]
fn collect_top_level_license_references_includes_clues_packages_and_sorted_deduped_refs() {
let licenses = vec![
sample_runtime_license("apache-2.0", "Apache License 2.0", Some("Apache-2.0")),
sample_runtime_license("bsd-simplified", "BSD 2-Clause", Some("BSD-2-Clause")),
sample_runtime_license("mit", "MIT License", Some("MIT")),
sample_runtime_license(
"unknown-license-reference",
"Unknown License Reference",
None,
),
];
let mut license_index = LicenseIndex::default();
for license in &licenses {
license_index
.licenses_by_key
.insert(license.key.clone(), license.clone());
}
license_index.rules_by_rid = vec![
sample_rule("apache-2.0_1.RULE", "apache-2.0", RuleKind::Text),
sample_rule(
"license-clue_1.RULE",
"unknown-license-reference",
RuleKind::Clue,
),
];
let mut source = file("project/src/lib.rs");
source.license_expression = Some("mit".to_string());
source.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/src/lib.rs".to_string()),
start_line: 1,
end_line: 2,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(10),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("apache-2.0_1.RULE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
identifier: None,
detection_log: vec![],
}];
source.license_clues = vec![Match {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
from_file: Some("project/NOTICE".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("2-aho".to_string()),
score: 100.0,
matched_length: Some(4),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("license-clue_1.RULE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}];
source.package_data = vec![PackageData {
package_type: Some(PackageType::Npm),
declared_license_expression: Some("bsd-simplified".to_string()),
..PackageData::default()
}];
let mut package = super::test_utils::package("pkg:npm/demo?uuid=test", "project/package.json");
package.package_type = Some(PackageType::Npm);
package.declared_license_expression = Some("apache-2.0".to_string());
let (license_references, license_rule_references) =
collect_top_level_license_references(&[dir("project"), source], &[package], &license_index);
assert_eq!(
license_references
.iter()
.map(|reference| reference.spdx_license_key.as_str())
.collect::<Vec<_>>(),
vec![
"Apache-2.0",
"BSD-2-Clause",
"MIT",
"LicenseRef-scancode-unknown-license-reference",
]
);
assert_eq!(
license_rule_references
.iter()
.map(|reference| reference.identifier.as_str())
.collect::<Vec<_>>(),
vec!["apache-2.0_1.RULE", "license-clue_1.RULE"]
);
assert!(license_rule_references[1].is_license_clue);
assert_eq!(license_references[0].key.as_deref(), Some("apache-2.0"));
assert_eq!(
license_references[0].category.as_deref(),
Some("Permissive")
);
assert_eq!(license_references[0].language.as_deref(), Some("en"));
assert_eq!(
license_references[0].owner.as_deref(),
Some("Example Owner")
);
assert_eq!(
license_references[0].homepage_url.as_deref(),
Some("https://example.com/license")
);
assert_eq!(
license_references[0].osi_license_key.as_deref(),
Some("Apache-2.0")
);
assert_eq!(
license_references[0].text_urls,
vec!["https://example.com/license.txt".to_string()]
);
assert_eq!(
license_references[0].osi_url.as_deref(),
Some("https://opensource.org/licenses/example")
);
assert!(!license_references[0].is_exception);
assert_eq!(
license_references[0].standard_notice.as_deref(),
Some("Standard notice")
);
assert!(license_references[0].scancode_url.is_some());
assert_eq!(license_rule_references[0].relevance, Some(100));
}
#[test]
fn collect_top_level_license_references_returns_empty_for_empty_inputs() {
let license_index = LicenseIndex::default();
let (license_references, license_rule_references) =
collect_top_level_license_references(&[], &[], &license_index);
assert!(license_references.is_empty());
assert!(license_rule_references.is_empty());
}
#[test]
fn collect_top_level_license_references_marks_synthetic_spdx_rules() {
let license_index = LicenseIndex {
rules_by_rid: vec![Rule {
identifier: "spdx_license_id_mit_for_mit.RULE".to_string(),
license_expression: "mit".to_string(),
text: "MIT".to_string(),
tokens: vec![],
rule_kind: RuleKind::Tag,
is_false_positive: false,
is_required_phrase: false,
is_from_license: false,
relevance: 100,
minimum_coverage: Some(0),
has_stored_minimum_coverage: false,
is_continuous: false,
required_phrase_spans: vec![],
stopwords_by_pos: HashMap::new(),
referenced_filenames: None,
ignorable_urls: None,
ignorable_emails: None,
ignorable_copyrights: None,
ignorable_holders: None,
ignorable_authors: None,
language: Some("en".to_string()),
notes: None,
length_unique: 0,
high_length_unique: 0,
high_length: 0,
min_matched_length: 0,
min_high_matched_length: 0,
min_matched_length_unique: 0,
min_high_matched_length_unique: 0,
is_small: false,
is_tiny: false,
starts_with_license: false,
ends_with_license: false,
is_deprecated: false,
spdx_license_key: Some("MIT".to_string()),
other_spdx_license_keys: vec![],
}],
..LicenseIndex::default()
};
let mut source = file("project/Cargo.toml");
source.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/Cargo.toml".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("1-spdx-id".to_string()),
score: 100.0,
matched_length: Some(1),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("spdx_license_id_mit_for_mit.RULE".to_string()),
rule_url: None,
matched_text: Some("MIT".to_string()),
referenced_filenames: None,
matched_text_diagnostics: None,
}],
identifier: Some("mit-id".to_string()),
detection_log: vec![],
}];
let (_, license_rule_references) =
collect_top_level_license_references(&[source], &[], &license_index);
assert_eq!(license_rule_references.len(), 1);
assert!(license_rule_references[0].is_synthetic);
assert!(license_rule_references[0].rule_url.is_none());
assert_eq!(license_rule_references[0].length, 0);
assert!(!license_rule_references[0].skip_for_required_phrase_generation);
}
#[test]
fn apply_local_file_reference_following_resolves_root_license_file() {
let mut license = file("project/LICENSE");
license.license_expression = Some("mit".to_string());
license.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/LICENSE".to_string()),
start_line: 1,
end_line: 20,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(100),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("mit.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("mit-license".to_string()),
}];
let mut notice = file("project/src/notice.js");
notice.license_expression = Some("unknown-license-reference".to_string());
notice.license_detections = vec![crate::models::LicenseDetection {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
matches: vec![Match {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
from_file: Some("project/src/notice.js".to_string()),
start_line: 2,
end_line: 2,
matcher: Some("2-aho".to_string()),
score: 100.0,
matched_length: Some(2),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("unknown-license-reference_see-license_1.RULE".to_string()),
rule_url: None,
matched_text: Some("See LICENSE".to_string()),
referenced_filenames: Some(vec!["LICENSE".to_string()]),
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("unknown-ref".to_string()),
}];
let mut files = vec![dir("project"), license, notice];
let mut packages = Vec::new();
apply_package_reference_following(&mut files, &mut packages);
let notice = files
.iter()
.find(|file| file.path == "project/src/notice.js")
.expect("notice file should exist");
assert_eq!(notice.license_expression.as_deref(), Some("mit"));
assert_eq!(
notice.license_detections[0].detection_log,
vec!["unknown-reference-to-local-file"]
);
assert_eq!(notice.license_detections[0].matches.len(), 2);
assert_eq!(
notice.license_detections[0].matches[1].from_file.as_deref(),
Some("project/LICENSE")
);
}
#[test]
fn apply_local_file_reference_following_requires_exact_filename_match() {
let mut license = file("project/LICENSE");
license.license_expression = Some("mit".to_string());
license.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/LICENSE".to_string()),
start_line: 1,
end_line: 20,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(100),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("mit.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("mit-license".to_string()),
}];
let mut notice = file("project/src/notice.js");
notice.license_expression = Some("unknown-license-reference".to_string());
notice.license_detections = vec![crate::models::LicenseDetection {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
matches: vec![Match {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
from_file: Some("project/src/notice.js".to_string()),
start_line: 2,
end_line: 2,
matcher: Some("2-aho".to_string()),
score: 100.0,
matched_length: Some(2),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("unknown-license-reference_see-license_1.RULE".to_string()),
rule_url: None,
matched_text: Some("See LICENSE.txt".to_string()),
referenced_filenames: Some(vec!["LICENSE.txt".to_string()]),
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("unknown-ref".to_string()),
}];
let mut files = vec![dir("project"), license, notice];
let mut packages = Vec::new();
apply_package_reference_following(&mut files, &mut packages);
let notice = files
.iter()
.find(|file| file.path == "project/src/notice.js")
.expect("notice file should exist");
assert_eq!(
notice.license_expression.as_deref(),
Some("unknown-license-reference")
);
assert_eq!(notice.license_detections[0].matches.len(), 1);
}
#[test]
fn apply_local_file_reference_following_resolves_files_beside_manifest() {
let package_uid = "pkg:pypi/demo?uuid=test".to_string();
let mut package = super::test_utils::package(&package_uid, "project/demo.dist-info/METADATA");
package.datafile_paths = vec!["project/demo.dist-info/METADATA".to_string()];
let mut license = file("project/demo.dist-info/LICENSE");
license.license_expression = Some("mit".to_string());
license.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/demo.dist-info/LICENSE".to_string()),
start_line: 1,
end_line: 20,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(100),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("mit.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("mit-license".to_string()),
}];
let mut source = file("project/demo/__init__.py");
source.for_packages = vec![package_uid.clone()];
source.license_expression = Some("unknown-license-reference".to_string());
source.license_detections = vec![crate::models::LicenseDetection {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
matches: vec![Match {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
from_file: Some("project/demo/__init__.py".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("2-aho".to_string()),
score: 100.0,
matched_length: Some(2),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("unknown-license-reference_see-license_1.RULE".to_string()),
rule_url: None,
matched_text: Some("See LICENSE".to_string()),
referenced_filenames: Some(vec!["LICENSE".to_string()]),
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("unknown-ref".to_string()),
}];
let mut files = vec![dir("project"), license, source];
let mut packages = vec![package];
apply_package_reference_following(&mut files, &mut packages);
let source = files
.iter()
.find(|file| file.path == "project/demo/__init__.py")
.expect("source file should exist");
assert_eq!(source.license_expression.as_deref(), Some("mit"));
assert_eq!(
source.license_detections[0].matches[1].from_file.as_deref(),
Some("project/demo.dist-info/LICENSE")
);
}
#[test]
fn apply_package_reference_following_resolves_manifest_origin_local_file() {
let package_uid = "pkg:cargo/demo?uuid=test".to_string();
let mut package = super::test_utils::package(&package_uid, "project/Cargo.toml");
package.datafile_paths = vec!["project/Cargo.toml".to_string()];
package.license_detections = vec![crate::models::LicenseDetection {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
matches: vec![Match {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
from_file: Some("project/Cargo.toml".to_string()),
start_line: 5,
end_line: 5,
matcher: Some("parser-declared-license".to_string()),
score: 100.0,
matched_length: Some(1),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: None,
rule_url: None,
matched_text: Some("MIT".to_string()),
referenced_filenames: Some(vec!["LICENSE".to_string()]),
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("unknown-ref".to_string()),
}];
let mut manifest = file("project/Cargo.toml");
manifest.for_packages = vec![package_uid.clone()];
manifest.package_data = vec![PackageData {
package_type: Some(PackageType::Cargo),
license_detections: package.license_detections.clone(),
..Default::default()
}];
let mut license = file("project/LICENSE");
license.license_expression = Some("mit".to_string());
license.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/LICENSE".to_string()),
start_line: 1,
end_line: 20,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(100),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("mit.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("mit-license".to_string()),
}];
let mut files = vec![dir("project"), manifest, license];
let mut packages = vec![package];
apply_package_reference_following(&mut files, &mut packages);
assert_eq!(
packages[0].declared_license_expression.as_deref(),
Some("mit")
);
assert_eq!(packages[0].license_detections[0].matches.len(), 2);
assert_eq!(
packages[0].license_detections[0].matches[1]
.from_file
.as_deref(),
Some("project/LICENSE")
);
assert_eq!(
files[1].package_data[0]
.declared_license_expression
.as_deref(),
Some("mit")
);
}
#[test]
fn apply_package_reference_following_falls_back_to_root_when_package_missing() {
let mut root_copying = file("project/COPYING");
root_copying.license_expression = Some("gpl-3.0".to_string());
root_copying.license_detections = vec![crate::models::LicenseDetection {
license_expression: "gpl-3.0".to_string(),
license_expression_spdx: "GPL-3.0-only".to_string(),
matches: vec![Match {
license_expression: "gpl-3.0".to_string(),
license_expression_spdx: "GPL-3.0-only".to_string(),
from_file: Some("project/COPYING".to_string()),
start_line: 1,
end_line: 10,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(50),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("gpl-3.0.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("gpl-root".to_string()),
}];
let mut po = file("project/po/en_US.po");
po.license_expression = Some("unknown-license-reference".to_string());
po.license_detections = vec![crate::models::LicenseDetection {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
matches: vec![Match {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
from_file: Some("project/po/en_US.po".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("2-aho".to_string()),
score: 100.0,
matched_length: Some(5),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("unknown-license-reference_see-license_1.RULE".to_string()),
rule_url: None,
matched_text: Some("same license as package".to_string()),
referenced_filenames: Some(vec!["COPYING".to_string()]),
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("unknown-ref".to_string()),
}];
let mut files = vec![dir("project"), root_copying, po];
let mut packages = Vec::new();
apply_package_reference_following(&mut files, &mut packages);
let po = files
.iter()
.find(|file| file.path == "project/po/en_US.po")
.expect("po file should exist");
assert_eq!(po.license_expression.as_deref(), Some("gpl-3.0"));
assert_eq!(
po.license_detections[0].detection_log,
vec!["unknown-reference-to-local-file"]
);
}
#[test]
fn apply_package_reference_following_inherits_license_from_package_context() {
let package_uid = "pkg:pypi/demo?uuid=test".to_string();
let mut package = super::test_utils::package(&package_uid, "project/PKG-INFO");
package.datafile_paths = vec!["project/PKG-INFO".to_string()];
package.license_detections = vec![crate::models::LicenseDetection {
license_expression: "bsd-new".to_string(),
license_expression_spdx: "BSD-3-Clause".to_string(),
matches: vec![Match {
license_expression: "bsd-new".to_string(),
license_expression_spdx: "BSD-3-Clause".to_string(),
from_file: Some("project/PKG-INFO".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("1-hash".to_string()),
score: 99.0,
matched_length: Some(5),
match_coverage: Some(100.0),
rule_relevance: Some(99),
rule_identifier: Some("pypi_bsd_license.RULE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("package-license".to_string()),
}];
let mut source = file("project/locale/django.po");
source.for_packages = vec![package_uid.clone()];
source.license_expression = Some("free-unknown".to_string());
source.license_detections = vec![crate::models::LicenseDetection {
license_expression: "free-unknown".to_string(),
license_expression_spdx: "LicenseRef-scancode-free-unknown".to_string(),
matches: vec![Match {
license_expression: "free-unknown".to_string(),
license_expression_spdx: "LicenseRef-scancode-free-unknown".to_string(),
from_file: Some("project/locale/django.po".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("2-aho".to_string()),
score: 100.0,
matched_length: Some(11),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("free-unknown-package_1.RULE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: Some(vec!["INHERIT_LICENSE_FROM_PACKAGE".to_string()]),
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("unknown-package-ref".to_string()),
}];
let mut files = vec![dir("project"), source];
let mut packages = vec![package];
apply_package_reference_following(&mut files, &mut packages);
let source = files
.iter()
.find(|file| file.path == "project/locale/django.po")
.expect("source file should exist");
assert_eq!(source.license_expression.as_deref(), Some("bsd-new"));
assert_eq!(
source.license_detections[0].detection_log,
vec!["unknown-reference-in-file-to-package"]
);
assert_eq!(source.license_detections[0].matches.len(), 2);
assert_eq!(
source.license_detections[0].matches[1].from_file.as_deref(),
Some("project/PKG-INFO")
);
}
#[test]
fn apply_package_reference_following_falls_back_to_root_for_missing_package_reference() {
let mut root_copying = file("project/COPYING");
root_copying.license_expression = Some("gpl-3.0".to_string());
root_copying.license_detections = vec![crate::models::LicenseDetection {
license_expression: "gpl-3.0".to_string(),
license_expression_spdx: "GPL-3.0-only".to_string(),
matches: vec![Match {
license_expression: "gpl-3.0".to_string(),
license_expression_spdx: "GPL-3.0-only".to_string(),
from_file: Some("project/COPYING".to_string()),
start_line: 1,
end_line: 10,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(50),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("gpl-3.0.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("gpl-root".to_string()),
}];
let mut po = file("project/po/en_US.po");
po.license_expression = Some("free-unknown".to_string());
po.license_detections = vec![crate::models::LicenseDetection {
license_expression: "free-unknown".to_string(),
license_expression_spdx: "LicenseRef-scancode-free-unknown".to_string(),
matches: vec![Match {
license_expression: "free-unknown".to_string(),
license_expression_spdx: "LicenseRef-scancode-free-unknown".to_string(),
from_file: Some("project/po/en_US.po".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("2-aho".to_string()),
score: 100.0,
matched_length: Some(5),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("free-unknown-package_2.RULE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: Some(vec!["INHERIT_LICENSE_FROM_PACKAGE".to_string()]),
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("unknown-package-ref".to_string()),
}];
let mut files = vec![dir("project"), root_copying, po];
let mut packages = Vec::new();
apply_package_reference_following(&mut files, &mut packages);
let po = files
.iter()
.find(|file| file.path == "project/po/en_US.po")
.expect("po file should exist");
assert_eq!(po.license_expression.as_deref(), Some("gpl-3.0"));
assert_eq!(
po.license_detections[0].detection_log,
vec!["unknown-reference-in-file-to-nonexistent-package"]
);
assert_eq!(
po.license_detections[0].matches[1].from_file.as_deref(),
Some("project/COPYING")
);
}
#[test]
fn apply_package_reference_following_leaves_ambiguous_multi_package_file_unresolved() {
let first_uid = "pkg:pypi/demo-a?uuid=test".to_string();
let second_uid = "pkg:pypi/demo-b?uuid=test".to_string();
let mut first_package = super::test_utils::package(&first_uid, "project/a/PKG-INFO");
first_package.datafile_paths = vec!["project/a/PKG-INFO".to_string()];
first_package.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/a/PKG-INFO".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(5),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("mit.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("mit-license".to_string()),
}];
let mut second_package = super::test_utils::package(&second_uid, "project/b/PKG-INFO");
second_package.datafile_paths = vec!["project/b/PKG-INFO".to_string()];
second_package.license_detections = vec![crate::models::LicenseDetection {
license_expression: "apache-2.0".to_string(),
license_expression_spdx: "Apache-2.0".to_string(),
matches: vec![Match {
license_expression: "apache-2.0".to_string(),
license_expression_spdx: "Apache-2.0".to_string(),
from_file: Some("project/b/PKG-INFO".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(5),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("apache-2.0.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("apache-license".to_string()),
}];
let mut shared_file = file("project/shared/locale.po");
shared_file.for_packages = vec![first_uid, second_uid];
shared_file.license_expression = Some("free-unknown".to_string());
shared_file.license_detections = vec![crate::models::LicenseDetection {
license_expression: "free-unknown".to_string(),
license_expression_spdx: "LicenseRef-scancode-free-unknown".to_string(),
matches: vec![Match {
license_expression: "free-unknown".to_string(),
license_expression_spdx: "LicenseRef-scancode-free-unknown".to_string(),
from_file: Some("project/shared/locale.po".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("2-aho".to_string()),
score: 100.0,
matched_length: Some(11),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("free-unknown-package_1.RULE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: Some(vec!["INHERIT_LICENSE_FROM_PACKAGE".to_string()]),
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("unknown-package-ref".to_string()),
}];
let mut files = vec![dir("project"), shared_file];
let mut packages = vec![first_package, second_package];
apply_package_reference_following(&mut files, &mut packages);
let shared_file = files
.iter()
.find(|file| file.path == "project/shared/locale.po")
.expect("shared file should exist");
assert_eq!(
shared_file.license_expression.as_deref(),
Some("free-unknown")
);
assert_eq!(shared_file.license_detections[0].matches.len(), 1);
assert!(shared_file.license_detections[0].detection_log.is_empty());
}
#[test]
fn collect_top_level_license_detections_groups_file_detections_and_preserves_paths() {
let mut first = file("project/src/lib.rs");
first.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/src/lib.rs".to_string()),
start_line: 1,
end_line: 3,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(10),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("mit.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec!["imperfect-match-coverage".to_string()],
identifier: Some("mit-shared-id".to_string()),
}];
let mut second = file("project/src/other.rs");
second.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/src/other.rs".to_string()),
start_line: 4,
end_line: 6,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(10),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("mit.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("mit-shared-id".to_string()),
}];
let mut third = file("project/src/apache.rs");
third.license_detections = vec![crate::models::LicenseDetection {
license_expression: "apache-2.0".to_string(),
license_expression_spdx: "Apache-2.0".to_string(),
matches: vec![Match {
license_expression: "apache-2.0".to_string(),
license_expression_spdx: "Apache-2.0".to_string(),
from_file: Some("project/src/apache.rs".to_string()),
start_line: 1,
end_line: 12,
matcher: Some("2-aho".to_string()),
score: 100.0,
matched_length: Some(120),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("apache-2.0_2.RULE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("apache-2.0-id".to_string()),
}];
let detections = collect_top_level_license_detections(&[first, second, third]);
assert_eq!(detections.len(), 2);
assert_eq!(detections[0].license_expression, "apache-2.0");
assert_eq!(detections[0].detection_count, 1);
assert_eq!(detections[1].identifier, "mit-shared-id");
assert_eq!(detections[1].detection_count, 2);
assert_eq!(
detections[1].reference_matches[0].from_file.as_deref(),
Some("project/src/lib.rs")
);
assert_eq!(detections[1].reference_matches.len(), 2);
assert_eq!(
detections[1].detection_log,
vec!["imperfect-match-coverage".to_string()]
);
}
#[test]
fn collect_top_level_license_detections_counts_same_identifier_regions_in_one_file() {
let mut file = file("project/src/lib.rs");
file.license_detections = vec![
crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/src/lib.rs".to_string()),
start_line: 1,
end_line: 3,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(10),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("mit.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("mit-shared-id".to_string()),
},
crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/src/lib.rs".to_string()),
start_line: 20,
end_line: 25,
matcher: Some("2-aho".to_string()),
score: 100.0,
matched_length: Some(12),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("mit_3.RULE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: Some("mit-shared-id".to_string()),
},
];
let detections = collect_top_level_license_detections(&[file]);
assert_eq!(detections.len(), 1);
assert_eq!(detections[0].detection_count, 2);
assert_eq!(detections[0].reference_matches.len(), 2);
}
#[test]
fn collect_top_level_license_detections_includes_package_origin_detections() {
let mut manifest = file("project/package.json");
manifest.package_data = vec![PackageData {
package_type: Some(PackageType::Npm),
license_detections: vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: None,
start_line: 1,
end_line: 1,
matcher: Some("parser-declared-license".to_string()),
score: 100.0,
matched_length: Some(1),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: None,
rule_url: None,
matched_text: Some("MIT".to_string()),
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: None,
}],
other_license_detections: vec![crate::models::LicenseDetection {
license_expression: "apache-2.0".to_string(),
license_expression_spdx: "Apache-2.0".to_string(),
matches: vec![Match {
license_expression: "apache-2.0".to_string(),
license_expression_spdx: "Apache-2.0".to_string(),
from_file: None,
start_line: 2,
end_line: 2,
matcher: Some("parser-declared-license".to_string()),
score: 100.0,
matched_length: Some(1),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: None,
rule_url: None,
matched_text: Some("Apache-2.0".to_string()),
referenced_filenames: None,
matched_text_diagnostics: None,
}],
detection_log: vec![],
identifier: None,
}],
..PackageData::default()
}];
manifest.backfill_license_provenance();
let detections = collect_top_level_license_detections(&[manifest]);
assert_eq!(detections.len(), 2);
assert_eq!(detections[0].license_expression, "apache-2.0");
assert_eq!(detections[1].license_expression, "mit");
assert_eq!(
detections[1].reference_matches[0].from_file.as_deref(),
Some("project/package.json")
);
}
#[test]
fn create_output_preserves_top_level_license_references_from_context() {
let start = Utc::now();
let end = start;
let output = create_output(
start,
end,
crate::scanner::ProcessResult {
files: vec![dir("project")],
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 1,
assembly_result: assembly::AssemblyResult {
packages: vec![],
dependencies: vec![],
},
license_detections: vec![],
license_references: vec![crate::models::LicenseReference {
key: Some("mit".to_string()),
language: Some("en".to_string()),
name: "MIT License".to_string(),
short_name: "MIT".to_string(),
owner: Some("Example Owner".to_string()),
homepage_url: Some("https://example.com/license".to_string()),
spdx_license_key: "MIT".to_string(),
other_spdx_license_keys: vec![],
osi_license_key: Some("MIT".to_string()),
text_urls: vec!["https://example.com/license.txt".to_string()],
osi_url: Some("https://opensource.org/licenses/MIT".to_string()),
faq_url: Some("https://example.com/faq".to_string()),
other_urls: vec!["https://example.com/other".to_string()],
category: None,
is_exception: false,
is_unknown: false,
is_generic: false,
notes: None,
minimum_coverage: None,
standard_notice: None,
ignorable_copyrights: vec![],
ignorable_holders: vec![],
ignorable_authors: vec![],
ignorable_urls: vec![],
ignorable_emails: vec![],
scancode_url: None,
licensedb_url: None,
spdx_url: None,
text: "MIT text".to_string(),
}],
license_rule_references: vec![crate::models::LicenseRuleReference {
identifier: "mit_1.RULE".to_string(),
license_expression: "mit".to_string(),
is_license_text: true,
is_license_notice: false,
is_license_reference: false,
is_license_tag: false,
is_license_clue: false,
is_license_intro: false,
language: None,
rule_url: None,
is_required_phrase: false,
skip_for_required_phrase_generation: false,
is_continuous: false,
is_synthetic: false,
is_from_license: false,
length: 0,
relevance: None,
minimum_coverage: None,
referenced_filenames: vec![],
notes: None,
ignorable_copyrights: vec![],
ignorable_holders: vec![],
ignorable_authors: vec![],
ignorable_urls: vec![],
ignorable_emails: vec![],
text: None,
}],
options: CreateOutputOptions {
facet_rules: &[],
include_classify: false,
include_tallies_by_facet: false,
include_summary: false,
include_license_clarity_score: false,
include_tallies: false,
include_tallies_with_details: false,
include_tallies_of_key_files: false,
include_generated: false,
},
},
);
assert_eq!(output.license_references.len(), 1);
assert_eq!(output.license_rule_references.len(), 1);
assert_eq!(output.license_references[0].spdx_license_key, "MIT");
assert_eq!(output.license_rule_references[0].identifier, "mit_1.RULE");
}
#[test]
fn create_output_preserves_top_level_license_detections_from_context() {
let start = Utc::now();
let end = start;
let output = create_output(
start,
end,
crate::scanner::ProcessResult {
files: vec![dir("project")],
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 1,
assembly_result: assembly::AssemblyResult {
packages: vec![],
dependencies: vec![],
},
license_detections: vec![crate::models::TopLevelLicenseDetection {
identifier: "mit-id".to_string(),
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
detection_count: 2,
detection_log: vec![],
reference_matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/LICENSE".to_string()),
start_line: 1,
end_line: 20,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(20),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("mit.LICENSE".to_string()),
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
}],
license_references: vec![],
license_rule_references: vec![],
options: CreateOutputOptions {
facet_rules: &[],
include_classify: false,
include_tallies_by_facet: false,
include_summary: false,
include_license_clarity_score: false,
include_tallies: false,
include_tallies_with_details: false,
include_tallies_of_key_files: false,
include_generated: false,
},
},
);
assert_eq!(output.license_detections.len(), 1);
assert_eq!(output.license_detections[0].identifier, "mit-id");
assert_eq!(output.license_detections[0].detection_count, 2);
}
#[test]
fn create_output_gates_summary_tallies_and_generated_sections() {
let license_rel = "project/LICENSE".to_string();
let mut disabled_license = file(&license_rel);
disabled_license.is_generated = Some(true);
disabled_license.tallies = Some(Tallies::default());
let start = Utc::now();
let end = start;
let output_without_flags = create_output(
start,
end,
crate::scanner::ProcessResult {
files: vec![dir("project"), disabled_license],
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 1,
assembly_result: assembly::AssemblyResult {
packages: vec![],
dependencies: vec![],
},
license_detections: vec![],
license_references: vec![],
license_rule_references: vec![],
options: CreateOutputOptions {
facet_rules: &[],
include_classify: false,
include_tallies_by_facet: false,
include_summary: false,
include_license_clarity_score: false,
include_tallies: false,
include_tallies_with_details: false,
include_tallies_of_key_files: false,
include_generated: false,
},
},
);
assert!(output_without_flags.summary.is_none());
assert!(output_without_flags.tallies.is_none());
assert!(output_without_flags.tallies_of_key_files.is_none());
assert!(
output_without_flags
.files
.iter()
.all(|file| file.is_generated.is_none())
);
let mut enabled_license = file(&license_rel);
enabled_license.is_generated = Some(true);
enabled_license.license_expression = Some("mit".to_string());
enabled_license.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some(license_rel.clone()),
start_line: 1,
end_line: 1,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(10),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: None,
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
identifier: None,
detection_log: vec![],
}];
let output_with_flags = create_output(
start,
end,
crate::scanner::ProcessResult {
files: vec![dir("project"), enabled_license],
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 1,
assembly_result: assembly::AssemblyResult {
packages: vec![],
dependencies: vec![],
},
license_detections: vec![],
license_references: vec![],
license_rule_references: vec![],
options: CreateOutputOptions {
facet_rules: &[],
include_classify: false,
include_tallies_by_facet: false,
include_summary: true,
include_license_clarity_score: true,
include_tallies: true,
include_tallies_with_details: true,
include_tallies_of_key_files: true,
include_generated: true,
},
},
);
assert!(output_with_flags.summary.is_some());
assert!(output_with_flags.tallies.is_some());
assert!(output_with_flags.tallies_of_key_files.is_some());
assert!(
output_with_flags
.files
.iter()
.find(|file| file.path == license_rel)
.is_some_and(|file| file.is_generated == Some(true) && file.tallies.is_some())
);
}
#[test]
fn create_output_preserves_scanner_generated_flags_without_scan_root() {
let start = Utc::now();
let end = start;
let mut generated = file("project/generated.c");
generated.is_generated = Some(true);
let mut plain = file("project/plain.c");
plain.is_generated = Some(false);
let mut missing = file("project/missing.c");
missing.is_generated = None;
let output = create_output(
start,
end,
crate::scanner::ProcessResult {
files: vec![dir("project"), generated, plain, missing],
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 1,
assembly_result: assembly::AssemblyResult {
packages: vec![],
dependencies: vec![],
},
license_detections: vec![],
license_references: vec![],
license_rule_references: vec![],
options: CreateOutputOptions {
facet_rules: &[],
include_classify: false,
include_tallies_by_facet: false,
include_summary: false,
include_license_clarity_score: false,
include_tallies: false,
include_tallies_with_details: false,
include_tallies_of_key_files: false,
include_generated: true,
},
},
);
let generated_flags: Vec<_> = output
.files
.iter()
.map(|file| (file.path.as_str(), file.is_generated))
.collect();
assert_eq!(
generated_flags,
vec![
("project", Some(false)),
("project/generated.c", Some(true)),
("project/plain.c", Some(false)),
("project/missing.c", Some(false)),
]
);
}
#[test]
fn create_output_score_only_keeps_clarity_without_full_summary_fields() {
let start = Utc::now();
let end = start;
let mut license = file("project/LICENSE");
license.license_expression = Some("mit".to_string());
license.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/LICENSE".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("1-hash".to_string()),
score: 100.0,
matched_length: Some(10),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: None,
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
identifier: None,
detection_log: vec![],
}];
let output = create_output(
start,
end,
crate::scanner::ProcessResult {
files: vec![dir("project"), license],
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 1,
assembly_result: assembly::AssemblyResult {
packages: vec![],
dependencies: vec![],
},
license_detections: vec![],
license_references: vec![],
license_rule_references: vec![],
options: CreateOutputOptions {
facet_rules: &[],
include_classify: false,
include_tallies_by_facet: false,
include_summary: false,
include_license_clarity_score: true,
include_tallies: false,
include_tallies_with_details: false,
include_tallies_of_key_files: false,
include_generated: false,
},
},
);
let summary = output.summary.expect("score-only summary exists");
assert_eq!(summary.declared_license_expression.as_deref(), Some("mit"));
assert!(summary.license_clarity_score.is_some());
assert!(summary.declared_holder.is_none());
assert!(summary.primary_language.is_none());
assert!(summary.other_license_expressions.is_empty());
assert!(summary.other_holders.is_empty());
assert!(summary.other_languages.is_empty());
}
#[test]
fn create_output_preserves_file_level_license_clues_in_json_shape() {
let start = Utc::now();
let end = start;
let mut clue_file = file("project/NOTICE");
clue_file.license_clues = vec![Match {
license_expression: "unknown-license-reference".to_string(),
license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
from_file: Some("project/NOTICE".to_string()),
start_line: 1,
end_line: 2,
matcher: Some("2-aho".to_string()),
score: 100.0,
matched_length: Some(19),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: Some("license-clue_1.RULE".to_string()),
rule_url: Some("https://example.com/license-clue_1.RULE".to_string()),
matched_text: Some(
"This product currently only contains code developed by authors".to_string(),
),
referenced_filenames: None,
matched_text_diagnostics: None,
}];
let output = create_output(
start,
end,
crate::scanner::ProcessResult {
files: vec![dir("project"), clue_file],
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 1,
assembly_result: assembly::AssemblyResult {
packages: vec![],
dependencies: vec![],
},
license_detections: vec![],
license_references: vec![],
license_rule_references: vec![],
options: CreateOutputOptions {
facet_rules: &[],
include_classify: false,
include_tallies_by_facet: false,
include_summary: false,
include_license_clarity_score: false,
include_tallies: false,
include_tallies_with_details: false,
include_tallies_of_key_files: false,
include_generated: false,
},
},
);
let value = serde_json::to_value(&output).expect("output should serialize");
let notice = value["files"]
.as_array()
.expect("files array")
.iter()
.find(|entry| entry["path"] == json!("project/NOTICE"))
.expect("notice file present");
assert_eq!(notice["license_detections"], json!([]));
assert_eq!(
notice["detected_license_expression_spdx"],
serde_json::Value::Null
);
assert_eq!(
notice["license_clues"][0]["license_expression"],
"unknown-license-reference"
);
assert_eq!(notice["license_clues"][0]["matcher"], "2-aho");
}
#[test]
fn create_output_preserves_empty_package_data_license_and_dependency_arrays() {
let start = Utc::now();
let end = start;
let mut manifest = file("project/package.json");
manifest.package_data = vec![PackageData {
package_type: Some(PackageType::Npm),
name: Some("demo".to_string()),
version: Some("1.0.0".to_string()),
..PackageData::default()
}];
let output = create_output(
start,
end,
crate::scanner::ProcessResult {
files: vec![dir("project"), manifest],
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 1,
assembly_result: assembly::AssemblyResult {
packages: vec![],
dependencies: vec![],
},
license_detections: vec![],
license_references: vec![],
license_rule_references: vec![],
options: CreateOutputOptions {
facet_rules: &[],
include_classify: false,
include_tallies_by_facet: false,
include_summary: false,
include_license_clarity_score: false,
include_tallies: false,
include_tallies_with_details: false,
include_tallies_of_key_files: false,
include_generated: false,
},
},
);
let value = serde_json::to_value(&output).expect("output should serialize");
let package_data = value["files"]
.as_array()
.expect("files array")
.iter()
.find(|entry| entry["path"] == json!("project/package.json"))
.and_then(|entry| entry["package_data"].as_array())
.and_then(|package_data| package_data.first())
.expect("package data entry present");
assert_eq!(package_data["license_detections"], json!([]));
assert_eq!(package_data["dependencies"], json!([]));
}
#[test]
fn create_output_tallies_by_facet_does_not_leak_resource_tallies() {
let start = Utc::now();
let end = start;
let mut source = file("project/src/lib.rs");
source.programming_language = Some("Rust".to_string());
let facet_defs = ["dev=*.rs".to_string()];
let facet_rules = build_facet_rules(&facet_defs).expect("facet rules compile");
let output = create_output(
start,
end,
crate::scanner::ProcessResult {
files: vec![dir("project"), dir("project/src"), source],
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 2,
assembly_result: assembly::AssemblyResult {
packages: vec![],
dependencies: vec![],
},
license_detections: vec![],
license_references: vec![],
license_rule_references: vec![],
options: CreateOutputOptions {
facet_rules: &facet_rules,
include_classify: false,
include_tallies_by_facet: true,
include_summary: false,
include_license_clarity_score: false,
include_tallies: false,
include_tallies_with_details: false,
include_tallies_of_key_files: false,
include_generated: false,
},
},
);
assert!(output.tallies_by_facet.is_some());
assert!(output.files.iter().all(|file| file.tallies.is_none()));
}
#[test]
fn create_output_promotes_package_metadata_without_summary_flags() {
let start = Utc::now();
let end = start;
let package_uid = "pkg:npm/demo?uuid=test".to_string();
let mut license = file("project/LICENSE");
license.for_packages = vec![package_uid.clone()];
license.copyrights = vec![Copyright {
copyright: "Copyright Example Corp.".to_string(),
start_line: 1,
end_line: 1,
}];
license.holders = vec![Holder {
holder: "Example Corp.".to_string(),
start_line: 1,
end_line: 1,
}];
let package = Package {
package_uid,
datafile_paths: vec!["project/package.json".to_string()],
..super::test_utils::package("pkg:npm/demo?uuid=test", "project/package.json")
};
let output = create_output(
start,
end,
crate::scanner::ProcessResult {
files: vec![dir("project"), license],
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 1,
assembly_result: assembly::AssemblyResult {
packages: vec![package],
dependencies: vec![],
},
license_detections: vec![],
license_references: vec![],
license_rule_references: vec![],
options: CreateOutputOptions {
facet_rules: &[],
include_classify: false,
include_tallies_by_facet: false,
include_summary: false,
include_license_clarity_score: false,
include_tallies: false,
include_tallies_with_details: false,
include_tallies_of_key_files: false,
include_generated: false,
},
},
);
assert_eq!(output.packages[0].holder.as_deref(), Some("Example Corp."));
assert_eq!(
output.packages[0].copyright.as_deref(),
Some("Copyright Example Corp.")
);
}
#[test]
fn create_output_summary_still_resolves_after_strip_root_normalization() {
let start = Utc::now();
let end = start;
let temp = tempfile::tempdir().expect("tempdir should be created");
let root = temp.path().join("project");
let manifest_path = root.join("demo.gemspec");
std::fs::create_dir_all(&root).expect("root should exist");
let mut manifest = file(manifest_path.to_str().unwrap());
manifest.package_data = vec![crate::models::PackageData {
package_type: Some(crate::models::PackageType::Gem),
datasource_id: Some(crate::models::DatasourceId::Gemspec),
declared_license_expression: Some("mit".to_string()),
declared_license_expression_spdx: Some("MIT".to_string()),
purl: Some("pkg:gem/demo@1.0.0".to_string()),
..Default::default()
}];
manifest.license_expression = Some("mit".to_string());
manifest.license_detections = vec![crate::models::LicenseDetection {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
matches: vec![Match {
license_expression: "mit".to_string(),
license_expression_spdx: "MIT".to_string(),
from_file: Some("project/demo.gemspec".to_string()),
start_line: 1,
end_line: 1,
matcher: Some("1-spdx-id".to_string()),
score: 100.0,
matched_length: Some(1),
match_coverage: Some(100.0),
rule_relevance: Some(100),
rule_identifier: None,
rule_url: None,
matched_text: None,
referenced_filenames: None,
matched_text_diagnostics: None,
}],
identifier: None,
detection_log: vec![],
}];
let mut files = vec![dir(root.to_str().unwrap()), manifest];
normalize_paths(&mut files, root.to_str().unwrap(), true, false);
let assembly_result = assembly::assemble(&mut files);
let output = create_output(
start,
end,
crate::scanner::ProcessResult {
files,
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 1,
assembly_result,
license_detections: vec![],
license_references: vec![],
license_rule_references: vec![],
options: CreateOutputOptions {
facet_rules: &[],
include_classify: false,
include_tallies_by_facet: false,
include_summary: true,
include_license_clarity_score: false,
include_tallies: false,
include_tallies_with_details: false,
include_tallies_of_key_files: false,
include_generated: false,
},
},
);
assert_eq!(
output
.summary
.and_then(|summary| summary.declared_license_expression),
Some("mit".to_string())
);
}
#[test]
fn create_output_classify_only_sets_key_file_flags() {
let start = Utc::now();
let end = start;
let output = create_output(
start,
end,
crate::scanner::ProcessResult {
files: vec![dir("project"), file("project/README.md")],
excluded_count: 0,
},
CreateOutputContext {
total_dirs: 1,
assembly_result: assembly::AssemblyResult {
packages: vec![],
dependencies: vec![],
},
license_detections: vec![],
license_references: vec![],
license_rule_references: vec![],
options: CreateOutputOptions {
facet_rules: &[],
include_classify: true,
include_tallies_by_facet: false,
include_summary: false,
include_license_clarity_score: false,
include_tallies: false,
include_tallies_with_details: false,
include_tallies_of_key_files: false,
include_generated: false,
},
},
);
let readme = output
.files
.iter()
.find(|file| file.path == "project/README.md")
.expect("README should exist");
assert!(readme.is_readme);
assert!(readme.is_top_level);
assert!(readme.is_key_file);
}