use std::collections::{BTreeMap, BTreeSet, VecDeque};
use std::process::Command;
use serde_json::Value;
/// SPDX license identifiers accepted for crates in the shipped closure.
///
/// `token_allowed` compares a license identifier against this list ignoring ASCII case,
/// and `allow_list_matches_deny_toml` asserts that this list is identical to the
/// `[licenses].allow` array parsed from the workspace `deny.toml`.
const ALLOWED: &[&str] = &[
"MIT",
"Apache-2.0",
"BSD-2-Clause",
"BSD-3-Clause",
"0BSD",
"Unlicense",
"MPL-2.0",
"Zlib",
"Unicode-3.0",
];
/// Target triples passed to `cargo metadata --filter-platform`; the tests below take the
/// union of the dependency closures resolved for each of these targets.
const SHIP_TARGETS: &[&str] = &["x86_64-unknown-linux-gnu", "aarch64-apple-darwin"];
/// `(crate name, exact license string)` pairs that
/// `documented_nontrivial_licenses_match_reality` requires to be present in the shipped
/// closure with exactly this `license` value. According to that test's failure messages,
/// these mirror the call-outs in THIRD_PARTY_NOTICES.
const DOCUMENTED_NONTRIVIAL: &[(&str, &str)] = &[
("unicode-ident", "(MIT OR Apache-2.0) AND Unicode-3.0"),
("encoding_rs", "(Apache-2.0 OR MIT) AND BSD-3-Clause"),
("zlib-rs", "Zlib"),
];
/// Runs `cargo metadata` (format version 1) for the workspace manifest two directories
/// above this crate, filtered to `target`, and returns the parsed JSON document.
///
/// The cargo binary is taken from the `CARGO` environment variable, falling back to
/// `cargo` on `PATH`. `RUSTFLAGS` is removed from the child's environment.
///
/// # Panics
///
/// Panics if the command cannot be spawned, exits unsuccessfully, or prints output that
/// is not valid JSON.
fn metadata_for_target(target: &str) -> Value {
    let workspace_manifest = concat!(env!("CARGO_MANIFEST_DIR"), "/../../Cargo.toml");
    let cargo = std::env::var("CARGO").unwrap_or_else(|_| String::from("cargo"));

    let mut command = Command::new(&cargo);
    command
        .arg("metadata")
        .args(["--format-version", "1"])
        .args(["--filter-platform", target])
        .args(["--manifest-path", workspace_manifest])
        .env_remove("RUSTFLAGS");

    let out = match command.output() {
        Ok(out) => out,
        Err(e) => panic!("failed to run `{cargo} metadata` for {target}: {e}"),
    };
    assert!(
        out.status.success(),
        "`cargo metadata --filter-platform {target}` failed:\n{}",
        String::from_utf8_lossy(&out.stderr),
    );

    match serde_json::from_slice(&out.stdout) {
        Ok(meta) => meta,
        Err(e) => panic!("`cargo metadata` for {target} was not valid JSON: {e}"),
    }
}
/// Computes the set of packages reachable from the two workspace crates (`dbmd-cli`,
/// `dbmd-core`) through normal dependency edges in a `cargo metadata` document.
///
/// An edge counts as normal when at least one of its `dep_kinds` entries has a null
/// `kind`. The result maps package id to `(name, license)`; a package without a
/// `license` string gets an empty license.
///
/// # Panics
///
/// Panics if the document lacks any of the fields read here, or if the two workspace
/// crates are not both present in `packages`.
fn shipped_closure(meta: &Value) -> BTreeMap<String, (String, String)> {
    // Index every package by id and remember which ids are the workspace roots.
    let mut info_by_id: BTreeMap<&str, (String, String)> = BTreeMap::new();
    let mut roots: Vec<String> = Vec::new();
    for pkg in meta["packages"].as_array().expect("metadata.packages") {
        let id = pkg["id"].as_str().expect("package.id");
        let name = pkg["name"].as_str().expect("package.name");
        let license = pkg["license"].as_str().unwrap_or("");
        if matches!(name, "dbmd-cli" | "dbmd-core") {
            roots.push(id.to_owned());
        }
        info_by_id.insert(id, (name.to_owned(), license.to_owned()));
    }
    assert_eq!(
        roots.len(),
        2,
        "expected to find both workspace crates (dbmd-cli, dbmd-core) in metadata",
    );

    // Adjacency list restricted to normal dependency edges.
    let edges: BTreeMap<&str, Vec<&str>> = meta["resolve"]["nodes"]
        .as_array()
        .expect("metadata.resolve.nodes")
        .iter()
        .map(|node| {
            let id = node["id"].as_str().expect("node.id");
            let deps: Vec<&str> = node["deps"]
                .as_array()
                .expect("node.deps")
                .iter()
                .filter(|dep| {
                    dep["dep_kinds"]
                        .as_array()
                        .expect("dep.dep_kinds")
                        .iter()
                        .any(|kind| kind["kind"].is_null())
                })
                .map(|dep| dep["pkg"].as_str().expect("dep.pkg"))
                .collect();
            (id, deps)
        })
        .collect();

    // Breadth-first walk from the roots.
    let mut visited: BTreeSet<String> = BTreeSet::new();
    let mut frontier: VecDeque<String> = roots.into();
    let mut closure: BTreeMap<String, (String, String)> = BTreeMap::new();
    while let Some(current) = frontier.pop_front() {
        if !visited.insert(current.clone()) {
            continue;
        }
        if let Some(info) = info_by_id.get(current.as_str()) {
            closure.insert(current.clone(), info.clone());
        }
        let children = edges
            .get(current.as_str())
            .map(Vec::as_slice)
            .unwrap_or_default();
        frontier.extend(
            children
                .iter()
                .copied()
                .filter(|dep| !visited.contains(*dep))
                .map(str::to_owned),
        );
    }
    closure
}
/// Reports whether a single license identifier is on the `ALLOWED` list.
///
/// Anything from the first `" WITH "` onward (the exception clause) is ignored, the
/// remaining license id is trimmed, and the comparison ignores ASCII case.
fn token_allowed(token: &str) -> bool {
    let license = token
        .split_once(" WITH ")
        .map_or(token, |(license, _exception)| license)
        .trim();
    ALLOWED
        .iter()
        .copied()
        .any(|candidate| candidate.eq_ignore_ascii_case(license))
}
/// Evaluates an SPDX license expression and reports whether it can be satisfied using
/// only allow-listed licenses.
///
/// The expression is tokenized with `tokenize` and evaluated by `parse_or`.
///
/// # Panics
///
/// Panics if tokens are left over after parsing (for example two identifiers with no
/// operator between them), in addition to whatever the parser itself rejects.
fn expr_satisfiable(expr: &str) -> bool {
    let tokens = tokenize(expr);
    let mut pos = 0;
    let satisfiable = parse_or(&tokens, &mut pos);
    // Deliberately a hard assert rather than `debug_assert_eq!`: this is a license gate,
    // and a malformed expression must fail in release-mode test runs too instead of being
    // judged on its leading, successfully parsed prefix.
    assert_eq!(pos, tokens.len(), "unconsumed tokens in `{expr}`");
    satisfiable
}
/// A lexical token of an SPDX license expression, as produced by `tokenize` and
/// consumed by `parse_or` / `parse_and` / `parse_atom`.
#[derive(Debug, Clone, PartialEq)]
enum Tok {
/// The `AND` operator.
And,
/// The `OR` operator; `tokenize` also emits it for the `/` separator.
Or,
/// An opening parenthesis.
LParen,
/// A closing parenthesis.
RParen,
/// Any other word. `tokenize` folds a following `WITH <exception>` into the same
/// string, e.g. `Apache-2.0 WITH LLVM-exception`.
Ident(String),
}
/// Splits an SPDX license expression into tokens.
///
/// `/` is treated as `OR` (so `MIT/Apache-2.0` is accepted), parentheses become their own
/// tokens even without surrounding whitespace, and `WITH <exception>` is appended to the
/// preceding identifier so that a license and its exception travel as one `Tok::Ident`.
/// A `WITH <exception>` that has no preceding identifier becomes the stand-alone
/// identifier `WITH <exception>`.
///
/// # Panics
///
/// Panics if `WITH` is the last word of the expression or is followed by an operator or
/// a parenthesis instead of an exception identifier. Such expressions are malformed and
/// must not be silently reduced to the bare license id.
fn tokenize(expr: &str) -> Vec<Tok> {
    let normalized = expr
        .replace('/', " OR ")
        .replace('(', " ( ")
        .replace(')', " ) ");
    let mut out: Vec<Tok> = Vec::new();
    let mut words = normalized.split_whitespace();
    // `while let` rather than `for`: the `WITH` arm pulls the next word itself.
    while let Some(word) = words.next() {
        match word {
            "AND" => out.push(Tok::And),
            "OR" => out.push(Tok::Or),
            "(" => out.push(Tok::LParen),
            ")" => out.push(Tok::RParen),
            "WITH" => match words.next() {
                Some(exception)
                    if !matches!(exception, "AND" | "OR" | "WITH" | "(" | ")") =>
                {
                    if let Some(Tok::Ident(license)) = out.last_mut() {
                        license.push_str(" WITH ");
                        license.push_str(exception);
                    } else {
                        out.push(Tok::Ident(format!("WITH {exception}")));
                    }
                }
                _ => panic!(
                    "`WITH` is not followed by an exception identifier in SPDX expression `{expr}`"
                ),
            },
            other => out.push(Tok::Ident(other.to_string())),
        }
    }
    out
}
/// Parses `and_expr (OR and_expr)*` starting at `*pos` and returns whether any operand
/// is satisfiable. `*pos` is advanced past everything consumed.
fn parse_or(tokens: &[Tok], pos: &mut usize) -> bool {
    let mut satisfiable = parse_and(tokens, pos);
    while let Some(Tok::Or) = tokens.get(*pos) {
        *pos += 1;
        // `|=` does not short-circuit, so the right operand is always parsed and `pos`
        // always moves past it, even when the result is already known to be true.
        satisfiable |= parse_and(tokens, pos);
    }
    satisfiable
}
/// Parses `atom (AND atom)*` starting at `*pos` and returns whether every operand is
/// satisfiable. `*pos` is advanced past everything consumed.
fn parse_and(tokens: &[Tok], pos: &mut usize) -> bool {
    let mut satisfiable = parse_atom(tokens, pos);
    while let Some(Tok::And) = tokens.get(*pos) {
        *pos += 1;
        // `&=` does not short-circuit, so the right operand is always parsed and `pos`
        // always moves past it, even when the result is already known to be false.
        satisfiable &= parse_atom(tokens, pos);
    }
    satisfiable
}
/// Parses a single operand at `*pos`: either a license identifier, checked with
/// `token_allowed`, or a parenthesized sub-expression, evaluated with `parse_or`.
/// `*pos` is advanced past everything consumed.
///
/// # Panics
///
/// Panics if the token at `*pos` is missing or is not an identifier or `(`, or if a
/// parenthesized sub-expression is not followed by `)`.
fn parse_atom(tokens: &[Tok], pos: &mut usize) -> bool {
    let current = tokens.get(*pos);

    if let Some(Tok::Ident(license)) = current {
        *pos += 1;
        return token_allowed(license);
    }
    if current != Some(&Tok::LParen) {
        panic!("unexpected token while parsing SPDX expression: {current:?}");
    }

    // Parenthesized group: step over `(`, evaluate the inside, then require `)`.
    *pos += 1;
    let inner = parse_or(tokens, pos);
    match tokens.get(*pos) {
        Some(Tok::RParen) => *pos += 1,
        _ => panic!("unbalanced parentheses in SPDX expression"),
    }
    inner
}
/// Every crate in the shipped closure (union over `SHIP_TARGETS`) must declare a license
/// expression that `expr_satisfiable` accepts.
#[test]
fn shipped_dependencies_are_permissive_only() {
    // Union of the per-target closures; the first target to report a package id wins.
    let mut shipped: BTreeMap<String, (String, String)> = BTreeMap::new();
    let per_target = SHIP_TARGETS
        .iter()
        .flat_map(|target| shipped_closure(&metadata_for_target(target)));
    for (id, info) in per_target {
        shipped.entry(id).or_insert(info);
    }

    // Sanity check that the metadata walk found a realistic number of crates.
    assert!(
        shipped.len() > 50,
        "shipped closure looks implausibly small ({} crates) — metadata walk is likely broken",
        shipped.len(),
    );

    let missing_license: Vec<String> = shipped
        .values()
        .filter(|(_, license)| license.is_empty())
        .map(|(name, _)| name.clone())
        .collect();
    let violations: Vec<String> = shipped
        .values()
        .filter(|(_, license)| !license.is_empty() && !expr_satisfiable(license))
        .map(|(name, license)| format!(" {name}: \"{license}\""))
        .collect();

    assert!(
        missing_license.is_empty(),
        "shipped crate(s) declare no SPDX license (cannot be verified permissive): {missing_license:?}",
    );
    assert!(
        violations.is_empty(),
        "shipped dependency license(s) are NOT satisfiable from the permissive allowlist \
         {ALLOWED:?}.\nEach line is a crate compiled into the released `dbmd` binary whose \
         SPDX expression contains a term off the allowlist (e.g. a copyleft `AND`, or a new \
         permissive license not yet recorded). Remove the crate, or — if it is genuinely \
         permissive — add the identifier to BOTH `ALLOWED` here and THIRD_PARTY_NOTICES.\n{}",
        violations.join("\n"),
    );
}
/// Every entry of `DOCUMENTED_NONTRIVIAL` must still be in the shipped closure and must
/// still carry exactly the recorded license string.
#[test]
fn documented_nontrivial_licenses_match_reality() {
    // Crate name -> license over all ship targets; the first occurrence of a name wins.
    let mut shipped: BTreeMap<String, String> = BTreeMap::new();
    for target in SHIP_TARGETS {
        let closure = shipped_closure(&metadata_for_target(target));
        for (name, license) in closure.into_values() {
            shipped.entry(name).or_insert(license);
        }
    }

    for &(crate_name, expected_license) in DOCUMENTED_NONTRIVIAL {
        if let Some(actual) = shipped.get(crate_name) {
            assert_eq!(
                actual, expected_license,
                "license string for shipped crate `{crate_name}` changed: NOTICES documents \
                 \"{expected_license}\" but cargo metadata reports \"{actual}\". Re-check the new \
                 license against the allowlist and update THIRD_PARTY_NOTICES.",
            );
        } else {
            panic!(
                "THIRD_PARTY_NOTICES calls out `{crate_name}` ({expected_license}) as a shipped \
                 crate with a non-MIT/Apache obligation, but it is no longer in the shipped \
                 closure. Update the NOTICES call-out (and this list) to match the current tree.",
            );
        }
    }
}
/// Reads the workspace `deny.toml` (two directories above this crate) and returns the
/// string entries of the `allow` array in its `[licenses]` table.
///
/// This is a small line-oriented scan, not a TOML parser. It handles the array written
/// on one line or spread over several lines, and it ignores trailing `#` comments. Like
/// `collect_quoted`, it assumes no entry contains `#`, `[` or `]`.
///
/// # Panics
///
/// Panics if the file cannot be read or if no entries were found.
fn deny_toml_allow_list() -> BTreeSet<String> {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let deny_path = format!("{manifest_dir}/../../deny.toml");
    let text = std::fs::read_to_string(&deny_path)
        .unwrap_or_else(|e| panic!("read deny.toml at {deny_path}: {e}"));

    let mut in_licenses = false;
    let mut in_allow = false;
    let mut out: BTreeSet<String> = BTreeSet::new();
    for raw in text.lines() {
        // Drop the trailing comment before looking at any brackets, so that a `[` or `]`
        // mentioned in a comment is never mistaken for a table header or the end of the
        // array, and so that `[licenses] # note` is still recognised as the header.
        let line = raw.split('#').next().unwrap_or("").trim();

        if line.starts_with('[') {
            in_licenses = line == "[licenses]";
            in_allow = false;
            continue;
        }
        if !in_licenses {
            continue;
        }
        if !in_allow {
            // Require the key to be exactly `allow` (not e.g. `allow-osi-fsf-free`) and
            // its value to open an array.
            let opens_allow = match line.split_once('=') {
                Some((key, value)) => {
                    key.trim() == "allow" && value.trim_start().starts_with('[')
                }
                None => false,
            };
            if !opens_allow {
                continue;
            }
            in_allow = true;
        }

        collect_quoted(line, &mut out);
        if line.contains(']') {
            // End of the array; nothing after it is relevant.
            break;
        }
    }
    assert!(
        !out.is_empty(),
        "could not parse a non-empty [licenses].allow array from deny.toml \
         (parser or file shape changed)",
    );
    out
}
/// Inserts every double-quoted string found on `line` into `out`.
///
/// Everything from the first `#` onward is ignored. Quotes are paired left to right; if
/// the last quote is unterminated, the text from it to the end of the (comment-stripped)
/// line is inserted as well. Escape sequences are not interpreted.
fn collect_quoted(line: &str, out: &mut BTreeSet<String>) {
    let code = line.split('#').next().unwrap_or("");
    // Splitting on `"` yields alternating outside/inside segments, starting outside, so
    // the quoted contents are exactly the odd-numbered segments.
    let quoted = code.split('"').skip(1).step_by(2);
    out.extend(quoted.map(str::to_owned));
}
/// `ALLOWED` and the `[licenses].allow` array in `deny.toml` must contain exactly the
/// same identifiers.
#[test]
fn allow_list_matches_deny_toml() {
    let from_toml = deny_toml_allow_list();
    let from_test: BTreeSet<String> = ALLOWED.iter().copied().map(String::from).collect();

    let only_in_test: Vec<&String> = from_test.difference(&from_toml).collect();
    let only_in_toml: Vec<&String> = from_toml.difference(&from_test).collect();
    assert_eq!(
        from_test,
        from_toml,
        "the permissive allow-list has drifted between this test's ALLOWED and \
         deny.toml's [licenses].allow. Keep them identical.\n only in test: {:?}\n \
         only in deny.toml: {:?}",
        only_in_test,
        only_in_toml,
    );
}
/// Table-driven check of `expr_satisfiable`: single identifiers, `OR` / `AND`
/// combinations, operator precedence (`AND` binds tighter than `OR`), nested
/// parentheses, the `/` separator, `WITH` exceptions, and case-insensitive matching.
#[test]
fn spdx_and_or_semantics() {
    const SATISFIABLE: &[&str] = &[
        // Single allow-listed identifiers.
        "MIT",
        "Apache-2.0",
        "Zlib",
        "Unicode-3.0",
        // Identifier matching ignores ASCII case.
        "mit",
        // OR: one acceptable branch is enough.
        "MIT OR Apache-2.0",
        "Apache-2.0 OR GPL-3.0",
        "Unlicense OR MIT",
        // AND: every operand must be acceptable.
        "(MIT OR Apache-2.0) AND Unicode-3.0",
        "(Apache-2.0 OR MIT) AND BSD-3-Clause",
        // Precedence: AND binds tighter than OR.
        "MIT OR Apache-2.0 AND GPL-3.0",
        "GPL-3.0 AND MIT OR Apache-2.0",
        // Nested parentheses.
        "((MIT))",
        "(GPL-3.0 OR (MIT AND BSD-3-Clause))",
        // `/` is treated as OR.
        "MIT/Apache-2.0",
        "Apache-2.0/MIT",
        // WITH exceptions are judged by their base license.
        "Apache-2.0 WITH LLVM-exception",
        "GPL-2.0 WITH Classpath-exception-2.0 OR MIT",
    ];
    const UNSATISFIABLE: &[&str] = &[
        "GPL-3.0",
        "AGPL-3.0-only",
        "LGPL-2.1-only",
        "GPL-3.0 OR AGPL-3.0",
        "MIT AND GPL-3.0",
        "(MIT OR Apache-2.0) AND GPL-3.0",
        "GPL-3.0 AND (MIT OR Apache-2.0)",
        "MIT AND (GPL-3.0 OR AGPL-3.0)",
        "GPL-2.0/GPL-3.0",
        "GPL-3.0 WITH Classpath-exception-2.0",
    ];

    for expr in SATISFIABLE {
        assert!(
            expr_satisfiable(expr),
            "expected `{expr}` to be satisfiable from the allowlist",
        );
    }
    for expr in UNSATISFIABLE {
        assert!(
            !expr_satisfiable(expr),
            "expected `{expr}` NOT to be satisfiable from the allowlist",
        );
    }
}