use std::time::Duration;
use anyhow::{Context, Result};
use reqwest::header::{HeaderMap, HeaderValue, ACCEPT, IF_NONE_MATCH, USER_AGENT};
use reqwest::{Client as HttpClient, StatusCode};
use serde::{Deserialize, Deserializer, Serialize};
use thiserror::Error;
use crate::storage::Cache;
/// Base URL of the public deps.dev API; the default target used by `Client::new`.
pub const DEPS_DEV_API_BASE: &str = "https://api.deps.dev";
/// Time-to-live (24 hours) passed to the cache for every deps.dev response.
const TTL_DEPS_DEV: Duration = Duration::from_secs(24 * 3600);
/// Non-success HTTP outcomes reported by the deps.dev client.
#[derive(Debug, Error)]
pub enum DepsDevError {
/// The server answered `404 Not Found`.
#[error("deps.dev returned 404 not found")]
NotFound,
/// The server answered with a status other than 200, 304, or 404.
///
/// `status` is the numeric HTTP status; `body` is the response text, or
/// empty when the body could not be read.
#[error("deps.dev returned {status}: {body}")]
Other { status: u16, body: String },
}
/// Client for the deps.dev HTTP API that consults a response cache before
/// going to the network.
#[derive(Debug, Clone)]
pub struct Client {
/// `reqwest` client used to send the GET requests.
http: HttpClient,
/// URL prefix prepended to every request path.
base_url: String,
/// Store read before each request and written after 200 / 304 responses.
cache: Cache,
}
impl Client {
    /// Creates a client that targets [`DEPS_DEV_API_BASE`], sending requests
    /// through `http` and caching responses in `cache`.
    #[must_use]
    pub fn new(http: HttpClient, cache: Cache) -> Self {
        Self {
            http,
            base_url: DEPS_DEV_API_BASE.to_string(),
            cache,
        }
    }

    /// Replaces the base URL (for example to point at a mock server).
    ///
    /// Trailing `/` characters are removed: request paths always start with
    /// `/`, so keeping them would produce `//v3/...` URLs.
    #[must_use]
    pub fn with_base_url(mut self, url: impl Into<String>) -> Self {
        let mut base_url = url.into();
        let trimmed_len = base_url.trim_end_matches('/').len();
        base_url.truncate(trimmed_len);
        self.base_url = base_url;
        self
    }

    /// Lists the first-party packages deps.dev associates with the GitHub
    /// project `owner/repo`.
    ///
    /// The raw `:packageversions` response is filtered through
    /// `first_party_packages_from_versions`. A 404 from deps.dev is treated
    /// as "no packages" and yields an empty vector.
    ///
    /// # Errors
    ///
    /// Returns an error when the cache or the HTTP request fails, when
    /// deps.dev answers with a status other than 200 / 304 / 404, or when the
    /// response body cannot be parsed.
    pub async fn project_packages(&self, owner: &str, repo: &str) -> Result<Vec<PackageRef>> {
        let key = format!("deps_dev:projects:{owner}/{repo}:packageversions");
        let project_key = encode_project_key(owner, repo);
        let path = format!("/v3alpha/projects/{project_key}:packageversions");
        let body = match self.fetch_json(&key, &path, TTL_DEPS_DEV).await {
            Ok(body) => body,
            // An unknown project is not a failure: it simply has no packages.
            Err(err)
                if matches!(
                    err.downcast_ref::<DepsDevError>(),
                    Some(DepsDevError::NotFound)
                ) =>
            {
                return Ok(Vec::new());
            },
            Err(err) => return Err(err),
        };
        let parsed: ProjectVersionsResponse = serde_json::from_slice(&body)
            .context("parse deps.dev project :packageversions response")?;
        Ok(first_party_packages_from_versions(
            &parsed.versions,
            owner,
            repo,
        ))
    }

    /// Fetches package metadata for `name` in the packaging `system`.
    ///
    /// Both `system` and `name` are escaped before being placed in the URL
    /// path so that neither can introduce extra path segments.
    ///
    /// # Errors
    ///
    /// Returns an error when the cache or the HTTP request fails, when
    /// deps.dev answers with a status other than 200 / 304 (including 404,
    /// surfaced as [`DepsDevError::NotFound`]), or when the body cannot be
    /// parsed as [`PackageInfo`].
    pub async fn package(&self, system: &str, name: &str) -> Result<PackageInfo> {
        let key = format!("deps_dev:systems:{system}:{name}");
        let encoded_system = encode_path_segment(system);
        let encoded_name = encode_path_segment(name);
        let path = format!("/v3/systems/{encoded_system}/packages/{encoded_name}");
        let body = self.fetch_json(&key, &path, TTL_DEPS_DEV).await?;
        serde_json::from_slice(&body).context("parse deps.dev PackageInfo")
    }

    /// Returns the response body for `path`, served from the cache when the
    /// entry under `cache_key` is not stale.
    ///
    /// A stale entry with an ETag is revalidated with `If-None-Match`; a 304
    /// re-stores the cached body with a fresh `ttl`, a 200 stores the new body
    /// and ETag.
    ///
    /// # Errors
    ///
    /// Fails on cache errors, transport errors, a 304 that arrives without a
    /// cached body, a 404 ([`DepsDevError::NotFound`]) or any other status
    /// ([`DepsDevError::Other`]).
    async fn fetch_json(&self, cache_key: &str, path: &str, ttl: Duration) -> Result<Vec<u8>> {
        let cached = self.cache.get(cache_key)?;
        if let Some(entry) = &cached {
            if !entry.is_stale() {
                return Ok(entry.body.clone());
            }
        }
        let cached_etag = cached.as_ref().and_then(|entry| entry.etag.clone());

        let url = format!("{}{}", self.base_url, path);
        let mut headers = HeaderMap::new();
        headers.insert(USER_AGENT, HeaderValue::from_static("repo-trust"));
        headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
        // A cached ETag that is not a legal header value must not make the
        // resource unfetchable: skip revalidation and do a plain GET, whose
        // 200 response then overwrites the bad cache entry.
        if let Some(value) = cached_etag
            .as_deref()
            .and_then(|etag| HeaderValue::from_str(etag).ok())
        {
            headers.insert(IF_NONE_MATCH, value);
        }

        let resp = self
            .http
            .get(&url)
            .headers(headers)
            .send()
            .await
            .with_context(|| format!("GET {url}"))?;

        match resp.status() {
            StatusCode::NOT_MODIFIED => {
                // The cached body is only copied here, where it is needed.
                let body = cached
                    .as_ref()
                    .map(|entry| entry.body.clone())
                    .ok_or_else(|| anyhow::anyhow!("304 received without cached body"))?;
                // Re-store the entry so that it is written with the new `ttl`.
                self.cache
                    .put(cache_key, cached_etag.as_deref(), &body, ttl)?;
                Ok(body)
            },
            StatusCode::OK => {
                // Read the ETag first: `bytes()` consumes the response.
                let new_etag = resp
                    .headers()
                    .get("etag")
                    .and_then(|h| h.to_str().ok())
                    .map(str::to_string);
                let body = resp
                    .bytes()
                    .await
                    .with_context(|| format!("read body of GET {url}"))?;
                self.cache.put(cache_key, new_etag.as_deref(), &body, ttl)?;
                Ok(body.to_vec())
            },
            StatusCode::NOT_FOUND => Err(DepsDevError::NotFound.into()),
            status => {
                // Best effort: an unreadable error body is reported as empty.
                let body = resp.text().await.unwrap_or_default();
                Err(DepsDevError::Other {
                    status: status.as_u16(),
                    body,
                }
                .into())
            },
        }
    }
}
/// Percent-encodes `s` so it can be used as a single URL path segment.
///
/// Every byte outside the RFC 3986 "unreserved" set (ASCII letters, digits,
/// `-`, `.`, `_`, `~`) is written as `%XX` with uppercase hex digits. This
/// covers `/` and `:` as well as `%`, `?`, `#`, spaces and non-ASCII UTF-8
/// bytes, none of which may reach the request path unescaped.
fn encode_path_segment(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(s.len());
    for &byte in s.as_bytes() {
        if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' | b'_' | b'~') {
            out.push(char::from(byte));
        } else {
            out.push('%');
            out.push(char::from(HEX[usize::from(byte >> 4)]));
            out.push(char::from(HEX[usize::from(byte & 0x0F)]));
        }
    }
    out
}
/// Builds the deps.dev project key for a GitHub repository:
/// `github.com/{owner}/{repo}` with the separators written as `%2F` and both
/// `owner` and `repo` passed through `encode_path_segment`.
fn encode_project_key(owner: &str, repo: &str) -> String {
    let owner_segment = encode_path_segment(owner);
    let repo_segment = encode_path_segment(repo);
    ["github.com", owner_segment.as_str(), repo_segment.as_str()].join("%2F")
}
/// Relation values that mark a package as first-party on their own, without
/// requiring the package name to match the repository.
const FIRST_PARTY_RELATIONS: &[&str] = &["GO_ORIGIN"];
/// Minimum number of version entries a `(system, name)` pair must have before
/// it is considered at all; pairs with fewer entries are dropped.
const MIN_FIRST_PARTY_VERSIONS: usize = 2;
/// One element of the `versions` array in a project `:packageversions`
/// response.
#[derive(Debug, Deserialize)]
struct VersionEntry {
/// Package system, name and (optionally) version this entry refers to.
#[serde(rename = "versionKey")]
version_key: VersionKeyWire,
/// Relation label; preferred over `relation_type` when both are present.
#[serde(default, rename = "relationProvenance")]
relation_provenance: Option<String>,
/// Fallback relation label, consulted only when `relation_provenance` is absent.
#[serde(default, rename = "relationType")]
relation_type: Option<String>,
}
/// The `versionKey` object of a [`VersionEntry`].
#[derive(Debug, Deserialize)]
struct VersionKeyWire {
/// Packaging system, e.g. `"GO"`, `"CARGO"`, `"NPM"`.
system: String,
/// Package name within `system`.
name: String,
// Accepted from the wire but not read by the first-party filter, hence the
// `dead_code` allowance.
#[serde(default)]
#[allow(dead_code)] version: Option<String>,
}
/// Body of a project `:packageversions` response.
#[derive(Debug, Deserialize)]
struct ProjectVersionsResponse {
/// Version entries; an absent `versions` key deserializes to an empty list.
#[serde(default)]
versions: Vec<VersionEntry>,
}
/// Reduces raw project version entries to the packages considered first-party
/// for the repository `owner/repo`.
///
/// Entries are grouped by `(system, name)`. A group is kept when it has at
/// least [`MIN_FIRST_PARTY_VERSIONS`] entries and either
///
/// * one of its entries carries a relation listed in
///   [`FIRST_PARTY_RELATIONS`] (`relationProvenance` is preferred,
///   `relationType` is the fallback, a missing relation never matches), or
/// * the package name matches the repository per `name_matches_repo`.
///
/// The result is ordered by `(system, name)` and contains each pair at most
/// once.
fn first_party_packages_from_versions(
    versions: &[VersionEntry],
    owner: &str,
    repo: &str,
) -> Vec<PackageRef> {
    use std::collections::BTreeMap;

    /// Running tally for one `(system, name)` pair.
    #[derive(Default)]
    struct Bucket {
        /// Number of version entries seen for the pair.
        versions: usize,
        /// Whether any entry carried a first-party relation.
        verified: bool,
    }

    // Keys borrow from `versions`; owned strings are only allocated for the
    // packages that survive the filter.
    let mut buckets: BTreeMap<(&str, &str), Bucket> = BTreeMap::new();
    for entry in versions {
        let relation = entry
            .relation_provenance
            .as_deref()
            .or(entry.relation_type.as_deref())
            .unwrap_or("");
        let key = (
            entry.version_key.system.as_str(),
            entry.version_key.name.as_str(),
        );
        let bucket = buckets.entry(key).or_default();
        bucket.versions += 1;
        bucket.verified |= FIRST_PARTY_RELATIONS.contains(&relation);
    }

    let mut out: Vec<PackageRef> = Vec::new();
    for ((system, name), bucket) in buckets {
        if bucket.versions < MIN_FIRST_PARTY_VERSIONS {
            continue;
        }
        // The name check depends only on the bucket key, so it runs once per
        // package rather than once per version entry.
        if bucket.verified || name_matches_repo(name, owner, repo) {
            out.push(PackageRef {
                system: system.to_owned(),
                name: name.to_owned(),
            });
        }
    }
    out
}
/// Reports whether `pkg_name` looks like it is published from `owner/repo`.
///
/// Comparison is ASCII case-insensitive. A name matches when it is
///
/// * exactly the repository name (`repo`),
/// * the scoped form `@owner/repo`, or
/// * any path ending in `/owner/repo` (e.g. `github.com/owner/repo`).
fn name_matches_repo(pkg_name: &str, owner: &str, repo: &str) -> bool {
    let package = pkg_name.to_ascii_lowercase();
    let repo_lower = repo.to_ascii_lowercase();
    if package == repo_lower {
        return true;
    }

    let owner_and_repo = format!("{}/{}", owner.to_ascii_lowercase(), repo_lower);

    // `@owner/repo`: nothing but the leading `@` may precede the pair.
    let is_scoped_form = package.strip_prefix('@') == Some(owner_and_repo.as_str());

    // `.../owner/repo`: whatever precedes the pair must end in a slash.
    let is_path_form = package
        .strip_suffix(owner_and_repo.as_str())
        .map_or(false, |head| head.ends_with('/'));

    is_path_form || is_scoped_form
}
/// Identifies a package by packaging system and name.
///
/// The derived ordering compares `system` first, then `name`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq, PartialOrd, Ord)]
pub struct PackageRef {
/// Packaging system, e.g. `"GO"`, `"CARGO"`, `"NPM"`.
pub system: String,
/// Package name within `system`.
pub name: String,
}
/// Accepts three wire shapes for a package reference: a flat
/// `{ "system", "name" }` object, or the same pair nested under either a
/// `packageKey` or a `versionKey` member.
impl<'de> Deserialize<'de> for PackageRef {
    fn deserialize<D>(de: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// The `system` / `name` pair common to every accepted shape.
        #[derive(Deserialize)]
        struct Inner {
            system: String,
            name: String,
        }

        /// Variants are tried in declaration order.
        #[derive(Deserialize)]
        #[serde(untagged)]
        enum Wire {
            Flat(Inner),
            PackageKey {
                #[serde(rename = "packageKey")]
                package_key: Inner,
            },
            VersionKey {
                #[serde(rename = "versionKey")]
                version_key: Inner,
            },
        }

        // Every variant carries exactly one `Inner`, so a single irrefutable
        // or-pattern extracts it.
        let (Wire::Flat(key)
        | Wire::PackageKey { package_key: key }
        | Wire::VersionKey { version_key: key }) = Wire::deserialize(de)?;

        let Inner { system, name } = key;
        Ok(Self { system, name })
    }
}
/// Package metadata as returned by `Client::package`.
#[derive(Debug, Clone, Serialize)]
pub struct PackageInfo {
/// Packaging system, e.g. `"NPM"`.
pub system: String,
/// Package name within `system`.
pub name: String,
/// Weekly download count; only populated from the flat wire shape's
/// `weeklyDownloads` field and always `None` for the nested shape.
pub weekly_downloads: Option<u64>,
/// Latest version string, when the response provides one.
pub latest_version: Option<String>,
}
/// Accepts two wire shapes:
///
/// * **nested** — `{ "packageKey": { system, name }, "versions": [...] }`;
///   the latest version is the entry flagged `isDefault`, or the last entry
///   when none is flagged, and `weekly_downloads` is always `None`;
/// * **flat** — `{ system, name, weeklyDownloads?, latestVersion? }`.
///
/// The nested shape is tried first.
impl<'de> Deserialize<'de> for PackageInfo {
    fn deserialize<D>(de: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        #[derive(Deserialize)]
        struct VersionKey {
            version: String,
        }

        #[derive(Deserialize)]
        struct VersionEntry {
            #[serde(rename = "versionKey")]
            version_key: VersionKey,
            #[serde(default, rename = "isDefault")]
            is_default: bool,
        }

        #[derive(Deserialize)]
        struct PackageKey {
            system: String,
            name: String,
        }

        #[derive(Deserialize)]
        struct NestedShape {
            #[serde(rename = "packageKey")]
            package_key: PackageKey,
            #[serde(default)]
            versions: Vec<VersionEntry>,
        }

        #[derive(Deserialize)]
        struct FlatShape {
            system: String,
            name: String,
            #[serde(
                default,
                rename = "weeklyDownloads",
                deserialize_with = "deserialize_string_to_u64_option"
            )]
            weekly_downloads: Option<u64>,
            #[serde(default, rename = "latestVersion")]
            latest_version: Option<String>,
        }

        /// Declaration order matters: `Nested` is attempted before `Flat`.
        #[derive(Deserialize)]
        #[serde(untagged)]
        enum Wire {
            Nested(NestedShape),
            Flat(FlatShape),
        }

        let info = match Wire::deserialize(de)? {
            Wire::Flat(FlatShape {
                system,
                name,
                weekly_downloads,
                latest_version,
            }) => PackageInfo {
                system,
                name,
                weekly_downloads,
                latest_version,
            },
            Wire::Nested(NestedShape {
                package_key,
                versions,
            }) => {
                // Prefer the entry flagged as default; otherwise fall back to
                // the final entry of the list.
                let preferred = versions
                    .iter()
                    .find(|entry| entry.is_default)
                    .or_else(|| versions.last());
                PackageInfo {
                    system: package_key.system,
                    name: package_key.name,
                    weekly_downloads: None,
                    latest_version: preferred.map(|entry| entry.version_key.version.clone()),
                }
            },
        };
        Ok(info)
    }
}
/// Test-only response wrapper that parses a list of [`PackageRef`]s from
/// either a `packages` or a `versions` key; a missing key yields an empty list.
#[cfg(test)]
#[derive(Debug, Default, Deserialize)]
struct ProjectPackagesResponse {
#[serde(default, alias = "versions")]
packages: Vec<PackageRef>,
}
fn deserialize_string_to_u64_option<'de, D>(de: D) -> Result<Option<u64>, D::Error>
where
D: Deserializer<'de>,
{
let value = Option::<serde_json::Value>::deserialize(de)?;
match value {
None | Some(serde_json::Value::Null) => Ok(None),
Some(serde_json::Value::String(s)) => Ok(s.parse::<u64>().ok()),
Some(serde_json::Value::Number(n)) => Ok(n.as_u64()),
Some(_other) => Ok(None),
}
}
// Unit tests for the deps.dev wire-format parsing and the first-party filter.
#[cfg(test)]
mod tests {
use super::*;
// `weeklyDownloads` given as a string of digits parses to a number.
#[test]
fn deserialize_weekly_downloads_from_string() {
let json = r#"{"system":"NPM","name":"x","weeklyDownloads":"42"}"#;
let info: PackageInfo = serde_json::from_str(json).unwrap();
assert_eq!(info.weekly_downloads, Some(42));
}
// `weeklyDownloads` given as a JSON number is accepted as-is.
#[test]
fn deserialize_weekly_downloads_from_number() {
let json = r#"{"system":"NPM","name":"x","weeklyDownloads":42}"#;
let info: PackageInfo = serde_json::from_str(json).unwrap();
assert_eq!(info.weekly_downloads, Some(42));
}
// An absent `weeklyDownloads` key yields `None`.
#[test]
fn deserialize_weekly_downloads_missing() {
let json = r#"{"system":"NPM","name":"x"}"#;
let info: PackageInfo = serde_json::from_str(json).unwrap();
assert_eq!(info.weekly_downloads, None);
}
// An explicit `null` yields `None`.
#[test]
fn deserialize_weekly_downloads_null() {
let json = r#"{"system":"NPM","name":"x","weeklyDownloads":null}"#;
let info: PackageInfo = serde_json::from_str(json).unwrap();
assert_eq!(info.weekly_downloads, None);
}
// A non-numeric string is tolerated and yields `None` instead of an error.
#[test]
fn deserialize_weekly_downloads_unparseable_string() {
let json = r#"{"system":"NPM","name":"x","weeklyDownloads":"not a number"}"#;
let info: PackageInfo = serde_json::from_str(json).unwrap();
assert_eq!(info.weekly_downloads, None);
}
// One fixture row: (fixture file stem, owner, repo, [(system, name)] pairs
// that must be present in the parsed result).
type FixtureRow = (
&'static str,
&'static str,
&'static str,
&'static [(&'static str, &'static str)],
);
// Fixture files are read from `tests/fixtures/deps_dev/{stem}.json`.
const FIXTURES: &[FixtureRow] = &[
("tokio-rs_tokio", "tokio-rs", "tokio", &[("CARGO", "tokio")]),
("django_django", "django", "django", &[("PYPI", "django")]),
(
"kubernetes_kubernetes",
"kubernetes",
"kubernetes",
&[("GO", "github.com/kubernetes/kubernetes")],
),
];
// Every fixture parses into a non-empty `PackageRef` list that contains the
// expected packages (compared case-insensitively). No filtering is applied.
#[test]
fn project_packages_response_parses_real_fixtures() {
for (stem, _owner, _repo, must_contain) in FIXTURES {
let path = format!(
"{}/tests/fixtures/deps_dev/{stem}.json",
env!("CARGO_MANIFEST_DIR")
);
let body =
std::fs::read(&path).unwrap_or_else(|e| panic!("missing fixture {path}: {e}"));
let parsed: ProjectPackagesResponse = serde_json::from_slice(&body)
.unwrap_or_else(|e| panic!("failed to parse {path}: {e}"));
assert!(
!parsed.packages.is_empty(),
"fixture {stem} must yield at least one PackageRef; got 0",
);
for (sys, name) in *must_contain {
assert!(
parsed.packages.iter().any(|p| {
p.system.eq_ignore_ascii_case(sys)
&& p.name.eq_ignore_ascii_case(name)
}),
"fixture {stem} must contain ({sys}, {name}); got {} unique pkgs starting with {:?}",
parsed.packages.len(),
parsed.packages.iter().take(5).collect::<Vec<_>>(),
);
}
}
}
// The first-party filter keeps the expected package for every fixture and
// never filters a fixture down to nothing.
#[test]
fn first_party_filter_yields_expected_packages_on_real_fixtures() {
for (stem, owner, repo, must_contain) in FIXTURES {
let path = format!(
"{}/tests/fixtures/deps_dev/{stem}.json",
env!("CARGO_MANIFEST_DIR")
);
let body = std::fs::read(&path).unwrap();
let parsed: ProjectVersionsResponse = serde_json::from_slice(&body)
.unwrap_or_else(|e| panic!("failed to parse {path}: {e}"));
let pkgs = first_party_packages_from_versions(&parsed.versions, owner, repo);
assert!(
!pkgs.is_empty(),
"first-party filter zeroed out fixture {stem}; got: {pkgs:?}",
);
for (sys, name) in *must_contain {
assert!(
pkgs.iter().any(|p| {
p.system.eq_ignore_ascii_case(sys) && p.name.eq_ignore_ascii_case(name)
}),
"first-party filter dropped expected ({sys}, {name}) for {stem}; got: {pkgs:?}",
);
}
}
}
// The octocat/Hello-World fixture must produce no first-party packages.
#[test]
fn first_party_filter_zeros_octocat_hello_world() {
let path = format!(
"{}/tests/fixtures/deps_dev/octocat_Hello-World.json",
env!("CARGO_MANIFEST_DIR")
);
let body = std::fs::read(&path).unwrap();
let parsed: ProjectVersionsResponse = serde_json::from_slice(&body).unwrap();
let pkgs = first_party_packages_from_versions(&parsed.versions, "octocat", "Hello-World");
assert!(
pkgs.is_empty(),
"octocat/Hello-World must yield 0 first-party packages (the demo \
repo has only single-pseudo-version GO entries + transitive NPM \
mentions); got: {pkgs:?}",
);
}
// Two versions carrying a `GO_ORIGIN` relation are kept as one package.
#[test]
fn first_party_filter_keeps_verified_provenance_with_enough_versions() {
let body = br#"{
"versions": [
{
"versionKey": { "system": "GO", "name": "github.com/o/r", "version": "v1.0.0" },
"relationProvenance": "GO_ORIGIN"
},
{
"versionKey": { "system": "GO", "name": "github.com/o/r", "version": "v1.1.0" },
"relationProvenance": "GO_ORIGIN"
}
]
}"#;
let parsed: ProjectVersionsResponse = serde_json::from_slice(body).unwrap();
let pkgs = first_party_packages_from_versions(&parsed.versions, "o", "r");
assert_eq!(pkgs.len(), 1);
assert_eq!(pkgs[0].system, "GO");
}
// A relation outside `FIRST_PARTY_RELATIONS` is still kept when the package
// name equals the repository name.
#[test]
fn first_party_filter_keeps_unverified_provenance_when_name_matches() {
let body = br#"{
"versions": [
{
"versionKey": { "system": "CARGO", "name": "tokio", "version": "1.0.0" },
"relationProvenance": "UNVERIFIED_METADATA"
},
{
"versionKey": { "system": "CARGO", "name": "tokio", "version": "1.1.0" },
"relationProvenance": "UNVERIFIED_METADATA"
}
]
}"#;
let parsed: ProjectVersionsResponse = serde_json::from_slice(body).unwrap();
let pkgs = first_party_packages_from_versions(&parsed.versions, "tokio-rs", "tokio");
assert_eq!(pkgs.len(), 1);
assert_eq!(pkgs[0].name, "tokio");
}
// A relation outside `FIRST_PARTY_RELATIONS` combined with a name that merely
// contains the repository name is dropped.
#[test]
fn first_party_filter_drops_unverified_provenance_when_name_mismatches() {
let body = br#"{
"versions": [
{
"versionKey": { "system": "CARGO", "name": "broker_tokio", "version": "0.1.0" },
"relationProvenance": "UNVERIFIED_METADATA"
},
{
"versionKey": { "system": "CARGO", "name": "broker_tokio", "version": "0.2.0" },
"relationProvenance": "UNVERIFIED_METADATA"
}
]
}"#;
let parsed: ProjectVersionsResponse = serde_json::from_slice(body).unwrap();
let pkgs = first_party_packages_from_versions(&parsed.versions, "tokio-rs", "tokio");
assert!(
pkgs.is_empty(),
"transitive mention must be dropped; got: {pkgs:?}"
);
}
// A single version entry is below `MIN_FIRST_PARTY_VERSIONS`, so the package
// is dropped even though its relation and name both match.
#[test]
fn first_party_filter_drops_single_version_packages() {
let body = br#"{
"versions": [
{
"versionKey": { "system": "GO", "name": "github.com/o/Hello-World", "version": "v0.0.1" },
"relationProvenance": "GO_ORIGIN"
}
]
}"#;
let parsed: ProjectVersionsResponse = serde_json::from_slice(body).unwrap();
let pkgs = first_party_packages_from_versions(&parsed.versions, "o", "Hello-World");
assert!(
pkgs.is_empty(),
"single-version entry must be dropped; got: {pkgs:?}"
);
}
// Entries without any relation field and without a name match are dropped.
#[test]
fn first_party_filter_treats_missing_provenance_as_not_first_party() {
let body = br#"{
"versions": [
{ "versionKey": { "system": "CARGO", "name": "x", "version": "1.0.0" } },
{ "versionKey": { "system": "CARGO", "name": "x", "version": "1.1.0" } }
]
}"#;
let parsed: ProjectVersionsResponse = serde_json::from_slice(body).unwrap();
let pkgs = first_party_packages_from_versions(&parsed.versions, "owner-y", "y");
assert!(
pkgs.is_empty(),
"missing provenance + no name-match → drop; got: {pkgs:?}"
);
}
// `name_matches_repo` accepts the bare repo name, `@owner/repo`, and paths
// ending in `/owner/repo` (case-insensitively), and rejects look-alikes that
// belong to a different owner or only contain the repo name.
#[test]
fn name_matches_repo_owner_aware() {
assert!(name_matches_repo(
"github.com/tokio-rs/tokio",
"tokio-rs",
"tokio"
));
assert!(name_matches_repo(
"github.com/Kubernetes/Kubernetes",
"kubernetes",
"kubernetes"
));
assert!(name_matches_repo(
"@octocat/hello-world",
"octocat",
"Hello-World"
));
assert!(name_matches_repo("django", "django", "django"));
assert!(!name_matches_repo("broker_tokio", "tokio-rs", "tokio"));
assert!(!name_matches_repo("tokio_macros", "tokio-rs", "tokio"));
assert!(!name_matches_repo(
"@nloyyjuqc/hello-world",
"octocat",
"Hello-World"
));
assert!(!name_matches_repo(
"github.com/someone-else/Hello-World",
"octocat",
"Hello-World"
));
}
// The flat `{ "system", "name" }` element shape is accepted.
#[test]
fn project_packages_handles_legacy_flat_shape() {
let body = br#"{ "packages": [ { "system": "CARGO", "name": "x" } ] }"#;
let parsed: ProjectPackagesResponse = serde_json::from_slice(body).unwrap();
assert_eq!(parsed.packages.len(), 1);
assert_eq!(parsed.packages[0].name, "x");
}
// The element shape nested under `packageKey` is accepted.
#[test]
fn project_packages_handles_packagekey_nested_shape() {
let body =
br#"{ "packages": [ { "packageKey": { "system": "NPM", "name": "lodash" } } ] }"#;
let parsed: ProjectPackagesResponse = serde_json::from_slice(body).unwrap();
assert_eq!(parsed.packages.len(), 1);
assert_eq!(parsed.packages[0].system, "NPM");
assert_eq!(parsed.packages[0].name, "lodash");
}
// The derived `Ord` sorts by `system` first and `name` second.
#[test]
fn package_ref_sorts_by_system_then_name() {
let mut v = vec![
PackageRef {
system: "NPM".into(),
name: "b".into(),
},
PackageRef {
system: "GO".into(),
name: "a".into(),
},
PackageRef {
system: "NPM".into(),
name: "a".into(),
},
];
v.sort();
assert_eq!(
v,
vec![
PackageRef {
system: "GO".into(),
name: "a".into(),
},
PackageRef {
system: "NPM".into(),
name: "a".into(),
},
PackageRef {
system: "NPM".into(),
name: "b".into(),
},
]
);
}
}