use std::sync::atomic::{AtomicUsize, Ordering};

use anyhow::{Context, Result, bail};
use reqwest::blocking::Client;
use reqwest::header::{ACCEPT, HeaderMap, HeaderValue, USER_AGENT};
use serde::Deserialize;

use libverify_core::evidence::DependencySignatureEvidence;
use libverify_core::evidence::VerificationOutcome;

/// Base URL of the PyPI Simple repository API.
///
/// Per-project listings are requested from `{PYPI_SIMPLE_URL}/{normalized-name}/`,
/// so the value must not end with a slash.
const PYPI_SIMPLE_URL: &str = "https://pypi.org/simple";

23pub struct PypiAttestationClient {
24 client: Client,
25}
26
/// Provenance facts extracted from the first attestation bundle of a release file.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PypiProvenance {
    /// Repository the publisher built from. Values that do not already start
    /// with `http` are expanded to `https://github.com/{repository}`.
    pub source_repo: Option<String>,
    /// Publisher identity: `{repository}@{workflow}` when a workflow is
    /// recorded, otherwise just the repository.
    pub signer_identity: Option<String>,
    /// Log index of the first transparency entry of the first attestation.
    pub transparency_log_index: Option<String>,
}

35impl PypiAttestationClient {
36 pub fn new() -> Result<Self> {
37 let mut headers = HeaderMap::new();
38 headers.insert(
39 USER_AGENT,
40 HeaderValue::from_static("libverify-github/0.1.0"),
41 );
42
43 let client = Client::builder()
44 .default_headers(headers)
45 .timeout(std::time::Duration::from_secs(10))
46 .build()
47 .context("failed to create PyPI attestation HTTP client")?;
48 Ok(Self { client })
49 }
50
51 pub fn fetch_provenance(&self, name: &str, version: &str) -> Result<Option<PypiProvenance>> {
54 let provenance_url = self.find_provenance_url(name, version)?;
56 let provenance_url = match provenance_url {
57 Some(url) => url,
58 None => return Ok(None),
59 };
60
61 let response = self
63 .client
64 .get(&provenance_url)
65 .header(ACCEPT, "application/vnd.pypi.integrity.v1+json")
66 .send()
67 .with_context(|| format!("PyPI provenance request failed for {name}@{version}"))?;
68
69 let status = response.status();
70 if status.as_u16() == 404 {
71 return Ok(None);
72 }
73 if !status.is_success() {
74 bail!(
75 "PyPI provenance API error for {name}@{version}: {}",
76 status.as_u16()
77 );
78 }
79
80 let payload: ProvenanceResponse = response
81 .json()
82 .with_context(|| format!("failed to parse PyPI provenance for {name}@{version}"))?;
83
84 let bundle = match payload.attestation_bundles.into_iter().next() {
85 Some(b) => b,
86 None => return Ok(None),
87 };
88
89 let source_repo = bundle.publisher.as_ref().map(|p| {
90 if p.repository.starts_with("http") {
92 p.repository.clone()
93 } else {
94 format!("https://github.com/{}", p.repository)
95 }
96 });
97
98 let signer_identity = bundle.publisher.as_ref().map(|p| match &p.workflow {
99 Some(wf) => format!("{}@{}", p.repository, wf),
100 None => p.repository.clone(),
101 });
102
103 let tlog_index = bundle
104 .attestations
105 .into_iter()
106 .next()
107 .and_then(|a| a.verification_material)
108 .and_then(|vm| vm.transparency_entries)
109 .and_then(|entries| entries.into_iter().next())
110 .map(|entry| entry.log_index);
111
112 Ok(Some(PypiProvenance {
113 source_repo,
114 signer_identity,
115 transparency_log_index: tlog_index,
116 }))
117 }
118
119 fn find_provenance_url(&self, name: &str, version: &str) -> Result<Option<String>> {
122 let normalized = name.to_lowercase().replace('_', "-");
124 let url = format!("{PYPI_SIMPLE_URL}/{normalized}/");
125
126 let response = self
127 .client
128 .get(&url)
129 .header(ACCEPT, "application/vnd.pypi.simple.v1+json")
130 .send()
131 .with_context(|| format!("PyPI Simple API request failed for {name}"))?;
132
133 if !response.status().is_success() {
134 return Ok(None);
135 }
136
137 let listing: SimpleApiResponse = response
138 .json()
139 .with_context(|| format!("failed to parse PyPI Simple API for {name}"))?;
140
141 let version_prefix = format!("{normalized}-{version}");
143 let matching: Vec<&SimpleFile> = listing
144 .files
145 .iter()
146 .filter(|f| {
147 let fname = f.filename.to_lowercase().replace('_', "-");
148 fname.starts_with(&version_prefix) && f.provenance.is_some()
149 })
150 .collect();
151
152 let chosen = matching
154 .iter()
155 .find(|f| f.filename.ends_with(".tar.gz"))
156 .or_else(|| matching.first());
157
158 Ok(chosen.and_then(|f| f.provenance.clone()))
159 }
160
161 pub fn enrich_pypi_deps(&self, deps: &mut [DependencySignatureEvidence]) {
164 const CONCURRENCY: usize = 16;
165
166 let pypi_indices: Vec<usize> = deps
167 .iter()
168 .enumerate()
169 .filter(|(_, d)| d.registry.as_deref() == Some("pypi.org"))
170 .map(|(i, _)| i)
171 .collect();
172
173 if pypi_indices.is_empty() {
174 return;
175 }
176
177 let total = pypi_indices.len();
178 eprintln!("Fetching PyPI provenance for {total} packages ({CONCURRENCY} concurrent)...");
179
180 let queries: Vec<(usize, String, String)> = pypi_indices
181 .iter()
182 .map(|&i| (i, deps[i].name.clone(), deps[i].version.clone()))
183 .collect();
184
185 let results: Vec<(usize, Option<PypiProvenance>)> = std::thread::scope(|scope| {
186 let (tx, rx) = std::sync::mpsc::channel::<(usize, String, String)>();
187 let rx = std::sync::Arc::new(std::sync::Mutex::new(rx));
188 let (result_tx, result_rx) =
189 std::sync::mpsc::channel::<(usize, Option<PypiProvenance>)>();
190 let done = std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0));
191
192 let workers: Vec<_> = (0..CONCURRENCY.min(total))
193 .map(|_| {
194 let rx = rx.clone();
195 let result_tx = result_tx.clone();
196 let done = done.clone();
197 let client = &self;
198 scope.spawn(move || {
199 loop {
200 let work = {
201 let guard = rx.lock().unwrap();
202 guard.recv().ok()
203 };
204 match work {
205 Some((idx, name, version)) => {
206 let prov = match client.fetch_provenance(&name, &version) {
207 Ok(p) => p,
208 Err(e) => {
209 eprintln!(
210 "Warning: PyPI attestation for {name}@{version}: {e:#}"
211 );
212 None
213 }
214 };
215 let count = done.fetch_add(1, std::sync::atomic::Ordering::Relaxed) + 1;
216 if count.is_multiple_of(50) || count == total {
217 eprint!("\r [{count}/{total}]");
218 }
219 let _ = result_tx.send((idx, prov));
220 }
221 None => break,
222 }
223 }
224 })
225 })
226 .collect();
227
228 drop(result_tx);
229
230 for q in queries {
231 let _ = tx.send(q);
232 }
233 drop(tx);
234
235 let results: Vec<_> = result_rx.iter().collect();
236
237 for w in workers {
238 let _ = w.join();
239 }
240
241 results
242 });
243
244 eprintln!();
245
246 let mut enriched = 0usize;
247 for (idx, prov) in results {
248 if let Some(prov) = prov {
249 let dep = &mut deps[idx];
250 dep.source_repo = prov.source_repo;
251 dep.signer_identity = prov.signer_identity;
252 if let Some(log_index) = prov.transparency_log_index {
253 dep.transparency_log_uri =
254 Some(format!("https://search.sigstore.dev/?logIndex={log_index}"));
255 }
256 if dep.verification == VerificationOutcome::ChecksumMatch {
257 dep.verification = VerificationOutcome::Verified;
258 dep.signature_mechanism = Some("sigstore".to_string());
259 }
260 enriched += 1;
261 }
262 }
263
264 eprintln!(" {enriched}/{total} PyPI packages have provenance attestations");
265 }
266}
267
268#[derive(Debug, Deserialize)]
271struct SimpleApiResponse {
272 files: Vec<SimpleFile>,
273}
274
275#[derive(Debug, Deserialize)]
276struct SimpleFile {
277 filename: String,
278 provenance: Option<String>,
279}
280
281#[derive(Debug, Deserialize)]
284struct ProvenanceResponse {
285 attestation_bundles: Vec<AttestationBundle>,
286}
287
288#[derive(Debug, Deserialize)]
289struct AttestationBundle {
290 publisher: Option<Publisher>,
291 attestations: Vec<PypiAttestation>,
292}
293
294#[derive(Debug, Deserialize)]
295struct Publisher {
296 repository: String,
297 workflow: Option<String>,
298}
299
300#[derive(Debug, Deserialize)]
301struct PypiAttestation {
302 verification_material: Option<PypiVerificationMaterial>,
303}
304
305#[derive(Debug, Deserialize)]
306struct PypiVerificationMaterial {
307 transparency_entries: Option<Vec<TransparencyEntry>>,
308}
309
310#[derive(Debug, Deserialize)]
311#[serde(rename_all = "camelCase")]
312struct TransparencyEntry {
313 log_index: String,
314}
315
#[cfg(test)]
mod tests {
    use super::*;

    /// A listing with one attested and one unattested file parses, and a
    /// `null` provenance maps to `None`.
    #[test]
    fn simple_api_response_deserializes() {
        let json = r#"{
            "files": [
                {
                    "filename": "foo-1.0.0.tar.gz",
                    "provenance": "https://pypi.org/integrity/foo/1.0.0/foo-1.0.0.tar.gz/provenance"
                },
                {
                    "filename": "foo-1.0.0-py3-none-any.whl",
                    "provenance": null
                }
            ]
        }"#;
        let resp: SimpleApiResponse = serde_json::from_str(json).unwrap();
        assert_eq!(resp.files.len(), 2);
        assert!(resp.files[0].provenance.is_some());
        assert!(resp.files[1].provenance.is_none());
    }

    /// A provenance document parses down to the publisher repository and the
    /// camelCase `logIndex`, ignoring keys the structs do not declare.
    #[test]
    fn provenance_response_deserializes() {
        let json = r#"{
            "attestation_bundles": [{
                "publisher": {
                    "kind": "GitHub",
                    "repository": "pyca/cryptography",
                    "workflow": "pypi-publish.yml",
                    "environment": null
                },
                "attestations": [{
                    "version": 1,
                    "verification_material": {
                        "transparency_entries": [{
                            "logIndex": "152047507",
                            "logId": {"keyId": "test"}
                        }]
                    }
                }]
            }]
        }"#;
        let resp: ProvenanceResponse = serde_json::from_str(json).unwrap();
        let bundle = &resp.attestation_bundles[0];
        assert_eq!(
            bundle.publisher.as_ref().unwrap().repository,
            "pyca/cryptography"
        );
        let tlog = &bundle.attestations[0]
            .verification_material
            .as_ref()
            .unwrap()
            .transparency_entries
            .as_ref()
            .unwrap()[0];
        assert_eq!(tlog.log_index, "152047507");
    }
}