Skip to main content

libverify_github/
pypi_attestation.rs

1//! PyPI Attestation API client (PEP 740).
2//!
3//! Fetches Sigstore-based provenance attestations from PyPI's Integrity API
4//! to enrich `DependencySignatureEvidence` with publisher identity, source
5//! repository, and transparency log information.
6//!
7//! Two-phase approach:
8//! 1. Simple API (`/simple/{project}/`) → find provenance URL for the version's sdist
9//! 2. Integrity API (provenance URL) → fetch attestation with publisher + Rekor entry
10//!
11//! API docs: https://docs.pypi.org/api/integrity/
12
13use anyhow::{Context, Result, bail};
14use reqwest::blocking::Client;
15use reqwest::header::{ACCEPT, HeaderMap, HeaderValue, USER_AGENT};
16use serde::Deserialize;
17
18use libverify_core::evidence::DependencySignatureEvidence;
19use libverify_core::evidence::VerificationOutcome;
20
/// Base URL of PyPI's Simple repository API; the normalized project name is
/// appended to it as a path segment when listing a project's files.
const PYPI_SIMPLE_URL: &str = "https://pypi.org/simple";
22
/// Blocking HTTP client for PyPI's Simple and Integrity APIs.
///
/// Create one with [`PypiAttestationClient::new`], which configures the
/// `User-Agent` header and the request timeout.
pub struct PypiAttestationClient {
    /// Underlying `reqwest` blocking client used for every request.
    client: Client,
}
26
/// Provenance data extracted from a PyPI attestation.
///
/// Every field is optional because the corresponding part of the provenance
/// response (publisher, transparency entries) may be absent.
#[derive(Debug, Clone)]
pub struct PypiProvenance {
    /// Source repository taken from the attestation's publisher, expanded to a
    /// URL when the API returned only an `owner/repo` slug.
    pub source_repo: Option<String>,
    /// Publisher identity: `repository@workflow` when a workflow is present,
    /// otherwise just the repository.
    pub signer_identity: Option<String>,
    /// `logIndex` of the first transparency-log entry of the first attestation.
    pub transparency_log_index: Option<String>,
}
34
35impl PypiAttestationClient {
36    pub fn new() -> Result<Self> {
37        let mut headers = HeaderMap::new();
38        headers.insert(
39            USER_AGENT,
40            HeaderValue::from_static("libverify-github/0.1.0"),
41        );
42
43        let client = Client::builder()
44            .default_headers(headers)
45            .timeout(std::time::Duration::from_secs(10))
46            .build()
47            .context("failed to create PyPI attestation HTTP client")?;
48        Ok(Self { client })
49    }
50
51    /// Fetch provenance for a single package version.
52    /// Returns `None` if the package has no attestation.
53    pub fn fetch_provenance(&self, name: &str, version: &str) -> Result<Option<PypiProvenance>> {
54        // Phase 1: Get provenance URL from Simple API
55        let provenance_url = self.find_provenance_url(name, version)?;
56        let provenance_url = match provenance_url {
57            Some(url) => url,
58            None => return Ok(None),
59        };
60
61        // Phase 2: Fetch provenance
62        let response = self
63            .client
64            .get(&provenance_url)
65            .header(ACCEPT, "application/vnd.pypi.integrity.v1+json")
66            .send()
67            .with_context(|| format!("PyPI provenance request failed for {name}@{version}"))?;
68
69        let status = response.status();
70        if status.as_u16() == 404 {
71            return Ok(None);
72        }
73        if !status.is_success() {
74            bail!(
75                "PyPI provenance API error for {name}@{version}: {}",
76                status.as_u16()
77            );
78        }
79
80        let payload: ProvenanceResponse = response
81            .json()
82            .with_context(|| format!("failed to parse PyPI provenance for {name}@{version}"))?;
83
84        let bundle = match payload.attestation_bundles.into_iter().next() {
85            Some(b) => b,
86            None => return Ok(None),
87        };
88
89        let source_repo = bundle.publisher.as_ref().map(|p| {
90            // Normalize to full URL if it's just owner/repo
91            if p.repository.starts_with("http") {
92                p.repository.clone()
93            } else {
94                format!("https://github.com/{}", p.repository)
95            }
96        });
97
98        let signer_identity = bundle.publisher.as_ref().map(|p| match &p.workflow {
99            Some(wf) => format!("{}@{}", p.repository, wf),
100            None => p.repository.clone(),
101        });
102
103        let tlog_index = bundle
104            .attestations
105            .into_iter()
106            .next()
107            .and_then(|a| a.verification_material)
108            .and_then(|vm| vm.transparency_entries)
109            .and_then(|entries| entries.into_iter().next())
110            .map(|entry| entry.log_index);
111
112        Ok(Some(PypiProvenance {
113            source_repo,
114            signer_identity,
115            transparency_log_index: tlog_index,
116        }))
117    }
118
119    /// Find the provenance URL for a package version from the Simple API.
120    /// Prefers sdist (.tar.gz), falls back to first wheel.
121    fn find_provenance_url(&self, name: &str, version: &str) -> Result<Option<String>> {
122        // PyPI normalizes names: underscores → hyphens, lowercase
123        let normalized = name.to_lowercase().replace('_', "-");
124        let url = format!("{PYPI_SIMPLE_URL}/{normalized}/");
125
126        let response = self
127            .client
128            .get(&url)
129            .header(ACCEPT, "application/vnd.pypi.simple.v1+json")
130            .send()
131            .with_context(|| format!("PyPI Simple API request failed for {name}"))?;
132
133        if !response.status().is_success() {
134            return Ok(None);
135        }
136
137        let listing: SimpleApiResponse = response
138            .json()
139            .with_context(|| format!("failed to parse PyPI Simple API for {name}"))?;
140
141        // Filter to files matching this version
142        let version_prefix = format!("{normalized}-{version}");
143        let matching: Vec<&SimpleFile> = listing
144            .files
145            .iter()
146            .filter(|f| {
147                let fname = f.filename.to_lowercase().replace('_', "-");
148                fname.starts_with(&version_prefix) && f.provenance.is_some()
149            })
150            .collect();
151
152        // Prefer sdist, then any file with provenance
153        let chosen = matching
154            .iter()
155            .find(|f| f.filename.ends_with(".tar.gz"))
156            .or_else(|| matching.first());
157
158        Ok(chosen.and_then(|f| f.provenance.clone()))
159    }
160
161    /// Enrich PyPI dependencies in-place with provenance from the attestation API.
162    /// Uses bounded parallel fetching.
163    pub fn enrich_pypi_deps(&self, deps: &mut [DependencySignatureEvidence]) {
164        const CONCURRENCY: usize = 16;
165
166        let pypi_indices: Vec<usize> = deps
167            .iter()
168            .enumerate()
169            .filter(|(_, d)| d.registry.as_deref() == Some("pypi.org"))
170            .map(|(i, _)| i)
171            .collect();
172
173        if pypi_indices.is_empty() {
174            return;
175        }
176
177        let total = pypi_indices.len();
178        eprintln!("Fetching PyPI provenance for {total} packages ({CONCURRENCY} concurrent)...");
179
180        let queries: Vec<(usize, String, String)> = pypi_indices
181            .iter()
182            .map(|&i| (i, deps[i].name.clone(), deps[i].version.clone()))
183            .collect();
184
185        let results: Vec<(usize, Option<PypiProvenance>)> = std::thread::scope(|scope| {
186            let (tx, rx) = std::sync::mpsc::channel::<(usize, String, String)>();
187            let rx = std::sync::Arc::new(std::sync::Mutex::new(rx));
188            let (result_tx, result_rx) =
189                std::sync::mpsc::channel::<(usize, Option<PypiProvenance>)>();
190            let done = std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0));
191
192            let workers: Vec<_> = (0..CONCURRENCY.min(total))
193                .map(|_| {
194                    let rx = rx.clone();
195                    let result_tx = result_tx.clone();
196                    let done = done.clone();
197                    let client = &self;
198                    scope.spawn(move || {
199                        loop {
200                            let work = {
201                                let guard = rx.lock().unwrap();
202                                guard.recv().ok()
203                            };
204                            match work {
205                                Some((idx, name, version)) => {
206                                    let prov = match client.fetch_provenance(&name, &version) {
207                                        Ok(p) => p,
208                                        Err(e) => {
209                                            eprintln!(
210                                                "Warning: PyPI attestation for {name}@{version}: {e:#}"
211                                            );
212                                            None
213                                        }
214                                    };
215                                    let count = done.fetch_add(1, std::sync::atomic::Ordering::Relaxed) + 1;
216                                    if count.is_multiple_of(50) || count == total {
217                                        eprint!("\r  [{count}/{total}]");
218                                    }
219                                    let _ = result_tx.send((idx, prov));
220                                }
221                                None => break,
222                            }
223                        }
224                    })
225                })
226                .collect();
227
228            drop(result_tx);
229
230            for q in queries {
231                let _ = tx.send(q);
232            }
233            drop(tx);
234
235            let results: Vec<_> = result_rx.iter().collect();
236
237            for w in workers {
238                let _ = w.join();
239            }
240
241            results
242        });
243
244        eprintln!();
245
246        let mut enriched = 0usize;
247        for (idx, prov) in results {
248            if let Some(prov) = prov {
249                let dep = &mut deps[idx];
250                dep.source_repo = prov.source_repo;
251                dep.signer_identity = prov.signer_identity;
252                if let Some(log_index) = prov.transparency_log_index {
253                    dep.transparency_log_uri =
254                        Some(format!("https://search.sigstore.dev/?logIndex={log_index}"));
255                }
256                if dep.verification == VerificationOutcome::ChecksumMatch {
257                    dep.verification = VerificationOutcome::Verified;
258                    dep.signature_mechanism = Some("sigstore".to_string());
259                }
260                enriched += 1;
261            }
262        }
263
264        eprintln!("  {enriched}/{total} PyPI packages have provenance attestations");
265    }
266}
267
268// --- PyPI Simple API response types ---
269
/// Subset of the Simple API project listing that this module reads.
#[derive(Debug, Deserialize)]
struct SimpleApiResponse {
    /// Files listed for the project (all versions).
    files: Vec<SimpleFile>,
}
274
/// One file entry of a Simple API project listing.
#[derive(Debug, Deserialize)]
struct SimpleFile {
    /// Distribution file name, e.g. `foo-1.0.0.tar.gz`.
    filename: String,
    /// URL of the file's provenance object; `None` when the key is `null` or absent.
    provenance: Option<String>,
}
280
281// --- PyPI Integrity API response types ---
282
/// Subset of an Integrity API provenance object that this module reads.
#[derive(Debug, Deserialize)]
struct ProvenanceResponse {
    /// Attestation bundles attached to the file.
    attestation_bundles: Vec<AttestationBundle>,
}
287
/// A publisher together with the attestations it produced.
#[derive(Debug, Deserialize)]
struct AttestationBundle {
    /// Identity that published the attestations, if present in the response.
    publisher: Option<Publisher>,
    /// Attestations contained in this bundle.
    attestations: Vec<PypiAttestation>,
}
293
/// Publisher identity of an attestation bundle.
///
/// NOTE(review): `repository` is a required field, so a publisher object
/// without that key makes deserialization of the whole provenance response
/// fail — confirm every publisher kind the API can return carries it.
#[derive(Debug, Deserialize)]
struct Publisher {
    /// Repository as reported by the API, e.g. `pyca/cryptography`.
    repository: String,
    /// Publishing workflow, e.g. `pypi-publish.yml`, when reported.
    workflow: Option<String>,
}
299
/// A single attestation; only its verification material is read.
#[derive(Debug, Deserialize)]
struct PypiAttestation {
    /// Verification material of the attestation, if present.
    verification_material: Option<PypiVerificationMaterial>,
}
304
/// Verification material of an attestation; only the transparency-log
/// entries are read.
#[derive(Debug, Deserialize)]
struct PypiVerificationMaterial {
    /// Transparency-log entries recorded for the attestation, if present.
    transparency_entries: Option<Vec<TransparencyEntry>>,
}
309
/// One transparency-log entry. Keys are camelCase in the JSON, so the field
/// below is read from `logIndex`.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct TransparencyEntry {
    /// Index of the entry in the transparency log, kept as the string the API sends.
    log_index: String,
}
315
#[cfg(test)]
mod tests {
    use super::*;

    /// A Simple API listing parses, and `provenance: null` becomes `None`.
    #[test]
    fn simple_api_response_deserializes() {
        let payload = r#"{
            "files": [
                {
                    "filename": "foo-1.0.0.tar.gz",
                    "provenance": "https://pypi.org/integrity/foo/1.0.0/foo-1.0.0.tar.gz/provenance"
                },
                {
                    "filename": "foo-1.0.0-py3-none-any.whl",
                    "provenance": null
                }
            ]
        }"#;
        let listing: SimpleApiResponse = serde_json::from_str(payload).unwrap();

        // One flag per file: the sdist has provenance, the wheel does not.
        let has_provenance: Vec<bool> = listing
            .files
            .iter()
            .map(|file| file.provenance.is_some())
            .collect();
        assert_eq!(has_provenance, [true, false]);
    }

    /// A provenance object parses, unknown keys are ignored, and the camelCase
    /// `logIndex` key lands in `log_index`.
    #[test]
    fn provenance_response_deserializes() {
        let payload = r#"{
            "attestation_bundles": [{
                "publisher": {
                    "kind": "GitHub",
                    "repository": "pyca/cryptography",
                    "workflow": "pypi-publish.yml",
                    "environment": null
                },
                "attestations": [{
                    "version": 1,
                    "verification_material": {
                        "transparency_entries": [{
                            "logIndex": "152047507",
                            "logId": {"keyId": "test"}
                        }]
                    }
                }]
            }]
        }"#;
        let parsed: ProvenanceResponse = serde_json::from_str(payload).unwrap();
        let bundle = &parsed.attestation_bundles[0];

        let publisher = bundle.publisher.as_ref().unwrap();
        assert_eq!(publisher.repository, "pyca/cryptography");

        let first_entry = bundle.attestations[0]
            .verification_material
            .as_ref()
            .and_then(|material| material.transparency_entries.as_ref())
            .and_then(|entries| entries.first())
            .unwrap();
        assert_eq!(first_entry.log_index, "152047507");
    }
}