Skip to main content

libverify_github/
npm_attestation.rs

//! npm Registry Attestation API client.
//!
//! Fetches Sigstore-based provenance attestations from the npm registry
//! to enrich `DependencySignatureEvidence` with signer identity, source
//! repository, source commit, and transparency log information.
//!
//! API endpoint: `https://registry.npmjs.org/-/npm/v1/attestations/{name}@{version}`
//!
//! Each response contains up to two attestations:
//! - **publish attestation** (`predicateType: .../npm/attestation/.../publish/v0.1`)
//! - **SLSA provenance** (`predicateType: https://slsa.dev/provenance/v1`)
//!
//! We extract provenance data from the SLSA provenance attestation's
//! DSSE envelope payload (base64-encoded in-toto Statement v1).
15
16use anyhow::{Context, Result, bail};
17use reqwest::blocking::Client;
18use reqwest::header::{ACCEPT, HeaderMap, HeaderValue, USER_AGENT};
19use serde::Deserialize;
20
21use libverify_core::evidence::DependencySignatureEvidence;
22use libverify_core::evidence::VerificationOutcome;
23
24const REGISTRY_URL: &str = "https://registry.npmjs.org";
25const SLSA_PROVENANCE_V1: &str = "https://slsa.dev/provenance/v1";
26
27pub struct NpmAttestationClient {
28    client: Client,
29}
30
/// Provenance data extracted from an npm SLSA attestation.
///
/// Every field is optional because an attestation may omit the part of the
/// bundle or predicate it is read from. `Default` yields a value with all
/// fields set to `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct NpmProvenance {
    /// Repository recorded for the build workflow in the SLSA predicate.
    pub source_repo: Option<String>,
    /// `gitCommit` digest of the first resolved dependency in the predicate.
    pub source_commit: Option<String>,
    /// Workflow identity in the form `{repository}/.github/workflows/{file}@{ref}`.
    pub signer_identity: Option<String>,
    /// `logIndex` of the first transparency log entry in the bundle.
    pub transparency_log_index: Option<String>,
}
39
40impl NpmAttestationClient {
41    pub fn new() -> Result<Self> {
42        let mut headers = HeaderMap::new();
43        headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
44        headers.insert(
45            USER_AGENT,
46            HeaderValue::from_static("libverify-github/0.1.0"),
47        );
48
49        let client = Client::builder()
50            .default_headers(headers)
51            .timeout(std::time::Duration::from_secs(10))
52            .build()
53            .context("failed to create npm attestation HTTP client")?;
54        Ok(Self { client })
55    }
56
57    /// Fetch provenance for a single package version.
58    /// Returns `None` if the package has no SLSA provenance attestation.
59    pub fn fetch_provenance(&self, name: &str, version: &str) -> Result<Option<NpmProvenance>> {
60        let url = format!("{REGISTRY_URL}/-/npm/v1/attestations/{name}@{version}");
61        let response = self
62            .client
63            .get(&url)
64            .send()
65            .with_context(|| format!("npm attestation request failed for {name}@{version}"))?;
66
67        let status = response.status();
68        if status.as_u16() == 404 {
69            return Ok(None);
70        }
71        if !status.is_success() {
72            bail!(
73                "npm attestation API error for {name}@{version}: {} {}",
74                status.as_u16(),
75                status.canonical_reason().unwrap_or("Unknown")
76            );
77        }
78
79        let payload: AttestationResponse = response
80            .json()
81            .with_context(|| format!("failed to parse attestation for {name}@{version}"))?;
82
83        // Find the SLSA provenance attestation
84        let slsa = payload
85            .attestations
86            .iter()
87            .find(|a| a.predicate_type == SLSA_PROVENANCE_V1);
88
89        let slsa = match slsa {
90            Some(a) => a,
91            None => return Ok(None),
92        };
93
94        let bundle = match &slsa.bundle {
95            Some(b) => b,
96            None => return Ok(None),
97        };
98
99        // Extract transparency log entry
100        let tlog_index = bundle
101            .verification_material
102            .as_ref()
103            .and_then(|vm| vm.tlog_entries.as_ref())
104            .and_then(|entries| entries.first())
105            .map(|entry| entry.log_index.clone());
106
107        // Decode the DSSE payload to get provenance predicate
108        let payload_b64 = match &bundle.dsse_envelope {
109            Some(env) => &env.payload,
110            None => {
111                return Ok(Some(NpmProvenance {
112                    source_repo: None,
113                    source_commit: None,
114                    signer_identity: None,
115                    transparency_log_index: tlog_index,
116                }));
117            }
118        };
119
120        let payload_bytes = base64_decode(payload_b64)?;
121        let statement: InTotoStatement =
122            serde_json::from_slice(&payload_bytes).context("failed to parse in-toto statement")?;
123
124        let (source_repo, source_commit, signer_identity) = match statement.predicate {
125            Some(predicate) => {
126                let repo = predicate
127                    .build_definition
128                    .as_ref()
129                    .and_then(|bd| bd.external_parameters.as_ref())
130                    .and_then(|ep| ep.workflow.as_ref())
131                    .map(|w| w.repository.clone());
132
133                let commit = predicate
134                    .build_definition
135                    .as_ref()
136                    .and_then(|bd| bd.resolved_dependencies.as_ref())
137                    .and_then(|deps| deps.first())
138                    .and_then(|dep| dep.digest.as_ref())
139                    .and_then(|d| d.git_commit.clone());
140
141                // Signer identity: use the workflow URI as identity
142                // (matches the SAN in the Sigstore cert)
143                let identity = predicate
144                    .build_definition
145                    .as_ref()
146                    .and_then(|bd| bd.external_parameters.as_ref())
147                    .and_then(|ep| ep.workflow.as_ref())
148                    .map(|w| {
149                        format!(
150                            "{}/.github/workflows/{}@{}",
151                            w.repository,
152                            w.path.strip_prefix(".github/workflows/").unwrap_or(&w.path),
153                            w.r#ref
154                        )
155                    });
156
157                (repo, commit, identity)
158            }
159            None => (None, None, None),
160        };
161
162        Ok(Some(NpmProvenance {
163            source_repo,
164            source_commit,
165            signer_identity,
166            transparency_log_index: tlog_index,
167        }))
168    }
169
170    /// Enrich npm dependencies in-place with provenance data from the attestation API.
171    /// Non-npm dependencies and dependencies that lack attestations are left unchanged.
172    ///
173    /// Uses a bounded worker pool (`CONCURRENCY` threads) to handle large
174    /// dependency trees efficiently. Progress is reported to stderr.
175    pub fn enrich_npm_deps(&self, deps: &mut [DependencySignatureEvidence]) {
176        const CONCURRENCY: usize = 16;
177
178        // Collect indices of npm deps to enrich
179        let npm_indices: Vec<usize> = deps
180            .iter()
181            .enumerate()
182            .filter(|(_, d)| d.registry.as_deref() == Some("registry.npmjs.org"))
183            .map(|(i, _)| i)
184            .collect();
185
186        if npm_indices.is_empty() {
187            return;
188        }
189
190        let total = npm_indices.len();
191        eprintln!("Fetching npm provenance for {total} packages ({CONCURRENCY} concurrent)...");
192
193        // Collect (index, name, version) for the work queue
194        let queries: Vec<(usize, String, String)> = npm_indices
195            .iter()
196            .map(|&i| (i, deps[i].name.clone(), deps[i].version.clone()))
197            .collect();
198
199        // Worker pool: CONCURRENCY threads pull from a shared work queue
200        let results: Vec<(usize, Option<NpmProvenance>)> = std::thread::scope(|scope| {
201            let (tx, rx) = std::sync::mpsc::channel::<(usize, String, String)>();
202            let rx = std::sync::Arc::new(std::sync::Mutex::new(rx));
203            let result_tx_orig = std::sync::mpsc::channel::<(usize, Option<NpmProvenance>)>();
204            let result_tx = result_tx_orig.0;
205            let result_rx = result_tx_orig.1;
206            let done = std::sync::Arc::new(std::sync::atomic::AtomicUsize::new(0));
207
208            // Spawn worker threads
209            let workers: Vec<_> = (0..CONCURRENCY.min(total))
210                .map(|_| {
211                    let rx = rx.clone();
212                    let result_tx = result_tx.clone();
213                    let done = done.clone();
214                    let client = &self;
215                    scope.spawn(move || {
216                        loop {
217                            let work = {
218                                let guard = rx.lock().unwrap();
219                                guard.recv().ok()
220                            };
221                            match work {
222                                Some((idx, name, version)) => {
223                                    let prov = match client.fetch_provenance(&name, &version) {
224                                        Ok(p) => p,
225                                        Err(e) => {
226                                            eprintln!(
227                                                "Warning: npm attestation for {name}@{version}: {e:#}"
228                                            );
229                                            None
230                                        }
231                                    };
232                                    let count = done.fetch_add(1, std::sync::atomic::Ordering::Relaxed) + 1;
233                                    if count.is_multiple_of(50) || count == total {
234                                        eprint!("\r  [{count}/{total}]");
235                                    }
236                                    let _ = result_tx.send((idx, prov));
237                                }
238                                None => break, // Channel closed, no more work
239                            }
240                        }
241                    })
242                })
243                .collect();
244
245            // Drop our copy of result_tx so result_rx closes when workers finish
246            drop(result_tx);
247
248            // Enqueue all work
249            for q in queries {
250                let _ = tx.send(q);
251            }
252            drop(tx); // Signal no more work
253
254            // Collect results
255            let results: Vec<_> = result_rx.iter().collect();
256
257            // Wait for workers to finish
258            for w in workers {
259                let _ = w.join();
260            }
261
262            results
263        });
264
265        eprintln!();
266
267        // Apply results
268        let mut enriched = 0usize;
269        for (idx, prov) in results {
270            if let Some(prov) = prov {
271                let dep = &mut deps[idx];
272                dep.source_repo = prov.source_repo;
273                dep.source_commit = prov.source_commit;
274                dep.signer_identity = prov.signer_identity;
275                if let Some(log_index) = prov.transparency_log_index {
276                    dep.transparency_log_uri =
277                        Some(format!("https://search.sigstore.dev/?logIndex={log_index}"));
278                }
279                if dep.verification == VerificationOutcome::ChecksumMatch {
280                    dep.verification = VerificationOutcome::Verified;
281                    dep.signature_mechanism = Some("sigstore".to_string());
282                }
283                enriched += 1;
284            }
285        }
286
287        eprintln!("  {enriched}/{total} npm packages have provenance attestations");
288    }
289}
290
291/// Decode base64 (standard or URL-safe) with optional padding.
292fn base64_decode(input: &str) -> Result<Vec<u8>> {
293    use base64::Engine;
294    base64::engine::general_purpose::STANDARD
295        .decode(input)
296        .or_else(|_| base64::engine::general_purpose::STANDARD_NO_PAD.decode(input))
297        .or_else(|_| base64::engine::general_purpose::URL_SAFE.decode(input))
298        .or_else(|_| base64::engine::general_purpose::URL_SAFE_NO_PAD.decode(input))
299        .context("failed to base64-decode attestation payload")
300}
301
302// --- npm attestation API response types ---
303
304#[derive(Debug, Deserialize)]
305struct AttestationResponse {
306    attestations: Vec<Attestation>,
307}
308
309#[derive(Debug, Deserialize)]
310#[serde(rename_all = "camelCase")]
311struct Attestation {
312    predicate_type: String,
313    bundle: Option<SigstoreBundle>,
314}
315
316#[derive(Debug, Deserialize)]
317#[serde(rename_all = "camelCase")]
318struct SigstoreBundle {
319    verification_material: Option<VerificationMaterial>,
320    dsse_envelope: Option<DsseEnvelope>,
321}
322
323#[derive(Debug, Deserialize)]
324#[serde(rename_all = "camelCase")]
325struct VerificationMaterial {
326    tlog_entries: Option<Vec<TlogEntry>>,
327}
328
329#[derive(Debug, Deserialize)]
330#[serde(rename_all = "camelCase")]
331struct TlogEntry {
332    log_index: String,
333}
334
335#[derive(Debug, Deserialize)]
336struct DsseEnvelope {
337    payload: String,
338}
339
340// --- in-toto Statement / SLSA Provenance ---
341
342#[derive(Debug, Deserialize)]
343#[serde(rename_all = "camelCase")]
344struct InTotoStatement {
345    predicate: Option<SlsaPredicate>,
346}
347
348#[derive(Debug, Deserialize)]
349#[serde(rename_all = "camelCase")]
350struct SlsaPredicate {
351    build_definition: Option<BuildDefinition>,
352}
353
354#[derive(Debug, Deserialize)]
355#[serde(rename_all = "camelCase")]
356struct BuildDefinition {
357    external_parameters: Option<ExternalParameters>,
358    resolved_dependencies: Option<Vec<ResolvedDependency>>,
359}
360
361#[derive(Debug, Deserialize)]
362struct ExternalParameters {
363    workflow: Option<Workflow>,
364}
365
366#[derive(Debug, Deserialize)]
367struct Workflow {
368    #[serde(rename = "ref")]
369    r#ref: String,
370    repository: String,
371    path: String,
372}
373
374#[derive(Debug, Deserialize)]
375struct ResolvedDependency {
376    digest: Option<Digest>,
377}
378
379#[derive(Debug, Deserialize)]
380#[serde(rename_all = "camelCase")]
381struct Digest {
382    git_commit: Option<String>,
383}
384
#[cfg(test)]
mod tests {
    use super::*;
    use base64::Engine;
    use base64::engine::general_purpose::{STANDARD, STANDARD_NO_PAD, URL_SAFE, URL_SAFE_NO_PAD};

    /// Bytes whose encoding needs padding and uses the characters that differ
    /// between the standard (`+/`) and URL-safe (`-_`) alphabets.
    const ALPHABET_SENSITIVE: &[u8] = &[0xfb, 0xff];

    #[test]
    fn base64_decode_standard() {
        let encoded = STANDARD.encode(b"hello world");
        let decoded = base64_decode(&encoded).unwrap();
        assert_eq!(decoded, b"hello world");
    }

    #[test]
    fn base64_decode_accepts_every_documented_variant() {
        let encodings = [
            STANDARD.encode(ALPHABET_SENSITIVE),
            STANDARD_NO_PAD.encode(ALPHABET_SENSITIVE),
            URL_SAFE.encode(ALPHABET_SENSITIVE),
            URL_SAFE_NO_PAD.encode(ALPHABET_SENSITIVE),
        ];
        for encoded in encodings {
            assert_eq!(
                base64_decode(&encoded).unwrap(),
                ALPHABET_SENSITIVE,
                "failed to decode {encoded:?}"
            );
        }
    }

    #[test]
    fn base64_decode_rejects_invalid_input() {
        assert!(base64_decode("not base64!").is_err());
    }

    #[test]
    fn attestation_without_bundle_deserializes() {
        let json = r#"{"attestations": [{"predicateType": "https://slsa.dev/provenance/v1"}]}"#;
        let resp: AttestationResponse = serde_json::from_str(json).unwrap();
        assert_eq!(resp.attestations.len(), 1);
        assert!(resp.attestations[0].bundle.is_none());
    }

    #[test]
    fn attestation_response_deserializes() {
        let json = r#"{
            "attestations": [
                {
                    "predicateType": "https://slsa.dev/provenance/v1",
                    "bundle": {
                        "verificationMaterial": {
                            "tlogEntries": [{"logIndex": "12345"}]
                        },
                        "dsseEnvelope": {
                            "payload": "eyJwcmVkaWNhdGVUeXBlIjoiaHR0cHM6Ly9zbHNhLmRldi9wcm92ZW5hbmNlL3YxIiwicHJlZGljYXRlIjp7ImJ1aWxkRGVmaW5pdGlvbiI6eyJleHRlcm5hbFBhcmFtZXRlcnMiOnsid29ya2Zsb3ciOnsicmVmIjoicmVmcy9oZWFkcy9tYWluIiwicmVwb3NpdG9yeSI6Imh0dHBzOi8vZ2l0aHViLmNvbS9leGFtcGxlL3JlcG8iLCJwYXRoIjoiLmdpdGh1Yi93b3JrZmxvd3MvcmVsZWFzZS55bWwifX0sInJlc29sdmVkRGVwZW5kZW5jaWVzIjpbeyJkaWdlc3QiOnsiZ2l0Q29tbWl0IjoiYWJjMTIzIn19XX19fQ=="
                        }
                    }
                }
            ]
        }"#;

        let resp: AttestationResponse = serde_json::from_str(json).unwrap();
        assert_eq!(resp.attestations.len(), 1);
        assert_eq!(resp.attestations[0].predicate_type, SLSA_PROVENANCE_V1);

        let bundle = resp.attestations[0].bundle.as_ref().unwrap();
        let tlog = bundle
            .verification_material
            .as_ref()
            .unwrap()
            .tlog_entries
            .as_ref()
            .unwrap();
        assert_eq!(tlog[0].log_index, "12345");

        // The payload must decode into the in-toto statement the client reads.
        let envelope = bundle.dsse_envelope.as_ref().unwrap();
        let payload = base64_decode(&envelope.payload).unwrap();
        let statement: InTotoStatement = serde_json::from_slice(&payload).unwrap();
        let build = statement
            .predicate
            .as_ref()
            .and_then(|p| p.build_definition.as_ref())
            .unwrap();

        let workflow = build
            .external_parameters
            .as_ref()
            .and_then(|ep| ep.workflow.as_ref())
            .unwrap();
        assert_eq!(workflow.repository, "https://github.com/example/repo");
        assert_eq!(workflow.path, ".github/workflows/release.yml");
        assert_eq!(workflow.r#ref, "refs/heads/main");

        let commit = build
            .resolved_dependencies
            .as_ref()
            .and_then(|deps| deps.first())
            .and_then(|dep| dep.digest.as_ref())
            .and_then(|d| d.git_commit.as_deref());
        assert_eq!(commit, Some("abc123"));
    }
}