Skip to main content

aube_resolver/
primer.rs

1use aube_manifest::BundledDependencies;
2use aube_registry::{Attestations, Dist, NpmUser, Packument, PeerDepMeta, VersionMetadata};
3use std::collections::BTreeMap;
4use std::io::{Cursor, Write};
5use std::path::{Path, PathBuf};
6use std::sync::OnceLock;
7use std::time::Duration;
8
9#[path = "primer_schema.rs"]
10mod primer_schema;
11
12pub(crate) use primer_schema::Seed;
13use primer_schema::{
14    PrimerBundledDependencies, PrimerDist, PrimerPackument, PrimerPeerDepMeta,
15    PrimerVersionMetadata,
16};
17
/// Format tag for extracted primer cache files; `is_primer_cache_file`
/// recognises them by the `{PRIMER_FORMAT}-` file-name prefix.
const PRIMER_FORMAT: &str = "rkyv-v1";
/// Age threshold the automatic prune passes to `prune_old`: 30 days.
const PRUNE_AGE: Duration = Duration::from_secs(30 * 24 * 60 * 60);
/// Sentinel cooldown the automatic prune passes to `prune_old`: 24 hours.
const AUTO_PRUNE_COOLDOWN: Duration = Duration::from_secs(24 * 60 * 60);
/// The automatic prune only proceeds when the byte returned by `random_byte`
/// is a multiple of this value.
const AUTO_PRUNE_DENOMINATOR: u8 = 100;

// Build-script output pulled in verbatim.
// NOTE(review): presumably this is where `PRIMER_INDEX` and `PRIMER_BLOB`
// (used by `get`) are defined — confirm against the build script.
include!(concat!(env!("OUT_DIR"), "/primer_index.rs"));
24
/// Summary of one primer-cache prune pass.
///
/// Returned by `prune_cache`. In dry-run mode the counters describe the files
/// that matched the age threshold without being removed.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct PruneStats {
    /// Number of primer cache files that exceeded the requested age.
    pub files: u64,
    /// Combined length in bytes of those files, as reported by file metadata.
    pub bytes: u64,
}
30
31impl Seed {
32    pub(crate) fn packument(&self) -> Packument {
33        self.packument.to_packument()
34    }
35}
36
37impl PrimerPackument {
38    fn to_packument(&self) -> Packument {
39        let mut time = BTreeMap::new();
40        let versions = self
41            .versions
42            .iter()
43            .map(|v| {
44                if let Some(published_at) = v.published_at.as_ref() {
45                    time.insert(v.version.clone(), published_at.clone());
46                }
47                (
48                    v.version.clone(),
49                    v.metadata.to_version_metadata(&self.name, &v.version),
50                )
51            })
52            .collect();
53        Packument {
54            name: self.name.clone(),
55            modified: self.modified.clone(),
56            versions,
57            dist_tags: self.dist_tags.clone(),
58            time,
59        }
60    }
61}
62
63impl PrimerVersionMetadata {
64    fn to_version_metadata(&self, name: &str, version: &str) -> VersionMetadata {
65        VersionMetadata {
66            name: name.to_owned(),
67            version: version.to_owned(),
68            dependencies: self.dependencies.clone(),
69            dev_dependencies: BTreeMap::new(),
70            peer_dependencies: self.peer_dependencies.clone(),
71            peer_dependencies_meta: self
72                .peer_dependencies_meta
73                .iter()
74                .map(|(name, meta)| (name.clone(), meta.to_peer_dep_meta()))
75                .collect(),
76            optional_dependencies: self.optional_dependencies.clone(),
77            bundled_dependencies: self
78                .bundled_dependencies
79                .as_ref()
80                .map(PrimerBundledDependencies::to_bundled_dependencies),
81            dist: self.dist.as_ref().map(|d| d.to_dist(name, version)),
82            os: self.os.clone(),
83            cpu: self.cpu.clone(),
84            libc: self.libc.clone(),
85            engines: self.engines.clone(),
86            license: self.license.clone(),
87            funding_url: self.funding_url.clone(),
88            bin: self.bin.clone(),
89            has_install_script: self.has_install_script,
90            deprecated: self.deprecated.clone(),
91            approver: None,
92            npm_user: self.trusted_publisher.then(|| NpmUser {
93                trusted_publisher: Some(serde_json::json!({"id": "npm-primer"})),
94            }),
95        }
96    }
97}
98
99impl PrimerPeerDepMeta {
100    fn to_peer_dep_meta(&self) -> PeerDepMeta {
101        PeerDepMeta {
102            optional: self.optional,
103        }
104    }
105}
106
107impl PrimerBundledDependencies {
108    fn to_bundled_dependencies(&self) -> BundledDependencies {
109        match self {
110            Self::List(v) => BundledDependencies::List(v.clone()),
111            Self::All(v) => BundledDependencies::All(*v),
112        }
113    }
114}
115
116impl PrimerDist {
117    fn to_dist(&self, name: &str, version: &str) -> Dist {
118        Dist {
119            tarball: self
120                .tarball
121                .clone()
122                .unwrap_or_else(|| deterministic_tarball_url(name, version)),
123            integrity: self.integrity.clone(),
124            shasum: None,
125            unpacked_size: None,
126            attestations: self.provenance.then(|| Attestations {
127                provenance: Some(serde_json::json!({
128                    "predicateType": "https://slsa.dev/provenance/v1"
129                })),
130            }),
131        }
132    }
133}
134
/// Rebuilds the npmjs tarball URL for a primer entry that omitted it
/// (the common case — see PrimerDist::tarball docs).
///
/// The layout follows `RegistryClient::tarball_url` for
/// `registry.npmjs.org`: `{name}/-/{unscoped}-{version}.tgz`, where
/// `unscoped` is the segment after the scope for `@scope/pkg` names and the
/// full name otherwise. In force-metadata-primer mode the resolver rewrites
/// the URL to the active registry, so this default only matters on the
/// default-registry path.
fn deterministic_tarball_url(name: &str, version: &str) -> String {
    let unscoped = match name.strip_prefix('@') {
        // Scoped name: take the segment right after the scope, falling back
        // to the whole name when there is no `/` at all.
        Some(scoped) => scoped.split('/').nth(1).unwrap_or(name),
        None => name,
    };
    format!("https://registry.npmjs.org/{name}/-/{unscoped}-{version}.tgz")
}
148
/// Memoised result of `generated_at()`: the formatted primer build timestamp,
/// or `None` when the build-time value is absent or fails to parse.
static GENERATED_AT: OnceLock<Option<String>> = OnceLock::new();
/// Initialised by `auto_prune_once()` so its closure runs at most once per
/// process.
static AUTO_PRUNED: OnceLock<()> = OnceLock::new();
151
152pub(crate) fn get(name: &str) -> Option<Seed> {
153    let (_, offset, len) = PRIMER_INDEX
154        .binary_search_by(|(candidate, _, _)| candidate.cmp(&name))
155        .ok()
156        .and_then(|idx| PRIMER_INDEX.get(idx))?;
157    auto_prune_once();
158    let end = offset.checked_add(*len)?;
159    let compressed = PRIMER_BLOB.get(*offset..end)?;
160    let archived = zstd::stream::decode_all(Cursor::new(compressed)).ok()?;
161    rkyv::from_bytes::<Seed, rkyv::rancor::Error>(&archived).ok()
162}
163
164pub(crate) fn covers_cutoff(cutoff: &str) -> bool {
165    generated_at().is_some_and(|generated_at| generated_at.as_str() >= cutoff)
166}
167
168fn generated_at() -> Option<&'static String> {
169    GENERATED_AT
170        .get_or_init(|| {
171            let secs = option_env!("AUBE_PRIMER_GENERATED_AT")?.parse().ok()?;
172            Some(crate::types::format_iso8601_utc(secs))
173        })
174        .as_ref()
175}
176
177fn auto_prune_once() {
178    AUTO_PRUNED.get_or_init(|| {
179        if let Some(dir) = primer_cache_dir() {
180            auto_prune(&dir);
181        }
182    });
183}
184
185fn auto_prune(dir: &Path) {
186    if !random_byte().is_multiple_of(AUTO_PRUNE_DENOMINATOR) {
187        return;
188    }
189    if let Err(e) = prune_old(dir, PRUNE_AGE, false, Some(AUTO_PRUNE_COOLDOWN)) {
190        tracing::debug!("failed to prune old primer cache files: {e}");
191    }
192}
193
194pub fn prune_cache(dry_run: bool, age: Duration) -> std::io::Result<PruneStats> {
195    let Some(dir) = primer_cache_dir() else {
196        return Ok(PruneStats::default());
197    };
198    prune_old(&dir, age, dry_run, None)
199}
200
201fn prune_old(
202    dir: &Path,
203    age: Duration,
204    dry_run: bool,
205    sentinel_cooldown: Option<Duration>,
206) -> std::io::Result<PruneStats> {
207    let mut stats = PruneStats::default();
208    std::fs::create_dir_all(dir)?;
209    let sentinel = dir.join(".auto_prune");
210    if let Some(cooldown) = sentinel_cooldown
211        && let Ok(modified) = sentinel.metadata().and_then(|m| m.modified())
212        && modified.elapsed().unwrap_or_default() < cooldown
213    {
214        return Ok(stats);
215    }
216    if sentinel_cooldown.is_some() {
217        touch(&sentinel)?;
218    }
219    let entries = std::fs::read_dir(dir)?;
220    for entry in entries {
221        let entry = entry?;
222        let path = entry.path();
223        let Some(name) = path.file_name().and_then(|s| s.to_str()) else {
224            continue;
225        };
226        if !is_primer_cache_file(name) {
227            continue;
228        }
229        let metadata = entry.metadata()?;
230        if metadata.modified()?.elapsed().unwrap_or_default() > age {
231            stats.files += 1;
232            stats.bytes += metadata.len();
233            if !dry_run {
234                std::fs::remove_file(&path)?;
235            }
236        }
237    }
238    Ok(stats)
239}
240
/// Creates `path` (and any missing parent directories), replacing whatever it
/// held with a single newline so that its modification time is refreshed.
///
/// # Errors
///
/// Returns the I/O error from creating the parent directories, opening the
/// file, or writing to it.
fn touch(path: &Path) -> std::io::Result<()> {
    if let Some(folder) = path.parent() {
        std::fs::create_dir_all(folder)?;
    }
    // `File::create` opens write-only, creating the file or truncating it.
    let mut file = std::fs::File::create(path)?;
    file.write_all(b"\n")
}
252
253fn is_primer_cache_file(name: &str) -> bool {
254    name.starts_with(&format!("{PRIMER_FORMAT}-")) && name.ends_with(".rkyv")
255}
256
/// Produces a cheap pseudo-random byte by XOR-ing the low byte of the current
/// time (nanoseconds since the Unix epoch) with the low byte of the process
/// id. A clock set before the epoch contributes zero.
fn random_byte() -> u8 {
    let now = std::time::SystemTime::now();
    let nanos = match now.duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos(),
        Err(_) => 0,
    };
    // Truncating casts are intentional: only the low 8 bits are wanted.
    let clock_bits = nanos as u8;
    let pid_bits = std::process::id() as u8;
    clock_bits ^ pid_bits
}
264
265fn primer_cache_dir() -> Option<PathBuf> {
266    // First-class config knob, read under the active embedder's brand
267    // (`AUBE_CACHE_DIR` for standalone aube, `<BRAND>_CACHE_DIR` for an embedder
268    // with its own `config_env_prefix`) via `config_env` — never the branded
269    // `AUBE_*` form under such a host.
270    if let Some(base) = aube_util::env::config_env("CACHE_DIR") {
271        return Some(PathBuf::from(base).join("primer"));
272    }
273    // Active embedder's `cache_namespace` (standalone aube → "aube"), not a literal,
274    // so the primer lands beside the packument cache in aube-store's `cache_dir`
275    // rather than under an aube-named path in a host embedder's $XDG_CACHE.
276    cache_base_dir().map(|p| p.join(aube_util::embedder().cache_namespace).join("primer"))
277}
278
/// Returns the per-user cache root on Unix-like systems.
///
/// `$XDG_CACHE_HOME` is used when it holds an absolute path. Following the XDG
/// Base Directory Specification, an unset, empty, or relative value is ignored
/// and `$HOME/.cache` is used instead. Returns `None` when neither variable
/// provides a usable value (an empty `$HOME` is treated as unset so the result
/// is never a path relative to the current directory).
#[cfg(unix)]
fn cache_base_dir() -> Option<PathBuf> {
    let xdg = std::env::var_os("XDG_CACHE_HOME")
        .map(PathBuf::from)
        // An empty path is not absolute, so this also rejects `""`.
        .filter(|dir| dir.is_absolute());
    xdg.or_else(|| {
        std::env::var_os("HOME")
            .filter(|home| !home.is_empty())
            .map(|home| PathBuf::from(home).join(".cache"))
    })
}
285
/// Returns the per-user cache root on Windows: the value of `%LOCALAPPDATA%`,
/// or `None` when the variable is not set.
#[cfg(windows)]
fn cache_base_dir() -> Option<PathBuf> {
    let local_app_data = std::env::var_os("LOCALAPPDATA")?;
    Some(PathBuf::from(local_app_data))
}
290
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: the first indexed package must decode into a seed.
    #[test]
    fn bundled_primer_loads() {
        // An empty index leaves nothing to check.
        if let Some((name, _, _)) = PRIMER_INDEX.first() {
            assert!(super::get(name).is_some());
        }
    }

    /// The generator drops the tarball URL whenever it matches the
    /// deterministic `{registry}/{name}/-/{unscoped}-{version}.tgz` pattern,
    /// so the runtime has to fill it back in. Check that a reconstructed dist
    /// exposes a URL containing both the package name and the version we
    /// asked for; a synthesis bug that drops or swaps either one must fail.
    #[test]
    fn bundled_primer_synthesizes_tarball_urls() {
        let Some((name, _, _)) = PRIMER_INDEX.first() else {
            return;
        };
        let seed = super::get(name).expect("primer hit");
        let packument = seed.packument();
        let (version, dist) = packument
            .versions
            .iter()
            .find_map(|(version, meta)| meta.dist.as_ref().map(|dist| (version, dist)))
            .expect("packument has at least one version with dist metadata");
        let url = &dist.tarball;

        assert!(url.starts_with("https://"), "tarball: {}", url);
        assert!(url.ends_with(".tgz"), "tarball: {}", url);
        assert!(
            url.contains(*name),
            "tarball {} missing package name {name}",
            url,
        );
        assert!(
            url.contains(version),
            "tarball {} missing version {version}",
            url,
        );
    }

    /// Plain and scoped names both map onto the npmjs tarball layout.
    #[test]
    fn deterministic_tarball_url_handles_scoped_names() {
        let plain = deterministic_tarball_url("react", "18.2.0");
        assert_eq!(plain, "https://registry.npmjs.org/react/-/react-18.2.0.tgz");

        let scoped = deterministic_tarball_url("@types/node", "20.10.0");
        assert_eq!(
            scoped,
            "https://registry.npmjs.org/@types/node/-/node-20.10.0.tgz"
        );
    }

    /// Only `rkyv-v1-*.rkyv` names qualify for pruning.
    #[test]
    fn primer_cache_file_match_is_narrow() {
        assert!(is_primer_cache_file("rkyv-v1-abc.rkyv"));

        let rejected = [".auto_prune", "rkyv-v1-abc.tmp", "other-v1-abc.rkyv"];
        for candidate in rejected {
            assert!(!is_primer_cache_file(candidate));
        }
    }

    /// A zero age threshold removes the primer file and leaves unrelated
    /// files alone.
    #[test]
    fn prune_removes_old_extracted_primer_files() {
        let temp = tempfile::tempdir().unwrap();
        let dir = temp.path();
        let stale = dir.join("rkyv-v1-old-0-old.rkyv");
        let unrelated = dir.join("packument.json");
        std::fs::write(&stale, "{}").unwrap();
        std::fs::write(&unrelated, "{}").unwrap();

        let stats = prune_old(dir, Duration::ZERO, false, None).unwrap();

        assert_eq!(stats.files, 1);
        assert!(!stale.exists());
        assert!(unrelated.exists());
    }

    /// A fresh sentinel suppresses the pass even though the file itself is
    /// old enough to be pruned.
    #[test]
    fn prune_sentinel_uses_own_cooldown() {
        let temp = tempfile::tempdir().unwrap();
        let dir = temp.path();
        let primer_file = dir.join("rkyv-v1-old-0-old.rkyv");
        std::fs::write(&primer_file, "{}").unwrap();
        touch(&dir.join(".auto_prune")).unwrap();

        let cooldown = Some(Duration::from_secs(60));
        let stats = prune_old(dir, Duration::ZERO, false, cooldown).unwrap();

        assert_eq!(stats.files, 0);
        assert!(primer_file.exists());
    }
}