Skip to main content

aube_resolver/
local_source.rs

1use crate::{Error, ResolveTask};
2use aube_lockfile::{LocalSource, LockedPackage};
3use aube_registry::client::RegistryClient;
4use aube_util::path::normalize_lexical;
5use std::collections::BTreeMap;
6use std::path::{Path, PathBuf};
7
8/// Rewrite a `LocalSource` whose path is relative to `importer_root`
9/// into one whose path is relative to `project_root`, so downstream
10/// code (install.rs, linker) can resolve the target with a single
11/// `project_root.join(rel)` regardless of which workspace importer
12/// declared it.
13///
14/// Both the join-then-diff intermediate and the returned path are
15/// lexically normalized — `Path::join` and `pathdiff::diff_paths`
16/// leave `..` components in place, which means `packages/app` +
17/// `../../vendor-dir` would otherwise produce
18/// `packages/app/../../vendor-dir`. That non-canonical form fed into
19/// `dep_path`'s hash would produce a different key for every
20/// importer declaring the same target, and would also leak into the
21/// lockfile's `version:` string.
22pub(crate) fn rebase_local(
23    local: &LocalSource,
24    importer_root: &Path,
25    project_root: &Path,
26) -> LocalSource {
27    // The fast path: importer_root == project_root. Root-importer
28    // installs take this branch, which is also the single-project
29    // case — no rewrite needed and we preserve the raw specifier
30    // bytes for a byte-identical lockfile round-trip.
31    if importer_root == project_root {
32        if let LocalSource::Exec(path) = local {
33            return LocalSource::Exec(normalize_lexical(path));
34        }
35        return local.clone();
36    }
37    let Some(local_path) = local.path() else {
38        // Non-path sources (git) have nothing to rebase.
39        return local.clone();
40    };
41    let abs = normalize_lexical(&importer_root.join(local_path));
42    let rebased = pathdiff::diff_paths(&abs, project_root).map_or(abs, |p| normalize_lexical(&p));
43    match local {
44        LocalSource::Directory(_) => LocalSource::Directory(rebased),
45        LocalSource::Tarball(_) => LocalSource::Tarball(rebased),
46        LocalSource::Link(_) => LocalSource::Link(rebased),
47        LocalSource::Portal(_) => LocalSource::Portal(rebased),
48        LocalSource::Exec(_) => LocalSource::Exec(rebased),
49        LocalSource::Git(_) | LocalSource::RemoteTarball(_) => local.clone(),
50    }
51}
52
53/// Resolve an `exec:` generator path and reject scripts outside the project root.
54pub fn resolve_exec_script_path(
55    local: &LocalSource,
56    project_root: &Path,
57) -> Result<PathBuf, String> {
58    let LocalSource::Exec(rel) = local else {
59        return Err("resolve_exec_script_path called on non-exec source".to_string());
60    };
61    let script = project_root.join(rel);
62    if !script.is_file() {
63        return Err(format!("{} is not a file", script.display()));
64    }
65    let canonical_root = project_root
66        .canonicalize()
67        .map_err(|e| format!("canonicalize project root {}: {e}", project_root.display()))?;
68    let canonical_script = script
69        .canonicalize()
70        .map_err(|e| format!("canonicalize exec script {}: {e}", script.display()))?;
71    if !canonical_script.starts_with(&canonical_root) {
72        return Err(format!(
73            "{} resolves outside project root {}",
74            script.display(),
75            canonical_root.display()
76        ));
77    }
78    Ok(canonical_script)
79}
80
81/// Walk a gzipped npm tarball once and return the raw bytes of its
82/// top-level `package.json` entry. The wrapper directory name varies
83/// (`package/`, but also e.g. GitHub's `owner-repo-<sha>/`), so we
84/// match on the entry's basename plus a 2-component depth check
85/// rather than a hardcoded prefix. Errors come back as plain
86/// `String`s so each caller can wrap them with its own package
87/// identity in whatever error type it prefers — used by both the
88/// `file:` tarball path (`read_local_manifest`) and the remote
89/// tarball resolver (`resolve_remote_tarball`).
90/// Hard upper bound on the bytes read from the gzipped tarball stream
91/// while looking for `package.json`. A 64 MiB ceiling is far above any
92/// real npm package and keeps a hostile gzip bomb from amplifying into
93/// arbitrary RAM. Mirrors `aube-store::MAX_TARBALL_DECOMPRESSED_BYTES`
94/// in spirit — the resolver path was missed in the original cap pass.
95const MAX_RESOLVE_TARBALL_DECOMPRESSED_BYTES: u64 = 64 * 1024 * 1024;
96const MAX_RESOLVE_PACKAGE_JSON_BYTES: u64 = 8 * 1024 * 1024;
97
98fn read_tarball_package_json(bytes: &[u8]) -> Result<Vec<u8>, String> {
99    use std::io::Read;
100    // Cap on the DECOMPRESSED output of the gzip stream so a hostile
101    // tarball with large dummy entries before `package.json` cannot
102    // amplify the fixed compressed input window into arbitrary RAM.
103    // `bytes.take` would only bound the compressed read, which the
104    // decoder is free to expand without ceiling.
105    let gz = flate2::read::GzDecoder::new(bytes);
106    let capped = gz.take(MAX_RESOLVE_TARBALL_DECOMPRESSED_BYTES);
107    let mut archive = tar::Archive::new(capped);
108    for entry in archive.entries().map_err(|e| e.to_string())? {
109        let entry = entry.map_err(|e| e.to_string())?;
110        let entry_path = entry.path().map_err(|e| e.to_string())?.to_path_buf();
111        if entry_path
112            .file_name()
113            .and_then(|n| n.to_str())
114            .is_some_and(|n| n == "package.json")
115            && entry_path.components().count() == 2
116        {
117            let mut buf = Vec::new();
118            entry
119                .take(MAX_RESOLVE_PACKAGE_JSON_BYTES + 1)
120                .read_to_end(&mut buf)
121                .map_err(|e| e.to_string())?;
122            if buf.len() as u64 > MAX_RESOLVE_PACKAGE_JSON_BYTES {
123                return Err("package.json exceeds 8 MiB cap".to_string());
124            }
125            return Ok(buf);
126        }
127    }
128    Err("tarball has no top-level package.json".to_string())
129}
130
131/// Read the `package.json` of a `file:` / `link:` target to discover
132/// the real package name, version, and production dependencies.
133///
134/// For `LocalSource::Directory`, `LocalSource::Link`, and
135/// `LocalSource::Portal` we read the target dir's `package.json`
136/// directly. For `LocalSource::Tarball` we open the `.tgz`, find the
137/// first `*/package.json` entry, and parse its contents without
138/// extracting the rest of the archive.
139pub(crate) fn read_local_manifest(
140    local: &LocalSource,
141    importer_root: &Path,
142) -> Result<(String, String, BTreeMap<String, String>), Error> {
143    let Some(local_path) = local.path() else {
144        return Err(Error::Registry(
145            local.specifier(),
146            "read_local_manifest called on non-path source".to_string(),
147        ));
148    };
149    let path = importer_root.join(local_path);
150
151    let content = match local {
152        LocalSource::Directory(_) | LocalSource::Link(_) | LocalSource::Portal(_) => {
153            std::fs::read(path.join("package.json"))
154                .map_err(|e| Error::Registry(local.specifier(), e.to_string()))?
155        }
156        LocalSource::Tarball(_) => {
157            let bytes = std::fs::read(&path)
158                .map_err(|e| Error::Registry(local.specifier(), e.to_string()))?;
159            read_tarball_package_json(&bytes).map_err(|e| Error::Registry(local.specifier(), e))?
160        }
161        LocalSource::Exec(_) | LocalSource::Git(_) | LocalSource::RemoteTarball(_) => {
162            return Err(Error::Registry(
163                local.specifier(),
164                "read_local_manifest: generated or remote source handled separately".to_string(),
165            ));
166        }
167    };
168
169    let pj: aube_manifest::PackageJson = sonic_rs::from_slice(&content)
170        .or_else(|_| serde_json::from_slice(&content))
171        .map_err(|e| Error::Registry(local.specifier(), e.to_string()))?;
172    Ok((
173        pj.name.unwrap_or_default(),
174        pj.version.unwrap_or_else(|| "0.0.0".to_string()),
175        pj.dependencies,
176    ))
177}
178
179pub(crate) async fn resolve_exec_manifest(
180    name: &str,
181    local: &LocalSource,
182    project_root: &Path,
183) -> Result<(String, BTreeMap<String, String>), Error> {
184    let LocalSource::Exec(_) = local else {
185        return Err(Error::Registry(
186            name.to_string(),
187            "resolve_exec_manifest called on non-exec source".to_string(),
188        ));
189    };
190    let script = resolve_exec_script_path(local, project_root).map_err(|e| {
191        Error::Registry(
192            name.to_string(),
193            format!("exec dependency {}: {e}", local.specifier()),
194        )
195    })?;
196
197    let temp = tempfile::Builder::new()
198        .prefix("aube-exec-resolve-")
199        .tempdir()
200        .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
201    let build_dir = temp.path().join("build");
202    let temp_dir = temp.path().join("temp");
203    std::fs::create_dir_all(&build_dir)
204        .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
205    std::fs::create_dir_all(&temp_dir)
206        .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
207
208    let env = serde_json::json!({
209        "tempDir": temp_dir,
210        "buildDir": build_dir,
211        "locator": format!("{name}@{}", local.specifier()),
212    });
213    let status = tokio::process::Command::new("node")
214        .arg("-e")
215        .arg(crate::YARN_EXEC_WRAPPER)
216        .arg(&script)
217        .env("AUBE_YARN_EXEC_ENV", env.to_string())
218        .current_dir(project_root)
219        .status()
220        .await
221        .map_err(|e| {
222            Error::Registry(
223                name.to_string(),
224                format!("execute {} with Node.js from PATH: {e}", local.specifier()),
225            )
226        })?;
227    if !status.success() {
228        return Err(Error::Registry(
229            name.to_string(),
230            format!(
231                "exec dependency {} failed with status {status}",
232                local.specifier()
233            ),
234        ));
235    }
236
237    let content = std::fs::read(build_dir.join("package.json")).map_err(|e| {
238        Error::Registry(
239            name.to_string(),
240            format!("read generated package.json for {}: {e}", local.specifier()),
241        )
242    })?;
243    let pj: aube_manifest::PackageJson = sonic_rs::from_slice(&content)
244        .or_else(|_| serde_json::from_slice(&content))
245        .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
246    Ok((
247        pj.version.unwrap_or_else(|| "0.0.0".to_string()),
248        pj.dependencies,
249    ))
250}
251
/// Build the `name@version` key for a package.
pub(crate) fn dep_path_for(name: &str, version: &str) -> String {
    [name, version].join("@")
}
255
256/// Match specifier prefixes that resolve to a non-registry source
257/// (`file:`, `link:`, `portal:`, `exec:`, or a git URL form). Used
258/// by the resolver to decide whether to dispatch the local/git branch
259/// instead of the normal version-range lookup.
260pub(crate) fn is_non_registry_specifier(s: &str) -> bool {
261    if s.starts_with("link:") {
262        return true;
263    }
264    if s.starts_with("portal:") {
265        return true;
266    }
267    if s.starts_with("exec:") {
268        return true;
269    }
270    // Git first so `https://host/repo.git` dispatches the git branch
271    // rather than the broader bare-http tarball branch below.
272    if aube_lockfile::parse_git_spec(s).is_some() {
273        return true;
274    }
275    // Any remaining bare `http(s)://` URL is a tarball URL, per npm
276    // semantics — the `.tgz` suffix is not required.
277    if aube_lockfile::LocalSource::looks_like_remote_tarball_url(s) {
278        return true;
279    }
280    // `file:` is a local-path prefix only when it *isn't* also a git
281    // URL form — parse_git_spec already matched `file://…/repo.git`
282    // above, so anything that reaches here is treated as a path.
283    s.starts_with("file:")
284}
285
286pub(crate) fn should_block_exotic_subdep(
287    task: &ResolveTask,
288    resolved: &BTreeMap<String, LockedPackage>,
289    block_exotic_subdeps: bool,
290) -> bool {
291    block_exotic_subdeps
292        && !task.is_root
293        && !task
294            .parent
295            .as_ref()
296            .and_then(|parent| resolved.get(parent))
297            .is_some_and(|pkg| {
298                matches!(
299                    pkg.local_source,
300                    Some(LocalSource::Directory(_))
301                        | Some(LocalSource::Link(_))
302                        | Some(LocalSource::Portal(_))
303                        | Some(LocalSource::Exec(_))
304                )
305            })
306}
307
308/// Turn a raw `GitSource` (committish parsed from the user's
309/// specifier, empty `resolved`) into a fully-resolved one by either
310/// fetching a hosted-tarball over HTTPS (github / gitlab / bitbucket
311/// public reads, matching what npm `pacote` and pnpm
312/// `gitHostedTarballFetcher` do) or, for any other host or any
313/// codeload-unreachable case, falling back to `git ls-remote` +
314/// shallow clone. The materialized tree lives in a commit-keyed temp
315/// directory shared with install-time materialization, so the same
316/// extraction or clone is never repeated within a single `aube
317/// install`.
318///
319/// Hosted-tarball routing matches npm/pnpm semantics: the lockfile's
320/// stored `url` is canonical-identity only — even when it carries an
321/// SSH form the user has no key for, we re-derive an HTTPS URL from
322/// the `(host, owner, repo)` tuple at fetch time. Returns the
323/// original URL unchanged in `LocalSource::Git.url` so a subsequent
324/// `aube install` produces the same lockfile bytes (cross-tool
325/// compat with pnpm / npm / yarn).
326pub(crate) async fn resolve_git_source(
327    name: &str,
328    git: &aube_lockfile::GitSource,
329    shallow: bool,
330    client: Option<&RegistryClient>,
331) -> Result<
332    (
333        LocalSource,
334        String,
335        BTreeMap<String, String>,
336        Option<String>,
337    ),
338    Error,
339> {
340    let original_url = git.url.clone();
341    let committish = git.committish.clone();
342    let subpath = git.subpath.clone();
343    let hosted = aube_lockfile::parse_hosted_git(&original_url);
344    // Use the HTTPS form when talking to git for hosted hosts — the
345    // lockfile-canonical `git+ssh://git@…` URL would dial SSH and
346    // fail for users with no `~/.ssh/`. Non-hosted URLs go through
347    // unchanged so SSH-only setups keep working.
348    let runtime_url = hosted
349        .as_ref()
350        .map(|h| h.https_url())
351        .unwrap_or_else(|| original_url.clone());
352
353    // Resolve the committish to a 40-char SHA. `git_resolve_ref`
354    // short-circuits on a SHA and shells `git ls-remote` for branch /
355    // tag / HEAD. Passing the rewritten HTTPS URL means hosted
356    // branch/tag refs are pinnable from a host with no SSH key
357    // configured.
358    let runtime_url_for_ref = runtime_url.clone();
359    let committish_for_ref = committish.clone();
360    let name_for_ref = name.to_string();
361    let resolved_sha = tokio::task::spawn_blocking(move || -> Result<String, Error> {
362        let seed = aube_store::git_resolve_ref(&runtime_url_for_ref, committish_for_ref.as_deref())
363            .map_err(|e| Error::Registry(name_for_ref.clone(), e.to_string()))?;
364        // Only full SHAs survive — abbreviated user-written prefixes
365        // come back unchanged from `git_resolve_ref` and need to fall
366        // through to the clone path so `git checkout <prefix>` can
367        // expand them.
368        Ok(seed)
369    })
370    .await
371    .map_err(|e| {
372        Error::Registry(
373            name.to_string(),
374            format!("git ls-remote task panicked: {e}"),
375        )
376    })??;
377
378    let codeload_url = hosted.as_ref().and_then(|h| h.tarball_url(&resolved_sha));
379
380    // Cache hit fast path: skip the HTTPS round-trip when a prior call
381    // (the resolver's earlier visit to this dep, or a previous install)
382    // already populated the codeload cache. Mirrors `git_shallow_clone`'s
383    // top-of-function reuse check.
384    if codeload_url.is_some()
385        && git.integrity.is_some()
386        && let Some((clone_dir, _head_sha)) = aube_store::codeload_cache_lookup(
387            &original_url,
388            &resolved_sha,
389            git.integrity.as_deref(),
390        )
391    {
392        let integrity = aube_store::codeload_cache_integrity(
393            &original_url,
394            &resolved_sha,
395            git.integrity.as_deref(),
396        );
397        let pkg_root = match &subpath {
398            Some(sub) => clone_dir.join(sub),
399            None => clone_dir.clone(),
400        };
401        let manifest_bytes = std::fs::read(pkg_root.join("package.json")).map_err(|e| {
402            let where_ = subpath
403                .as_deref()
404                .map(|s| format!(" at /{s}"))
405                .unwrap_or_default();
406            Error::Registry(
407                name.to_string(),
408                format!("read package.json in cached codeload extract{where_}: {e}"),
409            )
410        })?;
411        let pj: aube_manifest::PackageJson = serde_json::from_slice(&manifest_bytes)
412            .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
413        let version = pj.version.unwrap_or_else(|| "0.0.0".to_string());
414        return Ok((
415            LocalSource::Git(aube_lockfile::GitSource {
416                url: original_url,
417                committish,
418                resolved: resolved_sha,
419                integrity: git.integrity.clone(),
420                subpath,
421            }),
422            version,
423            pj.dependencies,
424            integrity,
425        ));
426    }
427
428    // Try the codeload fast path when applicable. `client` is None for
429    // resolve paths that don't have a registry client wired up
430    // (`aube import`'s lockfile-only flow); those just fall through.
431    if let (Some(c), Some(url_to_fetch)) = (client, codeload_url.as_deref()) {
432        match c.fetch_tarball_bytes(url_to_fetch).await {
433            Ok(bytes) => {
434                // Extract into the commit-keyed cache and read the
435                // (possibly subpath-scoped) `package.json` like the
436                // clone path does. Return the original lockfile URL
437                // in `LocalSource::Git.url` for cross-tool round-trip.
438                let bytes_vec = bytes.to_vec();
439                if let Some(pinned) = &git.integrity {
440                    aube_store::verify_integrity(&bytes_vec, pinned)
441                        .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
442                }
443                let integrity = git
444                    .integrity
445                    .clone()
446                    .unwrap_or_else(|| aube_store::sha512_integrity(&bytes_vec));
447                let url_for_extract = original_url.clone();
448                let sha_for_extract = resolved_sha.clone();
449                let integrity_for_extract = integrity.clone();
450                let subpath_for_extract = subpath.clone();
451                let name_for_extract = name.to_string();
452                let extracted = tokio::task::spawn_blocking(move || -> Result<_, Error> {
453                    let (clone_dir, resolved) = aube_store::extract_codeload_tarball(
454                        &bytes_vec,
455                        &url_for_extract,
456                        &sha_for_extract,
457                        Some(&integrity_for_extract),
458                    )
459                    .map_err(|e| Error::Registry(name_for_extract.clone(), e.to_string()))?;
460                    let pkg_root = match &subpath_for_extract {
461                        Some(sub) => clone_dir.join(sub),
462                        None => clone_dir.clone(),
463                    };
464                    let manifest_bytes =
465                        std::fs::read(pkg_root.join("package.json")).map_err(|e| {
466                            let where_ = subpath_for_extract
467                                .as_deref()
468                                .map(|s| format!(" at /{s}"))
469                                .unwrap_or_default();
470                            Error::Registry(
471                                name_for_extract.clone(),
472                                format!("read package.json in codeload extract{where_}: {e}"),
473                            )
474                        })?;
475                    let pj: aube_manifest::PackageJson = serde_json::from_slice(&manifest_bytes)
476                        .map_err(|e| Error::Registry(name_for_extract.clone(), e.to_string()))?;
477                    let version = pj.version.unwrap_or_else(|| "0.0.0".to_string());
478                    Ok((resolved, version, pj.dependencies))
479                })
480                .await
481                .map_err(|e| {
482                    Error::Registry(name.to_string(), format!("codeload extract panicked: {e}"))
483                })?;
484                let integrity = aube_store::sha512_integrity(&bytes);
485                match extracted {
486                    Ok((resolved, version, deps)) => {
487                        return Ok((
488                            LocalSource::Git(aube_lockfile::GitSource {
489                                url: original_url,
490                                committish,
491                                resolved,
492                                integrity: Some(integrity.clone()),
493                                subpath,
494                            }),
495                            version,
496                            deps,
497                            Some(integrity),
498                        ));
499                    }
500                    Err(e) => {
501                        // Mirror the installer: a corrupt or
502                        // unexpectedly-shaped tarball (CDN hiccup,
503                        // unsafe-path rejection, Windows symlink) falls
504                        // through to `git clone`, which inherits the
505                        // user's git credential helper and can write
506                        // symlinks via git's admin-aware path.
507                        tracing::debug!(
508                            name,
509                            "codeload extract failed, falling back to git clone: {e}",
510                        );
511                    }
512                }
513            }
514            Err(e) => {
515                // Codeload 404s on private repos (it doesn't accept
516                // npm-registry auth) — fall through to `git
517                // clone`, which inherits the user's git credential
518                // helper / ssh keys for private access.
519                tracing::debug!(
520                    name,
521                    url = %aube_util::url::redact_url(url_to_fetch),
522                    "codeload fetch failed, falling back to git clone: {e}",
523                );
524            }
525        }
526    }
527
528    // Fallback: shallow git clone over the rewritten HTTPS URL (or the
529    // original URL for non-hosted hosts). Same `spawn_blocking` dance
530    // the original implementation used.
531    let runtime_url_for_clone = runtime_url;
532    let original_url_for_lockfile = original_url.clone();
533    let resolved_sha_for_clone = resolved_sha.clone();
534    let subpath_for_clone = subpath.clone();
535    let name_for_clone = name.to_string();
536    let (local, version, deps) = tokio::task::spawn_blocking(move || -> Result<_, Error> {
537        let (clone_dir, resolved) =
538            aube_store::git_shallow_clone(&runtime_url_for_clone, &resolved_sha_for_clone, shallow)
539                .map_err(|e| Error::Registry(name_for_clone.clone(), e.to_string()))?;
540        let pkg_root = match &subpath_for_clone {
541            Some(sub) => clone_dir.join(sub),
542            None => clone_dir.clone(),
543        };
544        let manifest_bytes = std::fs::read(pkg_root.join("package.json")).map_err(|e| {
545            let where_ = subpath_for_clone
546                .as_deref()
547                .map(|s| format!(" at /{s}"))
548                .unwrap_or_default();
549            Error::Registry(
550                name_for_clone.clone(),
551                format!("read package.json in clone{where_}: {e}"),
552            )
553        })?;
554        let pj: aube_manifest::PackageJson = serde_json::from_slice(&manifest_bytes)
555            .map_err(|e| Error::Registry(name_for_clone.clone(), e.to_string()))?;
556        let version = pj.version.unwrap_or_else(|| "0.0.0".to_string());
557        Ok((
558            LocalSource::Git(aube_lockfile::GitSource {
559                url: original_url_for_lockfile,
560                committish,
561                resolved,
562                integrity: None,
563                subpath: subpath_for_clone,
564            }),
565            version,
566            pj.dependencies,
567        ))
568    })
569    .await
570    .map_err(|e| Error::Registry(name.to_string(), format!("git task panicked: {e}")))??;
571    Ok((local, version, deps, None))
572}
573
574/// Fetch a remote tarball URL, compute its sha512 integrity, and read
575/// the enclosed `package.json` for version + transitive deps. Returns
576/// a fully-populated `LocalSource::RemoteTarball` alongside the
577/// manifest tuple the resolver's local-dep branch expects.
578pub(crate) async fn resolve_remote_tarball(
579    name: &str,
580    tarball: &aube_lockfile::RemoteTarballSource,
581    client: &RegistryClient,
582) -> Result<(LocalSource, String, BTreeMap<String, String>), Error> {
583    let bytes = client
584        .fetch_tarball_bytes(&tarball.url)
585        .await
586        .map_err(|e| {
587            Error::Registry(
588                name.to_string(),
589                format!("fetch {}: {e}", aube_util::url::redact_url(&tarball.url)),
590            )
591        })?;
592    let name_owned = name.to_string();
593    let url = aube_util::url::redact_url(&tarball.url);
594    let (integrity, version, deps) = tokio::task::spawn_blocking(move || -> Result<_, Error> {
595        let integrity = aube_store::sha512_integrity(&bytes);
596
597        // Walk the tarball once to pull out the top-level
598        // `package.json` (wrapper name varies, so the helper looks
599        // at the first path component's basename, not a hardcoded
600        // `package/package.json`).
601        let manifest_bytes = read_tarball_package_json(&bytes)
602            .map_err(|e| Error::Registry(name_owned.clone(), format!("tarball {url}: {e}")))?;
603        let pj: aube_manifest::PackageJson = serde_json::from_slice(&manifest_bytes)
604            .map_err(|e| Error::Registry(name_owned.clone(), e.to_string()))?;
605        let version = pj.version.unwrap_or_else(|| "0.0.0".to_string());
606        Ok((integrity, version, pj.dependencies))
607    })
608    .await
609    .map_err(|e| Error::Registry(name.to_string(), format!("tarball task panicked: {e}")))??;
610    Ok((
611        LocalSource::RemoteTarball(aube_lockfile::RemoteTarballSource {
612            url: tarball.url.clone(),
613            integrity,
614            git_hosted: tarball.git_hosted,
615        }),
616        version,
617        deps,
618    ))
619}
620
#[cfg(test)]
mod rebase_local_tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// `file:` directory source pointing at `rel`.
    fn dir_source(rel: &str) -> LocalSource {
        LocalSource::Directory(PathBuf::from(rel))
    }

    /// `exec:` source pointing at `rel`.
    fn exec_source(rel: &str) -> LocalSource {
        LocalSource::Exec(PathBuf::from(rel))
    }

    /// Creates `<tmp>/project` and returns the temp-dir guard together
    /// with the project path.
    fn temp_project() -> (tempfile::TempDir, PathBuf) {
        let temp = tempfile::tempdir().unwrap();
        let project_root = temp.path().join("project");
        std::fs::create_dir(&project_root).unwrap();
        (temp, project_root)
    }

    #[test]
    fn workspace_file_climbs_out_of_importer_to_root_sibling() {
        // The `packages/app` importer declares `file:../../vendor-dir`.
        // The result must be the workspace-root-relative `vendor-dir`,
        // not the intermediate `packages/app/../../vendor-dir`.
        let rebased = rebase_local(
            &dir_source("../../vendor-dir"),
            Path::new("packages/app"),
            Path::new(""),
        );
        match rebased {
            LocalSource::Directory(p) => assert_eq!(p, PathBuf::from("vendor-dir")),
            other => panic!("expected Directory, got {other:?}"),
        }
    }

    #[test]
    fn two_importers_referencing_same_target_collide_on_dep_path() {
        // Two importers pointing at the same on-disk path must encode
        // to the same dep_path so they de-dupe in the lockfile.
        let project = Path::new("");
        let from_app = rebase_local(
            &dir_source("../../vendor-dir"),
            Path::new("packages/app"),
            project,
        );
        let from_packages = rebase_local(
            &dir_source("../vendor-dir"),
            Path::new("packages"),
            project,
        );
        assert_eq!(
            from_app.dep_path("vendor-dir"),
            from_packages.dep_path("vendor-dir")
        );
    }

    #[test]
    fn root_and_transitive_exec_paths_collide_on_dep_path() {
        let project = Path::new("");
        let root = rebase_local(&exec_source("./scripts/generate-exec.js"), project, project);
        let transitive = rebase_local(
            &exec_source("../../scripts/generate-exec.js"),
            Path::new("packages/portal"),
            project,
        );
        assert_eq!(root.dep_path("exec-pkg"), transitive.dep_path("exec-pkg"));
    }

    #[test]
    fn normalize_preserves_unresolvable_leading_parent() {
        // A leading `..` at the project root still carries meaning and
        // must not be silently dropped.
        let normalized = normalize_lexical(Path::new("../vendor"));
        assert_eq!(normalized, PathBuf::from("../vendor"));
    }

    #[test]
    fn dep_path_and_specifier_use_posix_separators() {
        // Backslash-separated input (as Windows would store it) must
        // hash and render like its forward-slash equivalent, so a
        // checked-in lockfile resolves identically on either OS.
        let win = dir_source("vendor\\nested\\dir");
        let unix = dir_source("vendor/nested/dir");
        assert_eq!(win.dep_path("foo"), unix.dep_path("foo"));
        assert_eq!(win.specifier(), "file:vendor/nested/dir");
        assert_eq!(unix.specifier(), "file:vendor/nested/dir");
    }

    #[test]
    fn exec_script_must_stay_inside_project_root() {
        let (temp, project_root) = temp_project();
        // The script sits next to the project directory, not inside it.
        std::fs::write(temp.path().join("outside.js"), "").unwrap();

        let err = resolve_exec_script_path(&exec_source("../outside.js"), &project_root)
            .unwrap_err();
        assert!(err.contains("resolves outside project root"), "{err}");
    }

    #[test]
    fn exec_script_inside_project_root_is_allowed() {
        let (_temp, project_root) = temp_project();
        let script = project_root.join("scripts").join("generate.js");
        std::fs::create_dir_all(script.parent().unwrap()).unwrap();
        std::fs::write(&script, "").unwrap();

        let resolved =
            resolve_exec_script_path(&exec_source("scripts/generate.js"), &project_root).unwrap();
        assert_eq!(resolved, script.canonicalize().unwrap());
    }
}
722
#[cfg(test)]
mod cve_audit_tarball_bomb {
    use super::*;
    use std::io::Write;

    /// Size of the decompressed payload used by both bomb fixtures.
    const BOMB_BYTES: usize = 200 * 1024 * 1024;

    /// Appends one regular file (mode 0644, GNU header) to `builder`.
    fn append_file<W: Write>(builder: &mut tar::Builder<W>, path: &str, data: &[u8]) {
        let mut header = tar::Header::new_gnu();
        header.set_path(path).unwrap();
        header.set_size(data.len() as u64);
        header.set_mode(0o644);
        header.set_cksum();
        builder.append(&header, data).unwrap();
    }

    /// Gzips `raw` at the best compression level.
    fn gzip_best(raw: &[u8]) -> Vec<u8> {
        let mut gz = Vec::new();
        {
            let mut enc = flate2::write::GzEncoder::new(&mut gz, flate2::Compression::best());
            enc.write_all(raw).unwrap();
            enc.finish().unwrap();
        }
        gz
    }

    /// Tarball whose only entry is a zero-filled `pkg/package.json` of
    /// `uncompressed_size` bytes.
    fn build_zero_tarball(uncompressed_size: usize) -> Vec<u8> {
        let mut tar_buf: Vec<u8> = Vec::new();
        {
            let mut builder = tar::Builder::new(&mut tar_buf);
            let zeros = vec![0u8; uncompressed_size];
            append_file(&mut builder, "pkg/package.json", &zeros);
            builder.finish().unwrap();
        }
        gzip_best(&tar_buf)
    }

    /// Tarball with a zero-filled `pkg/dummy.bin` of `dummy_size` bytes
    /// followed by a small, valid `pkg/package.json`.
    fn build_dummy_then_package_json(dummy_size: usize) -> Vec<u8> {
        let mut tar_buf: Vec<u8> = Vec::new();
        {
            let mut builder = tar::Builder::new(&mut tar_buf);
            let filler = vec![0u8; dummy_size];
            append_file(&mut builder, "pkg/dummy.bin", &filler);
            let manifest = b"{\"name\":\"x\",\"version\":\"0.0.1\"}";
            append_file(&mut builder, "pkg/package.json", &manifest[..]);
            builder.finish().unwrap();
        }
        gzip_best(&tar_buf)
    }

    #[test]
    fn read_tarball_package_json_rejects_decompression_bomb() {
        let bomb = build_zero_tarball(BOMB_BYTES);
        assert!(
            bomb.len() < 400 * 1024,
            "compressed bomb too large to call this an amplification: {}",
            bomb.len()
        );
        let outcome = read_tarball_package_json(&bomb);
        assert!(
            outcome.is_err(),
            "200 MiB decompressed payload must be rejected by the cap, got {:?}",
            outcome.as_ref().map(|b| b.len())
        );
    }

    #[test]
    fn read_tarball_package_json_rejects_dummy_entry_amplification() {
        let bomb = build_dummy_then_package_json(BOMB_BYTES);
        assert!(
            bomb.len() < 400 * 1024,
            "compressed multi-entry bomb too large: {}",
            bomb.len()
        );
        let outcome = read_tarball_package_json(&bomb);
        assert!(
            outcome.is_err(),
            "decompressed dummy entry preceding package.json must hit the output cap"
        );
    }
}
809}