Skip to main content

aube_resolver/
local_source.rs

1use crate::{Error, ResolveTask};
2use aube_lockfile::{LocalSource, LockedPackage};
3use aube_registry::client::RegistryClient;
4use aube_util::path::normalize_lexical;
5use std::collections::BTreeMap;
6use std::path::{Path, PathBuf};
7
8/// Rewrite a `LocalSource` whose path is relative to `importer_root`
9/// into one whose path is relative to `project_root`, so downstream
10/// code (install.rs, linker) can resolve the target with a single
11/// `project_root.join(rel)` regardless of which workspace importer
12/// declared it.
13///
14/// Both the join-then-diff intermediate and the returned path are
15/// lexically normalized — `Path::join` and `pathdiff::diff_paths`
16/// leave `..` components in place, which means `packages/app` +
17/// `../../vendor-dir` would otherwise produce
18/// `packages/app/../../vendor-dir`. That non-canonical form fed into
19/// `dep_path`'s hash would produce a different key for every
20/// importer declaring the same target, and would also leak into the
21/// lockfile's `version:` string.
22pub(crate) fn rebase_local(
23    local: &LocalSource,
24    importer_root: &Path,
25    project_root: &Path,
26) -> LocalSource {
27    // The fast path: importer_root == project_root. Root-importer
28    // installs take this branch, which is also the single-project
29    // case — no rewrite needed and we preserve the raw specifier
30    // bytes for a byte-identical lockfile round-trip.
31    if importer_root == project_root {
32        if let LocalSource::Exec(path) = local {
33            return LocalSource::Exec(normalize_lexical(path));
34        }
35        return local.clone();
36    }
37    let Some(local_path) = local.path() else {
38        // Non-path sources (git) have nothing to rebase.
39        return local.clone();
40    };
41    let abs = normalize_lexical(&importer_root.join(local_path));
42    let rebased = pathdiff::diff_paths(&abs, project_root).map_or(abs, |p| normalize_lexical(&p));
43    match local {
44        LocalSource::Directory(_) => LocalSource::Directory(rebased),
45        LocalSource::Tarball(_) => LocalSource::Tarball(rebased),
46        LocalSource::Link(_) => LocalSource::Link(rebased),
47        LocalSource::Portal(_) => LocalSource::Portal(rebased),
48        LocalSource::Exec(_) => LocalSource::Exec(rebased),
49        LocalSource::Git(_) | LocalSource::RemoteTarball(_) => local.clone(),
50    }
51}
52
53/// Resolve an `exec:` generator path and reject scripts outside the project root.
54pub fn resolve_exec_script_path(
55    local: &LocalSource,
56    project_root: &Path,
57) -> Result<PathBuf, String> {
58    let LocalSource::Exec(rel) = local else {
59        return Err("resolve_exec_script_path called on non-exec source".to_string());
60    };
61    let script = project_root.join(rel);
62    if !script.is_file() {
63        return Err(format!("{} is not a file", script.display()));
64    }
65    let canonical_root = project_root
66        .canonicalize()
67        .map_err(|e| format!("canonicalize project root {}: {e}", project_root.display()))?;
68    let canonical_script = script
69        .canonicalize()
70        .map_err(|e| format!("canonicalize exec script {}: {e}", script.display()))?;
71    if !canonical_script.starts_with(&canonical_root) {
72        return Err(format!(
73            "{} resolves outside project root {}",
74            script.display(),
75            canonical_root.display()
76        ));
77    }
78    Ok(canonical_script)
79}
80
81/// Walk a gzipped npm tarball once and return the raw bytes of its
82/// top-level `package.json` entry. The wrapper directory name varies
83/// (`package/`, but also e.g. GitHub's `owner-repo-<sha>/`), so we
84/// match on the entry's basename plus a 2-component depth check
85/// rather than a hardcoded prefix. Errors come back as plain
86/// `String`s so each caller can wrap them with its own package
87/// identity in whatever error type it prefers — used by both the
88/// `file:` tarball path (`read_local_manifest`) and the remote
89/// tarball resolver (`resolve_remote_tarball`).
90/// Hard upper bound on the bytes read from the gzipped tarball stream
91/// while looking for `package.json`. A 64 MiB ceiling is far above any
92/// real npm package and keeps a hostile gzip bomb from amplifying into
93/// arbitrary RAM. Mirrors `aube-store::MAX_TARBALL_DECOMPRESSED_BYTES`
94/// in spirit — the resolver path was missed in the original cap pass.
95const MAX_RESOLVE_TARBALL_DECOMPRESSED_BYTES: u64 = 64 * 1024 * 1024;
96const MAX_RESOLVE_PACKAGE_JSON_BYTES: u64 = 8 * 1024 * 1024;
97
98fn read_tarball_package_json(bytes: &[u8]) -> Result<Vec<u8>, String> {
99    use std::io::Read;
100    // Cap on the DECOMPRESSED output of the gzip stream so a hostile
101    // tarball with large dummy entries before `package.json` cannot
102    // amplify the fixed compressed input window into arbitrary RAM.
103    // `bytes.take` would only bound the compressed read, which the
104    // decoder is free to expand without ceiling.
105    let gz = flate2::read::GzDecoder::new(bytes);
106    let capped = gz.take(MAX_RESOLVE_TARBALL_DECOMPRESSED_BYTES);
107    let mut archive = tar::Archive::new(capped);
108    for entry in archive.entries().map_err(|e| e.to_string())? {
109        let entry = entry.map_err(|e| e.to_string())?;
110        let entry_path = entry.path().map_err(|e| e.to_string())?.to_path_buf();
111        if entry_path
112            .file_name()
113            .and_then(|n| n.to_str())
114            .is_some_and(|n| n == "package.json")
115            && entry_path.components().count() == 2
116        {
117            let mut buf = Vec::new();
118            entry
119                .take(MAX_RESOLVE_PACKAGE_JSON_BYTES + 1)
120                .read_to_end(&mut buf)
121                .map_err(|e| e.to_string())?;
122            if buf.len() as u64 > MAX_RESOLVE_PACKAGE_JSON_BYTES {
123                return Err("package.json exceeds 8 MiB cap".to_string());
124            }
125            return Ok(buf);
126        }
127    }
128    Err("tarball has no top-level package.json".to_string())
129}
130
131/// Read the `package.json` of a `file:` / `link:` target to discover
132/// the real package name, version, and production dependencies.
133///
134/// For `LocalSource::Directory`, `LocalSource::Link`, and
135/// `LocalSource::Portal` we read the target dir's `package.json`
136/// directly. For `LocalSource::Tarball` we open the `.tgz`, find the
137/// first `*/package.json` entry, and parse its contents without
138/// extracting the rest of the archive.
139pub(crate) fn read_local_manifest(
140    local: &LocalSource,
141    importer_root: &Path,
142) -> Result<(String, String, BTreeMap<String, String>), Error> {
143    let Some(local_path) = local.path() else {
144        return Err(Error::Registry(
145            local.specifier(),
146            "read_local_manifest called on non-path source".to_string(),
147        ));
148    };
149    let path = importer_root.join(local_path);
150
151    let content = match local {
152        LocalSource::Directory(_) | LocalSource::Link(_) | LocalSource::Portal(_) => {
153            std::fs::read(path.join("package.json"))
154                .map_err(|e| Error::Registry(local.specifier(), e.to_string()))?
155        }
156        LocalSource::Tarball(_) => {
157            let bytes = std::fs::read(&path)
158                .map_err(|e| Error::Registry(local.specifier(), e.to_string()))?;
159            read_tarball_package_json(&bytes).map_err(|e| Error::Registry(local.specifier(), e))?
160        }
161        LocalSource::Exec(_) | LocalSource::Git(_) | LocalSource::RemoteTarball(_) => {
162            return Err(Error::Registry(
163                local.specifier(),
164                "read_local_manifest: generated or remote source handled separately".to_string(),
165            ));
166        }
167    };
168
169    let pj: aube_manifest::PackageJson = sonic_rs::from_slice(&content)
170        .or_else(|_| serde_json::from_slice(&content))
171        .map_err(|e| Error::Registry(local.specifier(), e.to_string()))?;
172    Ok((
173        pj.name.unwrap_or_default(),
174        pj.version.unwrap_or_else(|| "0.0.0".to_string()),
175        pj.dependencies,
176    ))
177}
178
179pub(crate) async fn resolve_exec_manifest(
180    name: &str,
181    local: &LocalSource,
182    project_root: &Path,
183) -> Result<(String, BTreeMap<String, String>), Error> {
184    let LocalSource::Exec(_) = local else {
185        return Err(Error::Registry(
186            name.to_string(),
187            "resolve_exec_manifest called on non-exec source".to_string(),
188        ));
189    };
190    let script = resolve_exec_script_path(local, project_root).map_err(|e| {
191        Error::Registry(
192            name.to_string(),
193            format!("exec dependency {}: {e}", local.specifier()),
194        )
195    })?;
196
197    let temp = tempfile::Builder::new()
198        .prefix("aube-exec-resolve-")
199        .tempdir()
200        .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
201    let build_dir = temp.path().join("build");
202    let temp_dir = temp.path().join("temp");
203    std::fs::create_dir_all(&build_dir)
204        .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
205    std::fs::create_dir_all(&temp_dir)
206        .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
207
208    let env = serde_json::json!({
209        "tempDir": temp_dir,
210        "buildDir": build_dir,
211        "locator": format!("{name}@{}", local.specifier()),
212    });
213    let status = tokio::process::Command::new("node")
214        .arg("-e")
215        .arg(crate::YARN_EXEC_WRAPPER)
216        .arg(&script)
217        .env("AUBE_YARN_EXEC_ENV", env.to_string())
218        .current_dir(project_root)
219        .status()
220        .await
221        .map_err(|e| {
222            Error::Registry(
223                name.to_string(),
224                format!("execute {} with Node.js from PATH: {e}", local.specifier()),
225            )
226        })?;
227    if !status.success() {
228        return Err(Error::Registry(
229            name.to_string(),
230            format!(
231                "exec dependency {} failed with status {status}",
232                local.specifier()
233            ),
234        ));
235    }
236
237    let content = std::fs::read(build_dir.join("package.json")).map_err(|e| {
238        Error::Registry(
239            name.to_string(),
240            format!("read generated package.json for {}: {e}", local.specifier()),
241        )
242    })?;
243    let pj: aube_manifest::PackageJson = sonic_rs::from_slice(&content)
244        .or_else(|_| serde_json::from_slice(&content))
245        .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
246    Ok((
247        pj.version.unwrap_or_else(|| "0.0.0".to_string()),
248        pj.dependencies,
249    ))
250}
251
/// Build the `name@version` key for a package.
pub(crate) fn dep_path_for(name: &str, version: &str) -> String {
    let mut key = String::with_capacity(name.len() + 1 + version.len());
    key.push_str(name);
    key.push('@');
    key.push_str(version);
    key
}
255
256/// Match specifier prefixes that resolve to a non-registry source
257/// (`file:`, `link:`, `portal:`, `exec:`, or a git URL form). Used
258/// by the resolver to decide whether to dispatch the local/git branch
259/// instead of the normal version-range lookup.
260pub(crate) fn is_non_registry_specifier(s: &str) -> bool {
261    if s.starts_with("link:") {
262        return true;
263    }
264    if s.starts_with("portal:") {
265        return true;
266    }
267    if s.starts_with("exec:") {
268        return true;
269    }
270    // Git first so `https://host/repo.git` dispatches the git branch
271    // rather than the broader bare-http tarball branch below.
272    if aube_lockfile::parse_git_spec(s).is_some() {
273        return true;
274    }
275    // Any remaining bare `http(s)://` URL is a tarball URL, per npm
276    // semantics — the `.tgz` suffix is not required.
277    if aube_lockfile::LocalSource::looks_like_remote_tarball_url(s) {
278        return true;
279    }
280    // `file:` is a local-path prefix only when it *isn't* also a git
281    // URL form — parse_git_spec already matched `file://…/repo.git`
282    // above, so anything that reaches here is treated as a path.
283    s.starts_with("file:")
284}
285
286pub(crate) fn should_block_exotic_subdep(
287    task: &ResolveTask,
288    resolved: &BTreeMap<String, LockedPackage>,
289    block_exotic_subdeps: bool,
290) -> bool {
291    block_exotic_subdeps
292        && !task.is_root
293        && !task
294            .parent
295            .as_ref()
296            .and_then(|parent| resolved.get(parent))
297            .is_some_and(|pkg| {
298                matches!(
299                    pkg.local_source,
300                    Some(LocalSource::Directory(_))
301                        | Some(LocalSource::Link(_))
302                        | Some(LocalSource::Portal(_))
303                        | Some(LocalSource::Exec(_))
304                )
305            })
306}
307
308/// Pick the lockfile source representation for a *resolved* hosted-git
309/// dependency. pnpm records a github / gitlab / bitbucket dep pinned to
310/// a 40-char commit SHA as a **codeload tarball** (`RemoteTarball`) —
311/// not a `git` resolution — whenever a flat HTTPS archive URL exists
312/// (`codeload_url`) and there's no `&path:` subdir selector. aube
313/// already *fetches* that tarball; emitting it as `RemoteTarball` makes
314/// the written lockfile match pnpm (codeload key + `version:` +
315/// `resolution: {tarball, gitHosted}`) instead of the divergent
316/// `<url>.git#<sha>` / `resolution: {type: git, repo, commit}` form.
317///
318/// Falls back to `Git` for: non-hosted or `git+ssh://` sources (no
319/// codeload URL — pnpm keeps those as `type: git` too), branch/tag refs
320/// that never pinned to a SHA, and `&path:` subpath selectors (a flat
321/// tarball can't address a repo subdirectory).
322fn hosted_git_local_source(
323    original_url: String,
324    committish: Option<String>,
325    resolved: String,
326    subpath: Option<String>,
327    integrity: Option<String>,
328    codeload_url: Option<&str>,
329) -> LocalSource {
330    match (subpath.as_deref(), codeload_url) {
331        (None, Some(codeload)) => LocalSource::RemoteTarball(aube_lockfile::RemoteTarballSource {
332            url: codeload.to_string(),
333            integrity: integrity.unwrap_or_default(),
334            git_hosted: true,
335        }),
336        _ => LocalSource::Git(aube_lockfile::GitSource {
337            url: original_url,
338            committish,
339            resolved,
340            integrity,
341            subpath,
342        }),
343    }
344}
345
346/// Turn a raw `GitSource` (committish parsed from the user's
347/// specifier, empty `resolved`) into a fully-resolved one by either
348/// fetching a hosted-tarball over HTTPS (github / gitlab / bitbucket
349/// public reads, matching what npm `pacote` and pnpm
350/// `gitHostedTarballFetcher` do) or, for any other host or any
351/// codeload-unreachable case, falling back to `git ls-remote` +
352/// shallow clone. The materialized tree lives in a commit-keyed temp
353/// directory shared with install-time materialization, so the same
354/// extraction or clone is never repeated within a single `aube
355/// install`.
356///
357/// Hosted-tarball routing matches npm/pnpm semantics: the lockfile's
358/// stored `url` is canonical-identity only — even when it carries an
359/// SSH form the user has no key for, we re-derive an HTTPS URL from
360/// the `(host, owner, repo)` tuple at fetch time. Returns the
361/// original URL unchanged in `LocalSource::Git.url` so a subsequent
362/// `aube install` produces the same lockfile bytes (cross-tool
363/// compat with pnpm / npm / yarn).
364pub(crate) async fn resolve_git_source(
365    name: &str,
366    git: &aube_lockfile::GitSource,
367    shallow: bool,
368    client: Option<&RegistryClient>,
369) -> Result<
370    (
371        LocalSource,
372        String,
373        BTreeMap<String, String>,
374        Option<String>,
375    ),
376    Error,
377> {
378    let original_url = git.url.clone();
379    let committish = git.committish.clone();
380    let subpath = git.subpath.clone();
381    let hosted = aube_lockfile::parse_hosted_git(&original_url);
382    // Use the HTTPS form when talking to git for hosted hosts — the
383    // lockfile-canonical `git+ssh://git@…` URL would dial SSH and
384    // fail for users with no `~/.ssh/`. Non-hosted URLs go through
385    // unchanged so SSH-only setups keep working.
386    let runtime_url = hosted
387        .as_ref()
388        .map(|h| h.https_url())
389        .unwrap_or_else(|| original_url.clone());
390
391    // Resolve the committish to a 40-char SHA. `git_resolve_ref`
392    // short-circuits on a SHA and shells `git ls-remote` for branch /
393    // tag / HEAD. Passing the rewritten HTTPS URL means hosted
394    // branch/tag refs are pinnable from a host with no SSH key
395    // configured.
396    let runtime_url_for_ref = runtime_url.clone();
397    let committish_for_ref = committish.clone();
398    let name_for_ref = name.to_string();
399    let resolved_sha = tokio::task::spawn_blocking(move || -> Result<String, Error> {
400        let seed = aube_store::git_resolve_ref(&runtime_url_for_ref, committish_for_ref.as_deref())
401            .map_err(|e| Error::Registry(name_for_ref.clone(), e.to_string()))?;
402        // Only full SHAs survive — abbreviated user-written prefixes
403        // come back unchanged from `git_resolve_ref` and need to fall
404        // through to the clone path so `git checkout <prefix>` can
405        // expand them.
406        Ok(seed)
407    })
408    .await
409    .map_err(|e| {
410        Error::Registry(
411            name.to_string(),
412            format!("git ls-remote task panicked: {e}"),
413        )
414    })??;
415
416    let codeload_url = hosted.as_ref().and_then(|h| h.tarball_url(&resolved_sha));
417
418    // Cache hit fast path: skip the HTTPS round-trip when a prior call
419    // (the resolver's earlier visit to this dep, or a previous install)
420    // already populated the codeload cache. Mirrors `git_shallow_clone`'s
421    // top-of-function reuse check.
422    if codeload_url.is_some()
423        && git.integrity.is_some()
424        && let Some((clone_dir, _head_sha)) = aube_store::codeload_cache_lookup(
425            &original_url,
426            &resolved_sha,
427            git.integrity.as_deref(),
428        )
429    {
430        let integrity = aube_store::codeload_cache_integrity(
431            &original_url,
432            &resolved_sha,
433            git.integrity.as_deref(),
434        );
435        let pkg_root = match &subpath {
436            Some(sub) => clone_dir.join(sub),
437            None => clone_dir.clone(),
438        };
439        let manifest_bytes = std::fs::read(pkg_root.join("package.json")).map_err(|e| {
440            let where_ = subpath
441                .as_deref()
442                .map(|s| format!(" at /{s}"))
443                .unwrap_or_default();
444            Error::Registry(
445                name.to_string(),
446                format!("read package.json in cached codeload extract{where_}: {e}"),
447            )
448        })?;
449        let pj: aube_manifest::PackageJson = serde_json::from_slice(&manifest_bytes)
450            .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
451        let version = pj.version.unwrap_or_else(|| "0.0.0".to_string());
452        return Ok((
453            hosted_git_local_source(
454                original_url,
455                committish,
456                resolved_sha,
457                subpath,
458                git.integrity.clone(),
459                codeload_url.as_deref(),
460            ),
461            version,
462            pj.dependencies,
463            integrity,
464        ));
465    }
466
467    // Try the codeload fast path when applicable. `client` is None for
468    // resolve paths that don't have a registry client wired up
469    // (`aube import`'s lockfile-only flow); those just fall through.
470    if let (Some(c), Some(url_to_fetch)) = (client, codeload_url.as_deref()) {
471        match c.fetch_tarball_bytes(url_to_fetch).await {
472            Ok(bytes) => {
473                // Extract into the commit-keyed cache and read the
474                // (possibly subpath-scoped) `package.json` like the
475                // clone path does. Return the original lockfile URL
476                // in `LocalSource::Git.url` for cross-tool round-trip.
477                let bytes_vec = bytes.to_vec();
478                if let Some(pinned) = &git.integrity {
479                    aube_store::verify_integrity(&bytes_vec, pinned)
480                        .map_err(|e| Error::Registry(name.to_string(), e.to_string()))?;
481                }
482                let integrity = git
483                    .integrity
484                    .clone()
485                    .unwrap_or_else(|| aube_store::sha512_integrity(&bytes_vec));
486                let url_for_extract = original_url.clone();
487                let sha_for_extract = resolved_sha.clone();
488                let integrity_for_extract = integrity.clone();
489                let subpath_for_extract = subpath.clone();
490                let name_for_extract = name.to_string();
491                let extracted = tokio::task::spawn_blocking(move || -> Result<_, Error> {
492                    let (clone_dir, resolved) = aube_store::extract_codeload_tarball(
493                        &bytes_vec,
494                        &url_for_extract,
495                        &sha_for_extract,
496                        Some(&integrity_for_extract),
497                    )
498                    .map_err(|e| Error::Registry(name_for_extract.clone(), e.to_string()))?;
499                    let pkg_root = match &subpath_for_extract {
500                        Some(sub) => clone_dir.join(sub),
501                        None => clone_dir.clone(),
502                    };
503                    let manifest_bytes =
504                        std::fs::read(pkg_root.join("package.json")).map_err(|e| {
505                            let where_ = subpath_for_extract
506                                .as_deref()
507                                .map(|s| format!(" at /{s}"))
508                                .unwrap_or_default();
509                            Error::Registry(
510                                name_for_extract.clone(),
511                                format!("read package.json in codeload extract{where_}: {e}"),
512                            )
513                        })?;
514                    let pj: aube_manifest::PackageJson = serde_json::from_slice(&manifest_bytes)
515                        .map_err(|e| Error::Registry(name_for_extract.clone(), e.to_string()))?;
516                    let version = pj.version.unwrap_or_else(|| "0.0.0".to_string());
517                    Ok((resolved, version, pj.dependencies))
518                })
519                .await
520                .map_err(|e| {
521                    Error::Registry(name.to_string(), format!("codeload extract panicked: {e}"))
522                })?;
523                let integrity = aube_store::sha512_integrity(&bytes);
524                match extracted {
525                    Ok((resolved, version, deps)) => {
526                        return Ok((
527                            hosted_git_local_source(
528                                original_url,
529                                committish,
530                                resolved,
531                                subpath,
532                                Some(integrity.clone()),
533                                Some(url_to_fetch),
534                            ),
535                            version,
536                            deps,
537                            Some(integrity),
538                        ));
539                    }
540                    Err(e) => {
541                        // Mirror the installer: a corrupt or
542                        // unexpectedly-shaped tarball (CDN hiccup,
543                        // unsafe-path rejection, Windows symlink) falls
544                        // through to `git clone`, which inherits the
545                        // user's git credential helper and can write
546                        // symlinks via git's admin-aware path.
547                        tracing::debug!(
548                            name,
549                            "codeload extract failed, falling back to git clone: {e}",
550                        );
551                    }
552                }
553            }
554            Err(e) => {
555                // Codeload 404s on private repos (it doesn't accept
556                // npm-registry auth) — fall through to `git
557                // clone`, which inherits the user's git credential
558                // helper / ssh keys for private access.
559                tracing::debug!(
560                    name,
561                    url = %aube_util::url::redact_url(url_to_fetch),
562                    "codeload fetch failed, falling back to git clone: {e}",
563                );
564            }
565        }
566    }
567
568    // Fallback: shallow git clone over the rewritten HTTPS URL (or the
569    // original URL for non-hosted hosts). Same `spawn_blocking` dance
570    // the original implementation used.
571    let runtime_url_for_clone = runtime_url;
572    let original_url_for_lockfile = original_url.clone();
573    let resolved_sha_for_clone = resolved_sha.clone();
574    let subpath_for_clone = subpath.clone();
575    let name_for_clone = name.to_string();
576    let (local, version, deps) = tokio::task::spawn_blocking(move || -> Result<_, Error> {
577        let (clone_dir, resolved) =
578            aube_store::git_shallow_clone(&runtime_url_for_clone, &resolved_sha_for_clone, shallow)
579                .map_err(|e| Error::Registry(name_for_clone.clone(), e.to_string()))?;
580        let pkg_root = match &subpath_for_clone {
581            Some(sub) => clone_dir.join(sub),
582            None => clone_dir.clone(),
583        };
584        let manifest_bytes = std::fs::read(pkg_root.join("package.json")).map_err(|e| {
585            let where_ = subpath_for_clone
586                .as_deref()
587                .map(|s| format!(" at /{s}"))
588                .unwrap_or_default();
589            Error::Registry(
590                name_for_clone.clone(),
591                format!("read package.json in clone{where_}: {e}"),
592            )
593        })?;
594        let pj: aube_manifest::PackageJson = serde_json::from_slice(&manifest_bytes)
595            .map_err(|e| Error::Registry(name_for_clone.clone(), e.to_string()))?;
596        let version = pj.version.unwrap_or_else(|| "0.0.0".to_string());
597        Ok((
598            LocalSource::Git(aube_lockfile::GitSource {
599                url: original_url_for_lockfile,
600                committish,
601                resolved,
602                integrity: None,
603                subpath: subpath_for_clone,
604            }),
605            version,
606            pj.dependencies,
607        ))
608    })
609    .await
610    .map_err(|e| Error::Registry(name.to_string(), format!("git task panicked: {e}")))??;
611    Ok((local, version, deps, None))
612}
613
614/// Fetch a remote tarball URL, compute its sha512 integrity, and read
615/// the enclosed `package.json` for version + transitive deps. Returns
616/// a fully-populated `LocalSource::RemoteTarball` alongside the
617/// manifest tuple the resolver's local-dep branch expects.
618pub(crate) async fn resolve_remote_tarball(
619    name: &str,
620    tarball: &aube_lockfile::RemoteTarballSource,
621    client: &RegistryClient,
622) -> Result<(LocalSource, String, BTreeMap<String, String>), Error> {
623    let bytes = client
624        .fetch_tarball_bytes(&tarball.url)
625        .await
626        .map_err(|e| {
627            Error::Registry(
628                name.to_string(),
629                format!("fetch {}: {e}", aube_util::url::redact_url(&tarball.url)),
630            )
631        })?;
632    let name_owned = name.to_string();
633    let url = aube_util::url::redact_url(&tarball.url);
634    let (integrity, version, deps) = tokio::task::spawn_blocking(move || -> Result<_, Error> {
635        let integrity = aube_store::sha512_integrity(&bytes);
636
637        // Walk the tarball once to pull out the top-level
638        // `package.json` (wrapper name varies, so the helper looks
639        // at the first path component's basename, not a hardcoded
640        // `package/package.json`).
641        let manifest_bytes = read_tarball_package_json(&bytes)
642            .map_err(|e| Error::Registry(name_owned.clone(), format!("tarball {url}: {e}")))?;
643        let pj: aube_manifest::PackageJson = serde_json::from_slice(&manifest_bytes)
644            .map_err(|e| Error::Registry(name_owned.clone(), e.to_string()))?;
645        let version = pj.version.unwrap_or_else(|| "0.0.0".to_string());
646        Ok((integrity, version, pj.dependencies))
647    })
648    .await
649    .map_err(|e| Error::Registry(name.to_string(), format!("tarball task panicked: {e}")))??;
650    Ok((
651        LocalSource::RemoteTarball(aube_lockfile::RemoteTarballSource {
652            url: tarball.url.clone(),
653            integrity,
654            git_hosted: tarball.git_hosted,
655        }),
656        version,
657        deps,
658    ))
659}
660
661#[cfg(test)]
662mod rebase_local_tests {
663    use super::*;
664    use std::path::{Path, PathBuf};
665
666    #[test]
667    fn workspace_file_climbs_out_of_importer_to_root_sibling() {
668        // packages/app importer declares `file:../../vendor-dir`.
669        // Expected result: `vendor-dir` (workspace-root relative),
670        // collapsed down from the intermediate
671        // `packages/app/../../vendor-dir` form.
672        let local = LocalSource::Directory(PathBuf::from("../../vendor-dir"));
673        let rebased = rebase_local(&local, Path::new("packages/app"), Path::new(""));
674        match rebased {
675            LocalSource::Directory(p) => assert_eq!(p, PathBuf::from("vendor-dir")),
676            other => panic!("expected Directory, got {other:?}"),
677        }
678    }
679
680    #[test]
681    fn two_importers_referencing_same_target_collide_on_dep_path() {
682        // Both importers end up pointing at the same on-disk path —
683        // the encoded dep_path must match so they de-dupe in the
684        // lockfile.
685        let a = rebase_local(
686            &LocalSource::Directory(PathBuf::from("../../vendor-dir")),
687            Path::new("packages/app"),
688            Path::new(""),
689        );
690        let b = rebase_local(
691            &LocalSource::Directory(PathBuf::from("../vendor-dir")),
692            Path::new("packages"),
693            Path::new(""),
694        );
695        assert_eq!(a.dep_path("vendor-dir"), b.dep_path("vendor-dir"));
696    }
697
698    #[test]
699    fn root_and_transitive_exec_paths_collide_on_dep_path() {
700        let root = rebase_local(
701            &LocalSource::Exec(PathBuf::from("./scripts/generate-exec.js")),
702            Path::new(""),
703            Path::new(""),
704        );
705        let transitive = rebase_local(
706            &LocalSource::Exec(PathBuf::from("../../scripts/generate-exec.js")),
707            Path::new("packages/portal"),
708            Path::new(""),
709        );
710        assert_eq!(root.dep_path("exec-pkg"), transitive.dep_path("exec-pkg"));
711    }
712
713    #[test]
714    fn normalize_preserves_unresolvable_leading_parent() {
715        // `..` at the root of the project is still meaningful —
716        // don't silently drop it.
717        assert_eq!(
718            normalize_lexical(Path::new("../vendor")),
719            PathBuf::from("../vendor")
720        );
721    }
722
723    #[test]
724    fn dep_path_and_specifier_use_posix_separators() {
725        // Backslash-separated input (as Windows would store) must
726        // hash and render the same as a forward-slash equivalent so
727        // a checked-in lockfile resolves identically on either OS.
728        let win = LocalSource::Directory(PathBuf::from("vendor\\nested\\dir"));
729        let unix = LocalSource::Directory(PathBuf::from("vendor/nested/dir"));
730        assert_eq!(win.dep_path("foo"), unix.dep_path("foo"));
731        assert_eq!(win.specifier(), "file:vendor/nested/dir");
732        assert_eq!(unix.specifier(), "file:vendor/nested/dir");
733    }
734
735    #[test]
736    fn exec_script_must_stay_inside_project_root() {
737        let temp = tempfile::tempdir().unwrap();
738        let project_root = temp.path().join("project");
739        let outside = temp.path().join("outside.js");
740        std::fs::create_dir(&project_root).unwrap();
741        std::fs::write(&outside, "").unwrap();
742
743        let local = LocalSource::Exec(PathBuf::from("../outside.js"));
744        let err = resolve_exec_script_path(&local, &project_root).unwrap_err();
745        assert!(err.contains("resolves outside project root"), "{err}");
746    }
747
748    #[test]
749    fn exec_script_inside_project_root_is_allowed() {
750        let temp = tempfile::tempdir().unwrap();
751        let project_root = temp.path().join("project");
752        let script_dir = project_root.join("scripts");
753        let script = script_dir.join("generate.js");
754        std::fs::create_dir_all(&script_dir).unwrap();
755        std::fs::write(&script, "").unwrap();
756
757        let local = LocalSource::Exec(PathBuf::from("scripts/generate.js"));
758        let resolved = resolve_exec_script_path(&local, &project_root).unwrap();
759        assert_eq!(resolved, script.canonicalize().unwrap());
760    }
761}
762
#[cfg(test)]
mod cve_audit_tarball_bomb {
    // Regression tests asserting that `read_tarball_package_json` returns
    // an error for tiny gzip inputs that inflate to hundreds of MiB.
    //
    // The fixtures are streamed: zeros flow from `io::repeat` through the
    // tar writer straight into the gzip encoder, so neither the 200 MiB
    // payload nor the uncompressed tar archive is ever held in memory.
    use super::*;
    use std::io::Read;

    /// Tar writer whose output is gzip-compressed on the fly into a `Vec`.
    type GzTar = tar::Builder<flate2::write::GzEncoder<Vec<u8>>>;

    /// Start an empty `.tar.gz` using the highest compression level.
    fn new_gz_tar() -> GzTar {
        tar::Builder::new(flate2::write::GzEncoder::new(
            Vec::new(),
            flate2::Compression::best(),
        ))
    }

    /// Append one regular file (mode 0644) at `path`; `data` must yield
    /// exactly `size` bytes.
    fn append_file(builder: &mut GzTar, path: &str, size: u64, data: impl Read) {
        let mut header = tar::Header::new_gnu();
        header.set_path(path).unwrap();
        header.set_size(size);
        header.set_mode(0o644);
        // The checksum covers the fields above, so it is computed last.
        header.set_cksum();
        builder.append(&header, data).unwrap();
    }

    /// Append a file at `path` consisting of `size` zero bytes.
    fn append_zeros(builder: &mut GzTar, path: &str, size: usize) {
        let size = u64::try_from(size).expect("usize length fits in u64");
        append_file(builder, path, size, std::io::repeat(0).take(size));
    }

    /// Write the tar trailer, close the gzip stream, and return the
    /// compressed bytes.
    fn finish(builder: GzTar) -> Vec<u8> {
        // `into_inner` finishes the archive before handing back the encoder.
        builder.into_inner().unwrap().finish().unwrap()
    }

    /// Gzipped tarball whose only entry, `pkg/package.json`, is
    /// `uncompressed_size` zero bytes.
    fn build_zero_tarball(uncompressed_size: usize) -> Vec<u8> {
        let mut builder = new_gz_tar();
        append_zeros(&mut builder, "pkg/package.json", uncompressed_size);
        finish(builder)
    }

    /// Gzipped tarball with a `dummy_size`-byte all-zero `pkg/dummy.bin`
    /// placed *before* a small, valid `pkg/package.json`.
    fn build_dummy_then_package_json(dummy_size: usize) -> Vec<u8> {
        let mut builder = new_gz_tar();
        append_zeros(&mut builder, "pkg/dummy.bin", dummy_size);
        let manifest = b"{\"name\":\"x\",\"version\":\"0.0.1\"}";
        let manifest_len = u64::try_from(manifest.len()).expect("usize length fits in u64");
        append_file(&mut builder, "pkg/package.json", manifest_len, &manifest[..]);
        finish(builder)
    }

    #[test]
    fn read_tarball_package_json_rejects_decompression_bomb() {
        let bomb = build_zero_tarball(200 * 1024 * 1024);
        // Sanity-check the fixture: it only demonstrates amplification if
        // the compressed form is small.
        assert!(
            bomb.len() < 400 * 1024,
            "compressed bomb too large to call this an amplification: {}",
            bomb.len()
        );
        let result = read_tarball_package_json(&bomb);
        assert!(
            result.is_err(),
            "200 MiB decompressed payload must be rejected by the cap, got {:?}",
            result.as_ref().map(|b| b.len())
        );
    }

    #[test]
    fn read_tarball_package_json_rejects_dummy_entry_amplification() {
        let bomb = build_dummy_then_package_json(200 * 1024 * 1024);
        assert!(
            bomb.len() < 400 * 1024,
            "compressed multi-entry bomb too large: {}",
            bomb.len()
        );
        let result = read_tarball_package_json(&bomb);
        assert!(
            result.is_err(),
            "decompressed dummy entry preceding package.json must hit the output cap"
        );
    }
}
850
#[cfg(test)]
mod hosted_git_local_source_tests {
    // Checks which `LocalSource` variant `hosted_git_local_source` picks
    // depending on whether a subpath and a codeload URL are supplied.
    use super::*;

    /// Commit hash shared by every case below.
    const SHA: &str = "78e559baa908942097330f7967dfbf623ebc2529";

    #[test]
    fn hosted_sha_without_subpath_becomes_codeload_remote_tarball() {
        let codeload = format!("https://codeload.github.com/xmppo/node-expat/tar.gz/{SHA}");
        let src = hosted_git_local_source(
            String::from("git+ssh://git@github.com/xmppo/node-expat.git"),
            Some(format!("v2.4.3#{SHA}")),
            SHA.to_owned(),
            None,
            Some(String::from("sha512-deadbeef")),
            Some(codeload.as_str()),
        );

        let LocalSource::RemoteTarball(tarball) = src else {
            panic!("expected RemoteTarball, got {src:?}");
        };
        // pnpm keys the lockfile entry by this flat tarball URL.
        assert_eq!(tarball.url, codeload);
        assert_eq!(tarball.integrity, "sha512-deadbeef");
        assert!(tarball.git_hosted, "codeload archives must flag gitHosted");
        // The writer threads the specifier into snapshot deps and the
        // packages key, so it has to be exactly the codeload URL.
        assert_eq!(
            LocalSource::RemoteTarball(tarball).specifier(),
            codeload,
            "specifier must be the bare codeload URL pnpm records"
        );
    }

    #[test]
    fn subpath_selector_stays_git() {
        // A flat tarball cannot address a directory inside the repo, so
        // pnpm keeps `&path:` deps as `type: git`; this must match.
        let codeload = format!("https://codeload.github.com/acme/mono/tar.gz/{SHA}");
        let src = hosted_git_local_source(
            String::from("git+ssh://git@github.com/acme/mono.git"),
            Some(SHA.to_owned()),
            SHA.to_owned(),
            Some(String::from("packages/leaf")),
            Some(String::from("sha512-x")),
            Some(codeload.as_str()),
        );

        let LocalSource::Git(git) = src else {
            panic!("expected Git with subpath, got {src:?}");
        };
        assert_eq!(git.resolved, SHA);
        assert_eq!(git.subpath.as_deref(), Some("packages/leaf"));
    }

    #[test]
    fn no_codeload_url_stays_git() {
        // Non-hosted / ssh-only sources have no flat archive URL; pnpm
        // records them as `type: git`, and so does this resolver.
        let src = hosted_git_local_source(
            String::from("git+ssh://git@example.com/internal/dep.git"),
            Some(SHA.to_owned()),
            SHA.to_owned(),
            None,
            Some(String::from("sha512-y")),
            None,
        );

        let LocalSource::Git(git) = src else {
            panic!("expected Git, got {src:?}");
        };
        assert_eq!(git.url, "git+ssh://git@example.com/internal/dep.git");
        assert_eq!(git.integrity.as_deref(), Some("sha512-y"));
    }
}