mod cargo_toml_rewrite;
mod path_deps;
mod pyproject;
mod unpack;
mod utils;
use crate::pyproject_toml::SdistGenerator;
use crate::{ModuleWriter, PyProjectToml, SDistWriter, VirtualWriter};
use anyhow::{Context, Result, bail};
use cargo_metadata::camino::{self, Utf8Path};
use ignore::overrides::Override;
use normpath::PathExt as _;
use path_slash::PathExt as _;
use std::collections::{HashMap, HashSet};
use std::env;
use std::ffi::OsStr;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::str;
use tracing::{debug, trace, warn};
use self::cargo_toml_rewrite::{
WorkspaceManifestInheritance, parse_toml_file, parse_workspace_manifest_inheritance,
resolve_workspace_inheritance, rewrite_cargo_toml, rewrite_cargo_toml_package_field,
rewrite_cargo_toml_targets, strip_non_workspace_tables,
};
pub use self::path_deps::{PathDependency, find_path_deps};
use self::pyproject::{add_pyproject_metadata, add_pyproject_toml, add_python_sources};
pub use self::unpack::{UnpackedSdist, unpack_sdist};
use self::utils::{common_path_prefix, is_compiled_artifact, normalize_path};
/// A file referenced by a path-valued manifest field (such as `readme` or
/// `license-file`) after it has been located on disk.
#[derive(Debug)]
struct ManifestAsset {
/// Normalized path of the file on disk.
source: PathBuf,
/// Final component of the manifest directory joined with the field value
/// (taken before normalization); used as the file name when the asset is
/// copied into a target directory.
filename: String,
}
/// Locates the file named by a path-valued manifest field.
///
/// `field_value` is interpreted relative to `manifest_dir`. `kind` only labels
/// error messages. When `allowed_root` is given, the normalized file must live
/// below the normalized root.
///
/// # Errors
/// Fails when the joined path or the allowed root cannot be normalized, when
/// the file resolves outside the allowed root, or when the joined path has no
/// file name that is valid UTF-8.
fn resolve_manifest_asset(
    manifest_dir: &Path,
    field_value: &Path,
    kind: &str,
    allowed_root: Option<&Path>,
) -> Result<ManifestAsset> {
    let joined = manifest_dir.join(field_value);
    let source = joined
        .normalize()
        .with_context(|| {
            format!(
                "{kind} path `{}` does not exist or is invalid",
                joined.display()
            )
        })?
        .into_path_buf();

    if let Some(root) = allowed_root {
        let normalized_root = root
            .normalize()
            .with_context(|| {
                format!(
                    "allowed root `{}` does not exist or is invalid",
                    root.display()
                )
            })?
            .into_path_buf();
        // Both sides are normalized, so `..` segments cannot be used to escape the root.
        let inside_root = source.starts_with(&normalized_root);
        if !inside_root {
            bail!(
                "{kind} path `{}` resolves outside allowed root `{}`",
                joined.display(),
                normalized_root.display()
            );
        }
    }

    // The name is taken from the joined path as written, not from the normalized one.
    let filename = joined
        .file_name()
        .and_then(OsStr::to_str)
        .with_context(|| format!("{kind} path `{}` has no filename", joined.display()))?
        .to_string();

    Ok(ManifestAsset { source, filename })
}
/// Resolves a manifest asset via [`resolve_manifest_asset`] and registers it
/// with `writer` as `target_dir/<asset file name>`.
///
/// Returns the resolved asset so the caller can refer to its file name.
///
/// # Errors
/// Propagates resolution failures and any error from `writer.add_file`.
fn resolve_and_add_manifest_asset(
    writer: &mut VirtualWriter<SDistWriter>,
    manifest_dir: &Path,
    field_value: &Path,
    target_dir: &Path,
    kind: &str,
    allowed_root: Option<&Path>,
) -> Result<ManifestAsset> {
    let resolved = resolve_manifest_asset(manifest_dir, field_value, kind, allowed_root)?;
    let destination = target_dir.join(&resolved.filename);
    writer.add_file(destination, &resolved.source, false)?;
    Ok(resolved)
}
fn cargo_package_file_list(manifest_path: &Path) -> Result<Vec<String>> {
debug!(
"Getting cargo package file list for {}",
manifest_path.display()
);
let args = ["package", "--list", "--allow-dirty", "--manifest-path"];
let output = Command::new("cargo")
.args(args)
.arg(manifest_path)
.output()
.with_context(|| {
format!(
"Failed to run `cargo {} {}`",
args.join(" "),
manifest_path.display()
)
})?;
if !output.status.success() {
bail!(
"Failed to query file list from cargo: {}\n--- Manifest path: {}\n--- Stdout:\n{}\n--- Stderr:\n{}",
output.status,
manifest_path.display(),
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr),
);
}
if !output.stderr.is_empty() {
eprintln!(
"From `cargo {} {}`:",
args.join(" "),
manifest_path.display()
);
std::io::stderr().write_all(&output.stderr)?;
}
let files = str::from_utf8(&output.stdout)
.context("Cargo printed invalid utf-8 ಠ_ಠ")?
.lines()
.map(String::from)
.collect();
Ok(files)
}
/// Per-crate settings consumed by `add_crate_to_source_distribution`.
struct AddCrateOptions<'a> {
/// When set, a `pyproject.toml` listed by cargo is never copied from this crate.
is_root: bool,
/// Path dependencies handed to `rewrite_cargo_toml`; that rewrite is skipped when `None`.
known_path_deps: Option<&'a HashMap<String, PathDependency>>,
/// Manifest-relative prefixes; listed files below any of them are left out.
skip_prefixes: Vec<PathBuf>,
/// When set, no rewritten `Cargo.toml` is written for this crate.
skip_cargo_toml: bool,
/// Cargo metadata of the crate, forwarded to the manifest rewriting helpers.
package_metadata: &'a cargo_metadata::Package,
/// When present, `resolve_workspace_inheritance` is run on the manifest with this data.
workspace_inheritance: Option<&'a WorkspaceManifestInheritance>,
}
/// Adds one crate (its packaged files and, unless disabled, a rewritten
/// `Cargo.toml`) to the source distribution below `prefix`.
///
/// The file list comes from `cargo_package_file_list`. `readme` and
/// `license_file`, when given, only supply the file names written into the
/// rewritten manifest's `readme` / `license-file` fields. `opts` carries the
/// remaining per-crate switches.
///
/// # Errors
/// Propagates failures from querying cargo, from parsing or rewriting the
/// manifest, and from the writer.
///
/// # Panics
/// Panics if `manifest_path` has no parent directory or no file name.
fn add_crate_to_source_distribution(
writer: &mut VirtualWriter<SDistWriter>,
manifest_path: &Path,
prefix: &Path,
readme: Option<&ManifestAsset>,
license_file: Option<&ManifestAsset>,
opts: &AddCrateOptions<'_>,
) -> Result<()> {
let file_list = cargo_package_file_list(manifest_path)?;
trace!("File list: {:?}", file_list);
let manifest_dir = manifest_path.parent().unwrap();
// Pair every listed path (relative to the manifest) with its on-disk
// location, then drop everything that must not be copied verbatim.
let target_source: Vec<_> = file_list
.iter()
.map(|relative_to_manifest| {
let relative_to_cwd = manifest_dir.join(relative_to_manifest.as_str());
(relative_to_manifest.as_str(), relative_to_cwd)
})
.filter(|(target, source)| {
if *target == "Cargo.toml.orig" {
false
} else if *target == "Cargo.toml" {
// Never copied as-is; a rewritten manifest is written further down
// (unless `skip_cargo_toml` is set).
false
} else if opts.is_root && *target == "pyproject.toml" {
false
} else if prefix.components().count() == 1 && *target == "pyproject.toml" {
// A single-component prefix means this crate's files land directly
// in that one directory.
debug!(
"Skipping potentially non-main {}",
prefix.join(target).display()
);
false
} else if opts
.skip_prefixes
.iter()
.any(|p| Path::new(target).starts_with(p))
{
debug!(
"Skipping {} (will be added separately)",
prefix.join(target).display()
);
false
} else if is_compiled_artifact(Path::new(target)) {
debug!("Ignoring {}", target);
false
} else {
// Keep only existing regular files that neither the source path nor
// the target path excludes.
source.is_file() && !writer.exclude(source) && !writer.exclude(prefix.join(target))
}
})
.collect();
// Captured before the `contains_target` filter below, so files the writer
// already holds still count as packaged when targets are rewritten.
let packaged_files: HashSet<PathBuf> = target_source
.iter()
.map(|(target, _)| normalize_path(Path::new(target)))
.collect();
// Do not add a target the writer already contains.
let target_source: Vec<_> = target_source
.into_iter()
.filter(|(target, _)| !writer.contains_target(prefix.join(target)))
.collect();
if !opts.skip_cargo_toml {
let mut document = parse_toml_file(manifest_path, "Cargo.toml")?;
rewrite_cargo_toml_package_field(
&mut document,
manifest_path,
"readme",
readme.map(|a| a.filename.as_str()),
)?;
rewrite_cargo_toml_package_field(
&mut document,
manifest_path,
"license-file",
license_file.map(|a| a.filename.as_str()),
)?;
if let Some(known_path_deps) = opts.known_path_deps {
rewrite_cargo_toml(&mut document, manifest_path, known_path_deps)?;
}
if opts.workspace_inheritance.is_some() {
resolve_workspace_inheritance(
&mut document,
opts.package_metadata,
opts.workspace_inheritance,
)?;
}
rewrite_cargo_toml_targets(
&mut document,
manifest_path,
opts.package_metadata,
&packaged_files,
)?;
// The rewritten manifest keeps the original file name and goes under `prefix`.
let cargo_toml_path = prefix.join(manifest_path.file_name().unwrap());
writer.add_bytes(
cargo_toml_path,
Some(manifest_path),
document.to_string().as_bytes(),
false,
)?;
}
for (target, source) in target_source {
writer.add_file(prefix.join(target), source, false)?;
}
Ok(())
}
/// Adds every path reported by `git ls-files -z` to the source distribution.
///
/// Git is run in the directory containing `pyproject_toml_path`; each reported
/// path is added as `prefix/<path>` with `<that directory>/<path>` as its source.
///
/// # Errors
/// Fails when git cannot be run, exits unsuccessfully, prints output that is
/// not valid UTF-8, or when the writer rejects a file.
///
/// # Panics
/// Panics if `pyproject_toml_path` has no parent directory.
fn add_git_tracked_files_to_sdist(
    pyproject_toml_path: &Path,
    writer: &mut VirtualWriter<SDistWriter>,
    prefix: impl AsRef<Path>,
) -> Result<()> {
    let project_dir = pyproject_toml_path.parent().unwrap();
    let git = Command::new("git")
        .args(["ls-files", "-z"])
        .current_dir(project_dir)
        .output()
        .context("Failed to run `git ls-files -z`")?;
    if !git.status.success() {
        bail!(
            "Failed to query file list from git: {}\n--- Project Path: {}\n--- Stdout:\n{}\n--- Stderr:\n{}",
            git.status,
            project_dir.display(),
            String::from_utf8_lossy(&git.stdout),
            String::from_utf8_lossy(&git.stderr),
        );
    }

    let prefix = prefix.as_ref();
    let listing = str::from_utf8(&git.stdout).context("git printed invalid utf-8 ಠ_ಠ")?;
    // `-z` separates entries with NUL; empty pieces (such as the one after the
    // last separator) are skipped.
    for tracked in listing.split('\0').filter(|entry| !entry.is_empty()) {
        writer.add_file(prefix.join(tracked), project_dir.join(tracked), false)?;
    }
    Ok(())
}
/// Paths and lookup tables shared by the steps that assemble a cargo-based
/// source distribution. Built once by `SdistContext::new`.
struct SdistContext<'a> {
/// The project being packaged.
project: &'a crate::ProjectContext,
/// Directory prefix under which target paths inside the sdist are placed.
root_dir: &'a Path,
/// Workspace root as reported by cargo metadata.
workspace_root: &'a Utf8Path,
/// `workspace_root` joined with `Cargo.toml`.
workspace_manifest_path: camino::Utf8PathBuf,
/// Path dependencies returned by `find_path_deps`.
known_path_deps: HashMap<String, PathDependency>,
/// Directory returned by `compute_sdist_root`; source paths are made relative to it.
sdist_root: PathBuf,
/// Parent directory of the normalized main crate manifest path.
abs_manifest_dir: PathBuf,
/// Main crate manifest directory relative to `sdist_root`.
relative_main_crate_manifest_dir: PathBuf,
/// Directory returned by `compute_project_root`.
project_root: PathBuf,
/// Directory containing `pyproject.toml`.
pyproject_dir: PathBuf,
/// Parsed workspace manifests of path dependencies that belong to another
/// workspace, keyed by the path of that workspace's `Cargo.toml`.
workspace_manifest_cache: HashMap<PathBuf, WorkspaceManifestInheritance>,
}
impl<'a> SdistContext<'a> {
    /// Collects the paths and lookup tables needed to assemble the sdist.
    ///
    /// # Errors
    /// Fails when path dependencies cannot be determined, when the sdist root
    /// cannot be computed, when the manifest path cannot be normalized, or
    /// when a foreign workspace manifest cannot be parsed.
    ///
    /// # Panics
    /// Panics if the manifest or `pyproject.toml` path has no parent directory,
    /// or if the manifest directory is not located below the sdist root.
    fn new(
        project: &'a crate::ProjectContext,
        pyproject_toml_path: &Path,
        root_dir: &'a Path,
    ) -> Result<Self> {
        let workspace_root = &project.cargo_metadata.workspace_root;
        let main_manifest = &project.manifest_path;

        let known_path_deps = find_path_deps(&project.cargo_metadata)?;
        debug!(
            "Found path dependencies: {:?}",
            known_path_deps.keys().collect::<Vec<_>>()
        );

        let sdist_root = compute_sdist_root(
            workspace_root,
            pyproject_toml_path,
            &project.project_layout.python_dir,
            &known_path_deps,
        )?;
        debug!("Found sdist root: {}", sdist_root.display());

        let normalized_manifest = main_manifest
            .normalize()
            .with_context(|| {
                format!(
                    "manifest path `{}` does not exist or is invalid",
                    main_manifest.display()
                )
            })?
            .into_path_buf();
        let abs_manifest_dir = normalized_manifest.parent().unwrap().to_path_buf();

        // The relative directory is derived from the manifest path as given,
        // not from the normalized one.
        let manifest_dir_as_given = main_manifest.parent().unwrap();
        let relative_main_crate_manifest_dir = manifest_dir_as_given
            .strip_prefix(&sdist_root)
            .unwrap()
            .to_path_buf();

        let project_root = compute_project_root(pyproject_toml_path, &sdist_root).to_path_buf();
        let pyproject_dir = pyproject_toml_path.parent().unwrap().to_path_buf();
        let workspace_manifest_cache =
            build_workspace_manifest_cache(&known_path_deps, workspace_root)?;

        Ok(Self {
            project,
            root_dir,
            workspace_root,
            workspace_manifest_path: workspace_root.join("Cargo.toml"),
            known_path_deps,
            sdist_root,
            abs_manifest_dir,
            relative_main_crate_manifest_dir,
            project_root,
            pyproject_dir,
            workspace_manifest_cache,
        })
    }
}
/// Parses the workspace manifest of every path dependency whose workspace
/// root differs from `root_workspace`.
///
/// The returned map is keyed by the workspace manifest path
/// (`<workspace_root>/Cargo.toml`); each manifest is parsed at most once, even
/// when several path dependencies share a workspace.
///
/// # Errors
/// Propagates any failure from `parse_workspace_manifest_inheritance`.
fn build_workspace_manifest_cache(
    known_path_deps: &HashMap<String, PathDependency>,
    root_workspace: &Utf8Path,
) -> Result<HashMap<PathBuf, WorkspaceManifestInheritance>> {
    let mut cache = HashMap::new();
    for path_dep in known_path_deps.values() {
        // Dependencies in the main workspace need no separate manifest.
        if path_dep.workspace_root.as_path() == root_workspace.as_std_path() {
            continue;
        }
        // One lookup both detects an already-parsed manifest and reserves the
        // slot, avoiding a second hash and a clone of the key.
        if let std::collections::hash_map::Entry::Vacant(slot) =
            cache.entry(path_dep.workspace_root.join("Cargo.toml"))
        {
            let inheritance = parse_workspace_manifest_inheritance(slot.key())?;
            slot.insert(inheritance);
        }
    }
    Ok(cache)
}
/// Determines the directory that all sdist source paths are made relative to.
///
/// Starts from the common prefix of `workspace_root` and `pyproject_toml_path`,
/// narrows it to also cover the manifest directory of every path dependency,
/// and finally widens it to cover `python_dir` when that directory lies
/// outside and a common prefix exists.
///
/// # Errors
/// Fails when the workspace root and `pyproject.toml` share no common prefix,
/// or when a path dependency shares no common prefix with the root found so far.
///
/// # Panics
/// Panics if a path dependency's manifest path has no parent directory.
fn compute_sdist_root(
    workspace_root: &Utf8Path,
    pyproject_toml_path: &Path,
    python_dir: &Path,
    known_path_deps: &HashMap<String, PathDependency>,
) -> Result<PathBuf> {
    // Report disjoint paths (e.g. different drives) as an error instead of
    // panicking, matching how the path-dependency case below is handled.
    let mut sdist_root = common_path_prefix(workspace_root.as_std_path(), pyproject_toml_path)
        .with_context(|| {
            format!(
                "Failed to determine common path prefix of workspace root `{}` and `{}`",
                workspace_root,
                pyproject_toml_path.display()
            )
        })?;
    for path_dep in known_path_deps.values() {
        sdist_root = common_path_prefix(&sdist_root, path_dep.manifest_path.parent().unwrap())
            .context("Failed to determine common path prefix of path dependencies")?;
    }
    // A python directory outside the root only widens it when a common prefix
    // exists; otherwise the root is left untouched.
    if !python_dir.starts_with(&sdist_root)
        && let Some(prefix) = common_path_prefix(&sdist_root, python_dir)
    {
        sdist_root = prefix;
    }
    Ok(sdist_root)
}
/// Picks the outermost of the `pyproject.toml` directory and `sdist_root`.
///
/// Returns `sdist_root` when the `pyproject.toml` directory is equal to it or
/// nested inside it; otherwise returns the `pyproject.toml` directory.
///
/// # Panics
/// Panics if `pyproject_toml_path` has no parent, or if neither directory
/// contains the other.
fn compute_project_root<'a>(pyproject_toml_path: &'a Path, sdist_root: &'a Path) -> &'a Path {
    let pyproject_dir = pyproject_toml_path.parent().unwrap();
    // `starts_with` compares whole components and also holds for equal paths.
    if pyproject_dir.starts_with(sdist_root) {
        return sdist_root;
    }
    assert!(sdist_root.starts_with(pyproject_dir));
    pyproject_dir
}
/// Adds one path dependency to the source distribution.
///
/// In order: the dependency's readme and license file (when declared), the
/// crate itself via `add_crate_to_source_distribution`, and finally the
/// manifest of the dependency's own workspace when that workspace differs
/// from the main one and lies below the sdist root. For a foreign workspace
/// outside the sdist root, the cached workspace data is passed along so that
/// inherited fields can be resolved in the crate's manifest instead.
///
/// # Errors
/// Fails when an asset cannot be resolved, when no cargo metadata is found for
/// the dependency, or when adding files fails.
///
/// # Panics
/// Panics if the dependency's manifest has no parent directory, is not below
/// the sdist root, or if the workspace cache lacks a required entry.
fn add_path_dep(
    writer: &mut VirtualWriter<SDistWriter>,
    ctx: &SdistContext<'_>,
    name: &str,
    path_dep: &PathDependency,
) -> Result<()> {
    debug!(
        "Adding path dependency: {} at {}",
        name,
        path_dep.manifest_path.display()
    );

    let dep_dir = path_dep.manifest_path.parent().unwrap();
    let relative_dep_dir = dep_dir.strip_prefix(&ctx.sdist_root).unwrap();
    // When the dependency *is* the workspace root package, its manifest is
    // not written from here.
    let skip_cargo_toml =
        ctx.workspace_manifest_path.as_std_path() == path_dep.manifest_path.as_path();
    let target_dir = ctx.root_dir.join(relative_dep_dir);

    let readme = match path_dep.readme.as_ref() {
        Some(readme_path) => Some(resolve_and_add_manifest_asset(
            writer,
            dep_dir,
            readme_path,
            &target_dir,
            "readme",
            None,
        )?),
        None => None,
    };
    let license_file = match path_dep.license_file.as_ref() {
        Some(license_path) => Some(resolve_and_add_manifest_asset(
            writer,
            dep_dir,
            license_path,
            &target_dir,
            "license-file",
            Some(&path_dep.workspace_root),
        )?),
        None => None,
    };

    // `Some` only when the dependency belongs to a workspace other than the main one.
    let foreign_workspace_manifest = if path_dep.workspace_root.as_path()
        != ctx.workspace_root.as_std_path()
    {
        Some(path_dep.workspace_root.join("Cargo.toml"))
    } else {
        None
    };
    let workspace_outside_sdist = foreign_workspace_manifest
        .as_deref()
        .is_some_and(|manifest| !manifest.starts_with(&ctx.sdist_root));

    let package_metadata = match path_dep.resolved_package.as_ref() {
        Some(package) => Some(package),
        None => ctx
            .project
            .cargo_metadata
            .packages
            .iter()
            .find(|pkg| pkg.manifest_path.as_std_path() == path_dep.manifest_path.as_path()),
    }
    .with_context(|| {
        format!(
            "Failed to find cargo metadata for path dependency at {}",
            path_dep.manifest_path.display()
        )
    })?;

    let workspace_inheritance = if workspace_outside_sdist {
        let cached = foreign_workspace_manifest
            .as_deref()
            .and_then(|manifest| ctx.workspace_manifest_cache.get(manifest))
            .expect("workspace inheritance cache missing entry");
        Some(cached)
    } else {
        None
    };

    add_crate_to_source_distribution(
        writer,
        &path_dep.manifest_path,
        &target_dir,
        readme.as_ref(),
        license_file.as_ref(),
        &AddCrateOptions {
            is_root: false,
            known_path_deps: None,
            skip_prefixes: Vec::new(),
            skip_cargo_toml,
            package_metadata,
            workspace_inheritance,
        },
    )
    .with_context(|| {
        format!(
            "Failed to add local dependency {} at {} to the source distribution",
            name,
            path_dep.manifest_path.display()
        )
    })?;

    let Some(workspace_manifest) = foreign_workspace_manifest else {
        return Ok(());
    };
    match workspace_manifest.strip_prefix(&ctx.sdist_root) {
        Ok(relative_workspace_manifest) => {
            writer.add_file(
                ctx.root_dir.join(relative_workspace_manifest),
                &workspace_manifest,
                false,
            )?;
        }
        Err(_) => {
            debug!(
                "Skipping workspace manifest at {} (outside sdist root), \
                 workspace-inherited fields have been inlined",
                workspace_manifest.display()
            );
        }
    }
    Ok(())
}
/// Adds the main crate (cargo's root package) to the source distribution.
///
/// The crate's readme and license file are added first when declared, then
/// the crate itself. When the crate does not sit at the sdist root, the python
/// module and python packages located below the crate directory are passed as
/// skip prefixes, so the crate step leaves them out.
///
/// # Errors
/// Fails when cargo metadata has no root package, when an asset cannot be
/// resolved, or when adding the crate fails.
fn add_main_crate(writer: &mut VirtualWriter<SDistWriter>, ctx: &SdistContext<'_>) -> Result<()> {
    let manifest_path = &ctx.project.manifest_path;
    let main_crate = ctx
        .project
        .cargo_metadata
        .root_package()
        .context("Expected cargo to return metadata with root_package")?;
    debug!("Adding the main crate {}", manifest_path.display());

    let target_dir = ctx.root_dir.join(&ctx.relative_main_crate_manifest_dir);

    let readme = match main_crate.readme.as_ref() {
        Some(readme_path) => Some(resolve_and_add_manifest_asset(
            writer,
            &ctx.abs_manifest_dir,
            readme_path.as_std_path(),
            &target_dir,
            "readme",
            None,
        )?),
        None => None,
    };
    let license_file = match main_crate.license_file.as_ref() {
        Some(license_path) => Some(resolve_and_add_manifest_asset(
            writer,
            &ctx.abs_manifest_dir,
            license_path.as_std_path(),
            &target_dir,
            "license-file",
            Some(ctx.workspace_root.as_std_path()),
        )?),
        None => None,
    };

    let layout = &ctx.project.project_layout;
    let mut skip_prefixes: Vec<PathBuf> = Vec::new();
    // Prefixes are only collected when the crate lives in a subdirectory of the sdist root.
    if !ctx.relative_main_crate_manifest_dir.as_os_str().is_empty() {
        if let Some(python_module) = layout.python_module.as_ref()
            && let Ok(rel) = python_module.strip_prefix(&ctx.abs_manifest_dir)
        {
            skip_prefixes.push(rel.to_path_buf());
        }
        for package in &layout.python_packages {
            let package_path = layout.python_dir.join(package);
            if let Ok(rel) = package_path.strip_prefix(&ctx.abs_manifest_dir)
                && !skip_prefixes.iter().any(|known| known.as_path() == rel)
            {
                skip_prefixes.push(rel.to_path_buf());
            }
        }
    }

    add_crate_to_source_distribution(
        writer,
        manifest_path,
        &target_dir,
        readme.as_ref(),
        license_file.as_ref(),
        &AddCrateOptions {
            is_root: true,
            known_path_deps: Some(&ctx.known_path_deps),
            skip_prefixes,
            skip_cargo_toml: false,
            package_metadata: main_crate,
            workspace_inheritance: None,
        },
    )
}
/// Adds `Cargo.lock` to the source distribution when one exists.
///
/// The lock file next to the main crate manifest is preferred; the one in the
/// workspace root is the fallback. Its location inside the sdist mirrors its
/// location relative to the project root.
///
/// # Errors
/// Fails when no lock file exists although `--locked` or `--frozen` was
/// requested, or when the writer rejects the file. Without those options a
/// missing lock file only produces a warning on standard error.
///
/// # Panics
/// Panics if the chosen lock file is not located below the project root.
fn add_cargo_lock(writer: &mut VirtualWriter<SDistWriter>, ctx: &SdistContext<'_>) -> Result<()> {
    // Candidates in order of preference; the first existing one wins.
    let candidates = [
        ctx.abs_manifest_dir.join("Cargo.lock"),
        ctx.workspace_root.join("Cargo.lock").into_std_path_buf(),
    ];
    let cargo_options = &ctx.project.cargo_options;

    match candidates.into_iter().find(|candidate| candidate.exists()) {
        Some(lock_path) => {
            let relative_lock = lock_path.strip_prefix(&ctx.project_root).unwrap();
            writer.add_file(ctx.root_dir.join(relative_lock), &lock_path, false)?;
        }
        None if cargo_options.locked || cargo_options.frozen => {
            bail!("Cargo.lock is required by `--locked`/`--frozen` but it's not found.");
        }
        None => {
            eprintln!(
                "⚠️ Warning: Cargo.lock is not found, it is recommended \
                 to include it in the source distribution"
            );
        }
    }
    Ok(())
}
/// Adds a rewritten workspace `Cargo.toml` when the main crate lives in a
/// directory other than the workspace root.
///
/// The workspace manifest is rewritten with `rewrite_cargo_toml`, given the
/// known path dependencies plus an entry for the main crate keyed by its
/// slash-separated path relative to the workspace root. When the workspace
/// manifest also declares a package that is not itself a path dependency,
/// `strip_non_workspace_tables` is applied before writing.
///
/// # Errors
/// Fails when the workspace manifest is not below the project root, when the
/// main crate directory is not below the workspace root or is not valid
/// UTF-8, when the manifest cannot be parsed or rewritten, or when the writer
/// rejects the result.
fn add_workspace_manifest(
    writer: &mut VirtualWriter<SDistWriter>,
    ctx: &SdistContext<'_>,
) -> Result<()> {
    let raw_workspace_root = ctx.workspace_root.as_std_path();
    // Fall back to the path as reported by cargo when normalization fails.
    let normalized_workspace_root = raw_workspace_root
        .normalize()
        .map(|p| p.into_path_buf())
        .unwrap_or_else(|_| raw_workspace_root.to_path_buf());
    let is_in_workspace = normalized_workspace_root != ctx.abs_manifest_dir;
    if !is_in_workspace {
        return Ok(());
    }

    let workspace_manifest = ctx.workspace_manifest_path.as_std_path();
    let relative_workspace_cargo_toml = workspace_manifest
        .strip_prefix(&ctx.project_root)
        .with_context(|| {
            format!(
                "workspace manifest `{}` is not inside project root `{}`",
                workspace_manifest.display(),
                ctx.project_root.display()
            )
        })?;

    let mut deps_to_keep = ctx.known_path_deps.clone();
    // `abs_manifest_dir` is normalized, so strip the normalized workspace root
    // first; the raw root is kept as a fallback so every input that worked
    // before still works.
    let main_member_dir = ctx
        .abs_manifest_dir
        .strip_prefix(&normalized_workspace_root)
        .or_else(|_| ctx.abs_manifest_dir.strip_prefix(ctx.workspace_root))
        .with_context(|| {
            format!(
                "manifest directory `{}` is not inside workspace root `{}`",
                ctx.abs_manifest_dir.display(),
                ctx.workspace_root
            )
        })?;
    let main_member_name = main_member_dir
        .to_slash()
        .with_context(|| {
            format!(
                "workspace member path `{}` is not valid UTF-8",
                main_member_dir.display()
            )
        })?
        .to_string();
    deps_to_keep.insert(
        main_member_name,
        PathDependency {
            manifest_path: ctx.project.manifest_path.to_path_buf(),
            workspace_root: raw_workspace_root.to_path_buf(),
            readme: None,
            license_file: None,
            resolved_package: None,
        },
    );

    let mut document = parse_toml_file(workspace_manifest, "Cargo.toml")?;
    rewrite_cargo_toml(&mut document, workspace_manifest, &deps_to_keep)?;

    let workspace_root_is_path_dep = ctx
        .known_path_deps
        .values()
        .any(|dep| dep.manifest_path.as_path() == workspace_manifest);
    if !workspace_root_is_path_dep && document.contains_key("package") {
        strip_non_workspace_tables(&mut document, workspace_manifest);
    }

    writer.add_bytes(
        ctx.root_dir.join(relative_workspace_cargo_toml),
        Some(workspace_manifest),
        document.to_string().as_bytes(),
        false,
    )?;
    Ok(())
}
/// Assembles the sdist contents using cargo's package file lists.
///
/// Steps run in this order: every path dependency, the main crate,
/// `Cargo.lock`, the workspace manifest, `pyproject.toml`, and the python
/// sources. The first failing step aborts the rest.
fn add_cargo_package_files_to_sdist(
    project: &crate::ProjectContext,
    pyproject_toml_path: &Path,
    writer: &mut VirtualWriter<SDistWriter>,
    root_dir: &Path,
) -> Result<()> {
    let ctx = SdistContext::new(project, pyproject_toml_path, root_dir)?;

    for (dep_name, dep) in &ctx.known_path_deps {
        add_path_dep(writer, &ctx, dep_name, dep)?;
    }

    add_main_crate(writer, &ctx)?;
    add_cargo_lock(writer, &ctx)?;
    add_workspace_manifest(writer, &ctx)?;
    add_pyproject_toml(writer, &ctx, pyproject_toml_path)?;
    add_python_sources(writer, &ctx)
}
/// Builds the source distribution for `project` and returns the path of the
/// written archive.
///
/// Files are collected with the generator selected in `pyproject`
/// (cargo's package list or git's tracked files), followed by the pyproject
/// metadata files and a generated `PKG-INFO`. Every target path is placed
/// under a `<distribution>-<version>` directory. `excludes` is handed to the
/// writer. `SOURCE_DATE_EPOCH` is read from the environment and passed to the
/// sdist writer; a value that does not parse as an integer is ignored with a
/// warning.
///
/// # Errors
/// Fails when the `pyproject.toml` path cannot be normalized or when any step
/// of collecting or writing files fails.
///
/// # Panics
/// Panics if the normalized `pyproject.toml` path has no parent directory.
pub fn source_distribution(
    project: &crate::ProjectContext,
    artifact: &crate::ArtifactContext,
    pyproject: &PyProjectToml,
    excludes: Override,
) -> Result<PathBuf> {
    let pyproject_toml_path = project
        .pyproject_toml_path
        .normalize()
        .with_context(|| {
            format!(
                "pyproject.toml path `{}` does not exist or is invalid",
                project.pyproject_toml_path.display()
            )
        })?
        .into_path_buf();

    let source_date_epoch: Option<u64> = match env::var("SOURCE_DATE_EPOCH") {
        Ok(raw) => match raw.parse() {
            Ok(epoch) => Some(epoch),
            Err(_) => {
                warn!("SOURCE_DATE_EPOCH is malformed, ignoring");
                None
            }
        },
        Err(_) => None,
    };

    let metadata24 = &project.metadata24;
    let sdist_writer = SDistWriter::new(&artifact.out, metadata24, source_date_epoch)?;
    let mut writer = VirtualWriter::new(sdist_writer, excludes);

    let root_dir = PathBuf::from(format!(
        "{}-{}",
        &metadata24.get_distribution_escaped(),
        &metadata24.get_version_escaped()
    ));

    let generator = pyproject.sdist_generator();
    match generator {
        SdistGenerator::Cargo => {
            add_cargo_package_files_to_sdist(project, &pyproject_toml_path, &mut writer, &root_dir)?
        }
        SdistGenerator::Git => {
            add_git_tracked_files_to_sdist(&pyproject_toml_path, &mut writer, &root_dir)?
        }
    }

    add_pyproject_metadata(
        &mut writer,
        pyproject,
        pyproject_toml_path.parent().unwrap(),
        &root_dir,
        &project.project_layout.python_dir,
    )?;

    // PKG-INFO is generated from the metadata rather than copied from disk.
    let pkg_info = root_dir.join("PKG-INFO");
    writer.add_bytes(
        &pkg_info,
        None,
        metadata24.to_file_contents()?.as_bytes(),
        false,
    )?;

    let source_distribution_path = writer.finish(&pkg_info)?;
    eprintln!(
        "📦 Built source distribution to {}",
        source_distribution_path.display()
    );
    Ok(source_distribution_path)
}
#[cfg(test)]
mod tests {
    use super::*;
    use fs_err as fs;
    use tempfile::TempDir;

    /// A `license-file` that climbs out of the allowed root via `..` must be
    /// rejected even though the file exists.
    #[test]
    fn test_resolve_manifest_asset_rejects_license_outside_allowed_root() {
        let sandbox = TempDir::new().unwrap();
        let workspace_root = sandbox.path().join("workspace");
        let crate_dir = workspace_root.join("crate");
        fs::create_dir_all(&crate_dir).unwrap();
        // The license sits next to the workspace directory, not inside it.
        fs::write(sandbox.path().join("SECRET_LICENSE"), "secret").unwrap();

        let outcome = resolve_manifest_asset(
            &crate_dir,
            Path::new("../../SECRET_LICENSE"),
            "license-file",
            Some(&workspace_root),
        );

        let err = outcome.unwrap_err();
        assert!(
            err.to_string().contains("outside allowed root"),
            "unexpected error: {err:#}"
        );
    }
}