use std::{
io::{BufReader, ErrorKind},
path::{Path, PathBuf},
};
use digest::Digest;
use rattler_conda_types::package::{IndexJson, PackageFile, PathType, PathsEntry, PathsJson};
use rattler_digest::Sha256;
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use rayon::prelude::IndexedParallelIterator;
/// Selects how thoroughly the contents of a package directory are checked.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Default)]
pub enum ValidationMode {
/// The default. `validate_package_directory` returns right after the
/// metadata files have been read; no entry on disk is inspected.
#[default]
Skip,
/// Hard link entries are only checked for existence as a regular file; sizes
/// and hashes are not compared.
Fast,
/// Hard link entries additionally have their size and SHA256 hash compared
/// against the values recorded in the metadata, when those are present.
Full,
}
/// Error returned by `validate_package_directory` when a package directory
/// could not be validated.
#[derive(Debug, thiserror::Error)]
pub enum PackageValidationError {
/// Both the `paths.json` lookup and the deprecated fallback failed with
/// `ErrorKind::NotFound`.
#[error("neither a 'paths.json' or a deprecated 'files' file was found")]
MetadataMissing,
/// Reading `paths.json` failed with an io error other than `NotFound`.
#[error("failed to read 'paths.json' file")]
ReadPathsJsonError(#[source] std::io::Error),
/// `paths.json` was not found and reading the deprecated metadata failed
/// with an io error other than `NotFound`.
#[error("failed to read validation data from deprecated files")]
ReadDeprecatedPathsJsonError(#[source] std::io::Error),
/// An entry listed in the metadata failed validation. Holds the entry's
/// relative path and the reason it was rejected.
#[error("the path '{0}' seems to be corrupted")]
CorruptedEntry(PathBuf, #[source] PackageEntryValidationError),
/// `index.json` could not be read (this includes the file being absent).
#[error("failed to read 'index.json'")]
ReadIndexJsonError(#[source] std::io::Error),
}
#[derive(Debug, thiserror::Error)]
pub enum PackageEntryValidationError {
#[error("failed to retrieve file metadata'")]
GetMetadataFailed(#[source] std::io::Error),
#[error("the file does not exist")]
NotFound,
#[error("expected a symbolic link")]
ExpectedSymlink,
#[error("expected a directory")]
ExpectedDirectory,
#[error("incorrect size, expected {0} but file on disk is {1}")]
IncorrectSize(u64, u64),
#[error("an io error occurred")]
IoError(#[from] std::io::Error),
#[error("sha256 hash mismatch, expected '{0}' but file on disk is '{1}'")]
HashMismatch(String, String),
}
/// Reads the metadata of the package extracted at `package_dir` and, unless
/// `mode` is [`ValidationMode::Skip`], verifies the files on disk against it.
///
/// `index.json` is read first. The paths metadata is taken from `paths.json`,
/// falling back to the deprecated metadata files only when `paths.json` is
/// reported as not found.
///
/// # Errors
///
/// * [`PackageValidationError::ReadIndexJsonError`] if `index.json` cannot be read.
/// * [`PackageValidationError::ReadPathsJsonError`] if reading `paths.json`
///   fails for any reason other than the file being absent.
/// * [`PackageValidationError::MetadataMissing`] if neither `paths.json` nor
///   the deprecated metadata exists.
/// * [`PackageValidationError::ReadDeprecatedPathsJsonError`] if the deprecated
///   metadata exists but cannot be read.
/// * [`PackageValidationError::CorruptedEntry`] if an entry fails validation.
pub fn validate_package_directory(
    package_dir: &Path,
    mode: ValidationMode,
) -> Result<(IndexJson, PathsJson), PackageValidationError> {
    let index_json = IndexJson::from_package_directory(package_dir)
        .map_err(PackageValidationError::ReadIndexJsonError)?;

    let paths = PathsJson::from_package_directory(package_dir).or_else(|err| {
        // Only a missing `paths.json` justifies trying the deprecated format.
        if err.kind() != ErrorKind::NotFound {
            return Err(PackageValidationError::ReadPathsJsonError(err));
        }
        PathsJson::from_deprecated_package_directory(package_dir).map_err(|err| {
            if err.kind() == ErrorKind::NotFound {
                PackageValidationError::MetadataMissing
            } else {
                PackageValidationError::ReadDeprecatedPathsJsonError(err)
            }
        })
    })?;

    if mode != ValidationMode::Skip {
        validate_package_directory_from_paths(package_dir, &paths, mode)
            .map_err(|(path, err)| PackageValidationError::CorruptedEntry(path, err))?;
    }

    Ok((index_json, paths))
}
/// Validates every entry listed in `paths` against the contents of
/// `package_dir`.
///
/// Entries are processed through a rayon parallel iterator. Note that `mode`
/// is forwarded as-is: this function does not short-circuit on
/// [`ValidationMode::Skip`].
///
/// # Errors
///
/// Returns the relative path of a failing entry together with the reason it
/// was rejected. When several entries are invalid, which one is reported is
/// presumably not guaranteed because of the parallel iteration.
pub fn validate_package_directory_from_paths(
    package_dir: &Path,
    paths: &PathsJson,
    mode: ValidationMode,
) -> Result<(), (PathBuf, PackageEntryValidationError)> {
    // A minimum of 1000 entries per rayon job; presumably chosen to keep the
    // scheduling overhead low for packages with few files.
    let entries = paths.paths.par_iter().with_min_len(1000);

    entries.try_for_each(|entry| match validate_package_entry(package_dir, entry, mode) {
        Ok(()) => Ok(()),
        Err(reason) => Err((entry.relative_path.clone(), reason)),
    })
}
fn validate_package_entry(
package_dir: &Path,
entry: &PathsEntry,
mode: ValidationMode,
) -> Result<(), PackageEntryValidationError> {
let path = package_dir.join(&entry.relative_path);
match entry.path_type {
PathType::HardLink => validate_package_hard_link_entry(path, entry, mode),
PathType::SoftLink => validate_package_soft_link_entry(path, entry, mode),
PathType::Directory => validate_package_directory_entry(path, entry, mode),
}
}
/// Validates a hard link (regular file) entry located at `path`.
///
/// In [`ValidationMode::Fast`], or when the entry records neither a size nor a
/// SHA256 hash, the only check is that `path` is an existing regular file.
/// For every other mode the file is opened, its length is compared with the
/// recorded size (if any) and its SHA256 hash with the recorded hash (if any).
///
/// # Errors
///
/// * [`PackageEntryValidationError::NotFound`] if the file is missing.
/// * [`PackageEntryValidationError::IncorrectSize`] on a size mismatch.
/// * [`PackageEntryValidationError::HashMismatch`] on a hash mismatch.
/// * [`PackageEntryValidationError::IoError`] for any other io failure.
fn validate_package_hard_link_entry(
    path: PathBuf,
    entry: &PathsEntry,
    mode: ValidationMode,
) -> Result<(), PackageEntryValidationError> {
    debug_assert!(entry.path_type == PathType::HardLink);

    // Without recorded metadata there is nothing to compare against, so the
    // check degrades to the same existence test that fast mode performs.
    let nothing_to_compare = entry.sha256.is_none() && entry.size_in_bytes.is_none();
    if mode == ValidationMode::Fast || nothing_to_compare {
        return if path.is_file() {
            Ok(())
        } else {
            Err(PackageEntryValidationError::NotFound)
        };
    }

    let file = std::fs::File::open(&path).map_err(|err| {
        if err.kind() == ErrorKind::NotFound {
            PackageEntryValidationError::NotFound
        } else {
            PackageEntryValidationError::IoError(err)
        }
    })?;

    // The size is checked before hashing, so a length mismatch is reported
    // without reading the file contents.
    if let Some(expected_size) = entry.size_in_bytes {
        let size_on_disk = file.metadata()?.len();
        if expected_size != size_on_disk {
            return Err(PackageEntryValidationError::IncorrectSize(
                expected_size,
                size_on_disk,
            ));
        }
    }

    match &entry.sha256 {
        None => Ok(()),
        Some(expected_hash) => {
            // Stream the file through the hasher using a 64 KiB read buffer.
            let mut reader = BufReader::with_capacity(64 * 1024, file);
            let mut hasher = Sha256::default();
            std::io::copy(&mut reader, &mut hasher)?;
            let actual_hash = hasher.finalize();

            if expected_hash == &actual_hash {
                Ok(())
            } else {
                Err(PackageEntryValidationError::HashMismatch(
                    format!("{expected_hash:x}"),
                    format!("{actual_hash:x}"),
                ))
            }
        }
    }
}
/// Validates a soft link entry located at `path`.
///
/// Only checks that `path` is a symbolic link; the link target is not
/// inspected and the validation mode is ignored.
///
/// # Errors
///
/// Returns [`PackageEntryValidationError::ExpectedSymlink`] if `path` is not a
/// symbolic link.
fn validate_package_soft_link_entry(
    path: PathBuf,
    entry: &PathsEntry,
    _mode: ValidationMode,
) -> Result<(), PackageEntryValidationError> {
    debug_assert!(entry.path_type == PathType::SoftLink);

    if path.is_symlink() {
        Ok(())
    } else {
        Err(PackageEntryValidationError::ExpectedSymlink)
    }
}
/// Validates a directory entry located at `path`.
///
/// Only checks that `path` is a directory; the validation mode is ignored.
///
/// # Errors
///
/// Returns [`PackageEntryValidationError::ExpectedDirectory`] if `path` is not
/// a directory.
fn validate_package_directory_entry(
    path: PathBuf,
    entry: &PathsEntry,
    _mode: ValidationMode,
) -> Result<(), PackageEntryValidationError> {
    debug_assert!(entry.path_type == PathType::Directory);

    if !path.is_dir() {
        return Err(PackageEntryValidationError::ExpectedDirectory);
    }
    Ok(())
}
#[cfg(test)]
mod test {
    use std::io::Write;

    use assert_matches::assert_matches;
    use rattler_conda_types::package::{PackageFile, PathType, PathsJson};
    use rstest::rstest;
    use url::Url;

    use super::{
        validate_package_directory, validate_package_directory_from_paths,
        PackageEntryValidationError, PackageValidationError, ValidationMode,
    };

    /// Extracts a downloaded package, checks that it validates in full mode,
    /// then corrupts one regular file and checks that the corruption is
    /// reported as a hash mismatch for exactly that entry.
    #[rstest]
    #[case::conda_tar_bz2(
        "https://conda.anaconda.org/conda-forge/win-64/conda-22.9.0-py38haa244fe_2.tar.bz2",
        "3c2c2e8e81bde5fb1ac4b014f51a62411feff004580c708c97a0ec2b7058cdc4"
    )]
    #[case::mamba_tar_bz2(
        "https://conda.anaconda.org/conda-forge/win-64/mamba-1.0.0-py38hecfeebb_2.tar.bz2",
        "f44c4bc9c6916ecc0e33137431645b029ade22190c7144eead61446dcbcc6f97"
    )]
    #[case::conda_conda(
        "https://conda.anaconda.org/conda-forge/win-64/conda-22.11.1-py38haa244fe_1.conda",
        "a8a44c5ff2b2f423546d49721ba2e3e632233c74a813c944adf8e5742834930e"
    )]
    #[case::mamba_conda(
        "https://conda.anaconda.org/conda-forge/win-64/mamba-1.1.0-py39hb3d9227_2.conda",
        "c172acdf9cb7655dd224879b30361a657b09bb084b65f151e36a2b51e51a080a"
    )]
    fn test_validate_package_files(#[case] url: Url, #[case] sha256: &str) {
        // Download and extract the package into a temporary directory.
        let temp_dir = tempfile::tempdir().unwrap();
        let package_path = tools::download_and_cache_file(url, sha256).unwrap();
        rattler_package_streaming::fs::extract(&package_path, temp_dir.path()).unwrap();

        // A freshly extracted package must validate.
        let result = validate_package_directory(temp_dir.path(), ValidationMode::Full);
        if let Err(e) = result {
            panic!("{e}");
        }

        // Read the paths metadata, falling back to the deprecated format.
        let paths = PathsJson::from_package_directory(temp_dir.path())
            .or_else(|_| PathsJson::from_deprecated_package_directory(temp_dir.path()))
            .unwrap();

        // Pick the first regular file of the package.
        let entry = paths
            .paths
            .iter()
            .find(|e| e.path_type == PathType::HardLink)
            .expect("package does not contain a file");

        // Overwrite the first byte of the file. The length is unchanged, so
        // the size check still passes and the hash comparison is reached.
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .open(temp_dir.path().join(&entry.relative_path))
            .unwrap();
        file.write_all(&[255]).unwrap();
        drop(file);

        assert_matches!(
            validate_package_directory_from_paths(temp_dir.path(), &paths, ValidationMode::Full),
            Err((
                path,
                PackageEntryValidationError::HashMismatch(_, _)
            )) if path == entry.relative_path
        );
    }

    /// Extracts a downloaded package that contains symlinks, checks that it
    /// validates in full mode, then replaces one symlink with a regular file
    /// and checks that this is reported for exactly that entry.
    #[rstest]
    #[cfg(unix)]
    #[case::python(
        "https://conda.anaconda.org/conda-forge/linux-ppc64le/python-3.10.6-h2c4edbf_0_cpython.tar.bz2",
        "978c122f6529cb617b90e6e692308a5945bf9c3ba0c27acbe4bea4c8b02cdad0"
    )]
    #[case::zlib(
        "https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.8-3.tar.bz2",
        "85fcb6906b8686fe6341db89b4e6fc2631ad69ee6eab2f4823bfd64ae0b20ac8"
    )]
    fn test_validate_package_files_symlink(#[case] url: Url, #[case] sha256: &str) {
        // Download and extract the package into a temporary directory.
        let temp_dir = tempfile::tempdir().unwrap();
        let package_path = tools::download_and_cache_file(url, sha256).unwrap();
        rattler_package_streaming::fs::extract(&package_path, temp_dir.path()).unwrap();

        // A freshly extracted package must validate.
        let result = validate_package_directory(temp_dir.path(), ValidationMode::Full);
        if let Err(e) = result {
            panic!("{e}");
        }

        // Read the paths metadata, falling back to the deprecated format.
        let paths = PathsJson::from_package_directory(temp_dir.path())
            .or_else(|_| PathsJson::from_deprecated_package_directory(temp_dir.path()))
            .unwrap();

        // Pick the first symlink of the package.
        let entry = paths
            .paths
            .iter()
            .find(|e| e.path_type == PathType::SoftLink)
            .expect("package does not contain a symlink");

        // Replace the symlink with a regular file that holds the content the
        // link resolved to.
        let entry_path = temp_dir.path().join(&entry.relative_path);
        let contents = std::fs::read(&entry_path).unwrap();
        std::fs::remove_file(&entry_path).unwrap();
        std::fs::write(entry_path, contents).unwrap();

        assert_matches!(
            validate_package_directory_from_paths(temp_dir.path(), &paths, ValidationMode::Full),
            Err((
                path,
                PackageEntryValidationError::ExpectedSymlink
            )) if path == entry.relative_path
        );
    }

    /// An empty directory has no `index.json`, which is the first file read.
    #[test]
    fn test_missing_metadata() {
        let temp_dir = tempfile::tempdir().unwrap();
        assert_matches!(
            validate_package_directory(temp_dir.path(), ValidationMode::Full),
            Err(PackageValidationError::ReadIndexJsonError(_))
        );
    }
}