1use std::{
15 io::{BufReader, ErrorKind},
16 path::{Path, PathBuf},
17};
18
19use digest::Digest;
20use rattler_conda_types::package::{IndexJson, PackageFile, PathType, PathsEntry, PathsJson};
21use rattler_digest::Sha256;
22use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
23use rayon::prelude::IndexedParallelIterator;
24
/// Selects how thoroughly the files of an extracted package are checked.
///
/// The mode is only consulted when validating hard-link (regular file)
/// entries; symbolic-link and directory entries are checked the same way in
/// every mode.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum ValidationMode {
    /// Only verify that each regular file exists on disk. File contents are
    /// not inspected. This is the default mode.
    #[default]
    Fast,

    /// Besides existence, compare the on-disk size and the SHA256 hash of
    /// each regular file against the values recorded for the entry, for
    /// whichever of the two values are present.
    Full,
}
35
36#[derive(Debug, thiserror::Error)]
39pub enum PackageValidationError {
40 #[error("neither a 'paths.json' or a deprecated 'files' file was found")]
42 MetadataMissing,
43
44 #[error("failed to read 'paths.json' file")]
46 ReadPathsJsonError(#[source] std::io::Error),
47
48 #[error("failed to read validation data from deprecated files")]
50 ReadDeprecatedPathsJsonError(#[source] std::io::Error),
51
52 #[error("the path '{0}' seems to be corrupted")]
54 CorruptedEntry(PathBuf, #[source] PackageEntryValidationError),
55
56 #[error("failed to read 'index.json'")]
58 ReadIndexJsonError(#[source] std::io::Error),
59}
60
61#[derive(Debug, thiserror::Error)]
64pub enum PackageEntryValidationError {
65 #[error("failed to retrieve file metadata'")]
67 GetMetadataFailed(#[source] std::io::Error),
68
69 #[error("the file does not exist")]
71 NotFound,
72
73 #[error("expected a symbolic link")]
75 ExpectedSymlink,
76
77 #[error("expected a directory")]
79 ExpectedDirectory,
80
81 #[error("incorrect size, expected {0} but file on disk is {1}")]
83 IncorrectSize(u64, u64),
84
85 #[error("an io error occurred")]
87 IoError(#[from] std::io::Error),
88
89 #[error("sha256 hash mismatch, expected '{0}' but file on disk is '{1}'")]
91 HashMismatch(String, String),
92}
93
94pub fn validate_package_directory(
104 package_dir: &Path,
105 mode: ValidationMode,
106) -> Result<(IndexJson, PathsJson), PackageValidationError> {
107 let index_json = IndexJson::from_package_directory(package_dir)
109 .map_err(PackageValidationError::ReadIndexJsonError)?;
110
111 let paths = match PathsJson::from_package_directory(package_dir) {
115 Err(e) if e.kind() == ErrorKind::NotFound => {
116 match PathsJson::from_deprecated_package_directory(package_dir) {
117 Ok(paths) => paths,
118 Err(e) if e.kind() == ErrorKind::NotFound => {
119 return Err(PackageValidationError::MetadataMissing)
120 }
121 Err(e) => return Err(PackageValidationError::ReadDeprecatedPathsJsonError(e)),
122 }
123 }
124 Err(e) => return Err(PackageValidationError::ReadPathsJsonError(e)),
125 Ok(paths) => paths,
126 };
127
128 validate_package_directory_from_paths(package_dir, &paths, mode)
130 .map_err(|(path, err)| PackageValidationError::CorruptedEntry(path, err))?;
131
132 Ok((index_json, paths))
133}
134
135pub fn validate_package_directory_from_paths(
138 package_dir: &Path,
139 paths: &PathsJson,
140 mode: ValidationMode,
141) -> Result<(), (PathBuf, PackageEntryValidationError)> {
142 paths
144 .paths
145 .par_iter()
146 .with_min_len(1000)
147 .try_for_each(|entry| {
148 validate_package_entry(package_dir, entry, mode)
149 .map_err(|e| (entry.relative_path.clone(), e))
150 })
151}
152
153fn validate_package_entry(
156 package_dir: &Path,
157 entry: &PathsEntry,
158 mode: ValidationMode,
159) -> Result<(), PackageEntryValidationError> {
160 let path = package_dir.join(&entry.relative_path);
161
162 match entry.path_type {
164 PathType::HardLink => validate_package_hard_link_entry(path, entry, mode),
165 PathType::SoftLink => validate_package_soft_link_entry(path, entry, mode),
166 PathType::Directory => validate_package_directory_entry(path, entry, mode),
167 }
168}
169
170fn validate_package_hard_link_entry(
173 path: PathBuf,
174 entry: &PathsEntry,
175 mode: ValidationMode,
176) -> Result<(), PackageEntryValidationError> {
177 debug_assert!(entry.path_type == PathType::HardLink);
178
179 if mode == ValidationMode::Fast {
180 if !path.is_file() {
181 return Err(PackageEntryValidationError::NotFound);
182 }
183 return Ok(());
184 }
185
186 if entry.sha256.is_none() && entry.size_in_bytes.is_none() {
188 if !path.is_file() {
189 return Err(PackageEntryValidationError::NotFound);
190 }
191 return Ok(());
192 }
193
194 let file = match std::fs::File::open(&path) {
196 Ok(file) => file,
197 Err(e) if e.kind() == ErrorKind::NotFound => {
198 return Err(PackageEntryValidationError::NotFound);
199 }
200 Err(e) => return Err(PackageEntryValidationError::IoError(e)),
201 };
202
203 if let Some(size_in_bytes) = entry.size_in_bytes {
205 let actual_file_len = file
206 .metadata()
207 .map_err(PackageEntryValidationError::IoError)?
208 .len();
209 if size_in_bytes != actual_file_len {
210 return Err(PackageEntryValidationError::IncorrectSize(
211 size_in_bytes,
212 actual_file_len,
213 ));
214 }
215 }
216
217 if let Some(expected_hash) = &entry.sha256 {
219 let mut file = BufReader::with_capacity(64 * 1024, file);
221 let mut hasher = Sha256::default();
222 std::io::copy(&mut file, &mut hasher)?;
223 let hash = hasher.finalize();
224
225 if expected_hash != &hash {
227 return Err(PackageEntryValidationError::HashMismatch(
228 format!("{expected_hash:x}"),
229 format!("{hash:x}"),
230 ));
231 }
232 }
233
234 Ok(())
235}
236
237fn validate_package_soft_link_entry(
240 path: PathBuf,
241 entry: &PathsEntry,
242 _mode: ValidationMode,
243) -> Result<(), PackageEntryValidationError> {
244 debug_assert!(entry.path_type == PathType::SoftLink);
245
246 if !path.is_symlink() {
247 return Err(PackageEntryValidationError::ExpectedSymlink);
248 }
249
250 Ok(())
257}
258
259fn validate_package_directory_entry(
262 path: PathBuf,
263 entry: &PathsEntry,
264 _mode: ValidationMode,
265) -> Result<(), PackageEntryValidationError> {
266 debug_assert!(entry.path_type == PathType::Directory);
267
268 if path.is_dir() {
269 Ok(())
270 } else {
271 Err(PackageEntryValidationError::ExpectedDirectory)
272 }
273}
274
#[cfg(test)]
mod test {
    use std::io::Write;

    use assert_matches::assert_matches;
    use rattler_conda_types::package::{PackageFile, PathType, PathsJson};
    use rstest::rstest;
    use url::Url;

    use super::{
        validate_package_directory, validate_package_directory_from_paths,
        PackageEntryValidationError, PackageValidationError, ValidationMode,
    };

    /// A freshly extracted package must pass full validation, and modifying
    /// one of its regular files must be reported as a hash mismatch.
    #[rstest]
    #[case::conda(
        "https://conda.anaconda.org/conda-forge/win-64/conda-22.9.0-py38haa244fe_2.tar.bz2",
        "3c2c2e8e81bde5fb1ac4b014f51a62411feff004580c708c97a0ec2b7058cdc4"
    )]
    #[case::mamba(
        "https://conda.anaconda.org/conda-forge/win-64/mamba-1.0.0-py38hecfeebb_2.tar.bz2",
        "f44c4bc9c6916ecc0e33137431645b029ade22190c7144eead61446dcbcc6f97"
    )]
    #[case::conda(
        "https://conda.anaconda.org/conda-forge/win-64/conda-22.11.1-py38haa244fe_1.conda",
        "a8a44c5ff2b2f423546d49721ba2e3e632233c74a813c944adf8e5742834930e"
    )]
    #[case::mamba(
        "https://conda.anaconda.org/conda-forge/win-64/mamba-1.1.0-py39hb3d9227_2.conda",
        "c172acdf9cb7655dd224879b30361a657b09bb084b65f151e36a2b51e51a080a"
    )]
    fn test_validate_package_files(#[case] url: Url, #[case] sha256: &str) {
        // Download the package and extract it into a temporary directory.
        let temp_dir = tempfile::tempdir().unwrap();
        let package_path = tools::download_and_cache_file(url, sha256).unwrap();

        rattler_package_streaming::fs::extract(&package_path, temp_dir.path()).unwrap();

        // The untouched package must validate successfully.
        let result = validate_package_directory(temp_dir.path(), ValidationMode::Full);
        if let Err(e) = result {
            panic!("{e}");
        }

        // Pick the first regular file listed in the package metadata.
        let paths = PathsJson::from_package_directory(temp_dir.path())
            .or_else(|_| PathsJson::from_deprecated_package_directory(temp_dir.path()))
            .unwrap();
        let entry = paths
            .paths
            .iter()
            .find(|e| e.path_type == PathType::HardLink)
            .expect("package does not contain a file");

        // Overwrite the first byte of that file in place (the file is opened
        // for writing without truncation), which is expected to change its
        // hash.
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .open(temp_dir.path().join(&entry.relative_path))
            .unwrap();
        file.write_all(&[255]).unwrap();
        drop(file);

        // Validation must now point at the modified file.
        assert_matches!(
            validate_package_directory_from_paths(temp_dir.path(), &paths, ValidationMode::Full),
            Err((
                path,
                PackageEntryValidationError::HashMismatch(_, _)
            )) if path == entry.relative_path
        );
    }

    /// Replacing a symbolic link with a regular file must be reported as
    /// `ExpectedSymlink`.
    #[rstest]
    #[cfg(unix)]
    #[case::python(
        "https://conda.anaconda.org/conda-forge/linux-ppc64le/python-3.10.6-h2c4edbf_0_cpython.tar.bz2",
        "978c122f6529cb617b90e6e692308a5945bf9c3ba0c27acbe4bea4c8b02cdad0"
    )]
    #[case::zlib(
        "https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.8-3.tar.bz2",
        "85fcb6906b8686fe6341db89b4e6fc2631ad69ee6eab2f4823bfd64ae0b20ac8"
    )]
    fn test_validate_package_files_symlink(#[case] url: Url, #[case] sha256: &str) {
        // Download the package and extract it into a temporary directory.
        let temp_dir = tempfile::tempdir().unwrap();
        let package_path = tools::download_and_cache_file(url, sha256).unwrap();

        rattler_package_streaming::fs::extract(&package_path, temp_dir.path()).unwrap();

        // The untouched package must validate successfully.
        let result = validate_package_directory(temp_dir.path(), ValidationMode::Full);
        if let Err(e) = result {
            panic!("{e}");
        }

        // Pick the first symbolic link listed in the package metadata.
        let paths = PathsJson::from_package_directory(temp_dir.path())
            .or_else(|_| PathsJson::from_deprecated_package_directory(temp_dir.path()))
            .unwrap();
        let entry = paths
            .paths
            .iter()
            .find(|e| e.path_type == PathType::SoftLink)
            .expect("package does not contain a symbolic link");

        // Replace the link with a regular file holding the bytes that were
        // read through the link.
        let entry_path = temp_dir.path().join(&entry.relative_path);
        let contents = std::fs::read(&entry_path).unwrap();
        std::fs::remove_file(&entry_path).unwrap();
        std::fs::write(entry_path, contents).unwrap();

        // Validation must now report that a symbolic link was expected.
        assert_matches!(
            validate_package_directory_from_paths(temp_dir.path(), &paths, ValidationMode::Full),
            Err((
                path,
                PackageEntryValidationError::ExpectedSymlink
            )) if path == entry.relative_path
        );
    }

    /// Validating an empty directory fails while reading `index.json`, which
    /// is the first file `validate_package_directory` reads.
    #[test]
    fn test_missing_metadata() {
        let temp_dir = tempfile::tempdir().unwrap();
        assert_matches!(
            validate_package_directory(temp_dir.path(), ValidationMode::Full),
            Err(PackageValidationError::ReadIndexJsonError(_))
        );
    }
}