Skip to main content

rattler_cache/
validation.rs

1//! Functionality to validate the contents of a Conda package.
2//!
3//! Almost all Conda packages contain a file `info/paths.json` that describes
4//! all the files the package contains. The [`validate_package_directory`]
5//! function validates that a directory containing an extracted Conda package
6//! archive actually contains the files as described by the `paths.json` file.
7//!
8//! Very old Conda packages do not contain a `paths.json` file. These packages
9//! contain a (deprecated) `files` file as well as optionally a `has_prefix` and
10//! some other files. If the `paths.json` file is missing these deprecated files
11//! are used instead to reconstruct a [`PathsJson`] object. See
12//! [`PathsJson::from_deprecated_package_directory`] for more information.
13
14use std::{
15    io::{BufReader, ErrorKind},
16    path::{Path, PathBuf},
17};
18
19use digest::Digest;
20use rattler_conda_types::package::{IndexJson, PackageFile, PathType, PathsEntry, PathsJson};
21use rattler_digest::Sha256;
22use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
23use rayon::prelude::IndexedParallelIterator;
24
/// The mode in which the validation should be performed.
///
/// The default mode is [`ValidationMode::Skip`].
#[derive(Debug, Copy, Clone, Eq, PartialEq, Default)]
pub enum ValidationMode {
    /// Only check that the package metadata can be read. When used with
    /// [`validate_package_directory`] the `index.json` file and the
    /// `paths.json` file (or the deprecated files it is reconstructed from)
    /// must still be readable, but the individual files listed in
    /// `paths.json` are not checked. This is the fastest validation mode but
    /// provides minimal guarantees about package integrity.
    #[default]
    Skip,

    /// Only check if the files exist. Do not check if the hashes match.
    Fast,

    /// Check if the files exist and, where `paths.json` records them, that
    /// the size and SHA256 hash of the content match.
    Full,
}
40
/// An error that is returned by [`validate_package_directory`] if the contents
/// of the directory seem to be corrupted.
#[derive(Debug, thiserror::Error)]
pub enum PackageValidationError {
    /// Neither a `paths.json` file nor a deprecated `files` file was found.
    ///
    /// Returned when both reading `paths.json` and reconstructing it from the
    /// deprecated files fail with [`std::io::ErrorKind::NotFound`].
    #[error("neither a 'paths.json' or a deprecated 'files' file was found")]
    MetadataMissing,

    /// An error occurred while reading the `paths.json` file (for any reason
    /// other than the file not being found).
    #[error("failed to read 'paths.json' file")]
    ReadPathsJsonError(#[source] std::io::Error),

    /// An error occurred while reading the deprecated `files` file (for any
    /// reason other than the file not being found).
    #[error("failed to read validation data from deprecated files")]
    ReadDeprecatedPathsJsonError(#[source] std::io::Error),

    /// The path seems to be corrupted.
    ///
    /// The first field is the path of the offending entry relative to the
    /// package directory, the second field describes what is wrong with it.
    #[error("the path '{0}' seems to be corrupted")]
    CorruptedEntry(PathBuf, #[source] PackageEntryValidationError),

    /// An error occurred while reading the `index.json` file.
    #[error("failed to read 'index.json'")]
    ReadIndexJsonError(#[source] std::io::Error),
}
65
66/// An error that indicates that a specific file in a package archive directory
67/// seems to be corrupted.
68#[derive(Debug, thiserror::Error)]
69pub enum PackageEntryValidationError {
70    /// An error occurred while reading the metadata of the file.
71    #[error("failed to retrieve file metadata'")]
72    GetMetadataFailed(#[source] std::io::Error),
73
74    /// The file does not exist.
75    #[error("the file does not exist")]
76    NotFound,
77
78    /// The file is not a symbolic link.
79    #[error("expected a symbolic link")]
80    ExpectedSymlink,
81
82    /// The file is not a directory.
83    #[error("expected a directory")]
84    ExpectedDirectory,
85
86    /// The size of the file does not match the expected size.
87    #[error("incorrect size, expected {0} but file on disk is {1}")]
88    IncorrectSize(u64, u64),
89
90    /// An IO error occurred while reading the file.
91    #[error("an io error occurred")]
92    IoError(#[from] std::io::Error),
93
94    /// The SHA256 hash of the file does not match the expected hash.
95    #[error("sha256 hash mismatch, expected '{0}' but file on disk is '{1}'")]
96    HashMismatch(String, String),
97}
98
99/// Determine whether the files in the specified directory match what is
100/// expected according to the `info/paths.json` file in the same directory.
101///
102/// If the `info/paths.json` file could not be found this function tries to
103/// reconstruct the information from older deprecated methods. See
104/// [`PathsJson::from_deprecated_package_directory`].
105///
106/// If validation succeeds the parsed [`PathsJson`] object is returned which
107/// contains information about the files in the archive.
108pub fn validate_package_directory(
109    package_dir: &Path,
110    mode: ValidationMode,
111) -> Result<(IndexJson, PathsJson), PackageValidationError> {
112    // Validate that there is a valid IndexJson
113    let index_json = IndexJson::from_package_directory(package_dir)
114        .map_err(PackageValidationError::ReadIndexJsonError)?;
115
116    // Read the 'paths.json' file which describes all files that should be present.
117    // If the file could not be found try reconstructing the paths information
118    // from deprecated files in the package directory.
119    let paths = match PathsJson::from_package_directory(package_dir) {
120        Err(e) if e.kind() == ErrorKind::NotFound => {
121            match PathsJson::from_deprecated_package_directory(package_dir) {
122                Ok(paths) => paths,
123                Err(e) if e.kind() == ErrorKind::NotFound => {
124                    return Err(PackageValidationError::MetadataMissing)
125                }
126                Err(e) => return Err(PackageValidationError::ReadDeprecatedPathsJsonError(e)),
127            }
128        }
129        Err(e) => return Err(PackageValidationError::ReadPathsJsonError(e)),
130        Ok(paths) => paths,
131    };
132
133    // In Skip mode, only validate that index.json and paths.json exist and are readable.
134    // Skip all file validation checks.
135    if mode == ValidationMode::Skip {
136        return Ok((index_json, paths));
137    }
138
139    // Validate all the entries
140    validate_package_directory_from_paths(package_dir, &paths, mode)
141        .map_err(|(path, err)| PackageValidationError::CorruptedEntry(path, err))?;
142
143    Ok((index_json, paths))
144}
145
146/// Determine whether the files in the specified directory match wat is expected
147/// according to the passed in [`PathsJson`].
148pub fn validate_package_directory_from_paths(
149    package_dir: &Path,
150    paths: &PathsJson,
151    mode: ValidationMode,
152) -> Result<(), (PathBuf, PackageEntryValidationError)> {
153    // Check every entry in the PathsJson object
154    paths
155        .paths
156        .par_iter()
157        .with_min_len(1000)
158        .try_for_each(|entry| {
159            validate_package_entry(package_dir, entry, mode)
160                .map_err(|e| (entry.relative_path.clone(), e))
161        })
162}
163
164/// Determine whether the information in the [`PathsEntry`] matches the file in
165/// the package directory.
166fn validate_package_entry(
167    package_dir: &Path,
168    entry: &PathsEntry,
169    mode: ValidationMode,
170) -> Result<(), PackageEntryValidationError> {
171    let path = package_dir.join(&entry.relative_path);
172
173    // Validate based on the type of path
174    match entry.path_type {
175        PathType::HardLink => validate_package_hard_link_entry(path, entry, mode),
176        PathType::SoftLink => validate_package_soft_link_entry(path, entry, mode),
177        PathType::Directory => validate_package_directory_entry(path, entry, mode),
178    }
179}
180
181/// Determine whether the information in the [`PathsEntry`] matches the file at
182/// the specified path.
183fn validate_package_hard_link_entry(
184    path: PathBuf,
185    entry: &PathsEntry,
186    mode: ValidationMode,
187) -> Result<(), PackageEntryValidationError> {
188    debug_assert!(entry.path_type == PathType::HardLink);
189
190    if mode == ValidationMode::Fast {
191        if !path.is_file() {
192            return Err(PackageEntryValidationError::NotFound);
193        }
194        return Ok(());
195    }
196
197    // Short-circuit if we have no validation reference
198    if entry.sha256.is_none() && entry.size_in_bytes.is_none() {
199        if !path.is_file() {
200            return Err(PackageEntryValidationError::NotFound);
201        }
202        return Ok(());
203    }
204
205    // Open the file for reading
206    let file = match std::fs::File::open(&path) {
207        Ok(file) => file,
208        Err(e) if e.kind() == ErrorKind::NotFound => {
209            return Err(PackageEntryValidationError::NotFound);
210        }
211        Err(e) => return Err(PackageEntryValidationError::IoError(e)),
212    };
213
214    // Validate the size of the file
215    if let Some(size_in_bytes) = entry.size_in_bytes {
216        let actual_file_len = file
217            .metadata()
218            .map_err(PackageEntryValidationError::IoError)?
219            .len();
220        if size_in_bytes != actual_file_len {
221            return Err(PackageEntryValidationError::IncorrectSize(
222                size_in_bytes,
223                actual_file_len,
224            ));
225        }
226    }
227
228    // Check the SHA256 hash of the file
229    if let Some(expected_hash) = &entry.sha256 {
230        // Determine the hash of the file on disk
231        let mut file = BufReader::with_capacity(64 * 1024, file);
232        let mut hasher = Sha256::default();
233        std::io::copy(&mut file, &mut hasher)?;
234        let hash = hasher.finalize();
235
236        // Compare the two hashes
237        if expected_hash != &hash {
238            return Err(PackageEntryValidationError::HashMismatch(
239                format!("{expected_hash:x}"),
240                format!("{hash:x}"),
241            ));
242        }
243    }
244
245    Ok(())
246}
247
248/// Determine whether the information in the [`PathsEntry`] matches the symbolic
249/// link at the specified path.
250fn validate_package_soft_link_entry(
251    path: PathBuf,
252    entry: &PathsEntry,
253    _mode: ValidationMode,
254) -> Result<(), PackageEntryValidationError> {
255    debug_assert!(entry.path_type == PathType::SoftLink);
256
257    if !path.is_symlink() {
258        return Err(PackageEntryValidationError::ExpectedSymlink);
259    }
260
261    // TODO: Validate symlink content. Dont validate the SHA256 hash of the file
262    // because since a symlink will most likely point to another file added as a
263    // hardlink by the package this is double work. Instead check that the
264    // symlink is correct e.g. `../a` points to the same file as `b/../../a` but
265    // they are different.
266
267    Ok(())
268}
269
270/// Determine whether the information in the [`PathsEntry`] matches the
271/// directory at the specified path.
272fn validate_package_directory_entry(
273    path: PathBuf,
274    entry: &PathsEntry,
275    _mode: ValidationMode,
276) -> Result<(), PackageEntryValidationError> {
277    debug_assert!(entry.path_type == PathType::Directory);
278
279    if path.is_dir() {
280        Ok(())
281    } else {
282        Err(PackageEntryValidationError::ExpectedDirectory)
283    }
284}
285
#[cfg(test)]
mod test {
    use std::io::Write;

    use assert_matches::assert_matches;
    use rattler_conda_types::package::{PackageFile, PathType, PathsJson};
    use rstest::rstest;
    use url::Url;

    use super::{
        validate_package_directory, validate_package_directory_from_paths,
        PackageEntryValidationError, PackageValidationError, ValidationMode,
    };

    /// A freshly extracted package must validate, and modifying the content of
    /// one of its files must be reported as a hash mismatch for that file.
    #[rstest]
    #[case::conda_tar_bz2(
        "https://conda.anaconda.org/conda-forge/win-64/conda-22.9.0-py38haa244fe_2.tar.bz2",
        "3c2c2e8e81bde5fb1ac4b014f51a62411feff004580c708c97a0ec2b7058cdc4"
    )]
    #[case::mamba_tar_bz2(
        "https://conda.anaconda.org/conda-forge/win-64/mamba-1.0.0-py38hecfeebb_2.tar.bz2",
        "f44c4bc9c6916ecc0e33137431645b029ade22190c7144eead61446dcbcc6f97"
    )]
    #[case::conda_conda(
        "https://conda.anaconda.org/conda-forge/win-64/conda-22.11.1-py38haa244fe_1.conda",
        "a8a44c5ff2b2f423546d49721ba2e3e632233c74a813c944adf8e5742834930e"
    )]
    #[case::mamba_conda(
        "https://conda.anaconda.org/conda-forge/win-64/mamba-1.1.0-py39hb3d9227_2.conda",
        "c172acdf9cb7655dd224879b30361a657b09bb084b65f151e36a2b51e51a080a"
    )]
    fn test_validate_package_files(#[case] url: Url, #[case] sha256: &str) {
        // Create a temporary directory and extract the given package.
        let temp_dir = tempfile::tempdir().unwrap();
        let package_path = tools::download_and_cache_file(url, sha256).unwrap();

        rattler_package_streaming::fs::extract(&package_path, temp_dir.path()).unwrap();

        // Validate that the extracted package is correct. Since it's just been
        // extracted this should work.
        let result = validate_package_directory(temp_dir.path(), ValidationMode::Full);
        if let Err(e) = result {
            panic!("{e}");
        }

        // Read the paths.json file and select the first file in the archive.
        let paths = PathsJson::from_package_directory(temp_dir.path())
            .or_else(|_| PathsJson::from_deprecated_package_directory(temp_dir.path()))
            .unwrap();
        let entry = paths
            .paths
            .iter()
            .find(|e| e.path_type == PathType::HardLink)
            .expect("package does not contain a file");

        // Change the file by overwriting its first byte. The size of the file
        // stays the same so only the hash check can detect the change.
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .open(temp_dir.path().join(&entry.relative_path))
            .unwrap();
        file.write_all(&[255]).unwrap();
        drop(file);

        // Revalidate the package, given that we changed a file it should now fail with
        // mismatched hashes.
        assert_matches!(
            validate_package_directory_from_paths(temp_dir.path(), &paths, ValidationMode::Full),
            Err((
                path,
                PackageEntryValidationError::HashMismatch(_, _)
            )) if path == entry.relative_path
        );
    }

    /// A freshly extracted package must validate, and replacing one of its
    /// symlinks with a regular file must be reported for that entry.
    #[rstest]
    #[cfg(unix)]
    #[case::python(
        "https://conda.anaconda.org/conda-forge/linux-ppc64le/python-3.10.6-h2c4edbf_0_cpython.tar.bz2",
        "978c122f6529cb617b90e6e692308a5945bf9c3ba0c27acbe4bea4c8b02cdad0"
    )]
    // Very old package that relies on the deprecated metadata files instead of
    // `paths.json`.
    #[case::zlib_deprecated(
        "https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.8-3.tar.bz2",
        "85fcb6906b8686fe6341db89b4e6fc2631ad69ee6eab2f4823bfd64ae0b20ac8"
    )]
    fn test_validate_package_files_symlink(#[case] url: Url, #[case] sha256: &str) {
        // Create a temporary directory and extract the given package.
        let temp_dir = tempfile::tempdir().unwrap();
        let package_path = tools::download_and_cache_file(url, sha256).unwrap();

        rattler_package_streaming::fs::extract(&package_path, temp_dir.path()).unwrap();

        // Validate that the extracted package is correct. Since it's just been
        // extracted this should work.
        let result = validate_package_directory(temp_dir.path(), ValidationMode::Full);
        if let Err(e) = result {
            panic!("{e}");
        }

        // Read the paths.json file and select the first symlink in the archive.
        let paths = PathsJson::from_package_directory(temp_dir.path())
            .or_else(|_| PathsJson::from_deprecated_package_directory(temp_dir.path()))
            .unwrap();
        let entry = paths
            .paths
            .iter()
            .find(|e| e.path_type == PathType::SoftLink)
            .expect("package does not contain a symlink");

        // Replace the symlink with its content
        let entry_path = temp_dir.path().join(&entry.relative_path);
        let contents = std::fs::read(&entry_path).unwrap();
        std::fs::remove_file(&entry_path).unwrap();
        std::fs::write(entry_path, contents).unwrap();

        // Revalidate the package, given that we replaced the symlink, it should fail.
        assert_matches!(
            validate_package_directory_from_paths(temp_dir.path(), &paths, ValidationMode::Full),
            Err((
                path,
                PackageEntryValidationError::ExpectedSymlink
            )) if path == entry.relative_path
        );
    }

    /// Validating an empty directory fails.
    #[test]
    fn test_missing_metadata() {
        // `index.json` is read before anything else, so in an empty directory
        // that is the error that is reported.
        let temp_dir = tempfile::tempdir().unwrap();
        assert_matches!(
            validate_package_directory(temp_dir.path(), ValidationMode::Full),
            Err(PackageValidationError::ReadIndexJsonError(_))
        );
    }
}
415            validate_package_directory(temp_dir.path(), ValidationMode::Full),
416            Err(PackageValidationError::ReadIndexJsonError(_))
417        );
418    }
419}