rattler_cache/
validation.rs

//! Functionality to validate the contents of a Conda package.
//!
//! Almost all Conda packages contain a file `info/paths.json` that describes
//! all the files the package contains. The [`validate_package_directory`]
//! function validates that a directory containing an extracted Conda package
//! archive actually contains the files as described by the `paths.json` file.
//!
//! Very old Conda packages do not contain a `paths.json` file. These packages
//! contain a (deprecated) `files` file as well as optionally a `has_prefix` and
//! some other files. If the `paths.json` file is missing these deprecated files
//! are used instead to reconstruct a [`PathsJson`] object. See
//! [`PathsJson::from_deprecated_package_directory`] for more information.
13
14use std::{
15    io::{BufReader, ErrorKind},
16    path::{Path, PathBuf},
17};
18
19use digest::Digest;
20use rattler_conda_types::package::{IndexJson, PackageFile, PathType, PathsEntry, PathsJson};
21use rattler_digest::Sha256;
22use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
23use rayon::prelude::IndexedParallelIterator;
24
/// Selects how thoroughly the files of a package are validated.
///
/// The default is [`ValidationMode::Fast`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum ValidationMode {
    /// Only check that the files exist; hashes are not compared.
    #[default]
    Fast,

    /// Check that the files exist and that their content matches the
    /// recorded hashes.
    Full,
}
35
36/// An error that is returned by [`validate_package_directory`] if the contents
37/// of the directory seems to be corrupted.
38#[derive(Debug, thiserror::Error)]
39pub enum PackageValidationError {
40    /// Neither a `paths.json` file nor a deprecated `files` file was found.
41    #[error("neither a 'paths.json' or a deprecated 'files' file was found")]
42    MetadataMissing,
43
44    /// An error occurred while reading the `paths.json` file.
45    #[error("failed to read 'paths.json' file")]
46    ReadPathsJsonError(#[source] std::io::Error),
47
48    /// An error occurred while reading the deprecated `files` file.
49    #[error("failed to read validation data from deprecated files")]
50    ReadDeprecatedPathsJsonError(#[source] std::io::Error),
51
52    /// The path seems to be corrupted.
53    #[error("the path '{0}' seems to be corrupted")]
54    CorruptedEntry(PathBuf, #[source] PackageEntryValidationError),
55
56    /// An error occurred while reading the `index.json` file.
57    #[error("failed to read 'index.json'")]
58    ReadIndexJsonError(#[source] std::io::Error),
59}
60
61/// An error that indicates that a specific file in a package archive directory
62/// seems to be corrupted.
63#[derive(Debug, thiserror::Error)]
64pub enum PackageEntryValidationError {
65    /// An error occurred while reading the metadata of the file.
66    #[error("failed to retrieve file metadata'")]
67    GetMetadataFailed(#[source] std::io::Error),
68
69    /// The file does not exist.
70    #[error("the file does not exist")]
71    NotFound,
72
73    /// The file is not a symbolic link.
74    #[error("expected a symbolic link")]
75    ExpectedSymlink,
76
77    /// The file is not a directory.
78    #[error("expected a directory")]
79    ExpectedDirectory,
80
81    /// The size of the file does not match the expected size.
82    #[error("incorrect size, expected {0} but file on disk is {1}")]
83    IncorrectSize(u64, u64),
84
85    /// An IO error occurred while reading the file.
86    #[error("an io error occurred")]
87    IoError(#[from] std::io::Error),
88
89    /// The SHA256 hash of the file does not match the expected hash.
90    #[error("sha256 hash mismatch, expected '{0}' but file on disk is '{1}'")]
91    HashMismatch(String, String),
92}
93
94/// Determine whether the files in the specified directory match what is
95/// expected according to the `info/paths.json` file in the same directory.
96///
97/// If the `info/paths.json` file could not be found this function tries to
98/// reconstruct the information from older deprecated methods. See
99/// [`PathsJson::from_deprecated_package_directory`].
100///
101/// If validation succeeds the parsed [`PathsJson`] object is returned which
102/// contains information about the files in the archive.
103pub fn validate_package_directory(
104    package_dir: &Path,
105    mode: ValidationMode,
106) -> Result<(IndexJson, PathsJson), PackageValidationError> {
107    // Validate that there is a valid IndexJson
108    let index_json = IndexJson::from_package_directory(package_dir)
109        .map_err(PackageValidationError::ReadIndexJsonError)?;
110
111    // Read the 'paths.json' file which describes all files that should be present.
112    // If the file could not be found try reconstructing the paths information
113    // from deprecated files in the package directory.
114    let paths = match PathsJson::from_package_directory(package_dir) {
115        Err(e) if e.kind() == ErrorKind::NotFound => {
116            match PathsJson::from_deprecated_package_directory(package_dir) {
117                Ok(paths) => paths,
118                Err(e) if e.kind() == ErrorKind::NotFound => {
119                    return Err(PackageValidationError::MetadataMissing)
120                }
121                Err(e) => return Err(PackageValidationError::ReadDeprecatedPathsJsonError(e)),
122            }
123        }
124        Err(e) => return Err(PackageValidationError::ReadPathsJsonError(e)),
125        Ok(paths) => paths,
126    };
127
128    // Validate all the entries
129    validate_package_directory_from_paths(package_dir, &paths, mode)
130        .map_err(|(path, err)| PackageValidationError::CorruptedEntry(path, err))?;
131
132    Ok((index_json, paths))
133}
134
135/// Determine whether the files in the specified directory match wat is expected
136/// according to the passed in [`PathsJson`].
137pub fn validate_package_directory_from_paths(
138    package_dir: &Path,
139    paths: &PathsJson,
140    mode: ValidationMode,
141) -> Result<(), (PathBuf, PackageEntryValidationError)> {
142    // Check every entry in the PathsJson object
143    paths
144        .paths
145        .par_iter()
146        .with_min_len(1000)
147        .try_for_each(|entry| {
148            validate_package_entry(package_dir, entry, mode)
149                .map_err(|e| (entry.relative_path.clone(), e))
150        })
151}
152
153/// Determine whether the information in the [`PathsEntry`] matches the file in
154/// the package directory.
155fn validate_package_entry(
156    package_dir: &Path,
157    entry: &PathsEntry,
158    mode: ValidationMode,
159) -> Result<(), PackageEntryValidationError> {
160    let path = package_dir.join(&entry.relative_path);
161
162    // Validate based on the type of path
163    match entry.path_type {
164        PathType::HardLink => validate_package_hard_link_entry(path, entry, mode),
165        PathType::SoftLink => validate_package_soft_link_entry(path, entry, mode),
166        PathType::Directory => validate_package_directory_entry(path, entry, mode),
167    }
168}
169
170/// Determine whether the information in the [`PathsEntry`] matches the file at
171/// the specified path.
172fn validate_package_hard_link_entry(
173    path: PathBuf,
174    entry: &PathsEntry,
175    mode: ValidationMode,
176) -> Result<(), PackageEntryValidationError> {
177    debug_assert!(entry.path_type == PathType::HardLink);
178
179    if mode == ValidationMode::Fast {
180        if !path.is_file() {
181            return Err(PackageEntryValidationError::NotFound);
182        }
183        return Ok(());
184    }
185
186    // Short-circuit if we have no validation reference
187    if entry.sha256.is_none() && entry.size_in_bytes.is_none() {
188        if !path.is_file() {
189            return Err(PackageEntryValidationError::NotFound);
190        }
191        return Ok(());
192    }
193
194    // Open the file for reading
195    let file = match std::fs::File::open(&path) {
196        Ok(file) => file,
197        Err(e) if e.kind() == ErrorKind::NotFound => {
198            return Err(PackageEntryValidationError::NotFound);
199        }
200        Err(e) => return Err(PackageEntryValidationError::IoError(e)),
201    };
202
203    // Validate the size of the file
204    if let Some(size_in_bytes) = entry.size_in_bytes {
205        let actual_file_len = file
206            .metadata()
207            .map_err(PackageEntryValidationError::IoError)?
208            .len();
209        if size_in_bytes != actual_file_len {
210            return Err(PackageEntryValidationError::IncorrectSize(
211                size_in_bytes,
212                actual_file_len,
213            ));
214        }
215    }
216
217    // Check the SHA256 hash of the file
218    if let Some(expected_hash) = &entry.sha256 {
219        // Determine the hash of the file on disk
220        let mut file = BufReader::with_capacity(64 * 1024, file);
221        let mut hasher = Sha256::default();
222        std::io::copy(&mut file, &mut hasher)?;
223        let hash = hasher.finalize();
224
225        // Compare the two hashes
226        if expected_hash != &hash {
227            return Err(PackageEntryValidationError::HashMismatch(
228                format!("{expected_hash:x}"),
229                format!("{hash:x}"),
230            ));
231        }
232    }
233
234    Ok(())
235}
236
237/// Determine whether the information in the [`PathsEntry`] matches the symbolic
238/// link at the specified path.
239fn validate_package_soft_link_entry(
240    path: PathBuf,
241    entry: &PathsEntry,
242    _mode: ValidationMode,
243) -> Result<(), PackageEntryValidationError> {
244    debug_assert!(entry.path_type == PathType::SoftLink);
245
246    if !path.is_symlink() {
247        return Err(PackageEntryValidationError::ExpectedSymlink);
248    }
249
250    // TODO: Validate symlink content. Dont validate the SHA256 hash of the file
251    // because since a symlink will most likely point to another file added as a
252    // hardlink by the package this is double work. Instead check that the
253    // symlink is correct e.g. `../a` points to the same file as `b/../../a` but
254    // they are different.
255
256    Ok(())
257}
258
259/// Determine whether the information in the [`PathsEntry`] matches the
260/// directory at the specified path.
261fn validate_package_directory_entry(
262    path: PathBuf,
263    entry: &PathsEntry,
264    _mode: ValidationMode,
265) -> Result<(), PackageEntryValidationError> {
266    debug_assert!(entry.path_type == PathType::Directory);
267
268    if path.is_dir() {
269        Ok(())
270    } else {
271        Err(PackageEntryValidationError::ExpectedDirectory)
272    }
273}
274
#[cfg(test)]
mod test {
    use std::io::Write;

    use assert_matches::assert_matches;
    use rattler_conda_types::package::{PackageFile, PathType, PathsJson};
    use rstest::rstest;
    use url::Url;

    use super::{
        validate_package_directory, validate_package_directory_from_paths,
        PackageEntryValidationError, PackageValidationError, ValidationMode,
    };

    /// A freshly extracted package must validate, and modifying one of its
    /// files must be detected as a hash mismatch.
    #[rstest]
    #[case::conda(
        "https://conda.anaconda.org/conda-forge/win-64/conda-22.9.0-py38haa244fe_2.tar.bz2",
        "3c2c2e8e81bde5fb1ac4b014f51a62411feff004580c708c97a0ec2b7058cdc4"
    )]
    #[case::mamba(
        "https://conda.anaconda.org/conda-forge/win-64/mamba-1.0.0-py38hecfeebb_2.tar.bz2",
        "f44c4bc9c6916ecc0e33137431645b029ade22190c7144eead61446dcbcc6f97"
    )]
    #[case::conda(
        "https://conda.anaconda.org/conda-forge/win-64/conda-22.11.1-py38haa244fe_1.conda",
        "a8a44c5ff2b2f423546d49721ba2e3e632233c74a813c944adf8e5742834930e"
    )]
    #[case::mamba(
        "https://conda.anaconda.org/conda-forge/win-64/mamba-1.1.0-py39hb3d9227_2.conda",
        "c172acdf9cb7655dd224879b30361a657b09bb084b65f151e36a2b51e51a080a"
    )]
    fn test_validate_package_files(#[case] url: Url, #[case] sha256: &str) {
        // Create a temporary directory and extract the given package.
        let temp_dir = tempfile::tempdir().unwrap();
        let package_path = tools::download_and_cache_file(url, sha256).unwrap();

        rattler_package_streaming::fs::extract(&package_path, temp_dir.path()).unwrap();

        // Validate that the extracted package is correct. Since it's just been
        // extracted this should work.
        let result = validate_package_directory(temp_dir.path(), ValidationMode::Full);
        if let Err(e) = result {
            panic!("{e}");
        }

        // Read the paths.json file and select the first file in the archive.
        let paths = PathsJson::from_package_directory(temp_dir.path())
            .or_else(|_| PathsJson::from_deprecated_package_directory(temp_dir.path()))
            .unwrap();
        let entry = paths
            .paths
            .iter()
            .find(|e| e.path_type == PathType::HardLink)
            .expect("package does not contain a file");

        // Change the file by overwriting the first byte of the file. The size
        // stays the same so only the hash comparison can detect the change.
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .open(temp_dir.path().join(&entry.relative_path))
            .unwrap();
        file.write_all(&[255]).unwrap();
        drop(file);

        // Revalidate the package, given that we changed a file it should now fail with
        // mismatched hashes.
        assert_matches!(
            validate_package_directory_from_paths(temp_dir.path(), &paths, ValidationMode::Full),
            Err((
                path,
                PackageEntryValidationError::HashMismatch(_, _)
            )) if path == entry.relative_path
        );
    }

    /// A freshly extracted package must validate, and replacing one of its
    /// symlinks with a regular file must be detected.
    #[rstest]
    #[cfg(unix)]
    #[case::python(
        "https://conda.anaconda.org/conda-forge/linux-ppc64le/python-3.10.6-h2c4edbf_0_cpython.tar.bz2",
        "978c122f6529cb617b90e6e692308a5945bf9c3ba0c27acbe4bea4c8b02cdad0"
    )]
    // Very old package; the test falls back to the deprecated metadata files
    // when `paths.json` cannot be loaded.
    #[case::zlib(
        "https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.8-3.tar.bz2",
        "85fcb6906b8686fe6341db89b4e6fc2631ad69ee6eab2f4823bfd64ae0b20ac8"
    )]
    fn test_validate_package_files_symlink(#[case] url: Url, #[case] sha256: &str) {
        // Create a temporary directory and extract the given package.
        let temp_dir = tempfile::tempdir().unwrap();
        let package_path = tools::download_and_cache_file(url, sha256).unwrap();

        rattler_package_streaming::fs::extract(&package_path, temp_dir.path()).unwrap();

        // Validate that the extracted package is correct. Since it's just been
        // extracted this should work.
        let result = validate_package_directory(temp_dir.path(), ValidationMode::Full);
        if let Err(e) = result {
            panic!("{e}");
        }

        // Read the paths.json file and select the first symlink in the archive.
        let paths = PathsJson::from_package_directory(temp_dir.path())
            .or_else(|_| PathsJson::from_deprecated_package_directory(temp_dir.path()))
            .unwrap();
        let entry = paths
            .paths
            .iter()
            .find(|e| e.path_type == PathType::SoftLink)
            .expect("package does not contain a symlink");

        // Replace the symlink with its content
        let entry_path = temp_dir.path().join(&entry.relative_path);
        let contents = std::fs::read(&entry_path).unwrap();
        std::fs::remove_file(&entry_path).unwrap();
        std::fs::write(entry_path, contents).unwrap();

        // Revalidate the package, given that we replaced the symlink, it should fail.
        assert_matches!(
            validate_package_directory_from_paths(temp_dir.path(), &paths, ValidationMode::Full),
            Err((
                path,
                PackageEntryValidationError::ExpectedSymlink
            )) if path == entry.relative_path
        );
    }

    /// An empty directory has no `index.json`, which is the first thing the
    /// validation reads, so that is the error that must be reported.
    #[test]
    fn test_missing_metadata() {
        let temp_dir = tempfile::tempdir().unwrap();
        assert_matches!(
            validate_package_directory(temp_dir.path(), ValidationMode::Full),
            Err(PackageValidationError::ReadIndexJsonError(_))
        );
    }
}