// rattler_build_core 0.1.0
//
// Core library for rattler-build
// Documentation
//! This module maps files from the prefix into the temporary directory.

use crate::metadata::Output;
use fs_err as fs;
use std::{
    collections::HashSet,
    path::{Component, Path, PathBuf},
};

use super::PackagingError;

/// Decides whether a `.pyc` file was compiled from a `.py` file that is
/// listed in `old_files` (the files of the environment before the build).
///
/// This is a temporary measure to avoid packaging `pyc` files that are not
/// generated by the build process.
///
/// Two layouts are recognised:
///
/// - `dir/__pycache__/<module>.<cache-tag>.pyc`, optionally with a PEP 488
///   `.opt-<level>` segment directly in front of `.pyc`, is mapped to
///   `dir/<module>.py`.
/// - `dir/<module>.pyc` (no `__pycache__` parent) is mapped to
///   `dir/<module>.py`.
///
/// Returns `true` when the mapped `.py` path is contained in `old_files` and
/// `false` otherwise, including for every path whose extension is not `pyc`.
pub fn filter_pyc(path: &Path, old_files: &HashSet<PathBuf>) -> bool {
    let (Some(ext), Some(parent)) = (path.extension(), path.parent()) else {
        return false;
    };
    if ext != "pyc" {
        return false;
    }

    let pyfile = if parent.ends_with("__pycache__") {
        // These paths look like .../__pycache__/file_dependency.cpython-311.pyc
        // and the matching `file_dependency.py` lives in the directory that
        // contains `__pycache__`.
        let file_name = path
            .file_name()
            .expect("unreachable as extension doesn't exist without filename")
            .to_string_lossy();
        let Some(package_dir) = parent.parent() else {
            return false;
        };
        package_dir.join(format!("{}.py", cached_module_name(&file_name)))
    } else {
        path.with_extension("py")
    };

    old_files.contains(&pyfile)
}

/// Extracts `<module>` from a `__pycache__` file name of the form
/// `<module>.<cache-tag>[.opt-<level>].pyc`.
///
/// A name without a cache tag (e.g. `module.pyc`) yields the part in front of
/// `.pyc`; dots inside `<module>` are preserved.
fn cached_module_name(file_name: &str) -> &str {
    let without_ext = file_name.strip_suffix(".pyc").unwrap_or(file_name);

    // PEP 488: optimised bytecode carries an extra `.opt-<level>` segment.
    let without_opt = match without_ext.rsplit_once('.') {
        Some((head, tail)) if tail.starts_with("opt-") => head,
        _ => without_ext,
    };

    // Drop the interpreter cache tag (e.g. `cpython-311`) if there is one.
    without_opt
        .rsplit_once('.')
        .map_or(without_opt, |(module, _cache_tag)| module)
}

/// Tells whether a file (given relative to the prefix) must be left out of
/// the package. Returns `true` for files that should be dropped:
///
/// - `share/info/dir` (and anything below it): several packages would
///   otherwise write to the same index file
/// - `.pyo` files: considered "harmful" (optimized python files)
/// - `.la` files: not needed at runtime and unnecessary bloat
/// - any path containing a `.DS_Store` component: macOS specific, not needed
/// - any path containing a `conda-meta` component
/// - any path containing a `CACHEDIR.TAG` component
pub fn filter_file(relative_path: &Path) -> bool {
    // shared info index that multiple packages would write to
    let is_info_index = relative_path.starts_with("share/info/dir");

    // pyo considered harmful: https://www.python.org/dev/peps/pep-0488/
    // `.la` files are skipped because conda-build does it as well
    let extension = relative_path.extension().unwrap_or_default();
    let has_unwanted_extension = extension == "pyo" || extension == "la";

    // .DS_Store, CACHEDIR.TAG or conda-meta anywhere in the path
    let has_unwanted_component = relative_path.components().any(|component| {
        let name = component.as_os_str().to_string_lossy();
        matches!(&*name, ".DS_Store" | "conda-meta" | "CACHEDIR.TAG")
    });

    is_info_index || has_unwanted_extension || has_unwanted_component
}

impl Output {
    /// Copies a single entry from the build prefix into `dest_folder`,
    /// remapping its location on the way when needed.
    ///
    /// * Paths rejected by [`filter_file`] are skipped.
    /// * `.py` / `.pyc` files are skipped when a `.so` or `.pyd` file with the
    ///   same stem exists next to them.
    /// * For Python-version-independent packages (as reported by
    ///   `is_python_version_independent`):
    ///   - files in `bin` / `Scripts` that belong to a declared entry point are
    ///     skipped,
    ///   - files with the extension `pyc`, `pyo` or `egg-info` and anything
    ///     inside a `__pycache__` directory are skipped,
    ///   - everything in front of the first `site-packages` component is
    ///     stripped so that only the "site-packages" part is kept,
    ///   - otherwise a leading `bin` or `Scripts` component is replaced with
    ///     `python-scripts` (on every target platform); when targeting Windows
    ///     a trailing `-script.py` is additionally removed from the file name.
    /// * Symlinks are re-created at the destination. Absolute targets that
    ///   point into `prefix` are made relative so that they are easily
    ///   relocatable. Failing to read or create a link is only logged.
    /// * Directories themselves are not copied.
    ///
    /// # Returns
    ///
    /// `Ok(Some(dest_path))` for a copied file or handled symlink, `Ok(None)`
    /// when the entry was skipped.
    ///
    /// # Errors
    ///
    /// Fails when `path` is not located below `prefix`, when the destination
    /// path has no parent directory, or when creating the destination
    /// directory, reading the source metadata or copying the file fails.
    pub fn write_to_dest(
        &self,
        path: &Path,
        prefix: &Path,
        dest_folder: &Path,
    ) -> Result<Option<PathBuf>, PackagingError> {
        let target_platform = &self.build_configuration.target_platform;
        let entry_points = &self.recipe.build().python.entry_points;

        let path_rel = path.strip_prefix(prefix)?;

        if filter_file(path_rel) {
            return Ok(None);
        }

        let mut dest_path = dest_folder.join(path_rel);
        let ext = path.extension().unwrap_or_default();

        if ext == "py" || ext == "pyc" {
            // if we have a .so (or .pyd) file of the same name, skip this path
            let so_path = path.with_extension("so");
            let pyd_path = path.with_extension("pyd");
            if so_path.exists() || pyd_path.exists() {
                return Ok(None);
            }
        }

        if self.is_python_version_independent() {
            // we need to remove files in bin/ that are registered as entry points
            if path_rel.starts_with("bin") {
                if let Some(name) = path_rel.file_name()
                    && entry_points
                        .iter()
                        .any(|ep| ep.command == name.to_string_lossy())
                {
                    return Ok(None);
                }
            }
            // Windows: entry points show up as `<command>.exe` / `<command>-script.py`
            else if path_rel.starts_with("Scripts")
                && let Some(name) = path_rel.file_name()
                && entry_points.iter().any(|ep| {
                    format!("{}.exe", ep.command) == name.to_string_lossy()
                        || format!("{}-script.py", ep.command) == name.to_string_lossy()
                })
            {
                return Ok(None);
            }

            // skip .pyc or .pyo or .egg-info files
            if ["pyc", "egg-info", "pyo"].iter().any(|s| ext.eq(*s)) {
                return Ok(None);
            }

            // if any part of the path is __pycache__ skip it
            if path_rel
                .components()
                .any(|c| c == Component::Normal("__pycache__".as_ref()))
            {
                return Ok(None);
            }

            let site_packages = Component::Normal("site-packages".as_ref());
            if path_rel.components().any(|c| c == site_packages) {
                // keep the first `site-packages` component and everything after it
                let stripped: PathBuf = path_rel
                    .components()
                    .skip_while(|c| *c != site_packages)
                    .collect();

                dest_path = dest_folder.join(stripped);
            } else if path_rel.starts_with("bin") || path_rel.starts_with("Scripts") {
                // Replace bin with python-scripts. These should really be encoded
                // as entrypoints but sometimes recipe authors forget or don't know
                // how to do that. Maybe sometimes it's also not actually an
                // entrypoint. The reason for this is that on Windows, the
                // entrypoints are in `Scripts/...` folder, and on Unix they are in
                // the `bin/...` folder. So we need to make sure that the
                // entrypoints are in the right place.
                let mut new_parts = path_rel.components().collect::<Vec<_>>();
                // `starts_with` above guarantees at least one component
                new_parts[0] = Component::Normal("python-scripts".as_ref());

                // on Windows, if the file ends with -script.py, remove the -script.py suffix
                if let Some(Component::Normal(name)) = new_parts.last_mut()
                    && let Some(name_str) = name.to_str()
                    && target_platform.is_windows()
                    && let Some(stripped_suffix) = name_str.strip_suffix("-script.py")
                {
                    *name = stripped_suffix.as_ref();
                }

                dest_path = dest_folder.join(PathBuf::from_iter(new_parts));
            }
            // everything else keeps the `dest_folder.join(path_rel)` computed above
        }

        match dest_path.parent() {
            Some(parent) => {
                if fs::metadata(parent).is_err() {
                    fs::create_dir_all(parent)?;
                }
            }
            None => {
                return Err(PackagingError::IoError(std::io::Error::other(
                    "Could not get parent directory",
                )));
            }
        }

        // `symlink_metadata` does not follow links, so symlinks are detected as such
        let metadata = fs::symlink_metadata(path)?;

        // Handle symlinks: make absolute symlinks relative and copy the link
        if metadata.file_type().is_symlink() {
            if target_platform.is_windows() {
                tracing::warn!("Symlink creation on Windows requires administrator privileges");
            }
            // Read the link target
            match fs::read_link(path) {
                Ok(mut target) => {
                    // If absolute and within the build prefix, make it relative
                    if target.is_absolute() && target.starts_with(prefix) {
                        // NOTE(review): the relative target is computed from the
                        // source location; if `dest_path` was remapped above the
                        // link may resolve differently - confirm.
                        if let Some(parent) = path.parent()
                            && let Some(rel) = pathdiff::diff_paths(&target, parent)
                        {
                            target = rel;
                        }
                    } else if target.is_absolute() {
                        tracing::warn!(
                            "Symlink {path:?} points to absolute path {target:?} outside of the $PREFIX",
                        );
                    }
                    // Create the symlink at dest_path (best effort: failures are logged)
                    #[cfg(unix)]
                    {
                        if let Err(e) = fs_err::os::unix::fs::symlink(&target, &dest_path) {
                            tracing::warn!(
                                "Failed to create symlink {:?} -> {:?}: {:?}",
                                dest_path,
                                target,
                                e
                            );
                        }
                    }
                    #[cfg(windows)]
                    {
                        // `target` is usually relative to the directory of the
                        // link, so `target.is_dir()` would be resolved against the
                        // process working directory. Asking the source link itself
                        // follows it from the right location.
                        let res = if path.is_dir() {
                            fs_err::os::windows::fs::symlink_dir(&target, &dest_path)
                        } else {
                            fs_err::os::windows::fs::symlink_file(&target, &dest_path)
                        };
                        if let Err(e) = res {
                            tracing::warn!(
                                "Failed to create symlink {:?} -> {:?}: {:?}",
                                dest_path,
                                target,
                                e
                            );
                        }
                    }
                }
                Err(e) => {
                    tracing::warn!("Failed to read symlink {:?}: {:?}", path, e);
                }
            }
            Ok(Some(dest_path))
        } else if metadata.is_dir() {
            // skip directories for now
            Ok(None)
        } else {
            tracing::trace!("Copying file {:?} to {:?}", path, dest_path);
            fs::copy(path, &dest_path)?;
            Ok(Some(dest_path))
        }
    }
}

#[cfg(test)]
mod test {
    use std::{
        collections::HashSet,
        path::{Path, PathBuf},
    };

    use crate::packaging::file_mapper::filter_pyc;

    /// Checks every rule documented on `filter_file`, plus a few near misses.
    #[test]
    fn test_filter_file() {
        let test_cases = [
            ("test.pyo", true),
            ("test.la", true),
            (".DS_Store", true),
            (".gitignore", false),
            (".git/HEAD", false),
            (".github/workflows/foo.yml", false),
            ("foo/.DS_Store", true),
            ("lib/libarchive.la", true),
            ("bla/.git/config", false),
            ("share/info/dir", true),
            ("share/info/dir/foo", true),
            // prefix matching is per path component, not per character
            ("share/info/directory", false),
            ("lib/python3.9/site-packages/test/fast.pyo", true),
            ("lib/python3.9/site-packages/test/fast.py", false),
            ("lib/python3.9/site-packages/test/fast.pyc", false),
            ("lib/libarchive.a", false),
            ("lib/libarchive.so", false),
            // conda-meta directories
            ("conda-meta/history", true),
            ("foo/conda-meta/pkg.json", true),
            ("lib/conda-meta-extra", false),
            // CACHEDIR.TAG files
            ("CACHEDIR.TAG", true),
            ("cache/CACHEDIR.TAG", true),
        ];

        for (file, expected) in test_cases {
            let path = Path::new(file);
            assert_eq!(
                super::filter_file(path),
                expected,
                "Failed for file: {}",
                file
            );
        }
    }

    /// Checks the mapping from `.pyc` paths to the `.py` files in `old_files`.
    #[test]
    fn test_filter_pyc() {
        let mut old_files = HashSet::new();
        old_files.insert(PathBuf::from("pkg/module.py"));
        old_files.insert(PathBuf::from("pkg/other.py"));
        old_files.insert(PathBuf::from("pkg/nested/deep.py"));

        let test_cases = [
            // __pycache__ cases
            ("pkg/__pycache__/module.cpython-311.pyc", true), // has corresponding .py
            ("pkg/__pycache__/missing.cpython-311.pyc", false), // no corresponding .py
            ("pkg/__pycache__/other.pyc", true),              // no cache tag in the name
            // Regular pyc files
            ("pkg/module.pyc", true),   // has corresponding .py
            ("pkg/missing.pyc", false), // no corresponding .py
            // Nested paths
            ("pkg/nested/__pycache__/deep.cpython-311.pyc", true),
            ("pkg/nested/deep.pyc", true),
            ("pkg/nested/module.pyc", false), // same stem, but in another directory
            // Edge cases
            ("pkg/not_python.txt", false), // non-python files pass through
            ("pkg/__pycache__/invalid", false), // no extension
            ("", false),                   // empty path
        ];

        for (file, expected) in test_cases {
            let path = Path::new(file);
            assert_eq!(
                filter_pyc(path, &old_files),
                expected,
                "Failed for path: {}",
                file
            );
        }
    }
}