// python-packaging 0.16.0
//
// Python packaging primitives implemented in Rust
// Documentation
// Copyright 2022 Gregory Szorc.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

/*!
Functionality for defining how Python resources should be packaged.
*/

use {
    crate::{
        licensing::{LicenseFlavor, SAFE_SYSTEM_LIBRARIES},
        location::ConcreteResourceLocation,
        resource::{PythonExtensionModule, PythonExtensionModuleVariants, PythonResource},
        resource_collection::PythonResourceAddCollectionContext,
    },
    anyhow::Result,
    std::collections::{HashMap, HashSet},
};

/// Denotes methods to filter extension modules.
///
/// Each variant has a canonical string form (`"minimal"`, `"all"`,
/// `"no-libraries"`, `"no-copyleft"`) used by the `TryFrom<&str>` and
/// `AsRef<str>` implementations.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ExtensionModuleFilter {
    /// Only use the minimum set of extension modules needed to initialize an interpreter.
    Minimal,
    /// Use all extension modules.
    All,
    /// Only use extension modules without library dependencies.
    NoLibraries,
    /// Only use extension modules that link against no libraries, link only
    /// against known-safe system libraries, or whose license is not copyleft.
    NoCopyleft,
}

impl TryFrom<&str> for ExtensionModuleFilter {
    type Error = String;

    fn try_from(value: &str) -> Result<Self, Self::Error> {
        match value {
            "minimal" => Ok(ExtensionModuleFilter::Minimal),
            "all" => Ok(ExtensionModuleFilter::All),
            "no-libraries" => Ok(ExtensionModuleFilter::NoLibraries),
            "no-copyleft" => Ok(ExtensionModuleFilter::NoCopyleft),
            t => Err(format!("{} is not a valid extension module filter", t)),
        }
    }
}

impl AsRef<str> for ExtensionModuleFilter {
    fn as_ref(&self) -> &str {
        match self {
            ExtensionModuleFilter::All => "all",
            ExtensionModuleFilter::Minimal => "minimal",
            ExtensionModuleFilter::NoCopyleft => "no-copyleft",
            ExtensionModuleFilter::NoLibraries => "no-libraries",
        }
    }
}

/// Describes how resources should be handled.
///
/// The canonical string forms, as used by the `TryFrom<&str>` and
/// `AsRef<str>` implementations, are `"classify"` and `"files"`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ResourceHandlingMode {
    /// Files should be classified as typed resources.
    Classify,

    /// Files should be handled as files.
    Files,
}

impl TryFrom<&str> for ResourceHandlingMode {
    type Error = String;

    /// Parse the canonical string form of a resource handling mode.
    ///
    /// Accepts `"classify"` and `"files"`. Any other input yields an `Err`
    /// whose message names the rejected value and lists the valid choices.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        if value == "classify" {
            Ok(Self::Classify)
        } else if value == "files" {
            Ok(Self::Files)
        } else {
            Err(format!(
                "{} is not a valid resource handling mode; use \"classify\" or \"files\"",
                value
            ))
        }
    }
}

impl AsRef<str> for ResourceHandlingMode {
    fn as_ref(&self) -> &str {
        match self {
            Self::Classify => "classify",
            Self::Files => "files",
        }
    }
}

/// Defines how Python resources should be packaged.
///
/// A policy is a bag of settings that is consulted when deciding whether
/// and how a resource is added to a resource collection, and which
/// extension module variants are selected.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PythonPackagingPolicy {
    /// Which extension modules should be included.
    extension_module_filter: ExtensionModuleFilter,

    /// Preferred variants of extension modules.
    ///
    /// Maps an extension module name to the name of its preferred variant.
    preferred_extension_module_variants: HashMap<String, String>,

    /// Where resources should be placed/loaded from by default.
    resources_location: ConcreteResourceLocation,

    /// Optional fallback location for resources should `resources_location` fail.
    resources_location_fallback: Option<ConcreteResourceLocation>,

    /// Whether to allow in-memory shared library loading.
    ///
    /// If true, we will attempt to load Python extension modules
    /// and their shared library dependencies from memory if supported.
    ///
    /// This feature is not supported on all platforms and this setting
    /// can get overruled by platform-specific capabilities.
    allow_in_memory_shared_library_loading: bool,

    /// Whether untyped files are allowed.
    ///
    /// If true, `File` instances can be added to the resource collector.
    ///
    /// If false, resources must be strongly typed (`PythonModuleSource`,
    /// `PythonPackageResource`, etc).
    allow_files: bool,

    /// Whether file scanning should emit `PythonResource::File` variants.
    ///
    /// If true, this resource variant is emitted when scanning for
    /// resources. If false, it isn't.
    ///
    /// This effectively says whether the file scanner should emit records
    /// corresponding to the actual file.
    file_scanner_emit_files: bool,

    /// Whether file scanning should classify files and emit `PythonResource::*`
    /// variants.
    ///
    /// If true, the file scanner will attempt to classify every file as
    /// a specific resource type and emit a `PythonResource::*` variant
    /// corresponding to the resource type.
    ///
    /// If false, this classification is not performed.
    file_scanner_classify_files: bool,

    /// Whether to classify non-`File` resources as `include = True` by default.
    include_classified_resources: bool,

    /// Whether to include source modules from the Python distribution.
    include_distribution_sources: bool,

    /// Whether to include Python module source for non-distribution modules.
    include_non_distribution_sources: bool,

    /// Whether to include package resource files from the Python distribution.
    include_distribution_resources: bool,

    /// Whether to include test files.
    include_test: bool,

    /// Whether to classify `File` resources as `include = True` by default.
    include_file_resources: bool,

    /// Mapping of target triple to list of extensions that don't work for that triple.
    ///
    /// Policy constructors can populate this with known broken extensions to
    /// prevent the policy from allowing an extension.
    broken_extensions: HashMap<String, Vec<String>>,

    /// Whether to write Python bytecode at optimization level 0.
    bytecode_optimize_level_zero: bool,

    /// Whether to write Python bytecode at optimization level 1.
    bytecode_optimize_level_one: bool,

    /// Whether to write Python bytecode at optimization level 2.
    bytecode_optimize_level_two: bool,

    /// Python modules for which bytecode should not be generated by default.
    ///
    /// Entries are module names; see `register_no_bytecode_module()`.
    no_bytecode_modules: HashSet<String>,
}

impl Default for PythonPackagingPolicy {
    /// Construct a policy with the default settings.
    ///
    /// By default all extension modules are eligible, resources are loaded
    /// from memory with no fallback, files are classified rather than
    /// emitted as raw `File` resources, and only optimization level 0
    /// bytecode is written.
    fn default() -> Self {
        Self {
            // Extension module selection.
            extension_module_filter: ExtensionModuleFilter::All,
            preferred_extension_module_variants: HashMap::new(),
            broken_extensions: HashMap::new(),

            // Resource locations.
            resources_location: ConcreteResourceLocation::InMemory,
            resources_location_fallback: None,
            allow_in_memory_shared_library_loading: false,

            // File scanning / untyped file handling.
            allow_files: false,
            file_scanner_emit_files: false,
            file_scanner_classify_files: true,

            // Default inclusion rules.
            include_classified_resources: true,
            include_file_resources: false,
            include_distribution_sources: true,
            include_non_distribution_sources: true,
            include_distribution_resources: false,
            include_test: false,

            // Bytecode generation.
            bytecode_optimize_level_zero: true,
            bytecode_optimize_level_one: false,
            bytecode_optimize_level_two: false,
            no_bytecode_modules: HashSet::new(),
        }
    }
}

impl PythonPackagingPolicy {
    /// Obtain the active extension module filter for this instance.
    pub fn extension_module_filter(&self) -> &ExtensionModuleFilter {
        &self.extension_module_filter
    }

    /// Set the extension module filter to use.
    pub fn set_extension_module_filter(&mut self, filter: ExtensionModuleFilter) {
        self.extension_module_filter = filter;
    }

    /// Obtain the preferred extension module variants for this policy.
    ///
    /// The returned object is a mapping of extension name to its variant
    /// name.
    pub fn preferred_extension_module_variants(&self) -> &HashMap<String, String> {
        &self.preferred_extension_module_variants
    }

    /// Denote the preferred variant for an extension module.
    ///
    /// If set, the named variant will be chosen if it is present. Registering
    /// a variant for an extension that already has one replaces the previous
    /// preference.
    pub fn set_preferred_extension_module_variant(&mut self, extension: &str, variant: &str) {
        self.preferred_extension_module_variants
            .insert(extension.to_string(), variant.to_string());
    }

    /// Obtain the primary location for added resources.
    pub fn resources_location(&self) -> &ConcreteResourceLocation {
        &self.resources_location
    }

    /// Set the primary location for added resources.
    pub fn set_resources_location(&mut self, location: ConcreteResourceLocation) {
        self.resources_location = location;
    }

    /// Obtain the fallback location for added resources.
    ///
    /// `None` means no fallback location is configured.
    pub fn resources_location_fallback(&self) -> &Option<ConcreteResourceLocation> {
        &self.resources_location_fallback
    }

    /// Set the fallback location for added resources.
    ///
    /// Pass `None` to clear the fallback.
    pub fn set_resources_location_fallback(&mut self, location: Option<ConcreteResourceLocation>) {
        self.resources_location_fallback = location;
    }

    /// Whether to allow untyped `File` resources.
    pub fn allow_files(&self) -> bool {
        self.allow_files
    }

    /// Set whether to allow untyped `File` resources.
    pub fn set_allow_files(&mut self, value: bool) {
        self.allow_files = value;
    }

    /// Whether file scanning should emit `PythonResource::File` variants.
    pub fn file_scanner_emit_files(&self) -> bool {
        self.file_scanner_emit_files
    }

    /// Set whether file scanning should emit `PythonResource::File` variants.
    pub fn set_file_scanner_emit_files(&mut self, value: bool) {
        self.file_scanner_emit_files = value;
    }

    /// Whether file scanning should classify files into `PythonResource::*` variants.
    pub fn file_scanner_classify_files(&self) -> bool {
        self.file_scanner_classify_files
    }

    /// Set whether file scanning should classify files into `PythonResource::*` variants.
    pub fn set_file_scanner_classify_files(&mut self, value: bool) {
        self.file_scanner_classify_files = value;
    }

    /// Whether to allow in-memory shared library loading.
    pub fn allow_in_memory_shared_library_loading(&self) -> bool {
        self.allow_in_memory_shared_library_loading
    }

    /// Set the value for whether to allow in-memory shared library loading.
    pub fn set_allow_in_memory_shared_library_loading(&mut self, value: bool) {
        self.allow_in_memory_shared_library_loading = value;
    }

    /// Get setting for whether to include source modules from the distribution.
    pub fn include_distribution_sources(&self) -> bool {
        self.include_distribution_sources
    }

    /// Set whether we should include a Python distribution's module source code.
    pub fn set_include_distribution_sources(&mut self, include: bool) {
        self.include_distribution_sources = include;
    }

    /// Get setting for whether to include Python package resources from the distribution.
    pub fn include_distribution_resources(&self) -> bool {
        self.include_distribution_resources
    }

    /// Set whether to include package resources from the Python distribution.
    pub fn set_include_distribution_resources(&mut self, include: bool) {
        self.include_distribution_resources = include;
    }

    /// Whether to include Python sources for modules not in the standard library.
    pub fn include_non_distribution_sources(&self) -> bool {
        self.include_non_distribution_sources
    }

    /// Set whether to include Python sources for modules not in the standard library.
    pub fn set_include_non_distribution_sources(&mut self, include: bool) {
        self.include_non_distribution_sources = include;
    }

    /// Get setting for whether to include test files.
    pub fn include_test(&self) -> bool {
        self.include_test
    }

    /// Set whether we should include Python modules that define tests.
    pub fn set_include_test(&mut self, include: bool) {
        self.include_test = include;
    }

    /// Get whether to classify `File` resources as include by default.
    pub fn include_file_resources(&self) -> bool {
        self.include_file_resources
    }

    /// Set whether to classify `File` resources as include by default.
    pub fn set_include_file_resources(&mut self, value: bool) {
        self.include_file_resources = value;
    }

    /// Get whether to classify non-`File` resources as include by default.
    pub fn include_classified_resources(&self) -> bool {
        self.include_classified_resources
    }

    /// Set whether to classify non-`File` resources as include by default.
    pub fn set_include_classified_resources(&mut self, value: bool) {
        self.include_classified_resources = value;
    }

    /// Whether to write bytecode at optimization level 0.
    pub fn bytecode_optimize_level_zero(&self) -> bool {
        self.bytecode_optimize_level_zero
    }

    /// Set whether to write bytecode at optimization level 0.
    pub fn set_bytecode_optimize_level_zero(&mut self, value: bool) {
        self.bytecode_optimize_level_zero = value;
    }

    /// Whether to write bytecode at optimization level 1.
    pub fn bytecode_optimize_level_one(&self) -> bool {
        self.bytecode_optimize_level_one
    }

    /// Set whether to write bytecode at optimization level 1.
    pub fn set_bytecode_optimize_level_one(&mut self, value: bool) {
        self.bytecode_optimize_level_one = value;
    }

    /// Whether to write bytecode at optimization level 2.
    pub fn bytecode_optimize_level_two(&self) -> bool {
        self.bytecode_optimize_level_two
    }

    /// Set whether to write bytecode at optimization level 2.
    pub fn set_bytecode_optimize_level_two(&mut self, value: bool) {
        self.bytecode_optimize_level_two = value;
    }

    /// Set the resource handling mode of the policy.
    ///
    /// This is a convenience function for mapping a `ResourceHandlingMode`
    /// to corresponding field values. It overwrites the file scanner
    /// settings, `allow_files` and both `include_*_resources` flags; no
    /// other settings are touched.
    pub fn set_resource_handling_mode(&mut self, mode: ResourceHandlingMode) {
        // An exhaustive match is kept deliberately so that adding a mode
        // forces this mapping to be revisited.
        match mode {
            ResourceHandlingMode::Classify => {
                self.file_scanner_emit_files = false;
                self.file_scanner_classify_files = true;
                self.allow_files = false;
                self.include_file_resources = false;
                self.include_classified_resources = true;
            }
            ResourceHandlingMode::Files => {
                self.file_scanner_emit_files = true;
                self.file_scanner_classify_files = false;
                self.allow_files = true;
                self.include_file_resources = true;
                self.include_classified_resources = true;
            }
        }
    }

    /// Obtain broken extensions for a target triple.
    ///
    /// Returns `None` if no extension was ever registered as broken for
    /// `target_triple`.
    pub fn broken_extensions_for_triple(&self, target_triple: &str) -> Option<&Vec<String>> {
        self.broken_extensions.get(target_triple)
    }

    /// Mark an extension as broken on a target platform, preventing it from being used.
    ///
    /// Registrations accumulate: each call appends `extension` to the list
    /// kept for `target_triple`, creating that list on first use.
    pub fn register_broken_extension(&mut self, target_triple: &str, extension: &str) {
        // The entry API does the lookup-or-insert in one step, avoiding the
        // `contains_key` / `insert` / `get_mut().unwrap()` sequence.
        self.broken_extensions
            .entry(target_triple.to_string())
            .or_default()
            .push(extension.to_string());
    }

    /// Register a Python module as one that should not generate bytecode.
    ///
    /// When source modules matching names registered with this function are added,
    /// their default settings for adding bytecode will always be false.
    ///
    /// It is still possible to force bytecode generation by setting the add context
    /// fields to true or explicitly adding a bytecode resource.
    pub fn register_no_bytecode_module(&mut self, name: &str) {
        self.no_bytecode_modules.insert(name.to_string());
    }

    /// Derive a `PythonResourceAddCollectionContext` for a resource using current settings.
    ///
    /// The returned object essentially says how the resource should be added
    /// to a `PythonResourceCollector` given this policy:
    ///
    /// * `include` comes from the policy's inclusion rules for the resource.
    /// * `location` / `location_fallback` are copies of the policy's locations.
    /// * `store_source` is only ever true for module sources, following the
    ///   distribution / non-distribution source settings.
    /// * The `optimize_level_*` flags mirror the policy's bytecode settings,
    ///   except that they are all false for module sources registered via
    ///   `register_no_bytecode_module()`.
    pub fn derive_add_collection_context(
        &self,
        resource: &PythonResource,
    ) -> PythonResourceAddCollectionContext {
        let include = self.filter_python_resource(resource);

        // Only module sources have source to store and can be opted out of
        // bytecode generation by name, so both facts are derived together.
        let (store_source, bytecode_suppressed) = match resource {
            PythonResource::ModuleSource(module) => {
                let store = if module.is_stdlib {
                    self.include_distribution_sources
                } else {
                    self.include_non_distribution_sources
                };

                (store, self.no_bytecode_modules.contains(&*module.name))
            }
            _ => (false, false),
        };

        PythonResourceAddCollectionContext {
            include,
            location: self.resources_location.clone(),
            location_fallback: self.resources_location_fallback.clone(),
            store_source,
            optimize_level_zero: !bytecode_suppressed && self.bytecode_optimize_level_zero,
            optimize_level_one: !bytecode_suppressed && self.bytecode_optimize_level_one,
            optimize_level_two: !bytecode_suppressed && self.bytecode_optimize_level_two,
        }
    }

    /// Determine if a Python resource is applicable to the current policy.
    ///
    /// Given a `PythonResource`, this answers the question of whether that
    /// resource meets the inclusion requirements for the current policy.
    ///
    /// Returns true if the resource should be included, false otherwise.
    fn filter_python_resource(&self, resource: &PythonResource) -> bool {
        // Coarse switch first: `File` resources and classified (typed)
        // resources each have their own master toggle.
        let category_enabled = match resource {
            PythonResource::File(_) => self.include_file_resources,
            _ => self.include_classified_resources,
        };

        if !category_enabled {
            return false;
        }

        match resource {
            // NOTE(review): every module source, stdlib or not, is gated on
            // `include_distribution_sources` here — confirm this is intended.
            PythonResource::ModuleSource(module) => {
                (self.include_test || !module.is_test) && self.include_distribution_sources
            }
            PythonResource::ModuleBytecodeRequest(module) => self.include_test || !module.is_test,
            PythonResource::ModuleBytecode(_) => false,
            // Non-stdlib package resources are always included. Stdlib ones
            // need distribution resources enabled and must pass the test filter.
            PythonResource::PackageResource(resource) => {
                !resource.is_stdlib
                    || (self.include_distribution_resources
                        && (self.include_test || !resource.is_test))
            }
            PythonResource::PackageDistributionResource(_) => true,
            PythonResource::ExtensionModule(_) => false,
            PythonResource::PathExtension(_) => false,
            PythonResource::EggFile(_) => false,
            PythonResource::File(_) => true,
        }
    }

    /// Push the preferred variant among `candidates` onto `res`.
    ///
    /// Does nothing when `candidates` is empty.
    fn push_preferred_variant(
        &self,
        candidates: &PythonExtensionModuleVariants,
        res: &mut Vec<PythonExtensionModule>,
    ) {
        if !candidates.is_empty() {
            res.push(
                candidates
                    .choose_variant(&self.preferred_extension_module_variants)
                    .clone(),
            );
        }
    }

    /// Resolve Python extension modules that are compliant with the policy.
    ///
    /// For each set of variants in `extensions_variants`:
    ///
    /// 1. The extension is skipped entirely if its default variant's name is
    ///    registered as broken for `target_triple`.
    /// 2. If any variant is minimally required, the preferred one among
    ///    those is always selected, regardless of the active filter.
    /// 3. The active `ExtensionModuleFilter` then selects at most one more
    ///    variant from the candidates it permits.
    ///
    /// Returns the selected extension modules in iteration order. This
    /// implementation never produces an `Err`.
    // NOTE(review): lint suppression carried over from the original code;
    // confirm it is still required.
    #[allow(clippy::if_same_then_else)]
    pub fn resolve_python_extension_modules<'a>(
        &self,
        extensions_variants: impl Iterator<Item = &'a PythonExtensionModuleVariants>,
        target_triple: &str,
    ) -> Result<Vec<PythonExtensionModule>> {
        // The broken list depends only on the triple, so look it up once.
        let broken = self.broken_extensions.get(target_triple);

        let mut res = vec![];

        for variants in extensions_variants {
            let name = &variants.default_variant().name;

            // This extension is broken on this target. Ignore it.
            if matches!(broken, Some(names) if names.contains(name)) {
                continue;
            }

            // Always add minimally required extension modules, because things don't
            // work if we don't do this.
            //
            // NOTE(review): the filter-specific branch below may select a
            // variant of the same extension again — confirm callers tolerate
            // duplicate entries.
            let minimal: PythonExtensionModuleVariants = variants
                .iter()
                .filter_map(|em| em.is_minimally_required().then(|| em.clone()))
                .collect();

            self.push_preferred_variant(&minimal, &mut res);

            match self.extension_module_filter {
                // Nothing to do here since we added minimal extensions above.
                ExtensionModuleFilter::Minimal => {}

                ExtensionModuleFilter::All => {
                    res.push(
                        variants
                            .choose_variant(&self.preferred_extension_module_variants)
                            .clone(),
                    );
                }

                ExtensionModuleFilter::NoLibraries => {
                    let without_libraries: PythonExtensionModuleVariants = variants
                        .iter()
                        .filter_map(|em| (!em.requires_libraries()).then(|| em.clone()))
                        .collect();

                    self.push_preferred_variant(&without_libraries, &mut res);
                }

                ExtensionModuleFilter::NoCopyleft => {
                    let not_copyleft: PythonExtensionModuleVariants = variants
                        .iter()
                        .filter_map(|em| {
                            // As a special case, if all we link against are system libraries
                            // that are known to be benign, allow that.
                            let all_safe_system_libraries = em.link_libraries.iter().all(|link| {
                                link.system && SAFE_SYSTEM_LIBRARIES.contains(&link.name.as_str())
                            });

                            let allowed = if em.link_libraries.is_empty()
                                || all_safe_system_libraries
                            {
                                true
                            } else if let Some(license) = &em.license {
                                match license.license() {
                                    // Each license requirement is reported as
                                    // copyleft if its SPDX id says so, or if it
                                    // has no known id. The variant is kept only
                                    // when the expression does not evaluate to
                                    // true under that predicate.
                                    LicenseFlavor::Spdx(expression) => !expression.evaluate(|req| {
                                        req.license.id().map_or(true, |id| id.is_copyleft())
                                    }),
                                    LicenseFlavor::PublicDomain => true,
                                    // Without a license we can reason about,
                                    // err on the side of exclusion.
                                    LicenseFlavor::OtherExpression(_)
                                    | LicenseFlavor::None
                                    | LicenseFlavor::Unknown(_) => false,
                                }
                            } else {
                                // Links against libraries but has no license metadata.
                                false
                            };

                            allowed.then(|| em.clone())
                        })
                        .collect();

                    self.push_preferred_variant(&not_copyleft, &mut res);
                }
            }
        }

        Ok(res)
    }
}

#[cfg(test)]
mod tests {
    use {super::*, simple_file_manifest::File};

    /// `File` resources are included only when `include_file_resources` is set.
    #[test]
    fn test_add_collection_context_file() -> Result<()> {
        let file = File::new("foo.py", vec![42]);

        // With the flag off, the derived context must exclude the file.
        let mut policy = PythonPackagingPolicy {
            include_file_resources: false,
            ..Default::default()
        };
        let excluded = policy.derive_add_collection_context(&file.clone().into());
        assert!(!excluded.include);

        // Flipping the flag on the same policy must include it.
        policy.include_file_resources = true;
        let included = policy.derive_add_collection_context(&file.into());
        assert!(included.include);

        Ok(())
    }
}