python_packaging/
policy.rs

1// Copyright 2022 Gregory Szorc.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9/*!
10Functionality for defining how Python resources should be packaged.
11*/
12
13use {
14    crate::{
15        licensing::{LicenseFlavor, SAFE_SYSTEM_LIBRARIES},
16        location::ConcreteResourceLocation,
17        resource::{PythonExtensionModule, PythonExtensionModuleVariants, PythonResource},
18        resource_collection::PythonResourceAddCollectionContext,
19    },
20    anyhow::Result,
21    std::collections::{HashMap, HashSet},
22};
23
/// Denotes methods to filter extension modules.
///
/// Each variant has a stable string name (`minimal`, `all`, `no-libraries`,
/// `no-copyleft`) used by the `TryFrom<&str>` and `AsRef<str>` implementations.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ExtensionModuleFilter {
    /// Only use the minimum set of extension modules needed to initialize an interpreter.
    Minimal,
    /// Use all extension modules.
    All,
    /// Only use extension modules without library dependencies.
    NoLibraries,
    /// Only use extension modules that are not known to be free of copyleft concerns.
    ///
    /// When resolving extension modules, a variant is kept if it links against
    /// no libraries, links only against system libraries listed in
    /// `SAFE_SYSTEM_LIBRARIES`, or carries a license that is public domain or
    /// an SPDX expression that does not evaluate as copyleft.
    NoCopyleft,
}
35
36impl TryFrom<&str> for ExtensionModuleFilter {
37    type Error = String;
38
39    fn try_from(value: &str) -> Result<Self, Self::Error> {
40        match value {
41            "minimal" => Ok(ExtensionModuleFilter::Minimal),
42            "all" => Ok(ExtensionModuleFilter::All),
43            "no-libraries" => Ok(ExtensionModuleFilter::NoLibraries),
44            "no-copyleft" => Ok(ExtensionModuleFilter::NoCopyleft),
45            t => Err(format!("{} is not a valid extension module filter", t)),
46        }
47    }
48}
49
50impl AsRef<str> for ExtensionModuleFilter {
51    fn as_ref(&self) -> &str {
52        match self {
53            ExtensionModuleFilter::All => "all",
54            ExtensionModuleFilter::Minimal => "minimal",
55            ExtensionModuleFilter::NoCopyleft => "no-copyleft",
56            ExtensionModuleFilter::NoLibraries => "no-libraries",
57        }
58    }
59}
60
/// Describes how resources should be handled.
///
/// A mode is a shorthand for a group of `PythonPackagingPolicy` settings; see
/// `PythonPackagingPolicy::set_resource_handling_mode()` for the exact fields
/// each mode assigns. The string names are `classify` and `files`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ResourceHandlingMode {
    /// Files should be classified as typed resources.
    Classify,

    /// Files should be handled as files.
    Files,
}
70
71impl TryFrom<&str> for ResourceHandlingMode {
72    type Error = String;
73
74    fn try_from(value: &str) -> Result<Self, Self::Error> {
75        match value {
76            "classify" => Ok(Self::Classify),
77            "files" => Ok(Self::Files),
78            _ => Err(format!(
79                "{} is not a valid resource handling mode; use \"classify\" or \"files\"",
80                value
81            )),
82        }
83    }
84}
85
86impl AsRef<str> for ResourceHandlingMode {
87    fn as_ref(&self) -> &str {
88        match self {
89            Self::Classify => "classify",
90            Self::Files => "files",
91        }
92    }
93}
94
/// Defines how Python resources should be packaged.
///
/// All fields are private; they are read and written through the accessor
/// methods on this type. `Default::default()` provides the baseline values.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PythonPackagingPolicy {
    /// Which extension modules should be included.
    extension_module_filter: ExtensionModuleFilter,

    /// Preferred variants of extension modules.
    ///
    /// Keys are extension module names and values are variant names.
    preferred_extension_module_variants: HashMap<String, String>,

    /// Where resources should be placed/loaded from by default.
    resources_location: ConcreteResourceLocation,

    /// Optional fallback location for resources should `resources_location` fail.
    resources_location_fallback: Option<ConcreteResourceLocation>,

    /// Whether to allow in-memory shared library loading.
    ///
    /// If true, we will attempt to load Python extension modules
    /// and their shared library dependencies from memory if supported.
    ///
    /// This feature is not supported on all platforms and this setting
    /// can be overruled by platform-specific capabilities.
    allow_in_memory_shared_library_loading: bool,

    /// Whether untyped files are allowed.
    ///
    /// If true, `File` instances can be added to the resource collector.
    ///
    /// If false, resources must be strongly typed (`PythonModuleSource`,
    /// `PythonPackageResource`, etc).
    allow_files: bool,

    /// Whether file scanning should emit `PythonResource::File` variants.
    ///
    /// If true, this resource variant is emitted when scanning for
    /// resources. If false, it isn't.
    ///
    /// This effectively says whether the file scanner should emit records
    /// corresponding to the actual file.
    file_scanner_emit_files: bool,

    /// Whether file scanning should classify files and emit `PythonResource::*`
    /// variants.
    ///
    /// If true, the file scanner will attempt to classify every file as
    /// a specific resource type and emit a `PythonResource::*` variant
    /// corresponding to the resource type.
    ///
    /// If false, this classification is not performed.
    file_scanner_classify_files: bool,

    /// Whether to classify non-`File` resources as `include = True` by default.
    include_classified_resources: bool,

    /// Whether to include source modules from the Python distribution.
    include_distribution_sources: bool,

    /// Whether to include Python module source for non-distribution modules.
    include_non_distribution_sources: bool,

    /// Whether to include package resource files from the Python distribution.
    include_distribution_resources: bool,

    /// Whether to include test files.
    include_test: bool,

    /// Whether to classify `File` resources as `include = True` by default.
    include_file_resources: bool,

    /// Mapping of target triple to list of extensions that don't work for that triple.
    ///
    /// Policy constructors can populate this with known broken extensions to
    /// prevent the policy from allowing an extension.
    broken_extensions: HashMap<String, Vec<String>>,

    /// Whether to write Python bytecode at optimization level 0.
    bytecode_optimize_level_zero: bool,

    /// Whether to write Python bytecode at optimization level 1.
    bytecode_optimize_level_one: bool,

    /// Whether to write Python bytecode at optimization level 2.
    bytecode_optimize_level_two: bool,

    /// Python modules for which bytecode should not be generated by default.
    no_bytecode_modules: HashSet<String>,
}
182
183impl Default for PythonPackagingPolicy {
184    fn default() -> Self {
185        PythonPackagingPolicy {
186            extension_module_filter: ExtensionModuleFilter::All,
187            preferred_extension_module_variants: HashMap::new(),
188            resources_location: ConcreteResourceLocation::InMemory,
189            resources_location_fallback: None,
190            allow_in_memory_shared_library_loading: false,
191            allow_files: false,
192            file_scanner_emit_files: false,
193            file_scanner_classify_files: true,
194            include_classified_resources: true,
195            include_distribution_sources: true,
196            include_non_distribution_sources: true,
197            include_distribution_resources: false,
198            include_test: false,
199            include_file_resources: false,
200            broken_extensions: HashMap::new(),
201            bytecode_optimize_level_zero: true,
202            bytecode_optimize_level_one: false,
203            bytecode_optimize_level_two: false,
204            no_bytecode_modules: HashSet::new(),
205        }
206    }
207}
208
209impl PythonPackagingPolicy {
    /// Obtain the active extension module filter for this instance.
    ///
    /// The filter is consulted by `resolve_python_extension_modules()`.
    pub fn extension_module_filter(&self) -> &ExtensionModuleFilter {
        &self.extension_module_filter
    }
214
    /// Set the extension module filter to use.
    ///
    /// The previously configured filter is replaced.
    pub fn set_extension_module_filter(&mut self, filter: ExtensionModuleFilter) {
        self.extension_module_filter = filter;
    }
219
    /// Obtain the preferred extension module variants for this policy.
    ///
    /// The returned object is a mapping of extension name to its variant
    /// name. It is what gets passed to `choose_variant()` when extension
    /// modules are resolved.
    pub fn preferred_extension_module_variants(&self) -> &HashMap<String, String> {
        &self.preferred_extension_module_variants
    }
227
    /// Denote the preferred variant for an extension module.
    ///
    /// If set, the named variant will be chosen if it is present.
    ///
    /// Registering a variant for an extension that already has a preference
    /// replaces the earlier preference.
    pub fn set_preferred_extension_module_variant(&mut self, extension: &str, variant: &str) {
        self.preferred_extension_module_variants
            .insert(extension.to_string(), variant.to_string());
    }
235
    /// Obtain the primary location for added resources.
    ///
    /// This value is cloned into the `location` field of contexts produced by
    /// `derive_add_collection_context()`.
    pub fn resources_location(&self) -> &ConcreteResourceLocation {
        &self.resources_location
    }
240
    /// Set the primary location for added resources.
    ///
    /// The fallback location is not touched by this call.
    pub fn set_resources_location(&mut self, location: ConcreteResourceLocation) {
        self.resources_location = location;
    }
245
    /// Obtain the fallback location for added resources.
    ///
    /// `None` means no fallback is configured. This value is cloned into the
    /// `location_fallback` field of contexts produced by
    /// `derive_add_collection_context()`.
    pub fn resources_location_fallback(&self) -> &Option<ConcreteResourceLocation> {
        &self.resources_location_fallback
    }
250
    /// Set the fallback location for added resources.
    ///
    /// Pass `None` to clear a previously configured fallback.
    pub fn set_resources_location_fallback(&mut self, location: Option<ConcreteResourceLocation>) {
        self.resources_location_fallback = location;
    }
255
    /// Whether to allow untyped `File` resources.
    ///
    /// When false, resources are expected to be strongly typed.
    pub fn allow_files(&self) -> bool {
        self.allow_files
    }
260
    /// Set whether to allow untyped `File` resources.
    ///
    /// Only this flag is changed; use `set_resource_handling_mode()` to switch
    /// all file-related settings together.
    pub fn set_allow_files(&mut self, value: bool) {
        self.allow_files = value;
    }
265
    /// Whether file scanning should emit `PythonResource::File` variants.
    ///
    /// When true, the scanner is expected to emit records for the files themselves.
    pub fn file_scanner_emit_files(&self) -> bool {
        self.file_scanner_emit_files
    }
270
    /// Set whether file scanning should emit `PythonResource::File` variants.
    ///
    /// Only this flag is changed; the classification flag is independent.
    pub fn set_file_scanner_emit_files(&mut self, value: bool) {
        self.file_scanner_emit_files = value;
    }
275
    /// Whether file scanning should classify files into `PythonResource::*` variants.
    ///
    /// When false, no classification into typed resources is requested.
    pub fn file_scanner_classify_files(&self) -> bool {
        self.file_scanner_classify_files
    }
280
    /// Set whether file scanning should classify files into `PythonResource::*` variants.
    ///
    /// Only this flag is changed; the emit-files flag is independent.
    pub fn set_file_scanner_classify_files(&mut self, value: bool) {
        self.file_scanner_classify_files = value;
    }
285
    /// Whether to allow in-memory shared library loading.
    ///
    /// This is the policy's requested value only; this type performs no
    /// platform capability check itself.
    pub fn allow_in_memory_shared_library_loading(&self) -> bool {
        self.allow_in_memory_shared_library_loading
    }
290
    /// Set the value for whether to allow in-memory shared library loading.
    ///
    /// The value is stored as given; no validation is performed here.
    pub fn set_allow_in_memory_shared_library_loading(&mut self, value: bool) {
        self.allow_in_memory_shared_library_loading = value;
    }
295
    /// Get setting for whether to include source modules from the distribution.
    ///
    /// `derive_add_collection_context()` uses this as the `store_source` value
    /// for module sources flagged `is_stdlib`, and it is also consulted when
    /// deciding whether a module source resource is included.
    pub fn include_distribution_sources(&self) -> bool {
        self.include_distribution_sources
    }
300
    /// Set whether we should include a Python distribution's module source code.
    ///
    /// Only affects contexts derived after this call.
    pub fn set_include_distribution_sources(&mut self, include: bool) {
        self.include_distribution_sources = include;
    }
305
    /// Get setting for whether to include Python package resources from the distribution.
    ///
    /// Only package resources flagged `is_stdlib` are subject to this setting;
    /// other package resources are included regardless of it.
    pub fn include_distribution_resources(&self) -> bool {
        self.include_distribution_resources
    }
310
    /// Set whether to include package resources from the Python distribution.
    ///
    /// Only affects contexts derived after this call.
    pub fn set_include_distribution_resources(&mut self, include: bool) {
        self.include_distribution_resources = include;
    }
315
    /// Whether to include Python sources for modules not in the standard library.
    ///
    /// `derive_add_collection_context()` uses this as the `store_source` value
    /// for module sources that are not flagged `is_stdlib`.
    pub fn include_non_distribution_sources(&self) -> bool {
        self.include_non_distribution_sources
    }
320
    /// Set whether to include Python sources for modules not in the standard library.
    ///
    /// Only affects contexts derived after this call.
    pub fn set_include_non_distribution_sources(&mut self, include: bool) {
        self.include_non_distribution_sources = include;
    }
325
    /// Get setting for whether to include test files.
    ///
    /// When false, module sources, bytecode requests and standard library
    /// package resources flagged `is_test` are excluded by this policy.
    pub fn include_test(&self) -> bool {
        self.include_test
    }
330
    /// Set whether we should include Python modules that define tests.
    ///
    /// Only affects contexts derived after this call.
    pub fn set_include_test(&mut self, include: bool) {
        self.include_test = include;
    }
335
    /// Get whether to classify `File` resources as include by default.
    ///
    /// When false, every `PythonResource::File` is excluded by this policy.
    pub fn include_file_resources(&self) -> bool {
        self.include_file_resources
    }
340
    /// Set whether to classify `File` resources as include by default.
    ///
    /// Only affects contexts derived after this call.
    pub fn set_include_file_resources(&mut self, value: bool) {
        self.include_file_resources = value;
    }
345
    /// Get whether to classify non-`File` resources as include by default.
    ///
    /// When false, every resource other than `PythonResource::File` is
    /// excluded by this policy.
    pub fn include_classified_resources(&self) -> bool {
        self.include_classified_resources
    }
350
    /// Set whether to classify non-`File` resources as include by default.
    ///
    /// Only affects contexts derived after this call.
    pub fn set_include_classified_resources(&mut self, value: bool) {
        self.include_classified_resources = value;
    }
355
    /// Whether to write bytecode at optimization level 0.
    ///
    /// Module sources registered via `register_no_bytecode_module()` get
    /// `false` in derived contexts regardless of this setting.
    pub fn bytecode_optimize_level_zero(&self) -> bool {
        self.bytecode_optimize_level_zero
    }
360
    /// Set whether to write bytecode at optimization level 0.
    ///
    /// The other optimization levels are configured independently.
    pub fn set_bytecode_optimize_level_zero(&mut self, value: bool) {
        self.bytecode_optimize_level_zero = value;
    }
365
    /// Whether to write bytecode at optimization level 1.
    ///
    /// Module sources registered via `register_no_bytecode_module()` get
    /// `false` in derived contexts regardless of this setting.
    pub fn bytecode_optimize_level_one(&self) -> bool {
        self.bytecode_optimize_level_one
    }
370
    /// Set whether to write bytecode at optimization level 1.
    ///
    /// The other optimization levels are configured independently.
    pub fn set_bytecode_optimize_level_one(&mut self, value: bool) {
        self.bytecode_optimize_level_one = value;
    }
375
    /// Whether to write bytecode at optimization level 2.
    ///
    /// Module sources registered via `register_no_bytecode_module()` get
    /// `false` in derived contexts regardless of this setting.
    pub fn bytecode_optimize_level_two(&self) -> bool {
        self.bytecode_optimize_level_two
    }
380
    /// Set whether to write bytecode at optimization level 2.
    ///
    /// The other optimization levels are configured independently.
    pub fn set_bytecode_optimize_level_two(&mut self, value: bool) {
        self.bytecode_optimize_level_two = value;
    }
385
386    /// Set the resource handling mode of the policy.
387    ///
388    /// This is a convenience function for mapping a `ResourceHandlingMode`
389    /// to corresponding field values.
390    pub fn set_resource_handling_mode(&mut self, mode: ResourceHandlingMode) {
391        match mode {
392            ResourceHandlingMode::Classify => {
393                self.file_scanner_emit_files = false;
394                self.file_scanner_classify_files = true;
395                self.allow_files = false;
396                self.include_file_resources = false;
397                self.include_classified_resources = true;
398            }
399            ResourceHandlingMode::Files => {
400                self.file_scanner_emit_files = true;
401                self.file_scanner_classify_files = false;
402                self.allow_files = true;
403                self.include_file_resources = true;
404                self.include_classified_resources = true;
405            }
406        }
407    }
408
    /// Obtain broken extensions for a target triple.
    ///
    /// Returns `None` if no extension has been registered as broken for
    /// `target_triple`. Otherwise returns the extension names in registration order.
    pub fn broken_extensions_for_triple(&self, target_triple: &str) -> Option<&Vec<String>> {
        self.broken_extensions.get(target_triple)
    }
413
414    /// Mark an extension as broken on a target platform, preventing it from being used.
415    pub fn register_broken_extension(&mut self, target_triple: &str, extension: &str) {
416        if !self.broken_extensions.contains_key(target_triple) {
417            self.broken_extensions
418                .insert(target_triple.to_string(), vec![]);
419        }
420
421        self.broken_extensions
422            .get_mut(target_triple)
423            .unwrap()
424            .push(extension.to_string());
425    }
426
427    /// Register a Python module as one that should not generate bytecode.
428    ///
429    /// When source modules matching names registered with this function are added,
430    /// their default settings for adding bytecode will always be false.
431    ///
432    /// It is still possible to force bytecode generation by setting the add context
433    /// fields to true or explicitly adding a bytecode resource.
434    pub fn register_no_bytecode_module(&mut self, name: &str) {
435        self.no_bytecode_modules.insert(name.to_string());
436    }
437
438    /// Derive a `PythonResourceAddCollectionContext` for a resource using current settings.
439    ///
440    /// The returned object essentially says how the resource should be added
441    /// to a `PythonResourceCollector` given this policy.
442    pub fn derive_add_collection_context(
443        &self,
444        resource: &PythonResource,
445    ) -> PythonResourceAddCollectionContext {
446        let include = self.filter_python_resource(resource);
447
448        let store_source = match resource {
449            PythonResource::ModuleSource(ref module) => {
450                if module.is_stdlib {
451                    self.include_distribution_sources
452                } else {
453                    self.include_non_distribution_sources
454                }
455            }
456            _ => false,
457        };
458
459        let location = self.resources_location.clone();
460        let location_fallback = self.resources_location_fallback.clone();
461
462        let optimize_level_zero = match resource {
463            PythonResource::ModuleSource(module) => {
464                if self.no_bytecode_modules.contains(&*module.name) {
465                    false
466                } else {
467                    self.bytecode_optimize_level_zero
468                }
469            }
470            _ => self.bytecode_optimize_level_zero,
471        };
472        let optimize_level_one = match resource {
473            PythonResource::ModuleSource(module) => {
474                if self.no_bytecode_modules.contains(&*module.name) {
475                    false
476                } else {
477                    self.bytecode_optimize_level_one
478                }
479            }
480            _ => self.bytecode_optimize_level_one,
481        };
482        let optimize_level_two = match resource {
483            PythonResource::ModuleSource(module) => {
484                if self.no_bytecode_modules.contains(&*module.name) {
485                    false
486                } else {
487                    self.bytecode_optimize_level_two
488                }
489            }
490            _ => self.bytecode_optimize_level_two,
491        };
492
493        PythonResourceAddCollectionContext {
494            include,
495            location,
496            location_fallback,
497            store_source,
498            optimize_level_zero,
499            optimize_level_one,
500            optimize_level_two,
501        }
502    }
503
504    /// Determine if a Python resource is applicable to the current policy.
505    ///
506    /// Given a `PythonResource`, this answers the question of whether that
507    /// resource meets the inclusion requirements for the current policy.
508    ///
509    /// Returns true if the resource should be included, false otherwise.
510    fn filter_python_resource(&self, resource: &PythonResource) -> bool {
511        match resource {
512            PythonResource::File(_) => {
513                if !self.include_file_resources {
514                    return false;
515                }
516            }
517            _ => {
518                if !self.include_classified_resources {
519                    return false;
520                }
521            }
522        }
523
524        match resource {
525            PythonResource::ModuleSource(module) => {
526                if !self.include_test && module.is_test {
527                    false
528                } else {
529                    self.include_distribution_sources
530                }
531            }
532            PythonResource::ModuleBytecodeRequest(module) => self.include_test || !module.is_test,
533            PythonResource::ModuleBytecode(_) => false,
534            PythonResource::PackageResource(resource) => {
535                if resource.is_stdlib {
536                    if self.include_distribution_resources {
537                        self.include_test || !resource.is_test
538                    } else {
539                        false
540                    }
541                } else {
542                    true
543                }
544            }
545            PythonResource::PackageDistributionResource(_) => true,
546            PythonResource::ExtensionModule(_) => false,
547            PythonResource::PathExtension(_) => false,
548            PythonResource::EggFile(_) => false,
549            PythonResource::File(_) => true,
550        }
551    }
552
553    /// Resolve Python extension modules that are compliant with the policy.
554    #[allow(clippy::if_same_then_else)]
555    pub fn resolve_python_extension_modules<'a>(
556        &self,
557        extensions_variants: impl Iterator<Item = &'a PythonExtensionModuleVariants>,
558        target_triple: &str,
559    ) -> Result<Vec<PythonExtensionModule>> {
560        let mut res = vec![];
561
562        for variants in extensions_variants {
563            let name = &variants.default_variant().name;
564
565            // This extension is broken on this target. Ignore it.
566            if self
567                .broken_extensions
568                .get(target_triple)
569                .unwrap_or(&Vec::new())
570                .contains(name)
571            {
572                continue;
573            }
574
575            // Always add minimally required extension modules, because things don't
576            // work if we don't do this.
577            let ext_variants: PythonExtensionModuleVariants = variants
578                .iter()
579                .filter_map(|em| {
580                    if em.is_minimally_required() {
581                        Some(em.clone())
582                    } else {
583                        None
584                    }
585                })
586                .collect();
587
588            if !ext_variants.is_empty() {
589                res.push(
590                    ext_variants
591                        .choose_variant(&self.preferred_extension_module_variants)
592                        .clone(),
593                );
594            }
595
596            match self.extension_module_filter {
597                // Nothing to do here since we added minimal extensions above.
598                ExtensionModuleFilter::Minimal => {}
599
600                ExtensionModuleFilter::All => {
601                    res.push(
602                        variants
603                            .choose_variant(&self.preferred_extension_module_variants)
604                            .clone(),
605                    );
606                }
607
608                ExtensionModuleFilter::NoLibraries => {
609                    let ext_variants: PythonExtensionModuleVariants = variants
610                        .iter()
611                        .filter_map(|em| {
612                            if !em.requires_libraries() {
613                                Some(em.clone())
614                            } else {
615                                None
616                            }
617                        })
618                        .collect();
619
620                    if !ext_variants.is_empty() {
621                        res.push(
622                            ext_variants
623                                .choose_variant(&self.preferred_extension_module_variants)
624                                .clone(),
625                        );
626                    }
627                }
628
629                ExtensionModuleFilter::NoCopyleft => {
630                    let ext_variants: PythonExtensionModuleVariants = variants
631                        .iter()
632                        .filter_map(|em| {
633                            // As a special case, if all we link against are system libraries
634                            // that are known to be benign, allow that.
635                            let all_safe_system_libraries = em.link_libraries.iter().all(|link| {
636                                link.system && SAFE_SYSTEM_LIBRARIES.contains(&link.name.as_str())
637                            });
638
639                            if em.link_libraries.is_empty() || all_safe_system_libraries {
640                                Some(em.clone())
641                            } else if let Some(license) = &em.license {
642                                match license.license() {
643                                    LicenseFlavor::Spdx(expression) => {
644                                        let copyleft = expression.evaluate(|req| {
645                                            if let Some(id) = req.license.id() {
646                                                id.is_copyleft()
647                                            } else {
648                                                true
649                                            }
650                                        });
651
652                                        if !copyleft {
653                                            Some(em.clone())
654                                        } else {
655                                            None
656                                        }
657                                    }
658                                    LicenseFlavor::OtherExpression(_) => None,
659                                    LicenseFlavor::PublicDomain => Some(em.clone()),
660                                    LicenseFlavor::None => None,
661                                    LicenseFlavor::Unknown(_) => None,
662                                }
663                            } else {
664                                None
665                            }
666                        })
667                        .collect();
668
669                    if !ext_variants.is_empty() {
670                        res.push(
671                            ext_variants
672                                .choose_variant(&self.preferred_extension_module_variants)
673                                .clone(),
674                        );
675                    }
676                }
677            }
678        }
679
680        Ok(res)
681    }
682}
683
#[cfg(test)]
mod tests {
    use {super::*, simple_file_manifest::File};

    /// `File` resources are only marked for inclusion when
    /// `include_file_resources` is enabled on the policy.
    #[test]
    fn test_add_collection_context_file() -> Result<()> {
        let resource: PythonResource = File::new("foo.py", vec![42]).into();

        let mut policy = PythonPackagingPolicy::default();

        // Disabled: the derived context must say "do not include".
        policy.set_include_file_resources(false);
        let excluded = policy.derive_add_collection_context(&resource);
        assert!(!excluded.include);

        // Enabled: the same resource is now included.
        policy.set_include_file_resources(true);
        let included = policy.derive_add_collection_context(&resource);
        assert!(included.include);

        Ok(())
    }
}