python_packaging/
filesystem_scanning.rs

1// Copyright 2022 Gregory Szorc.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9/*!
10Scanning the filesystem for Python resources.
11*/
12
13use {
14    crate::{
15        module_util::{is_package_from_path, PythonModuleSuffixes},
16        package_metadata::PythonPackageMetadata,
17        resource::{
18            BytecodeOptimizationLevel, PythonEggFile, PythonExtensionModule, PythonModuleBytecode,
19            PythonModuleSource, PythonPackageDistributionResource,
20            PythonPackageDistributionResourceFlavor, PythonPackageResource, PythonPathExtension,
21            PythonResource,
22        },
23    },
24    anyhow::{Context, Result},
25    simple_file_manifest::{File, FileData, FileEntry, FileManifest},
26    std::{
27        collections::HashSet,
28        ffi::OsStr,
29        path::{Path, PathBuf},
30    },
31};
32
33#[cfg(unix)]
34use std::os::unix::fs::PermissionsExt;
35
/// Report whether the file described by `metadata` is executable.
///
/// On Unix this is true when any of the owner, group, or other execute
/// permission bits is set.
#[cfg(unix)]
fn is_executable(metadata: &std::fs::Metadata) -> bool {
    // 0o111 selects the three execute bits of the permission mode.
    metadata.permissions().mode() & 0o111 != 0
}

/// Report whether the file described by `metadata` is executable.
///
/// Platforms without Unix permission bits have no execute bit to inspect, so
/// this always answers `false`. Gating on `not(unix)` rather than `windows`
/// keeps the function defined on every non-Unix target.
#[cfg(not(unix))]
fn is_executable(_metadata: &std::fs::Metadata) -> bool {
    false
}
46
47pub fn walk_tree_files(path: &Path) -> Box<dyn Iterator<Item = walkdir::DirEntry>> {
48    let res = walkdir::WalkDir::new(path).sort_by(|a, b| a.file_name().cmp(b.file_name()));
49
50    let filtered = res.into_iter().filter_map(|entry| {
51        let entry = entry.expect("unable to get directory entry");
52
53        let path = entry.path();
54
55        if path.is_dir() {
56            None
57        } else {
58            Some(entry)
59        }
60    });
61
62    Box::new(filtered)
63}
64
/// A file that did not classify as any specific Python resource type.
///
/// `resolve_path` produces these as its fallback; the iterator buffers them
/// and attributes them to a package once all paths have been visited.
#[derive(Debug, PartialEq)]
struct ResourceFile {
    /// Filesystem path of this resource.
    pub full_path: PathBuf,

    /// Path of this resource relative to the scan root, or to a nested
    /// `site-packages` / `.egg` root when it was found beneath one.
    pub relative_path: PathBuf,
}
73
/// Result of classifying a single path via `resolve_path`.
#[derive(Debug, PartialEq)]
enum PathItem<'a> {
    /// A resolved resource that the iterator emits immediately.
    PythonResource(Box<PythonResource<'a>>),
    /// A generic file that the iterator buffers and resolves to a package
    /// resource after all paths have been processed.
    ResourceFile(ResourceFile),
}
79
/// A path queued for processing, plus bookkeeping of what has already been
/// emitted for it.
#[derive(Debug, PartialEq)]
struct PathEntry {
    /// Path to process.
    path: PathBuf,
    /// Whether we emitted a `PythonResource::File` instance.
    file_emitted: bool,
    /// Whether we emitted a non-`PythonResource::File` instance.
    non_file_emitted: bool,
}
88
/// An iterator of `PythonResource`.
///
/// Paths are processed from the front of `paths`. Generic resource files are
/// buffered in `resources` and emitted only after every path has been
/// handled, once the set of known packages is complete.
pub struct PythonResourceIterator<'a> {
    /// Prefix stripped from each path to obtain its relative path.
    root_path: PathBuf,
    /// Cache tag that bytecode file names must carry to be recognized; also
    /// recorded on emitted source modules.
    cache_tag: String,
    /// File name suffixes used to recognize extension, source, and bytecode
    /// modules.
    suffixes: PythonModuleSuffixes,
    /// Paths still to be processed; the first element is the current one.
    paths: Vec<PathEntry>,
    /// Content overrides for individual paths.
    ///
    /// This is a hacky way to allow us to abstract I/O.
    path_content_overrides: FileManifest,
    /// Package names recorded while resolving modules; used to attribute
    /// buffered resource files to their leaf-most package.
    seen_packages: HashSet<String>,
    /// Resource files buffered until all paths have been processed.
    resources: Vec<ResourceFile>,
    /// Whether to emit `PythonResource::File` entries.
    emit_files: bool,
    /// Whether to emit non-`PythonResource::File` entries.
    emit_non_files: bool,
    /// Marker carrying the `'a` lifetime used by the emitted resources.
    _phantom: std::marker::PhantomData<&'a ()>,
}
107
108impl<'a> PythonResourceIterator<'a> {
109    fn new(
110        path: &Path,
111        cache_tag: &str,
112        suffixes: &PythonModuleSuffixes,
113        emit_files: bool,
114        emit_non_files: bool,
115    ) -> Result<PythonResourceIterator<'a>> {
116        let res = walkdir::WalkDir::new(path).sort_by(|a, b| a.file_name().cmp(b.file_name()));
117
118        let filtered = res
119            .into_iter()
120            .map(|entry| {
121                let entry = entry.context("resolving directory entry")?;
122
123                let path = entry.path();
124
125                Ok(if path.is_dir() {
126                    None
127                } else {
128                    Some(PathEntry {
129                        path: path.to_path_buf(),
130                        file_emitted: false,
131                        non_file_emitted: false,
132                    })
133                })
134            })
135            .collect::<Result<Vec<_>>>()?
136            .into_iter()
137            .flatten()
138            .collect::<Vec<_>>();
139
140        Ok(PythonResourceIterator {
141            root_path: path.to_path_buf(),
142            cache_tag: cache_tag.to_string(),
143            suffixes: suffixes.clone(),
144            paths: filtered,
145            path_content_overrides: FileManifest::default(),
146            seen_packages: HashSet::new(),
147            resources: Vec::new(),
148            emit_files,
149            emit_non_files,
150            _phantom: std::marker::PhantomData,
151        })
152    }
153
154    /// Construct an instance from an iterable of `(File)`.
155    pub fn from_data_locations(
156        resources: &[File],
157        cache_tag: &str,
158        suffixes: &PythonModuleSuffixes,
159        emit_files: bool,
160        emit_non_files: bool,
161    ) -> Result<PythonResourceIterator<'a>> {
162        let mut paths = resources
163            .iter()
164            .map(|file| PathEntry {
165                path: file.path().to_path_buf(),
166                file_emitted: false,
167                non_file_emitted: false,
168            })
169            .collect::<Vec<_>>();
170        paths.sort_by(|a, b| a.path.cmp(&b.path));
171
172        let mut path_content_overrides = FileManifest::default();
173        for resource in resources {
174            path_content_overrides.add_file_entry(resource.path(), resource.entry().clone())?;
175        }
176
177        Ok(PythonResourceIterator {
178            root_path: PathBuf::new(),
179            cache_tag: cache_tag.to_string(),
180            suffixes: suffixes.clone(),
181            paths,
182            path_content_overrides,
183            seen_packages: HashSet::new(),
184            resources: Vec::new(),
185            emit_files,
186            emit_non_files,
187            _phantom: std::marker::PhantomData,
188        })
189    }
190
191    fn resolve_is_executable(&self, path: &Path) -> bool {
192        match self.path_content_overrides.get(path) {
193            Some(file) => file.is_executable(),
194            None => {
195                if let Ok(metadata) = path.metadata() {
196                    is_executable(&metadata)
197                } else {
198                    false
199                }
200            }
201        }
202    }
203
204    fn resolve_file_data(&self, path: &Path) -> FileData {
205        match self.path_content_overrides.get(path) {
206            Some(file) => file.file_data().clone(),
207            None => FileData::Path(path.to_path_buf()),
208        }
209    }
210
211    fn resolve_path(&mut self, path: &Path) -> Option<PathItem<'a>> {
212        let mut rel_path = path
213            .strip_prefix(&self.root_path)
214            .expect("unable to strip path prefix");
215        let mut rel_str = rel_path.to_str().expect("could not convert path to str");
216        let mut components = rel_path
217            .iter()
218            .map(|p| p.to_str().expect("unable to get path as str"))
219            .collect::<Vec<_>>();
220
221        // Files in .dist-info and .egg-info directories are distribution metadata files.
222        // Parsing the package name out of the directory name can be a bit wonky, as
223        // case sensitivity and other normalization can come into play. So our strategy
224        // is to parse the well-known metadata record inside the directory to extract
225        // the package info. If the file doesn't exist or can't be parsed, we ignore this
226        // distribution entirely.
227
228        let distribution_info = if components[0].ends_with(".dist-info") {
229            Some((
230                self.root_path.join(components[0]).join("METADATA"),
231                PythonPackageDistributionResourceFlavor::DistInfo,
232            ))
233        } else if components[0].ends_with(".egg-info") {
234            Some((
235                self.root_path.join(components[0]).join("PKG-INFO"),
236                PythonPackageDistributionResourceFlavor::EggInfo,
237            ))
238        } else {
239            None
240        };
241
242        if let Some((metadata_path, location)) = distribution_info {
243            let data = if let Some(file) = self.path_content_overrides.get(&metadata_path) {
244                file.resolve_content().ok()?
245            } else {
246                std::fs::read(&metadata_path).ok()?
247            };
248
249            let metadata = PythonPackageMetadata::from_metadata(&data).ok()?;
250            let package = metadata.name()?;
251            let version = metadata.version()?;
252
253            // Name of resource is file path after the initial directory.
254            let name = components[1..components.len()].join("/");
255
256            return Some(PathItem::PythonResource(Box::new(
257                PythonPackageDistributionResource {
258                    location,
259                    package: package.to_string(),
260                    version: version.to_string(),
261                    name,
262                    data: self.resolve_file_data(path),
263                }
264                .into(),
265            )));
266        }
267
268        // site-packages directories are package roots within package roots. Treat them as
269        // such.
270        let in_site_packages = if components[0] == "site-packages" {
271            let sp_path = self.root_path.join("site-packages");
272            rel_path = path
273                .strip_prefix(sp_path)
274                .expect("unable to strip site-packages prefix");
275
276            rel_str = rel_path.to_str().expect("could not convert path to str");
277            components = rel_path
278                .iter()
279                .map(|p| p.to_str().expect("unable to get path as str"))
280                .collect::<Vec<_>>();
281
282            true
283        } else {
284            false
285        };
286
287        // It looks like we're in an unpacked egg. This is similar to the site-packages
288        // scenario: we essentially have a new package root that corresponds to the
289        // egg's extraction directory.
290        if components[0..components.len() - 1]
291            .iter()
292            .any(|p| p.ends_with(".egg"))
293        {
294            let mut egg_root_path = self.root_path.clone();
295
296            if in_site_packages {
297                egg_root_path = egg_root_path.join("site-packages");
298            }
299
300            for p in &components[0..components.len() - 1] {
301                egg_root_path = egg_root_path.join(p);
302
303                if p.ends_with(".egg") {
304                    break;
305                }
306            }
307
308            rel_path = path
309                .strip_prefix(egg_root_path)
310                .expect("unable to strip egg prefix");
311            components = rel_path
312                .iter()
313                .map(|p| p.to_str().expect("unable to get path as str"))
314                .collect::<Vec<_>>();
315
316            // Ignore EGG-INFO directory, as it is just packaging metadata.
317            if components[0] == "EGG-INFO" {
318                return None;
319            }
320        }
321
322        let file_name = rel_path.file_name().unwrap().to_string_lossy();
323
324        for ext_suffix in &self.suffixes.extension {
325            if file_name.ends_with(ext_suffix) {
326                let package_parts = &components[0..components.len() - 1];
327                let mut package = itertools::join(package_parts, ".");
328
329                let module_name = &file_name[0..file_name.len() - ext_suffix.len()];
330
331                let mut full_module_name: Vec<&str> = package_parts.to_vec();
332
333                if module_name != "__init__" {
334                    full_module_name.push(module_name);
335                }
336
337                let full_module_name = itertools::join(full_module_name, ".");
338
339                if package.is_empty() {
340                    package = full_module_name.clone();
341                }
342
343                self.seen_packages.insert(package);
344
345                let module_components = full_module_name.split('.').collect::<Vec<_>>();
346                let final_name = module_components[module_components.len() - 1];
347                let init_fn = Some(format!("PyInit_{final_name}"));
348
349                return Some(PathItem::PythonResource(Box::new(
350                    PythonExtensionModule {
351                        name: full_module_name,
352                        init_fn,
353                        extension_file_suffix: ext_suffix.clone(),
354                        shared_library: Some(self.resolve_file_data(path)),
355                        object_file_data: vec![],
356                        is_package: is_package_from_path(path),
357                        link_libraries: vec![],
358                        is_stdlib: false,
359                        builtin_default: false,
360                        required: false,
361                        variant: None,
362                        license: None,
363                    }
364                    .into(),
365                )));
366            }
367        }
368
369        // File extension matches a registered source suffix.
370        if self
371            .suffixes
372            .source
373            .iter()
374            .any(|ext| rel_str.ends_with(ext))
375        {
376            let package_parts = &components[0..components.len() - 1];
377            let mut package = itertools::join(package_parts, ".");
378
379            let module_name = rel_path
380                .file_stem()
381                .expect("unable to get file stem")
382                .to_str()
383                .expect("unable to convert path to str");
384
385            let mut full_module_name: Vec<&str> = package_parts.to_vec();
386
387            if module_name != "__init__" {
388                full_module_name.push(module_name);
389            }
390
391            let full_module_name = itertools::join(full_module_name, ".");
392
393            if package.is_empty() {
394                package = full_module_name.clone();
395            }
396
397            self.seen_packages.insert(package);
398
399            return Some(PathItem::PythonResource(Box::new(
400                PythonModuleSource {
401                    name: full_module_name,
402                    source: self.resolve_file_data(path),
403                    is_package: is_package_from_path(path),
404                    cache_tag: self.cache_tag.clone(),
405                    is_stdlib: false,
406                    is_test: false,
407                }
408                .into(),
409            )));
410        }
411
412        if self
413            .suffixes
414            .bytecode
415            .iter()
416            .any(|ext| rel_str.ends_with(ext))
417        {
418            // .pyc files should be in a __pycache__ directory.
419            if components.len() < 2 {
420                return None;
421            }
422
423            // Possibly from Python 2?
424            if components[components.len() - 2] != "__pycache__" {
425                return None;
426            }
427
428            let package_parts = &components[0..components.len() - 2];
429            let mut package = itertools::join(package_parts, ".");
430
431            // Files have format <package>/__pycache__/<module>.<cache_tag>.<extra tag><suffix>>
432            let filename = rel_path
433                .file_name()
434                .expect("unable to get file name")
435                .to_string_lossy()
436                .to_string();
437
438            let filename_parts = filename.split('.').collect::<Vec<&str>>();
439
440            if filename_parts.len() < 3 {
441                return None;
442            }
443
444            let mut remaining_filename = filename.clone();
445
446            // The first part is always the module name.
447            let module_name = filename_parts[0];
448            remaining_filename = remaining_filename[module_name.len() + 1..].to_string();
449
450            // The second part is the cache tag. It should match ours.
451            if filename_parts[1] != self.cache_tag {
452                return None;
453            }
454
455            // Keep the leading dot in case there is no cache tag: in this case the
456            // suffix has the leading dot and we'll need to match against that.
457            remaining_filename = remaining_filename[self.cache_tag.len()..].to_string();
458
459            // Look for optional tag, of which we only recognize opt-1, opt-2, and None.
460            let optimization_level = if filename_parts[2] == "opt-1" {
461                remaining_filename = remaining_filename[6..].to_string();
462                BytecodeOptimizationLevel::One
463            } else if filename_parts[2] == "opt-2" {
464                remaining_filename = remaining_filename[6..].to_string();
465                BytecodeOptimizationLevel::Two
466            } else {
467                BytecodeOptimizationLevel::Zero
468            };
469
470            // Only the bytecode suffix should remain.
471            if !self.suffixes.bytecode.contains(&remaining_filename) {
472                return None;
473            }
474
475            let mut full_module_name: Vec<&str> = package_parts.to_vec();
476
477            if module_name != "__init__" {
478                full_module_name.push(module_name);
479            }
480
481            let full_module_name = itertools::join(full_module_name, ".");
482
483            if package.is_empty() {
484                package = full_module_name.clone();
485            }
486
487            self.seen_packages.insert(package);
488
489            return Some(PathItem::PythonResource(Box::new(
490                PythonModuleBytecode::from_path(
491                    &full_module_name,
492                    optimization_level,
493                    &self.cache_tag,
494                    path,
495                )
496                .into(),
497            )));
498        }
499
500        let resource = match rel_path.extension().and_then(OsStr::to_str) {
501            Some("egg") => PathItem::PythonResource(Box::new(
502                PythonEggFile {
503                    data: self.resolve_file_data(path),
504                }
505                .into(),
506            )),
507            Some("pth") => PathItem::PythonResource(Box::new(
508                PythonPathExtension {
509                    data: self.resolve_file_data(path),
510                }
511                .into(),
512            )),
513            _ => {
514                // If it is some other file type, we categorize it as a resource
515                // file. The package name and resource name are resolved later,
516                // by the iterator.
517                PathItem::ResourceFile(ResourceFile {
518                    full_path: path.to_path_buf(),
519                    relative_path: rel_path.to_path_buf(),
520                })
521            }
522        };
523
524        Some(resource)
525    }
526}
527
528impl<'a> Iterator for PythonResourceIterator<'a> {
529    type Item = Result<PythonResource<'a>>;
530
531    fn next(&mut self) -> Option<Result<PythonResource<'a>>> {
532        // Our strategy is to walk directory entries and buffer resource files locally.
533        // We then emit those at the end, perhaps doing some post-processing along the
534        // way.
535        loop {
536            if self.paths.is_empty() {
537                break;
538            }
539
540            // If we're emitting PythonResource::File entries and we haven't
541            // done so for this path, do so now.
542            if self.emit_files && !self.paths[0].file_emitted {
543                self.paths[0].file_emitted = true;
544
545                let rel_path = self.paths[0]
546                    .path
547                    .strip_prefix(&self.root_path)
548                    .expect("unable to strip path prefix")
549                    .to_path_buf();
550
551                let f = File::new(
552                    rel_path,
553                    FileEntry::new_from_data(
554                        self.resolve_file_data(&self.paths[0].path),
555                        self.resolve_is_executable(&self.paths[0].path),
556                    ),
557                );
558
559                return Some(Ok(f.into()));
560            }
561
562            if self.emit_non_files && !self.paths[0].non_file_emitted {
563                self.paths[0].non_file_emitted = true;
564
565                // Because resolve_path is a mutable borrow.
566                let path_temp = self.paths[0].path.clone();
567
568                if let Some(entry) = self.resolve_path(&path_temp) {
569                    // Buffer Resource entries until later.
570                    match entry {
571                        PathItem::ResourceFile(resource) => {
572                            self.resources.push(resource);
573                        }
574                        PathItem::PythonResource(resource) => {
575                            return Some(Ok(*resource));
576                        }
577                    }
578                }
579            }
580
581            // We're done emitting variants for this path. Discard it and move
582            // on to next record.
583            //
584            // Removing the first element is a bit inefficient. Should we
585            // reverse storage / iteration order instead?
586            self.paths.remove(0);
587            continue;
588        }
589
590        loop {
591            if self.resources.is_empty() {
592                return None;
593            }
594
595            // This isn't efficient. But we shouldn't care.
596            let resource = self.resources.remove(0);
597
598            // Resource addressing in Python is a bit wonky. This is because the resource
599            // reading APIs allow loading resources across package and directory boundaries.
600            // For example, let's say we have a resource defined at the relative path
601            // `foo/bar/resource.txt`. This resource could be accessed via the following
602            // mechanisms:
603            //
604            // * Via the `resource.txt` resource on package `bar`'s resource reader.
605            // * Via the `bar/resource.txt` resource on package `foo`'s resource reader.
606            // * Via the `foo/bar/resource.txt` resource on the root resource reader.
607            //
608            // Furthermore, there could be resources in subdirectories that don't have
609            // Python packages, forcing directory separators in resource names. e.g.
610            // `foo/bar/resources/baz.txt`, where there isn't a `foo.bar.resources` Python
611            // package.
612            //
613            // Our strategy for handling this is to initially resolve the relative path to
614            // the resource. Then when we get to this code, we have awareness of all Python
615            // packages and can supplement the relative path (which is the one true resource
616            // identifier) with annotations, such as the leaf-most Python package.
617
618            // Resources should always have a filename component. Otherwise how did we get here?
619            let basename = resource
620                .relative_path
621                .file_name()
622                .unwrap()
623                .to_string_lossy();
624
625            // We also resolve the leaf-most Python package that this resource is within and
626            // the relative path within that package.
627            let (leaf_package, relative_name) =
628                if let Some(relative_directory) = resource.relative_path.parent() {
629                    // We walk relative directory components until we find a Python package.
630                    let mut components = relative_directory
631                        .iter()
632                        .map(|p| p.to_string_lossy())
633                        .collect::<Vec<_>>();
634
635                    let mut relative_components = vec![basename];
636                    let mut package = None;
637                    let mut relative_name = None;
638
639                    while !components.is_empty() {
640                        let candidate_package = itertools::join(&components, ".");
641
642                        if self.seen_packages.contains(&candidate_package) {
643                            package = Some(candidate_package);
644                            relative_components.reverse();
645                            relative_name = Some(itertools::join(&relative_components, "/"));
646                            break;
647                        }
648
649                        let popped = components.pop().unwrap();
650                        relative_components.push(popped);
651                    }
652
653                    (package, relative_name)
654                } else {
655                    (None, None)
656                };
657
658            // Resources without a resolved package are not legal.
659            if leaf_package.is_none() {
660                continue;
661            }
662
663            let leaf_package = leaf_package.unwrap();
664            let relative_name = relative_name.unwrap();
665
666            return Some(Ok(PythonPackageResource {
667                leaf_package,
668                relative_name,
669                data: self.resolve_file_data(&resource.full_path),
670                is_stdlib: false,
671                is_test: false,
672            }
673            .into()));
674        }
675    }
676}
677
/// Find Python resources in a directory.
///
/// Given a root directory path, walk the directory and find all Python
/// resources in it.
///
/// A resource is a Python source file, bytecode file, or resource file which
/// can be addressed via the `A.B.C` naming convention.
///
/// `cache_tag` is the tag that bytecode file names must carry to be
/// recognized, and `suffixes` lists the file name suffixes that identify
/// source, bytecode, and extension modules. `emit_files` controls whether a
/// `PythonResource::File` entry is produced for each path; `emit_non_files`
/// controls whether classified (non-`File`) resources are produced.
///
/// Returns an iterator of `PythonResource` instances, or an error if walking
/// the directory fails.
pub fn find_python_resources<'a>(
    root_path: &Path,
    cache_tag: &str,
    suffixes: &PythonModuleSuffixes,
    emit_files: bool,
    emit_non_files: bool,
) -> Result<PythonResourceIterator<'a>> {
    PythonResourceIterator::new(root_path, cache_tag, suffixes, emit_files, emit_non_files)
}
696
697#[cfg(test)]
698mod tests {
699    use {
700        super::*,
701        once_cell::sync::Lazy,
702        std::fs::{create_dir_all, write},
703    };
704
705    const DEFAULT_CACHE_TAG: &str = "cpython-37";
706
707    static DEFAULT_SUFFIXES: Lazy<PythonModuleSuffixes> = Lazy::new(|| PythonModuleSuffixes {
708        source: vec![".py".to_string()],
709        bytecode: vec![".pyc".to_string()],
710        debug_bytecode: vec![],
711        optimized_bytecode: vec![],
712        extension: vec![],
713    });
714
715    #[test]
716    fn test_source_resolution() -> Result<()> {
717        let td = tempfile::Builder::new()
718            .prefix("python-packaging-test")
719            .tempdir()?;
720        let tp = td.path();
721
722        let acme_path = tp.join("acme");
723        let acme_a_path = acme_path.join("a");
724        let acme_bar_path = acme_path.join("bar");
725
726        create_dir_all(&acme_a_path).unwrap();
727        create_dir_all(&acme_bar_path).unwrap();
728
729        write(acme_path.join("__init__.py"), "")?;
730        write(acme_a_path.join("__init__.py"), "")?;
731        write(acme_bar_path.join("__init__.py"), "")?;
732
733        write(acme_a_path.join("foo.py"), "# acme.foo")?;
734
735        let resources =
736            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, true, true)?
737                .collect::<Result<Vec<_>>>()?;
738        assert_eq!(resources.len(), 8);
739
740        assert_eq!(
741            resources[0],
742            File::new(
743                "acme/__init__.py",
744                FileEntry::try_from(acme_path.join("__init__.py"))?
745            )
746            .into()
747        );
748        assert_eq!(
749            resources[1],
750            PythonModuleSource {
751                name: "acme".to_string(),
752                source: FileData::Path(acme_path.join("__init__.py")),
753                is_package: true,
754                cache_tag: DEFAULT_CACHE_TAG.to_string(),
755                is_stdlib: false,
756                is_test: false,
757            }
758            .into()
759        );
760        assert_eq!(
761            resources[2],
762            File::new(
763                "acme/a/__init__.py",
764                FileEntry::try_from(acme_a_path.join("__init__.py"))?
765            )
766            .into()
767        );
768        assert_eq!(
769            resources[3],
770            PythonModuleSource {
771                name: "acme.a".to_string(),
772                source: FileData::Path(acme_a_path.join("__init__.py")),
773                is_package: true,
774                cache_tag: DEFAULT_CACHE_TAG.to_string(),
775                is_stdlib: false,
776                is_test: false,
777            }
778            .into()
779        );
780        assert_eq!(
781            resources[4],
782            File::new(
783                "acme/a/foo.py",
784                FileEntry::try_from(acme_a_path.join("foo.py"))?
785            )
786            .into()
787        );
788        assert_eq!(
789            resources[5],
790            PythonModuleSource {
791                name: "acme.a.foo".to_string(),
792                source: FileData::Path(acme_a_path.join("foo.py")),
793                is_package: false,
794                cache_tag: DEFAULT_CACHE_TAG.to_string(),
795                is_stdlib: false,
796                is_test: false,
797            }
798            .into()
799        );
800        assert_eq!(
801            resources[6],
802            File::new(
803                "acme/bar/__init__.py",
804                FileEntry::try_from(acme_bar_path.join("__init__.py"))?
805            )
806            .into()
807        );
808        assert_eq!(
809            resources[7],
810            PythonModuleSource {
811                name: "acme.bar".to_string(),
812                source: FileData::Path(acme_bar_path.join("__init__.py")),
813                is_package: true,
814                cache_tag: DEFAULT_CACHE_TAG.to_string(),
815                is_stdlib: false,
816                is_test: false,
817            }
818            .into()
819        );
820
821        Ok(())
822    }
823
824    #[test]
825    fn test_bytecode_resolution() -> Result<()> {
826        let td = tempfile::Builder::new()
827            .prefix("python-packaging-test")
828            .tempdir()?;
829        let tp = td.path();
830
831        let acme_path = tp.join("acme");
832        let acme_a_path = acme_path.join("a");
833        let acme_bar_path = acme_path.join("bar");
834
835        create_dir_all(&acme_a_path)?;
836        create_dir_all(&acme_bar_path)?;
837
838        let acme_pycache_path = acme_path.join("__pycache__");
839        let acme_a_pycache_path = acme_a_path.join("__pycache__");
840        let acme_bar_pycache_path = acme_bar_path.join("__pycache__");
841
842        create_dir_all(&acme_pycache_path)?;
843        create_dir_all(&acme_a_pycache_path)?;
844        create_dir_all(&acme_bar_pycache_path)?;
845
846        // Dummy paths that should be ignored.
847        write(acme_pycache_path.join("__init__.pyc"), "")?;
848        write(acme_pycache_path.join("__init__.cpython-37.foo.pyc"), "")?;
849
850        write(acme_pycache_path.join("__init__.cpython-37.pyc"), "")?;
851        write(acme_pycache_path.join("__init__.cpython-37.opt-1.pyc"), "")?;
852        write(acme_pycache_path.join("__init__.cpython-37.opt-2.pyc"), "")?;
853        write(acme_pycache_path.join("__init__.cpython-38.pyc"), "")?;
854        write(acme_pycache_path.join("__init__.cpython-38.opt-1.pyc"), "")?;
855        write(acme_pycache_path.join("__init__.cpython-38.opt-2.pyc"), "")?;
856        write(acme_pycache_path.join("foo.cpython-37.pyc"), "")?;
857        write(acme_pycache_path.join("foo.cpython-37.opt-1.pyc"), "")?;
858        write(acme_pycache_path.join("foo.cpython-37.opt-2.pyc"), "")?;
859        write(acme_pycache_path.join("foo.cpython-38.pyc"), "")?;
860        write(acme_pycache_path.join("foo.cpython-38.opt-1.pyc"), "")?;
861        write(acme_pycache_path.join("foo.cpython-38.opt-2.pyc"), "")?;
862
863        write(acme_a_pycache_path.join("__init__.cpython-37.pyc"), "")?;
864        write(
865            acme_a_pycache_path.join("__init__.cpython-37.opt-1.pyc"),
866            "",
867        )?;
868        write(
869            acme_a_pycache_path.join("__init__.cpython-37.opt-2.pyc"),
870            "",
871        )?;
872        write(acme_a_pycache_path.join("__init__.cpython-38.pyc"), "")?;
873        write(
874            acme_a_pycache_path.join("__init__.cpython-38.opt-1.pyc"),
875            "",
876        )?;
877        write(
878            acme_a_pycache_path.join("__init__.cpython-38.opt-2.pyc"),
879            "",
880        )?;
881        write(acme_a_pycache_path.join("foo.cpython-37.pyc"), "")?;
882        write(acme_a_pycache_path.join("foo.cpython-37.opt-1.pyc"), "")?;
883        write(acme_a_pycache_path.join("foo.cpython-37.opt-2.pyc"), "")?;
884        write(acme_a_pycache_path.join("foo.cpython-38.pyc"), "")?;
885        write(acme_a_pycache_path.join("foo.cpython-38.opt-1.pyc"), "")?;
886        write(acme_a_pycache_path.join("foo.cpython-38.opt-2.pyc"), "")?;
887
888        write(acme_bar_pycache_path.join("__init__.cpython-37.pyc"), "")?;
889        write(
890            acme_bar_pycache_path.join("__init__.cpython-37.opt-1.pyc"),
891            "",
892        )?;
893        write(
894            acme_bar_pycache_path.join("__init__.cpython-37.opt-2.pyc"),
895            "",
896        )?;
897        write(acme_bar_pycache_path.join("__init__.cpython-38.pyc"), "")?;
898        write(
899            acme_bar_pycache_path.join("__init__.cpython-38.opt-1.pyc"),
900            "",
901        )?;
902        write(
903            acme_bar_pycache_path.join("__init__.cpython-38.opt-2.pyc"),
904            "",
905        )?;
906        write(acme_bar_pycache_path.join("foo.cpython-37.pyc"), "")?;
907        write(acme_bar_pycache_path.join("foo.cpython-37.opt-1.pyc"), "")?;
908        write(acme_bar_pycache_path.join("foo.cpython-37.opt-2.pyc"), "")?;
909        write(acme_bar_pycache_path.join("foo.cpython-38.pyc"), "")?;
910        write(acme_bar_pycache_path.join("foo.cpython-38.opt-1.pyc"), "")?;
911        write(acme_bar_pycache_path.join("foo.cpython-38.opt-2.pyc"), "")?;
912
913        let resources =
914            PythonResourceIterator::new(tp, "cpython-38", &DEFAULT_SUFFIXES, false, true)?
915                .collect::<Result<Vec<_>>>()?;
916        assert_eq!(resources.len(), 18);
917
918        assert_eq!(
919            resources[0],
920            PythonModuleBytecode::from_path(
921                "acme",
922                BytecodeOptimizationLevel::One,
923                "cpython-38",
924                &acme_pycache_path.join("__init__.cpython-38.opt-1.pyc")
925            )
926            .into()
927        );
928        assert_eq!(
929            resources[1],
930            PythonModuleBytecode::from_path(
931                "acme",
932                BytecodeOptimizationLevel::Two,
933                "cpython-38",
934                &acme_pycache_path.join("__init__.cpython-38.opt-2.pyc")
935            )
936            .into()
937        );
938        assert_eq!(
939            resources[2],
940            PythonModuleBytecode::from_path(
941                "acme",
942                BytecodeOptimizationLevel::Zero,
943                "cpython-38",
944                &acme_pycache_path.join("__init__.cpython-38.pyc")
945            )
946            .into()
947        );
948        assert_eq!(
949            resources[3],
950            PythonModuleBytecode::from_path(
951                "acme.foo",
952                BytecodeOptimizationLevel::One,
953                "cpython-38",
954                &acme_pycache_path.join("foo.cpython-38.opt-1.pyc")
955            )
956            .into()
957        );
958        assert_eq!(
959            resources[4],
960            PythonModuleBytecode::from_path(
961                "acme.foo",
962                BytecodeOptimizationLevel::Two,
963                "cpython-38",
964                &acme_pycache_path.join("foo.cpython-38.opt-2.pyc")
965            )
966            .into()
967        );
968        assert_eq!(
969            resources[5],
970            PythonModuleBytecode::from_path(
971                "acme.foo",
972                BytecodeOptimizationLevel::Zero,
973                "cpython-38",
974                &acme_pycache_path.join("foo.cpython-38.pyc")
975            )
976            .into()
977        );
978        assert_eq!(
979            resources[6],
980            PythonModuleBytecode::from_path(
981                "acme.a",
982                BytecodeOptimizationLevel::One,
983                "cpython-38",
984                &acme_a_pycache_path.join("__init__.cpython-38.opt-1.pyc")
985            )
986            .into()
987        );
988        assert_eq!(
989            resources[7],
990            PythonModuleBytecode::from_path(
991                "acme.a",
992                BytecodeOptimizationLevel::Two,
993                "cpython-38",
994                &acme_a_pycache_path.join("__init__.cpython-38.opt-2.pyc")
995            )
996            .into()
997        );
998        assert_eq!(
999            resources[8],
1000            PythonModuleBytecode::from_path(
1001                "acme.a",
1002                BytecodeOptimizationLevel::Zero,
1003                "cpython-38",
1004                &acme_a_pycache_path.join("__init__.cpython-38.pyc")
1005            )
1006            .into()
1007        );
1008        assert_eq!(
1009            resources[9],
1010            PythonModuleBytecode::from_path(
1011                "acme.a.foo",
1012                BytecodeOptimizationLevel::One,
1013                "cpython-38",
1014                &acme_a_pycache_path.join("foo.cpython-38.opt-1.pyc")
1015            )
1016            .into()
1017        );
1018        assert_eq!(
1019            resources[10],
1020            PythonModuleBytecode::from_path(
1021                "acme.a.foo",
1022                BytecodeOptimizationLevel::Two,
1023                "cpython-38",
1024                &acme_a_pycache_path.join("foo.cpython-38.opt-2.pyc")
1025            )
1026            .into()
1027        );
1028        assert_eq!(
1029            resources[11],
1030            PythonModuleBytecode::from_path(
1031                "acme.a.foo",
1032                BytecodeOptimizationLevel::Zero,
1033                "cpython-38",
1034                &acme_a_pycache_path.join("foo.cpython-38.pyc")
1035            )
1036            .into()
1037        );
1038        assert_eq!(
1039            resources[12],
1040            PythonModuleBytecode::from_path(
1041                "acme.bar",
1042                BytecodeOptimizationLevel::One,
1043                "cpython-38",
1044                &acme_bar_pycache_path.join("__init__.cpython-38.opt-1.pyc")
1045            )
1046            .into()
1047        );
1048        assert_eq!(
1049            resources[13],
1050            PythonModuleBytecode::from_path(
1051                "acme.bar",
1052                BytecodeOptimizationLevel::Two,
1053                "cpython-38",
1054                &acme_bar_pycache_path.join("__init__.cpython-38.opt-2.pyc")
1055            )
1056            .into()
1057        );
1058        assert_eq!(
1059            resources[14],
1060            PythonModuleBytecode::from_path(
1061                "acme.bar",
1062                BytecodeOptimizationLevel::Zero,
1063                "cpython-38",
1064                &acme_bar_pycache_path.join("__init__.cpython-38.pyc")
1065            )
1066            .into()
1067        );
1068        assert_eq!(
1069            resources[15],
1070            PythonModuleBytecode::from_path(
1071                "acme.bar.foo",
1072                BytecodeOptimizationLevel::One,
1073                "cpython-38",
1074                &acme_bar_pycache_path.join("foo.cpython-38.opt-1.pyc")
1075            )
1076            .into()
1077        );
1078        assert_eq!(
1079            resources[16],
1080            PythonModuleBytecode::from_path(
1081                "acme.bar.foo",
1082                BytecodeOptimizationLevel::Two,
1083                "cpython-38",
1084                &acme_bar_pycache_path.join("foo.cpython-38.opt-2.pyc")
1085            )
1086            .into()
1087        );
1088        assert_eq!(
1089            resources[17],
1090            PythonModuleBytecode::from_path(
1091                "acme.bar.foo",
1092                BytecodeOptimizationLevel::Zero,
1093                "cpython-38",
1094                &acme_bar_pycache_path.join("foo.cpython-38.pyc")
1095            )
1096            .into()
1097        );
1098
1099        Ok(())
1100    }
1101
1102    #[test]
1103    fn test_site_packages() -> Result<()> {
1104        let td = tempfile::Builder::new()
1105            .prefix("python-packaging-test")
1106            .tempdir()?;
1107        let tp = td.path();
1108
1109        let sp_path = tp.join("site-packages");
1110        let acme_path = sp_path.join("acme");
1111
1112        create_dir_all(&acme_path).unwrap();
1113
1114        write(acme_path.join("__init__.py"), "")?;
1115        write(acme_path.join("bar.py"), "")?;
1116
1117        let resources =
1118            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1119                .collect::<Result<Vec<_>>>()?;
1120        assert_eq!(resources.len(), 2);
1121
1122        assert_eq!(
1123            resources[0],
1124            PythonModuleSource {
1125                name: "acme".to_string(),
1126                source: FileData::Path(acme_path.join("__init__.py")),
1127                is_package: true,
1128                cache_tag: DEFAULT_CACHE_TAG.to_string(),
1129                is_stdlib: false,
1130                is_test: false,
1131            }
1132            .into()
1133        );
1134        assert_eq!(
1135            resources[1],
1136            PythonModuleSource {
1137                name: "acme.bar".to_string(),
1138                source: FileData::Path(acme_path.join("bar.py")),
1139                is_package: false,
1140                cache_tag: DEFAULT_CACHE_TAG.to_string(),
1141                is_stdlib: false,
1142                is_test: false,
1143            }
1144            .into()
1145        );
1146
1147        Ok(())
1148    }
1149
1150    #[test]
1151    fn test_extension_module() -> Result<()> {
1152        let td = tempfile::Builder::new()
1153            .prefix("python-packaging-test")
1154            .tempdir()?;
1155        let tp = td.path();
1156
1157        create_dir_all(tp.join("markupsafe"))?;
1158
1159        let pyd_path = tp.join("foo.pyd");
1160        let so_path = tp.join("bar.so");
1161        let cffi_path = tp.join("_cffi_backend.cp37-win_amd64.pyd");
1162        let markupsafe_speedups_path = tp
1163            .join("markupsafe")
1164            .join("_speedups.cpython-37m-x86_64-linux-gnu.so");
1165        let zstd_path = tp.join("zstd.cpython-37m-x86_64-linux-gnu.so");
1166
1167        write(&pyd_path, "")?;
1168        write(&so_path, "")?;
1169        write(&cffi_path, "")?;
1170        write(&markupsafe_speedups_path, "")?;
1171        write(&zstd_path, "")?;
1172
1173        let suffixes = PythonModuleSuffixes {
1174            source: vec![],
1175            bytecode: vec![],
1176            debug_bytecode: vec![],
1177            optimized_bytecode: vec![],
1178            extension: vec![
1179                ".cp37-win_amd64.pyd".to_string(),
1180                ".cp37-win32.pyd".to_string(),
1181                ".cpython-37m-x86_64-linux-gnu.so".to_string(),
1182                ".pyd".to_string(),
1183                ".so".to_string(),
1184            ],
1185        };
1186
1187        let resources = PythonResourceIterator::new(tp, "cpython-37", &suffixes, false, true)?
1188            .collect::<Result<Vec<_>>>()?;
1189
1190        assert_eq!(resources.len(), 5);
1191
1192        assert_eq!(
1193            resources[0],
1194            PythonExtensionModule {
1195                name: "_cffi_backend".to_string(),
1196                init_fn: Some("PyInit__cffi_backend".to_string()),
1197                extension_file_suffix: ".cp37-win_amd64.pyd".to_string(),
1198                shared_library: Some(FileData::Path(cffi_path)),
1199                object_file_data: vec![],
1200                is_package: false,
1201                link_libraries: vec![],
1202                is_stdlib: false,
1203                builtin_default: false,
1204                required: false,
1205                variant: None,
1206                license: None,
1207            }
1208            .into()
1209        );
1210        assert_eq!(
1211            resources[1],
1212            PythonExtensionModule {
1213                name: "bar".to_string(),
1214                init_fn: Some("PyInit_bar".to_string()),
1215                extension_file_suffix: ".so".to_string(),
1216                shared_library: Some(FileData::Path(so_path)),
1217                object_file_data: vec![],
1218                is_package: false,
1219                link_libraries: vec![],
1220                is_stdlib: false,
1221                builtin_default: false,
1222                required: false,
1223                variant: None,
1224                license: None,
1225            }
1226            .into(),
1227        );
1228        assert_eq!(
1229            resources[2],
1230            PythonExtensionModule {
1231                name: "foo".to_string(),
1232                init_fn: Some("PyInit_foo".to_string()),
1233                extension_file_suffix: ".pyd".to_string(),
1234                shared_library: Some(FileData::Path(pyd_path)),
1235                object_file_data: vec![],
1236                is_package: false,
1237                link_libraries: vec![],
1238                is_stdlib: false,
1239                builtin_default: false,
1240                required: false,
1241                variant: None,
1242                license: None,
1243            }
1244            .into(),
1245        );
1246        assert_eq!(
1247            resources[3],
1248            PythonExtensionModule {
1249                name: "markupsafe._speedups".to_string(),
1250                init_fn: Some("PyInit__speedups".to_string()),
1251                extension_file_suffix: ".cpython-37m-x86_64-linux-gnu.so".to_string(),
1252                shared_library: Some(FileData::Path(markupsafe_speedups_path)),
1253                object_file_data: vec![],
1254                is_package: false,
1255                link_libraries: vec![],
1256                is_stdlib: false,
1257                builtin_default: false,
1258                required: false,
1259                variant: None,
1260                license: None,
1261            }
1262            .into(),
1263        );
1264        assert_eq!(
1265            resources[4],
1266            PythonExtensionModule {
1267                name: "zstd".to_string(),
1268                init_fn: Some("PyInit_zstd".to_string()),
1269                extension_file_suffix: ".cpython-37m-x86_64-linux-gnu.so".to_string(),
1270                shared_library: Some(FileData::Path(zstd_path)),
1271                object_file_data: vec![],
1272                is_package: false,
1273                link_libraries: vec![],
1274                is_stdlib: false,
1275                builtin_default: false,
1276                required: false,
1277                variant: None,
1278                license: None,
1279            }
1280            .into(),
1281        );
1282
1283        Ok(())
1284    }
1285
1286    #[test]
1287    fn test_egg_file() -> Result<()> {
1288        let td = tempfile::Builder::new()
1289            .prefix("python-packaging-test")
1290            .tempdir()?;
1291        let tp = td.path();
1292
1293        create_dir_all(tp)?;
1294
1295        let egg_path = tp.join("foo-1.0-py3.7.egg");
1296        write(&egg_path, "")?;
1297
1298        let resources =
1299            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1300                .collect::<Result<Vec<_>>>()?;
1301        assert_eq!(resources.len(), 1);
1302
1303        assert_eq!(
1304            resources[0],
1305            PythonEggFile {
1306                data: FileData::Path(egg_path)
1307            }
1308            .into()
1309        );
1310
1311        Ok(())
1312    }
1313
1314    #[test]
1315    fn test_egg_dir() -> Result<()> {
1316        let td = tempfile::Builder::new()
1317            .prefix("python-packaging-test")
1318            .tempdir()?;
1319        let tp = td.path();
1320
1321        create_dir_all(tp)?;
1322
1323        let egg_path = tp.join("site-packages").join("foo-1.0-py3.7.egg");
1324        let egg_info_path = egg_path.join("EGG-INFO");
1325        let package_path = egg_path.join("foo");
1326
1327        create_dir_all(&egg_info_path)?;
1328        create_dir_all(&package_path)?;
1329
1330        write(egg_info_path.join("PKG-INFO"), "")?;
1331        write(package_path.join("__init__.py"), "")?;
1332        write(package_path.join("bar.py"), "")?;
1333
1334        let resources =
1335            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1336                .collect::<Result<Vec<_>>>()?;
1337        assert_eq!(resources.len(), 2);
1338
1339        assert_eq!(
1340            resources[0],
1341            PythonModuleSource {
1342                name: "foo".to_string(),
1343                source: FileData::Path(package_path.join("__init__.py")),
1344                is_package: true,
1345                cache_tag: DEFAULT_CACHE_TAG.to_string(),
1346                is_stdlib: false,
1347                is_test: false,
1348            }
1349            .into()
1350        );
1351        assert_eq!(
1352            resources[1],
1353            PythonModuleSource {
1354                name: "foo.bar".to_string(),
1355                source: FileData::Path(package_path.join("bar.py")),
1356                is_package: false,
1357                cache_tag: DEFAULT_CACHE_TAG.to_string(),
1358                is_stdlib: false,
1359                is_test: false,
1360            }
1361            .into()
1362        );
1363
1364        Ok(())
1365    }
1366
1367    #[test]
1368    fn test_pth_file() -> Result<()> {
1369        let td = tempfile::Builder::new()
1370            .prefix("python-packaging-test")
1371            .tempdir()?;
1372        let tp = td.path();
1373
1374        create_dir_all(tp)?;
1375
1376        let pth_path = tp.join("foo.pth");
1377        write(&pth_path, "")?;
1378
1379        let resources =
1380            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1381                .collect::<Result<Vec<_>>>()?;
1382        assert_eq!(resources.len(), 1);
1383
1384        assert_eq!(
1385            resources[0],
1386            PythonPathExtension {
1387                data: FileData::Path(pth_path)
1388            }
1389            .into()
1390        );
1391
1392        Ok(())
1393    }
1394
1395    /// Resource files without a package are not valid.
1396    #[test]
1397    fn test_root_resource_file() -> Result<()> {
1398        let td = tempfile::Builder::new()
1399            .prefix("python-packaging-test")
1400            .tempdir()?;
1401        let tp = td.path();
1402
1403        let resource_path = tp.join("resource.txt");
1404        write(resource_path, "content")?;
1405
1406        assert!(PythonResourceIterator::new(
1407            tp,
1408            DEFAULT_CACHE_TAG,
1409            &DEFAULT_SUFFIXES,
1410            false,
1411            true
1412        )?
1413        .next()
1414        .is_none());
1415
1416        Ok(())
1417    }
1418
1419    /// Resource files in a relative directory without a package are not valid.
1420    #[test]
1421    fn test_relative_resource_no_package() -> Result<()> {
1422        let td = tempfile::Builder::new()
1423            .prefix("python-packaging-test")
1424            .tempdir()?;
1425        let tp = td.path();
1426
1427        write(tp.join("foo.py"), "")?;
1428        let resource_dir = tp.join("resources");
1429        create_dir_all(&resource_dir)?;
1430
1431        let resource_path = resource_dir.join("resource.txt");
1432        write(resource_path, "content")?;
1433
1434        let resources =
1435            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1436                .collect::<Result<Vec<_>>>()?;
1437        assert_eq!(resources.len(), 1);
1438
1439        assert_eq!(
1440            resources[0],
1441            PythonModuleSource {
1442                name: "foo".to_string(),
1443                source: FileData::Path(tp.join("foo.py")),
1444                is_package: false,
1445                cache_tag: DEFAULT_CACHE_TAG.to_string(),
1446                is_stdlib: false,
1447                is_test: false,
1448            }
1449            .into()
1450        );
1451
1452        Ok(())
1453    }
1454
1455    /// Resource files next to a package are detected.
1456    #[test]
1457    fn test_relative_package_resource() -> Result<()> {
1458        let td = tempfile::Builder::new()
1459            .prefix("python-packaging-test")
1460            .tempdir()?;
1461        let tp = td.path();
1462
1463        let package_dir = tp.join("foo");
1464        create_dir_all(&package_dir)?;
1465
1466        let module_path = package_dir.join("__init__.py");
1467        write(&module_path, "")?;
1468        let resource_path = package_dir.join("resource.txt");
1469        write(&resource_path, "content")?;
1470
1471        let resources =
1472            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1473                .collect::<Result<Vec<_>>>()?;
1474
1475        assert_eq!(resources.len(), 2);
1476        assert_eq!(
1477            resources[0],
1478            PythonModuleSource {
1479                name: "foo".to_string(),
1480                source: FileData::Path(module_path),
1481                is_package: true,
1482                cache_tag: DEFAULT_CACHE_TAG.to_string(),
1483                is_stdlib: false,
1484                is_test: false,
1485            }
1486            .into()
1487        );
1488        assert_eq!(
1489            resources[1],
1490            PythonPackageResource {
1491                leaf_package: "foo".to_string(),
1492                relative_name: "resource.txt".to_string(),
1493                data: FileData::Path(resource_path),
1494                is_stdlib: false,
1495                is_test: false,
1496            }
1497            .into()
1498        );
1499
1500        Ok(())
1501    }
1502
1503    /// Resource files in sub-directory are detected.
1504    #[test]
1505    fn test_subdirectory_resource() -> Result<()> {
1506        let td = tempfile::Builder::new()
1507            .prefix("python-packaging-test")
1508            .tempdir()?;
1509        let tp = td.path();
1510
1511        let package_dir = tp.join("foo");
1512        let subdir = package_dir.join("resources");
1513        create_dir_all(&subdir)?;
1514
1515        let module_path = package_dir.join("__init__.py");
1516        write(&module_path, "")?;
1517        let resource_path = subdir.join("resource.txt");
1518        write(&resource_path, "content")?;
1519
1520        let resources =
1521            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1522                .collect::<Result<Vec<_>>>()?;
1523
1524        assert_eq!(resources.len(), 2);
1525        assert_eq!(
1526            resources[0],
1527            PythonModuleSource {
1528                name: "foo".to_string(),
1529                source: FileData::Path(module_path),
1530                is_package: true,
1531                cache_tag: DEFAULT_CACHE_TAG.to_string(),
1532                is_stdlib: false,
1533                is_test: false,
1534            }
1535            .into(),
1536        );
1537        assert_eq!(
1538            resources[1],
1539            PythonPackageResource {
1540                leaf_package: "foo".to_string(),
1541                relative_name: "resources/resource.txt".to_string(),
1542                data: FileData::Path(resource_path),
1543                is_stdlib: false,
1544                is_test: false,
1545            }
1546            .into()
1547        );
1548
1549        Ok(())
1550    }
1551
1552    /// .dist-info directory ignored if METADATA file not present.
1553    #[test]
1554    fn test_distinfo_missing_metadata() -> Result<()> {
1555        let td = tempfile::Builder::new()
1556            .prefix("python-packaging-test")
1557            .tempdir()?;
1558        let tp = td.path();
1559
1560        let dist_path = tp.join("foo-1.2.dist-info");
1561        create_dir_all(&dist_path)?;
1562        let resource = dist_path.join("file.txt");
1563        write(resource, "content")?;
1564
1565        let resources =
1566            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1567                .collect::<Result<Vec<_>>>()?;
1568        assert!(resources.is_empty());
1569
1570        Ok(())
1571    }
1572
1573    /// .dist-info with invalid METADATA file has no content emitted.
1574    #[test]
1575    fn test_distinfo_bad_metadata() -> Result<()> {
1576        let td = tempfile::Builder::new()
1577            .prefix("python-packaging-test")
1578            .tempdir()?;
1579        let tp = td.path();
1580
1581        let dist_path = tp.join("foo-1.2.dist-info");
1582        create_dir_all(&dist_path)?;
1583        let metadata = dist_path.join("METADATA");
1584        write(metadata, "bad content")?;
1585        let resource = dist_path.join("file.txt");
1586        write(resource, "content")?;
1587
1588        let resources =
1589            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1590                .collect::<Result<Vec<_>>>()?;
1591        assert!(resources.is_empty());
1592
1593        Ok(())
1594    }
1595
1596    /// .dist-info with partial METADATA content has no content emitted.
1597    #[test]
1598    fn test_distinfo_partial_metadata() -> Result<()> {
1599        let td = tempfile::Builder::new()
1600            .prefix("python-packaging-test")
1601            .tempdir()?;
1602        let tp = td.path();
1603
1604        let dist_path = tp.join("black-1.2.3.dist-info");
1605        create_dir_all(&dist_path)?;
1606        let metadata = dist_path.join("METADATA");
1607        write(metadata, "Name: black\n")?;
1608        let resource = dist_path.join("file.txt");
1609        write(resource, "content")?;
1610
1611        let resources =
1612            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1613                .collect::<Result<Vec<_>>>()?;
1614        assert!(resources.is_empty());
1615
1616        Ok(())
1617    }
1618
1619    /// .dist-info with partial METADATA content has no content emitted.
1620    #[test]
1621    fn test_distinfo_valid_metadata() -> Result<()> {
1622        let td = tempfile::Builder::new()
1623            .prefix("python-packaging-test")
1624            .tempdir()?;
1625        let tp = td.path();
1626
1627        let dist_path = tp.join("black-1.2.3.dist-info");
1628        create_dir_all(&dist_path)?;
1629        let metadata_path = dist_path.join("METADATA");
1630        write(&metadata_path, "Name: black\nVersion: 1.2.3\n")?;
1631        let resource_path = dist_path.join("file.txt");
1632        write(&resource_path, "content")?;
1633
1634        let subdir = dist_path.join("subdir");
1635        create_dir_all(&subdir)?;
1636        let subdir_resource_path = subdir.join("sub.txt");
1637        write(&subdir_resource_path, "content")?;
1638
1639        let resources =
1640            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1641                .collect::<Result<Vec<_>>>()?;
1642        assert_eq!(resources.len(), 3);
1643
1644        assert_eq!(
1645            resources[0],
1646            PythonPackageDistributionResource {
1647                location: PythonPackageDistributionResourceFlavor::DistInfo,
1648                package: "black".to_string(),
1649                version: "1.2.3".to_string(),
1650                name: "METADATA".to_string(),
1651                data: FileData::Path(metadata_path),
1652            }
1653            .into()
1654        );
1655        assert_eq!(
1656            resources[1],
1657            PythonPackageDistributionResource {
1658                location: PythonPackageDistributionResourceFlavor::DistInfo,
1659                package: "black".to_string(),
1660                version: "1.2.3".to_string(),
1661                name: "file.txt".to_string(),
1662                data: FileData::Path(resource_path),
1663            }
1664            .into()
1665        );
1666        assert_eq!(
1667            resources[2],
1668            PythonPackageDistributionResource {
1669                location: PythonPackageDistributionResourceFlavor::DistInfo,
1670                package: "black".to_string(),
1671                version: "1.2.3".to_string(),
1672                name: "subdir/sub.txt".to_string(),
1673                data: FileData::Path(subdir_resource_path),
1674            }
1675            .into()
1676        );
1677
1678        Ok(())
1679    }
1680
1681    #[test]
1682    fn distinfo_package_name_normalization() -> Result<()> {
1683        // Package names on disk are normalized to lowercase with hyphens replaced
1684        // as underscores. Make sure we can still find the original names and
1685        // attribute with the proper name.
1686        let td = tempfile::Builder::new()
1687            .prefix("python-packaging-test")
1688            .tempdir()?;
1689        let tp = td.path();
1690
1691        let dist_path = tp.join("foo_bar-1.0.dist-info");
1692        create_dir_all(&dist_path)?;
1693        let metadata_path = dist_path.join("METADATA");
1694        write(&metadata_path, "Name: Foo-BAR\nVersion: 1.0\n")?;
1695        let resource_path = dist_path.join("resource.txt");
1696        write(&resource_path, "content")?;
1697
1698        let resources =
1699            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1700                .collect::<Result<Vec<_>>>()?;
1701        assert_eq!(resources.len(), 2);
1702
1703        assert_eq!(
1704            resources[0],
1705            PythonPackageDistributionResource {
1706                location: PythonPackageDistributionResourceFlavor::DistInfo,
1707                package: "Foo-BAR".into(),
1708                version: "1.0".into(),
1709                name: "METADATA".into(),
1710                data: FileData::Path(metadata_path),
1711            }
1712            .into()
1713        );
1714        assert_eq!(
1715            resources[1],
1716            PythonPackageDistributionResource {
1717                location: PythonPackageDistributionResourceFlavor::DistInfo,
1718                package: "Foo-BAR".into(),
1719                version: "1.0".into(),
1720                name: "resource.txt".into(),
1721                data: FileData::Path(resource_path),
1722            }
1723            .into()
1724        );
1725
1726        Ok(())
1727    }
1728
1729    #[test]
1730    fn test_egginfo_valid_metadata() -> Result<()> {
1731        let td = tempfile::Builder::new()
1732            .prefix("python-packaging-test")
1733            .tempdir()?;
1734        let tp = td.path();
1735
1736        let egg_path = tp.join("black-1.2.3.egg-info");
1737        create_dir_all(&egg_path)?;
1738        let metadata_path = egg_path.join("PKG-INFO");
1739        write(&metadata_path, "Name: black\nVersion: 1.2.3\n")?;
1740        let resource_path = egg_path.join("file.txt");
1741        write(&resource_path, "content")?;
1742
1743        let subdir = egg_path.join("subdir");
1744        create_dir_all(&subdir)?;
1745        let subdir_resource_path = subdir.join("sub.txt");
1746        write(&subdir_resource_path, "content")?;
1747
1748        let resources =
1749            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1750                .collect::<Result<Vec<_>>>()?;
1751        assert_eq!(resources.len(), 3);
1752
1753        assert_eq!(
1754            resources[0],
1755            PythonPackageDistributionResource {
1756                location: PythonPackageDistributionResourceFlavor::EggInfo,
1757                package: "black".to_string(),
1758                version: "1.2.3".to_string(),
1759                name: "PKG-INFO".to_string(),
1760                data: FileData::Path(metadata_path),
1761            }
1762            .into()
1763        );
1764        assert_eq!(
1765            resources[1],
1766            PythonPackageDistributionResource {
1767                location: PythonPackageDistributionResourceFlavor::EggInfo,
1768                package: "black".to_string(),
1769                version: "1.2.3".to_string(),
1770                name: "file.txt".to_string(),
1771                data: FileData::Path(resource_path),
1772            }
1773            .into()
1774        );
1775        assert_eq!(
1776            resources[2],
1777            PythonPackageDistributionResource {
1778                location: PythonPackageDistributionResourceFlavor::EggInfo,
1779                package: "black".to_string(),
1780                version: "1.2.3".to_string(),
1781                name: "subdir/sub.txt".to_string(),
1782                data: FileData::Path(subdir_resource_path),
1783            }
1784            .into()
1785        );
1786
1787        Ok(())
1788    }
1789
1790    #[test]
1791    fn egginfo_package_name_normalization() -> Result<()> {
1792        // Package names on disk are normalized to lowercase with hyphens replaced
1793        // as underscores. Make sure we can still find the original names and
1794        // attribute with the proper name.
1795        let td = tempfile::Builder::new()
1796            .prefix("python-packaging-test")
1797            .tempdir()?;
1798        let tp = td.path();
1799
1800        let dist_path = tp.join("foo_bar-1.0.egg-info");
1801        create_dir_all(&dist_path)?;
1802        let metadata_path = dist_path.join("PKG-INFO");
1803        write(&metadata_path, "Name: Foo-BAR\nVersion: 1.0\n")?;
1804        let resource_path = dist_path.join("resource.txt");
1805        write(&resource_path, "content")?;
1806
1807        let resources =
1808            PythonResourceIterator::new(tp, DEFAULT_CACHE_TAG, &DEFAULT_SUFFIXES, false, true)?
1809                .collect::<Result<Vec<_>>>()?;
1810        assert_eq!(resources.len(), 2);
1811
1812        assert_eq!(
1813            resources[0],
1814            PythonPackageDistributionResource {
1815                location: PythonPackageDistributionResourceFlavor::EggInfo,
1816                package: "Foo-BAR".into(),
1817                version: "1.0".into(),
1818                name: "PKG-INFO".into(),
1819                data: FileData::Path(metadata_path),
1820            }
1821            .into()
1822        );
1823        assert_eq!(
1824            resources[1],
1825            PythonPackageDistributionResource {
1826                location: PythonPackageDistributionResourceFlavor::EggInfo,
1827                package: "Foo-BAR".into(),
1828                version: "1.0".into(),
1829                name: "resource.txt".into(),
1830                data: FileData::Path(resource_path),
1831            }
1832            .into()
1833        );
1834
1835        Ok(())
1836    }
1837
1838    #[test]
1839    fn test_memory_resources() -> Result<()> {
1840        let inputs = vec![
1841            File::new("foo/__init__.py", vec![0]),
1842            File::new("foo/bar.py", FileEntry::new_from_data(vec![1], true)),
1843        ];
1844
1845        let resources = PythonResourceIterator::from_data_locations(
1846            &inputs,
1847            DEFAULT_CACHE_TAG,
1848            &DEFAULT_SUFFIXES,
1849            true,
1850            true,
1851        )?
1852        .collect::<Result<Vec<_>>>()?;
1853
1854        assert_eq!(resources.len(), 4);
1855        assert_eq!(resources[0], File::new("foo/__init__.py", vec![0]).into());
1856        assert_eq!(
1857            resources[1],
1858            PythonModuleSource {
1859                name: "foo".to_string(),
1860                source: FileData::Memory(vec![0]),
1861                is_package: true,
1862                cache_tag: DEFAULT_CACHE_TAG.to_string(),
1863                is_stdlib: false,
1864                is_test: false,
1865            }
1866            .into()
1867        );
1868        assert_eq!(
1869            resources[2],
1870            File::new("foo/bar.py", FileEntry::new_from_data(vec![1], true)).into()
1871        );
1872        assert_eq!(
1873            resources[3],
1874            PythonModuleSource {
1875                name: "foo.bar".to_string(),
1876                source: FileData::Memory(vec![1]),
1877                is_package: false,
1878                cache_tag: DEFAULT_CACHE_TAG.to_string(),
1879                is_stdlib: false,
1880                is_test: false,
1881            }
1882            .into()
1883        );
1884
1885        Ok(())
1886    }
1887}