Skip to main content

uv_distribution_types/
installed_modules.rs

1//! Discovers importable modules provided by an installed wheel.
2//!
3//! Installed wheels record installed paths in `<name>-<version>.dist-info/RECORD`. Python source
4//! files, legacy sourceless bytecode, and recognized native extension modules located under the
5//! import root contribute a [`ModuleName`] and its parent package prefixes.
6//!
7//! This is intentionally file-based: it does not infer modules exposed through `.pth` files,
8//! legacy namespace declarations in `__init__.py`, or `.pyi`-only stub distributions.
9
10use std::collections::BTreeSet;
11use std::path::{Component, Path};
12
13use fs_err::File;
14use uv_fs::normalize_path;
15use uv_install_wheel::read_record;
16use uv_pypi_types::ModuleName;
17
18use crate::installed::{InstalledDist, InstalledDistError};
19
20impl InstalledDist {
21    /// Read the modules provided by this installed distribution.
22    pub fn read_modules(
23        &self,
24        extension_suffixes: &[Box<str>],
25    ) -> Result<BTreeSet<ModuleName>, InstalledDistError> {
26        let dist_info = self.install_path();
27        if !has_extension(dist_info, "dist-info") {
28            return Ok(BTreeSet::new());
29        }
30
31        let record_path = dist_info.join("RECORD");
32        let record = read_record(File::open(&record_path)?)?;
33
34        let mut modules = BTreeSet::new();
35        for entry in record {
36            add_record_module(&entry.path, extension_suffixes, &mut modules);
37        }
38
39        Ok(modules)
40    }
41}
42
43fn add_record_module(
44    path: &str,
45    extension_suffixes: &[Box<str>],
46    modules: &mut BTreeSet<ModuleName>,
47) {
48    let Some(components) = record_path_components(path) else {
49        return;
50    };
51    let Some((file_name, parents)) = components.split_last() else {
52        return;
53    };
54    let file_name = file_name.as_ref();
55
56    // Metadata and other entries under `.dist-info` directories are not modules.
57    if components
58        .iter()
59        .any(|component| has_extension(component.as_ref(), "dist-info"))
60    {
61        return;
62    }
63    // Files in a `.data` directory that were not relocated into the import root are not modules.
64    // Relocated files are recorded at their installed paths instead.
65    if components
66        .first()
67        .is_some_and(|component| has_extension(component.as_ref(), "data"))
68    {
69        return;
70    }
71
72    let mut module_components = parents
73        .iter()
74        .map(std::convert::AsRef::as_ref)
75        .collect::<Vec<_>>();
76    // We intentionally skip `.pyi` files here because we're looking for runtime module ownership.
77    // Type stubs will require separate ownership modeling.
78    if file_name == "__init__.py" {
79        // The parent path is the package.
80    } else if let Some(stem) = file_name.strip_suffix(".py") {
81        module_components.push(stem);
82    } else if let Some(stem) = bytecode_module_stem(file_name, parents) {
83        if stem != "__init__" {
84            module_components.push(stem);
85        }
86    } else if let Some(stem) = {
87        // Python reports the recognized suffixes in import lookup order through
88        // `importlib.machinery.EXTENSION_SUFFIXES`; preserve that order so a generic suffix such
89        // as `.so` does not consume a more-specific suffix such as `.abi3.so`.
90        extension_suffixes.iter().find_map(|suffix| {
91            let stem = file_name.strip_suffix(suffix.as_ref())?;
92            (!stem.is_empty()).then_some(stem)
93        })
94    } {
95        if stem != "__init__" {
96            module_components.push(stem);
97        }
98    } else {
99        return;
100    }
101
102    add_module_components(&module_components, modules);
103}
104
105fn record_path_components(path: &str) -> Option<Vec<Box<str>>> {
106    let normalized = normalize_path(Path::new(path));
107    let path = normalized.as_ref();
108
109    // `RECORD` can include absolute paths and relative paths that leave the directory containing
110    // `.dist-info`, for example installed scripts. Those entries cannot describe modules here.
111    if path.is_absolute() {
112        return None;
113    }
114
115    let mut components = Vec::new();
116    for component in path.components() {
117        match component {
118            Component::Normal(component) => {
119                components.push(Box::from(component.to_str()?));
120            }
121            Component::CurDir => {}
122            Component::ParentDir | Component::Prefix(_) | Component::RootDir => return None,
123        }
124    }
125
126    Some(components)
127}
128
/// Extract the module stem from a sourceless bytecode file listed in `RECORD`.
///
/// `file_name` is the final path component and `parents` the directories leading to it.
/// Returns the name with its `.pyc` suffix removed, or `None` when the file is not a `.pyc`
/// file or sits directly inside a `__pycache__` directory.
///
/// CPython can import `package/module.pyc` directly when only bytecode is installed. In
/// contrast, `package/__pycache__/module.cpython-312.pyc` is not an import source without
/// `package/module.py`, so cached bytecode never contributes a module.
fn bytecode_module_stem<'a>(file_name: &'a str, parents: &[Box<str>]) -> Option<&'a str> {
    // Sourceless imports use the legacy `module.pyc` location, never `__pycache__`.
    let in_pycache = matches!(parents.last(), Some(dir) if &**dir == "__pycache__");
    if in_pycache {
        return None;
    }

    file_name.strip_suffix(".pyc")
}
148
/// Report whether the final component of `path` has exactly the given `extension`.
///
/// The comparison is case-sensitive and `extension` is given without the leading dot. Paths
/// without an extension never match.
fn has_extension(path: impl AsRef<Path>, extension: &str) -> bool {
    let Some(actual) = path.as_ref().extension() else {
        return false;
    };

    actual == extension
}
154
155fn add_module_components(components: &[&str], modules: &mut BTreeSet<ModuleName>) {
156    let Ok(module) = ModuleName::from_components(components.iter().copied()) else {
157        return;
158    };
159
160    modules.extend(module.prefixes());
161}
162
#[cfg(test)]
mod tests {
    use std::collections::BTreeSet;

    use uv_pypi_types::ModuleName;

    use super::add_record_module;

    /// Native extension suffixes used by the tests, with the generic `.so` listed last.
    fn extension_suffixes() -> Vec<Box<str>> {
        const SUFFIXES: [&str; 5] = [
            ".cpython-312-darwin.so",
            ".cpython-314td-darwin.so",
            ".abi3.so",
            ".cp312-win_amd64.pyd",
            ".so",
        ];

        SUFFIXES.iter().copied().map(Box::from).collect()
    }

    /// Render the discovered modules in set order, one per line.
    fn module_names(modules: BTreeSet<ModuleName>) -> String {
        let mut rendered = String::new();
        for (index, module) in modules.into_iter().enumerate() {
            if index > 0 {
                rendered.push('\n');
            }
            rendered.push_str(&module.to_string());
        }
        rendered
    }

    /// Feed every path through `add_record_module` and render the resulting module set.
    fn discover(paths: &[&str], suffixes: &[Box<str>]) -> String {
        let mut modules = BTreeSet::new();
        for &path in paths {
            add_record_module(path, suffixes, &mut modules);
        }
        module_names(modules)
    }

    #[test]
    fn record_module_normalizes_record_paths() {
        assert_eq!(discover(&["./package/../café.py"], &[]), "café");
    }

    #[test]
    fn record_module_from_legacy_bytecode() {
        let found = discover(&["package/module.pyc", "legacy.pyc"], &[]);

        assert_eq!(found, "legacy\npackage\npackage.module");
    }

    #[test]
    fn record_module_ignores_pycache_bytecode() {
        let found = discover(
            &[
                "package/__pycache__/module.cpython-312.opt-1.pyc",
                "package/__pycache__/__init__.cpython-312.pyc",
            ],
            &[],
        );

        assert_eq!(found, "");
    }

    #[test]
    fn record_module_from_extension_module() {
        let found = discover(
            &[
                "package/extension.cpython-312-darwin.so",
                "package/free_threaded.cpython-314td-darwin.so",
                "package/limited.abi3.so",
                "package/windows.cp312-win_amd64.pyd",
                "package/__init__.cpython-312-darwin.so",
                "plain.so",
            ],
            &extension_suffixes(),
        );

        assert_eq!(
            found,
            "package\npackage.extension\npackage.free_threaded\npackage.limited\npackage.windows\nplain"
        );
    }

    #[test]
    fn record_module_ignores_unrecognized_extension_suffixes() {
        let found = discover(
            &[
                "package/extension.not-an-extension-tag.so",
                "package/bogus.pypynonsense.so",
            ],
            &extension_suffixes(),
        );

        assert_eq!(found, "");
    }
}
276}