poetry_udeps/
lib.rs

1use std::{
2    collections::BTreeMap,
3    fs::{self, File},
4    io::{self, BufRead, BufReader, Read},
5    path::{Path, PathBuf},
6    thread,
7};
8
9use anyhow::{Result, bail};
10use clap::Parser;
11use clap_verbosity_flag::Verbosity;
12use ignore::{WalkBuilder, types::TypesBuilder};
13use toml::Value;
14use tracing::{debug, error, info};
15use xshell::{Shell, cmd};
16
17mod name_map;
18mod parser;
19use crate::name_map::KNOWN_NAMES;
20use crate::parser::{ImportStatement, parse_python_file};
21
22const IGNORE_FILE: &str = ".poetryudepsignore";
23
24#[derive(Parser)]
25#[command(author, version, about, long_about = None)]
26pub struct Cli {
27    #[clap(flatten)]
28    pub verbose: Verbosity,
29    #[arg(short = 'e', long)]
30    /// Look for dependency usage in the poetry virtualenv.
31    ///
32    /// Assumes you have already installed all dependencies using poetry. It
33    /// will check the directory specified by `poetry env info -p`.
34    pub virtualenv: bool,
35    #[arg(short, long)]
36    /// Look for unused dependencies in dev-dependencies.
37    ///
38    /// Many projects include dev deps like CLI tools that are intentionally
39    /// not directly used in the codebase.
40    pub dev: bool,
41    #[arg(long = "no-ignore")]
42    /// Do not ignore the packages in .poetryudepsignore
43    pub no_ignore: bool,
44}
45
46fn get_venv_path() -> Result<String> {
47    let sh = Shell::new()?;
48
49    Ok(cmd!(sh, "poetry env info -p").quiet().read()?)
50}
51
/// Selects which dependency section of `pyproject.toml` should be read.
#[derive(Debug, PartialEq, Eq)]
enum DepType {
    /// The project's runtime dependencies.
    Main,
    /// The project's development-only dependencies.
    Dev,
}
56
57/// Returns two maps (one for core deps, one for dev-deps).
58///
59/// The maps are filled with either the original package name -> None, or with
60/// the alias -> [package names]. This helps us quickly determine which original
61/// dependency to eliminate if either the original package name or alias is
62/// found.
63///
64/// We do not simply track the aliases alone, as reporting an alias as obsolete
65/// is not as straightforward to the user which line to eliminate from their
66/// pyproject.toml.
67fn get_dependencies(file: &Path, deps: &DepType) -> Result<Option<BTreeMap<String, Vec<String>>>> {
68    let toml = fs::read_to_string(file)?;
69
70    // TODO: map package name to actual module name.
71    // Ref: https://stackoverflow.com/a/54853084
72    let value = toml.parse::<Value>()?;
73    let dep_table: Vec<String> = match deps {
74        DepType::Main => {
75            match value
76                .get("tool")
77                .and_then(|tool| tool.get("poetry"))
78                .and_then(|poetry| poetry.get("dependencies"))
79                .and_then(|deps| deps.as_table())
80            {
81                Some(deps) => deps.keys().map(std::borrow::ToOwned::to_owned).collect(),
82                // Check poetry >=2.0
83                None => {
84                    if let Some(deps) = value
85                        .get("project")
86                        .and_then(|dev| dev.get("dependencies"))
87                        .and_then(|dependencies| dependencies.as_array())
88                        .map(|dep_array: &Vec<Value>| {
89                            dep_array
90                                .iter()
91                                .filter_map(|val| {
92                                    val.as_str().and_then(|s| {
93                                        pep_508::parse(s).ok().map(|req| req.name.to_string())
94                                    })
95                                })
96                                .collect()
97                        })
98                    {
99                        deps
100                    } else {
101                        bail!("failed to parse dependencies from pyproject.toml")
102                    }
103                }
104            }
105        }
106        DepType::Dev => {
107            // Check poetry >=1.0,<1.2's dev-dependencies
108            match value
109                .get("tool")
110                .and_then(|tool| tool.get("poetry"))
111                .and_then(|poetry| poetry.get("dev-dependencies"))
112                .and_then(|dev| dev.as_table())
113            {
114                Some(dev) => dev.keys().map(std::borrow::ToOwned::to_owned).collect(),
115                // Check poetry >=1.2.0's dependency groups
116                None => {
117                    if let Some(deps) = value
118                        .get("tool")
119                        .and_then(|tool| tool.get("poetry"))
120                        .and_then(|poetry| poetry.get("group"))
121                        .and_then(|group| group.get("dev"))
122                        .and_then(|dev| dev.get("dependencies"))
123                        .and_then(|dependencies| dependencies.as_table())
124                    {
125                        deps.keys().map(std::borrow::ToOwned::to_owned).collect()
126                    } else {
127                        info!("failed to parse dev dependencies from pyproject.toml");
128                        return Ok(None);
129                    }
130                }
131            }
132        }
133    };
134    let mut dependencies: BTreeMap<String, Vec<String>> = BTreeMap::new();
135
136    // Generate a list of possible aliases for the package
137    dep_table.iter().filter(|s| *s != "python").for_each(|s| {
138        let package = String::from(s);
139        dependencies.insert(package.clone(), vec![]);
140        let mut alias = KNOWN_NAMES.get(&package).map(|a| String::from(*a));
141
142        // Or basic replacement
143        if alias.is_none() && package.contains('-') {
144            alias = Some(package.replace('-', "_").to_lowercase());
145        }
146        if let Some(a) = alias {
147            dependencies.entry(a).or_default().push(package);
148        } else {
149            dependencies.insert(package, vec![]);
150        }
151    });
152    Ok(Some(dependencies))
153}
154
// Read entries from the ignorefile, one per line.
//
// Each line is trimmed of surrounding whitespace so that an entry such as
// `  requests ` still matches the package name `requests`. Blank lines,
// whitespace-only lines and comment lines (first non-blank character is `#`)
// are skipped. Any I/O error while reading is returned to the caller.
fn read_lines(file: &File) -> io::Result<Vec<String>> {
    let mut entries = Vec::new();
    for line in BufReader::new(file).lines() {
        let line = line?;
        let entry = line.trim();
        if !entry.is_empty() && !entry.starts_with('#') {
            entries.push(entry.to_owned());
        }
    }
    Ok(entries)
}
163
164// Filter out dependencies from udeps if they are in the ignorefile.
165fn apply_ignorefile(udeps: Vec<String>) -> io::Result<Vec<String>> {
166    let ignore_packages = match File::open(IGNORE_FILE) {
167        Ok(poetryudepsignore) => read_lines(&poetryudepsignore)?,
168        Err(_) => return Ok(udeps),
169    };
170
171    debug!(ignored = ?ignore_packages);
172    Ok(udeps
173        .into_iter()
174        .filter(|dep| !ignore_packages.contains(dep))
175        .collect())
176}
177
178#[allow(clippy::too_many_lines)]
179#[allow(clippy::missing_errors_doc)]
180#[allow(clippy::missing_panics_doc)]
181pub fn run(cli: &Cli) -> Result<Option<Vec<String>>> {
182    let pyproject_path = Path::new("pyproject.toml");
183
184    match pyproject_path.try_exists() {
185        Ok(true) => (),
186        Ok(false) => {
187            error!("pyproject.toml not found. Are you in the root directory of your project?",);
188            // Just fall through, the subsequent read will raise the error for us
189        }
190        Err(e) => {
191            error!("pyproject.toml not found. Are you in the root directory of your project?",);
192            return Err(e.into());
193        }
194    }
195
196    let mut main_deps = get_dependencies(pyproject_path, &DepType::Main)?.unwrap();
197    info!(?main_deps);
198    let mut dev_deps = get_dependencies(pyproject_path, &DepType::Dev)?.unwrap_or_default();
199    info!(?dev_deps);
200
201    let (tx, rx) = flume::bounded::<(ImportStatement, PathBuf)>(100);
202
203    // Setup main thread for stdout
204    let check_dev_deps = cli.dev;
205    let stdout_thread = thread::spawn(move || -> io::Result<Option<Vec<String>>> {
206        for (import, path) in rx {
207            debug!(
208                package = import.package,
209                module = import.module,
210                path = path.to_str(),
211                "Checking import",
212            );
213            // Packages may have several aliases
214            let mut aliases = vec![];
215            if !import.module.is_empty() {
216                // Google-style package naming
217                aliases.push(format!(
218                    "{}-{}",
219                    import.package.replace('.', "-"),
220                    import.module
221                ));
222            }
223            // DBT Adapters
224            if import.package.starts_with("dbt.adapters") {
225                aliases.push({
226                    let parts: Vec<&str> = import.package.split('.').collect();
227                    [parts[0], parts[2]].join("-")
228                });
229            }
230            // SQLAlchemy Extentions
231            if import.package.contains('.') {
232                aliases.push(import.package.split('.').collect::<Vec<&str>>().join("-"));
233            }
234            if let Some(p) = import.package.split_once('.') {
235                aliases.push(p.0.to_string());
236            }
237
238            // Include parent packages after 1 level deep.
239            // This is to catch things like
240            // `from google.auth.transport import requests` --> google-auth
241            let v: Vec<&str> = import.package.split('.').collect();
242            if v.len() >= 2 {
243                aliases.push(format!("{}-{}", v[0], v[1]));
244            }
245
246            // Just the package
247            aliases.push(import.package);
248
249            for alias in aliases {
250                if main_deps.contains_key(&alias) {
251                    if let Some(v) = main_deps.remove(&alias) {
252                        if v.is_empty() {
253                            info!(found = alias, path = path.to_str());
254                        } else {
255                            for orig in v {
256                                info!(found = orig, path = path.to_str());
257                                main_deps.remove(&orig);
258                            }
259                        }
260                    }
261                }
262                if dev_deps.contains_key(&alias) {
263                    if let Some(v) = dev_deps.remove(&alias) {
264                        if v.is_empty() {
265                            info!("Found {} in {}", alias, path.display());
266                        } else {
267                            for orig in v {
268                                info!("Found {} in {}", orig, path.display());
269                                main_deps.remove(&orig);
270                            }
271                        }
272                    }
273                }
274            }
275        }
276
277        let mut udeps = Vec::new();
278        for (key, value) in &main_deps {
279            // Only print the non-alias names
280            if value.is_empty() {
281                udeps.push(key.to_owned());
282            }
283        }
284        if check_dev_deps {
285            for (key, value) in &dev_deps {
286                // Only print the non-alias names
287                if value.is_empty() {
288                    udeps.push(key.to_owned());
289                }
290            }
291        }
292
293        if udeps.is_empty() {
294            Ok(None)
295        } else {
296            // Filter out those from ignorefile
297            let filtered = apply_ignorefile(udeps)?;
298            if filtered.is_empty() {
299                Ok(None)
300            } else {
301                Ok(Some(filtered))
302            }
303        }
304    });
305
306    if cli.virtualenv {
307        // Iterate over Python files in parallel in the venv
308        let venv_path = get_venv_path()?;
309        info!("Reading files in venv: {}", venv_path);
310        let types = TypesBuilder::new().add_defaults().select("py").build()?;
311        let walker = WalkBuilder::new(venv_path)
312            .standard_filters(false)
313            .types(types)
314            .build_parallel();
315        walker.run(|| {
316            let tx = tx.clone();
317            Box::new(move |result| {
318                use ignore::WalkState::Continue;
319
320                if let Ok(dir) = result {
321                    if dir.file_type().unwrap().is_file() {
322                        let mut file = File::open(dir.path()).unwrap();
323                        let mut buf = Vec::new();
324                        file.read_to_end(&mut buf).unwrap();
325                        let contents = String::from_utf8_lossy(&buf);
326                        let v = parse_python_file(&contents).unwrap();
327
328                        let path = dir.into_path();
329                        for import in v {
330                            tx.send((import, path.clone())).unwrap();
331                        }
332                    }
333                }
334
335                Continue
336            })
337        });
338    }
339
340    // Iterate over Python files in parallel in the current directory
341    let types = TypesBuilder::new().add_defaults().select("py").build()?;
342    let walker = WalkBuilder::new("./")
343        .standard_filters(true)
344        .types(types)
345        .build_parallel();
346    walker.run(|| {
347        let tx = tx.clone();
348        Box::new(move |result| {
349            use ignore::WalkState::Continue;
350
351            if let Ok(dir) = result {
352                if dir.file_type().unwrap().is_file() {
353                    let contents = fs::read_to_string(dir.path()).unwrap();
354                    let v = parse_python_file(&contents).unwrap();
355
356                    let path = dir.into_path();
357                    for import in v {
358                        tx.send((import, path.clone())).unwrap();
359                    }
360                }
361            }
362
363            Continue
364        })
365    });
366
367    drop(tx);
368    match stdout_thread.join() {
369        Ok(j) => {
370            match j {
371                Ok(deps) => Ok(deps),
372                Err(err) => {
373                    // A broken pipe means graceful termination, so fall through.
374                    // Otherwise, something bad happened while writing to stdout, so bubble
375                    // it up.
376                    if err.kind() == io::ErrorKind::BrokenPipe {
377                        Ok(None)
378                    } else {
379                        Err(err.into())
380                    }
381                }
382            }
383        }
384        Err(_) => todo!(),
385    }
386}