python_check_updates/parsers/
pyproject.rs

1use super::{Dependency, DependencyParser};
2use crate::version::VersionSpec;
3use anyhow::{Context, Result};
4use std::fs;
5use std::path::PathBuf;
6use toml::Value;
7
/// Parser for `pyproject.toml` files.
///
/// Understands the dependency layouts of PEP 621 (`[project]`), Poetry
/// (`[tool.poetry]`), PDM (`[tool.pdm]`) and PEP 735 (`[dependency-groups]`).
/// The parser is stateless, so it is cheap to construct, copy and share.
#[derive(Debug, Default, Clone, Copy)]
pub struct PyProjectParser;
10
11impl PyProjectParser {
12    pub fn new() -> Self {
13        Self
14    }
15
16    /// Parse PEP 621 format dependencies
17    fn parse_pep621_dependencies(
18        &self,
19        toml_value: &Value,
20        path: &PathBuf,
21        content: &str,
22    ) -> Result<Vec<Dependency>> {
23        let mut dependencies = Vec::new();
24
25        // Parse [project.dependencies] - array of strings
26        if let Some(deps) = toml_value
27            .get("project")
28            .and_then(|p| p.get("dependencies"))
29            .and_then(|d| d.as_array())
30        {
31            for dep_value in deps {
32                if let Some(dep_str) = dep_value.as_str() {
33                    if let Some(dep) = self.parse_dependency_string(dep_str, path, content) {
34                        dependencies.push(dep);
35                    }
36                }
37            }
38        }
39
40        // Parse [project.optional-dependencies] - tables of arrays
41        if let Some(optional_deps) = toml_value
42            .get("project")
43            .and_then(|p| p.get("optional-dependencies"))
44            .and_then(|d| d.as_table())
45        {
46            for (_group_name, deps_value) in optional_deps {
47                if let Some(deps) = deps_value.as_array() {
48                    for dep_value in deps {
49                        if let Some(dep_str) = dep_value.as_str() {
50                            if let Some(dep) = self.parse_dependency_string(dep_str, path, content)
51                            {
52                                dependencies.push(dep);
53                            }
54                        }
55                    }
56                }
57            }
58        }
59
60        Ok(dependencies)
61    }
62
63    /// Parse Poetry format dependencies
64    fn parse_poetry_dependencies(
65        &self,
66        toml_value: &Value,
67        path: &PathBuf,
68        content: &str,
69    ) -> Result<Vec<Dependency>> {
70        let mut dependencies = Vec::new();
71
72        // Parse [tool.poetry.dependencies]
73        if let Some(deps) = toml_value
74            .get("tool")
75            .and_then(|t| t.get("poetry"))
76            .and_then(|p| p.get("dependencies"))
77            .and_then(|d| d.as_table())
78        {
79            for (pkg_name, version_value) in deps {
80                // Skip python itself
81                if pkg_name == "python" {
82                    continue;
83                }
84                if let Some(dep) = self.parse_poetry_dependency(pkg_name, version_value, path, content)
85                {
86                    dependencies.push(dep);
87                }
88            }
89        }
90
91        // Parse [tool.poetry.dev-dependencies] (legacy)
92        if let Some(deps) = toml_value
93            .get("tool")
94            .and_then(|t| t.get("poetry"))
95            .and_then(|p| p.get("dev-dependencies"))
96            .and_then(|d| d.as_table())
97        {
98            for (pkg_name, version_value) in deps {
99                if let Some(dep) = self.parse_poetry_dependency(pkg_name, version_value, path, content)
100                {
101                    dependencies.push(dep);
102                }
103            }
104        }
105
106        // Parse [tool.poetry.group.*.dependencies]
107        if let Some(groups) = toml_value
108            .get("tool")
109            .and_then(|t| t.get("poetry"))
110            .and_then(|p| p.get("group"))
111            .and_then(|g| g.as_table())
112        {
113            for (_group_name, group_value) in groups {
114                if let Some(deps) = group_value.get("dependencies").and_then(|d| d.as_table()) {
115                    for (pkg_name, version_value) in deps {
116                        if pkg_name == "python" {
117                            continue;
118                        }
119                        if let Some(dep) = self.parse_poetry_dependency(pkg_name, version_value, path, content)
120                        {
121                            dependencies.push(dep);
122                        }
123                    }
124                }
125            }
126        }
127
128        Ok(dependencies)
129    }
130
131    /// Parse PDM format dependencies
132    fn parse_pdm_dependencies(
133        &self,
134        toml_value: &Value,
135        path: &PathBuf,
136        content: &str,
137    ) -> Result<Vec<Dependency>> {
138        let mut dependencies = Vec::new();
139
140        // Parse [tool.pdm.dependencies] - similar to PEP 621 but in tool.pdm
141        if let Some(deps) = toml_value
142            .get("tool")
143            .and_then(|t| t.get("pdm"))
144            .and_then(|p| p.get("dependencies"))
145            .and_then(|d| d.as_array())
146        {
147            for dep_value in deps {
148                if let Some(dep_str) = dep_value.as_str() {
149                    if let Some(dep) = self.parse_dependency_string(dep_str, path, content) {
150                        dependencies.push(dep);
151                    }
152                }
153            }
154        }
155
156        // Parse [tool.pdm.dev-dependencies] - table of arrays
157        if let Some(dev_deps) = toml_value
158            .get("tool")
159            .and_then(|t| t.get("pdm"))
160            .and_then(|p| p.get("dev-dependencies"))
161            .and_then(|d| d.as_table())
162        {
163            for (_group_name, deps_value) in dev_deps {
164                if let Some(deps) = deps_value.as_array() {
165                    for dep_value in deps {
166                        if let Some(dep_str) = dep_value.as_str() {
167                            if let Some(dep) = self.parse_dependency_string(dep_str, path, content)
168                            {
169                                dependencies.push(dep);
170                            }
171                        }
172                    }
173                }
174            }
175        }
176
177        Ok(dependencies)
178    }
179
180    /// Parse PEP 735 dependency-groups format
181    fn parse_dependency_groups(
182        &self,
183        toml_value: &Value,
184        path: &PathBuf,
185        content: &str,
186    ) -> Result<Vec<Dependency>> {
187        let mut dependencies = Vec::new();
188
189        // Parse [dependency-groups] - tables of arrays
190        if let Some(groups) = toml_value
191            .get("dependency-groups")
192            .and_then(|d| d.as_table())
193        {
194            for (_group_name, deps_value) in groups {
195                if let Some(deps) = deps_value.as_array() {
196                    for dep_value in deps {
197                        if let Some(dep_str) = dep_value.as_str() {
198                            if let Some(dep) = self.parse_dependency_string(dep_str, path, content)
199                            {
200                                dependencies.push(dep);
201                            }
202                        }
203                    }
204                }
205            }
206        }
207
208        Ok(dependencies)
209    }
210
211    /// Parse a Poetry dependency entry which can be a string or inline table
212    fn parse_poetry_dependency(
213        &self,
214        name: &str,
215        value: &Value,
216        path: &PathBuf,
217        content: &str,
218    ) -> Option<Dependency> {
219        let version_str = match value {
220            // Simple string version: package = "^1.0"
221            Value::String(s) => s.clone(),
222            // Inline table: package = {version = "^1.0", optional = true}
223            Value::Table(table) => {
224                // Get version from the table
225                table.get("version")?.as_str()?.to_string()
226            }
227            _ => return None,
228        };
229
230        // Find the line number and original line text
231        let (line_number, original_line) = self.find_line_in_content(content, name, &version_str);
232
233        // Parse the version spec
234        let version_spec = VersionSpec::parse(&version_str).ok()?;
235
236        Some(Dependency {
237            name: name.to_lowercase().replace('_', "-"),
238            version_spec,
239            source_file: path.clone(),
240            line_number,
241            original_line,
242        })
243    }
244
245    /// Parse a dependency string like "requests>=2.28.0" or "numpy==1.24.0"
246    fn parse_dependency_string(
247        &self,
248        dep_str: &str,
249        path: &PathBuf,
250        content: &str,
251    ) -> Option<Dependency> {
252        // Split by comparison operators
253        let dep_str = dep_str.trim();
254
255        // Handle markers (like ; python_version >= "3.8") by splitting on semicolon
256        let dep_str = dep_str.split(';').next()?.trim();
257
258        // Handle extras (like requests[security]) - extract package name
259        let dep_str_no_extras = if let Some(idx) = dep_str.find('[') {
260            &dep_str[..idx]
261        } else {
262            dep_str
263        };
264
265        // Find the package name and version spec
266        let operators = [">=", "<=", "==", "!=", "~=", ">", "<", "^", "~"];
267
268        for op in &operators {
269            if let Some(idx) = dep_str_no_extras.find(op) {
270                let pkg_name = dep_str_no_extras[..idx].trim();
271                let version_part = dep_str_no_extras[idx..].trim();
272
273                // Parse version spec
274                let version_spec = VersionSpec::parse(version_part).ok()?;
275
276                // Find line number and original line
277                let (line_number, original_line) = self.find_line_in_content(content, pkg_name, version_part);
278
279                return Some(Dependency {
280                    name: pkg_name.to_lowercase().replace('_', "-"),
281                    version_spec,
282                    source_file: path.clone(),
283                    line_number,
284                    original_line,
285                });
286            }
287        }
288
289        // No version specifier found - might be just package name
290        if !dep_str_no_extras.is_empty() {
291            let pkg_name = dep_str_no_extras.trim();
292            let (line_number, original_line) = self.find_line_in_content(content, pkg_name, "");
293
294            return Some(Dependency {
295                name: pkg_name.to_lowercase().replace('_', "-"),
296                version_spec: VersionSpec::Any,
297                source_file: path.clone(),
298                line_number,
299                original_line,
300            });
301        }
302
303        None
304    }
305
306    /// Find the line number and original line text for a dependency in the file content
307    fn find_line_in_content(&self, content: &str, pkg_name: &str, version_str: &str) -> (usize, String) {
308        // Search for the line containing the package name
309        for (i, line) in content.lines().enumerate() {
310            let line_lower = line.to_lowercase();
311            let pkg_lower = pkg_name.to_lowercase();
312
313            // Check if line contains the package name
314            if line_lower.contains(&pkg_lower) {
315                // For TOML tables, look for the key
316                if line.contains('=') || line.contains(pkg_name) {
317                    // Make sure it's not a comment
318                    if let Some(comment_idx) = line.find('#') {
319                        if line[..comment_idx].to_lowercase().contains(&pkg_lower) {
320                            return (i + 1, line.trim().to_string());
321                        }
322                    } else if !version_str.is_empty() && line.contains(version_str) {
323                        return (i + 1, line.trim().to_string());
324                    } else if line_lower.contains(&pkg_lower) {
325                        return (i + 1, line.trim().to_string());
326                    }
327                }
328            }
329        }
330
331        // Default if not found
332        (1, format!("{} = \"{}\"", pkg_name, version_str))
333    }
334}
335
336impl DependencyParser for PyProjectParser {
337    fn parse(&self, path: &PathBuf) -> Result<Vec<Dependency>> {
338        // Read file content
339        let content = fs::read_to_string(path)
340            .with_context(|| format!("Failed to read file: {}", path.display()))?;
341
342        // Parse TOML
343        let toml_value: Value = toml::from_str(&content)
344            .with_context(|| format!("Failed to parse TOML: {}", path.display()))?;
345
346        let mut all_dependencies = Vec::new();
347
348        // Try parsing all formats - a file might have multiple formats
349
350        // PEP 621 format
351        if let Ok(deps) = self.parse_pep621_dependencies(&toml_value, path, &content) {
352            all_dependencies.extend(deps);
353        }
354
355        // Poetry format
356        if let Ok(deps) = self.parse_poetry_dependencies(&toml_value, path, &content) {
357            all_dependencies.extend(deps);
358        }
359
360        // PDM format
361        if let Ok(deps) = self.parse_pdm_dependencies(&toml_value, path, &content) {
362            all_dependencies.extend(deps);
363        }
364
365        // PEP 735 dependency-groups format
366        if let Ok(deps) = self.parse_dependency_groups(&toml_value, path, &content) {
367            all_dependencies.extend(deps);
368        }
369
370        // Deduplicate dependencies by name (keep first occurrence)
371        let mut seen = std::collections::HashSet::new();
372        all_dependencies.retain(|dep| seen.insert(dep.name.clone()));
373
374        Ok(all_dependencies)
375    }
376
377    fn can_parse(&self, path: &PathBuf) -> bool {
378        path.file_name()
379            .and_then(|n| n.to_str())
380            .map(|n| n == "pyproject.toml")
381            .unwrap_or(false)
382    }
383}
384
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `toml` to a temporary file and runs the parser over it.
    fn parse_toml(toml: &str) -> Vec<Dependency> {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(toml.as_bytes()).unwrap();
        let path = PathBuf::from(file.path());
        PyProjectParser::new().parse(&path).unwrap()
    }

    /// True when `deps` holds an entry with exactly this (normalized) name.
    fn has_dep(deps: &[Dependency], name: &str) -> bool {
        deps.iter().any(|d| d.name == name)
    }

    #[test]
    fn test_can_parse() {
        let parser = PyProjectParser::new();
        assert!(parser.can_parse(&PathBuf::from("pyproject.toml")));
        assert!(parser.can_parse(&PathBuf::from("/path/to/pyproject.toml")));
        assert!(!parser.can_parse(&PathBuf::from("requirements.txt")));
    }

    #[test]
    fn test_parse_pep621_dependencies() {
        let deps = parse_toml(
            r#"
[project]
name = "myproject"
dependencies = [
    "requests>=2.28.0",
    "numpy==1.24.0",
    "flask~=2.0.0",
]

[project.optional-dependencies]
dev = [
    "pytest>=7.0.0",
    "black>=22.0.0",
]
"#,
        );

        assert_eq!(deps.len(), 5);
        for expected in ["requests", "numpy", "flask", "pytest", "black"].iter() {
            assert!(has_dep(&deps, expected));
        }
    }

    #[test]
    fn test_parse_poetry_dependencies() {
        let deps = parse_toml(
            r#"
[tool.poetry]
name = "myproject"

[tool.poetry.dependencies]
python = "^3.8"
requests = "^2.28.0"
numpy = "1.24.0"

[tool.poetry.group.dev.dependencies]
pytest = "^7.0.0"
black = {version = "^22.0.0", optional = true}
"#,
        );

        // The interpreter constraint is not a package.
        assert!(!has_dep(&deps, "python"));
        for expected in ["requests", "numpy", "pytest", "black"].iter() {
            assert!(has_dep(&deps, expected));
        }

        // Caret constraints keep their operator.
        let requests_dep = deps.iter().find(|d| d.name == "requests").unwrap();
        assert!(matches!(requests_dep.version_spec, VersionSpec::Caret(_)));
    }

    #[test]
    fn test_parse_pdm_dependencies() {
        // PDM keeps main dependencies in PEP 621 form and dev dependencies
        // under [tool.pdm.dev-dependencies].
        let deps = parse_toml(
            r#"
[project]
name = "myproject"
dependencies = [
    "requests>=2.28.0",
    "numpy==1.24.0",
]

[tool.pdm.dev-dependencies]
test = [
    "pytest>=7.0.0",
]
"#,
        );

        for expected in ["requests", "numpy", "pytest"].iter() {
            assert!(has_dep(&deps, expected));
        }
    }

    #[test]
    fn test_parse_dependency_with_extras() {
        let deps = parse_toml(
            r#"
[project]
dependencies = [
    "requests[security]>=2.28.0",
]
"#,
        );

        assert_eq!(deps.len(), 1);
        assert_eq!(deps[0].name, "requests");
    }

    #[test]
    fn test_parse_dependency_with_markers() {
        let deps = parse_toml(
            r#"
[project]
dependencies = [
    "requests>=2.28.0; python_version >= '3.8'",
]
"#,
        );

        assert_eq!(deps.len(), 1);
        assert_eq!(deps[0].name, "requests");
    }

    #[test]
    fn test_deduplication() {
        // A package listed in several sections is reported once.
        let deps = parse_toml(
            r#"
[project]
dependencies = [
    "requests>=2.28.0",
]

[project.optional-dependencies]
dev = [
    "requests>=2.30.0",
]
"#,
        );

        let requests_entries = deps.iter().filter(|d| d.name == "requests").count();
        assert_eq!(requests_entries, 1);
    }
}