Skip to main content

pcu/parsers/
pyproject.rs

1use super::{Dependency, DependencyParser};
2use check_updates_core::VersionSpec;
3use anyhow::{Context, Result};
4use std::fs;
5use std::path::Path;
6use toml::Value;
7
/// Parser for `pyproject.toml` files.
///
/// Reads dependency declarations from the PEP 621 `[project]` table, the
/// Poetry `[tool.poetry]` tables, the PDM `[tool.pdm]` tables and the
/// PEP 735 `[dependency-groups]` table. The type is a unit struct and holds
/// no state.
pub struct PyProjectParser;
10
11impl Default for PyProjectParser {
12    fn default() -> Self {
13        Self::new()
14    }
15}
16
17impl PyProjectParser {
18    pub fn new() -> Self {
19        Self
20    }
21
22    /// Parse PEP 621 format dependencies
23    fn parse_pep621_dependencies(
24        &self,
25        toml_value: &Value,
26        path: &Path,
27        content: &str,
28    ) -> Result<Vec<Dependency>> {
29        let mut dependencies = Vec::new();
30
31        // Parse [project.dependencies] - array of strings
32        if let Some(deps) = toml_value
33            .get("project")
34            .and_then(|p| p.get("dependencies"))
35            .and_then(|d| d.as_array())
36        {
37            for dep_value in deps {
38                if let Some(dep_str) = dep_value.as_str()
39                    && let Some(dep) = self.parse_dependency_string(dep_str, path, content) {
40                        dependencies.push(dep);
41                    }
42            }
43        }
44
45        // Parse [project.optional-dependencies] - tables of arrays
46        if let Some(optional_deps) = toml_value
47            .get("project")
48            .and_then(|p| p.get("optional-dependencies"))
49            .and_then(|d| d.as_table())
50        {
51            for (_group_name, deps_value) in optional_deps {
52                if let Some(deps) = deps_value.as_array() {
53                    for dep_value in deps {
54                        if let Some(dep_str) = dep_value.as_str()
55                            && let Some(dep) = self.parse_dependency_string(dep_str, path, content)
56                            {
57                                dependencies.push(dep);
58                            }
59                    }
60                }
61            }
62        }
63
64        Ok(dependencies)
65    }
66
67    /// Parse Poetry format dependencies
68    fn parse_poetry_dependencies(
69        &self,
70        toml_value: &Value,
71        path: &Path,
72        content: &str,
73    ) -> Result<Vec<Dependency>> {
74        let mut dependencies = Vec::new();
75
76        // Parse [tool.poetry.dependencies]
77        if let Some(deps) = toml_value
78            .get("tool")
79            .and_then(|t| t.get("poetry"))
80            .and_then(|p| p.get("dependencies"))
81            .and_then(|d| d.as_table())
82        {
83            for (pkg_name, version_value) in deps {
84                // Skip python itself
85                if pkg_name == "python" {
86                    continue;
87                }
88                if let Some(dep) = self.parse_poetry_dependency(pkg_name, version_value, path, content)
89                {
90                    dependencies.push(dep);
91                }
92            }
93        }
94
95        // Parse [tool.poetry.dev-dependencies] (legacy)
96        if let Some(deps) = toml_value
97            .get("tool")
98            .and_then(|t| t.get("poetry"))
99            .and_then(|p| p.get("dev-dependencies"))
100            .and_then(|d| d.as_table())
101        {
102            for (pkg_name, version_value) in deps {
103                if let Some(dep) = self.parse_poetry_dependency(pkg_name, version_value, path, content)
104                {
105                    dependencies.push(dep);
106                }
107            }
108        }
109
110        // Parse [tool.poetry.group.*.dependencies]
111        if let Some(groups) = toml_value
112            .get("tool")
113            .and_then(|t| t.get("poetry"))
114            .and_then(|p| p.get("group"))
115            .and_then(|g| g.as_table())
116        {
117            for (_group_name, group_value) in groups {
118                if let Some(deps) = group_value.get("dependencies").and_then(|d| d.as_table()) {
119                    for (pkg_name, version_value) in deps {
120                        if pkg_name == "python" {
121                            continue;
122                        }
123                        if let Some(dep) = self.parse_poetry_dependency(pkg_name, version_value, path, content)
124                        {
125                            dependencies.push(dep);
126                        }
127                    }
128                }
129            }
130        }
131
132        Ok(dependencies)
133    }
134
135    /// Parse PDM format dependencies
136    fn parse_pdm_dependencies(
137        &self,
138        toml_value: &Value,
139        path: &Path,
140        content: &str,
141    ) -> Result<Vec<Dependency>> {
142        let mut dependencies = Vec::new();
143
144        // Parse [tool.pdm.dependencies] - similar to PEP 621 but in tool.pdm
145        if let Some(deps) = toml_value
146            .get("tool")
147            .and_then(|t| t.get("pdm"))
148            .and_then(|p| p.get("dependencies"))
149            .and_then(|d| d.as_array())
150        {
151            for dep_value in deps {
152                if let Some(dep_str) = dep_value.as_str()
153                    && let Some(dep) = self.parse_dependency_string(dep_str, path, content) {
154                        dependencies.push(dep);
155                    }
156            }
157        }
158
159        // Parse [tool.pdm.dev-dependencies] - table of arrays
160        if let Some(dev_deps) = toml_value
161            .get("tool")
162            .and_then(|t| t.get("pdm"))
163            .and_then(|p| p.get("dev-dependencies"))
164            .and_then(|d| d.as_table())
165        {
166            for (_group_name, deps_value) in dev_deps {
167                if let Some(deps) = deps_value.as_array() {
168                    for dep_value in deps {
169                        if let Some(dep_str) = dep_value.as_str()
170                            && let Some(dep) = self.parse_dependency_string(dep_str, path, content)
171                            {
172                                dependencies.push(dep);
173                            }
174                    }
175                }
176            }
177        }
178
179        Ok(dependencies)
180    }
181
182    /// Parse PEP 735 dependency-groups format
183    fn parse_dependency_groups(
184        &self,
185        toml_value: &Value,
186        path: &Path,
187        content: &str,
188    ) -> Result<Vec<Dependency>> {
189        let mut dependencies = Vec::new();
190
191        // Parse [dependency-groups] - tables of arrays
192        if let Some(groups) = toml_value
193            .get("dependency-groups")
194            .and_then(|d| d.as_table())
195        {
196            for (_group_name, deps_value) in groups {
197                if let Some(deps) = deps_value.as_array() {
198                    for dep_value in deps {
199                        if let Some(dep_str) = dep_value.as_str()
200                            && let Some(dep) = self.parse_dependency_string(dep_str, path, content)
201                            {
202                                dependencies.push(dep);
203                            }
204                    }
205                }
206            }
207        }
208
209        Ok(dependencies)
210    }
211
212    /// Parse a Poetry dependency entry which can be a string or inline table
213    fn parse_poetry_dependency(
214        &self,
215        name: &str,
216        value: &Value,
217        path: &Path,
218        content: &str,
219    ) -> Option<Dependency> {
220        let version_str = match value {
221            // Simple string version: package = "^1.0"
222            Value::String(s) => s.clone(),
223            // Inline table: package = {version = "^1.0", optional = true}
224            Value::Table(table) => {
225                // Get version from the table
226                table.get("version")?.as_str()?.to_string()
227            }
228            _ => return None,
229        };
230
231        // Find the line number and original line text
232        let (line_number, original_line) = self.find_line_in_content(content, name, &version_str);
233
234        // Parse the version spec
235        let version_spec = VersionSpec::parse(&version_str).ok()?;
236
237        Some(Dependency {
238            name: name.to_lowercase().replace('_', "-"),
239            version_spec,
240            source_file: path.to_path_buf(),
241            line_number,
242            original_line,
243        })
244    }
245
246    /// Parse a dependency string like "requests>=2.28.0" or "numpy==1.24.0"
247    fn parse_dependency_string(
248        &self,
249        dep_str: &str,
250        path: &Path,
251        content: &str,
252    ) -> Option<Dependency> {
253        // Split by comparison operators
254        let dep_str = dep_str.trim();
255
256        // Handle markers (like ; python_version >= "3.8") by splitting on semicolon
257        let dep_str = dep_str.split(';').next()?.trim();
258
259        // Handle extras (like requests[security]) - extract package name
260        let dep_str_no_extras = if let Some(idx) = dep_str.find('[') {
261            &dep_str[..idx]
262        } else {
263            dep_str
264        };
265
266        // Find the package name and version spec
267        let operators = [">=", "<=", "==", "!=", "~=", ">", "<", "^", "~"];
268
269        for op in &operators {
270            if let Some(idx) = dep_str_no_extras.find(op) {
271                let pkg_name = dep_str_no_extras[..idx].trim();
272                let version_part = dep_str_no_extras[idx..].trim();
273
274                // Parse version spec
275                let version_spec = VersionSpec::parse(version_part).ok()?;
276
277                // Find line number and original line
278                let (line_number, original_line) = self.find_line_in_content(content, pkg_name, version_part);
279
280                return Some(Dependency {
281                    name: pkg_name.to_lowercase().replace('_', "-"),
282                    version_spec,
283                    source_file: path.to_path_buf(),
284                    line_number,
285                    original_line,
286                });
287            }
288        }
289
290        // No version specifier found - might be just package name
291        if !dep_str_no_extras.is_empty() {
292            let pkg_name = dep_str_no_extras.trim();
293            let (line_number, original_line) = self.find_line_in_content(content, pkg_name, "");
294
295            return Some(Dependency {
296                name: pkg_name.to_lowercase().replace('_', "-"),
297                version_spec: VersionSpec::Any,
298                source_file: path.to_path_buf(),
299                line_number,
300                original_line,
301            });
302        }
303
304        None
305    }
306
307    /// Find the line number and original line text for a dependency in the file content
308    fn find_line_in_content(&self, content: &str, pkg_name: &str, version_str: &str) -> (usize, String) {
309        // Search for the line containing the package name
310        for (i, line) in content.lines().enumerate() {
311            let line_lower = line.to_lowercase();
312            let pkg_lower = pkg_name.to_lowercase();
313
314            // Check if line contains the package name
315            if line_lower.contains(&pkg_lower) {
316                // For TOML tables, look for the key
317                if line.contains('=') || line.contains(pkg_name) {
318                    // Make sure it's not a comment
319                    if let Some(comment_idx) = line.find('#') {
320                        if line[..comment_idx].to_lowercase().contains(&pkg_lower) {
321                            return (i + 1, line.trim().to_string());
322                        }
323                    } else if (!version_str.is_empty() && line.contains(version_str))
324                        || line_lower.contains(&pkg_lower)
325                    {
326                        return (i + 1, line.trim().to_string());
327                    }
328                }
329            }
330        }
331
332        // Default if not found
333        (1, format!("{pkg_name} = \"{version_str}\""))
334    }
335}
336
337impl DependencyParser for PyProjectParser {
338    fn parse(&self, path: &Path) -> Result<Vec<Dependency>> {
339        // Read file content
340        let content = fs::read_to_string(path)
341            .with_context(|| format!("Failed to read file: {}", path.display()))?;
342
343        // Parse TOML
344        let toml_value: Value = toml::from_str(&content)
345            .with_context(|| format!("Failed to parse TOML: {}", path.display()))?;
346
347        let mut all_dependencies = Vec::new();
348
349        // Try parsing all formats - a file might have multiple formats
350
351        // PEP 621 format
352        if let Ok(deps) = self.parse_pep621_dependencies(&toml_value, path, &content) {
353            all_dependencies.extend(deps);
354        }
355
356        // Poetry format
357        if let Ok(deps) = self.parse_poetry_dependencies(&toml_value, path, &content) {
358            all_dependencies.extend(deps);
359        }
360
361        // PDM format
362        if let Ok(deps) = self.parse_pdm_dependencies(&toml_value, path, &content) {
363            all_dependencies.extend(deps);
364        }
365
366        // PEP 735 dependency-groups format
367        if let Ok(deps) = self.parse_dependency_groups(&toml_value, path, &content) {
368            all_dependencies.extend(deps);
369        }
370
371        // Deduplicate dependencies by name (keep first occurrence)
372        let mut seen = std::collections::HashSet::new();
373        all_dependencies.retain(|dep| seen.insert(dep.name.clone()));
374
375        Ok(all_dependencies)
376    }
377
378    fn can_parse(&self, path: &Path) -> bool {
379        path.file_name()
380            .and_then(|n| n.to_str())
381            .map(|n| n == "pyproject.toml")
382            .unwrap_or(false)
383    }
384}
385
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use std::path::PathBuf;
    use tempfile::NamedTempFile;

    /// Writes `content` to a temporary file and runs the parser over it.
    fn parse_content(content: &str) -> Vec<Dependency> {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        let path = PathBuf::from(file.path());

        PyProjectParser::new().parse(&path).unwrap()
    }

    /// True when `deps` holds an entry with exactly this normalized name.
    fn has_dep(deps: &[Dependency], name: &str) -> bool {
        deps.iter().any(|d| d.name == name)
    }

    #[test]
    fn test_can_parse() {
        let parser = PyProjectParser::new();
        assert!(parser.can_parse(&PathBuf::from("pyproject.toml")));
        assert!(parser.can_parse(&PathBuf::from("/path/to/pyproject.toml")));
        assert!(!parser.can_parse(&PathBuf::from("requirements.txt")));
    }

    #[test]
    fn test_parse_pep621_dependencies() {
        let deps = parse_content(
            r#"
[project]
name = "myproject"
dependencies = [
    "requests>=2.28.0",
    "numpy==1.24.0",
    "flask~=2.0.0",
]

[project.optional-dependencies]
dev = [
    "pytest>=7.0.0",
    "black>=22.0.0",
]
"#,
        );

        assert_eq!(deps.len(), 5);
        for expected in ["requests", "numpy", "flask", "pytest", "black"] {
            assert!(has_dep(&deps, expected));
        }
    }

    #[test]
    fn test_parse_poetry_dependencies() {
        let deps = parse_content(
            r#"
[tool.poetry]
name = "myproject"

[tool.poetry.dependencies]
python = "^3.8"
requests = "^2.28.0"
numpy = "1.24.0"

[tool.poetry.group.dev.dependencies]
pytest = "^7.0.0"
black = {version = "^22.0.0", optional = true}
"#,
        );

        // The interpreter constraint must not be reported as a dependency.
        assert!(!has_dep(&deps, "python"));
        for expected in ["requests", "numpy", "pytest", "black"] {
            assert!(has_dep(&deps, expected));
        }

        // Check version specs are parsed correctly
        let requests_dep = deps.iter().find(|d| d.name == "requests").unwrap();
        assert!(matches!(requests_dep.version_spec, VersionSpec::Caret(_)));
    }

    #[test]
    fn test_parse_pdm_dependencies() {
        // PDM uses PEP 621 format for main dependencies
        // and [tool.pdm.dev-dependencies] for dev dependencies
        let deps = parse_content(
            r#"
[project]
name = "myproject"
dependencies = [
    "requests>=2.28.0",
    "numpy==1.24.0",
]

[tool.pdm.dev-dependencies]
test = [
    "pytest>=7.0.0",
]
"#,
        );

        for expected in ["requests", "numpy", "pytest"] {
            assert!(has_dep(&deps, expected));
        }
    }

    #[test]
    fn test_parse_dependency_with_extras() {
        let deps = parse_content(
            r#"
[project]
dependencies = [
    "requests[security]>=2.28.0",
]
"#,
        );

        assert_eq!(deps.len(), 1);
        assert_eq!(deps[0].name, "requests");
    }

    #[test]
    fn test_parse_dependency_with_markers() {
        let deps = parse_content(
            r#"
[project]
dependencies = [
    "requests>=2.28.0; python_version >= '3.8'",
]
"#,
        );

        assert_eq!(deps.len(), 1);
        assert_eq!(deps[0].name, "requests");
    }

    #[test]
    fn test_deduplication() {
        // If same package appears in multiple sections, keep first one
        let deps = parse_content(
            r#"
[project]
dependencies = [
    "requests>=2.28.0",
]

[project.optional-dependencies]
dev = [
    "requests>=2.30.0",
]
"#,
        );

        // Should only have one requests entry (the first one)
        assert_eq!(deps.iter().filter(|d| d.name == "requests").count(), 1);
    }
}
564}