Skip to main content

garbage_code_hunter/deps_shamer/
parser.rs

1//! Parsers for various dependency file formats.
2//!
//! Supports Cargo.toml, package.json, go.mod, requirements.txt, and pyproject.toml.
4
5use super::types::{DepFile, DepSource, Dependency, Ecosystem};
6use anyhow::{Context, Result};
7use std::fs;
8use std::path::Path;
9
10type DepParser<'a> = (&'a str, fn(&Path) -> Result<DepFile>);
11
12/// Detect and parse dependency files in the given directory.
13pub fn detect_and_parse(project_path: &Path) -> Result<Vec<DepFile>> {
14    let mut results = Vec::new();
15
16    let candidates: &[DepParser] = &[
17        ("Cargo.toml", parse_cargo_toml),
18        ("package.json", parse_package_json),
19        ("go.mod", parse_go_mod),
20        ("requirements.txt", parse_requirements_txt),
21        ("pyproject.toml", parse_pyproject_toml),
22    ];
23
24    for (filename, parser) in candidates {
25        let dep_path = project_path.join(filename);
26        if dep_path.exists() {
27            match parser(&dep_path) {
28                Ok(dep_file) => results.push(dep_file),
29                Err(e) => {
30                    eprintln!("Warning: failed to parse {}: {}", filename, e);
31                }
32            }
33        }
34    }
35
36    Ok(results)
37}
38
39/// Parse a Cargo.toml file.
40fn parse_cargo_toml(path: &Path) -> Result<DepFile> {
41    let content =
42        fs::read_to_string(path).with_context(|| format!("Failed to read {}", path.display()))?;
43    let doc: toml::Value = content
44        .parse()
45        .with_context(|| format!("Failed to parse TOML from {}", path.display()))?;
46
47    let mut dependencies = Vec::new();
48
49    // Parse [dependencies]
50    if let Some(deps) = doc.get("dependencies").and_then(|v| v.as_table()) {
51        for (name, value) in deps {
52            let dep = parse_cargo_dep(name, value, false, false);
53            dependencies.push(dep);
54        }
55    }
56
57    // Parse [dev-dependencies]
58    if let Some(deps) = doc.get("dev-dependencies").and_then(|v| v.as_table()) {
59        for (name, value) in deps {
60            let dep = parse_cargo_dep(name, value, true, false);
61            dependencies.push(dep);
62        }
63    }
64
65    // Parse [build-dependencies]
66    if let Some(deps) = doc.get("build-dependencies").and_then(|v| v.as_table()) {
67        for (name, value) in deps {
68            let dep = parse_cargo_dep(name, value, false, false);
69            dependencies.push(dep);
70        }
71    }
72
73    Ok(DepFile {
74        path: path.to_string_lossy().to_string(),
75        ecosystem: Ecosystem::Rust,
76        dependencies,
77    })
78}
79
80fn parse_cargo_dep(name: &str, value: &toml::Value, is_dev: bool, is_optional: bool) -> Dependency {
81    match value {
82        // Simple version string: serde = "1.0"
83        toml::Value::String(version) => Dependency {
84            name: name.to_string(),
85            version: version.clone(),
86            source: DepSource::Registry,
87            is_dev,
88            is_optional,
89        },
90        // Table form: serde = { version = "1.0", features = [...] }
91        toml::Value::Table(table) => {
92            let version = table
93                .get("version")
94                .and_then(|v| v.as_str())
95                .unwrap_or("*")
96                .to_string();
97
98            let source = if let Some(git_url) = table.get("git").and_then(|v| v.as_str()) {
99                DepSource::Git {
100                    url: git_url.to_string(),
101                }
102            } else if let Some(p) = table.get("path").and_then(|v| v.as_str()) {
103                DepSource::Path {
104                    path: p.to_string(),
105                }
106            } else {
107                DepSource::Registry
108            };
109
110            let optional = table
111                .get("optional")
112                .and_then(|v| v.as_bool())
113                .unwrap_or(is_optional);
114
115            Dependency {
116                name: name.to_string(),
117                version,
118                source,
119                is_dev,
120                is_optional: optional,
121            }
122        }
123        _ => Dependency {
124            name: name.to_string(),
125            version: "*".to_string(),
126            source: DepSource::Unknown,
127            is_dev,
128            is_optional,
129        },
130    }
131}
132
133/// Parse a package.json file.
134fn parse_package_json(path: &Path) -> Result<DepFile> {
135    let content =
136        fs::read_to_string(path).with_context(|| format!("Failed to read {}", path.display()))?;
137    let doc: serde_json::Value = serde_json::from_str(&content)
138        .with_context(|| format!("Failed to parse JSON from {}", path.display()))?;
139
140    let mut dependencies = Vec::new();
141
142    // Parse "dependencies"
143    if let Some(deps) = doc.get("dependencies").and_then(|v| v.as_object()) {
144        for (name, version_val) in deps {
145            let version = version_val.as_str().unwrap_or("*").to_string();
146            let source = classify_npm_source(&version);
147            dependencies.push(Dependency {
148                name: name.clone(),
149                version,
150                source,
151                is_dev: false,
152                is_optional: false,
153            });
154        }
155    }
156
157    // Parse "devDependencies"
158    if let Some(deps) = doc.get("devDependencies").and_then(|v| v.as_object()) {
159        for (name, version_val) in deps {
160            let version = version_val.as_str().unwrap_or("*").to_string();
161            let source = classify_npm_source(&version);
162            dependencies.push(Dependency {
163                name: name.clone(),
164                version,
165                source,
166                is_dev: true,
167                is_optional: false,
168            });
169        }
170    }
171
172    // Parse "optionalDependencies"
173    if let Some(deps) = doc.get("optionalDependencies").and_then(|v| v.as_object()) {
174        for (name, version_val) in deps {
175            let version = version_val.as_str().unwrap_or("*").to_string();
176            dependencies.push(Dependency {
177                name: name.clone(),
178                version,
179                source: DepSource::Registry,
180                is_dev: false,
181                is_optional: true,
182            });
183        }
184    }
185
186    Ok(DepFile {
187        path: path.to_string_lossy().to_string(),
188        ecosystem: Ecosystem::Node,
189        dependencies,
190    })
191}
192
193fn classify_npm_source(version: &str) -> DepSource {
194    if version.starts_with("git+")
195        || version.starts_with("github:")
196        || version.starts_with("git://")
197    {
198        DepSource::Git {
199            url: version.to_string(),
200        }
201    } else if version.starts_with("file:") || version.starts_with("link:") {
202        DepSource::Path {
203            path: version
204                .trim_start_matches("file:")
205                .trim_start_matches("link:")
206                .to_string(),
207        }
208    } else {
209        DepSource::Registry
210    }
211}
212
213/// Parse a go.mod file.
214fn parse_go_mod(path: &Path) -> Result<DepFile> {
215    let content =
216        fs::read_to_string(path).with_context(|| format!("Failed to read {}", path.display()))?;
217
218    let mut dependencies = Vec::new();
219    let mut in_require_block = false;
220
221    for line in content.lines() {
222        let trimmed = line.trim();
223
224        if trimmed.starts_with("require (") {
225            in_require_block = true;
226            continue;
227        }
228
229        if in_require_block && trimmed == ")" {
230            in_require_block = false;
231            continue;
232        }
233
234        if in_require_block || trimmed.starts_with("require ") {
235            let line_content = if trimmed.starts_with("require ") {
236                trimmed.strip_prefix("require ").unwrap_or(trimmed)
237            } else {
238                trimmed
239            };
240
241            let parts: Vec<&str> = line_content.split_whitespace().collect();
242            if parts.len() >= 2 {
243                let name = parts[0].to_string();
244                let version = parts[1].to_string();
245
246                // Check if it's indirect (comment at end)
247                let is_indirect = trimmed.contains("// indirect");
248
249                dependencies.push(Dependency {
250                    name,
251                    version,
252                    source: DepSource::Registry,
253                    is_dev: false,
254                    is_optional: is_indirect,
255                });
256            }
257        }
258    }
259
260    Ok(DepFile {
261        path: path.to_string_lossy().to_string(),
262        ecosystem: Ecosystem::Go,
263        dependencies,
264    })
265}
266
267/// Parse a requirements.txt file (Python).
268fn parse_requirements_txt(path: &Path) -> Result<DepFile> {
269    let content =
270        fs::read_to_string(path).with_context(|| format!("Failed to read {}", path.display()))?;
271
272    let mut dependencies = Vec::new();
273
274    for line in content.lines() {
275        let trimmed = line.trim();
276
277        // Skip empty lines, comments, and options
278        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with('-') {
279            continue;
280        }
281
282        // Handle lines like: package>=1.0, package==1.0, package~=1.0, package!=1.0, package>1.0
283        let (name, version) =
284            if let Some(pos) = trimmed.find(|c: char| ['>', '<', '=', '!', '~'].contains(&c)) {
285                (&trimmed[..pos], &trimmed[pos..])
286            } else if let Some(pos) = trimmed.find('[') {
287                // Handle extras: package[extra]>=1.0
288                (&trimmed[..pos], "*")
289            } else {
290                (trimmed, "*")
291            };
292
293        // Handle git URLs
294        let source = if trimmed.starts_with("git+") || trimmed.contains("git://") {
295            DepSource::Git {
296                url: trimmed.to_string(),
297            }
298        } else if trimmed.starts_with("./")
299            || trimmed.starts_with("../")
300            || trimmed.starts_with("/")
301        {
302            DepSource::Path {
303                path: trimmed.to_string(),
304            }
305        } else {
306            DepSource::Registry
307        };
308
309        dependencies.push(Dependency {
310            name: name.to_string(),
311            version: version.to_string(),
312            source,
313            is_dev: false,
314            is_optional: false,
315        });
316    }
317
318    Ok(DepFile {
319        path: path.to_string_lossy().to_string(),
320        ecosystem: Ecosystem::Python,
321        dependencies,
322    })
323}
324
325/// Parse a pyproject.toml file.
326fn parse_pyproject_toml(path: &Path) -> Result<DepFile> {
327    let content =
328        fs::read_to_string(path).with_context(|| format!("Failed to read {}", path.display()))?;
329    let doc: toml::Value = content
330        .parse()
331        .with_context(|| format!("Failed to parse TOML from {}", path.display()))?;
332
333    let mut dependencies = Vec::new();
334
335    // Parse [project.dependencies] (PEP 621)
336    if let Some(deps) = doc
337        .get("project")
338        .and_then(|p| p.get("dependencies"))
339        .and_then(|d| d.as_array())
340    {
341        for dep_val in deps {
342            if let Some(dep_str) = dep_val.as_str() {
343                let (name, version) = parse_python_dep_string(dep_str);
344                dependencies.push(Dependency {
345                    name,
346                    version,
347                    source: DepSource::Registry,
348                    is_dev: false,
349                    is_optional: false,
350                });
351            }
352        }
353    }
354
355    // Parse [project.optional-dependencies]
356    if let Some(opt_deps) = doc
357        .get("project")
358        .and_then(|p| p.get("optional-dependencies"))
359        .and_then(|d| d.as_table())
360    {
361        for (_group, deps) in opt_deps {
362            if let Some(deps_arr) = deps.as_array() {
363                for dep_val in deps_arr {
364                    if let Some(dep_str) = dep_val.as_str() {
365                        let (name, version) = parse_python_dep_string(dep_str);
366                        dependencies.push(Dependency {
367                            name,
368                            version,
369                            source: DepSource::Registry,
370                            is_dev: false,
371                            is_optional: true,
372                        });
373                    }
374                }
375            }
376        }
377    }
378
379    // Parse [tool.poetry.dependencies] (Poetry format)
380    if let Some(deps) = doc
381        .get("tool")
382        .and_then(|t| t.get("poetry"))
383        .and_then(|p| p.get("dependencies"))
384        .and_then(|d| d.as_table())
385    {
386        for (name, value) in deps {
387            if name == "python" {
388                continue;
389            }
390            let version = match value {
391                toml::Value::String(v) => v.clone(),
392                toml::Value::Table(t) => t
393                    .get("version")
394                    .and_then(|v| v.as_str())
395                    .unwrap_or("*")
396                    .to_string(),
397                _ => "*".to_string(),
398            };
399            dependencies.push(Dependency {
400                name: name.clone(),
401                version,
402                source: DepSource::Registry,
403                is_dev: false,
404                is_optional: false,
405            });
406        }
407    }
408
409    Ok(DepFile {
410        path: path.to_string_lossy().to_string(),
411        ecosystem: Ecosystem::Python,
412        dependencies,
413    })
414}
415
/// Split a Python requirement string into `(name, version_specifier)`.
///
/// An environment marker (everything from the first `;`) is discarded first. The version
/// specifier is the text starting at the first comparison character (`>`, `<`, `=`, `!`, `~`),
/// or `"*"` when there is none. The name is the text before the specifier with any extras
/// (`[...]`) removed. Both parts are trimmed of surrounding whitespace.
///
/// For example `requests[security]>=2.0; python_version < "3.8"` yields
/// `("requests", ">=2.0")`.
fn parse_python_dep_string(dep_str: &str) -> (String, String) {
    // Markers such as `; python_version < "3.8"` contain comparison characters themselves,
    // so they must be removed before looking for the version specifier.
    let requirement = dep_str.split(';').next().unwrap_or(dep_str).trim();

    let is_operator = |c: char| matches!(c, '>' | '<' | '=' | '!' | '~');
    let (name_part, version) = match requirement.find(is_operator) {
        Some(pos) => (&requirement[..pos], requirement[pos..].trim()),
        None => (requirement, "*"),
    };

    // Extras (`package[extra]`) are not part of the package name.
    let name = name_part.split('[').next().unwrap_or(name_part).trim();

    (name.to_string(), version.to_string())
}
434
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Write `contents` to `file_name` inside a fresh temporary directory.
    ///
    /// The `TempDir` is returned alongside the path because the directory is deleted as soon
    /// as the guard is dropped.
    fn write_manifest(file_name: &str, contents: &str) -> (TempDir, std::path::PathBuf) {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join(file_name);
        fs::write(&path, contents).unwrap();
        (dir, path)
    }

    /// Look up a parsed dependency by exact name, panicking when it is missing.
    fn dep<'a>(file: &'a DepFile, name: &str) -> &'a Dependency {
        file.dependencies.iter().find(|d| d.name == name).unwrap()
    }

    #[test]
    fn test_parse_cargo_toml_basic() {
        let (_dir, path) = write_manifest(
            "Cargo.toml",
            r#"
[dependencies]
serde = "1.0"
tokio = { version = "1.0", features = ["full"] }
my-lib = { git = "https://github.com/foo/bar" }
local-crate = { path = "../local" }

[dev-dependencies]
tempfile = "3.0"
"#,
        );

        let parsed = parse_cargo_toml(&path).unwrap();
        assert_eq!(parsed.ecosystem, Ecosystem::Rust);
        assert_eq!(parsed.dependencies.len(), 5);

        let serde_dep = dep(&parsed, "serde");
        assert_eq!(serde_dep.version, "1.0");
        assert_eq!(serde_dep.source, DepSource::Registry);
        assert!(!serde_dep.is_dev);

        assert_eq!(dep(&parsed, "tokio").version, "1.0");
        assert!(matches!(dep(&parsed, "my-lib").source, DepSource::Git { .. }));
        assert!(matches!(
            dep(&parsed, "local-crate").source,
            DepSource::Path { .. }
        ));
        assert!(dep(&parsed, "tempfile").is_dev);
    }

    #[test]
    fn test_parse_package_json_basic() {
        let (_dir, path) = write_manifest(
            "package.json",
            r#"{
    "name": "test-project",
    "dependencies": {
        "express": "^4.18.0",
        "lodash": "~4.17.0"
    },
    "devDependencies": {
        "jest": "^29.0.0"
    },
    "optionalDependencies": {
        "fsevents": "^2.3.0"
    }
}"#,
        );

        let parsed = parse_package_json(&path).unwrap();
        assert_eq!(parsed.ecosystem, Ecosystem::Node);
        assert_eq!(parsed.dependencies.len(), 4);

        let express = dep(&parsed, "express");
        assert_eq!(express.version, "^4.18.0");
        assert!(!express.is_dev);

        assert!(dep(&parsed, "jest").is_dev);
        assert!(dep(&parsed, "fsevents").is_optional);
    }

    #[test]
    fn test_parse_go_mod_basic() {
        let (_dir, path) = write_manifest(
            "go.mod",
            r#"module example.com/myapp

go 1.21

require (
    github.com/gin-gonic/gin v1.9.1
    github.com/go-sql-driver/mysql v1.7.0 // indirect
)
"#,
        );

        let parsed = parse_go_mod(&path).unwrap();
        assert_eq!(parsed.ecosystem, Ecosystem::Go);
        assert_eq!(parsed.dependencies.len(), 2);

        let gin = dep(&parsed, "github.com/gin-gonic/gin");
        assert_eq!(gin.version, "v1.9.1");
        assert!(!gin.is_optional);

        // Indirect requirements are reported through `is_optional`.
        assert!(dep(&parsed, "github.com/go-sql-driver/mysql").is_optional);
    }

    #[test]
    fn test_parse_requirements_txt_basic() {
        let (_dir, path) = write_manifest(
            "requirements.txt",
            r#"# This is a comment
flask>=2.0
requests==2.28.1
django~=4.0
numpy
git+https://github.com/foo/bar.git
"#,
        );

        let parsed = parse_requirements_txt(&path).unwrap();
        assert_eq!(parsed.ecosystem, Ecosystem::Python);
        assert_eq!(parsed.dependencies.len(), 5);

        assert_eq!(dep(&parsed, "flask").version, ">=2.0");
        assert_eq!(dep(&parsed, "numpy").version, "*");

        let git_dep = parsed
            .dependencies
            .iter()
            .find(|d| d.name.starts_with("git+"))
            .unwrap();
        assert!(matches!(git_dep.source, DepSource::Git { .. }));
    }

    #[test]
    fn test_detect_and_parse_multiple_files() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();
        fs::write(
            root.join("Cargo.toml"),
            "[dependencies]\nserde = \"1.0\"\n",
        )
        .unwrap();
        fs::write(
            root.join("package.json"),
            r#"{"dependencies": {"express": "^4.0"}}"#,
        )
        .unwrap();

        let results = detect_and_parse(root).unwrap();
        assert_eq!(results.len(), 2);
        for expected in [Ecosystem::Rust, Ecosystem::Node] {
            assert!(results.iter().any(|f| f.ecosystem == expected));
        }
    }

    #[test]
    fn test_parse_pyproject_toml_poetry() {
        let (_dir, path) = write_manifest(
            "pyproject.toml",
            r#"
[tool.poetry.dependencies]
python = "^3.9"
flask = "^2.0"
requests = ">=2.28"
"#,
        );

        let parsed = parse_pyproject_toml(&path).unwrap();
        assert_eq!(parsed.ecosystem, Ecosystem::Python);
        // The `python` entry is an interpreter constraint and must be filtered out.
        assert_eq!(parsed.dependencies.len(), 2);
        assert!(parsed.dependencies.iter().all(|d| d.name != "python"));
    }
}