Skip to main content

provenant/parsers/
pip_inspect_deplock.rs

//! Parser for pip inspect deplock files.
//!
//! Extracts package metadata from `pip-inspect.deplock` files, which contain
//! installed Python package information generated by dependency-inspector.
//!
//! # Supported Formats
//! - `pip-inspect.deplock` - pip inspect output in deplock format
//!
//! # Key Features
//! - Installed package identification
//! - Main package extraction
//! - License and description metadata
//!
//! # Implementation Notes
//! - Format: JSON with installed packages list
//! - Generated by dependency-inspector tool
//! - Spec: <https://pip.pypa.io/en/stable/cli/pip_inspect/>
18
19use crate::models::{DatasourceId, PackageType};
20use std::fs;
21use std::path::Path;
22
23use crate::parser_warn as warn;
24use serde::{Deserialize, Serialize};
25use serde_json::Value;
26
27use crate::models::PackageData;
28
29use super::PackageParser;
30use super::license_normalization::normalize_spdx_declared_license;
31use super::python::extract_requires_dist_dependencies;
32
33const PACKAGE_TYPE: PackageType = PackageType::Pypi;
34
35fn default_package_data() -> PackageData {
36    PackageData {
37        package_type: Some(PACKAGE_TYPE),
38        primary_language: Some("Python".to_string()),
39        datasource_id: Some(DatasourceId::PypiInspectDeplock),
40        ..Default::default()
41    }
42}
43
/// Stateless parser for `pip-inspect.deplock` files.
///
/// The type carries no data; its behaviour is provided entirely by its
/// [`PackageParser`] implementation.
pub struct PipInspectDeplockParser;
46
47#[derive(Debug, Deserialize, Serialize)]
48struct PipInspectDeplock {
49    installed: Option<Vec<InstalledPackage>>,
50    pip_version: Option<String>,
51    version: Option<String>,
52}
53
54#[derive(Debug, Deserialize, Serialize)]
55struct InstalledPackage {
56    metadata: Option<PackageMetadata>,
57    requested: Option<bool>,
58    direct_url: Option<Value>,
59}
60
61#[derive(Debug, Deserialize, Serialize)]
62struct PackageMetadata {
63    name: Option<String>,
64    version: Option<String>,
65    license: Option<String>,
66    description: Option<String>,
67    keywords: Option<String>,
68    requires_dist: Option<Vec<String>>,
69}
70
71impl PackageParser for PipInspectDeplockParser {
72    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
73
74    fn is_match(path: &Path) -> bool {
75        path.to_str()
76            .is_some_and(|p| p.ends_with("/pip-inspect.deplock"))
77    }
78
79    fn extract_packages(path: &Path) -> Vec<PackageData> {
80        let content = match fs::read_to_string(path) {
81            Ok(c) => c,
82            Err(e) => {
83                warn!("Failed to read pip-inspect.deplock file {:?}: {}", path, e);
84                return vec![default_package_data()];
85            }
86        };
87
88        vec![parse_pip_inspect_deplock(&content)]
89    }
90}
91
92pub(crate) fn parse_pip_inspect_deplock(content: &str) -> PackageData {
93    let data: PipInspectDeplock = match serde_json::from_str(content) {
94        Ok(d) => d,
95        Err(e) => {
96            warn!("Failed to parse pip-inspect.deplock: {}", e);
97            return default_package_data();
98        }
99    };
100
101    let Some(installed_packages) = data.installed else {
102        return default_package_data();
103    };
104
105    // Find the main package (has direct_url and is_requested)
106    let main_package = installed_packages
107        .iter()
108        .find(|p| p.requested.unwrap_or(false) && p.direct_url.is_some());
109
110    let metadata = if let Some(pkg) = main_package {
111        pkg.metadata.as_ref()
112    } else {
113        // If no main package found, try to find any requested package
114        installed_packages
115            .iter()
116            .find(|p| p.requested.unwrap_or(false))
117            .and_then(|p| p.metadata.as_ref())
118    };
119
120    let Some(metadata) = metadata else {
121        return default_package_data();
122    };
123
124    // Build extra_data with pip version info
125    let mut extra_data = std::collections::HashMap::new();
126    if let Some(ref pip_version) = data.pip_version {
127        extra_data.insert(
128            "pip_version".to_string(),
129            Value::String(pip_version.clone()),
130        );
131    }
132    if let Some(ref inspect_version) = data.version {
133        extra_data.insert(
134            "inspect_version".to_string(),
135            Value::String(inspect_version.clone()),
136        );
137    }
138
139    let extra_data_opt = if extra_data.is_empty() {
140        None
141    } else {
142        Some(extra_data)
143    };
144
145    let keywords = metadata
146        .keywords
147        .as_ref()
148        .map(|k| vec![k.clone()])
149        .unwrap_or_default();
150    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
151        normalize_spdx_declared_license(metadata.license.as_deref());
152    let dependencies = metadata
153        .requires_dist
154        .as_ref()
155        .map(|requires_dist| extract_requires_dist_dependencies(requires_dist))
156        .unwrap_or_default();
157
158    PackageData {
159        package_type: Some(PACKAGE_TYPE),
160        primary_language: Some("Python".to_string()),
161        name: metadata.name.clone(),
162        version: metadata.version.clone(),
163        declared_license_expression,
164        declared_license_expression_spdx,
165        license_detections,
166        extracted_license_statement: metadata.license.clone(),
167        description: metadata.description.clone(),
168        keywords,
169        is_virtual: true,
170        extra_data: extra_data_opt,
171        dependencies,
172        datasource_id: Some(DatasourceId::PypiInspectDeplock),
173        ..Default::default()
174    }
175}
176
177crate::register_parser!(
178    "pip inspect deplock file",
179    &["*pip-inspect.deplock"],
180    "pypi",
181    "Python",
182    Some("https://pip.pypa.io/en/stable/cli/pip_inspect/"),
183);