Skip to main content

provenant/parsers/
pip_inspect_deplock.rs

1//! Parser for pip inspect deplock files.
2//!
3//! Extracts package metadata from `pip-inspect.deplock` files which contain
4//! installed Python package information generated by dependency-inspector.
5//!
6//! # Supported Formats
7//! - `pip-inspect.deplock` - pip inspect output in deplock format
8//!
9//! # Key Features
10//! - Installed package identification
11//! - Main package extraction
12//! - License and description metadata
13//!
14//! # Implementation Notes
15//! - Format: JSON with installed packages list
16//! - Generated by dependency-inspector tool
17//! - Spec: https://pip.pypa.io/en/stable/cli/pip_inspect/
18
19use crate::models::{PackageData, PackageType};
20use std::path::Path;
21
22#[cfg(test)]
23use crate::models::DatasourceId;
24#[cfg(test)]
25use crate::parser_warn as warn;
26#[cfg(test)]
27use serde::{Deserialize, Serialize};
28#[cfg(test)]
29use serde_json::Value;
30
31use super::PackageParser;
32#[cfg(test)]
33use super::license_normalization::normalize_spdx_declared_license;
34use super::python::PythonParser;
35#[cfg(test)]
36use super::python::extract_requires_dist_dependencies;
37
/// Package type shared by this module: it is exposed as the parser's
/// `PackageParser::PACKAGE_TYPE` and stamped on every `PackageData` built here.
const PACKAGE_TYPE: PackageType = PackageType::Pypi;
39
/// Builds the fallback `PackageData` returned when a deplock file cannot be
/// parsed or contains no usable package.
///
/// Only the identifying fields are populated (package type, primary language
/// and datasource id); every other field keeps its `Default` value.
#[cfg(test)]
fn default_package_data() -> PackageData {
    let primary_language = Some(String::from("Python"));
    let datasource_id = Some(DatasourceId::PypiInspectDeplock);

    PackageData {
        package_type: Some(PACKAGE_TYPE),
        primary_language,
        datasource_id,
        ..PackageData::default()
    }
}
49
/// Parser for pip inspect deplock files.
///
/// This is a stateless unit type; its behavior comes from the
/// `PackageParser` implementation in this module, which recognizes files
/// named `pip-inspect.deplock`.
pub struct PipInspectDeplockParser;
52
/// Top-level shape of a `pip-inspect.deplock` JSON document (test builds only).
///
/// Every field is optional, so a document that omits a key still
/// deserializes, with that field set to `None`.
#[cfg(test)]
#[derive(Debug, Deserialize, Serialize)]
struct PipInspectDeplock {
    /// Installed packages; the main package is selected from this list.
    installed: Option<Vec<InstalledPackage>>,
    /// Recorded in the result's `extra_data` under the key `pip_version`.
    pip_version: Option<String>,
    /// Recorded in the result's `extra_data` under the key `inspect_version`.
    version: Option<String>,
}
60
/// One entry of the `installed` list in a deplock document (test builds only).
#[cfg(test)]
#[derive(Debug, Deserialize, Serialize)]
struct InstalledPackage {
    /// Metadata of the package; the source of every reported package field.
    metadata: Option<PackageMetadata>,
    /// Whether the package is flagged as requested; a missing flag is treated
    /// as `false` when the main package is selected.
    requested: Option<bool>,
    /// Kept as an arbitrary JSON value; only its presence is inspected when
    /// the main package is selected.
    direct_url: Option<Value>,
}
68
/// Subset of a package's `metadata` object that the parser reads
/// (test builds only).
#[cfg(test)]
#[derive(Debug, Deserialize, Serialize)]
struct PackageMetadata {
    /// Copied to the resulting package's `name`.
    name: Option<String>,
    /// Copied to the resulting package's `version`.
    version: Option<String>,
    /// Raw license text; kept as the extracted license statement and also
    /// passed to `normalize_spdx_declared_license`.
    license: Option<String>,
    /// Copied to the resulting package's `description`.
    description: Option<String>,
    /// Keywords as one string; reported as a single-element keyword list
    /// without any splitting.
    keywords: Option<String>,
    /// Requirement strings handed to `extract_requires_dist_dependencies`.
    requires_dist: Option<Vec<String>>,
}
79
80impl PackageParser for PipInspectDeplockParser {
81    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
82
83    fn is_match(path: &Path) -> bool {
84        path.to_str()
85            .is_some_and(|p| p.ends_with("/pip-inspect.deplock"))
86    }
87
88    fn extract_packages(path: &Path) -> Vec<PackageData> {
89        vec![PythonParser::extract_first_package(path)]
90    }
91}
92
/// Parses the JSON text of a `pip-inspect.deplock` file into one
/// `PackageData` describing the main installed package.
///
/// The main package is the first `installed` entry that is flagged as
/// requested and also carries a `direct_url`. When no entry satisfies both
/// conditions, the first requested entry is used instead.
///
/// The fallback from `default_package_data()` is returned when the content is
/// not valid JSON for the expected shape (a warning is logged in that case),
/// when `installed` is absent, or when the selected entry has no metadata.
#[cfg(test)]
pub(crate) fn parse_pip_inspect_deplock(content: &str) -> PackageData {
    let PipInspectDeplock {
        installed,
        pip_version,
        version: inspect_version,
    } = match serde_json::from_str::<PipInspectDeplock>(content) {
        Ok(parsed) => parsed,
        Err(err) => {
            warn!("Failed to parse pip-inspect.deplock: {}", err);
            return default_package_data();
        }
    };

    let Some(installed) = installed else {
        return default_package_data();
    };

    // Prefer the entry that is both requested and has a `direct_url`; only
    // when no such entry exists fall back to the first requested entry.
    let preferred = installed
        .iter()
        .find(|pkg| pkg.requested == Some(true) && pkg.direct_url.is_some());
    let metadata = match preferred {
        Some(main) => main.metadata.as_ref(),
        None => installed
            .iter()
            .find(|pkg| pkg.requested == Some(true))
            .and_then(|pkg| pkg.metadata.as_ref()),
    };
    let Some(metadata) = metadata else {
        return default_package_data();
    };

    // Record whichever tool versions the document provides; an empty map is
    // reported as `None`.
    let versions = [
        ("pip_version", pip_version),
        ("inspect_version", inspect_version),
    ]
    .into_iter()
    .filter_map(|(key, value)| value.map(|v| (key.to_string(), Value::String(v))))
    .collect::<std::collections::HashMap<_, _>>();
    let extra_data = (!versions.is_empty()).then_some(versions);

    // The keywords string is reported as-is, as a single list element.
    let keywords: Vec<String> = metadata.keywords.iter().cloned().collect();

    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(metadata.license.as_deref());

    let dependencies = match metadata.requires_dist.as_ref() {
        Some(requirements) => extract_requires_dist_dependencies(requirements),
        None => Default::default(),
    };

    // Package type, primary language and datasource id come from the shared
    // fallback value, which sets exactly those three fields.
    PackageData {
        name: metadata.name.clone(),
        version: metadata.version.clone(),
        description: metadata.description.clone(),
        extracted_license_statement: metadata.license.clone(),
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        keywords,
        dependencies,
        extra_data,
        is_virtual: true,
        ..default_package_data()
    }
}
178
// Registers descriptive metadata for this parser through the crate-level
// `register_parser!` macro. The arguments look like, in order: a
// human-readable description, the supported path glob patterns, the package
// type, the primary language, and a documentation URL.
// NOTE(review): argument meanings are inferred from their values — confirm
// against the macro definition.
crate::register_parser!(
    "pip inspect deplock file",
    &["*pip-inspect.deplock"],
    "pypi",
    "Python",
    Some("https://pip.pypa.io/en/stable/cli/pip_inspect/"),
);