Skip to main content

provenant/parsers/
pip_inspect_deplock.rs

// SPDX-FileCopyrightText: Provenant contributors
// SPDX-License-Identifier: Apache-2.0

//! Parser for pip inspect deplock files.
//!
//! Extracts package metadata from `pip-inspect.deplock` files which contain
//! installed Python package information generated by dependency-inspector.
//!
//! # Supported Formats
//! - `pip-inspect.deplock` - pip inspect output in deplock format
//!
//! # Key Features
//! - Installed package identification
//! - Main package extraction
//! - License and description metadata
//!
//! # Implementation Notes
//! - Format: JSON with installed packages list
//! - Generated by dependency-inspector tool
//! - Spec: <https://pip.pypa.io/en/stable/cli/pip_inspect/>
21
22use crate::models::{PackageData, PackageType};
23use std::path::Path;
24
25#[cfg(test)]
26use crate::models::DatasourceId;
27#[cfg(test)]
28use crate::parser_warn as warn;
29#[cfg(test)]
30use serde::{Deserialize, Serialize};
31#[cfg(test)]
32use serde_json::Value;
33
34use super::PackageParser;
35#[cfg(test)]
36use super::license_normalization::normalize_spdx_declared_license;
37use super::python::PythonParser;
38#[cfg(test)]
39use super::python::extract_requires_dist_dependencies;
40#[cfg(test)]
41use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
42
/// Package type reported by this parser (`PackageType::Pypi`).
///
/// Used both as the `PackageParser::PACKAGE_TYPE` associated constant and as the
/// `package_type` of every `PackageData` built in this file.
const PACKAGE_TYPE: PackageType = PackageType::Pypi;
44
/// Builds the fallback `PackageData` returned when a deplock file cannot be
/// parsed or contains no usable package.
///
/// Only the package type, the primary language (`"Python"`) and the datasource
/// id are populated; every other field keeps its `Default` value.
#[cfg(test)]
fn default_package_data() -> PackageData {
    PackageData {
        datasource_id: Some(DatasourceId::PypiInspectDeplock),
        primary_language: Some(String::from("Python")),
        package_type: Some(PACKAGE_TYPE),
        ..PackageData::default()
    }
}
54
/// Parser for pip inspect deplock files.
///
/// Unit struct that carries the `PackageParser` implementation for
/// `pip-inspect.deplock` files. Package extraction itself is delegated to
/// `PythonParser::extract_first_package`.
pub struct PipInspectDeplockParser;
57
/// Top-level JSON document of a `pip-inspect.deplock` file.
///
/// Every field is optional, so a document missing any of these keys still
/// deserializes.
#[cfg(test)]
#[derive(Debug, Serialize, Deserialize)]
struct PipInspectDeplock {
    /// Entries of the `installed` array, one per installed distribution.
    installed: Option<Vec<InstalledPackage>>,
    /// Value of the `pip_version` key; surfaced as `extra_data["pip_version"]`.
    pip_version: Option<String>,
    /// Value of the `version` key; surfaced as `extra_data["inspect_version"]`.
    version: Option<String>,
}
65
/// One element of the `installed` array.
#[cfg(test)]
#[derive(Debug, Serialize, Deserialize)]
struct InstalledPackage {
    /// Metadata object of the distribution, when present.
    metadata: Option<PackageMetadata>,
    /// `requested` flag of the entry; a missing value is treated as `false`
    /// when the main package is selected.
    requested: Option<bool>,
    /// Raw `direct_url` value. Only its presence is inspected when the main
    /// package is selected; its contents are never read in this file.
    direct_url: Option<Value>,
}
73
/// Subset of a distribution's `metadata` object that this parser reads.
#[cfg(test)]
#[derive(Debug, Serialize, Deserialize)]
struct PackageMetadata {
    /// Distribution name; becomes `PackageData::name`.
    name: Option<String>,
    /// Distribution version; becomes `PackageData::version`.
    version: Option<String>,
    /// License text; kept as the extracted license statement and also fed to
    /// SPDX normalization.
    license: Option<String>,
    /// Description text; becomes `PackageData::description`.
    description: Option<String>,
    /// Keywords string; stored as a single keyword entry, not split.
    keywords: Option<String>,
    /// `requires_dist` requirement strings; converted into dependencies.
    requires_dist: Option<Vec<String>>,
}
84
85impl PackageParser for PipInspectDeplockParser {
86    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
87
88    fn is_match(path: &Path) -> bool {
89        path.to_str()
90            .is_some_and(|p| p.ends_with("/pip-inspect.deplock"))
91    }
92
93    fn extract_packages(path: &Path) -> Vec<PackageData> {
94        vec![PythonParser::extract_first_package(path)]
95    }
96}
97
/// Parses the JSON text of a `pip-inspect.deplock` file into one `PackageData`.
///
/// The reported package is chosen from the `installed` array as follows:
/// 1. the first entry that is `requested` and has a `direct_url`;
/// 2. otherwise, the first entry that is `requested`.
///
/// Only the first `MAX_ITERATION_COUNT` entries are examined. When the chosen
/// entry has no `metadata`, no further entry is tried.
///
/// Returns `default_package_data()` when the JSON is invalid (a warning is
/// logged), when `installed` is absent, or when no entry with metadata is
/// selected. String fields are passed through `truncate_field`.
#[cfg(test)]
pub(crate) fn parse_pip_inspect_deplock(content: &str) -> PackageData {
    fn is_requested(pkg: &InstalledPackage) -> bool {
        pkg.requested == Some(true)
    }

    let report = match serde_json::from_str::<PipInspectDeplock>(content) {
        Ok(report) => report,
        Err(e) => {
            warn!("Failed to parse pip-inspect.deplock: {}", e);
            return default_package_data();
        }
    };

    let Some(installed) = report.installed.as_deref() else {
        return default_package_data();
    };

    // Both lookups below are limited to the same leading window of entries.
    let window = &installed[..installed.len().min(MAX_ITERATION_COUNT)];

    // Prefer a requested entry that also has a direct URL; otherwise settle for
    // the first requested entry.
    let selected = window
        .iter()
        .find(|pkg| is_requested(pkg) && pkg.direct_url.is_some())
        .or_else(|| window.iter().find(|pkg| is_requested(pkg)));

    let Some(meta) = selected.and_then(|pkg| pkg.metadata.as_ref()) else {
        return default_package_data();
    };

    // Record the tool versions, keeping `extra_data` unset when neither exists.
    let versions: std::collections::HashMap<String, Value> = [
        ("pip_version", report.pip_version.as_ref()),
        ("inspect_version", report.version.as_ref()),
    ]
    .into_iter()
    .filter_map(|(key, value)| value.map(|v| (key.to_string(), Value::String(v.clone()))))
    .collect();
    let extra_data = (!versions.is_empty()).then_some(versions);

    // Clone-and-truncate helper shared by all optional text fields.
    let bounded = |field: Option<&String>| field.map(|text| truncate_field(text.clone()));

    // The keywords string is kept as a single entry; it is not split.
    let keywords: Vec<_> = bounded(meta.keywords.as_ref()).into_iter().collect();

    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(meta.license.as_deref());

    let dependencies = match meta.requires_dist.as_ref() {
        Some(requirements) => extract_requires_dist_dependencies(requirements),
        None => Default::default(),
    };

    let name = bounded(meta.name.as_ref());
    let version = bounded(meta.version.as_ref());
    let extracted_license_statement = bounded(meta.license.as_ref());
    let description = bounded(meta.description.as_ref());

    // `default_package_data()` supplies the package type, primary language and
    // datasource id; everything not listed here stays at its default.
    PackageData {
        name,
        version,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        extracted_license_statement,
        description,
        keywords,
        is_virtual: true,
        extra_data,
        dependencies,
        ..default_package_data()
    }
}
188
// Registers this parser's descriptive metadata with the crate.
// NOTE(review): the macro definition is not visible here; the arguments appear
// to be, in order: human-readable description, path glob patterns, package
// type, primary language and documentation URL — confirm against
// `register_parser!`.
// The glob `*pip-inspect.deplock` also covers names that merely end with
// `pip-inspect.deplock`, whereas `is_match` above requires the whole file name
// to be `pip-inspect.deplock`.
crate::register_parser!(
    "pip inspect deplock file",
    &["*pip-inspect.deplock"],
    "pypi",
    "Python",
    Some("https://pip.pypa.io/en/stable/cli/pip_inspect/"),
);