Skip to main content

provenant/parsers/
pip_inspect_deplock.rs

1//! Parser for pip inspect deplock files.
2//!
3//! Extracts package metadata from `pip-inspect.deplock` files which contain
4//! installed Python package information generated by dependency-inspector.
5//!
6//! # Supported Formats
7//! - `pip-inspect.deplock` - pip inspect output in deplock format
8//!
9//! # Key Features
10//! - Installed package identification
11//! - Main package extraction
12//! - License and description metadata
13//!
14//! # Implementation Notes
15//! - Format: JSON with installed packages list
16//! - Generated by dependency-inspector tool
17//! - Spec: https://pip.pypa.io/en/stable/cli/pip_inspect/
18
19use crate::models::{PackageData, PackageType};
20use std::path::Path;
21
22#[cfg(test)]
23use crate::models::DatasourceId;
24#[cfg(test)]
25use crate::parser_warn as warn;
26#[cfg(test)]
27use serde::{Deserialize, Serialize};
28#[cfg(test)]
29use serde_json::Value;
30
31use super::PackageParser;
32#[cfg(test)]
33use super::license_normalization::normalize_spdx_declared_license;
34use super::python::PythonParser;
35#[cfg(test)]
36use super::python::extract_requires_dist_dependencies;
37#[cfg(test)]
38use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
39
/// Package type used by this parser, both for the `PackageParser` impl and
/// for the `PackageData` values built in this module.
const PACKAGE_TYPE: PackageType = PackageType::Pypi;
41
/// Builds the fallback [`PackageData`] returned when nothing can be extracted.
///
/// Only the package type, primary language and datasource id are populated;
/// every other field keeps its `Default` value.
#[cfg(test)]
fn default_package_data() -> PackageData {
    PackageData {
        datasource_id: Some(DatasourceId::PypiInspectDeplock),
        primary_language: Some(String::from("Python")),
        package_type: Some(PACKAGE_TYPE),
        ..PackageData::default()
    }
}
51
/// Parser for pip inspect deplock files.
///
/// Recognizes `pip-inspect.deplock` files and delegates package extraction
/// to [`PythonParser`].
pub struct PipInspectDeplockParser;
54
/// Top-level JSON document of a `pip-inspect.deplock` file.
///
/// Every field is an `Option`, so a document that omits any of them still
/// deserializes.
#[cfg(test)]
#[derive(Debug, Deserialize, Serialize)]
struct PipInspectDeplock {
    /// Installed distributions; the reported package is picked from this list.
    installed: Option<Vec<InstalledPackage>>,
    /// Copied into `extra_data` under the `pip_version` key when present.
    pip_version: Option<String>,
    /// Copied into `extra_data` under the `inspect_version` key when present.
    version: Option<String>,
}
62
/// One entry of the `installed` list.
#[cfg(test)]
#[derive(Debug, Deserialize, Serialize)]
struct InstalledPackage {
    /// Package metadata; a selected entry without it yields only default package data.
    metadata: Option<PackageMetadata>,
    /// Must be `true` for the entry to be considered when picking the main package.
    requested: Option<bool>,
    /// Kept as raw JSON; only its presence is checked when picking the main package.
    direct_url: Option<Value>,
}
70
/// Subset of the per-package `metadata` object that this parser reads.
#[cfg(test)]
#[derive(Debug, Deserialize, Serialize)]
struct PackageMetadata {
    /// Package name, truncated before being stored.
    name: Option<String>,
    /// Package version, truncated before being stored.
    version: Option<String>,
    /// Declared license text; normalized as an SPDX expression and also kept
    /// (truncated) as the extracted license statement.
    license: Option<String>,
    /// Package description, truncated before being stored.
    description: Option<String>,
    /// Keywords as one string; stored as a single-element keyword list.
    // NOTE(review): pip's JSON metadata may emit `keywords` as an array of
    // strings, which would make deserialization of the whole file fail —
    // confirm against real `pip inspect` output.
    keywords: Option<String>,
    /// `Requires-Dist` entries, converted into dependencies.
    requires_dist: Option<Vec<String>>,
}
81
82impl PackageParser for PipInspectDeplockParser {
83    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
84
85    fn is_match(path: &Path) -> bool {
86        path.to_str()
87            .is_some_and(|p| p.ends_with("/pip-inspect.deplock"))
88    }
89
90    fn extract_packages(path: &Path) -> Vec<PackageData> {
91        vec![PythonParser::extract_first_package(path)]
92    }
93}
94
/// Parses the JSON text of a `pip-inspect.deplock` file into one [`PackageData`].
///
/// Selection rule: the first installed entry that is both `requested` and has
/// a `direct_url` wins; when no such entry exists, the first `requested` entry
/// is used instead. Only the first `MAX_ITERATION_COUNT` entries are examined.
///
/// Returns the result of `default_package_data()` when the JSON is invalid
/// (a warning is logged), when there is no `installed` list, or when the
/// selected entry carries no metadata.
#[cfg(test)]
pub(crate) fn parse_pip_inspect_deplock(content: &str) -> PackageData {
    let document = match serde_json::from_str::<PipInspectDeplock>(content) {
        Ok(parsed) => parsed,
        Err(e) => {
            warn!("Failed to parse pip-inspect.deplock: {}", e);
            return default_package_data();
        }
    };
    let PipInspectDeplock {
        installed,
        pip_version,
        version: inspect_version,
    } = document;

    let Some(entries) = installed else {
        return default_package_data();
    };

    // Preferred candidate: explicitly requested and installed from a direct URL.
    let primary = entries
        .iter()
        .take(MAX_ITERATION_COUNT)
        .find(|entry| entry.requested == Some(true) && entry.direct_url.is_some());

    // A preferred candidate without metadata does NOT fall through to the
    // secondary search; the fallback only runs when no candidate was found.
    let metadata = match primary {
        Some(entry) => entry.metadata.as_ref(),
        None => entries
            .iter()
            .take(MAX_ITERATION_COUNT)
            .find(|entry| entry.requested == Some(true))
            .and_then(|entry| entry.metadata.as_ref()),
    };
    let Some(metadata) = metadata else {
        return default_package_data();
    };

    // Tool version information is carried along as extra data, when available.
    let extra_data: std::collections::HashMap<_, _> = [
        ("pip_version", pip_version),
        ("inspect_version", inspect_version),
    ]
    .into_iter()
    .filter_map(|(key, value)| value.map(|text| (key.to_string(), Value::String(text))))
    .collect();
    let extra_data = (!extra_data.is_empty()).then_some(extra_data);

    // Owned, truncated copy of an optional text field.
    let clipped = |field: &Option<String>| field.as_ref().map(|text| truncate_field(text.clone()));

    // The whole keywords string becomes a single keyword entry.
    let keywords: Vec<_> = metadata
        .keywords
        .iter()
        .map(|text| truncate_field(text.clone()))
        .collect();
    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(metadata.license.as_deref());
    let dependencies = match &metadata.requires_dist {
        Some(requirements) => extract_requires_dist_dependencies(requirements),
        None => Default::default(),
    };

    PackageData {
        name: clipped(&metadata.name),
        version: clipped(&metadata.version),
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        extracted_license_statement: clipped(&metadata.license),
        description: clipped(&metadata.description),
        keywords,
        is_virtual: true,
        extra_data,
        dependencies,
        // Supplies package type, primary language and datasource id.
        ..default_package_data()
    }
}
185
// Registers descriptive metadata for this parser. The arguments appear to be,
// in order: description, path patterns, package type, primary language and
// documentation URL — confirm against the `register_parser!` definition.
// NOTE(review): the `*pip-inspect.deplock` glob is broader than
// `PipInspectDeplockParser::is_match`; verify that this is intended.
crate::register_parser!(
    "pip inspect deplock file",
    &["*pip-inspect.deplock"],
    "pypi",
    "Python",
    Some("https://pip.pypa.io/en/stable/cli/pip_inspect/"),
);