Skip to main content

provenant/parsers/
pip_inspect_deplock.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for pip inspect deplock files.
5//!
6//! Extracts package metadata from `pip-inspect.deplock` files which contain
7//! installed Python package information generated by dependency-inspector.
8//!
9//! # Supported Formats
10//! - `pip-inspect.deplock` - pip inspect output in deplock format
11//!
12//! # Key Features
13//! - Installed package identification
14//! - Main package extraction
15//! - License and description metadata
16//!
17//! # Implementation Notes
18//! - Format: JSON with installed packages list
19//! - Generated by dependency-inspector tool
20//! - Spec: https://pip.pypa.io/en/stable/cli/pip_inspect/
21
22use crate::models::{PackageData, PackageType};
23use std::path::Path;
24
25#[cfg(test)]
26use crate::models::DatasourceId;
27#[cfg(test)]
28use crate::parser_warn as warn;
29#[cfg(test)]
30use serde::{Deserialize, Serialize};
31#[cfg(test)]
32use serde_json::Value;
33
34use super::PackageParser;
35#[cfg(test)]
36use super::license_normalization::normalize_spdx_declared_license;
37use super::metadata::ParserMetadata;
38use super::python::PythonParser;
39#[cfg(test)]
40use super::python::extract_requires_dist_dependencies;
41#[cfg(test)]
42use crate::parsers::utils::{MAX_ITERATION_COUNT, truncate_field};
43
44const PACKAGE_TYPE: PackageType = PackageType::Pypi;
45
/// Returns the fallback `PackageData` used when nothing can be extracted.
///
/// Only the package type, primary language (`"Python"`) and datasource id are
/// populated; every other field keeps its `Default` value.
#[cfg(test)]
fn default_package_data() -> PackageData {
    PackageData {
        datasource_id: Some(DatasourceId::PypiInspectDeplock),
        primary_language: Some(String::from("Python")),
        package_type: Some(PACKAGE_TYPE),
        ..PackageData::default()
    }
}
55
/// Parser for `pip-inspect.deplock` files.
///
/// This is a stateless marker type: all behaviour lives in its
/// `PackageParser` implementation.
pub struct PipInspectDeplockParser;
58
/// Top-level shape of a `pip-inspect.deplock` JSON document as read by
/// `parse_pip_inspect_deplock`. Every field is optional.
#[cfg(test)]
#[derive(Debug, Serialize, Deserialize)]
struct PipInspectDeplock {
    /// Installed package entries; when absent the parser returns the default package data.
    installed: Option<Vec<InstalledPackage>>,
    /// Copied into `extra_data` under the `"pip_version"` key when present.
    pip_version: Option<String>,
    /// Copied into `extra_data` under the `"inspect_version"` key when present.
    version: Option<String>,
}
66
/// One entry of the `installed` list in a `pip-inspect.deplock` document.
#[cfg(test)]
#[derive(Debug, Serialize, Deserialize)]
struct InstalledPackage {
    /// Metadata block of the package; the source of every extracted field.
    metadata: Option<PackageMetadata>,
    /// Whether the package is flagged as requested; a missing value is treated as `false`
    /// by the parser.
    requested: Option<bool>,
    /// Arbitrary JSON value; the parser only checks whether it is present.
    direct_url: Option<Value>,
}
74
/// Subset of an installed package's `metadata` object that the parser reads.
/// Every field is optional.
#[cfg(test)]
#[derive(Debug, Serialize, Deserialize)]
struct PackageMetadata {
    /// Package name.
    name: Option<String>,
    /// Package version string.
    version: Option<String>,
    /// Declared license text; normalized by the parser and also kept as the
    /// extracted license statement.
    license: Option<String>,
    /// Package description.
    description: Option<String>,
    /// Keywords as a single string; the parser stores it as one keyword entry.
    keywords: Option<String>,
    /// Requirement strings handed to `extract_requires_dist_dependencies`.
    requires_dist: Option<Vec<String>>,
}
85
86impl PackageParser for PipInspectDeplockParser {
87    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
88
89    fn metadata() -> Vec<ParserMetadata> {
90        vec![ParserMetadata {
91            description: "pip inspect deplock file",
92            file_patterns: &["*pip-inspect.deplock"],
93            package_type: "pypi",
94            primary_language: "Python",
95            documentation_url: Some("https://pip.pypa.io/en/stable/cli/pip_inspect/"),
96        }]
97    }
98
99    fn is_match(path: &Path) -> bool {
100        path.to_str()
101            .is_some_and(|p| p.ends_with("/pip-inspect.deplock"))
102    }
103
104    fn extract_packages(path: &Path) -> Vec<PackageData> {
105        vec![PythonParser::extract_first_package(path)]
106    }
107}
108
/// Builds a `PackageData` from the JSON text of a `pip-inspect.deplock` file.
///
/// The package is chosen among the first `MAX_ITERATION_COUNT` entries of
/// `installed`: the first entry that is requested and carries a `direct_url`
/// wins; if there is none, the first requested entry is used instead.
///
/// The result of `default_package_data()` is returned when the JSON cannot be
/// deserialized (a warning is logged), when `installed` is absent, when no entry
/// is selected, or when the selected entry has no `metadata`.
#[cfg(test)]
pub(crate) fn parse_pip_inspect_deplock(content: &str) -> PackageData {
    /// A missing `requested` flag counts as "not requested".
    fn is_requested(pkg: &InstalledPackage) -> bool {
        pkg.requested.unwrap_or(false)
    }

    let data = match serde_json::from_str::<PipInspectDeplock>(content) {
        Ok(parsed) => parsed,
        Err(err) => {
            warn!("Failed to parse pip-inspect.deplock: {}", err);
            return default_package_data();
        }
    };

    let Some(installed) = data.installed.as_ref() else {
        return default_package_data();
    };

    // Prefer the main package (requested and installed from a direct URL);
    // otherwise settle for the first package that was merely requested.
    let selected = installed
        .iter()
        .take(MAX_ITERATION_COUNT)
        .find(|pkg| is_requested(pkg) && pkg.direct_url.is_some())
        .or_else(|| {
            installed
                .iter()
                .take(MAX_ITERATION_COUNT)
                .find(|pkg| is_requested(pkg))
        });

    // A selected entry without metadata does not trigger any further search.
    let Some(metadata) = selected.and_then(|pkg| pkg.metadata.as_ref()) else {
        return default_package_data();
    };

    // Record the pip / inspect-format versions, keeping `None` when neither is present.
    let extra_data: std::collections::HashMap<String, Value> = [
        ("pip_version", data.pip_version.as_ref()),
        ("inspect_version", data.version.as_ref()),
    ]
    .into_iter()
    .filter_map(|(key, value)| value.map(|text| (key.to_string(), Value::String(text.clone()))))
    .collect();
    let extra_data = (!extra_data.is_empty()).then_some(extra_data);

    // The keywords string is kept whole, as a single list entry.
    let keywords = match metadata.keywords.as_ref() {
        Some(raw) => vec![truncate_field(raw.clone())],
        None => Vec::new(),
    };

    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(metadata.license.as_deref());

    let dependencies = match metadata.requires_dist.as_ref() {
        Some(requires_dist) => extract_requires_dist_dependencies(requires_dist),
        None => Default::default(),
    };

    // Shared "clone then truncate" step for the optional text fields.
    let clip = |field: &Option<String>| field.as_ref().map(|text| truncate_field(text.clone()));

    PackageData {
        datasource_id: Some(DatasourceId::PypiInspectDeplock),
        package_type: Some(PACKAGE_TYPE),
        primary_language: Some("Python".to_string()),
        name: clip(&metadata.name),
        version: clip(&metadata.version),
        extracted_license_statement: clip(&metadata.license),
        description: clip(&metadata.description),
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        keywords,
        dependencies,
        extra_data,
        is_virtual: true,
        ..Default::default()
    }
}