Skip to main content

provenant/parsers/
pip_inspect_deplock.rs

1//! Parser for pip inspect deplock files.
2//!
3//! Extracts package metadata from `pip-inspect.deplock` files which contain
4//! installed Python package information generated by dependency-inspector.
5//!
6//! # Supported Formats
7//! - `pip-inspect.deplock` - pip inspect output in deplock format
8//!
9//! # Key Features
10//! - Installed package identification
11//! - Main package extraction
12//! - License and description metadata
13//!
14//! # Implementation Notes
15//! - Format: JSON with installed packages list
16//! - Generated by dependency-inspector tool
17//! - Spec: https://pip.pypa.io/en/stable/cli/pip_inspect/
18
19use crate::models::{DatasourceId, PackageType};
20use std::fs;
21use std::path::Path;
22
23use log::warn;
24use serde::{Deserialize, Serialize};
25use serde_json::Value;
26
27use crate::models::PackageData;
28
29use super::PackageParser;
30
/// Package type reported by this parser: used for the `package_type` field of
/// every `PackageData` built in this file and for the
/// `PackageParser::PACKAGE_TYPE` associated constant.
const PACKAGE_TYPE: PackageType = PackageType::Pypi;
32
33fn default_package_data() -> PackageData {
34    PackageData {
35        package_type: Some(PACKAGE_TYPE),
36        primary_language: Some("Python".to_string()),
37        datasource_id: Some(DatasourceId::PypiInspectDeplock),
38        ..Default::default()
39    }
40}
41
/// Parser for pip inspect deplock files.
///
/// This is a field-less unit type; the parsing entry points are provided by
/// its `PackageParser` implementation.
pub struct PipInspectDeplockParser;
44
/// Top-level JSON document of a `pip-inspect.deplock` file.
///
/// Every field is optional, so a document missing any of them still
/// deserializes.
#[derive(Debug, Deserialize, Serialize)]
struct PipInspectDeplock {
    /// Installed packages; the reported package is selected from this list.
    installed: Option<Vec<InstalledPackage>>,
    /// Copied into `extra_data` under the key `pip_version` when present.
    pip_version: Option<String>,
    /// Copied into `extra_data` under the key `inspect_version` when present.
    version: Option<String>,
}
51
/// One entry of the `installed` list.
#[derive(Debug, Deserialize, Serialize)]
struct InstalledPackage {
    /// Package metadata; the source of the name, version, license,
    /// description and keywords of the reported package.
    metadata: Option<PackageMetadata>,
    /// Treated as `false` when absent. Only entries with `requested == true`
    /// are candidates for the reported package.
    requested: Option<bool>,
    /// Kept as raw JSON because only its presence is checked: a requested
    /// entry that has a `direct_url` is preferred as the main package.
    direct_url: Option<Value>,
}
58
/// Subset of a package's `metadata` object that this parser reads.
#[derive(Debug, Deserialize, Serialize)]
struct PackageMetadata {
    /// Becomes `PackageData::name`.
    name: Option<String>,
    /// Becomes `PackageData::version`.
    version: Option<String>,
    /// Becomes `PackageData::extracted_license_statement`.
    license: Option<String>,
    /// Becomes `PackageData::description`.
    description: Option<String>,
    /// Stored as a single element of `PackageData::keywords`; the string is
    /// not split.
    /// NOTE(review): this only deserializes when `keywords` is a JSON string —
    /// confirm that real files never carry a list here.
    keywords: Option<String>,
}
67
68impl PackageParser for PipInspectDeplockParser {
69    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
70
71    fn is_match(path: &Path) -> bool {
72        path.to_str()
73            .is_some_and(|p| p.ends_with("/pip-inspect.deplock"))
74    }
75
76    fn extract_packages(path: &Path) -> Vec<PackageData> {
77        let content = match fs::read_to_string(path) {
78            Ok(c) => c,
79            Err(e) => {
80                warn!("Failed to read pip-inspect.deplock file {:?}: {}", path, e);
81                return vec![default_package_data()];
82            }
83        };
84
85        vec![parse_pip_inspect_deplock(&content)]
86    }
87}
88
89pub(crate) fn parse_pip_inspect_deplock(content: &str) -> PackageData {
90    let data: PipInspectDeplock = match serde_json::from_str(content) {
91        Ok(d) => d,
92        Err(e) => {
93            warn!("Failed to parse pip-inspect.deplock: {}", e);
94            return default_package_data();
95        }
96    };
97
98    let Some(installed_packages) = data.installed else {
99        return default_package_data();
100    };
101
102    // Find the main package (has direct_url and is_requested)
103    let main_package = installed_packages
104        .iter()
105        .find(|p| p.requested.unwrap_or(false) && p.direct_url.is_some());
106
107    let metadata = if let Some(pkg) = main_package {
108        pkg.metadata.as_ref()
109    } else {
110        // If no main package found, try to find any requested package
111        installed_packages
112            .iter()
113            .find(|p| p.requested.unwrap_or(false))
114            .and_then(|p| p.metadata.as_ref())
115    };
116
117    let Some(metadata) = metadata else {
118        return default_package_data();
119    };
120
121    // Build extra_data with pip version info
122    let mut extra_data = std::collections::HashMap::new();
123    if let Some(ref pip_version) = data.pip_version {
124        extra_data.insert(
125            "pip_version".to_string(),
126            Value::String(pip_version.clone()),
127        );
128    }
129    if let Some(ref inspect_version) = data.version {
130        extra_data.insert(
131            "inspect_version".to_string(),
132            Value::String(inspect_version.clone()),
133        );
134    }
135
136    let extra_data_opt = if extra_data.is_empty() {
137        None
138    } else {
139        Some(extra_data)
140    };
141
142    let keywords = metadata
143        .keywords
144        .as_ref()
145        .map(|k| vec![k.clone()])
146        .unwrap_or_default();
147
148    PackageData {
149        package_type: Some(PACKAGE_TYPE),
150        primary_language: Some("Python".to_string()),
151        name: metadata.name.clone(),
152        version: metadata.version.clone(),
153        extracted_license_statement: metadata.license.clone(),
154        description: metadata.description.clone(),
155        keywords,
156        is_virtual: true,
157        extra_data: extra_data_opt,
158        datasource_id: Some(DatasourceId::PypiInspectDeplock),
159        ..Default::default()
160    }
161}
162
// Registers this parser's descriptive metadata: description, path patterns,
// package type, primary language and documentation URL.
// NOTE(review): the `*pip-inspect.deplock` glob here is looser than
// `PipInspectDeplockParser::is_match` — confirm which one `register_parser!`
// consumers rely on.
crate::register_parser!(
    "pip inspect deplock file",
    &["*pip-inspect.deplock"],
    "pypi",
    "Python",
    Some("https://pip.pypa.io/en/stable/cli/pip_inspect/"),
);