provenant-cli 0.0.15

Rust-based ScanCode-compatible scanner for licenses, package metadata, SBOMs, and provenance data.
Documentation
//! Parser for pip inspect deplock files.
//!
//! Extracts package metadata from `pip-inspect.deplock` files which contain
//! installed Python package information generated by dependency-inspector.
//!
//! # Supported Formats
//! - `pip-inspect.deplock` - pip inspect output in deplock format
//!
//! # Key Features
//! - Installed package identification
//! - Main package extraction
//! - License and description metadata
//!
//! # Implementation Notes
//! - Format: JSON with installed packages list
//! - Generated by dependency-inspector tool
//! - Spec: https://pip.pypa.io/en/stable/cli/pip_inspect/

use crate::models::{PackageData, PackageType};
use std::path::Path;

#[cfg(test)]
use crate::models::DatasourceId;
#[cfg(test)]
use crate::parser_warn as warn;
#[cfg(test)]
use serde::{Deserialize, Serialize};
#[cfg(test)]
use serde_json::Value;

use super::PackageParser;
#[cfg(test)]
use super::license_normalization::normalize_spdx_declared_license;
use super::python::PythonParser;
#[cfg(test)]
use super::python::extract_requires_dist_dependencies;

const PACKAGE_TYPE: PackageType = PackageType::Pypi;

/// Builds the fallback [`PackageData`] returned when a deplock file cannot be
/// parsed or contains no usable package entry.
///
/// Only the package type, primary language, and datasource id are populated;
/// every other field keeps its `Default` value.
#[cfg(test)]
fn default_package_data() -> PackageData {
    PackageData {
        datasource_id: Some(DatasourceId::PypiInspectDeplock),
        primary_language: Some(String::from("Python")),
        package_type: Some(PACKAGE_TYPE),
        ..PackageData::default()
    }
}

/// Parser for `pip-inspect.deplock` files.
///
/// This is a stateless marker type; its behavior lives in the
/// `PackageParser` implementation below.
pub struct PipInspectDeplockParser;

/// Top-level JSON document deserialized by `parse_pip_inspect_deplock`.
///
/// Every field is optional, so a document missing any of them still
/// deserializes.
#[cfg(test)]
#[derive(Debug, Deserialize, Serialize)]
struct PipInspectDeplock {
    /// Installed package entries; the main package is selected from this list.
    installed: Option<Vec<InstalledPackage>>,
    /// Copied into `extra_data` under the `pip_version` key when present.
    pip_version: Option<String>,
    /// Copied into `extra_data` under the `inspect_version` key when present.
    version: Option<String>,
}

/// One entry of the `installed` list in a pip-inspect deplock document.
#[cfg(test)]
#[derive(Debug, Deserialize, Serialize)]
struct InstalledPackage {
    /// Package metadata; an entry without it yields no package details.
    metadata: Option<PackageMetadata>,
    /// Treated as `false` when absent during main-package selection.
    requested: Option<bool>,
    /// Only its presence is checked (to prefer an entry as the main package);
    /// the value itself is kept as opaque JSON.
    direct_url: Option<Value>,
}

/// The `metadata` object of an installed package entry.
///
/// Only the fields consumed by `parse_pip_inspect_deplock` are modelled.
#[cfg(test)]
#[derive(Debug, Deserialize, Serialize)]
struct PackageMetadata {
    /// Becomes the package name.
    name: Option<String>,
    /// Becomes the package version.
    version: Option<String>,
    /// Raw license text; stored as the extracted license statement and also
    /// passed to `normalize_spdx_declared_license`.
    license: Option<String>,
    /// Becomes the package description.
    description: Option<String>,
    /// A single string; the parser stores it as one keyword without splitting.
    keywords: Option<String>,
    /// Requirement strings passed to `extract_requires_dist_dependencies`.
    requires_dist: Option<Vec<String>>,
}

impl PackageParser for PipInspectDeplockParser {
    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;

    /// Returns `true` when the final component of `path` is exactly
    /// `pip-inspect.deplock`.
    ///
    /// The file name is compared directly instead of testing for a
    /// `/`-prefixed string suffix, so a bare relative path and paths using the
    /// platform's own separator match too, and a non-UTF-8 parent directory
    /// does not prevent a match.
    fn is_match(path: &Path) -> bool {
        path.file_name()
            .is_some_and(|name| name == "pip-inspect.deplock")
    }

    /// Returns a single-element vector holding the result of
    /// `PythonParser::extract_first_package` for `path`.
    fn extract_packages(path: &Path) -> Vec<PackageData> {
        vec![PythonParser::extract_first_package(path)]
    }
}

/// Parses the JSON text of a `pip-inspect.deplock` file into a [`PackageData`].
///
/// The main package is the first `installed` entry that is `requested` and
/// carries a `direct_url`; when no such entry exists, the first `requested`
/// entry is used instead.
///
/// The fallback from `default_package_data` is returned when the JSON is
/// invalid (a warning is logged), when `installed` is absent, when no entry
/// qualifies, or when the selected entry has no `metadata`.
#[cfg(test)]
pub(crate) fn parse_pip_inspect_deplock(content: &str) -> PackageData {
    let PipInspectDeplock {
        installed,
        pip_version,
        version: inspect_version,
    } = match serde_json::from_str::<PipInspectDeplock>(content) {
        Ok(parsed) => parsed,
        Err(err) => {
            warn!("Failed to parse pip-inspect.deplock: {}", err);
            return default_package_data();
        }
    };

    let Some(packages) = installed else {
        return default_package_data();
    };

    // Prefer a requested entry that also has a direct_url; otherwise settle
    // for the first requested entry of any kind.
    let chosen = packages
        .iter()
        .find(|pkg| pkg.requested == Some(true) && pkg.direct_url.is_some())
        .or_else(|| packages.iter().find(|pkg| pkg.requested == Some(true)));

    let Some(metadata) = chosen.and_then(|pkg| pkg.metadata.as_ref()) else {
        return default_package_data();
    };

    // Record the tool versions, keeping only the ones actually present.
    let extra_data: std::collections::HashMap<String, Value> = [
        ("pip_version", pip_version),
        ("inspect_version", inspect_version),
    ]
    .into_iter()
    .filter_map(|(key, value)| value.map(|text| (key.to_string(), Value::String(text))))
    .collect();

    // The keywords string is kept whole as a single keyword entry.
    let keywords: Vec<String> = metadata.keywords.iter().cloned().collect();

    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(metadata.license.as_deref());

    let dependencies = match metadata.requires_dist.as_ref() {
        Some(requirements) => extract_requires_dist_dependencies(requirements),
        None => Default::default(),
    };

    // Package type, primary language, and datasource id come from the shared
    // fallback value used as the struct base.
    PackageData {
        name: metadata.name.clone(),
        version: metadata.version.clone(),
        description: metadata.description.clone(),
        extracted_license_statement: metadata.license.clone(),
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        keywords,
        dependencies,
        is_virtual: true,
        extra_data: (!extra_data.is_empty()).then_some(extra_data),
        ..default_package_data()
    }
}

// Registers this parser's descriptive metadata. The arguments appear to be,
// in order: a human-readable description, path glob patterns, the package
// type, the primary language, and an optional documentation URL.
// NOTE(review): argument meanings are inferred from their values; confirm
// against the `register_parser!` definition.
crate::register_parser!(
    "pip inspect deplock file",
    &["*pip-inspect.deplock"],
    "pypi",
    "Python",
    Some("https://pip.pypa.io/en/stable/cli/pip_inspect/"),
);