Skip to main content

provenant/parsers/
conda_meta_json.rs

//! Parser for Conda metadata JSON files.
//!
//! Extracts package metadata from `conda-meta/*.json` files, which describe
//! the packages installed in a Conda environment.
//!
//! # Supported Formats
//! - `conda-meta/*.json` - Conda installed package metadata
//!
//! # Key Features
//! - Installed package identification
//! - License extraction
//! - Download URLs and checksums
//!
//! # Implementation Notes
//! - Format: JSON with package metadata
//! - Located in the `conda-meta/` directory of a rootfs
//! - Spec: <https://docs.conda.io/>
18
19use crate::models::{DatasourceId, FileReference, Md5Digest, PackageType, Sha256Digest};
20use std::collections::HashMap;
21use std::fs;
22use std::path::Path;
23
24use crate::parser_warn as warn;
25use serde::{Deserialize, Serialize};
26use serde_json::Value;
27
28use crate::models::PackageData;
29
30use super::PackageParser;
31
32const PACKAGE_TYPE: PackageType = PackageType::Conda;
33
34fn default_package_data() -> PackageData {
35    PackageData {
36        package_type: Some(PACKAGE_TYPE),
37        primary_language: Some("Python".to_string()),
38        datasource_id: Some(DatasourceId::CondaMetaJson),
39        ..Default::default()
40    }
41}
42
/// Parser for installed-package records stored as `conda-meta/*.json`.
///
/// This is a stateless marker type: all behaviour lives in its
/// `PackageParser` implementation.
pub struct CondaMetaJsonParser;
45
46#[derive(Debug, Deserialize, Serialize)]
47struct CondaMetaJson {
48    name: Option<String>,
49    version: Option<String>,
50    license: Option<String>,
51    url: Option<String>,
52    size: Option<u64>,
53    md5: Option<String>,
54    sha256: Option<String>,
55    requested_spec: Option<String>,
56    channel: Option<String>,
57    extracted_package_dir: Option<String>,
58    files: Option<Vec<String>>,
59    package_tarball_full_path: Option<String>,
60    #[serde(flatten)]
61    other: HashMap<String, Value>,
62}
63
64impl PackageParser for CondaMetaJsonParser {
65    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
66
67    fn is_match(path: &Path) -> bool {
68        path.extension().and_then(|ext| ext.to_str()) == Some("json")
69            && !path
70                .file_name()
71                .and_then(|name| name.to_str())
72                .is_some_and(|name| name.ends_with("expected.json"))
73            && path
74                .components()
75                .any(|component| component.as_os_str() == "conda-meta")
76    }
77
78    fn extract_packages(path: &Path) -> Vec<PackageData> {
79        let content = match fs::read_to_string(path) {
80            Ok(c) => c,
81            Err(e) => {
82                warn!("Failed to read conda-meta JSON file {:?}: {}", path, e);
83                return vec![default_package_data()];
84            }
85        };
86
87        vec![parse_conda_meta_json(&content)]
88    }
89}
90
91pub(crate) fn parse_conda_meta_json(content: &str) -> PackageData {
92    parse_conda_meta_json_with_path(content, None)
93}
94
95pub(crate) fn parse_conda_meta_json_with_path(content: &str, _path: Option<&Path>) -> PackageData {
96    let metadata: CondaMetaJson = match serde_json::from_str(content) {
97        Ok(m) => m,
98        Err(e) => {
99            warn!("Failed to parse conda-meta JSON: {}", e);
100            return default_package_data();
101        }
102    };
103
104    // Build extra_data with specific fields
105    let mut extra_data = HashMap::new();
106    if let Some(ref requested_spec) = metadata.requested_spec {
107        extra_data.insert(
108            "requested_spec".to_string(),
109            Value::String(requested_spec.clone()),
110        );
111    }
112    if let Some(ref channel) = metadata.channel {
113        extra_data.insert("channel".to_string(), Value::String(channel.clone()));
114    }
115    if let Some(ref extracted_package_dir) = metadata.extracted_package_dir {
116        extra_data.insert(
117            "extracted_package_dir".to_string(),
118            Value::String(extracted_package_dir.clone()),
119        );
120    }
121    if let Some(ref files) = metadata.files {
122        extra_data.insert(
123            "files".to_string(),
124            Value::Array(files.iter().map(|f| Value::String(f.clone())).collect()),
125        );
126    }
127    if let Some(ref package_tarball_full_path) = metadata.package_tarball_full_path {
128        extra_data.insert(
129            "package_tarball_full_path".to_string(),
130            Value::String(package_tarball_full_path.clone()),
131        );
132    }
133
134    let extra_data_opt = if extra_data.is_empty() {
135        None
136    } else {
137        Some(extra_data)
138    };
139
140    let purl = metadata.name.as_deref().and_then(|name| {
141        super::conda::build_purl(
142            "conda",
143            None,
144            name,
145            metadata.version.as_deref(),
146            None,
147            None,
148            None,
149        )
150    });
151
152    let file_references = build_conda_file_references(
153        metadata.extracted_package_dir.as_deref(),
154        metadata.package_tarball_full_path.as_deref(),
155        metadata.files.as_deref(),
156    );
157
158    PackageData {
159        package_type: Some(PACKAGE_TYPE),
160        primary_language: Some("Python".to_string()),
161        name: metadata.name,
162        version: metadata.version,
163        extracted_license_statement: metadata.license,
164        download_url: metadata.url,
165        size: metadata.size,
166        md5: metadata.md5.and_then(|h| Md5Digest::from_hex(&h).ok()),
167        sha256: metadata
168            .sha256
169            .and_then(|h| Sha256Digest::from_hex(&h).ok()),
170        extra_data: extra_data_opt,
171        file_references,
172        datasource_id: Some(DatasourceId::CondaMetaJson),
173        purl,
174        ..Default::default()
175    }
176}
177
178fn build_conda_file_references(
179    extracted_package_dir: Option<&str>,
180    package_tarball_full_path: Option<&str>,
181    files: Option<&[String]>,
182) -> Vec<FileReference> {
183    let mut refs = Vec::new();
184
185    if let Some(extracted_dir) = extracted_package_dir
186        && let Some(relative) = condense_to_pkgs_relative(extracted_dir)
187    {
188        refs.push(FileReference {
189            path: relative,
190            size: None,
191            sha1: None,
192            md5: None,
193            sha256: None,
194            sha512: None,
195            extra_data: None,
196        });
197    }
198
199    if let Some(tarball) = package_tarball_full_path
200        && let Some(relative) = condense_to_pkgs_relative(tarball)
201    {
202        refs.push(FileReference {
203            path: relative,
204            size: None,
205            sha1: None,
206            md5: None,
207            sha256: None,
208            sha512: None,
209            extra_data: None,
210        });
211    }
212
213    if let Some(files) = files {
214        for file in files {
215            refs.push(FileReference {
216                path: file.clone(),
217                size: None,
218                sha1: None,
219                md5: None,
220                sha256: None,
221                sha512: None,
222                extra_data: None,
223            });
224        }
225    }
226
227    refs
228}
229
/// Rewrites a package-cache path so that it is rooted at `pkgs/`.
///
/// Backslashes are normalized to `/` first, so Windows-style paths are
/// handled too. Everything up to and including the *last* `/pkgs/`
/// separator is discarded, and the remainder is re-prefixed with `pkgs/`.
/// Splitting on the last separator keeps the result intact when an earlier
/// directory in the prefix is itself named `pkgs`.
///
/// Returns `None` when the path contains no `/pkgs/` segment.
fn condense_to_pkgs_relative(path: &str) -> Option<String> {
    let normalized = path.replace('\\', "/");
    let (_, relative) = normalized.rsplit_once("/pkgs/")?;
    // Guard against a doubled separator such as `/pkgs//name`.
    Some(format!("pkgs/{}", relative.trim_start_matches('/')))
}
235
236crate::register_parser!(
237    "Conda installed package metadata JSON",
238    &["*conda-meta/*.json"],
239    "conda",
240    "Python",
241    Some("https://docs.conda.io/"),
242);