Skip to main content

provenant/parsers/
conda_meta_json.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Conda metadata JSON files.
5//!
6//! Extracts package metadata from `conda-meta/*.json` files which contain
7//! installed package information in Conda environments.
8//!
9//! # Supported Formats
10//! - `conda-meta/*.json` - Conda installed package metadata
11//!
12//! # Key Features
13//! - Installed package identification
14//! - License extraction
15//! - Download URLs and checksums
16//!
17//! # Implementation Notes
18//! - Format: JSON with package metadata
19//! - Located in conda-meta/ directory in rootfs
20//! - Spec: https://docs.conda.io/
21
22use crate::models::{DatasourceId, FileReference, Md5Digest, PackageType, Sha256Digest};
23use std::collections::HashMap;
24use std::path::Path;
25
26use crate::parser_warn as warn;
27use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30
31use crate::models::PackageData;
32
33use super::PackageParser;
34
/// Package type stamped on every `PackageData` produced by this parser and
/// exposed through the `PackageParser::PACKAGE_TYPE` associated constant.
const PACKAGE_TYPE: PackageType = PackageType::Conda;
36
37fn default_package_data() -> PackageData {
38    PackageData {
39        package_type: Some(PACKAGE_TYPE),
40        primary_language: Some("Python".to_string()),
41        datasource_id: Some(DatasourceId::CondaMetaJson),
42        ..Default::default()
43    }
44}
45
/// Parser for Conda metadata JSON files (`conda-meta/*.json`).
///
/// Stateless unit type: it carries no data and only serves as the type on
/// which the `PackageParser` trait is implemented.
pub struct CondaMetaJsonParser;
48
49#[derive(Debug, Deserialize, Serialize)]
50struct CondaMetaJson {
51    name: Option<String>,
52    version: Option<String>,
53    license: Option<String>,
54    url: Option<String>,
55    size: Option<u64>,
56    md5: Option<String>,
57    sha256: Option<String>,
58    requested_spec: Option<String>,
59    channel: Option<String>,
60    extracted_package_dir: Option<String>,
61    files: Option<Vec<String>>,
62    package_tarball_full_path: Option<String>,
63    #[serde(flatten)]
64    other: HashMap<String, Value>,
65}
66
67impl PackageParser for CondaMetaJsonParser {
68    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
69
70    fn is_match(path: &Path) -> bool {
71        path.extension().and_then(|ext| ext.to_str()) == Some("json")
72            && !path
73                .file_name()
74                .and_then(|name| name.to_str())
75                .is_some_and(|name| name.ends_with("expected.json"))
76            && path
77                .components()
78                .any(|component| component.as_os_str() == "conda-meta")
79    }
80
81    fn extract_packages(path: &Path) -> Vec<PackageData> {
82        let content = match read_file_to_string(path, None) {
83            Ok(c) => c,
84            Err(e) => {
85                warn!("Failed to read conda-meta JSON file {:?}: {}", path, e);
86                return vec![default_package_data()];
87            }
88        };
89
90        vec![parse_conda_meta_json(&content)]
91    }
92}
93
94pub(crate) fn parse_conda_meta_json(content: &str) -> PackageData {
95    parse_conda_meta_json_with_path(content, None)
96}
97
98pub(crate) fn parse_conda_meta_json_with_path(content: &str, _path: Option<&Path>) -> PackageData {
99    let metadata: CondaMetaJson = match serde_json::from_str(content) {
100        Ok(m) => m,
101        Err(e) => {
102            warn!("Failed to parse conda-meta JSON: {}", e);
103            return default_package_data();
104        }
105    };
106
107    // Build extra_data with specific fields
108    let mut extra_data = HashMap::new();
109    if let Some(ref requested_spec) = metadata.requested_spec {
110        extra_data.insert(
111            "requested_spec".to_string(),
112            Value::String(truncate_field(requested_spec.clone())),
113        );
114    }
115    if let Some(ref channel) = metadata.channel {
116        extra_data.insert(
117            "channel".to_string(),
118            Value::String(truncate_field(channel.clone())),
119        );
120    }
121    if let Some(ref extracted_package_dir) = metadata.extracted_package_dir {
122        extra_data.insert(
123            "extracted_package_dir".to_string(),
124            Value::String(truncate_field(extracted_package_dir.clone())),
125        );
126    }
127    if let Some(ref files) = metadata.files {
128        extra_data.insert(
129            "files".to_string(),
130            Value::Array(
131                files
132                    .iter()
133                    .map(|f| Value::String(truncate_field(f.clone())))
134                    .collect(),
135            ),
136        );
137    }
138    if let Some(ref package_tarball_full_path) = metadata.package_tarball_full_path {
139        extra_data.insert(
140            "package_tarball_full_path".to_string(),
141            Value::String(truncate_field(package_tarball_full_path.clone())),
142        );
143    }
144
145    let extra_data_opt = if extra_data.is_empty() {
146        None
147    } else {
148        Some(extra_data)
149    };
150
151    let purl = metadata.name.as_deref().and_then(|name| {
152        super::conda::build_purl(
153            "conda",
154            None,
155            name,
156            metadata.version.as_deref(),
157            None,
158            None,
159            None,
160        )
161    });
162
163    let file_references = build_conda_file_references(
164        metadata.extracted_package_dir.as_deref(),
165        metadata.package_tarball_full_path.as_deref(),
166        metadata.files.as_deref(),
167    );
168
169    PackageData {
170        package_type: Some(PACKAGE_TYPE),
171        primary_language: Some("Python".to_string()),
172        name: metadata.name.map(truncate_field),
173        version: metadata.version.map(truncate_field),
174        extracted_license_statement: metadata.license.map(truncate_field),
175        download_url: metadata.url.map(truncate_field),
176        size: metadata.size,
177        md5: metadata.md5.and_then(|h| Md5Digest::from_hex(&h).ok()),
178        sha256: metadata
179            .sha256
180            .and_then(|h| Sha256Digest::from_hex(&h).ok()),
181        extra_data: extra_data_opt,
182        file_references,
183        datasource_id: Some(DatasourceId::CondaMetaJson),
184        purl,
185        ..Default::default()
186    }
187}
188
189fn build_conda_file_references(
190    extracted_package_dir: Option<&str>,
191    package_tarball_full_path: Option<&str>,
192    files: Option<&[String]>,
193) -> Vec<FileReference> {
194    let mut refs = Vec::new();
195
196    if let Some(extracted_dir) = extracted_package_dir
197        && let Some(relative) = condense_to_pkgs_relative(extracted_dir)
198    {
199        refs.push(FileReference {
200            path: truncate_field(relative),
201            size: None,
202            sha1: None,
203            md5: None,
204            sha256: None,
205            sha512: None,
206            extra_data: None,
207        });
208    }
209
210    if let Some(tarball) = package_tarball_full_path
211        && let Some(relative) = condense_to_pkgs_relative(tarball)
212    {
213        refs.push(FileReference {
214            path: truncate_field(relative),
215            size: None,
216            sha1: None,
217            md5: None,
218            sha256: None,
219            sha512: None,
220            extra_data: None,
221        });
222    }
223
224    if let Some(files) = files {
225        for file in files.iter().take(MAX_ITERATION_COUNT) {
226            refs.push(FileReference {
227                path: truncate_field(file.clone()),
228                size: None,
229                sha1: None,
230                md5: None,
231                sha256: None,
232                sha512: None,
233                extra_data: None,
234            });
235        }
236    }
237
238    refs
239}
240
/// Rewrites an absolute package-cache path so that it starts at `pkgs/`.
///
/// Backslashes are normalized to forward slashes first, so Windows-style
/// paths are handled too. Everything after the *first* `/pkgs/` segment is
/// kept, including any later `/pkgs/` segments, and leading slashes in that
/// remainder are stripped before `pkgs/` is prepended.
///
/// Returns `None` when the path contains no `/pkgs/` segment.
fn condense_to_pkgs_relative(path: &str) -> Option<String> {
    let normalized = path.replace('\\', "/");
    // `split_once` keeps the whole tail; splitting on every occurrence and
    // taking the second piece would cut the path at the next `/pkgs/`.
    let (_, tail) = normalized.split_once("/pkgs/")?;
    Some(format!("pkgs/{}", tail.trim_start_matches('/')))
}
246
// Registers this parser with the crate-wide parser registry.
// NOTE(review): the argument meanings below are inferred from their values
// (description, path glob patterns, package type, primary language,
// documentation URL) — confirm against the `register_parser!` definition.
crate::register_parser!(
    "Conda installed package metadata JSON",
    &["*conda-meta/*.json"],
    "conda",
    "Python",
    Some("https://docs.conda.io/"),
);