Skip to main content

provenant/parsers/
conda_meta_json.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Conda metadata JSON files.
5//!
6//! Extracts package metadata from `conda-meta/*.json` files which contain
7//! installed package information in Conda environments.
8//!
9//! # Supported Formats
10//! - `conda-meta/*.json` - Conda installed package metadata
11//!
12//! # Key Features
13//! - Installed package identification
14//! - License extraction
15//! - Download URLs and checksums
16//!
17//! # Implementation Notes
18//! - Format: JSON with package metadata
19//! - Located in conda-meta/ directory in rootfs
20//! - Spec: https://docs.conda.io/
21
22use crate::models::{DatasourceId, FileReference, Md5Digest, PackageType, Sha256Digest};
23use std::collections::HashMap;
24use std::path::Path;
25
26use crate::parser_warn as warn;
27use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30
31use crate::models::PackageData;
32
33use super::PackageParser;
34
35const PACKAGE_TYPE: PackageType = PackageType::Conda;
36
37fn default_package_data() -> PackageData {
38    PackageData {
39        package_type: Some(PACKAGE_TYPE),
40        primary_language: Some("Python".to_string()),
41        datasource_id: Some(DatasourceId::CondaMetaJson),
42        ..Default::default()
43    }
44}
45
/// Parser for Conda metadata JSON files.
///
/// This is a field-less marker type: all behavior lives in its
/// [`PackageParser`] implementation, which selects `*.json` files that sit
/// under a `conda-meta` path component and turns each one into a single
/// [`PackageData`] record.
pub struct CondaMetaJsonParser;
48
/// Typed view of one `conda-meta/*.json` record, filled in by serde.
///
/// Every named field is an `Option`, so a record that omits a key still
/// deserializes (the field is `None`). A key whose value has the wrong JSON
/// type makes deserialization fail, in which case the parser falls back to
/// the default package data.
#[derive(Debug, Deserialize, Serialize)]
struct CondaMetaJson {
    /// Package name; copied to `PackageData::name` and used to build the purl.
    name: Option<String>,
    /// Package version; copied to `PackageData::version` and passed to the purl builder.
    version: Option<String>,
    /// License text as written in the record; stored as the extracted license statement.
    license: Option<String>,
    /// Stored as `PackageData::download_url`.
    url: Option<String>,
    /// Stored as `PackageData::size` without conversion.
    size: Option<u64>,
    /// Hex MD5 digest; dropped silently if it does not parse as a digest.
    md5: Option<String>,
    /// Hex SHA-256 digest; dropped silently if it does not parse as a digest.
    sha256: Option<String>,
    /// Recorded verbatim (after truncation) in `extra_data["requested_spec"]`.
    requested_spec: Option<String>,
    /// Recorded verbatim (after truncation) in `extra_data["channel"]`.
    channel: Option<String>,
    /// Recorded in `extra_data` and, when it contains `/pkgs/`, also emitted
    /// as a `pkgs/...` file reference.
    extracted_package_dir: Option<String>,
    /// Recorded in `extra_data["files"]` and emitted as file references.
    files: Option<Vec<String>>,
    /// Recorded in `extra_data` and, when it contains `/pkgs/`, also emitted
    /// as a `pkgs/...` file reference.
    package_tarball_full_path: Option<String>,
    /// Catch-all for every other top-level key in the record. Nothing in the
    /// parsing code visible in this file reads it.
    #[serde(flatten)]
    other: HashMap<String, Value>,
}
66
67impl PackageParser for CondaMetaJsonParser {
68    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
69
70    fn is_match(path: &Path) -> bool {
71        path.extension().and_then(|ext| ext.to_str()) == Some("json")
72            && !path
73                .file_name()
74                .and_then(|name| name.to_str())
75                .is_some_and(|name| name.ends_with("expected.json"))
76            && path
77                .components()
78                .any(|component| component.as_os_str() == "conda-meta")
79    }
80
81    fn extract_packages(path: &Path) -> Vec<PackageData> {
82        let content = match read_file_to_string(path, None) {
83            Ok(c) => c,
84            Err(e) => {
85                warn!("Failed to read conda-meta JSON file {:?}: {}", path, e);
86                return vec![default_package_data()];
87            }
88        };
89
90        vec![parse_conda_meta_json(&content)]
91    }
92
93    fn metadata() -> Vec<super::metadata::ParserMetadata> {
94        vec![super::metadata::ParserMetadata {
95            description: "Conda installed package metadata JSON",
96            file_patterns: &["*conda-meta/*.json"],
97            package_type: "conda",
98            primary_language: "Python",
99            documentation_url: Some("https://docs.conda.io/"),
100        }]
101    }
102}
103
/// Parses the text of a conda-meta JSON record into a [`PackageData`].
///
/// Convenience wrapper that calls [`parse_conda_meta_json_with_path`] with no
/// path supplied.
pub(crate) fn parse_conda_meta_json(content: &str) -> PackageData {
    parse_conda_meta_json_with_path(content, None)
}
107
108pub(crate) fn parse_conda_meta_json_with_path(content: &str, _path: Option<&Path>) -> PackageData {
109    let metadata: CondaMetaJson = match serde_json::from_str(content) {
110        Ok(m) => m,
111        Err(e) => {
112            warn!("Failed to parse conda-meta JSON: {}", e);
113            return default_package_data();
114        }
115    };
116
117    // Build extra_data with specific fields
118    let mut extra_data = HashMap::new();
119    if let Some(ref requested_spec) = metadata.requested_spec {
120        extra_data.insert(
121            "requested_spec".to_string(),
122            Value::String(truncate_field(requested_spec.clone())),
123        );
124    }
125    if let Some(ref channel) = metadata.channel {
126        extra_data.insert(
127            "channel".to_string(),
128            Value::String(truncate_field(channel.clone())),
129        );
130    }
131    if let Some(ref extracted_package_dir) = metadata.extracted_package_dir {
132        extra_data.insert(
133            "extracted_package_dir".to_string(),
134            Value::String(truncate_field(extracted_package_dir.clone())),
135        );
136    }
137    if let Some(ref files) = metadata.files {
138        extra_data.insert(
139            "files".to_string(),
140            Value::Array(
141                files
142                    .iter()
143                    .map(|f| Value::String(truncate_field(f.clone())))
144                    .collect(),
145            ),
146        );
147    }
148    if let Some(ref package_tarball_full_path) = metadata.package_tarball_full_path {
149        extra_data.insert(
150            "package_tarball_full_path".to_string(),
151            Value::String(truncate_field(package_tarball_full_path.clone())),
152        );
153    }
154
155    let extra_data_opt = if extra_data.is_empty() {
156        None
157    } else {
158        Some(extra_data)
159    };
160
161    let purl = metadata.name.as_deref().and_then(|name| {
162        super::conda::build_purl(
163            "conda",
164            None,
165            name,
166            metadata.version.as_deref(),
167            None,
168            None,
169            None,
170        )
171    });
172
173    let file_references = build_conda_file_references(
174        metadata.extracted_package_dir.as_deref(),
175        metadata.package_tarball_full_path.as_deref(),
176        metadata.files.as_deref(),
177    );
178
179    PackageData {
180        package_type: Some(PACKAGE_TYPE),
181        primary_language: Some("Python".to_string()),
182        name: metadata.name.map(truncate_field),
183        version: metadata.version.map(truncate_field),
184        extracted_license_statement: metadata.license.map(truncate_field),
185        download_url: metadata.url.map(truncate_field),
186        size: metadata.size,
187        md5: metadata.md5.and_then(|h| Md5Digest::from_hex(&h).ok()),
188        sha256: metadata
189            .sha256
190            .and_then(|h| Sha256Digest::from_hex(&h).ok()),
191        extra_data: extra_data_opt,
192        file_references,
193        datasource_id: Some(DatasourceId::CondaMetaJson),
194        purl,
195        ..Default::default()
196    }
197}
198
199fn build_conda_file_references(
200    extracted_package_dir: Option<&str>,
201    package_tarball_full_path: Option<&str>,
202    files: Option<&[String]>,
203) -> Vec<FileReference> {
204    let mut refs = Vec::new();
205
206    if let Some(extracted_dir) = extracted_package_dir
207        && let Some(relative) = condense_to_pkgs_relative(extracted_dir)
208    {
209        refs.push(FileReference {
210            path: truncate_field(relative),
211            size: None,
212            sha1: None,
213            md5: None,
214            sha256: None,
215            sha512: None,
216            extra_data: None,
217        });
218    }
219
220    if let Some(tarball) = package_tarball_full_path
221        && let Some(relative) = condense_to_pkgs_relative(tarball)
222    {
223        refs.push(FileReference {
224            path: truncate_field(relative),
225            size: None,
226            sha1: None,
227            md5: None,
228            sha256: None,
229            sha512: None,
230            extra_data: None,
231        });
232    }
233
234    if let Some(files) = files {
235        for file in files.iter().take(MAX_ITERATION_COUNT) {
236            refs.push(FileReference {
237                path: truncate_field(file.clone()),
238                size: None,
239                sha1: None,
240                md5: None,
241                sha256: None,
242                sha512: None,
243                extra_data: None,
244            });
245        }
246    }
247
248    refs
249}
250
/// Rewrites an absolute package-cache path to a `pkgs/...` relative path.
///
/// Backslashes are normalized to `/` first, so Windows-style paths are
/// handled. Returns `None` when the path has no `/pkgs/` segment.
///
/// The path is split at the *last* `/pkgs/` segment. Taking the segment
/// between the first and second marker instead would cut
/// `/data/pkgs/miniconda3/pkgs/zlib-1.2.13` down to `pkgs/miniconda3`, which
/// is not a suffix of the real path.
// NOTE(review): anchoring on the last marker assumes cache entries live
// directly under the cache directory (`<root>/pkgs/<entry>`) — confirm
// against how callers resolve these references.
fn condense_to_pkgs_relative(path: &str) -> Option<String> {
    let normalized = path.replace('\\', "/");
    let (_, relative) = normalized.rsplit_once("/pkgs/")?;
    // Tolerate doubled separators right after the marker (`/pkgs//foo`).
    Some(format!("pkgs/{}", relative.trim_start_matches('/')))
}