Skip to main content

provenant/parsers/
conda_meta_json.rs

//! Parser for Conda metadata JSON files.
//!
//! Extracts package metadata from `conda-meta/*.json` files, which describe
//! the packages installed in a Conda environment.
//!
//! # Supported Formats
//! - `conda-meta/*.json` - Conda installed package metadata
//!
//! # Key Features
//! - Installed package identification
//! - License extraction
//! - Download URLs and checksums
//!
//! # Implementation Notes
//! - Format: JSON with package metadata
//! - Located in the `conda-meta/` directory of a rootfs
//! - Spec: https://docs.conda.io/
18
19use crate::models::{DatasourceId, FileReference, Md5Digest, PackageType, Sha256Digest};
20use std::collections::HashMap;
21use std::path::Path;
22
23use crate::parser_warn as warn;
24use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
25use serde::{Deserialize, Serialize};
26use serde_json::Value;
27
28use crate::models::PackageData;
29
30use super::PackageParser;
31
32const PACKAGE_TYPE: PackageType = PackageType::Conda;
33
34fn default_package_data() -> PackageData {
35    PackageData {
36        package_type: Some(PACKAGE_TYPE),
37        primary_language: Some("Python".to_string()),
38        datasource_id: Some(DatasourceId::CondaMetaJson),
39        ..Default::default()
40    }
41}
42
/// Parser for `conda-meta/*.json` files.
///
/// Stateless marker type; the behaviour lives in its `PackageParser`
/// implementation.
pub struct CondaMetaJsonParser;
45
46#[derive(Debug, Deserialize, Serialize)]
47struct CondaMetaJson {
48    name: Option<String>,
49    version: Option<String>,
50    license: Option<String>,
51    url: Option<String>,
52    size: Option<u64>,
53    md5: Option<String>,
54    sha256: Option<String>,
55    requested_spec: Option<String>,
56    channel: Option<String>,
57    extracted_package_dir: Option<String>,
58    files: Option<Vec<String>>,
59    package_tarball_full_path: Option<String>,
60    #[serde(flatten)]
61    other: HashMap<String, Value>,
62}
63
64impl PackageParser for CondaMetaJsonParser {
65    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
66
67    fn is_match(path: &Path) -> bool {
68        path.extension().and_then(|ext| ext.to_str()) == Some("json")
69            && !path
70                .file_name()
71                .and_then(|name| name.to_str())
72                .is_some_and(|name| name.ends_with("expected.json"))
73            && path
74                .components()
75                .any(|component| component.as_os_str() == "conda-meta")
76    }
77
78    fn extract_packages(path: &Path) -> Vec<PackageData> {
79        let content = match read_file_to_string(path, None) {
80            Ok(c) => c,
81            Err(e) => {
82                warn!("Failed to read conda-meta JSON file {:?}: {}", path, e);
83                return vec![default_package_data()];
84            }
85        };
86
87        vec![parse_conda_meta_json(&content)]
88    }
89}
90
91pub(crate) fn parse_conda_meta_json(content: &str) -> PackageData {
92    parse_conda_meta_json_with_path(content, None)
93}
94
95pub(crate) fn parse_conda_meta_json_with_path(content: &str, _path: Option<&Path>) -> PackageData {
96    let metadata: CondaMetaJson = match serde_json::from_str(content) {
97        Ok(m) => m,
98        Err(e) => {
99            warn!("Failed to parse conda-meta JSON: {}", e);
100            return default_package_data();
101        }
102    };
103
104    // Build extra_data with specific fields
105    let mut extra_data = HashMap::new();
106    if let Some(ref requested_spec) = metadata.requested_spec {
107        extra_data.insert(
108            "requested_spec".to_string(),
109            Value::String(truncate_field(requested_spec.clone())),
110        );
111    }
112    if let Some(ref channel) = metadata.channel {
113        extra_data.insert(
114            "channel".to_string(),
115            Value::String(truncate_field(channel.clone())),
116        );
117    }
118    if let Some(ref extracted_package_dir) = metadata.extracted_package_dir {
119        extra_data.insert(
120            "extracted_package_dir".to_string(),
121            Value::String(truncate_field(extracted_package_dir.clone())),
122        );
123    }
124    if let Some(ref files) = metadata.files {
125        extra_data.insert(
126            "files".to_string(),
127            Value::Array(
128                files
129                    .iter()
130                    .map(|f| Value::String(truncate_field(f.clone())))
131                    .collect(),
132            ),
133        );
134    }
135    if let Some(ref package_tarball_full_path) = metadata.package_tarball_full_path {
136        extra_data.insert(
137            "package_tarball_full_path".to_string(),
138            Value::String(truncate_field(package_tarball_full_path.clone())),
139        );
140    }
141
142    let extra_data_opt = if extra_data.is_empty() {
143        None
144    } else {
145        Some(extra_data)
146    };
147
148    let purl = metadata.name.as_deref().and_then(|name| {
149        super::conda::build_purl(
150            "conda",
151            None,
152            name,
153            metadata.version.as_deref(),
154            None,
155            None,
156            None,
157        )
158    });
159
160    let file_references = build_conda_file_references(
161        metadata.extracted_package_dir.as_deref(),
162        metadata.package_tarball_full_path.as_deref(),
163        metadata.files.as_deref(),
164    );
165
166    PackageData {
167        package_type: Some(PACKAGE_TYPE),
168        primary_language: Some("Python".to_string()),
169        name: metadata.name.map(truncate_field),
170        version: metadata.version.map(truncate_field),
171        extracted_license_statement: metadata.license.map(truncate_field),
172        download_url: metadata.url.map(truncate_field),
173        size: metadata.size,
174        md5: metadata.md5.and_then(|h| Md5Digest::from_hex(&h).ok()),
175        sha256: metadata
176            .sha256
177            .and_then(|h| Sha256Digest::from_hex(&h).ok()),
178        extra_data: extra_data_opt,
179        file_references,
180        datasource_id: Some(DatasourceId::CondaMetaJson),
181        purl,
182        ..Default::default()
183    }
184}
185
186fn build_conda_file_references(
187    extracted_package_dir: Option<&str>,
188    package_tarball_full_path: Option<&str>,
189    files: Option<&[String]>,
190) -> Vec<FileReference> {
191    let mut refs = Vec::new();
192
193    if let Some(extracted_dir) = extracted_package_dir
194        && let Some(relative) = condense_to_pkgs_relative(extracted_dir)
195    {
196        refs.push(FileReference {
197            path: truncate_field(relative),
198            size: None,
199            sha1: None,
200            md5: None,
201            sha256: None,
202            sha512: None,
203            extra_data: None,
204        });
205    }
206
207    if let Some(tarball) = package_tarball_full_path
208        && let Some(relative) = condense_to_pkgs_relative(tarball)
209    {
210        refs.push(FileReference {
211            path: truncate_field(relative),
212            size: None,
213            sha1: None,
214            md5: None,
215            sha256: None,
216            sha512: None,
217            extra_data: None,
218        });
219    }
220
221    if let Some(files) = files {
222        for file in files.iter().take(MAX_ITERATION_COUNT) {
223            refs.push(FileReference {
224                path: truncate_field(file.clone()),
225                size: None,
226                sha1: None,
227                md5: None,
228                sha256: None,
229                sha512: None,
230                extra_data: None,
231            });
232        }
233    }
234
235    refs
236}
237
/// Rewrites a path that passes through a `pkgs` directory as a path relative
/// to the parent of that directory, e.g. `/opt/conda/pkgs/foo-1.0` becomes
/// `pkgs/foo-1.0`.
///
/// Backslashes are converted to forward slashes first, so Windows-style
/// paths are handled as well. Returns `None` when the normalised path
/// contains no `/pkgs/` separator.
fn condense_to_pkgs_relative(path: &str) -> Option<String> {
    let normalized = path.replace('\\', "/");
    // `split_once` keeps the entire remainder after the first `/pkgs/`.
    // Splitting on every occurrence and taking the second piece would cut
    // the path short whenever `/pkgs/` appears again further down.
    let (_, relative) = normalized.split_once("/pkgs/")?;
    Some(format!("pkgs/{}", relative.trim_start_matches('/')))
}
243
244crate::register_parser!(
245    "Conda installed package metadata JSON",
246    &["*conda-meta/*.json"],
247    "conda",
248    "Python",
249    Some("https://docs.conda.io/"),
250);