Skip to main content

provenant/parsers/
conda_meta_json.rs

1//! Parser for Conda metadata JSON files.
2//!
3//! Extracts package metadata from `conda-meta/*.json` files which contain
4//! installed package information in Conda environments.
5//!
6//! # Supported Formats
7//! - `conda-meta/*.json` - Conda installed package metadata
8//!
9//! # Key Features
10//! - Installed package identification
11//! - License extraction
12//! - Download URLs and checksums
13//!
14//! # Implementation Notes
15//! - Format: JSON with package metadata
//! - Located in the `conda-meta/` directory of the scanned root filesystem
//! - Spec: <https://docs.conda.io/>
18
19use crate::models::{DatasourceId, FileReference, PackageType};
20use std::collections::HashMap;
21use std::fs;
22use std::path::Path;
23
24use log::warn;
25use serde::{Deserialize, Serialize};
26use serde_json::Value;
27
28use crate::models::PackageData;
29
30use super::PackageParser;
31
/// Package type stamped on every `PackageData` produced by this module and
/// exposed through the parser's `PackageParser::PACKAGE_TYPE` constant.
const PACKAGE_TYPE: PackageType = PackageType::Conda;
33
34fn default_package_data() -> PackageData {
35    PackageData {
36        package_type: Some(PACKAGE_TYPE),
37        primary_language: Some("Python".to_string()),
38        datasource_id: Some(DatasourceId::CondaMetaJson),
39        ..Default::default()
40    }
41}
42
/// Parser for Conda metadata JSON files.
///
/// Unit struct whose [`PackageParser`] implementation accepts `.json` files
/// that have a `conda-meta` component somewhere in their path.
pub struct CondaMetaJsonParser;
45
/// Deserialization target for a single `conda-meta/*.json` document.
///
/// Every named field is optional, so a document missing any of them still
/// deserializes; keys that are not named here are collected into `other`.
#[derive(Debug, Deserialize, Serialize)]
struct CondaMetaJson {
    /// Package name; copied to `PackageData::name` and used to build the purl.
    name: Option<String>,
    /// Package version; copied to `PackageData::version` and passed to the purl builder.
    version: Option<String>,
    /// License text; copied to `PackageData::extracted_license_statement`.
    license: Option<String>,
    /// Copied to `PackageData::download_url`.
    url: Option<String>,
    /// Copied to `PackageData::size`.
    size: Option<u64>,
    /// Copied to `PackageData::md5`.
    md5: Option<String>,
    /// Copied to `PackageData::sha256`.
    sha256: Option<String>,
    /// Stored under the `requested_spec` key of `extra_data`.
    requested_spec: Option<String>,
    /// Stored under the `channel` key of `extra_data`.
    channel: Option<String>,
    /// Stored in `extra_data` and, when it contains `/pkgs/`, turned into a file reference.
    extracted_package_dir: Option<String>,
    /// Stored in `extra_data`; each entry also becomes a file reference.
    files: Option<Vec<String>>,
    /// Stored in `extra_data` and, when it contains `/pkgs/`, turned into a file reference.
    package_tarball_full_path: Option<String>,
    /// Catch-all for every key not named above (via `serde(flatten)`).
    /// Not read by the parsing code in this file.
    #[serde(flatten)]
    other: HashMap<String, Value>,
}
63
64impl PackageParser for CondaMetaJsonParser {
65    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
66
67    fn is_match(path: &Path) -> bool {
68        path.extension().and_then(|ext| ext.to_str()) == Some("json")
69            && path
70                .components()
71                .any(|component| component.as_os_str() == "conda-meta")
72    }
73
74    fn extract_packages(path: &Path) -> Vec<PackageData> {
75        let content = match fs::read_to_string(path) {
76            Ok(c) => c,
77            Err(e) => {
78                warn!("Failed to read conda-meta JSON file {:?}: {}", path, e);
79                return vec![default_package_data()];
80            }
81        };
82
83        vec![parse_conda_meta_json(&content)]
84    }
85}
86
/// Parses the text of a conda-meta JSON document into a `PackageData`.
///
/// Convenience wrapper that delegates to `parse_conda_meta_json_with_path`
/// with no path.
pub(crate) fn parse_conda_meta_json(content: &str) -> PackageData {
    parse_conda_meta_json_with_path(content, None)
}
90
91pub(crate) fn parse_conda_meta_json_with_path(content: &str, _path: Option<&Path>) -> PackageData {
92    let metadata: CondaMetaJson = match serde_json::from_str(content) {
93        Ok(m) => m,
94        Err(e) => {
95            warn!("Failed to parse conda-meta JSON: {}", e);
96            return default_package_data();
97        }
98    };
99
100    // Build extra_data with specific fields
101    let mut extra_data = HashMap::new();
102    if let Some(ref requested_spec) = metadata.requested_spec {
103        extra_data.insert(
104            "requested_spec".to_string(),
105            Value::String(requested_spec.clone()),
106        );
107    }
108    if let Some(ref channel) = metadata.channel {
109        extra_data.insert("channel".to_string(), Value::String(channel.clone()));
110    }
111    if let Some(ref extracted_package_dir) = metadata.extracted_package_dir {
112        extra_data.insert(
113            "extracted_package_dir".to_string(),
114            Value::String(extracted_package_dir.clone()),
115        );
116    }
117    if let Some(ref files) = metadata.files {
118        extra_data.insert(
119            "files".to_string(),
120            Value::Array(files.iter().map(|f| Value::String(f.clone())).collect()),
121        );
122    }
123    if let Some(ref package_tarball_full_path) = metadata.package_tarball_full_path {
124        extra_data.insert(
125            "package_tarball_full_path".to_string(),
126            Value::String(package_tarball_full_path.clone()),
127        );
128    }
129
130    let extra_data_opt = if extra_data.is_empty() {
131        None
132    } else {
133        Some(extra_data)
134    };
135
136    let purl = metadata.name.as_deref().and_then(|name| {
137        super::conda::build_purl(
138            "conda",
139            None,
140            name,
141            metadata.version.as_deref(),
142            None,
143            None,
144            None,
145        )
146    });
147
148    let file_references = build_conda_file_references(
149        metadata.extracted_package_dir.as_deref(),
150        metadata.package_tarball_full_path.as_deref(),
151        metadata.files.as_deref(),
152    );
153
154    PackageData {
155        package_type: Some(PACKAGE_TYPE),
156        primary_language: Some("Python".to_string()),
157        name: metadata.name,
158        version: metadata.version,
159        extracted_license_statement: metadata.license,
160        download_url: metadata.url,
161        size: metadata.size,
162        md5: metadata.md5,
163        sha256: metadata.sha256,
164        extra_data: extra_data_opt,
165        file_references,
166        datasource_id: Some(DatasourceId::CondaMetaJson),
167        purl,
168        ..Default::default()
169    }
170}
171
172fn build_conda_file_references(
173    extracted_package_dir: Option<&str>,
174    package_tarball_full_path: Option<&str>,
175    files: Option<&[String]>,
176) -> Vec<FileReference> {
177    let mut refs = Vec::new();
178
179    if let Some(extracted_dir) = extracted_package_dir
180        && let Some(relative) = condense_to_pkgs_relative(extracted_dir)
181    {
182        refs.push(FileReference {
183            path: relative,
184            size: None,
185            sha1: None,
186            md5: None,
187            sha256: None,
188            sha512: None,
189            extra_data: None,
190        });
191    }
192
193    if let Some(tarball) = package_tarball_full_path
194        && let Some(relative) = condense_to_pkgs_relative(tarball)
195    {
196        refs.push(FileReference {
197            path: relative,
198            size: None,
199            sha1: None,
200            md5: None,
201            sha256: None,
202            sha512: None,
203            extra_data: None,
204        });
205    }
206
207    if let Some(files) = files {
208        for file in files {
209            refs.push(FileReference {
210                path: file.clone(),
211                size: None,
212                sha1: None,
213                md5: None,
214                sha256: None,
215                sha512: None,
216                extra_data: None,
217            });
218        }
219    }
220
221    refs
222}
223
/// Reduces an absolute package-cache path to one relative to the `pkgs`
/// directory, e.g. `/opt/conda/pkgs/numpy-1.0` becomes `pkgs/numpy-1.0`.
///
/// Backslashes are normalized to `/` first so Windows-style paths are handled.
/// Returns `None` when the path has no `/pkgs/` segment.
///
/// The *last* `/pkgs/` segment is used as the anchor. Splitting on every
/// occurrence and taking the second piece would return only the text between
/// the first two `/pkgs/` segments and drop the actual package entry when an
/// ancestor directory is also named `pkgs`.
fn condense_to_pkgs_relative(path: &str) -> Option<String> {
    let normalized = path.replace('\\', "/");
    let (_, relative) = normalized.rsplit_once("/pkgs/")?;
    // Guard against doubled separators such as `/pkgs//name`.
    Some(format!("pkgs/{}", relative.trim_start_matches('/')))
}
229
// Registers this parser through the crate-level `register_parser!` macro.
// NOTE(review): the macro is defined elsewhere; the arguments appear to be, in
// order, a human-readable description, path glob patterns, the package type,
// the primary language and a documentation URL — confirm against the macro.
crate::register_parser!(
    "Conda installed package metadata JSON",
    &["*conda-meta/*.json"],
    "conda",
    "Python",
    Some("https://docs.conda.io/"),
);