Skip to main content

provenant/parsers/
conan_data.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Conan conandata.yml files.
5//!
6//! Extracts package metadata from `conandata.yml` files which contain
7//! external source information for Conan packages.
8//!
9//! # Supported Formats
10//! - `conandata.yml` - Conan external source metadata
11//!
12//! # Key Features
13//! - Version-specific source URLs
14//! - SHA256 checksums
15//! - Multiple source mirrors support
//! - Patch metadata extraction (goes beyond the Python implementation, which ignores patches)
17//!
18//! # Implementation Notes
19//! - Format: YAML with `sources` dict containing version→{url, sha256}
20//! - Each version can have multiple URLs (list or single string)
21//! - Patches section contains version→[{patch_file, patch_description, patch_type}]
22//! - Spec: https://docs.conan.io/2/tutorial/creating_packages/handle_sources_in_packages.html
23
24use crate::models::{DatasourceId, PackageType, Sha256Digest};
25use std::collections::HashMap;
26use std::path::Path;
27
28use crate::parser_warn as warn;
29use serde::{Deserialize, Serialize};
30use serde_json::json;
31
32use crate::models::PackageData;
33use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
34
35use super::PackageParser;
36
/// Package type stamped on every `PackageData` record produced in this module.
const PACKAGE_TYPE: PackageType = PackageType::Conan;
38
39fn default_package_data() -> PackageData {
40    PackageData {
41        package_type: Some(PACKAGE_TYPE),
42        primary_language: Some("C++".to_string()),
43        datasource_id: Some(DatasourceId::ConanConanDataYml),
44        ..Default::default()
45    }
46}
47
/// Parser for Conan `conandata.yml` files.
///
/// Unit struct carrying no state; it exists so that `PackageParser` can be
/// implemented for it.
pub struct ConanDataParser;
50
/// Top-level shape of a `conandata.yml` document as seen by serde.
///
/// Both sections are optional, so a document may omit either of them.
#[derive(Debug, Deserialize, Serialize)]
struct ConanDataYml {
    /// Source entries keyed by version string.
    sources: Option<HashMap<String, SourcesValue>>,
    /// Patch entries keyed by version string; looked up with the keys of `sources`.
    patches: Option<HashMap<String, PatchesValue>>,
}
56
/// Value stored under one version key of the `sources` section.
///
/// The enum is `#[serde(untagged)]`: serde tries the variants in declaration
/// order and keeps the first one that deserializes, so the variant order matters.
#[derive(Debug, Deserialize, Serialize)]
#[serde(untagged)]
enum SourcesValue {
    /// A single source mapping.
    Single(SourceInfo),
    /// A sequence of source mappings.
    Multiple(Vec<SourceInfo>),
}
63
/// The `url` field of a source entry: one URL string or a list of URL strings.
///
/// Untagged, so serde picks the first variant (in declaration order) that matches.
#[derive(Debug, Deserialize, Serialize)]
#[serde(untagged)]
enum UrlValue {
    /// A single URL.
    Single(String),
    /// Several URLs; the first one is treated as the primary download URL.
    Multiple(Vec<String>),
}
70
71#[derive(Debug, Deserialize, Serialize)]
72#[serde(untagged)]
73enum PatchesValue {
74    List(Vec<PatchInfo>),
75    String(String),
76}
77
/// One patch description from the `patches` section; every field is optional.
#[derive(Debug, Deserialize, Serialize)]
struct PatchInfo {
    /// Value of the `patch_file` key, if present.
    patch_file: Option<String>,
    /// Value of the `patch_description` key, if present.
    patch_description: Option<String>,
    /// Value of the `patch_type` key, if present.
    patch_type: Option<String>,
}
84
/// One source entry of a version: where to download it and its checksum.
#[derive(Debug, Deserialize, Serialize)]
struct SourceInfo {
    /// Download location(s); either a single URL or a list of URLs.
    url: Option<UrlValue>,
    /// Checksum string exactly as written in the file (not validated at this stage).
    sha256: Option<String>,
}
90
91impl SourceInfo {
92    fn primary_download_url(&self) -> Option<String> {
93        match &self.url {
94            Some(UrlValue::Single(url)) => Some(truncate_field(url.clone())),
95            Some(UrlValue::Multiple(urls)) if !urls.is_empty() => {
96                Some(truncate_field(urls[0].clone()))
97            }
98            _ => None,
99        }
100    }
101
102    fn additional_data_json(&self) -> serde_json::Value {
103        let mut entry = serde_json::Map::new();
104
105        if let Some(url) = &self.url {
106            match url {
107                UrlValue::Single(value) => {
108                    entry.insert("url".to_string(), json!(truncate_field(value.clone())));
109                }
110                UrlValue::Multiple(values) => {
111                    let urls: Vec<_> = values.iter().cloned().map(truncate_field).collect();
112                    entry.insert("url".to_string(), json!(urls));
113                }
114            }
115        }
116
117        if let Some(sha256) = &self.sha256 {
118            entry.insert("sha256".to_string(), json!(sha256));
119        }
120
121        serde_json::Value::Object(entry)
122    }
123}
124
125fn sources_to_infos(sources_value: SourcesValue) -> Vec<SourceInfo> {
126    match sources_value {
127        SourcesValue::Single(source) => vec![source],
128        SourcesValue::Multiple(sources) => sources,
129    }
130}
131
132impl PackageParser for ConanDataParser {
133    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
134
135    fn is_match(path: &Path) -> bool {
136        path.to_str().is_some_and(|p| p.ends_with("/conandata.yml"))
137    }
138
139    fn extract_packages(path: &Path) -> Vec<PackageData> {
140        let content = match read_file_to_string(path, None) {
141            Ok(c) => c,
142            Err(e) => {
143                warn!("Failed to read conandata.yml file {:?}: {}", path, e);
144                return vec![default_package_data()];
145            }
146        };
147
148        parse_conandata_yml(&content)
149    }
150}
151
152pub(crate) fn parse_conandata_yml(content: &str) -> Vec<PackageData> {
153    let data: ConanDataYml = match yaml_serde::from_str(content) {
154        Ok(d) => d,
155        Err(e) => {
156            warn!("Failed to parse conandata.yml: {}", e);
157            return vec![default_package_data()];
158        }
159    };
160
161    let Some(sources) = data.sources else {
162        return vec![default_package_data()];
163    };
164
165    let mut packages = Vec::new();
166
167    for (version, sources_value) in sources.into_iter().take(MAX_ITERATION_COUNT) {
168        let source_infos = sources_to_infos(sources_value);
169        let mut extra_data = HashMap::new();
170
171        let primary_index = source_infos
172            .iter()
173            .position(|source_info| {
174                source_info.url.is_some()
175                    || source_info
176                        .sha256
177                        .as_ref()
178                        .is_some_and(|value| !value.is_empty())
179            })
180            .unwrap_or(0);
181
182        let primary_source = source_infos.get(primary_index);
183
184        let download_url = primary_source.and_then(SourceInfo::primary_download_url);
185
186        if let Some(UrlValue::Multiple(urls)) =
187            primary_source.and_then(|source| source.url.as_ref())
188            && urls.len() > 1
189        {
190            let mirror_urls: Vec<_> = urls.iter().cloned().map(truncate_field).collect();
191            extra_data.insert("mirror_urls".to_string(), json!(mirror_urls));
192        }
193
194        if source_infos.len() > 1 {
195            let additional_sources: Vec<_> = source_infos
196                .iter()
197                .enumerate()
198                .filter(|(index, _)| *index != primary_index)
199                .map(|(_, source_info)| source_info.additional_data_json())
200                .collect();
201
202            if !additional_sources.is_empty() {
203                extra_data.insert("additional_sources".to_string(), json!(additional_sources));
204            }
205        }
206
207        if let Some(ref patches_map) = data.patches
208            && let Some(patches_value) = patches_map.get(&version)
209        {
210            let patches_json = match patches_value {
211                PatchesValue::List(patches) => {
212                    let patches_data: Vec<_> = patches
213                        .iter()
214                        .map(|p| {
215                            json!({
216                                "patch_file": p.patch_file,
217                                "patch_description": p.patch_description,
218                                "patch_type": p.patch_type,
219                            })
220                        })
221                        .collect();
222                    json!(patches_data)
223                }
224                PatchesValue::String(s) => json!(s),
225            };
226            extra_data.insert("patches".to_string(), patches_json);
227        }
228
229        packages.push(PackageData {
230            package_type: Some(PACKAGE_TYPE),
231            primary_language: Some("C++".to_string()),
232            version: Some(truncate_field(version)),
233            download_url,
234            sha256: primary_source
235                .and_then(|source_info| source_info.sha256.as_deref())
236                .and_then(|hash| Sha256Digest::from_hex(hash).ok()),
237            extra_data: if extra_data.is_empty() {
238                None
239            } else {
240                Some(extra_data)
241            },
242            datasource_id: Some(DatasourceId::ConanConanDataYml),
243            ..Default::default()
244        });
245    }
246
247    if packages.is_empty() {
248        packages.push(default_package_data());
249    }
250
251    packages
252}
253
// Parser metadata handed to the crate-level `register_parser!` macro: a description,
// the path glob(s), the package type name, the primary language and a documentation
// URL. NOTE(review): the meaning of each positional argument is inferred from the
// values — confirm against the macro definition.
crate::register_parser!(
    "Conan external source metadata",
    &["*/conandata.yml"],
    "conan",
    "C++",
    Some("https://docs.conan.io/2/tutorial/creating_packages/handle_sources_in_packages.html"),
);