Skip to main content

provenant/parsers/
conan_data.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Conan conandata.yml files.
5//!
6//! Extracts package metadata from `conandata.yml` files which contain
7//! external source information for Conan packages.
8//!
9//! # Supported Formats
10//! - `conandata.yml` - Conan external source metadata
11//!
12//! # Key Features
13//! - Version-specific source URLs
14//! - SHA256 checksums
15//! - Multiple source mirrors support
//! - Patch metadata extraction (goes beyond the Python implementation, which ignores patches)
17//!
18//! # Implementation Notes
19//! - Format: YAML with `sources` dict containing version→{url, sha256}
20//! - Each version can have multiple URLs (list or single string)
21//! - Patches section contains version→[{patch_file, patch_description, patch_type}]
//! - Spec: <https://docs.conan.io/2/tutorial/creating_packages/handle_sources_in_packages.html>
23
24use crate::models::{DatasourceId, PackageType, Sha256Digest};
25use std::collections::HashMap;
26use std::path::Path;
27
28use crate::parser_warn as warn;
29use serde::{Deserialize, Serialize};
30use serde_json::json;
31
32use crate::models::PackageData;
33use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
34
35use super::PackageParser;
36
37const PACKAGE_TYPE: PackageType = PackageType::Conan;
38
39fn default_package_data() -> PackageData {
40    PackageData {
41        package_type: Some(PACKAGE_TYPE),
42        primary_language: Some("C++".to_string()),
43        datasource_id: Some(DatasourceId::ConanConanDataYml),
44        ..Default::default()
45    }
46}
47
/// Parser for Conan `conandata.yml` files.
///
/// Stateless marker type: all behaviour lives in its `PackageParser`
/// implementation, whose functions take the file path as an argument.
pub struct ConanDataParser;
50
51#[derive(Debug, Deserialize, Serialize)]
52struct ConanDataYml {
53    sources: Option<HashMap<String, SourcesValue>>,
54    patches: Option<HashMap<String, PatchesValue>>,
55}
56
57#[derive(Debug, Deserialize, Serialize)]
58#[serde(untagged)]
59enum SourcesValue {
60    Single(SourceInfo),
61    Multiple(Vec<SourceInfo>),
62}
63
64#[derive(Debug, Deserialize, Serialize)]
65#[serde(untagged)]
66enum UrlValue {
67    Single(String),
68    Multiple(Vec<String>),
69}
70
71#[derive(Debug, Deserialize, Serialize)]
72#[serde(untagged)]
73enum PatchesValue {
74    List(Vec<PatchInfo>),
75    String(String),
76}
77
78#[derive(Debug, Deserialize, Serialize)]
79struct PatchInfo {
80    patch_file: Option<String>,
81    patch_description: Option<String>,
82    patch_type: Option<String>,
83}
84
85#[derive(Debug, Deserialize, Serialize)]
86struct SourceInfo {
87    url: Option<UrlValue>,
88    sha256: Option<String>,
89}
90
91impl SourceInfo {
92    fn primary_download_url(&self) -> Option<String> {
93        match &self.url {
94            Some(UrlValue::Single(url)) => Some(truncate_field(url.clone())),
95            Some(UrlValue::Multiple(urls)) if !urls.is_empty() => {
96                Some(truncate_field(urls[0].clone()))
97            }
98            _ => None,
99        }
100    }
101
102    fn additional_data_json(&self) -> serde_json::Value {
103        let mut entry = serde_json::Map::new();
104
105        if let Some(url) = &self.url {
106            match url {
107                UrlValue::Single(value) => {
108                    entry.insert("url".to_string(), json!(truncate_field(value.clone())));
109                }
110                UrlValue::Multiple(values) => {
111                    let urls: Vec<_> = values.iter().cloned().map(truncate_field).collect();
112                    entry.insert("url".to_string(), json!(urls));
113                }
114            }
115        }
116
117        if let Some(sha256) = &self.sha256 {
118            entry.insert("sha256".to_string(), json!(sha256));
119        }
120
121        serde_json::Value::Object(entry)
122    }
123}
124
125fn sources_to_infos(sources_value: SourcesValue) -> Vec<SourceInfo> {
126    match sources_value {
127        SourcesValue::Single(source) => vec![source],
128        SourcesValue::Multiple(sources) => sources,
129    }
130}
131
132impl PackageParser for ConanDataParser {
133    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
134
135    fn is_match(path: &Path) -> bool {
136        path.to_str().is_some_and(|p| p.ends_with("/conandata.yml"))
137    }
138
139    fn extract_packages(path: &Path) -> Vec<PackageData> {
140        let content = match read_file_to_string(path, None) {
141            Ok(c) => c,
142            Err(e) => {
143                warn!("Failed to read conandata.yml file {:?}: {}", path, e);
144                return vec![default_package_data()];
145            }
146        };
147
148        parse_conandata_yml(&content)
149    }
150
151    fn metadata() -> Vec<super::metadata::ParserMetadata> {
152        vec![super::metadata::ParserMetadata {
153            description: "Conan external source metadata",
154            file_patterns: &["*/conandata.yml"],
155            package_type: "conan",
156            primary_language: "C++",
157            documentation_url: Some(
158                "https://docs.conan.io/2/tutorial/creating_packages/handle_sources_in_packages.html",
159            ),
160        }]
161    }
162}
163
164pub(crate) fn parse_conandata_yml(content: &str) -> Vec<PackageData> {
165    let data: ConanDataYml = match yaml_serde::from_str(content) {
166        Ok(d) => d,
167        Err(e) => {
168            warn!("Failed to parse conandata.yml: {}", e);
169            return vec![default_package_data()];
170        }
171    };
172
173    let Some(sources) = data.sources else {
174        return vec![default_package_data()];
175    };
176
177    let mut packages = Vec::new();
178
179    for (version, sources_value) in sources.into_iter().take(MAX_ITERATION_COUNT) {
180        let source_infos = sources_to_infos(sources_value);
181        let mut extra_data = HashMap::new();
182
183        let primary_index = source_infos
184            .iter()
185            .position(|source_info| {
186                source_info.url.is_some()
187                    || source_info
188                        .sha256
189                        .as_ref()
190                        .is_some_and(|value| !value.is_empty())
191            })
192            .unwrap_or(0);
193
194        let primary_source = source_infos.get(primary_index);
195
196        let download_url = primary_source.and_then(SourceInfo::primary_download_url);
197
198        if let Some(UrlValue::Multiple(urls)) =
199            primary_source.and_then(|source| source.url.as_ref())
200            && urls.len() > 1
201        {
202            let mirror_urls: Vec<_> = urls.iter().cloned().map(truncate_field).collect();
203            extra_data.insert("mirror_urls".to_string(), json!(mirror_urls));
204        }
205
206        if source_infos.len() > 1 {
207            let additional_sources: Vec<_> = source_infos
208                .iter()
209                .enumerate()
210                .filter(|(index, _)| *index != primary_index)
211                .map(|(_, source_info)| source_info.additional_data_json())
212                .collect();
213
214            if !additional_sources.is_empty() {
215                extra_data.insert("additional_sources".to_string(), json!(additional_sources));
216            }
217        }
218
219        if let Some(ref patches_map) = data.patches
220            && let Some(patches_value) = patches_map.get(&version)
221        {
222            let patches_json = match patches_value {
223                PatchesValue::List(patches) => {
224                    let patches_data: Vec<_> = patches
225                        .iter()
226                        .map(|p| {
227                            json!({
228                                "patch_file": p.patch_file,
229                                "patch_description": p.patch_description,
230                                "patch_type": p.patch_type,
231                            })
232                        })
233                        .collect();
234                    json!(patches_data)
235                }
236                PatchesValue::String(s) => json!(s),
237            };
238            extra_data.insert("patches".to_string(), patches_json);
239        }
240
241        packages.push(PackageData {
242            package_type: Some(PACKAGE_TYPE),
243            primary_language: Some("C++".to_string()),
244            version: Some(truncate_field(version)),
245            download_url,
246            sha256: primary_source
247                .and_then(|source_info| source_info.sha256.as_deref())
248                .and_then(|hash| Sha256Digest::from_hex(hash).ok()),
249            extra_data: if extra_data.is_empty() {
250                None
251            } else {
252                Some(extra_data)
253            },
254            datasource_id: Some(DatasourceId::ConanConanDataYml),
255            ..Default::default()
256        });
257    }
258
259    if packages.is_empty() {
260        packages.push(default_package_data());
261    }
262
263    packages
264}