Skip to main content

provenant/parsers/
conda.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Conda/Anaconda package manifest files.
5//!
6//! Extracts package metadata and dependencies from Conda ecosystem manifest files
7//! supporting both recipe definitions and environment specifications.
8//!
9//! # Supported Formats
10//! - meta.yaml (Conda recipe metadata with Jinja2 templating support)
11//! - conda.yaml/environment.yml (Conda environment dependency specifications)
12//!
13//! # Key Features
14//! - YAML parsing for environment files
15//! - Dependency extraction from dependencies and build_requirements sections
16//! - Channel specification and platform detection
17//! - Version constraint parsing for Conda version specifiers
18//! - Package URL (purl) generation for conda packages
19//! - Limited meta.yaml support (note: Jinja2 templating not fully resolved)
20//!
21//! # Implementation Notes
22//! - Uses YAML parsing via `yaml_serde`
23//! - meta.yaml: Jinja2 templates not evaluated (use rendered YAML if available)
24//! - environment.yml: Full dependency specification support
25//! - Graceful error handling with `warn!()` logs
26//!
27//! # References
28//! - <https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html>
29//! - <https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html>
30
31use std::collections::HashMap;
32use std::path::Path;
33
34use crate::parser_warn as warn;
35use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
36use regex::Regex;
37use yaml_serde::Value;
38
39use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Sha256Digest};
40
41use super::PackageParser;
42use super::license_normalization::{
43    DeclaredLicenseMatchMetadata, build_declared_license_data_from_pair,
44    normalize_spdx_declared_license,
45};
46
47fn default_package_data(datasource_id: Option<DatasourceId>) -> PackageData {
48    PackageData {
49        package_type: Some(CondaMetaYamlParser::PACKAGE_TYPE),
50        datasource_id,
51        ..Default::default()
52    }
53}
54
/// Returns `true` when `path` names a `recipe.yaml` or `recipe.yml` file whose
/// immediate parent directory is called `recipe`.
///
/// Paths without a UTF-8 file name or parent directory name never match.
fn is_conda_recipe_yaml_path(path: &Path) -> bool {
    let file_name = path.file_name().and_then(|n| n.to_str());
    if !matches!(file_name, Some("recipe.yaml" | "recipe.yml")) {
        return false;
    }

    let parent_name = path
        .parent()
        .and_then(Path::file_name)
        .and_then(|n| n.to_str());
    parent_name == Some("recipe")
}
67
/// Build a Package URL (purl) string for a Conda or PyPI package.
///
/// - `"conda"`: `pkg:conda/[namespace/]name[@version]`
/// - `"pypi"`: `pkg:pypi/name[@version]` (the namespace is not emitted)
/// - any other type: `pkg:<type>/name` (neither namespace nor version is emitted)
///
/// The qualifiers, subpath and extras arguments are accepted but unused.
/// The function always returns `Some`.
pub(crate) fn build_purl(
    package_type: &str,
    namespace: Option<&str>,
    name: &str,
    version: Option<&str>,
    _qualifiers: Option<&str>,
    _subpath: Option<&str>,
    _extras: Option<&str>,
) -> Option<String> {
    // Optional `@version` suffix shared by the conda and pypi forms.
    let version_suffix = version.map(|v| format!("@{v}")).unwrap_or_default();

    let purl = match (package_type, namespace) {
        ("conda", Some(ns)) => format!("pkg:conda/{ns}/{name}{version_suffix}"),
        ("conda", None) => format!("pkg:conda/{name}{version_suffix}"),
        ("pypi", _) => format!("pkg:pypi/{name}{version_suffix}"),
        (other, _) => format!("pkg:{other}/{name}"),
    };
    Some(purl)
}
100
101fn build_conda_package_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
102    let name = name?;
103    build_purl("conda", None, name, version, None, None, None)
104}
105
106fn yaml_value_to_string(value: &Value) -> Option<String> {
107    match value {
108        Value::String(s) => Some(truncate_field(s.clone())),
109        Value::Number(n) => Some(truncate_field(n.to_string())),
110        Value::Bool(b) => Some(truncate_field(b.to_string())),
111        _ => None,
112    }
113}
114
/// Returns the trimmed text between a leading `{%` and the first `%}` that
/// follows it.
///
/// Yields `None` when `trimmed_line` does not start with `{%` or has no
/// closing `%}` after the opener. The closing delimiter is searched only in
/// the text after the opener, so a degenerate line such as `{%}` (where the
/// `%` is shared by both delimiters) yields `None` instead of producing an
/// inverted slice range and panicking.
fn extract_jinja_statement(trimmed_line: &str) -> Option<&str> {
    let after_open = trimmed_line.strip_prefix("{%")?;
    let close = after_open.find("%}")?;
    Some(after_open[..close].trim())
}
123
124fn extract_conda_requirement_name(req: &str) -> Option<String> {
125    let req = req.trim();
126    if req.is_empty() {
127        return None;
128    }
129
130    let req_without_ns = req.rsplit_once("::").map(|(_, rest)| rest).unwrap_or(req);
131
132    let name = req_without_ns
133        .split_whitespace()
134        .next()
135        .unwrap_or(req_without_ns)
136        .split(['=', '<', '>', '!', '~'])
137        .next()
138        .unwrap_or(req_without_ns)
139        .trim();
140
141    if name.is_empty() {
142        None
143    } else {
144        Some(truncate_field(name.to_string()))
145    }
146}
147
148/// Conda recipe manifest (meta.yaml) parser.
149///
150/// Extracts package metadata and dependencies from Conda recipe files, which
151/// define how to build a Conda package. Handles Jinja2 templating used in
152/// recipe files for variable substitution.
153pub struct CondaMetaYamlParser;
154
155impl PackageParser for CondaMetaYamlParser {
156    const PACKAGE_TYPE: PackageType = PackageType::Conda;
157
158    fn is_match(path: &Path) -> bool {
159        // Match */meta.yaml following Python reference logic
160        path.file_name()
161            .is_some_and(|name| name == "meta.yaml" || name == "meta.yml")
162            || is_conda_recipe_yaml_path(path)
163    }
164
165    fn extract_packages(path: &Path) -> Vec<PackageData> {
166        let contents = match read_file_to_string(path, None) {
167            Ok(c) => c,
168            Err(e) => {
169                warn!("Failed to read {}: {}", path.display(), e);
170                return vec![default_package_data(Some(DatasourceId::CondaMetaYaml))];
171            }
172        };
173
174        if is_conda_recipe_yaml_path(path) {
175            let yaml: Value = match yaml_serde::from_str(&contents) {
176                Ok(y) => y,
177                Err(e) => {
178                    warn!("Failed to parse YAML in {}: {}", path.display(), e);
179                    return vec![default_package_data(Some(DatasourceId::CondaMetaYaml))];
180                }
181            };
182
183            if !looks_like_conda_recipe_yaml(&yaml) {
184                return Vec::new();
185            }
186
187            return vec![parse_conda_recipe_yaml(&yaml)];
188        }
189
190        // Extract Jinja2 variables and apply crude substitution
191        let variables = extract_jinja2_variables(&contents);
192        let processed_yaml = apply_jinja2_substitutions(&contents, &variables);
193
194        // Parse YAML after Jinja2 processing
195        let yaml: Value = match yaml_serde::from_str(&processed_yaml) {
196            Ok(y) => y,
197            Err(e) => {
198                warn!("Failed to parse YAML in {}: {}", path.display(), e);
199                return vec![default_package_data(Some(DatasourceId::CondaMetaYaml))];
200            }
201        };
202
203        let package_element = yaml.get("package").and_then(|v| v.as_mapping());
204        let name = package_element
205            .and_then(|p| p.get("name"))
206            .and_then(yaml_value_to_string);
207
208        let version = package_element
209            .and_then(|p| p.get("version"))
210            .and_then(yaml_value_to_string);
211
212        let source = yaml.get("source").and_then(|v| v.as_mapping());
213        let download_url = source
214            .and_then(|s| s.get("url"))
215            .and_then(|v| v.as_str())
216            .map(|s| truncate_field(s.to_string()));
217
218        let sha256 = source
219            .and_then(|s| s.get("sha256"))
220            .and_then(|v| v.as_str())
221            .and_then(|s| Sha256Digest::from_hex(s).ok());
222
223        let about = yaml.get("about").and_then(|v| v.as_mapping());
224        let homepage_url = about
225            .and_then(|a| a.get("home"))
226            .and_then(|v| v.as_str())
227            .map(|s| truncate_field(s.to_string()));
228
229        let extracted_license_statement = about
230            .and_then(|a| a.get("license"))
231            .and_then(|v| v.as_str())
232            .map(|s| truncate_field(s.to_string()));
233        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
234            normalize_conda_declared_license(extracted_license_statement.as_deref());
235
236        let description = about
237            .and_then(|a| a.get("summary"))
238            .and_then(|v| v.as_str())
239            .map(|s| truncate_field(s.to_string()));
240
241        let vcs_url = about
242            .and_then(|a| a.get("dev_url"))
243            .and_then(|v| v.as_str())
244            .map(|s| truncate_field(s.to_string()));
245        let license_file = about
246            .and_then(|a| a.get("license_file"))
247            .and_then(|v| v.as_str())
248            .map(str::trim)
249            .filter(|value| !value.is_empty())
250            .map(|s| truncate_field(s.to_string()));
251
252        // Extract dependencies from requirements sections
253        let mut dependencies = Vec::new();
254        let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
255
256        if let Some(requirements) = yaml.get("requirements").and_then(|v| v.as_mapping()) {
257            for (scope_key, reqs_value) in requirements {
258                let scope = scope_key.as_str().unwrap_or("unknown");
259                if let Some(reqs) = reqs_value.as_sequence() {
260                    for req in reqs.iter().take(MAX_ITERATION_COUNT) {
261                        if let Some(req_str) = req.as_str()
262                            && let Some(dep) = parse_conda_requirement(req_str, scope)
263                        {
264                            // Filter out pip/python from dependencies, add to extra_data
265                            if extract_conda_requirement_name(req_str)
266                                .is_some_and(|n| n == "pip" || n == "python")
267                            {
268                                if let Some(arr) = extra_data
269                                    .entry(scope.to_string())
270                                    .or_insert_with(|| serde_json::Value::Array(vec![]))
271                                    .as_array_mut()
272                                {
273                                    arr.push(serde_json::Value::String(truncate_field(
274                                        req_str.to_string(),
275                                    )))
276                                }
277                            } else {
278                                dependencies.push(dep);
279                            }
280                        }
281                    }
282                }
283            }
284        }
285
286        let mut pkg = default_package_data(Some(DatasourceId::CondaMetaYaml));
287        pkg.package_type = Some(Self::PACKAGE_TYPE);
288        pkg.datasource_id = Some(DatasourceId::CondaMetaYaml);
289        pkg.name = name;
290        pkg.version = version;
291        pkg.purl = build_conda_package_purl(pkg.name.as_deref(), pkg.version.as_deref());
292        pkg.download_url = download_url;
293        pkg.homepage_url = homepage_url;
294        pkg.declared_license_expression = declared_license_expression.map(truncate_field);
295        pkg.declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
296        pkg.license_detections = license_detections;
297        pkg.extracted_license_statement = extracted_license_statement.map(truncate_field);
298        pkg.description = description;
299        pkg.vcs_url = vcs_url;
300        pkg.sha256 = sha256;
301        pkg.dependencies = dependencies;
302        if let Some(license_file) = license_file {
303            extra_data.insert(
304                "license_file".to_string(),
305                serde_json::Value::String(license_file),
306            );
307        }
308        if !extra_data.is_empty() {
309            pkg.extra_data = Some(extra_data);
310        }
311        vec![pkg]
312    }
313
314    fn metadata() -> Vec<super::metadata::ParserMetadata> {
315        vec![super::metadata::ParserMetadata {
316            description: "Conda package manifest and environment file",
317            file_patterns: &[
318                "**/meta.yaml",
319                "**/meta.yml",
320                "**/recipe/recipe.yaml",
321                "**/recipe/recipe.yml",
322                "**/environment.yml",
323                "**/environment.yaml",
324                "**/env.yaml",
325                "**/env.yml",
326                "**/conda.yaml",
327                "**/conda.yml",
328                "**/*conda*.yaml",
329                "**/*conda*.yml",
330                "**/*env*.yaml",
331                "**/*env*.yml",
332                "**/*environment*.yaml",
333                "**/*environment*.yml",
334            ],
335            package_type: "conda",
336            primary_language: "Python",
337            documentation_url: Some("https://docs.conda.io/"),
338        }]
339    }
340}
341
342fn looks_like_conda_recipe_yaml(yaml: &Value) -> bool {
343    yaml.get("schema_version")
344        .and_then(|value| value.as_u64())
345        .is_some_and(|value| value == 1)
346        && (yaml
347            .get("package")
348            .and_then(|value| value.as_mapping())
349            .is_some()
350            || yaml
351                .get("recipe")
352                .and_then(|value| value.as_mapping())
353                .is_some())
354}
355
356fn parse_conda_recipe_yaml(yaml: &Value) -> PackageData {
357    let context = extract_recipe_yaml_context(yaml);
358    let package = yaml
359        .get("package")
360        .or_else(|| yaml.get("recipe"))
361        .and_then(|value| value.as_mapping());
362    let source = yaml.get("source").and_then(|value| value.as_mapping());
363    let about = yaml.get("about").and_then(|value| value.as_mapping());
364
365    let name = package
366        .and_then(|pkg| pkg.get("name"))
367        .and_then(|value| recipe_yaml_value_to_string(value, &context));
368    let version = package
369        .and_then(|pkg| pkg.get("version"))
370        .and_then(|value| recipe_yaml_value_to_string(value, &context));
371
372    let download_url = source
373        .and_then(|src| src.get("url"))
374        .and_then(|value| recipe_yaml_value_to_string(value, &context));
375    let sha256 = source
376        .and_then(|src| src.get("sha256"))
377        .and_then(|value| recipe_yaml_value_to_string(value, &context))
378        .and_then(|value| Sha256Digest::from_hex(&value).ok());
379
380    let extracted_license_statement = about
381        .and_then(|section| section.get("license"))
382        .and_then(|value| recipe_yaml_value_to_string(value, &context));
383    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
384        normalize_conda_declared_license(extracted_license_statement.as_deref());
385
386    let description = about
387        .and_then(|section| section.get("summary"))
388        .and_then(|value| recipe_yaml_value_to_string(value, &context));
389    let homepage_url = about
390        .and_then(|section| section.get("homepage").or_else(|| section.get("home")))
391        .and_then(|value| recipe_yaml_value_to_string(value, &context));
392    let vcs_url = about
393        .and_then(|section| {
394            section
395                .get("repository")
396                .or_else(|| section.get("dev_url"))
397                .or_else(|| section.get("repository_url"))
398        })
399        .and_then(|value| recipe_yaml_value_to_string(value, &context));
400    let documentation_url = about
401        .and_then(|section| section.get("documentation"))
402        .and_then(|value| recipe_yaml_value_to_string(value, &context));
403    let license_file = about
404        .and_then(|section| section.get("license_file"))
405        .and_then(|value| recipe_yaml_value_to_string(value, &context));
406
407    let mut dependencies = Vec::new();
408    let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
409    if let Some(requirements) = yaml
410        .get("requirements")
411        .and_then(|value| value.as_mapping())
412    {
413        for (scope_key, reqs_value) in requirements {
414            let Some(scope) = scope_key.as_str() else {
415                continue;
416            };
417            let recipe_requirements = extract_recipe_yaml_requirement_strings(reqs_value, &context);
418            if recipe_requirements.is_empty() {
419                continue;
420            }
421
422            for req in &recipe_requirements {
423                if extract_conda_requirement_name(req)
424                    .is_some_and(|name| name == "pip" || name == "python")
425                {
426                    if let Some(arr) = extra_data
427                        .entry(scope.to_string())
428                        .or_insert_with(|| serde_json::Value::Array(vec![]))
429                        .as_array_mut()
430                    {
431                        arr.push(serde_json::Value::String(truncate_field(req.clone())));
432                    }
433                    continue;
434                }
435
436                if let Some(dep) = parse_conda_requirement(req, scope) {
437                    dependencies.push(dep);
438                }
439            }
440        }
441    }
442
443    if let Some(documentation_url) = documentation_url {
444        extra_data.insert(
445            "documentation".to_string(),
446            serde_json::Value::String(documentation_url),
447        );
448    }
449    if let Some(license_file) = license_file {
450        extra_data.insert(
451            "license_file".to_string(),
452            serde_json::Value::String(license_file),
453        );
454    }
455    extra_data.insert("schema_version".to_string(), serde_json::json!(1));
456
457    let mut pkg = default_package_data(Some(DatasourceId::CondaMetaYaml));
458    pkg.package_type = Some(CondaMetaYamlParser::PACKAGE_TYPE);
459    pkg.datasource_id = Some(DatasourceId::CondaMetaYaml);
460    pkg.name = name;
461    pkg.version = version;
462    pkg.purl = build_conda_package_purl(pkg.name.as_deref(), pkg.version.as_deref());
463    pkg.download_url = download_url;
464    pkg.homepage_url = homepage_url;
465    pkg.declared_license_expression = declared_license_expression.map(truncate_field);
466    pkg.declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
467    pkg.license_detections = license_detections;
468    pkg.extracted_license_statement = extracted_license_statement.map(truncate_field);
469    pkg.description = description;
470    pkg.vcs_url = vcs_url;
471    pkg.sha256 = sha256;
472    pkg.dependencies = dependencies;
473    pkg.extra_data = Some(extra_data);
474    pkg
475}
476
477fn extract_recipe_yaml_context(yaml: &Value) -> HashMap<String, String> {
478    let mut context = HashMap::new();
479    let Some(context_mapping) = yaml.get("context").and_then(|value| value.as_mapping()) else {
480        return context;
481    };
482
483    for (key, value) in context_mapping {
484        let Some(key) = key.as_str() else {
485            continue;
486        };
487        if let Some(value) = yaml_value_to_string(value) {
488            context.insert(truncate_field(key.to_string()), truncate_field(value));
489        }
490    }
491
492    context
493}
494
495fn recipe_yaml_value_to_string(value: &Value, context: &HashMap<String, String>) -> Option<String> {
496    let value = yaml_value_to_string(value)?;
497    Some(resolve_recipe_yaml_expressions(&value, context))
498}
499
500fn resolve_recipe_yaml_expressions(value: &str, context: &HashMap<String, String>) -> String {
501    let Some(re) = Regex::new(r#"\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}"#).ok() else {
502        return truncate_field(value.to_string());
503    };
504
505    let resolved = re.replace_all(value, |caps: &regex::Captures| {
506        context
507            .get(&caps[1])
508            .cloned()
509            .unwrap_or_else(|| caps[0].to_string())
510    });
511    truncate_field(resolved.into_owned())
512}
513
514fn extract_recipe_yaml_requirement_strings(
515    value: &Value,
516    context: &HashMap<String, String>,
517) -> Vec<String> {
518    let mut requirements = Vec::new();
519    collect_recipe_yaml_requirement_strings(value, context, &mut requirements);
520    requirements
521}
522
523fn collect_recipe_yaml_requirement_strings(
524    value: &Value,
525    context: &HashMap<String, String>,
526    requirements: &mut Vec<String>,
527) {
528    if let Some(req) = value.as_str() {
529        let resolved = resolve_recipe_yaml_expressions(req, context);
530        if should_keep_recipe_yaml_requirement(&resolved) {
531            requirements.push(resolved);
532        }
533        return;
534    }
535
536    if let Some(items) = value.as_sequence() {
537        for item in items.iter().take(MAX_ITERATION_COUNT) {
538            collect_recipe_yaml_requirement_strings(item, context, requirements);
539        }
540        return;
541    }
542
543    if let Some(mapping) = value.as_mapping() {
544        if let Some(then_value) = mapping.get("then") {
545            collect_recipe_yaml_requirement_strings(then_value, context, requirements);
546        }
547        if let Some(else_value) = mapping.get("else") {
548            collect_recipe_yaml_requirement_strings(else_value, context, requirements);
549        }
550    }
551}
552
/// Decides whether a resolved requirement string should be kept.
///
/// Blank strings are rejected, as is anything that still contains an
/// unresolved `${{` expression or a `compiler(...)`, `stdlib(...)`,
/// `pin_subpackage(` or `pin_compatible(` call.
fn should_keep_recipe_yaml_requirement(req: &str) -> bool {
    const REJECTED_MARKERS: [&str; 7] = [
        "${{",
        "compiler('",
        "compiler(\"",
        "pin_subpackage(",
        "pin_compatible(",
        "stdlib('",
        "stdlib(\"",
    ];

    let candidate = req.trim();
    !candidate.is_empty()
        && !REJECTED_MARKERS
            .iter()
            .any(|&marker| candidate.contains(marker))
}
567
568fn normalize_conda_declared_license(
569    statement: Option<&str>,
570) -> (
571    Option<String>,
572    Option<String>,
573    Vec<crate::models::LicenseDetection>,
574) {
575    match statement.map(str::trim).filter(|value| !value.is_empty()) {
576        Some("Apache Software") => build_declared_license_data_from_pair(
577            "apache-2.0",
578            "Apache-2.0",
579            DeclaredLicenseMatchMetadata::single_line("Apache Software"),
580        ),
581        Some("BSD-3-Clause") => build_declared_license_data_from_pair(
582            "bsd-new",
583            "BSD-3-Clause",
584            DeclaredLicenseMatchMetadata::single_line("BSD-3-Clause"),
585        ),
586        other => normalize_spdx_declared_license(other),
587    }
588}
589
590/// Conda environment file (environment.yml, conda.yaml) parser.
591///
592/// Extracts dependencies from Conda environment files used to define reproducible
593/// environments. Supports both Conda and pip dependencies, with channel specifications.
594pub struct CondaEnvironmentYmlParser;
595
596impl PackageParser for CondaEnvironmentYmlParser {
597    const PACKAGE_TYPE: PackageType = PackageType::Conda;
598
599    fn is_match(path: &Path) -> bool {
600        // Python reference: path_patterns = ('*conda*.yaml', '*env*.yaml', '*environment*.yaml')
601        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
602            let lower = name.to_lowercase();
603            if matches!(lower.as_str(), "meta.yaml" | "meta.yml" | "recipe.yaml") {
604                return false;
605            }
606            let has_condaish_name =
607                lower.contains("conda") || lower.contains("env") || lower.contains("environment");
608            let has_condaish_ancestor = path
609                .ancestors()
610                .skip(1)
611                .filter_map(|ancestor| ancestor.file_name().and_then(|name| name.to_str()))
612                .any(|ancestor| ancestor.to_ascii_lowercase().contains("conda"));
613            (has_condaish_name || has_condaish_ancestor)
614                && (lower.ends_with(".yaml") || lower.ends_with(".yml"))
615        } else {
616            false
617        }
618    }
619
620    fn extract_packages(path: &Path) -> Vec<PackageData> {
621        let contents = match read_file_to_string(path, None) {
622            Ok(c) => c,
623            Err(e) => {
624                warn!("Failed to read {}: {}", path.display(), e);
625                return vec![default_package_data(Some(DatasourceId::CondaYaml))];
626            }
627        };
628
629        let yaml: Value = match yaml_serde::from_str(&contents) {
630            Ok(y) => y,
631            Err(e) => {
632                warn!("Failed to parse YAML in {}: {}", path.display(), e);
633                return vec![default_package_data(Some(DatasourceId::CondaYaml))];
634            }
635        };
636
637        if !looks_like_conda_environment_yaml(&yaml) {
638            return Vec::new();
639        }
640
641        let name = yaml
642            .get("name")
643            .and_then(|v| v.as_str())
644            .map(|s| truncate_field(s.to_string()));
645
646        let dependencies = extract_environment_dependencies(&yaml);
647
648        let mut extra_data = HashMap::new();
649        if let Some(channels) = yaml.get("channels").and_then(|v| v.as_sequence()) {
650            let channels_vec: Vec<String> = channels
651                .iter()
652                .filter_map(|c| c.as_str().map(|s| truncate_field(s.to_string())))
653                .collect();
654            if !channels_vec.is_empty() {
655                extra_data.insert("channels".to_string(), serde_json::json!(channels_vec));
656            }
657        }
658
659        // Environment files are private (not published packages)
660        let mut pkg = default_package_data(Some(DatasourceId::CondaYaml));
661        pkg.package_type = Some(Self::PACKAGE_TYPE);
662        pkg.datasource_id = Some(DatasourceId::CondaYaml);
663        pkg.name = name;
664        pkg.purl = build_conda_package_purl(pkg.name.as_deref(), pkg.version.as_deref());
665        pkg.primary_language = Some(truncate_field("Python".to_string()));
666        pkg.dependencies = dependencies;
667        pkg.is_private = true;
668        if !extra_data.is_empty() {
669            pkg.extra_data = Some(extra_data);
670        }
671        vec![pkg]
672    }
673}
674
675fn looks_like_conda_environment_yaml(yaml: &Value) -> bool {
676    let has_dependencies = yaml
677        .get("dependencies")
678        .and_then(|value| value.as_sequence())
679        .is_some_and(|items| !items.is_empty());
680    let has_channels = yaml
681        .get("channels")
682        .and_then(|value| value.as_sequence())
683        .is_some_and(|items| !items.is_empty());
684    let has_prefix = yaml
685        .get("prefix")
686        .and_then(|value| value.as_str())
687        .is_some_and(|value| !value.trim().is_empty());
688
689    has_dependencies || has_channels || has_prefix
690}
691
692/// Extract Jinja2-style variables from a Conda meta.yaml
693///
694/// For example, lines like `{% set version = "0.45.0" %}` and
695/// `{% set sha256 = "abc123..." %}` are captured as variables.
696pub fn extract_jinja2_variables(content: &str) -> HashMap<String, String> {
697    let mut variables = HashMap::new();
698
699    for line in content.lines().take(MAX_ITERATION_COUNT) {
700        let trimmed = line.trim();
701        if let Some(inner) = extract_jinja_statement(trimmed)
702            && let Some(inner) = inner.strip_prefix("set").map(str::trim)
703            && let Some((key, value)) = inner.split_once('=')
704        {
705            let key = key.trim();
706            let value = value.trim().trim_matches('"').trim_matches('\'');
707            variables.insert(
708                truncate_field(key.to_string()),
709                truncate_field(value.to_string()),
710            );
711        }
712    }
713
714    variables
715}
716
717/// Apply Jinja2 variable substitutions to YAML content
718///
719/// Supports:
720/// - `{{ variable }}` - Simple substitution
721/// - `{{ variable|lower }}` - Lowercase filter
722pub fn apply_jinja2_substitutions(content: &str, variables: &HashMap<String, String>) -> String {
723    let mut result = Vec::new();
724
725    for line in content.lines() {
726        let trimmed = line.trim();
727
728        if extract_jinja_statement(trimmed).is_some() {
729            continue;
730        }
731
732        let mut processed_line = line.to_string();
733
734        // Apply variable substitutions
735        if line.contains("{{") && line.contains("}}") {
736            for (var_name, var_value) in variables {
737                // Handle |lower filter
738                let pattern_lower = format!("{{{{ {}|lower }}}}", var_name);
739                if processed_line.contains(&pattern_lower) {
740                    processed_line =
741                        processed_line.replace(&pattern_lower, &var_value.to_lowercase());
742                }
743
744                // Handle normal substitution
745                let pattern_normal = format!("{{{{ {} }}}}", var_name);
746                processed_line = processed_line.replace(&pattern_normal, var_value);
747            }
748        }
749
750        // Skip lines with unresolved Jinja2 templates (complex expressions we can't handle)
751        if processed_line.contains("{{") {
752            continue;
753        }
754
755        result.push(processed_line);
756    }
757
758    quote_plain_numeric_version_scalars(&result.join("\n"))
759}
760
761fn quote_plain_numeric_version_scalars(content: &str) -> String {
762    let Some(version_re) =
763        Regex::new(r#"^(\s*(?:-\s*)?version:\s*)([0-9]+(?:\.[0-9]+)+)(\s*)$"#).ok()
764    else {
765        return content.to_string();
766    };
767
768    content
769        .lines()
770        .map(|line| {
771            version_re
772                .replace(line, |caps: &regex::Captures| {
773                    format!(r#"{}"{}"{}"#, &caps[1], &caps[2], &caps[3])
774                })
775                .into_owned()
776        })
777        .collect::<Vec<_>>()
778        .join("\n")
779}
780
781/// Parse a Conda requirement string into a Dependency
782///
783/// Format examples:
784/// - `mccortex ==1.0` - Pinned version with space before operator
785/// - `python >=3.6` - Version constraint
786/// - `conda-forge::numpy=1.15.4` - Namespace and pinned version (no space)
787/// - `bwa` - No version specified
788pub fn parse_conda_requirement(req: &str, scope: &str) -> Option<Dependency> {
789    let req = req.trim();
790
791    // Handle namespace prefix (conda-forge::package)
792    let (namespace, channel_url, req_without_ns) = parse_conda_channel_prefix(req);
793
794    // Split on first space to separate name from version constraint
795    let (name_part, version_constraint) =
796        if let Some((name, constraint)) = req_without_ns.split_once(' ') {
797            (name.trim(), Some(constraint.trim()))
798        } else {
799            (req_without_ns, None)
800        };
801
802    // Check for pinned version with `=` (no space): package=1.0
803    let (name, version, is_pinned, extracted_requirement) = if name_part.contains('=') {
804        let parts: Vec<&str> = name_part.splitn(2, '=').collect();
805        let n = parts[0].trim();
806        let v = if parts.len() > 1 {
807            let parsed = parts[1].trim();
808            if parsed.is_empty() {
809                None
810            } else {
811                Some(truncate_field(parsed.to_string()))
812            }
813        } else {
814            None
815        };
816        let req = v
817            .as_ref()
818            .map(|ver| format!("={}", ver))
819            .unwrap_or_default();
820        (n, v, true, Some(truncate_field(req)))
821    } else if let Some(constraint) = version_constraint {
822        let version_opt = if constraint.starts_with("==") {
823            Some(truncate_field(
824                constraint.trim_start_matches("==").trim().to_string(),
825            ))
826        } else {
827            None
828        };
829        (
830            name_part.trim(),
831            version_opt,
832            false,
833            Some(truncate_field(constraint.to_string())),
834        )
835    } else {
836        (name_part.trim(), None, false, Some(String::new()))
837    };
838
839    // Build PURL
840    let purl = build_purl(
841        "conda",
842        namespace,
843        name,
844        version.as_deref(),
845        None,
846        None,
847        None,
848    );
849
850    // Determine is_runtime and is_optional based on scope
851    let (is_runtime, is_optional) = match scope {
852        "run" => (true, false),
853        _ => (false, true), // build, host, test are all optional
854    };
855
856    let mut extra_data = HashMap::new();
857    if let Some(namespace) = namespace {
858        extra_data.insert(
859            "channel".to_string(),
860            serde_json::json!(truncate_field(namespace.to_string())),
861        );
862    }
863    if let Some(channel_url) = channel_url {
864        extra_data.insert(
865            "channel_url".to_string(),
866            serde_json::json!(truncate_field(channel_url.to_string())),
867        );
868    }
869
870    Some(Dependency {
871        purl,
872        extracted_requirement,
873        scope: Some(truncate_field(scope.to_string())),
874        is_runtime: Some(is_runtime),
875        is_optional: Some(is_optional),
876        is_pinned: Some(is_pinned),
877        is_direct: Some(true),
878        resolved_package: None,
879        extra_data: (!extra_data.is_empty()).then_some(extra_data),
880    })
881}
882
883fn extract_environment_dependencies(yaml: &Value) -> Vec<Dependency> {
884    let dependencies = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
885        Some(d) => d,
886        None => return Vec::new(),
887    };
888
889    let mut deps = Vec::new();
890    for dep_value in dependencies.iter().take(MAX_ITERATION_COUNT) {
891        if let Some(dep_str) = dep_value.as_str() {
892            if let Some(dep) = parse_environment_string_dependency(dep_str) {
893                deps.push(dep);
894            }
895        } else if let Some(pip_deps) = dep_value.get("pip").and_then(|v| v.as_sequence()) {
896            deps.extend(extract_pip_dependencies(pip_deps));
897        }
898    }
899    deps
900}
901
902fn parse_environment_string_dependency(dep_str: &str) -> Option<Dependency> {
903    let (namespace, channel_url, dep_without_ns) = parse_conda_channel_prefix(dep_str);
904    create_conda_dependency(namespace, channel_url, dep_without_ns, "dependencies")
905}
906
907fn parse_conda_exact_requirement(req_no_space: &str) -> (Option<String>, Option<String>) {
908    let exact = req_no_space
909        .strip_prefix("==")
910        .or_else(|| req_no_space.strip_prefix('='));
911
912    let Some(exact) = exact else {
913        return (None, None);
914    };
915
916    if exact.is_empty() {
917        return (None, None);
918    }
919
920    match exact.split_once('=') {
921        Some((version, build_string)) if !version.is_empty() => (
922            Some(truncate_field(version.to_string())),
923            (!build_string.is_empty()).then(|| truncate_field(build_string.to_string())),
924        ),
925        _ => (Some(truncate_field(exact.to_string())), None),
926    }
927}
928
/// Separates an optional `channel::` prefix from a Conda dependency string.
///
/// Returns `(channel_name, channel_url, remainder)`. The split happens at the last
/// `::` in `dep_str`. A prefix containing `/` or `:` is reported as `channel_url`;
/// any other prefix is reported as `channel_name`. Without `::` both are `None` and
/// the remainder is the unchanged input.
fn parse_conda_channel_prefix(dep_str: &str) -> (Option<&str>, Option<&str>, &str) {
    let Some((prefix, package_spec)) = dep_str.rsplit_once("::") else {
        return (None, None, dep_str);
    };

    let looks_like_url = prefix.chars().any(|ch| matches!(ch, '/' | ':'));
    if looks_like_url {
        (None, Some(prefix), package_spec)
    } else {
        (Some(prefix), None, package_spec)
    }
}
940
941fn create_conda_dependency(
942    namespace: Option<&str>,
943    channel_url: Option<&str>,
944    dep_without_ns: &str,
945    scope: &str,
946) -> Option<Dependency> {
947    let dep = dep_without_ns.trim();
948    let name_re = match Regex::new(r"^([A-Za-z0-9_.\-]+)") {
949        Ok(re) => re,
950        Err(_) => return None,
951    };
952
953    let caps = name_re.captures(dep)?;
954    let name_match = caps.get(1)?;
955    let name = name_match.as_str().trim();
956    let rest = dep[name_match.end()..].trim();
957
958    let (version, build_string, is_pinned, extracted_requirement) = if rest.is_empty() {
959        (None, None, false, Some(String::new()))
960    } else {
961        let req_no_space = rest.replace(' ', "");
962        let is_exact = req_no_space.starts_with("=") || req_no_space.starts_with("==");
963        let (parsed_version, parsed_build_string) = if is_exact {
964            parse_conda_exact_requirement(&req_no_space)
965        } else {
966            (None, None)
967        };
968
969        (
970            parsed_version,
971            parsed_build_string,
972            is_exact,
973            Some(truncate_field(rest.to_string())),
974        )
975    };
976
977    if name == "pip" || name == "python" {
978        return None;
979    }
980
981    let purl = build_purl(
982        "conda",
983        namespace,
984        name,
985        version.as_deref(),
986        None,
987        None,
988        None,
989    );
990    let mut extra_data = HashMap::new();
991    if let Some(namespace) = namespace {
992        extra_data.insert(
993            "channel".to_string(),
994            serde_json::json!(truncate_field(namespace.to_string())),
995        );
996    }
997    if let Some(channel_url) = channel_url {
998        extra_data.insert(
999            "channel_url".to_string(),
1000            serde_json::json!(truncate_field(channel_url.to_string())),
1001        );
1002    }
1003    if let Some(build_string) = build_string {
1004        extra_data.insert("build_string".to_string(), serde_json::json!(build_string));
1005    }
1006
1007    Some(Dependency {
1008        purl,
1009        extracted_requirement,
1010        scope: Some(truncate_field(scope.to_string())),
1011        is_runtime: Some(true),
1012        is_optional: Some(false),
1013        is_pinned: Some(is_pinned),
1014        is_direct: Some(true),
1015        resolved_package: None,
1016        extra_data: (!extra_data.is_empty()).then_some(extra_data),
1017    })
1018}
1019
1020fn extract_pip_dependencies(pip_deps: &[Value]) -> Vec<Dependency> {
1021    pip_deps
1022        .iter()
1023        .take(MAX_ITERATION_COUNT)
1024        .filter_map(|pip_dep| {
1025            if let Some(pip_req_str) = pip_dep.as_str()
1026                && let Some(parsed_req) =
1027                    crate::parsers::pep508::parse_pep508_requirement(pip_req_str)
1028            {
1029                create_pip_dependency(parsed_req, "dependencies", Some(pip_req_str))
1030            } else {
1031                None
1032            }
1033        })
1034        .collect()
1035}
1036
1037fn create_pip_dependency(
1038    parsed_req: crate::parsers::pep508::Pep508Requirement,
1039    scope: &str,
1040    raw_requirement: Option<&str>,
1041) -> Option<Dependency> {
1042    let name = truncate_field(parsed_req.name);
1043
1044    if name == "pip" || name == "python" {
1045        return None;
1046    }
1047
1048    let specs = if parsed_req.is_name_at_url {
1049        parsed_req
1050            .url
1051            .as_ref()
1052            .map(|url| truncate_field(url.clone()))
1053    } else {
1054        parsed_req.specifiers.clone()
1055    };
1056
1057    let extracted_requirement = if let Some(raw) = raw_requirement {
1058        let raw = raw.trim();
1059        let suffix = raw.strip_prefix(&name).unwrap_or(raw).trim().to_string();
1060        Some(truncate_field(suffix))
1061    } else {
1062        Some(truncate_field(specs.clone().unwrap_or_default()))
1063    };
1064
1065    let version = specs.as_ref().and_then(|spec_str| {
1066        if spec_str.starts_with("==") {
1067            Some(truncate_field(
1068                spec_str.trim_start_matches("==").to_string(),
1069            ))
1070        } else {
1071            None
1072        }
1073    });
1074
1075    let is_pinned = specs.as_ref().map(|s| s.contains("==")).unwrap_or(false);
1076    let purl = build_purl("pypi", None, &name, version.as_deref(), None, None, None);
1077
1078    Some(Dependency {
1079        purl,
1080        extracted_requirement,
1081        scope: Some(truncate_field(scope.to_string())),
1082        is_runtime: Some(true),
1083        is_optional: Some(false),
1084        is_pinned: Some(is_pinned),
1085        is_direct: Some(true),
1086        resolved_package: None,
1087        extra_data: None,
1088    })
1089}