Skip to main content

provenant/parsers/
conda.rs

1//! Parser for Conda/Anaconda package manifest files.
2//!
3//! Extracts package metadata and dependencies from Conda ecosystem manifest files
4//! supporting both recipe definitions and environment specifications.
5//!
6//! # Supported Formats
7//! - meta.yaml (Conda recipe metadata with Jinja2 templating support)
8//! - conda.yaml/environment.yml (Conda environment dependency specifications)
9//!
10//! # Key Features
11//! - YAML parsing for environment files
12//! - Dependency extraction from dependencies and build_requirements sections
13//! - Channel specification and platform detection
14//! - Version constraint parsing for Conda version specifiers
15//! - Package URL (purl) generation for conda packages
16//! - Limited meta.yaml support (note: Jinja2 templating not fully resolved)
17//!
18//! # Implementation Notes
19//! - Uses YAML parsing via `yaml_serde`
20//! - meta.yaml: Jinja2 templates not evaluated (use rendered YAML if available)
21//! - environment.yml: Full dependency specification support
22//! - Graceful error handling with `warn!()` logs
23//!
24//! # References
25//! - <https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html>
26//! - <https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html>
27
28use std::collections::HashMap;
29use std::path::Path;
30
31use crate::parser_warn as warn;
32use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
33use regex::Regex;
34use yaml_serde::Value;
35
36use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Sha256Digest};
37
38use super::PackageParser;
39use super::license_normalization::{
40    DeclaredLicenseMatchMetadata, build_declared_license_data_from_pair,
41    normalize_spdx_declared_license,
42};
43
44fn default_package_data(datasource_id: Option<DatasourceId>) -> PackageData {
45    PackageData {
46        package_type: Some(CondaMetaYamlParser::PACKAGE_TYPE),
47        datasource_id,
48        ..Default::default()
49    }
50}
51
/// Returns `true` when `path` names `recipe.yaml` or `recipe.yml` located
/// directly inside a directory called `recipe`.
///
/// Paths whose file name or parent directory name is not valid UTF-8 never match.
fn is_conda_recipe_yaml_path(path: &Path) -> bool {
    let file_name = path.file_name().and_then(|raw| raw.to_str());
    if !matches!(file_name, Some("recipe.yaml" | "recipe.yml")) {
        return false;
    }

    let parent_dir = path
        .parent()
        .and_then(Path::file_name)
        .and_then(|raw| raw.to_str());
    parent_dir == Some("recipe")
}
64
/// Build a PURL (Package URL) string for Conda or PyPI packages.
///
/// - `conda`: `pkg:conda/[namespace/]name[@version]`
/// - `pypi`: `pkg:pypi/name[@version]` (the namespace is ignored)
/// - any other type: `pkg:<type>/name` (namespace and version are ignored)
///
/// The `_qualifiers`, `_subpath` and `_extras` arguments are accepted but not
/// used, and no percent-encoding is applied to any component. The function
/// always returns `Some`.
pub(crate) fn build_purl(
    package_type: &str,
    namespace: Option<&str>,
    name: &str,
    version: Option<&str>,
    _qualifiers: Option<&str>,
    _subpath: Option<&str>,
    _extras: Option<&str>,
) -> Option<String> {
    let mut purl = match (package_type, namespace) {
        ("conda", Some(ns)) => format!("pkg:conda/{ns}/{name}"),
        ("conda", None) => format!("pkg:conda/{name}"),
        ("pypi", _) => format!("pkg:pypi/{name}"),
        // Unknown package types carry neither namespace nor version.
        (other, _) => return Some(format!("pkg:{other}/{name}")),
    };

    if let Some(v) = version {
        purl.push('@');
        purl.push_str(v);
    }
    Some(purl)
}
97
98fn build_conda_package_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
99    let name = name?;
100    build_purl("conda", None, name, version, None, None, None)
101}
102
103fn yaml_value_to_string(value: &Value) -> Option<String> {
104    match value {
105        Value::String(s) => Some(truncate_field(s.clone())),
106        Value::Number(n) => Some(truncate_field(n.to_string())),
107        Value::Bool(b) => Some(truncate_field(b.to_string())),
108        _ => None,
109    }
110}
111
/// Returns the trimmed text between `{%` and the first following `%}` of a
/// Jinja2 statement line.
///
/// `trimmed_line` is expected to already have surrounding whitespace removed.
/// Returns `None` when the line does not start with `{%` or has no closing
/// `%}` after the opening delimiter.
fn extract_jinja_statement(trimmed_line: &str) -> Option<&str> {
    // Look for the closing delimiter only *after* the opening one. Searching
    // the whole line would match the overlapping `%}` in an input such as
    // `{%}` at index 1, and slicing `[2..1]` would panic.
    let after_open = trimmed_line.strip_prefix("{%")?;
    let end = after_open.find("%}")?;
    Some(after_open[..end].trim())
}
120
121fn extract_conda_requirement_name(req: &str) -> Option<String> {
122    let req = req.trim();
123    if req.is_empty() {
124        return None;
125    }
126
127    let req_without_ns = req.rsplit_once("::").map(|(_, rest)| rest).unwrap_or(req);
128
129    let name = req_without_ns
130        .split_whitespace()
131        .next()
132        .unwrap_or(req_without_ns)
133        .split(['=', '<', '>', '!', '~'])
134        .next()
135        .unwrap_or(req_without_ns)
136        .trim();
137
138    if name.is_empty() {
139        None
140    } else {
141        Some(truncate_field(name.to_string()))
142    }
143}
144
145/// Conda recipe manifest (meta.yaml) parser.
146///
147/// Extracts package metadata and dependencies from Conda recipe files, which
148/// define how to build a Conda package. Handles Jinja2 templating used in
149/// recipe files for variable substitution.
150pub struct CondaMetaYamlParser;
151
152impl PackageParser for CondaMetaYamlParser {
153    const PACKAGE_TYPE: PackageType = PackageType::Conda;
154
155    fn is_match(path: &Path) -> bool {
156        // Match */meta.yaml following Python reference logic
157        path.file_name()
158            .is_some_and(|name| name == "meta.yaml" || name == "meta.yml")
159            || is_conda_recipe_yaml_path(path)
160    }
161
162    fn extract_packages(path: &Path) -> Vec<PackageData> {
163        let contents = match read_file_to_string(path, None) {
164            Ok(c) => c,
165            Err(e) => {
166                warn!("Failed to read {}: {}", path.display(), e);
167                return vec![default_package_data(Some(DatasourceId::CondaMetaYaml))];
168            }
169        };
170
171        if is_conda_recipe_yaml_path(path) {
172            let yaml: Value = match yaml_serde::from_str(&contents) {
173                Ok(y) => y,
174                Err(e) => {
175                    warn!("Failed to parse YAML in {}: {}", path.display(), e);
176                    return vec![default_package_data(Some(DatasourceId::CondaMetaYaml))];
177                }
178            };
179
180            if !looks_like_conda_recipe_yaml(&yaml) {
181                return Vec::new();
182            }
183
184            return vec![parse_conda_recipe_yaml(&yaml)];
185        }
186
187        // Extract Jinja2 variables and apply crude substitution
188        let variables = extract_jinja2_variables(&contents);
189        let processed_yaml = apply_jinja2_substitutions(&contents, &variables);
190
191        // Parse YAML after Jinja2 processing
192        let yaml: Value = match yaml_serde::from_str(&processed_yaml) {
193            Ok(y) => y,
194            Err(e) => {
195                warn!("Failed to parse YAML in {}: {}", path.display(), e);
196                return vec![default_package_data(Some(DatasourceId::CondaMetaYaml))];
197            }
198        };
199
200        let package_element = yaml.get("package").and_then(|v| v.as_mapping());
201        let name = package_element
202            .and_then(|p| p.get("name"))
203            .and_then(yaml_value_to_string);
204
205        let version = package_element
206            .and_then(|p| p.get("version"))
207            .and_then(yaml_value_to_string);
208
209        let source = yaml.get("source").and_then(|v| v.as_mapping());
210        let download_url = source
211            .and_then(|s| s.get("url"))
212            .and_then(|v| v.as_str())
213            .map(|s| truncate_field(s.to_string()));
214
215        let sha256 = source
216            .and_then(|s| s.get("sha256"))
217            .and_then(|v| v.as_str())
218            .and_then(|s| Sha256Digest::from_hex(s).ok());
219
220        let about = yaml.get("about").and_then(|v| v.as_mapping());
221        let homepage_url = about
222            .and_then(|a| a.get("home"))
223            .and_then(|v| v.as_str())
224            .map(|s| truncate_field(s.to_string()));
225
226        let extracted_license_statement = about
227            .and_then(|a| a.get("license"))
228            .and_then(|v| v.as_str())
229            .map(|s| truncate_field(s.to_string()));
230        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
231            normalize_conda_declared_license(extracted_license_statement.as_deref());
232
233        let description = about
234            .and_then(|a| a.get("summary"))
235            .and_then(|v| v.as_str())
236            .map(|s| truncate_field(s.to_string()));
237
238        let vcs_url = about
239            .and_then(|a| a.get("dev_url"))
240            .and_then(|v| v.as_str())
241            .map(|s| truncate_field(s.to_string()));
242        let license_file = about
243            .and_then(|a| a.get("license_file"))
244            .and_then(|v| v.as_str())
245            .map(str::trim)
246            .filter(|value| !value.is_empty())
247            .map(|s| truncate_field(s.to_string()));
248
249        // Extract dependencies from requirements sections
250        let mut dependencies = Vec::new();
251        let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
252
253        if let Some(requirements) = yaml.get("requirements").and_then(|v| v.as_mapping()) {
254            for (scope_key, reqs_value) in requirements {
255                let scope = scope_key.as_str().unwrap_or("unknown");
256                if let Some(reqs) = reqs_value.as_sequence() {
257                    for req in reqs.iter().take(MAX_ITERATION_COUNT) {
258                        if let Some(req_str) = req.as_str()
259                            && let Some(dep) = parse_conda_requirement(req_str, scope)
260                        {
261                            // Filter out pip/python from dependencies, add to extra_data
262                            if extract_conda_requirement_name(req_str)
263                                .is_some_and(|n| n == "pip" || n == "python")
264                            {
265                                if let Some(arr) = extra_data
266                                    .entry(scope.to_string())
267                                    .or_insert_with(|| serde_json::Value::Array(vec![]))
268                                    .as_array_mut()
269                                {
270                                    arr.push(serde_json::Value::String(truncate_field(
271                                        req_str.to_string(),
272                                    )))
273                                }
274                            } else {
275                                dependencies.push(dep);
276                            }
277                        }
278                    }
279                }
280            }
281        }
282
283        let mut pkg = default_package_data(Some(DatasourceId::CondaMetaYaml));
284        pkg.package_type = Some(Self::PACKAGE_TYPE);
285        pkg.datasource_id = Some(DatasourceId::CondaMetaYaml);
286        pkg.name = name;
287        pkg.version = version;
288        pkg.purl = build_conda_package_purl(pkg.name.as_deref(), pkg.version.as_deref());
289        pkg.download_url = download_url;
290        pkg.homepage_url = homepage_url;
291        pkg.declared_license_expression = declared_license_expression.map(truncate_field);
292        pkg.declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
293        pkg.license_detections = license_detections;
294        pkg.extracted_license_statement = extracted_license_statement.map(truncate_field);
295        pkg.description = description;
296        pkg.vcs_url = vcs_url;
297        pkg.sha256 = sha256;
298        pkg.dependencies = dependencies;
299        if let Some(license_file) = license_file {
300            extra_data.insert(
301                "license_file".to_string(),
302                serde_json::Value::String(license_file),
303            );
304        }
305        if !extra_data.is_empty() {
306            pkg.extra_data = Some(extra_data);
307        }
308        vec![pkg]
309    }
310}
311
312fn looks_like_conda_recipe_yaml(yaml: &Value) -> bool {
313    yaml.get("schema_version")
314        .and_then(|value| value.as_u64())
315        .is_some_and(|value| value == 1)
316        && (yaml
317            .get("package")
318            .and_then(|value| value.as_mapping())
319            .is_some()
320            || yaml
321                .get("recipe")
322                .and_then(|value| value.as_mapping())
323                .is_some())
324}
325
326fn parse_conda_recipe_yaml(yaml: &Value) -> PackageData {
327    let context = extract_recipe_yaml_context(yaml);
328    let package = yaml
329        .get("package")
330        .or_else(|| yaml.get("recipe"))
331        .and_then(|value| value.as_mapping());
332    let source = yaml.get("source").and_then(|value| value.as_mapping());
333    let about = yaml.get("about").and_then(|value| value.as_mapping());
334
335    let name = package
336        .and_then(|pkg| pkg.get("name"))
337        .and_then(|value| recipe_yaml_value_to_string(value, &context));
338    let version = package
339        .and_then(|pkg| pkg.get("version"))
340        .and_then(|value| recipe_yaml_value_to_string(value, &context));
341
342    let download_url = source
343        .and_then(|src| src.get("url"))
344        .and_then(|value| recipe_yaml_value_to_string(value, &context));
345    let sha256 = source
346        .and_then(|src| src.get("sha256"))
347        .and_then(|value| recipe_yaml_value_to_string(value, &context))
348        .and_then(|value| Sha256Digest::from_hex(&value).ok());
349
350    let extracted_license_statement = about
351        .and_then(|section| section.get("license"))
352        .and_then(|value| recipe_yaml_value_to_string(value, &context));
353    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
354        normalize_conda_declared_license(extracted_license_statement.as_deref());
355
356    let description = about
357        .and_then(|section| section.get("summary"))
358        .and_then(|value| recipe_yaml_value_to_string(value, &context));
359    let homepage_url = about
360        .and_then(|section| section.get("homepage").or_else(|| section.get("home")))
361        .and_then(|value| recipe_yaml_value_to_string(value, &context));
362    let vcs_url = about
363        .and_then(|section| {
364            section
365                .get("repository")
366                .or_else(|| section.get("dev_url"))
367                .or_else(|| section.get("repository_url"))
368        })
369        .and_then(|value| recipe_yaml_value_to_string(value, &context));
370    let documentation_url = about
371        .and_then(|section| section.get("documentation"))
372        .and_then(|value| recipe_yaml_value_to_string(value, &context));
373    let license_file = about
374        .and_then(|section| section.get("license_file"))
375        .and_then(|value| recipe_yaml_value_to_string(value, &context));
376
377    let mut dependencies = Vec::new();
378    let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
379    if let Some(requirements) = yaml
380        .get("requirements")
381        .and_then(|value| value.as_mapping())
382    {
383        for (scope_key, reqs_value) in requirements {
384            let Some(scope) = scope_key.as_str() else {
385                continue;
386            };
387            let recipe_requirements = extract_recipe_yaml_requirement_strings(reqs_value, &context);
388            if recipe_requirements.is_empty() {
389                continue;
390            }
391
392            for req in &recipe_requirements {
393                if extract_conda_requirement_name(req)
394                    .is_some_and(|name| name == "pip" || name == "python")
395                {
396                    if let Some(arr) = extra_data
397                        .entry(scope.to_string())
398                        .or_insert_with(|| serde_json::Value::Array(vec![]))
399                        .as_array_mut()
400                    {
401                        arr.push(serde_json::Value::String(truncate_field(req.clone())));
402                    }
403                    continue;
404                }
405
406                if let Some(dep) = parse_conda_requirement(req, scope) {
407                    dependencies.push(dep);
408                }
409            }
410        }
411    }
412
413    if let Some(documentation_url) = documentation_url {
414        extra_data.insert(
415            "documentation".to_string(),
416            serde_json::Value::String(documentation_url),
417        );
418    }
419    if let Some(license_file) = license_file {
420        extra_data.insert(
421            "license_file".to_string(),
422            serde_json::Value::String(license_file),
423        );
424    }
425    extra_data.insert("schema_version".to_string(), serde_json::json!(1));
426
427    let mut pkg = default_package_data(Some(DatasourceId::CondaMetaYaml));
428    pkg.package_type = Some(CondaMetaYamlParser::PACKAGE_TYPE);
429    pkg.datasource_id = Some(DatasourceId::CondaMetaYaml);
430    pkg.name = name;
431    pkg.version = version;
432    pkg.purl = build_conda_package_purl(pkg.name.as_deref(), pkg.version.as_deref());
433    pkg.download_url = download_url;
434    pkg.homepage_url = homepage_url;
435    pkg.declared_license_expression = declared_license_expression.map(truncate_field);
436    pkg.declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
437    pkg.license_detections = license_detections;
438    pkg.extracted_license_statement = extracted_license_statement.map(truncate_field);
439    pkg.description = description;
440    pkg.vcs_url = vcs_url;
441    pkg.sha256 = sha256;
442    pkg.dependencies = dependencies;
443    pkg.extra_data = Some(extra_data);
444    pkg
445}
446
447fn extract_recipe_yaml_context(yaml: &Value) -> HashMap<String, String> {
448    let mut context = HashMap::new();
449    let Some(context_mapping) = yaml.get("context").and_then(|value| value.as_mapping()) else {
450        return context;
451    };
452
453    for (key, value) in context_mapping {
454        let Some(key) = key.as_str() else {
455            continue;
456        };
457        if let Some(value) = yaml_value_to_string(value) {
458            context.insert(truncate_field(key.to_string()), truncate_field(value));
459        }
460    }
461
462    context
463}
464
465fn recipe_yaml_value_to_string(value: &Value, context: &HashMap<String, String>) -> Option<String> {
466    let value = yaml_value_to_string(value)?;
467    Some(resolve_recipe_yaml_expressions(&value, context))
468}
469
470fn resolve_recipe_yaml_expressions(value: &str, context: &HashMap<String, String>) -> String {
471    let Some(re) = Regex::new(r#"\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}"#).ok() else {
472        return truncate_field(value.to_string());
473    };
474
475    let resolved = re.replace_all(value, |caps: &regex::Captures| {
476        context
477            .get(&caps[1])
478            .cloned()
479            .unwrap_or_else(|| caps[0].to_string())
480    });
481    truncate_field(resolved.into_owned())
482}
483
484fn extract_recipe_yaml_requirement_strings(
485    value: &Value,
486    context: &HashMap<String, String>,
487) -> Vec<String> {
488    let mut requirements = Vec::new();
489    collect_recipe_yaml_requirement_strings(value, context, &mut requirements);
490    requirements
491}
492
493fn collect_recipe_yaml_requirement_strings(
494    value: &Value,
495    context: &HashMap<String, String>,
496    requirements: &mut Vec<String>,
497) {
498    if let Some(req) = value.as_str() {
499        let resolved = resolve_recipe_yaml_expressions(req, context);
500        if should_keep_recipe_yaml_requirement(&resolved) {
501            requirements.push(resolved);
502        }
503        return;
504    }
505
506    if let Some(items) = value.as_sequence() {
507        for item in items.iter().take(MAX_ITERATION_COUNT) {
508            collect_recipe_yaml_requirement_strings(item, context, requirements);
509        }
510        return;
511    }
512
513    if let Some(mapping) = value.as_mapping() {
514        if let Some(then_value) = mapping.get("then") {
515            collect_recipe_yaml_requirement_strings(then_value, context, requirements);
516        }
517        if let Some(else_value) = mapping.get("else") {
518            collect_recipe_yaml_requirement_strings(else_value, context, requirements);
519        }
520    }
521}
522
/// Decides whether a resolved `recipe.yaml` requirement string is a plain
/// package requirement.
///
/// Blank strings are rejected, as are strings that still contain an
/// unresolved `${{` expression or a `compiler(...)`, `stdlib(...)`,
/// `pin_subpackage(...)` or `pin_compatible(...)` call.
fn should_keep_recipe_yaml_requirement(req: &str) -> bool {
    const UNSUPPORTED_MARKERS: [&str; 7] = [
        "${{",
        "compiler('",
        "compiler(\"",
        "pin_subpackage(",
        "pin_compatible(",
        "stdlib('",
        "stdlib(\"",
    ];

    let candidate = req.trim();
    !candidate.is_empty()
        && !UNSUPPORTED_MARKERS
            .iter()
            .any(|marker| candidate.contains(*marker))
}
537
538fn normalize_conda_declared_license(
539    statement: Option<&str>,
540) -> (
541    Option<String>,
542    Option<String>,
543    Vec<crate::models::LicenseDetection>,
544) {
545    match statement.map(str::trim).filter(|value| !value.is_empty()) {
546        Some("Apache Software") => build_declared_license_data_from_pair(
547            "apache-2.0",
548            "Apache-2.0",
549            DeclaredLicenseMatchMetadata::single_line("Apache Software"),
550        ),
551        Some("BSD-3-Clause") => build_declared_license_data_from_pair(
552            "bsd-new",
553            "BSD-3-Clause",
554            DeclaredLicenseMatchMetadata::single_line("BSD-3-Clause"),
555        ),
556        other => normalize_spdx_declared_license(other),
557    }
558}
559
560/// Conda environment file (environment.yml, conda.yaml) parser.
561///
562/// Extracts dependencies from Conda environment files used to define reproducible
563/// environments. Supports both Conda and pip dependencies, with channel specifications.
564pub struct CondaEnvironmentYmlParser;
565
566impl PackageParser for CondaEnvironmentYmlParser {
567    const PACKAGE_TYPE: PackageType = PackageType::Conda;
568
569    fn is_match(path: &Path) -> bool {
570        // Python reference: path_patterns = ('*conda*.yaml', '*env*.yaml', '*environment*.yaml')
571        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
572            let lower = name.to_lowercase();
573            (lower.contains("conda") || lower.contains("env") || lower.contains("environment"))
574                && (lower.ends_with(".yaml") || lower.ends_with(".yml"))
575        } else {
576            false
577        }
578    }
579
580    fn extract_packages(path: &Path) -> Vec<PackageData> {
581        let contents = match read_file_to_string(path, None) {
582            Ok(c) => c,
583            Err(e) => {
584                warn!("Failed to read {}: {}", path.display(), e);
585                return vec![default_package_data(Some(DatasourceId::CondaYaml))];
586            }
587        };
588
589        let yaml: Value = match yaml_serde::from_str(&contents) {
590            Ok(y) => y,
591            Err(e) => {
592                warn!("Failed to parse YAML in {}: {}", path.display(), e);
593                return vec![default_package_data(Some(DatasourceId::CondaYaml))];
594            }
595        };
596
597        if !looks_like_conda_environment_yaml(&yaml) {
598            return Vec::new();
599        }
600
601        let name = yaml
602            .get("name")
603            .and_then(|v| v.as_str())
604            .map(|s| truncate_field(s.to_string()));
605
606        let dependencies = extract_environment_dependencies(&yaml);
607
608        let mut extra_data = HashMap::new();
609        if let Some(channels) = yaml.get("channels").and_then(|v| v.as_sequence()) {
610            let channels_vec: Vec<String> = channels
611                .iter()
612                .filter_map(|c| c.as_str().map(|s| truncate_field(s.to_string())))
613                .collect();
614            if !channels_vec.is_empty() {
615                extra_data.insert("channels".to_string(), serde_json::json!(channels_vec));
616            }
617        }
618
619        // Environment files are private (not published packages)
620        let mut pkg = default_package_data(Some(DatasourceId::CondaYaml));
621        pkg.package_type = Some(Self::PACKAGE_TYPE);
622        pkg.datasource_id = Some(DatasourceId::CondaYaml);
623        pkg.name = name;
624        pkg.purl = build_conda_package_purl(pkg.name.as_deref(), pkg.version.as_deref());
625        pkg.primary_language = Some(truncate_field("Python".to_string()));
626        pkg.dependencies = dependencies;
627        pkg.is_private = true;
628        if !extra_data.is_empty() {
629            pkg.extra_data = Some(extra_data);
630        }
631        vec![pkg]
632    }
633}
634
635fn looks_like_conda_environment_yaml(yaml: &Value) -> bool {
636    let has_dependencies = yaml
637        .get("dependencies")
638        .and_then(|value| value.as_sequence())
639        .is_some_and(|items| !items.is_empty());
640    let has_channels = yaml
641        .get("channels")
642        .and_then(|value| value.as_sequence())
643        .is_some_and(|items| !items.is_empty());
644    let has_prefix = yaml
645        .get("prefix")
646        .and_then(|value| value.as_str())
647        .is_some_and(|value| !value.trim().is_empty());
648
649    has_dependencies || has_channels || has_prefix
650}
651
652/// Extract Jinja2-style variables from a Conda meta.yaml
653///
654/// Example:
655/// ```ignore
656/// {% set version = "0.45.0" %}
657/// {% set sha256 = "abc123..." %}
658/// ```
659pub fn extract_jinja2_variables(content: &str) -> HashMap<String, String> {
660    let mut variables = HashMap::new();
661
662    for line in content.lines().take(MAX_ITERATION_COUNT) {
663        let trimmed = line.trim();
664        if let Some(inner) = extract_jinja_statement(trimmed)
665            && let Some(inner) = inner.strip_prefix("set").map(str::trim)
666            && let Some((key, value)) = inner.split_once('=')
667        {
668            let key = key.trim();
669            let value = value.trim().trim_matches('"').trim_matches('\'');
670            variables.insert(
671                truncate_field(key.to_string()),
672                truncate_field(value.to_string()),
673            );
674        }
675    }
676
677    variables
678}
679
680/// Apply Jinja2 variable substitutions to YAML content
681///
682/// Supports:
683/// - `{{ variable }}` - Simple substitution
684/// - `{{ variable|lower }}` - Lowercase filter
685pub fn apply_jinja2_substitutions(content: &str, variables: &HashMap<String, String>) -> String {
686    let mut result = Vec::new();
687
688    for line in content.lines() {
689        let trimmed = line.trim();
690
691        if extract_jinja_statement(trimmed).is_some() {
692            continue;
693        }
694
695        let mut processed_line = line.to_string();
696
697        // Apply variable substitutions
698        if line.contains("{{") && line.contains("}}") {
699            for (var_name, var_value) in variables {
700                // Handle |lower filter
701                let pattern_lower = format!("{{{{ {}|lower }}}}", var_name);
702                if processed_line.contains(&pattern_lower) {
703                    processed_line =
704                        processed_line.replace(&pattern_lower, &var_value.to_lowercase());
705                }
706
707                // Handle normal substitution
708                let pattern_normal = format!("{{{{ {} }}}}", var_name);
709                processed_line = processed_line.replace(&pattern_normal, var_value);
710            }
711        }
712
713        // Skip lines with unresolved Jinja2 templates (complex expressions we can't handle)
714        if processed_line.contains("{{") {
715            continue;
716        }
717
718        result.push(processed_line);
719    }
720
721    quote_plain_numeric_version_scalars(&result.join("\n"))
722}
723
724fn quote_plain_numeric_version_scalars(content: &str) -> String {
725    let Some(version_re) =
726        Regex::new(r#"^(\s*(?:-\s*)?version:\s*)([0-9]+(?:\.[0-9]+)+)(\s*)$"#).ok()
727    else {
728        return content.to_string();
729    };
730
731    content
732        .lines()
733        .map(|line| {
734            version_re
735                .replace(line, |caps: &regex::Captures| {
736                    format!(r#"{}"{}"{}"#, &caps[1], &caps[2], &caps[3])
737                })
738                .into_owned()
739        })
740        .collect::<Vec<_>>()
741        .join("\n")
742}
743
744/// Parse a Conda requirement string into a Dependency
745///
746/// Format examples:
747/// - `mccortex ==1.0` - Pinned version with space before operator
748/// - `python >=3.6` - Version constraint
749/// - `conda-forge::numpy=1.15.4` - Namespace and pinned version (no space)
750/// - `bwa` - No version specified
751pub fn parse_conda_requirement(req: &str, scope: &str) -> Option<Dependency> {
752    let req = req.trim();
753
754    // Handle namespace prefix (conda-forge::package)
755    let (namespace, channel_url, req_without_ns) = parse_conda_channel_prefix(req);
756
757    // Split on first space to separate name from version constraint
758    let (name_part, version_constraint) =
759        if let Some((name, constraint)) = req_without_ns.split_once(' ') {
760            (name.trim(), Some(constraint.trim()))
761        } else {
762            (req_without_ns, None)
763        };
764
765    // Check for pinned version with `=` (no space): package=1.0
766    let (name, version, is_pinned, extracted_requirement) = if name_part.contains('=') {
767        let parts: Vec<&str> = name_part.splitn(2, '=').collect();
768        let n = parts[0].trim();
769        let v = if parts.len() > 1 {
770            let parsed = parts[1].trim();
771            if parsed.is_empty() {
772                None
773            } else {
774                Some(truncate_field(parsed.to_string()))
775            }
776        } else {
777            None
778        };
779        let req = v
780            .as_ref()
781            .map(|ver| format!("={}", ver))
782            .unwrap_or_default();
783        (n, v, true, Some(truncate_field(req)))
784    } else if let Some(constraint) = version_constraint {
785        let version_opt = if constraint.starts_with("==") {
786            Some(truncate_field(
787                constraint.trim_start_matches("==").trim().to_string(),
788            ))
789        } else {
790            None
791        };
792        (
793            name_part.trim(),
794            version_opt,
795            false,
796            Some(truncate_field(constraint.to_string())),
797        )
798    } else {
799        (name_part.trim(), None, false, Some(String::new()))
800    };
801
802    // Build PURL
803    let purl = build_purl(
804        "conda",
805        namespace,
806        name,
807        version.as_deref(),
808        None,
809        None,
810        None,
811    );
812
813    // Determine is_runtime and is_optional based on scope
814    let (is_runtime, is_optional) = match scope {
815        "run" => (true, false),
816        _ => (false, true), // build, host, test are all optional
817    };
818
819    let mut extra_data = HashMap::new();
820    if let Some(namespace) = namespace {
821        extra_data.insert(
822            "channel".to_string(),
823            serde_json::json!(truncate_field(namespace.to_string())),
824        );
825    }
826    if let Some(channel_url) = channel_url {
827        extra_data.insert(
828            "channel_url".to_string(),
829            serde_json::json!(truncate_field(channel_url.to_string())),
830        );
831    }
832
833    Some(Dependency {
834        purl,
835        extracted_requirement,
836        scope: Some(truncate_field(scope.to_string())),
837        is_runtime: Some(is_runtime),
838        is_optional: Some(is_optional),
839        is_pinned: Some(is_pinned),
840        is_direct: Some(true),
841        resolved_package: None,
842        extra_data: (!extra_data.is_empty()).then_some(extra_data),
843    })
844}
845
846fn extract_environment_dependencies(yaml: &Value) -> Vec<Dependency> {
847    let dependencies = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
848        Some(d) => d,
849        None => return Vec::new(),
850    };
851
852    let mut deps = Vec::new();
853    for dep_value in dependencies.iter().take(MAX_ITERATION_COUNT) {
854        if let Some(dep_str) = dep_value.as_str() {
855            if let Some(dep) = parse_environment_string_dependency(dep_str) {
856                deps.push(dep);
857            }
858        } else if let Some(pip_deps) = dep_value.get("pip").and_then(|v| v.as_sequence()) {
859            deps.extend(extract_pip_dependencies(pip_deps));
860        }
861    }
862    deps
863}
864
865fn parse_environment_string_dependency(dep_str: &str) -> Option<Dependency> {
866    let (namespace, channel_url, dep_without_ns) = parse_conda_channel_prefix(dep_str);
867    create_conda_dependency(namespace, channel_url, dep_without_ns, "dependencies")
868}
869
870fn parse_conda_exact_requirement(req_no_space: &str) -> (Option<String>, Option<String>) {
871    let exact = req_no_space
872        .strip_prefix("==")
873        .or_else(|| req_no_space.strip_prefix('='));
874
875    let Some(exact) = exact else {
876        return (None, None);
877    };
878
879    if exact.is_empty() {
880        return (None, None);
881    }
882
883    match exact.split_once('=') {
884        Some((version, build_string)) if !version.is_empty() => (
885            Some(truncate_field(version.to_string())),
886            (!build_string.is_empty()).then(|| truncate_field(build_string.to_string())),
887        ),
888        _ => (Some(truncate_field(exact.to_string())), None),
889    }
890}
891
/// Splits an optional `channel::` prefix off a Conda dependency string.
///
/// Returns `(channel_name, channel_url, remainder)`:
/// - no `::` in the input: `(None, None, dep_str)`;
/// - the prefix contains `/` or `:` (URL-like): `(None, Some(prefix), rest)`;
/// - otherwise: `(Some(prefix), None, rest)`.
///
/// The split happens at the *last* `::`. The prefix is trimmed, and a blank
/// prefix (e.g. `"::numpy"`) is reported as "no channel" rather than as an
/// empty channel name, so callers never record an empty channel.
fn parse_conda_channel_prefix(dep_str: &str) -> (Option<&str>, Option<&str>, &str) {
    let Some((prefix, spec)) = dep_str.rsplit_once("::") else {
        return (None, None, dep_str);
    };

    let channel = prefix.trim();
    if channel.is_empty() {
        // Nothing meaningful before `::`; keep the spec, drop the channel.
        (None, None, spec)
    } else if channel.contains('/') || channel.contains(':') {
        (None, Some(channel), spec)
    } else {
        (Some(channel), None, spec)
    }
}
903
904fn create_conda_dependency(
905    namespace: Option<&str>,
906    channel_url: Option<&str>,
907    dep_without_ns: &str,
908    scope: &str,
909) -> Option<Dependency> {
910    let dep = dep_without_ns.trim();
911    let name_re = match Regex::new(r"^([A-Za-z0-9_.\-]+)") {
912        Ok(re) => re,
913        Err(_) => return None,
914    };
915
916    let caps = name_re.captures(dep)?;
917    let name_match = caps.get(1)?;
918    let name = name_match.as_str().trim();
919    let rest = dep[name_match.end()..].trim();
920
921    let (version, build_string, is_pinned, extracted_requirement) = if rest.is_empty() {
922        (None, None, false, Some(String::new()))
923    } else {
924        let req_no_space = rest.replace(' ', "");
925        let is_exact = req_no_space.starts_with("=") || req_no_space.starts_with("==");
926        let (parsed_version, parsed_build_string) = if is_exact {
927            parse_conda_exact_requirement(&req_no_space)
928        } else {
929            (None, None)
930        };
931
932        (
933            parsed_version,
934            parsed_build_string,
935            is_exact,
936            Some(truncate_field(rest.to_string())),
937        )
938    };
939
940    if name == "pip" || name == "python" {
941        return None;
942    }
943
944    let purl = build_purl(
945        "conda",
946        namespace,
947        name,
948        version.as_deref(),
949        None,
950        None,
951        None,
952    );
953    let mut extra_data = HashMap::new();
954    if let Some(namespace) = namespace {
955        extra_data.insert(
956            "channel".to_string(),
957            serde_json::json!(truncate_field(namespace.to_string())),
958        );
959    }
960    if let Some(channel_url) = channel_url {
961        extra_data.insert(
962            "channel_url".to_string(),
963            serde_json::json!(truncate_field(channel_url.to_string())),
964        );
965    }
966    if let Some(build_string) = build_string {
967        extra_data.insert("build_string".to_string(), serde_json::json!(build_string));
968    }
969
970    Some(Dependency {
971        purl,
972        extracted_requirement,
973        scope: Some(truncate_field(scope.to_string())),
974        is_runtime: Some(true),
975        is_optional: Some(false),
976        is_pinned: Some(is_pinned),
977        is_direct: Some(true),
978        resolved_package: None,
979        extra_data: (!extra_data.is_empty()).then_some(extra_data),
980    })
981}
982
983fn extract_pip_dependencies(pip_deps: &[Value]) -> Vec<Dependency> {
984    pip_deps
985        .iter()
986        .take(MAX_ITERATION_COUNT)
987        .filter_map(|pip_dep| {
988            if let Some(pip_req_str) = pip_dep.as_str()
989                && let Ok(parsed_req) = pip_req_str.parse::<pep508_rs::Requirement>()
990            {
991                create_pip_dependency(parsed_req, "dependencies", Some(pip_req_str))
992            } else {
993                None
994            }
995        })
996        .collect()
997}
998
999fn create_pip_dependency(
1000    parsed_req: pep508_rs::Requirement,
1001    scope: &str,
1002    raw_requirement: Option<&str>,
1003) -> Option<Dependency> {
1004    let name = truncate_field(parsed_req.name.to_string());
1005
1006    if name == "pip" || name == "python" {
1007        return None;
1008    }
1009
1010    let specs = parsed_req.version_or_url.as_ref().map(|v| match v {
1011        pep508_rs::VersionOrUrl::VersionSpecifier(spec) => truncate_field(spec.to_string()),
1012        pep508_rs::VersionOrUrl::Url(url) => truncate_field(url.to_string()),
1013    });
1014
1015    let extracted_requirement = if let Some(raw) = raw_requirement {
1016        let raw = raw.trim();
1017        let suffix = raw.strip_prefix(&name).unwrap_or(raw).trim().to_string();
1018        Some(truncate_field(suffix))
1019    } else {
1020        Some(truncate_field(specs.clone().unwrap_or_default()))
1021    };
1022
1023    let version = specs.as_ref().and_then(|spec_str| {
1024        if spec_str.starts_with("==") {
1025            Some(truncate_field(
1026                spec_str.trim_start_matches("==").to_string(),
1027            ))
1028        } else {
1029            None
1030        }
1031    });
1032
1033    let is_pinned = specs.as_ref().map(|s| s.contains("==")).unwrap_or(false);
1034    let purl = build_purl("pypi", None, &name, version.as_deref(), None, None, None);
1035
1036    Some(Dependency {
1037        purl,
1038        extracted_requirement,
1039        scope: Some(truncate_field(scope.to_string())),
1040        is_runtime: Some(true),
1041        is_optional: Some(false),
1042        is_pinned: Some(is_pinned),
1043        is_direct: Some(true),
1044        resolved_package: None,
1045        extra_data: None,
1046    })
1047}
1048
// Registers this parser through the crate-level `register_parser!` macro.
// The arguments passed here are, in order: a human-readable description, the
// list of path glob patterns, the string "conda", the string "Python" and an
// optional documentation URL.
// NOTE(review): the third and fourth arguments look like the package type and
// the primary language — confirm against the macro definition.
// NOTE(review): the wildcard patterns (`**/*conda*.*`, `**/*env*.*`,
// `**/*environment*.*`) appear to subsume the explicit file names listed above
// them; confirm whether the explicit entries are kept on purpose.
crate::register_parser!(
    "Conda package manifest and environment file",
    &[
        "**/meta.yaml",
        "**/meta.yml",
        "**/recipe/recipe.yaml",
        "**/recipe/recipe.yml",
        "**/environment.yml",
        "**/environment.yaml",
        "**/env.yaml",
        "**/env.yml",
        "**/conda.yaml",
        "**/conda.yml",
        "**/*conda*.yaml",
        "**/*conda*.yml",
        "**/*env*.yaml",
        "**/*env*.yml",
        "**/*environment*.yaml",
        "**/*environment*.yml"
    ],
    "conda",
    "Python",
    Some("https://docs.conda.io/"),
);