1use std::collections::HashMap;
29use std::path::Path;
30
31use crate::parser_warn as warn;
32use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
33use regex::Regex;
34use yaml_serde::Value;
35
36use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Sha256Digest};
37
38use super::PackageParser;
39use super::license_normalization::{
40 DeclaredLicenseMatchMetadata, build_declared_license_data_from_pair,
41 normalize_spdx_declared_license,
42};
43
44fn default_package_data(datasource_id: Option<DatasourceId>) -> PackageData {
45 PackageData {
46 package_type: Some(CondaMetaYamlParser::PACKAGE_TYPE),
47 datasource_id,
48 ..Default::default()
49 }
50}
51
/// Assembles a package URL string of the form `pkg:<type>/[<namespace>/]<name>[@<version>]`.
///
/// Behaviour per `package_type`:
/// * `"conda"` — `namespace` (when present) becomes a path segment before the
///   name, and `version` (when present) is appended after `@`.
/// * `"pypi"` — `namespace` is ignored; `version` is appended when present.
/// * anything else — only `pkg:<type>/<name>` is produced; both `namespace`
///   and `version` are ignored.
///
/// `_qualifiers`, `_subpath` and `_extras` are accepted for signature
/// compatibility but are not used. No percent-encoding is applied to any
/// component. The function always returns `Some`.
pub(crate) fn build_purl(
    package_type: &str,
    namespace: Option<&str>,
    name: &str,
    version: Option<&str>,
    _qualifiers: Option<&str>,
    _subpath: Option<&str>,
    _extras: Option<&str>,
) -> Option<String> {
    // Only conda purls carry a namespace segment.
    let namespace_segment = if package_type == "conda" {
        namespace
    } else {
        None
    };
    // Only conda and pypi purls carry a version.
    let version_suffix = match package_type {
        "conda" | "pypi" => version,
        _ => None,
    };

    let mut purl = format!("pkg:{}/", package_type);
    if let Some(ns) = namespace_segment {
        purl.push_str(ns);
        purl.push('/');
    }
    purl.push_str(name);
    if let Some(v) = version_suffix {
        purl.push('@');
        purl.push_str(v);
    }
    Some(purl)
}
84
85fn build_conda_package_purl(name: Option<&str>, version: Option<&str>) -> Option<String> {
86 let name = name?;
87 build_purl("conda", None, name, version, None, None, None)
88}
89
90fn yaml_value_to_string(value: &Value) -> Option<String> {
91 match value {
92 Value::String(s) => Some(truncate_field(s.clone())),
93 Value::Number(n) => Some(truncate_field(n.to_string())),
94 Value::Bool(b) => Some(truncate_field(b.to_string())),
95 _ => None,
96 }
97}
98
/// Returns the trimmed body of a Jinja statement (`{% ... %}`) that starts the line.
///
/// `trimmed_line` is expected to have its leading whitespace already removed;
/// the function returns `None` unless the line begins with `{%` and a closing
/// `%}` appears somewhere after that opener. Anything following the first
/// closing `%}` is ignored.
///
/// The closing delimiter is searched for only *after* the two-byte opener.
/// Searching the whole line would match the overlapping `%}` inside a
/// degenerate input such as `{%}` and produce an inverted slice range, which
/// panics.
fn extract_jinja_statement(trimmed_line: &str) -> Option<&str> {
    let after_open = trimmed_line.strip_prefix("{%")?;
    let close = after_open.find("%}")?;
    Some(after_open[..close].trim())
}
107
108fn extract_conda_requirement_name(req: &str) -> Option<String> {
109 let req = req.trim();
110 if req.is_empty() {
111 return None;
112 }
113
114 let req_without_ns = req.rsplit_once("::").map(|(_, rest)| rest).unwrap_or(req);
115
116 let name = req_without_ns
117 .split_whitespace()
118 .next()
119 .unwrap_or(req_without_ns)
120 .split(['=', '<', '>', '!', '~'])
121 .next()
122 .unwrap_or(req_without_ns)
123 .trim();
124
125 if name.is_empty() {
126 None
127 } else {
128 Some(truncate_field(name.to_string()))
129 }
130}
131
132pub struct CondaMetaYamlParser;
138
139impl PackageParser for CondaMetaYamlParser {
140 const PACKAGE_TYPE: PackageType = PackageType::Conda;
141
142 fn is_match(path: &Path) -> bool {
143 path.file_name()
145 .is_some_and(|name| name == "meta.yaml" || name == "meta.yml")
146 }
147
148 fn extract_packages(path: &Path) -> Vec<PackageData> {
149 let contents = match read_file_to_string(path, None) {
150 Ok(c) => c,
151 Err(e) => {
152 warn!("Failed to read {}: {}", path.display(), e);
153 return vec![default_package_data(Some(DatasourceId::CondaMetaYaml))];
154 }
155 };
156
157 let variables = extract_jinja2_variables(&contents);
159 let processed_yaml = apply_jinja2_substitutions(&contents, &variables);
160
161 let yaml: Value = match yaml_serde::from_str(&processed_yaml) {
163 Ok(y) => y,
164 Err(e) => {
165 warn!("Failed to parse YAML in {}: {}", path.display(), e);
166 return vec![default_package_data(Some(DatasourceId::CondaMetaYaml))];
167 }
168 };
169
170 let package_element = yaml.get("package").and_then(|v| v.as_mapping());
171 let name = package_element
172 .and_then(|p| p.get("name"))
173 .and_then(yaml_value_to_string);
174
175 let version = package_element
176 .and_then(|p| p.get("version"))
177 .and_then(yaml_value_to_string);
178
179 let source = yaml.get("source").and_then(|v| v.as_mapping());
180 let download_url = source
181 .and_then(|s| s.get("url"))
182 .and_then(|v| v.as_str())
183 .map(|s| truncate_field(s.to_string()));
184
185 let sha256 = source
186 .and_then(|s| s.get("sha256"))
187 .and_then(|v| v.as_str())
188 .and_then(|s| Sha256Digest::from_hex(s).ok());
189
190 let about = yaml.get("about").and_then(|v| v.as_mapping());
191 let homepage_url = about
192 .and_then(|a| a.get("home"))
193 .and_then(|v| v.as_str())
194 .map(|s| truncate_field(s.to_string()));
195
196 let extracted_license_statement = about
197 .and_then(|a| a.get("license"))
198 .and_then(|v| v.as_str())
199 .map(|s| truncate_field(s.to_string()));
200 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
201 normalize_conda_declared_license(extracted_license_statement.as_deref());
202
203 let description = about
204 .and_then(|a| a.get("summary"))
205 .and_then(|v| v.as_str())
206 .map(|s| truncate_field(s.to_string()));
207
208 let vcs_url = about
209 .and_then(|a| a.get("dev_url"))
210 .and_then(|v| v.as_str())
211 .map(|s| truncate_field(s.to_string()));
212 let license_file = about
213 .and_then(|a| a.get("license_file"))
214 .and_then(|v| v.as_str())
215 .map(str::trim)
216 .filter(|value| !value.is_empty())
217 .map(|s| truncate_field(s.to_string()));
218
219 let mut dependencies = Vec::new();
221 let mut extra_data: HashMap<String, serde_json::Value> = HashMap::new();
222
223 if let Some(requirements) = yaml.get("requirements").and_then(|v| v.as_mapping()) {
224 for (scope_key, reqs_value) in requirements {
225 let scope = scope_key.as_str().unwrap_or("unknown");
226 if let Some(reqs) = reqs_value.as_sequence() {
227 for req in reqs.iter().take(MAX_ITERATION_COUNT) {
228 if let Some(req_str) = req.as_str()
229 && let Some(dep) = parse_conda_requirement(req_str, scope)
230 {
231 if extract_conda_requirement_name(req_str)
233 .is_some_and(|n| n == "pip" || n == "python")
234 {
235 if let Some(arr) = extra_data
236 .entry(scope.to_string())
237 .or_insert_with(|| serde_json::Value::Array(vec![]))
238 .as_array_mut()
239 {
240 arr.push(serde_json::Value::String(truncate_field(
241 req_str.to_string(),
242 )))
243 }
244 } else {
245 dependencies.push(dep);
246 }
247 }
248 }
249 }
250 }
251 }
252
253 let mut pkg = default_package_data(Some(DatasourceId::CondaMetaYaml));
254 pkg.package_type = Some(Self::PACKAGE_TYPE);
255 pkg.datasource_id = Some(DatasourceId::CondaMetaYaml);
256 pkg.name = name;
257 pkg.version = version;
258 pkg.purl = build_conda_package_purl(pkg.name.as_deref(), pkg.version.as_deref());
259 pkg.download_url = download_url;
260 pkg.homepage_url = homepage_url;
261 pkg.declared_license_expression = declared_license_expression.map(truncate_field);
262 pkg.declared_license_expression_spdx = declared_license_expression_spdx.map(truncate_field);
263 pkg.license_detections = license_detections;
264 pkg.extracted_license_statement = extracted_license_statement.map(truncate_field);
265 pkg.description = description;
266 pkg.vcs_url = vcs_url;
267 pkg.sha256 = sha256;
268 pkg.dependencies = dependencies;
269 if let Some(license_file) = license_file {
270 extra_data.insert(
271 "license_file".to_string(),
272 serde_json::Value::String(license_file),
273 );
274 }
275 if !extra_data.is_empty() {
276 pkg.extra_data = Some(extra_data);
277 }
278 vec![pkg]
279 }
280}
281
282fn normalize_conda_declared_license(
283 statement: Option<&str>,
284) -> (
285 Option<String>,
286 Option<String>,
287 Vec<crate::models::LicenseDetection>,
288) {
289 match statement.map(str::trim).filter(|value| !value.is_empty()) {
290 Some("Apache Software") => build_declared_license_data_from_pair(
291 "apache-2.0",
292 "Apache-2.0",
293 DeclaredLicenseMatchMetadata::single_line("Apache Software"),
294 ),
295 Some("BSD-3-Clause") => build_declared_license_data_from_pair(
296 "bsd-new",
297 "BSD-3-Clause",
298 DeclaredLicenseMatchMetadata::single_line("BSD-3-Clause"),
299 ),
300 other => normalize_spdx_declared_license(other),
301 }
302}
303
304pub struct CondaEnvironmentYmlParser;
309
310impl PackageParser for CondaEnvironmentYmlParser {
311 const PACKAGE_TYPE: PackageType = PackageType::Conda;
312
313 fn is_match(path: &Path) -> bool {
314 if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
316 let lower = name.to_lowercase();
317 (lower.contains("conda") || lower.contains("env") || lower.contains("environment"))
318 && (lower.ends_with(".yaml") || lower.ends_with(".yml"))
319 } else {
320 false
321 }
322 }
323
324 fn extract_packages(path: &Path) -> Vec<PackageData> {
325 let contents = match read_file_to_string(path, None) {
326 Ok(c) => c,
327 Err(e) => {
328 warn!("Failed to read {}: {}", path.display(), e);
329 return vec![default_package_data(Some(DatasourceId::CondaYaml))];
330 }
331 };
332
333 let yaml: Value = match yaml_serde::from_str(&contents) {
334 Ok(y) => y,
335 Err(e) => {
336 warn!("Failed to parse YAML in {}: {}", path.display(), e);
337 return vec![default_package_data(Some(DatasourceId::CondaYaml))];
338 }
339 };
340
341 if !looks_like_conda_environment_yaml(&yaml) {
342 return Vec::new();
343 }
344
345 let name = yaml
346 .get("name")
347 .and_then(|v| v.as_str())
348 .map(|s| truncate_field(s.to_string()));
349
350 let dependencies = extract_environment_dependencies(&yaml);
351
352 let mut extra_data = HashMap::new();
353 if let Some(channels) = yaml.get("channels").and_then(|v| v.as_sequence()) {
354 let channels_vec: Vec<String> = channels
355 .iter()
356 .filter_map(|c| c.as_str().map(|s| truncate_field(s.to_string())))
357 .collect();
358 if !channels_vec.is_empty() {
359 extra_data.insert("channels".to_string(), serde_json::json!(channels_vec));
360 }
361 }
362
363 let mut pkg = default_package_data(Some(DatasourceId::CondaYaml));
365 pkg.package_type = Some(Self::PACKAGE_TYPE);
366 pkg.datasource_id = Some(DatasourceId::CondaYaml);
367 pkg.name = name;
368 pkg.purl = build_conda_package_purl(pkg.name.as_deref(), pkg.version.as_deref());
369 pkg.primary_language = Some(truncate_field("Python".to_string()));
370 pkg.dependencies = dependencies;
371 pkg.is_private = true;
372 if !extra_data.is_empty() {
373 pkg.extra_data = Some(extra_data);
374 }
375 vec![pkg]
376 }
377}
378
379fn looks_like_conda_environment_yaml(yaml: &Value) -> bool {
380 let has_dependencies = yaml
381 .get("dependencies")
382 .and_then(|value| value.as_sequence())
383 .is_some_and(|items| !items.is_empty());
384 let has_channels = yaml
385 .get("channels")
386 .and_then(|value| value.as_sequence())
387 .is_some_and(|items| !items.is_empty());
388 let has_prefix = yaml
389 .get("prefix")
390 .and_then(|value| value.as_str())
391 .is_some_and(|value| !value.trim().is_empty());
392
393 has_dependencies || has_channels || has_prefix
394}
395
396pub fn extract_jinja2_variables(content: &str) -> HashMap<String, String> {
404 let mut variables = HashMap::new();
405
406 for line in content.lines().take(MAX_ITERATION_COUNT) {
407 let trimmed = line.trim();
408 if let Some(inner) = extract_jinja_statement(trimmed)
409 && let Some(inner) = inner.strip_prefix("set").map(str::trim)
410 && let Some((key, value)) = inner.split_once('=')
411 {
412 let key = key.trim();
413 let value = value.trim().trim_matches('"').trim_matches('\'');
414 variables.insert(
415 truncate_field(key.to_string()),
416 truncate_field(value.to_string()),
417 );
418 }
419 }
420
421 variables
422}
423
424pub fn apply_jinja2_substitutions(content: &str, variables: &HashMap<String, String>) -> String {
430 let mut result = Vec::new();
431
432 for line in content.lines() {
433 let trimmed = line.trim();
434
435 if extract_jinja_statement(trimmed).is_some() {
436 continue;
437 }
438
439 let mut processed_line = line.to_string();
440
441 if line.contains("{{") && line.contains("}}") {
443 for (var_name, var_value) in variables {
444 let pattern_lower = format!("{{{{ {}|lower }}}}", var_name);
446 if processed_line.contains(&pattern_lower) {
447 processed_line =
448 processed_line.replace(&pattern_lower, &var_value.to_lowercase());
449 }
450
451 let pattern_normal = format!("{{{{ {} }}}}", var_name);
453 processed_line = processed_line.replace(&pattern_normal, var_value);
454 }
455 }
456
457 if processed_line.contains("{{") {
459 continue;
460 }
461
462 result.push(processed_line);
463 }
464
465 result.join("\n")
466}
467
468pub fn parse_conda_requirement(req: &str, scope: &str) -> Option<Dependency> {
476 let req = req.trim();
477
478 let (namespace, channel_url, req_without_ns) = parse_conda_channel_prefix(req);
480
481 let (name_part, version_constraint) =
483 if let Some((name, constraint)) = req_without_ns.split_once(' ') {
484 (name.trim(), Some(constraint.trim()))
485 } else {
486 (req_without_ns, None)
487 };
488
489 let (name, version, is_pinned, extracted_requirement) = if name_part.contains('=') {
491 let parts: Vec<&str> = name_part.splitn(2, '=').collect();
492 let n = parts[0].trim();
493 let v = if parts.len() > 1 {
494 let parsed = parts[1].trim();
495 if parsed.is_empty() {
496 None
497 } else {
498 Some(truncate_field(parsed.to_string()))
499 }
500 } else {
501 None
502 };
503 let req = v
504 .as_ref()
505 .map(|ver| format!("={}", ver))
506 .unwrap_or_default();
507 (n, v, true, Some(truncate_field(req)))
508 } else if let Some(constraint) = version_constraint {
509 let version_opt = if constraint.starts_with("==") {
510 Some(truncate_field(
511 constraint.trim_start_matches("==").trim().to_string(),
512 ))
513 } else {
514 None
515 };
516 (
517 name_part.trim(),
518 version_opt,
519 false,
520 Some(truncate_field(constraint.to_string())),
521 )
522 } else {
523 (name_part.trim(), None, false, Some(String::new()))
524 };
525
526 let purl = build_purl(
528 "conda",
529 namespace,
530 name,
531 version.as_deref(),
532 None,
533 None,
534 None,
535 );
536
537 let (is_runtime, is_optional) = match scope {
539 "run" => (true, false),
540 _ => (false, true), };
542
543 let mut extra_data = HashMap::new();
544 if let Some(namespace) = namespace {
545 extra_data.insert(
546 "channel".to_string(),
547 serde_json::json!(truncate_field(namespace.to_string())),
548 );
549 }
550 if let Some(channel_url) = channel_url {
551 extra_data.insert(
552 "channel_url".to_string(),
553 serde_json::json!(truncate_field(channel_url.to_string())),
554 );
555 }
556
557 Some(Dependency {
558 purl,
559 extracted_requirement,
560 scope: Some(truncate_field(scope.to_string())),
561 is_runtime: Some(is_runtime),
562 is_optional: Some(is_optional),
563 is_pinned: Some(is_pinned),
564 is_direct: Some(true),
565 resolved_package: None,
566 extra_data: (!extra_data.is_empty()).then_some(extra_data),
567 })
568}
569
570fn extract_environment_dependencies(yaml: &Value) -> Vec<Dependency> {
571 let dependencies = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
572 Some(d) => d,
573 None => return Vec::new(),
574 };
575
576 let mut deps = Vec::new();
577 for dep_value in dependencies.iter().take(MAX_ITERATION_COUNT) {
578 if let Some(dep_str) = dep_value.as_str() {
579 if let Some(dep) = parse_environment_string_dependency(dep_str) {
580 deps.push(dep);
581 }
582 } else if let Some(pip_deps) = dep_value.get("pip").and_then(|v| v.as_sequence()) {
583 deps.extend(extract_pip_dependencies(pip_deps));
584 }
585 }
586 deps
587}
588
589fn parse_environment_string_dependency(dep_str: &str) -> Option<Dependency> {
590 let (namespace, channel_url, dep_without_ns) = parse_conda_channel_prefix(dep_str);
591 create_conda_dependency(namespace, channel_url, dep_without_ns, "dependencies")
592}
593
/// Splits an optional `channel::` prefix off a conda requirement string.
///
/// Returns `(namespace, channel_url, remainder)`:
/// * no `::` in the input — `(None, None, dep_str)`;
/// * the text before the last `::` contains `/` or `:` — it is reported as a
///   channel URL: `(None, Some(prefix), remainder)`;
/// * otherwise it is reported as a channel name:
///   `(Some(prefix), None, remainder)`.
///
/// The input is split at the *last* `::`; nothing is trimmed.
fn parse_conda_channel_prefix(dep_str: &str) -> (Option<&str>, Option<&str>, &str) {
    let Some((prefix, remainder)) = dep_str.rsplit_once("::") else {
        return (None, None, dep_str);
    };

    let looks_like_url = prefix.contains(['/', ':']);
    match looks_like_url {
        true => (None, Some(prefix), remainder),
        false => (Some(prefix), None, remainder),
    }
}
605
606fn create_conda_dependency(
607 namespace: Option<&str>,
608 channel_url: Option<&str>,
609 dep_without_ns: &str,
610 scope: &str,
611) -> Option<Dependency> {
612 let dep = dep_without_ns.trim();
613 let name_re = match Regex::new(r"^([A-Za-z0-9_.\-]+)") {
614 Ok(re) => re,
615 Err(_) => return None,
616 };
617
618 let caps = name_re.captures(dep)?;
619 let name_match = caps.get(1)?;
620 let name = name_match.as_str().trim();
621 let rest = dep[name_match.end()..].trim();
622
623 let (version, is_pinned, extracted_requirement) = if rest.is_empty() {
624 (None, false, Some(String::new()))
625 } else {
626 let req_no_space = rest.replace(' ', "");
627 let is_exact = req_no_space.starts_with("=") || req_no_space.starts_with("==");
628 let parsed_version = if is_exact {
629 Some(truncate_field(
630 req_no_space
631 .trim_start_matches('=')
632 .trim_start_matches('=')
633 .to_string(),
634 ))
635 } else {
636 None
637 };
638
639 (
640 parsed_version,
641 is_exact,
642 Some(truncate_field(rest.to_string())),
643 )
644 };
645
646 if name == "pip" || name == "python" {
647 return None;
648 }
649
650 let purl = build_purl(
651 "conda",
652 namespace,
653 name,
654 version.as_deref(),
655 None,
656 None,
657 None,
658 );
659 let mut extra_data = HashMap::new();
660 if let Some(namespace) = namespace {
661 extra_data.insert(
662 "channel".to_string(),
663 serde_json::json!(truncate_field(namespace.to_string())),
664 );
665 }
666 if let Some(channel_url) = channel_url {
667 extra_data.insert(
668 "channel_url".to_string(),
669 serde_json::json!(truncate_field(channel_url.to_string())),
670 );
671 }
672
673 Some(Dependency {
674 purl,
675 extracted_requirement,
676 scope: Some(truncate_field(scope.to_string())),
677 is_runtime: Some(true),
678 is_optional: Some(false),
679 is_pinned: Some(is_pinned),
680 is_direct: Some(true),
681 resolved_package: None,
682 extra_data: (!extra_data.is_empty()).then_some(extra_data),
683 })
684}
685
686fn extract_pip_dependencies(pip_deps: &[Value]) -> Vec<Dependency> {
687 pip_deps
688 .iter()
689 .take(MAX_ITERATION_COUNT)
690 .filter_map(|pip_dep| {
691 if let Some(pip_req_str) = pip_dep.as_str()
692 && let Ok(parsed_req) = pip_req_str.parse::<pep508_rs::Requirement>()
693 {
694 create_pip_dependency(parsed_req, "dependencies", Some(pip_req_str))
695 } else {
696 None
697 }
698 })
699 .collect()
700}
701
702fn create_pip_dependency(
703 parsed_req: pep508_rs::Requirement,
704 scope: &str,
705 raw_requirement: Option<&str>,
706) -> Option<Dependency> {
707 let name = truncate_field(parsed_req.name.to_string());
708
709 if name == "pip" || name == "python" {
710 return None;
711 }
712
713 let specs = parsed_req.version_or_url.as_ref().map(|v| match v {
714 pep508_rs::VersionOrUrl::VersionSpecifier(spec) => truncate_field(spec.to_string()),
715 pep508_rs::VersionOrUrl::Url(url) => truncate_field(url.to_string()),
716 });
717
718 let extracted_requirement = if let Some(raw) = raw_requirement {
719 let raw = raw.trim();
720 let suffix = raw.strip_prefix(&name).unwrap_or(raw).trim().to_string();
721 Some(truncate_field(suffix))
722 } else {
723 Some(truncate_field(specs.clone().unwrap_or_default()))
724 };
725
726 let version = specs.as_ref().and_then(|spec_str| {
727 if spec_str.starts_with("==") {
728 Some(truncate_field(
729 spec_str.trim_start_matches("==").to_string(),
730 ))
731 } else {
732 None
733 }
734 });
735
736 let is_pinned = specs.as_ref().map(|s| s.contains("==")).unwrap_or(false);
737 let purl = build_purl("pypi", None, &name, version.as_deref(), None, None, None);
738
739 Some(Dependency {
740 purl,
741 extracted_requirement,
742 scope: Some(truncate_field(scope.to_string())),
743 is_runtime: Some(true),
744 is_optional: Some(false),
745 is_pinned: Some(is_pinned),
746 is_direct: Some(true),
747 resolved_package: None,
748 extra_data: None,
749 })
750}
751
752crate::register_parser!(
753 "Conda package manifest and environment file",
754 &[
755 "**/meta.yaml",
756 "**/meta.yml",
757 "**/environment.yml",
758 "**/environment.yaml",
759 "**/env.yaml",
760 "**/env.yml",
761 "**/conda.yaml",
762 "**/conda.yml",
763 "**/*conda*.yaml",
764 "**/*conda*.yml",
765 "**/*env*.yaml",
766 "**/*env*.yml",
767 "**/*environment*.yaml",
768 "**/*environment*.yml"
769 ],
770 "conda",
771 "Python",
772 Some("https://docs.conda.io/"),
773);