1use std::path::Path;
24
25use crate::parser_warn as warn;
26use regex::Regex;
27
28use crate::models::{
29 DatasourceId, Dependency, Md5Digest, PackageData, PackageType, Party, Sha1Digest, Sha256Digest,
30 Sha512Digest,
31};
32use crate::parsers::PackageParser;
33
34use super::license_normalization::{
35 DeclaredLicenseMatchMetadata, build_declared_license_data_from_pair,
36 normalize_spdx_declared_license,
37};
38
/// Parser for OCaml OPAM package manifests: files named `opam` or ending in `.opam`.
pub struct OpamParser;
44
45impl PackageParser for OpamParser {
46 const PACKAGE_TYPE: PackageType = PackageType::Opam;
47
48 fn is_match(path: &Path) -> bool {
49 path.file_name().is_some_and(|name| {
50 name.to_string_lossy().ends_with(".opam") || name.to_string_lossy() == "opam"
51 })
52 }
53
54 fn extract_packages(path: &Path) -> Vec<PackageData> {
55 vec![match std::fs::read_to_string(path) {
56 Ok(text) => parse_opam(&text),
57 Err(e) => {
58 warn!("Failed to read OPAM file {:?}: {}", path, e);
59 default_package_data()
60 }
61 }]
62 }
63}
64
/// Raw values collected from an OPAM manifest before conversion to `PackageData`.
#[derive(Debug, Default)]
struct OpamData {
    // Value of the `name` field.
    name: Option<String>,
    // Value of the `version` field.
    version: Option<String>,
    // Value of the `synopsis` field.
    synopsis: Option<String>,
    // Value of the `description` field, flattened to one line.
    description: Option<String>,
    // Value of the `homepage` field.
    homepage: Option<String>,
    // Value of the `dev-repo` field.
    dev_repo: Option<String>,
    // Value of the `bug-reports` field.
    bug_reports: Option<String>,
    // Value of the `src` field (taken from the next line when the key line has no value).
    src: Option<String>,
    // Entries of the `authors` field.
    authors: Vec<String>,
    // Entries of the `maintainer` field.
    maintainers: Vec<String>,
    // Value of the `license` field.
    license: Option<String>,
    // Digests read from `checksum` entries, one slot per algorithm.
    sha1: Option<Sha1Digest>,
    md5: Option<Md5Digest>,
    sha256: Option<Sha256Digest>,
    sha512: Option<Sha512Digest>,
    // `(name, version constraint)` pairs read from the `depends` field.
    dependencies: Vec<(String, String)>,
}
85
86fn default_package_data() -> PackageData {
87 PackageData {
88 package_type: Some(OpamParser::PACKAGE_TYPE),
89 primary_language: Some("Ocaml".to_string()),
90 datasource_id: Some(DatasourceId::OpamFile),
91 ..Default::default()
92 }
93}
94
95fn parse_opam(text: &str) -> PackageData {
97 let opam_data = parse_opam_data(text);
98
99 let description = build_description(&opam_data.synopsis, &opam_data.description);
100 let parties = extract_parties(&opam_data.authors, &opam_data.maintainers);
101 let dependencies = extract_dependencies(&opam_data.dependencies);
102
103 let (repository_homepage_url, api_data_url, purl) =
104 build_opam_urls(&opam_data.name, &opam_data.version);
105 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
106 normalize_opam_declared_license(opam_data.license.as_deref());
107
108 PackageData {
109 package_type: Some(OpamParser::PACKAGE_TYPE),
110 namespace: None,
111 name: opam_data.name,
112 version: opam_data.version,
113 qualifiers: None,
114 subpath: None,
115 primary_language: Some("Ocaml".to_string()),
116 description,
117 release_date: None,
118 parties,
119 keywords: Vec::new(),
120 homepage_url: opam_data.homepage,
121 download_url: opam_data.src,
122 size: None,
123 sha1: opam_data.sha1,
124 md5: opam_data.md5,
125 sha256: opam_data.sha256,
126 sha512: opam_data.sha512,
127 bug_tracking_url: opam_data.bug_reports,
128 code_view_url: None,
129 vcs_url: opam_data.dev_repo,
130 copyright: None,
131 holder: None,
132 declared_license_expression,
133 declared_license_expression_spdx,
134 license_detections,
135 other_license_expression: None,
136 other_license_expression_spdx: None,
137 other_license_detections: Vec::new(),
138 extracted_license_statement: opam_data.license,
139 notice_text: None,
140 source_packages: Vec::new(),
141 file_references: Vec::new(),
142 is_private: false,
143 is_virtual: false,
144 extra_data: None,
145 dependencies,
146 repository_homepage_url,
147 repository_download_url: None,
148 api_data_url,
149 datasource_id: Some(DatasourceId::OpamFile),
150 purl,
151 }
152}
153
154fn normalize_opam_declared_license(
155 statement: Option<&str>,
156) -> (
157 Option<String>,
158 Option<String>,
159 Vec<crate::models::LicenseDetection>,
160) {
161 let Some(statement) = statement.map(str::trim).filter(|value| !value.is_empty()) else {
162 return super::license_normalization::empty_declared_license_data();
163 };
164
165 match statement {
166 "GPL-2.0-only" => build_declared_license_data_from_pair(
167 "gpl-2.0",
168 "GPL-2.0-only",
169 DeclaredLicenseMatchMetadata::single_line(statement),
170 ),
171 "GPL-3.0-only" => build_declared_license_data_from_pair(
172 "gpl-3.0",
173 "GPL-3.0-only",
174 DeclaredLicenseMatchMetadata::single_line(statement),
175 ),
176 "LGPL-3.0-only with OCaml-LGPL-linking-exception" => build_declared_license_data_from_pair(
177 "lgpl-3.0 WITH ocaml-lgpl-linking-exception",
178 "LGPL-3.0-only WITH OCaml-LGPL-linking-exception",
179 DeclaredLicenseMatchMetadata::single_line(statement),
180 ),
181 _ => normalize_spdx_declared_license(Some(statement)),
182 }
183}
184
/// Derives the repository homepage URL, the API data URL and the package URL
/// (purl) from a package name and optional version.
///
/// Returns `(repository_homepage_url, api_data_url, purl)`:
/// - the homepage URL and the purl need only a name;
/// - the API data URL needs both name and version;
/// - everything is `None` when the name is missing.
fn build_opam_urls(
    name: &Option<String>,
    version: &Option<String>,
) -> (Option<String>, Option<String>, Option<String>) {
    let name = name.as_deref();
    let version = version.as_deref();

    // The name is interpolated into the URL; previously an uninterpolated
    // template string containing literal braces was returned.
    let repository_homepage_url = name.map(|n| format!("https://opam.ocaml.org/packages/{n}"));

    let api_data_url = name.zip(version).map(|(n, v)| {
        format!("https://github.com/ocaml/opam-repository/blob/master/packages/{n}/{n}.{v}/opam")
    });

    let purl = name.map(|n| match version {
        Some(v) => format!("pkg:opam/{n}@{v}"),
        None => format!("pkg:opam/{n}"),
    });

    (repository_homepage_url, api_data_url, purl)
}
209
210fn parse_opam_data(text: &str) -> OpamData {
212 let mut data = OpamData::default();
213 let lines: Vec<&str> = text.lines().collect();
214 let mut i = 0;
215
216 while i < lines.len() {
217 let line = lines[i];
218
219 if let Some((key, value)) = parse_key_value(line) {
221 match key.as_str() {
222 "name" => data.name = clean_value(&value),
223 "version" => data.version = clean_value(&value),
224 "synopsis" => data.synopsis = clean_value(&value),
225 "description" => {
226 data.description = parse_multiline_string(&lines, &mut i);
227 }
228 "homepage" => data.homepage = clean_value(&value),
229 "dev-repo" => data.dev_repo = clean_value(&value),
230 "bug-reports" => data.bug_reports = clean_value(&value),
231 "src" => {
232 if value.trim().is_empty() && i + 1 < lines.len() {
233 i += 1;
234 data.src = clean_value(lines[i]);
235 } else {
236 data.src = clean_value(&value);
237 }
238 }
239 "license" => data.license = clean_value(&value),
240 "authors" => {
241 data.authors = parse_string_array(&lines, &mut i, &value);
242 }
243 "maintainer" => {
244 data.maintainers = parse_string_array(&lines, &mut i, &value);
245 }
246 "depends" => {
247 data.dependencies = parse_dependency_array(&lines, &mut i);
248 }
249 "checksum" => {
250 parse_checksums(&lines, &mut i, &mut data);
251 }
252 _ => {}
253 }
254 }
255
256 i += 1;
257 }
258
259 data
260}
261
/// Splits a `key: value` line at its first colon.
///
/// Returns `None` for blank lines, lines starting with `#`, and lines without
/// a colon. Both halves are whitespace-trimmed; quotes are left in place.
fn parse_key_value(line: &str) -> Option<(String, String)> {
    let trimmed = line.trim();
    if trimmed.is_empty() || trimmed.starts_with('#') {
        return None;
    }

    let (raw_key, raw_value) = trimmed.split_once(':')?;
    Some((raw_key.trim().to_string(), raw_value.trim().to_string()))
}
277
/// Strips surrounding whitespace, double quotes and square brackets from a
/// raw field value.
///
/// All of these characters are removed from both ends in a single pass, so a
/// singleton list such as `["https://example.com"]` is reduced to its bare
/// content. Stripping them one kind at a time left the inner quotes in place.
/// Characters in the middle of the value are never touched.
///
/// Returns `None` when nothing is left after stripping.
fn clean_value(value: &str) -> Option<String> {
    let cleaned =
        value.trim_matches(|c: char| c.is_whitespace() || matches!(c, '"' | '[' | ']'));

    (!cleaned.is_empty()).then(|| cleaned.to_string())
}
293
/// Reads a possibly multi-line string field starting at `lines[*i]` and
/// returns its text with the lines joined by single spaces.
///
/// The text after the first colon on the key line decides how far to read:
/// - a string opened and closed on the key line (`"..."` or `"""..."""`) is
///   complete, and the cursor is not advanced;
/// - a string opened with `"""` continues until a line containing `"""`;
/// - a string opened with a single `"` continues until a line containing `"`;
/// - otherwise lines are consumed until one contains `"""`.
///
/// When continuation lines are consumed, `*i` is left on the closing line, or
/// at `lines.len()` if no closing line exists. Escaped quotes (`\"`) are not
/// interpreted. Returns `None` when the collected text is empty.
fn parse_multiline_string(lines: &[&str], i: &mut usize) -> Option<String> {
    const TRIPLE_QUOTE: &str = "\"\"\"";

    // Text after the first colon of the key line ("" for a missing, comment or
    // colon-less line).
    let first_value = lines
        .get(*i)
        .copied()
        .map(str::trim)
        .filter(|line| !line.starts_with('#'))
        .and_then(|line| line.split_once(':'))
        .map_or("", |(_, value)| value.trim());

    let mut result = String::from(first_value.trim_matches('"').trim());

    // Pick the closing delimiter from the opening one, and detect strings
    // that are already closed on the key line.
    let (terminator, closed_on_key_line) =
        if let Some(rest) = first_value.strip_prefix(TRIPLE_QUOTE) {
            (TRIPLE_QUOTE, rest.contains(TRIPLE_QUOTE))
        } else if let Some(rest) = first_value.strip_prefix('"') {
            ("\"", rest.contains('"'))
        } else {
            (TRIPLE_QUOTE, false)
        };

    if !closed_on_key_line {
        *i += 1;
        while let Some(line) = lines.get(*i).copied() {
            result.push(' ');
            // Trim whitespace first so trailing blanks cannot shield the quotes.
            result.push_str(line.trim().trim_matches('"').trim());

            if line.contains(terminator) {
                break;
            }
            *i += 1;
        }
    }

    let cleaned = result.trim();
    (!cleaned.is_empty()).then(|| cleaned.to_string())
}
322
323fn parse_string_array(lines: &[&str], i: &mut usize, first_value: &str) -> Vec<String> {
325 let mut result = Vec::new();
326
327 let mut content = first_value.to_string();
328
329 if content.contains('[') && !content.contains(']') {
331 *i += 1;
332 while *i < lines.len() {
333 let line = lines[*i];
334 content.push(' ');
335 content.push_str(line);
336
337 if line.contains(']') {
338 break;
339 }
340 *i += 1;
341 }
342 }
343
344 let cleaned = content.trim_matches('[').trim_matches(']').trim();
346
347 for part in split_quoted_strings(cleaned) {
349 let p = part.trim_matches('"').trim();
350 if !p.is_empty() {
351 result.push(p.to_string());
352 }
353 }
354
355 result
356}
357
358fn parse_dependency_array(lines: &[&str], i: &mut usize) -> Vec<(String, String)> {
360 let mut result = Vec::new();
361
362 *i += 1;
363 while *i < lines.len() {
364 let line = lines[*i];
365
366 if line.trim().contains(']') {
367 break;
368 }
369
370 if let Some((name, version)) = parse_dependency_line(line) {
371 result.push((name, version));
372 }
373
374 *i += 1;
375 }
376
377 result
378}
379
380fn parse_dependency_line(line: &str) -> Option<(String, String)> {
382 let line = line.trim();
383 if line.is_empty() {
384 return None;
385 }
386
387 let regex = Regex::new(r#""([^"]+)"\s*(.*)$"#).ok()?;
389 let caps = regex.captures(line)?;
390
391 let name = caps.get(1)?.as_str().to_string();
392 let version_part = caps.get(2)?.as_str().trim();
393
394 let constraint = if version_part.is_empty() {
396 String::new()
397 } else {
398 extract_version_constraint(version_part)
399 };
400
401 Some((name, constraint))
402}
403
404fn extract_version_constraint(version_part: &str) -> String {
406 let regex = Regex::new(r#"\{\s*([<>=!]+)\s*"([^"]*)"\s*\}"#);
407 if let Ok(re) = regex
408 && let Some(caps) = re.captures(version_part)
409 {
410 let op = caps.get(1).map(|m| m.as_str()).unwrap_or("");
411 let ver = caps.get(2).map(|m| m.as_str()).unwrap_or("");
412 if !op.is_empty() && !ver.is_empty() {
413 return format!("{} {}", op, ver);
414 }
415 }
416
417 let content = version_part
419 .trim_matches('{')
420 .trim_matches('}')
421 .trim_matches('"')
422 .trim();
423
424 content.replace('"', "")
425}
426
427fn parse_checksums(lines: &[&str], i: &mut usize, data: &mut OpamData) {
429 if let Some((_, first_value)) = parse_key_value(lines[*i]) {
430 let inline = first_value.trim();
431 if !inline.is_empty() && inline != "[" {
432 if let Some((key, value)) = parse_checksum_line(inline) {
433 match key.as_str() {
434 "sha1" => data.sha1 = Sha1Digest::from_hex(&value).ok(),
435 "md5" => data.md5 = Md5Digest::from_hex(&value).ok(),
436 "sha256" => data.sha256 = Sha256Digest::from_hex(&value).ok(),
437 "sha512" => data.sha512 = Sha512Digest::from_hex(&value).ok(),
438 _ => {}
439 }
440 }
441 return;
442 }
443 }
444
445 *i += 1;
446 while *i < lines.len() {
447 let line = lines[*i];
448
449 if line.trim().contains(']') {
450 break;
451 }
452
453 if let Some((key, value)) = parse_checksum_line(line) {
454 match key.as_str() {
455 "sha1" => data.sha1 = Sha1Digest::from_hex(&value).ok(),
456 "md5" => data.md5 = Md5Digest::from_hex(&value).ok(),
457 "sha256" => data.sha256 = Sha256Digest::from_hex(&value).ok(),
458 "sha512" => data.sha512 = Sha512Digest::from_hex(&value).ok(),
459 _ => {}
460 }
461 }
462
463 *i += 1;
464 }
465}
466
/// Splits a checksum entry of the form `algo=value` (optionally wrapped in
/// double quotes) into `(algo, value)`.
///
/// The algorithm name must be non-empty and consist only of alphanumeric
/// characters and underscores; whitespace around `=` is ignored. Returns
/// `None` when there is no `=`, the name is invalid, or the value is empty.
///
/// Implemented with plain string operations so that no regular expression is
/// compiled on every call.
fn parse_checksum_line(line: &str) -> Option<(String, String)> {
    let line = line.trim().trim_matches('"').trim();

    let (key, value) = line.split_once('=')?;
    let key = key.trim_end();
    let value = value.trim_start();

    let key_is_word = !key.is_empty() && key.chars().all(|c| c.is_alphanumeric() || c == '_');
    if !key_is_word || value.is_empty() {
        return None;
    }

    Some((key.to_string(), value.to_string()))
}
479
/// Splits `content` into tokens separated by whitespace, treating text inside
/// double quotes as a single token.
///
/// Quote characters are dropped from the output. Any whitespace character
/// outside quotes separates tokens (previously only the space character did,
/// so tabs ended up inside tokens). Empty tokens, including `""`, produce no
/// entry.
fn split_quoted_strings(content: &str) -> Vec<String> {
    let mut result = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;

    for ch in content.chars() {
        match ch {
            '"' => in_quotes = !in_quotes,
            c if c.is_whitespace() && !in_quotes => {
                if !current.is_empty() {
                    // Moves the token out and leaves `current` empty for the next one.
                    result.push(std::mem::take(&mut current));
                }
            }
            _ => current.push(ch),
        }
    }

    if !current.is_empty() {
        result.push(current);
    }

    result
}
505
/// Combines the synopsis and the long description into one text.
///
/// When both are present they are joined with a newline, synopsis first; when
/// only one is present it is returned unchanged; `None` when both are absent.
fn build_description(synopsis: &Option<String>, description: &Option<String>) -> Option<String> {
    match (synopsis.as_deref(), description.as_deref()) {
        (Some(short), Some(long)) => Some([short, long].join("\n")),
        (Some(only), None) | (None, Some(only)) => Some(only.to_string()),
        (None, None) => None,
    }
}
520
521fn extract_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
523 let mut parties = Vec::new();
524
525 for author in authors {
527 parties.push(Party {
528 r#type: Some("person".to_string()),
529 role: Some("author".to_string()),
530 name: Some(author.clone()),
531 email: None,
532 url: None,
533 organization: None,
534 organization_url: None,
535 timezone: None,
536 });
537 }
538
539 for maintainer in maintainers {
541 parties.push(Party {
542 r#type: Some("person".to_string()),
543 role: Some("maintainer".to_string()),
544 name: None,
545 email: Some(maintainer.clone()),
546 url: None,
547 organization: None,
548 organization_url: None,
549 timezone: None,
550 });
551 }
552
553 parties
554}
555
556fn extract_dependencies(deps: &[(String, String)]) -> Vec<Dependency> {
558 deps.iter()
559 .map(|(name, version_constraint)| Dependency {
560 purl: Some(format!("pkg:opam/{}", name)),
561 extracted_requirement: Some(version_constraint.clone()),
562 scope: Some("dependency".to_string()),
563 is_runtime: Some(true),
564 is_optional: Some(false),
565 is_pinned: Some(false),
566 is_direct: Some(true),
567 resolved_package: None,
568 extra_data: None,
569 })
570 .collect()
571}
572
// Unit tests for the file matcher and the private parsing helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsers::PackageParser;

    // A file name ending in `.opam` is recognised.
    #[test]
    fn test_is_match_with_opam_extension() {
        let path = Path::new("sample.opam");
        assert!(OpamParser::is_match(path));
    }

    // A file named exactly `opam` is recognised.
    #[test]
    fn test_is_match_with_opam_name() {
        let path = Path::new("opam");
        assert!(OpamParser::is_match(path));
    }

    // Any other file name is rejected.
    #[test]
    fn test_is_match_with_non_opam() {
        let path = Path::new("sample.txt");
        assert!(!OpamParser::is_match(path));
    }

    // The value keeps its quotes; only whitespace is trimmed.
    #[test]
    fn test_parse_key_value() {
        let (key, value) = parse_key_value("name: \"js_of_ocaml\"").unwrap();
        assert_eq!(key, "name");
        assert_eq!(value, "\"js_of_ocaml\"");
    }

    // Quotes are stripped, and an empty quoted string becomes `None`.
    #[test]
    fn test_clean_value() {
        assert_eq!(
            clean_value("\"js_of_ocaml\""),
            Some("js_of_ocaml".to_string())
        );
        assert_eq!(clean_value("\"\""), None);
    }

    // A simple `{op "version"}` filter is rendered as `op version`.
    #[test]
    fn test_extract_version_constraint() {
        let result = extract_version_constraint(r#"{>= "4.02.0"}"#);
        assert_eq!(result, ">= 4.02.0");
    }

    // A dependency with a version filter yields name and constraint.
    #[test]
    fn test_parse_dependency_line() {
        let (name, version) = parse_dependency_line(r#""ocaml" {>= "4.02.0"}"#).unwrap();
        assert_eq!(name, "ocaml");
        assert_eq!(version, ">= 4.02.0");
    }

    // A dependency without a filter yields an empty constraint.
    #[test]
    fn test_parse_dependency_line_without_version() {
        let (name, version) = parse_dependency_line(r#""uchar""#).unwrap();
        assert_eq!(name, "uchar");
        assert_eq!(version, "");
    }

    // Quoted tokens are split on the space between them and unquoted.
    #[test]
    fn test_split_quoted_strings() {
        let parts = split_quoted_strings(r#""str1" "str2""#);
        assert_eq!(parts, vec!["str1", "str2"]);
    }

    // Synopsis and description are joined with a newline.
    #[test]
    fn test_build_description() {
        let synopsis = Some("Short description".to_string());
        let description = Some("Long description".to_string());
        let result = build_description(&synopsis, &description);
        assert_eq!(
            result,
            Some("Short description\nLong description".to_string())
        );
    }

    // Authors land in `name`, maintainers in `email`, authors first.
    #[test]
    fn test_extract_parties() {
        let authors = vec!["Author One".to_string()];
        let maintainers = vec!["maintainer@example.com".to_string()];
        let parties = extract_parties(&authors, &maintainers);

        assert_eq!(parties.len(), 2);
        assert_eq!(parties[0].name, Some("Author One".to_string()));
        assert_eq!(parties[0].role, Some("author".to_string()));
        assert_eq!(parties[1].email, Some("maintainer@example.com".to_string()));
        assert_eq!(parties[1].role, Some("maintainer".to_string()));
    }

    // The lowercase-`with` statement maps to the fixed expression pair.
    #[test]
    fn test_normalize_opam_declared_license_preserves_scancode_style_expression() {
        let (declared, declared_spdx, detections) = normalize_opam_declared_license(Some(
            "LGPL-3.0-only with OCaml-LGPL-linking-exception",
        ));

        assert_eq!(
            declared.as_deref(),
            Some("lgpl-3.0 WITH ocaml-lgpl-linking-exception")
        );
        assert_eq!(
            declared_spdx.as_deref(),
            Some("LGPL-3.0-only WITH OCaml-LGPL-linking-exception")
        );
        assert_eq!(detections.len(), 1);
        assert_eq!(
            detections[0].license_expression,
            "lgpl-3.0 WITH ocaml-lgpl-linking-exception"
        );
    }
}
683
// Registers this parser through the crate's `register_parser!` macro.
// NOTE(review): the macro is defined elsewhere; the arguments look like a
// description, path globs, package type, language and documentation URL —
// confirm against the macro definition.
crate::register_parser!(
    "OCaml OPAM package manifest",
    &["**/*.opam", "**/opam"],
    "opam",
    "OCaml",
    Some("https://opam.ocaml.org/doc/Manual.html"),
);