1use std::path::Path;
27
28use crate::parser_warn as warn;
29use regex::Regex;
30
31use crate::models::{
32 DatasourceId, Dependency, Md5Digest, PackageData, PackageType, Party, Sha1Digest, Sha256Digest,
33 Sha512Digest,
34};
35use crate::parsers::PackageParser;
36use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
37
38use super::license_normalization::{
39 DeclaredLicenseMatchMetadata, build_declared_license_data_from_pair,
40 normalize_spdx_declared_license,
41};
42
/// Parser for OCaml OPAM package manifests: files whose name ends in `.opam` or is
/// exactly `opam`.
pub struct OpamParser;
48
49impl PackageParser for OpamParser {
50 const PACKAGE_TYPE: PackageType = PackageType::Opam;
51
52 fn is_match(path: &Path) -> bool {
53 path.file_name().is_some_and(|name| {
54 name.to_string_lossy().ends_with(".opam") || name.to_string_lossy() == "opam"
55 })
56 }
57
58 fn extract_packages(path: &Path) -> Vec<PackageData> {
59 vec![match read_file_to_string(path, None) {
60 Ok(text) => parse_opam(&text),
61 Err(e) => {
62 warn!("Failed to read OPAM file {:?}: {}", path, e);
63 default_package_data()
64 }
65 }]
66 }
67}
68
/// Raw field values collected from an OPAM file before they are converted into a
/// `PackageData` record.
#[derive(Debug, Default)]
struct OpamData {
    /// Value of the `name` field.
    name: Option<String>,
    /// Value of the `version` field.
    version: Option<String>,
    /// Value of the `synopsis` field (short description).
    synopsis: Option<String>,
    /// Value of the `description` field, joined into a single line.
    description: Option<String>,
    /// Value of the `homepage` field.
    homepage: Option<String>,
    /// Value of the `dev-repo` field; reported as the VCS URL.
    dev_repo: Option<String>,
    /// Value of the `bug-reports` field; reported as the bug tracking URL.
    bug_reports: Option<String>,
    /// Value of the `src` field; reported as the download URL.
    src: Option<String>,
    /// Entries of the `authors` field.
    authors: Vec<String>,
    /// Entries of the `maintainer` field.
    maintainers: Vec<String>,
    /// Value of the `license` field, as written in the manifest.
    license: Option<String>,
    /// SHA-1 digest from the `checksum` field, if one was listed and is valid hex.
    sha1: Option<Sha1Digest>,
    /// MD5 digest from the `checksum` field, if one was listed and is valid hex.
    md5: Option<Md5Digest>,
    /// SHA-256 digest from the `checksum` field, if one was listed and is valid hex.
    sha256: Option<Sha256Digest>,
    /// SHA-512 digest from the `checksum` field, if one was listed and is valid hex.
    sha512: Option<Sha512Digest>,
    /// `(package name, version constraint)` pairs from the `depends` list; the constraint
    /// is an empty string when none was given.
    dependencies: Vec<(String, String)>,
}
89
90fn default_package_data() -> PackageData {
91 PackageData {
92 package_type: Some(OpamParser::PACKAGE_TYPE),
93 primary_language: Some("Ocaml".to_string()),
94 datasource_id: Some(DatasourceId::OpamFile),
95 ..Default::default()
96 }
97}
98
99fn parse_opam(text: &str) -> PackageData {
101 let opam_data = parse_opam_data(text);
102
103 let description = build_description(&opam_data.synopsis, &opam_data.description);
104 let parties = extract_parties(&opam_data.authors, &opam_data.maintainers);
105 let dependencies = extract_dependencies(&opam_data.dependencies);
106
107 let (repository_homepage_url, api_data_url, purl) =
108 build_opam_urls(&opam_data.name, &opam_data.version);
109 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
110 normalize_opam_declared_license(opam_data.license.as_deref());
111
112 PackageData {
113 package_type: Some(OpamParser::PACKAGE_TYPE),
114 namespace: None,
115 name: opam_data.name,
116 version: opam_data.version,
117 qualifiers: None,
118 subpath: None,
119 primary_language: Some("Ocaml".to_string()),
120 description,
121 release_date: None,
122 parties,
123 keywords: Vec::new(),
124 homepage_url: opam_data.homepage,
125 download_url: opam_data.src,
126 size: None,
127 sha1: opam_data.sha1,
128 md5: opam_data.md5,
129 sha256: opam_data.sha256,
130 sha512: opam_data.sha512,
131 bug_tracking_url: opam_data.bug_reports,
132 code_view_url: None,
133 vcs_url: opam_data.dev_repo,
134 copyright: None,
135 holder: None,
136 declared_license_expression,
137 declared_license_expression_spdx,
138 license_detections,
139 other_license_expression: None,
140 other_license_expression_spdx: None,
141 other_license_detections: Vec::new(),
142 extracted_license_statement: opam_data.license,
143 notice_text: None,
144 source_packages: Vec::new(),
145 file_references: Vec::new(),
146 is_private: false,
147 is_virtual: false,
148 extra_data: None,
149 dependencies,
150 repository_homepage_url,
151 repository_download_url: None,
152 api_data_url,
153 datasource_id: Some(DatasourceId::OpamFile),
154 purl,
155 }
156}
157
158fn normalize_opam_declared_license(
159 statement: Option<&str>,
160) -> (
161 Option<String>,
162 Option<String>,
163 Vec<crate::models::LicenseDetection>,
164) {
165 let Some(statement) = statement.map(str::trim).filter(|value| !value.is_empty()) else {
166 return super::license_normalization::empty_declared_license_data();
167 };
168
169 match statement {
170 "GPL-2.0-only" => build_declared_license_data_from_pair(
171 "gpl-2.0",
172 "GPL-2.0-only",
173 DeclaredLicenseMatchMetadata::single_line(statement),
174 ),
175 "GPL-3.0-only" => build_declared_license_data_from_pair(
176 "gpl-3.0",
177 "GPL-3.0-only",
178 DeclaredLicenseMatchMetadata::single_line(statement),
179 ),
180 "LGPL-3.0-only with OCaml-LGPL-linking-exception" => build_declared_license_data_from_pair(
181 "lgpl-3.0 WITH ocaml-lgpl-linking-exception",
182 "LGPL-3.0-only WITH OCaml-LGPL-linking-exception",
183 DeclaredLicenseMatchMetadata::single_line(statement),
184 ),
185 _ => normalize_spdx_declared_license(Some(statement)),
186 }
187}
188
/// Derives the package URLs from the package name and version.
///
/// Returns `(repository_homepage_url, api_data_url, purl)`. Without a name all three are
/// `None`; without a version the API data URL is `None` and the purl carries no version.
fn build_opam_urls(
    name: &Option<String>,
    version: &Option<String>,
) -> (Option<String>, Option<String>, Option<String>) {
    let Some(package) = name.as_deref() else {
        return (None, None, None);
    };

    let homepage = format!("https://opam.ocaml.org/packages/{}", package);

    let (api_data_url, purl) = match version.as_deref() {
        Some(release) => (
            Some(format!(
                "https://github.com/ocaml/opam-repository/blob/master/packages/{}/{}.{}/opam",
                package, package, release
            )),
            format!("pkg:opam/{}@{}", package, release),
        ),
        None => (None, format!("pkg:opam/{}", package)),
    };

    (Some(homepage), api_data_url, Some(purl))
}
213
214fn parse_opam_data(text: &str) -> OpamData {
216 let mut data = OpamData::default();
217 let lines: Vec<&str> = text.lines().collect();
218 let mut i = 0;
219 let mut iteration_count: usize = 0;
220
221 while i < lines.len() {
222 iteration_count += 1;
223 if iteration_count > MAX_ITERATION_COUNT {
224 warn!("parse_opam_data: exceeded MAX_ITERATION_COUNT, breaking");
225 break;
226 }
227 let line = lines[i];
228
229 if let Some((key, value)) = parse_key_value(line) {
231 match key.as_str() {
232 "name" => data.name = clean_value(&value),
233 "version" => data.version = clean_value(&value),
234 "synopsis" => data.synopsis = clean_value(&value),
235 "description" => {
236 data.description = parse_multiline_string(&lines, &mut i);
237 }
238 "homepage" => data.homepage = clean_value(&value),
239 "dev-repo" => data.dev_repo = clean_value(&value),
240 "bug-reports" => data.bug_reports = clean_value(&value),
241 "src" => {
242 if value.trim().is_empty() && i + 1 < lines.len() {
243 i += 1;
244 data.src = clean_value(lines[i]);
245 } else {
246 data.src = clean_value(&value);
247 }
248 }
249 "license" => data.license = clean_value(&value),
250 "authors" => {
251 data.authors = parse_string_array(&lines, &mut i, &value);
252 }
253 "maintainer" => {
254 data.maintainers = parse_string_array(&lines, &mut i, &value);
255 }
256 "depends" => {
257 data.dependencies = parse_dependency_array(&lines, &mut i);
258 }
259 "checksum" => {
260 parse_checksums(&lines, &mut i, &mut data);
261 }
262 _ => {}
263 }
264 }
265
266 i += 1;
267 }
268
269 data
270}
271
/// Splits a `key: value` line at its first colon.
///
/// Returns the trimmed key and trimmed value, or `None` for blank lines, lines starting
/// with `#`, and lines without a colon.
fn parse_key_value(line: &str) -> Option<(String, String)> {
    let trimmed = line.trim();
    if trimmed.is_empty() || trimmed.starts_with('#') {
        return None;
    }

    let (key, value) = trimmed.split_once(':')?;
    Some((key.trim().to_string(), value.trim().to_string()))
}
287
288fn clean_value(value: &str) -> Option<String> {
290 let cleaned = value
291 .trim()
292 .trim_matches('"')
293 .trim_matches('[')
294 .trim_matches(']')
295 .trim();
296
297 if cleaned.is_empty() {
298 None
299 } else {
300 Some(truncate_field(cleaned.to_string()))
301 }
302}
303
304fn parse_multiline_string(lines: &[&str], i: &mut usize) -> Option<String> {
306 let mut result = String::new();
307 let mut iteration_count: usize = 0;
308
309 if let Some((_, value)) = parse_key_value(lines[*i]) {
310 result.push_str(value.trim_matches('"').trim());
311 }
312
313 *i += 1;
314 while *i < lines.len() {
315 iteration_count += 1;
316 if iteration_count > MAX_ITERATION_COUNT {
317 warn!("parse_multiline_string: exceeded MAX_ITERATION_COUNT, breaking");
318 break;
319 }
320 let line = lines[*i];
321 result.push(' ');
322 result.push_str(line.trim_matches('"').trim());
323
324 if line.contains("\"\"\"") {
325 break;
326 }
327 *i += 1;
328 }
329
330 let cleaned = result.trim().to_string();
331 if cleaned.is_empty() {
332 None
333 } else {
334 Some(truncate_field(cleaned))
335 }
336}
337
338fn parse_string_array(lines: &[&str], i: &mut usize, first_value: &str) -> Vec<String> {
340 let mut result = Vec::new();
341 let mut iteration_count: usize = 0;
342
343 let mut content = first_value.to_string();
344
345 if content.contains('[') && !content.contains(']') {
346 *i += 1;
347 while *i < lines.len() {
348 iteration_count += 1;
349 if iteration_count > MAX_ITERATION_COUNT {
350 warn!("parse_string_array: exceeded MAX_ITERATION_COUNT, breaking");
351 break;
352 }
353 let line = lines[*i];
354 content.push(' ');
355 content.push_str(line);
356
357 if line.contains(']') {
358 break;
359 }
360 *i += 1;
361 }
362 }
363
364 let cleaned = content.trim_matches('[').trim_matches(']').trim();
365
366 for part in split_quoted_strings(cleaned) {
367 let p = part.trim_matches('"').trim();
368 if !p.is_empty() {
369 result.push(truncate_field(p.to_string()));
370 }
371 }
372
373 result
374}
375
376fn parse_dependency_array(lines: &[&str], i: &mut usize) -> Vec<(String, String)> {
378 let mut result = Vec::new();
379 let mut iteration_count: usize = 0;
380
381 *i += 1;
382 while *i < lines.len() {
383 iteration_count += 1;
384 if iteration_count > MAX_ITERATION_COUNT {
385 warn!("parse_dependency_array: exceeded MAX_ITERATION_COUNT, breaking");
386 break;
387 }
388 let line = lines[*i];
389
390 if line.trim().contains(']') {
391 break;
392 }
393
394 if let Some((name, version)) = parse_dependency_line(line) {
395 result.push((name, version));
396 }
397
398 *i += 1;
399 }
400
401 result
402}
403
404fn parse_dependency_line(line: &str) -> Option<(String, String)> {
406 let line = line.trim();
407 if line.is_empty() {
408 return None;
409 }
410
411 let regex = Regex::new(r#""([^"]+)"\s*(.*)$"#).ok()?;
413 let caps = regex.captures(line)?;
414
415 let name = truncate_field(caps.get(1)?.as_str().to_string());
416 let version_part = caps.get(2)?.as_str().trim();
417
418 let constraint = if version_part.is_empty() {
420 String::new()
421 } else {
422 truncate_field(extract_version_constraint(version_part))
423 };
424
425 Some((name, constraint))
426}
427
428fn extract_version_constraint(version_part: &str) -> String {
430 let regex = Regex::new(r#"\{\s*([<>=!]+)\s*"([^"]*)"\s*\}"#);
431 if let Ok(re) = regex
432 && let Some(caps) = re.captures(version_part)
433 {
434 let op = caps.get(1).map(|m| m.as_str()).unwrap_or("");
435 let ver = caps.get(2).map(|m| m.as_str()).unwrap_or("");
436 if !op.is_empty() && !ver.is_empty() {
437 return format!("{} {}", op, ver);
438 }
439 }
440
441 let content = version_part
443 .trim_matches('{')
444 .trim_matches('}')
445 .trim_matches('"')
446 .trim();
447
448 content.replace('"', "")
449}
450
451fn parse_checksums(lines: &[&str], i: &mut usize, data: &mut OpamData) {
453 if let Some((_, first_value)) = parse_key_value(lines[*i]) {
454 let inline = first_value.trim();
455 if !inline.is_empty() && inline != "[" {
456 if let Some((key, value)) = parse_checksum_line(inline) {
457 match key.as_str() {
458 "sha1" => data.sha1 = Sha1Digest::from_hex(&value).ok(),
459 "md5" => data.md5 = Md5Digest::from_hex(&value).ok(),
460 "sha256" => data.sha256 = Sha256Digest::from_hex(&value).ok(),
461 "sha512" => data.sha512 = Sha512Digest::from_hex(&value).ok(),
462 _ => {}
463 }
464 }
465 return;
466 }
467 }
468
469 let mut iteration_count: usize = 0;
470 *i += 1;
471 while *i < lines.len() {
472 iteration_count += 1;
473 if iteration_count > MAX_ITERATION_COUNT {
474 warn!("parse_checksums: exceeded MAX_ITERATION_COUNT, breaking");
475 break;
476 }
477 let line = lines[*i];
478
479 if line.trim().contains(']') {
480 break;
481 }
482
483 if let Some((key, value)) = parse_checksum_line(line) {
484 match key.as_str() {
485 "sha1" => data.sha1 = Sha1Digest::from_hex(&value).ok(),
486 "md5" => data.md5 = Md5Digest::from_hex(&value).ok(),
487 "sha256" => data.sha256 = Sha256Digest::from_hex(&value).ok(),
488 "sha512" => data.sha512 = Sha512Digest::from_hex(&value).ok(),
489 _ => {}
490 }
491 }
492
493 *i += 1;
494 }
495}
496
497fn parse_checksum_line(line: &str) -> Option<(String, String)> {
499 let line = line.trim().trim_matches('"').trim();
500
501 let regex = Regex::new(r"^(\w+)\s*=\s*(.+)$").ok()?;
502 let caps = regex.captures(line)?;
503
504 let key = caps.get(1)?.as_str().to_string();
505 let value = caps.get(2)?.as_str().to_string();
506
507 Some((key, value))
508}
509
/// Splits text into tokens separated by spaces, treating double-quoted runs as atomic.
///
/// Quote characters toggle quoting and are never part of a token; spaces inside quotes
/// are kept. Empty tokens are not emitted.
fn split_quoted_strings(content: &str) -> Vec<String> {
    let mut pieces: Vec<String> = Vec::new();
    let mut buffer = String::new();
    let mut quoted = false;

    for ch in content.chars() {
        if ch == '"' {
            quoted = !quoted;
        } else if ch == ' ' && !quoted {
            // A separator: flush whatever has been gathered so far.
            if !buffer.is_empty() {
                pieces.push(std::mem::take(&mut buffer));
            }
        } else {
            buffer.push(ch);
        }
    }

    if !buffer.is_empty() {
        pieces.push(buffer);
    }

    pieces
}
535
/// Combines the synopsis and the long description into one text.
///
/// When both are present they are joined with a newline, synopsis first; when only one is
/// present it is returned alone; when neither is present the result is `None`.
fn build_description(synopsis: &Option<String>, description: &Option<String>) -> Option<String> {
    match (synopsis.as_deref(), description.as_deref()) {
        (None, None) => None,
        (Some(only), None) | (None, Some(only)) => Some(only.to_string()),
        (Some(short), Some(long)) => Some(format!("{}\n{}", short, long)),
    }
}
550
551fn extract_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
553 let mut parties = Vec::new();
554
555 for author in authors {
557 parties.push(Party {
558 r#type: Some("person".to_string()),
559 role: Some("author".to_string()),
560 name: Some(truncate_field(author.clone())),
561 email: None,
562 url: None,
563 organization: None,
564 organization_url: None,
565 timezone: None,
566 });
567 }
568
569 for maintainer in maintainers {
571 parties.push(Party {
572 r#type: Some("person".to_string()),
573 role: Some("maintainer".to_string()),
574 name: None,
575 email: Some(truncate_field(maintainer.clone())),
576 url: None,
577 organization: None,
578 organization_url: None,
579 timezone: None,
580 });
581 }
582
583 parties
584}
585
586fn extract_dependencies(deps: &[(String, String)]) -> Vec<Dependency> {
588 deps.iter()
589 .map(|(name, version_constraint)| Dependency {
590 purl: Some(truncate_field(format!("pkg:opam/{}", name))),
591 extracted_requirement: Some(truncate_field(version_constraint.clone())),
592 scope: Some("dependency".to_string()),
593 is_runtime: Some(true),
594 is_optional: Some(false),
595 is_pinned: Some(false),
596 is_direct: Some(true),
597 resolved_package: None,
598 extra_data: None,
599 })
600 .collect()
601}
602
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsers::PackageParser;

    // --- file matching ---

    #[test]
    fn test_is_match_with_opam_extension() {
        assert!(OpamParser::is_match(Path::new("sample.opam")));
    }

    #[test]
    fn test_is_match_with_opam_name() {
        assert!(OpamParser::is_match(Path::new("opam")));
    }

    #[test]
    fn test_is_match_with_non_opam() {
        assert!(!OpamParser::is_match(Path::new("sample.txt")));
    }

    // --- line-level helpers ---

    #[test]
    fn test_parse_key_value() {
        let parsed = parse_key_value("name: \"js_of_ocaml\"");
        assert_eq!(
            parsed,
            Some(("name".to_string(), "\"js_of_ocaml\"".to_string()))
        );
    }

    #[test]
    fn test_clean_value() {
        assert_eq!(
            clean_value("\"js_of_ocaml\"").as_deref(),
            Some("js_of_ocaml")
        );
        assert!(clean_value("\"\"").is_none());
    }

    #[test]
    fn test_extract_version_constraint() {
        assert_eq!(extract_version_constraint(r#"{>= "4.02.0"}"#), ">= 4.02.0");
    }

    #[test]
    fn test_parse_dependency_line() {
        let parsed = parse_dependency_line(r#""ocaml" {>= "4.02.0"}"#);
        assert_eq!(
            parsed,
            Some(("ocaml".to_string(), ">= 4.02.0".to_string()))
        );
    }

    #[test]
    fn test_parse_dependency_line_without_version() {
        let parsed = parse_dependency_line(r#""uchar""#);
        assert_eq!(parsed, Some(("uchar".to_string(), String::new())));
    }

    #[test]
    fn test_split_quoted_strings() {
        let tokens = split_quoted_strings(r#""str1" "str2""#);
        assert_eq!(tokens, vec!["str1", "str2"]);
    }

    // --- record builders ---

    #[test]
    fn test_build_description() {
        let combined = build_description(
            &Some("Short description".to_string()),
            &Some("Long description".to_string()),
        );
        assert_eq!(
            combined.as_deref(),
            Some("Short description\nLong description")
        );
    }

    #[test]
    fn test_extract_parties() {
        let parties = extract_parties(
            &["Author One".to_string()],
            &["maintainer@example.com".to_string()],
        );

        assert_eq!(parties.len(), 2);

        let (author, maintainer) = (&parties[0], &parties[1]);
        assert_eq!(author.name.as_deref(), Some("Author One"));
        assert_eq!(author.role.as_deref(), Some("author"));
        assert_eq!(maintainer.email.as_deref(), Some("maintainer@example.com"));
        assert_eq!(maintainer.role.as_deref(), Some("maintainer"));
    }

    #[test]
    fn test_normalize_opam_declared_license_preserves_scancode_style_expression() {
        let statement = "LGPL-3.0-only with OCaml-LGPL-linking-exception";
        let (expression, spdx_expression, detections) =
            normalize_opam_declared_license(Some(statement));

        assert_eq!(
            expression.as_deref(),
            Some("lgpl-3.0 WITH ocaml-lgpl-linking-exception")
        );
        assert_eq!(
            spdx_expression.as_deref(),
            Some("LGPL-3.0-only WITH OCaml-LGPL-linking-exception")
        );
        assert_eq!(detections.len(), 1);
        assert_eq!(
            detections[0].license_expression,
            "lgpl-3.0 WITH ocaml-lgpl-linking-exception"
        );
    }
}
713
// Registers this parser through the crate-level `register_parser!` macro. The arguments
// are a human-readable description, the path globs (mirroring `OpamParser::is_match`),
// and what appear to be the package type, the language and a documentation URL.
// NOTE(review): the meaning of the positional arguments is defined by the macro, which is
// not visible here; confirm against its definition.
crate::register_parser!(
    "OCaml OPAM package manifest",
    &["**/*.opam", "**/opam"],
    "opam",
    "OCaml",
    Some("https://opam.ocaml.org/doc/Manual.html"),
);