1use std::path::Path;
24
25use log::warn;
26use regex::Regex;
27
28use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
29use crate::parsers::PackageParser;
30
31use super::license_normalization::{
32 DeclaredLicenseMatchMetadata, build_declared_license_data_from_pair,
33 normalize_spdx_declared_license,
34};
35
36pub struct OpamParser;
41
42impl PackageParser for OpamParser {
43 const PACKAGE_TYPE: PackageType = PackageType::Opam;
44
45 fn is_match(path: &Path) -> bool {
46 path.file_name().is_some_and(|name| {
47 name.to_string_lossy().ends_with(".opam") || name.to_string_lossy() == "opam"
48 })
49 }
50
51 fn extract_packages(path: &Path) -> Vec<PackageData> {
52 vec![match std::fs::read_to_string(path) {
53 Ok(text) => parse_opam(&text),
54 Err(e) => {
55 warn!("Failed to read OPAM file {:?}: {}", path, e);
56 default_package_data()
57 }
58 }]
59 }
60}
61
/// Raw field values collected from an OPAM manifest by `parse_opam_data`,
/// before they are converted into a `PackageData` record.
#[derive(Debug, Default)]
struct OpamData {
    /// Cleaned value of the `name` field.
    name: Option<String>,
    /// Cleaned value of the `version` field.
    version: Option<String>,
    /// Cleaned value of the `synopsis` field.
    synopsis: Option<String>,
    /// Text gathered for the `description` field.
    description: Option<String>,
    /// Cleaned value of the `homepage` field.
    homepage: Option<String>,
    /// Cleaned value of the `dev-repo` field.
    dev_repo: Option<String>,
    /// Cleaned value of the `bug-reports` field.
    bug_reports: Option<String>,
    /// Cleaned value of the `src` field.
    src: Option<String>,
    /// Entries of the `authors` field.
    authors: Vec<String>,
    /// Entries of the `maintainer` field.
    maintainers: Vec<String>,
    /// Cleaned value of the `license` field.
    license: Option<String>,
    /// Value of a `sha1=` entry found under `checksum`.
    sha1: Option<String>,
    /// Value of an `md5=` entry found under `checksum`.
    md5: Option<String>,
    /// Value of a `sha256=` entry found under `checksum`.
    sha256: Option<String>,
    /// Value of a `sha512=` entry found under `checksum`.
    sha512: Option<String>,
    /// `(package name, version constraint)` pairs from the `depends` field;
    /// the constraint is an empty string when none was given.
    dependencies: Vec<(String, String)>,
}
82
83fn default_package_data() -> PackageData {
84 PackageData {
85 package_type: Some(OpamParser::PACKAGE_TYPE),
86 primary_language: Some("Ocaml".to_string()),
87 datasource_id: Some(DatasourceId::OpamFile),
88 ..Default::default()
89 }
90}
91
92fn parse_opam(text: &str) -> PackageData {
94 let opam_data = parse_opam_data(text);
95
96 let description = build_description(&opam_data.synopsis, &opam_data.description);
97 let parties = extract_parties(&opam_data.authors, &opam_data.maintainers);
98 let dependencies = extract_dependencies(&opam_data.dependencies);
99
100 let (repository_homepage_url, api_data_url, purl) =
101 build_opam_urls(&opam_data.name, &opam_data.version);
102 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
103 normalize_opam_declared_license(opam_data.license.as_deref());
104
105 PackageData {
106 package_type: Some(OpamParser::PACKAGE_TYPE),
107 namespace: None,
108 name: opam_data.name,
109 version: opam_data.version,
110 qualifiers: None,
111 subpath: None,
112 primary_language: Some("Ocaml".to_string()),
113 description,
114 release_date: None,
115 parties,
116 keywords: Vec::new(),
117 homepage_url: opam_data.homepage,
118 download_url: opam_data.src,
119 size: None,
120 sha1: opam_data.sha1,
121 md5: opam_data.md5,
122 sha256: opam_data.sha256,
123 sha512: opam_data.sha512,
124 bug_tracking_url: opam_data.bug_reports,
125 code_view_url: None,
126 vcs_url: opam_data.dev_repo,
127 copyright: None,
128 holder: None,
129 declared_license_expression,
130 declared_license_expression_spdx,
131 license_detections,
132 other_license_expression: None,
133 other_license_expression_spdx: None,
134 other_license_detections: Vec::new(),
135 extracted_license_statement: opam_data.license,
136 notice_text: None,
137 source_packages: Vec::new(),
138 file_references: Vec::new(),
139 is_private: false,
140 is_virtual: false,
141 extra_data: None,
142 dependencies,
143 repository_homepage_url,
144 repository_download_url: None,
145 api_data_url,
146 datasource_id: Some(DatasourceId::OpamFile),
147 purl,
148 }
149}
150
151fn normalize_opam_declared_license(
152 statement: Option<&str>,
153) -> (
154 Option<String>,
155 Option<String>,
156 Vec<crate::models::LicenseDetection>,
157) {
158 let Some(statement) = statement.map(str::trim).filter(|value| !value.is_empty()) else {
159 return super::license_normalization::empty_declared_license_data();
160 };
161
162 match statement {
163 "GPL-2.0-only" => build_declared_license_data_from_pair(
164 "gpl-2.0",
165 "GPL-2.0-only",
166 DeclaredLicenseMatchMetadata::single_line(statement),
167 ),
168 "GPL-3.0-only" => build_declared_license_data_from_pair(
169 "gpl-3.0",
170 "GPL-3.0-only",
171 DeclaredLicenseMatchMetadata::single_line(statement),
172 ),
173 "LGPL-3.0-only with OCaml-LGPL-linking-exception" => build_declared_license_data_from_pair(
174 "lgpl-3.0 WITH ocaml-lgpl-linking-exception",
175 "LGPL-3.0-only WITH OCaml-LGPL-linking-exception",
176 DeclaredLicenseMatchMetadata::single_line(statement),
177 ),
178 _ => normalize_spdx_declared_license(Some(statement)),
179 }
180}
181
/// Derives `(repository_homepage_url, api_data_url, purl)` from the package
/// name and version.
///
/// * `repository_homepage_url` — the package page on opam.ocaml.org; needs a name.
/// * `api_data_url` — the `opam` file inside the opam-repository GitHub tree;
///   needs both name and version.
/// * `purl` — `pkg:opam/<name>[@<version>]`; needs a name.
///
/// NOTE(review): the homepage URL used to be the literal placeholder text
/// `{https://opam.ocaml.org/packages}/{name}` (the name was never substituted).
/// Fixtures that captured that placeholder need to be regenerated.
fn build_opam_urls(
    name: &Option<String>,
    version: &Option<String>,
) -> (Option<String>, Option<String>, Option<String>) {
    let name = name.as_deref();
    let version = version.as_deref();

    let repository_homepage_url =
        name.map(|n| format!("https://opam.ocaml.org/packages/{}", n));

    let api_data_url = name.zip(version).map(|(n, v)| {
        format!(
            "https://github.com/ocaml/opam-repository/blob/master/packages/{}/{}.{}/opam",
            n, n, v
        )
    });

    // A version without a name cannot form a purl.
    let purl = name.map(|n| match version {
        Some(v) => format!("pkg:opam/{}@{}", n, v),
        None => format!("pkg:opam/{}", n),
    });

    (repository_homepage_url, api_data_url, purl)
}
206
207fn parse_opam_data(text: &str) -> OpamData {
209 let mut data = OpamData::default();
210 let lines: Vec<&str> = text.lines().collect();
211 let mut i = 0;
212
213 while i < lines.len() {
214 let line = lines[i];
215
216 if let Some((key, value)) = parse_key_value(line) {
218 match key.as_str() {
219 "name" => data.name = clean_value(&value),
220 "version" => data.version = clean_value(&value),
221 "synopsis" => data.synopsis = clean_value(&value),
222 "description" => {
223 data.description = parse_multiline_string(&lines, &mut i);
224 }
225 "homepage" => data.homepage = clean_value(&value),
226 "dev-repo" => data.dev_repo = clean_value(&value),
227 "bug-reports" => data.bug_reports = clean_value(&value),
228 "src" => {
229 if value.trim().is_empty() && i + 1 < lines.len() {
230 i += 1;
231 data.src = clean_value(lines[i]);
232 } else {
233 data.src = clean_value(&value);
234 }
235 }
236 "license" => data.license = clean_value(&value),
237 "authors" => {
238 data.authors = parse_string_array(&lines, &mut i, &value);
239 }
240 "maintainer" => {
241 data.maintainers = parse_string_array(&lines, &mut i, &value);
242 }
243 "depends" => {
244 data.dependencies = parse_dependency_array(&lines, &mut i);
245 }
246 "checksum" => {
247 parse_checksums(&lines, &mut i, &mut data);
248 }
249 _ => {}
250 }
251 }
252
253 i += 1;
254 }
255
256 data
257}
258
/// Splits a `key: value` line at its first colon.
///
/// Returns `None` for blank lines, lines starting with `#`, and lines without
/// a colon. Both key and value are whitespace-trimmed; quotes and brackets in
/// the value are left untouched.
fn parse_key_value(line: &str) -> Option<(String, String)> {
    let trimmed = line.trim();
    if trimmed.is_empty() || trimmed.starts_with('#') {
        return None;
    }

    trimmed
        .split_once(':')
        .map(|(key, value)| (key.trim().to_string(), value.trim().to_string()))
}
274
/// Strips surrounding whitespace, double quotes and square brackets from a raw
/// field value, returning `None` when nothing is left.
///
/// The decoration characters are removed in any order and nesting, so both
/// `"[foo]"` and `["foo"]` become `foo`. (A fixed quote-then-bracket order
/// would leave the quotes on the second form.) Characters inside the value are
/// never touched.
fn clean_value(value: &str) -> Option<String> {
    let cleaned =
        value.trim_matches(|c: char| c.is_whitespace() || matches!(c, '"' | '[' | ']'));

    if cleaned.is_empty() {
        None
    } else {
        Some(cleaned.to_string())
    }
}
290
/// Reads the string value of the key at `lines[*i]`, following it across lines
/// when it is not closed on the key line.
///
/// * `lines` — all lines of the manifest.
/// * `i` — index of the key line; on return it points at the last line that
///   belongs to the value, so the caller's own `+= 1` moves to the next field.
///
/// Returns the text with the lines joined by single spaces and the outer
/// quotes removed, or `None` when the value is empty.
///
/// A value that is closed on the key line (`key: "text"` or
/// `key: """text"""`) or is not quoted at all consumes no further lines.
/// Continuation lines are only read for a string that is opened but not closed
/// on its first line; reading stops at the first line containing the matching
/// delimiter. Escaped quotes are not interpreted.
fn parse_multiline_string(lines: &[&str], i: &mut usize) -> Option<String> {
    const TRIPLE_QUOTE: &str = "\"\"\"";

    // Text after the first ':' of the key line (same split as `parse_key_value`).
    let mut first_value = lines
        .get(*i)
        .and_then(|line| line.split_once(':'))
        .map_or("", |(_, value)| value.trim());

    // `key:` alone on its line with the quoted string starting on the next one.
    if first_value.is_empty() {
        let next = lines.get(*i + 1).map(|line| line.trim());
        if let Some(next) = next.filter(|line| line.starts_with('"')) {
            *i += 1;
            first_value = next;
        }
    }

    let delimiter = if first_value.starts_with(TRIPLE_QUOTE) {
        Some(TRIPLE_QUOTE)
    } else if first_value.starts_with('"') {
        Some("\"")
    } else {
        None
    };

    let mut result = String::from(first_value.trim_matches('"').trim());

    if let Some(delimiter) = delimiter {
        // The delimiter is ASCII and known to be a prefix, so the slice is valid.
        let closed_on_first_line = first_value[delimiter.len()..].contains(delimiter);
        if !closed_on_first_line {
            *i += 1;
            while let Some(line) = lines.get(*i) {
                result.push(' ');
                result.push_str(line.trim_matches('"').trim());

                if line.contains(delimiter) {
                    break;
                }
                *i += 1;
            }
        }
    }

    let cleaned = result.trim();
    if cleaned.is_empty() {
        None
    } else {
        Some(cleaned.to_string())
    }
}
319
320fn parse_string_array(lines: &[&str], i: &mut usize, first_value: &str) -> Vec<String> {
322 let mut result = Vec::new();
323
324 let mut content = first_value.to_string();
325
326 if content.contains('[') && !content.contains(']') {
328 *i += 1;
329 while *i < lines.len() {
330 let line = lines[*i];
331 content.push(' ');
332 content.push_str(line);
333
334 if line.contains(']') {
335 break;
336 }
337 *i += 1;
338 }
339 }
340
341 let cleaned = content.trim_matches('[').trim_matches(']').trim();
343
344 for part in split_quoted_strings(cleaned) {
346 let p = part.trim_matches('"').trim();
347 if !p.is_empty() {
348 result.push(p.to_string());
349 }
350 }
351
352 result
353}
354
355fn parse_dependency_array(lines: &[&str], i: &mut usize) -> Vec<(String, String)> {
357 let mut result = Vec::new();
358
359 *i += 1;
360 while *i < lines.len() {
361 let line = lines[*i];
362
363 if line.trim().contains(']') {
364 break;
365 }
366
367 if let Some((name, version)) = parse_dependency_line(line) {
368 result.push((name, version));
369 }
370
371 *i += 1;
372 }
373
374 result
375}
376
377fn parse_dependency_line(line: &str) -> Option<(String, String)> {
379 let line = line.trim();
380 if line.is_empty() {
381 return None;
382 }
383
384 let regex = Regex::new(r#""([^"]+)"\s*(.*)$"#).ok()?;
386 let caps = regex.captures(line)?;
387
388 let name = caps.get(1)?.as_str().to_string();
389 let version_part = caps.get(2)?.as_str().trim();
390
391 let constraint = if version_part.is_empty() {
393 String::new()
394 } else {
395 extract_version_constraint(version_part)
396 };
397
398 Some((name, constraint))
399}
400
401fn extract_version_constraint(version_part: &str) -> String {
403 let regex = Regex::new(r#"\{\s*([<>=!]+)\s*"([^"]*)"\s*\}"#);
404 if let Ok(re) = regex
405 && let Some(caps) = re.captures(version_part)
406 {
407 let op = caps.get(1).map(|m| m.as_str()).unwrap_or("");
408 let ver = caps.get(2).map(|m| m.as_str()).unwrap_or("");
409 if !op.is_empty() && !ver.is_empty() {
410 return format!("{} {}", op, ver);
411 }
412 }
413
414 let content = version_part
416 .trim_matches('{')
417 .trim_matches('}')
418 .trim_matches('"')
419 .trim();
420
421 content.replace('"', "")
422}
423
424fn parse_checksums(lines: &[&str], i: &mut usize, data: &mut OpamData) {
426 if let Some((_, first_value)) = parse_key_value(lines[*i]) {
427 let inline = first_value.trim();
428 if !inline.is_empty() && inline != "[" {
429 if let Some((key, value)) = parse_checksum_line(inline) {
430 match key.as_str() {
431 "sha1" => data.sha1 = Some(value),
432 "md5" => data.md5 = Some(value),
433 "sha256" => data.sha256 = Some(value),
434 "sha512" => data.sha512 = Some(value),
435 _ => {}
436 }
437 }
438 return;
439 }
440 }
441
442 *i += 1;
443 while *i < lines.len() {
444 let line = lines[*i];
445
446 if line.trim().contains(']') {
447 break;
448 }
449
450 if let Some((key, value)) = parse_checksum_line(line) {
451 match key.as_str() {
452 "sha1" => data.sha1 = Some(value),
453 "md5" => data.md5 = Some(value),
454 "sha256" => data.sha256 = Some(value),
455 "sha512" => data.sha512 = Some(value),
456 _ => {}
457 }
458 }
459
460 *i += 1;
461 }
462}
463
/// Splits a checksum entry such as `"md5=abc123"` into `(algorithm, digest)`.
///
/// Surrounding whitespace and double quotes are ignored, as is whitespace
/// around the `=`. Returns `None` when there is no `=`, when the algorithm is
/// empty or contains anything other than letters, digits and `_`, or when the
/// digest is empty. Only the first `=` separates the two parts.
///
/// Implemented with plain string operations so that no regular expression has
/// to be compiled for every checksum line.
fn parse_checksum_line(line: &str) -> Option<(String, String)> {
    let line = line.trim().trim_matches('"').trim();

    let (key, value) = line.split_once('=')?;
    let key = key.trim_end();
    let value = value.trim_start();

    let key_is_word = !key.is_empty() && key.chars().all(|c| c.is_alphanumeric() || c == '_');
    if !key_is_word || value.is_empty() {
        return None;
    }

    Some((key.to_string(), value.to_string()))
}
476
/// Splits `content` into tokens separated by spaces, treating text between
/// double quotes as a single token.
///
/// The quote characters themselves are never part of a token, and empty
/// tokens (including `""`) are dropped. Only the space character separates
/// tokens.
fn split_quoted_strings(content: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut buffer = String::new();
    let mut inside_quotes = false;

    for ch in content.chars() {
        if ch == '"' {
            inside_quotes = !inside_quotes;
        } else if ch == ' ' && !inside_quotes {
            // A separator outside quotes ends the token collected so far.
            if !buffer.is_empty() {
                tokens.push(std::mem::take(&mut buffer));
            }
        } else {
            buffer.push(ch);
        }
    }

    if !buffer.is_empty() {
        tokens.push(buffer);
    }

    tokens
}
502
/// Combines the synopsis and the long description into one text.
///
/// When both are present they are joined with a newline (synopsis first);
/// when only one is present it is returned unchanged; when neither is
/// present the result is `None`.
fn build_description(synopsis: &Option<String>, description: &Option<String>) -> Option<String> {
    match (synopsis.as_deref(), description.as_deref()) {
        (None, None) => None,
        (Some(only), None) | (None, Some(only)) => Some(only.to_string()),
        (Some(short), Some(long)) => Some(format!("{}\n{}", short, long)),
    }
}
517
518fn extract_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
520 let mut parties = Vec::new();
521
522 for author in authors {
524 parties.push(Party {
525 r#type: Some("person".to_string()),
526 role: Some("author".to_string()),
527 name: Some(author.clone()),
528 email: None,
529 url: None,
530 organization: None,
531 organization_url: None,
532 timezone: None,
533 });
534 }
535
536 for maintainer in maintainers {
538 parties.push(Party {
539 r#type: Some("person".to_string()),
540 role: Some("maintainer".to_string()),
541 name: None,
542 email: Some(maintainer.clone()),
543 url: None,
544 organization: None,
545 organization_url: None,
546 timezone: None,
547 });
548 }
549
550 parties
551}
552
553fn extract_dependencies(deps: &[(String, String)]) -> Vec<Dependency> {
555 deps.iter()
556 .map(|(name, version_constraint)| Dependency {
557 purl: Some(format!("pkg:opam/{}", name)),
558 extracted_requirement: Some(version_constraint.clone()),
559 scope: Some("dependency".to_string()),
560 is_runtime: Some(true),
561 is_optional: Some(false),
562 is_pinned: Some(false),
563 is_direct: Some(true),
564 resolved_package: None,
565 extra_data: None,
566 })
567 .collect()
568}
569
// Unit tests for the path matcher and the small parsing helpers of this module.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsers::PackageParser;

    // A file name ending in `.opam` is accepted.
    #[test]
    fn test_is_match_with_opam_extension() {
        let path = Path::new("sample.opam");
        assert!(OpamParser::is_match(path));
    }

    // A file named exactly `opam` is accepted.
    #[test]
    fn test_is_match_with_opam_name() {
        let path = Path::new("opam");
        assert!(OpamParser::is_match(path));
    }

    // Unrelated file names are rejected.
    #[test]
    fn test_is_match_with_non_opam() {
        let path = Path::new("sample.txt");
        assert!(!OpamParser::is_match(path));
    }

    // The value keeps its quotes; only whitespace is trimmed.
    #[test]
    fn test_parse_key_value() {
        let (key, value) = parse_key_value("name: \"js_of_ocaml\"").unwrap();
        assert_eq!(key, "name");
        assert_eq!(value, "\"js_of_ocaml\"");
    }

    // Quotes are stripped, and an empty quoted string yields `None`.
    #[test]
    fn test_clean_value() {
        assert_eq!(
            clean_value("\"js_of_ocaml\""),
            Some("js_of_ocaml".to_string())
        );
        assert_eq!(clean_value("\"\""), None);
    }

    // A simple `{op "version"}` filter is flattened to `op version`.
    #[test]
    fn test_extract_version_constraint() {
        let result = extract_version_constraint(r#"{>= "4.02.0"}"#);
        assert_eq!(result, ">= 4.02.0");
    }

    // Name and constraint are both extracted from a full dependency entry.
    #[test]
    fn test_parse_dependency_line() {
        let (name, version) = parse_dependency_line(r#""ocaml" {>= "4.02.0"}"#).unwrap();
        assert_eq!(name, "ocaml");
        assert_eq!(version, ">= 4.02.0");
    }

    // A dependency without a filter has an empty constraint.
    #[test]
    fn test_parse_dependency_line_without_version() {
        let (name, version) = parse_dependency_line(r#""uchar""#).unwrap();
        assert_eq!(name, "uchar");
        assert_eq!(version, "");
    }

    // Two quoted strings separated by a space become two tokens.
    #[test]
    fn test_split_quoted_strings() {
        let parts = split_quoted_strings(r#""str1" "str2""#);
        assert_eq!(parts, vec!["str1", "str2"]);
    }

    // Synopsis and description are joined with a newline.
    #[test]
    fn test_build_description() {
        let synopsis = Some("Short description".to_string());
        let description = Some("Long description".to_string());
        let result = build_description(&synopsis, &description);
        assert_eq!(
            result,
            Some("Short description\nLong description".to_string())
        );
    }

    // Authors come first (stored in `name`), maintainers second (stored in `email`).
    #[test]
    fn test_extract_parties() {
        let authors = vec!["Author One".to_string()];
        let maintainers = vec!["maintainer@example.com".to_string()];
        let parties = extract_parties(&authors, &maintainers);

        assert_eq!(parties.len(), 2);
        assert_eq!(parties[0].name, Some("Author One".to_string()));
        assert_eq!(parties[0].role, Some("author".to_string()));
        assert_eq!(parties[1].email, Some("maintainer@example.com".to_string()));
        assert_eq!(parties[1].role, Some("maintainer".to_string()));
    }

    // The lowercase-`with` LGPL statement maps to the fixed expression pair
    // and produces a single detection.
    #[test]
    fn test_normalize_opam_declared_license_preserves_scancode_style_expression() {
        let (declared, declared_spdx, detections) = normalize_opam_declared_license(Some(
            "LGPL-3.0-only with OCaml-LGPL-linking-exception",
        ));

        assert_eq!(
            declared.as_deref(),
            Some("lgpl-3.0 WITH ocaml-lgpl-linking-exception")
        );
        assert_eq!(
            declared_spdx.as_deref(),
            Some("LGPL-3.0-only WITH OCaml-LGPL-linking-exception")
        );
        assert_eq!(detections.len(), 1);
        assert_eq!(
            detections[0].license_expression,
            "lgpl-3.0 WITH ocaml-lgpl-linking-exception"
        );
    }
}
680
// Invokes the crate-level `register_parser!` macro for this parser.
// NOTE(review): the arguments look like a human-readable description, the
// matching glob patterns, the package type, the primary language and a
// documentation URL — confirm against the macro definition.
crate::register_parser!(
    "OCaml OPAM package manifest",
    &["**/*.opam", "**/opam"],
    "opam",
    "OCaml",
    Some("https://opam.ocaml.org/doc/Manual.html"),
);