1use std::path::Path;
24
25use log::warn;
26use regex::Regex;
27
28use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
29use crate::parsers::PackageParser;
30
/// Parser for OCaml OPAM package manifests (files ending in `.opam` or named exactly `opam`).
pub struct OpamParser;
36
37impl PackageParser for OpamParser {
38 const PACKAGE_TYPE: PackageType = PackageType::Opam;
39
40 fn is_match(path: &Path) -> bool {
41 path.file_name().is_some_and(|name| {
42 name.to_string_lossy().ends_with(".opam") || name.to_string_lossy() == "opam"
43 })
44 }
45
46 fn extract_packages(path: &Path) -> Vec<PackageData> {
47 vec![match std::fs::read_to_string(path) {
48 Ok(text) => parse_opam(&text),
49 Err(e) => {
50 warn!("Failed to read OPAM file {:?}: {}", path, e);
51 default_package_data()
52 }
53 }]
54 }
55}
56
/// Intermediate, loosely-typed view of the fields read from an OPAM manifest
/// before they are mapped onto `PackageData`.
#[derive(Debug, Default)]
struct OpamData {
    /// Value of the `name` field.
    name: Option<String>,
    /// Value of the `version` field.
    version: Option<String>,
    /// Value of the `synopsis` field.
    synopsis: Option<String>,
    /// Value of the (possibly multi-line) `description` field.
    description: Option<String>,
    /// Value of the `homepage` field.
    homepage: Option<String>,
    /// Value of the `dev-repo` field.
    dev_repo: Option<String>,
    /// Value of the `bug-reports` field.
    bug_reports: Option<String>,
    /// Value of the `src` field.
    src: Option<String>,
    /// Entries of the `authors` field.
    authors: Vec<String>,
    /// Entries of the `maintainer` field.
    maintainers: Vec<String>,
    /// Value of the `license` field.
    license: Option<String>,
    /// `sha1` entry of the `checksum` field.
    sha1: Option<String>,
    /// `md5` entry of the `checksum` field.
    md5: Option<String>,
    /// `sha256` entry of the `checksum` field.
    sha256: Option<String>,
    /// `sha512` entry of the `checksum` field.
    sha512: Option<String>,
    /// `(name, version constraint)` pairs from the `depends` field; the
    /// constraint is an empty string when none was given.
    dependencies: Vec<(String, String)>,
}
77
78fn default_package_data() -> PackageData {
79 PackageData {
80 package_type: Some(OpamParser::PACKAGE_TYPE),
81 primary_language: Some("Ocaml".to_string()),
82 datasource_id: Some(DatasourceId::OpamFile),
83 ..Default::default()
84 }
85}
86
87fn parse_opam(text: &str) -> PackageData {
89 let opam_data = parse_opam_data(text);
90
91 let description = build_description(&opam_data.synopsis, &opam_data.description);
92 let parties = extract_parties(&opam_data.authors, &opam_data.maintainers);
93 let dependencies = extract_dependencies(&opam_data.dependencies);
94
95 let (repository_homepage_url, api_data_url, purl) =
96 build_opam_urls(&opam_data.name, &opam_data.version);
97
98 PackageData {
99 package_type: Some(OpamParser::PACKAGE_TYPE),
100 namespace: None,
101 name: opam_data.name,
102 version: opam_data.version,
103 qualifiers: None,
104 subpath: None,
105 primary_language: Some("Ocaml".to_string()),
106 description,
107 release_date: None,
108 parties,
109 keywords: Vec::new(),
110 homepage_url: opam_data.homepage,
111 download_url: opam_data.src,
112 size: None,
113 sha1: opam_data.sha1,
114 md5: opam_data.md5,
115 sha256: opam_data.sha256,
116 sha512: opam_data.sha512,
117 bug_tracking_url: opam_data.bug_reports,
118 code_view_url: None,
119 vcs_url: opam_data.dev_repo,
120 copyright: None,
121 holder: None,
122 declared_license_expression: None,
123 declared_license_expression_spdx: None,
124 license_detections: Vec::new(),
125 other_license_expression: None,
126 other_license_expression_spdx: None,
127 other_license_detections: Vec::new(),
128 extracted_license_statement: opam_data.license,
129 notice_text: None,
130 source_packages: Vec::new(),
131 file_references: Vec::new(),
132 is_private: false,
133 is_virtual: false,
134 extra_data: None,
135 dependencies,
136 repository_homepage_url,
137 repository_download_url: None,
138 api_data_url,
139 datasource_id: Some(DatasourceId::OpamFile),
140 purl,
141 }
142}
143
/// Derives the repository homepage URL, the API data URL and the package URL
/// (purl) from the package name and version.
///
/// Returns `(repository_homepage_url, api_data_url, purl)`:
/// - the homepage URL needs only the name;
/// - the API data URL needs both name and version;
/// - the purl needs the name and includes `@version` when a version is known.
///
/// Every element is `None` when `name` is `None`.
fn build_opam_urls(
    name: &Option<String>,
    version: &Option<String>,
) -> (Option<String>, Option<String>, Option<String>) {
    let name = name.as_deref();
    let version = version.as_deref();

    // The package name must be interpolated into the URL; previously a literal
    // placeholder string (braces included) was emitted instead.
    let repository_homepage_url =
        name.map(|n| format!("https://opam.ocaml.org/packages/{}", n));

    let api_data_url = name.zip(version).map(|(n, v)| {
        format!(
            "https://github.com/ocaml/opam-repository/blob/master/packages/{}/{}.{}/opam",
            n, n, v
        )
    });

    let purl = name.map(|n| match version {
        Some(v) => format!("pkg:opam/{}@{}", n, v),
        None => format!("pkg:opam/{}", n),
    });

    (repository_homepage_url, api_data_url, purl)
}
168
169fn parse_opam_data(text: &str) -> OpamData {
171 let mut data = OpamData::default();
172 let lines: Vec<&str> = text.lines().collect();
173 let mut i = 0;
174
175 while i < lines.len() {
176 let line = lines[i];
177
178 if let Some((key, value)) = parse_key_value(line) {
180 match key.as_str() {
181 "name" => data.name = clean_value(&value),
182 "version" => data.version = clean_value(&value),
183 "synopsis" => data.synopsis = clean_value(&value),
184 "description" => {
185 data.description = parse_multiline_string(&lines, &mut i);
186 }
187 "homepage" => data.homepage = clean_value(&value),
188 "dev-repo" => data.dev_repo = clean_value(&value),
189 "bug-reports" => data.bug_reports = clean_value(&value),
190 "src" => {
191 if value.trim().is_empty() && i + 1 < lines.len() {
192 i += 1;
193 data.src = clean_value(lines[i]);
194 } else {
195 data.src = clean_value(&value);
196 }
197 }
198 "license" => data.license = clean_value(&value),
199 "authors" => {
200 data.authors = parse_string_array(&lines, &mut i, &value);
201 }
202 "maintainer" => {
203 data.maintainers = parse_string_array(&lines, &mut i, &value);
204 }
205 "depends" => {
206 data.dependencies = parse_dependency_array(&lines, &mut i);
207 }
208 "checksum" => {
209 parse_checksums(&lines, &mut i, &mut data);
210 }
211 _ => {}
212 }
213 }
214
215 i += 1;
216 }
217
218 data
219}
220
/// Splits a manifest line into a trimmed `(key, value)` pair at the first `:`.
///
/// Returns `None` for blank lines, lines whose first non-blank character is
/// `#`, and lines that contain no `:`.
fn parse_key_value(line: &str) -> Option<(String, String)> {
    let trimmed = line.trim();
    if trimmed.is_empty() || trimmed.starts_with('#') {
        return None;
    }

    trimmed
        .split_once(':')
        .map(|(key, value)| (key.trim().to_string(), value.trim().to_string()))
}
236
/// Strips surrounding whitespace, then surrounding `"`, `[` and `]` characters
/// (in that order), then whitespace again.
///
/// Returns `None` when nothing is left.
fn clean_value(value: &str) -> Option<String> {
    let stripped = ['"', '[', ']']
        .iter()
        .fold(value.trim(), |rest, &delimiter| rest.trim_matches(delimiter))
        .trim();

    (!stripped.is_empty()).then(|| stripped.to_string())
}
252
/// Reads a (possibly multi-line) string field that starts on `lines[*i]`.
///
/// The text after the first `:` on the opening line starts the result. Unless
/// the string is already closed on that line, following lines are appended
/// (separated by a single space) up to and including the first line that
/// contains `"""`, or until the end of input. Surrounding `"` characters and
/// whitespace are stripped from every piece.
///
/// A string counts as closed on the opening line when it is either
/// `"""..."""` or `"..."`. Previously that case was not detected, so a
/// one-line description swallowed every following line of the manifest up to
/// the next `"""` (or the end of the file).
///
/// On return `*i` is the index of the last line consumed (it is left unchanged
/// for a one-line value, and equals `lines.len()` when no terminator is found).
/// Returns `None` when the collected text is empty.
fn parse_multiline_string(lines: &[&str], i: &mut usize) -> Option<String> {
    let mut result = String::new();

    // Text following the first ':' on the opening line; blank and comment
    // lines carry no value.
    let first_value = lines
        .get(*i)
        .map(|line| line.trim())
        .filter(|line| !line.starts_with('#'))
        .and_then(|line| line.split_once(':'))
        .map(|(_, value)| value.trim());

    let mut closed_on_opening_line = false;
    if let Some(value) = first_value {
        result.push_str(value.trim_matches('"').trim());

        closed_on_opening_line = match value.strip_prefix("\"\"\"") {
            // Triple-quoted: closed only if a second `"""` follows the opener.
            Some(rest) => rest.contains("\"\"\""),
            // Plain quoted string: needs both an opening and a closing quote.
            None => value.len() >= 2 && value.starts_with('"') && value.ends_with('"'),
        };
    }

    if !closed_on_opening_line {
        *i += 1;
        while *i < lines.len() {
            let line = lines[*i];
            result.push(' ');
            result.push_str(line.trim_matches('"').trim());

            if line.contains("\"\"\"") {
                break;
            }
            *i += 1;
        }
    }

    let cleaned = result.trim();
    (!cleaned.is_empty()).then(|| cleaned.to_string())
}
281
282fn parse_string_array(lines: &[&str], i: &mut usize, first_value: &str) -> Vec<String> {
284 let mut result = Vec::new();
285
286 let mut content = first_value.to_string();
287
288 if content.contains('[') && !content.contains(']') {
290 *i += 1;
291 while *i < lines.len() {
292 let line = lines[*i];
293 content.push(' ');
294 content.push_str(line);
295
296 if line.contains(']') {
297 break;
298 }
299 *i += 1;
300 }
301 }
302
303 let cleaned = content.trim_matches('[').trim_matches(']').trim();
305
306 for part in split_quoted_strings(cleaned) {
308 let p = part.trim_matches('"').trim();
309 if !p.is_empty() {
310 result.push(p.to_string());
311 }
312 }
313
314 result
315}
316
317fn parse_dependency_array(lines: &[&str], i: &mut usize) -> Vec<(String, String)> {
319 let mut result = Vec::new();
320
321 *i += 1;
322 while *i < lines.len() {
323 let line = lines[*i];
324
325 if line.trim().contains(']') {
326 break;
327 }
328
329 if let Some((name, version)) = parse_dependency_line(line) {
330 result.push((name, version));
331 }
332
333 *i += 1;
334 }
335
336 result
337}
338
339fn parse_dependency_line(line: &str) -> Option<(String, String)> {
341 let line = line.trim();
342 if line.is_empty() {
343 return None;
344 }
345
346 let regex = Regex::new(r#""([^"]+)"\s*(.*)$"#).ok()?;
348 let caps = regex.captures(line)?;
349
350 let name = caps.get(1)?.as_str().to_string();
351 let version_part = caps.get(2)?.as_str().trim();
352
353 let constraint = if version_part.is_empty() {
355 String::new()
356 } else {
357 extract_version_constraint(version_part)
358 };
359
360 Some((name, constraint))
361}
362
363fn extract_version_constraint(version_part: &str) -> String {
365 let regex = Regex::new(r#"\{\s*([<>=!]+)\s*"([^"]*)"\s*\}"#);
366 if let Ok(re) = regex
367 && let Some(caps) = re.captures(version_part)
368 {
369 let op = caps.get(1).map(|m| m.as_str()).unwrap_or("");
370 let ver = caps.get(2).map(|m| m.as_str()).unwrap_or("");
371 if !op.is_empty() && !ver.is_empty() {
372 return format!("{} {}", op, ver);
373 }
374 }
375
376 let content = version_part
378 .trim_matches('{')
379 .trim_matches('}')
380 .trim_matches('"')
381 .trim();
382
383 content.replace('"', "")
384}
385
386fn parse_checksums(lines: &[&str], i: &mut usize, data: &mut OpamData) {
388 if let Some((_, first_value)) = parse_key_value(lines[*i]) {
389 let inline = first_value.trim();
390 if !inline.is_empty() && inline != "[" {
391 if let Some((key, value)) = parse_checksum_line(inline) {
392 match key.as_str() {
393 "sha1" => data.sha1 = Some(value),
394 "md5" => data.md5 = Some(value),
395 "sha256" => data.sha256 = Some(value),
396 "sha512" => data.sha512 = Some(value),
397 _ => {}
398 }
399 }
400 return;
401 }
402 }
403
404 *i += 1;
405 while *i < lines.len() {
406 let line = lines[*i];
407
408 if line.trim().contains(']') {
409 break;
410 }
411
412 if let Some((key, value)) = parse_checksum_line(line) {
413 match key.as_str() {
414 "sha1" => data.sha1 = Some(value),
415 "md5" => data.md5 = Some(value),
416 "sha256" => data.sha256 = Some(value),
417 "sha512" => data.sha512 = Some(value),
418 _ => {}
419 }
420 }
421
422 *i += 1;
423 }
424}
425
426fn parse_checksum_line(line: &str) -> Option<(String, String)> {
428 let line = line.trim().trim_matches('"').trim();
429
430 let regex = Regex::new(r"^(\w+)\s*=\s*(.+)$").ok()?;
431 let caps = regex.captures(line)?;
432
433 let key = caps.get(1)?.as_str().to_string();
434 let value = caps.get(2)?.as_str().to_string();
435
436 Some((key, value))
437}
438
/// Splits `content` on spaces that lie outside double quotes.
///
/// Double-quote characters toggle the quoted state and are not copied into
/// the output; empty tokens are dropped. Only the space character separates
/// tokens; other whitespace is kept as part of a token.
fn split_quoted_strings(content: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut token = String::new();
    let mut quoted = false;

    for ch in content.chars() {
        if ch == '"' {
            quoted = !quoted;
        } else if ch == ' ' && !quoted {
            if !token.is_empty() {
                tokens.push(std::mem::take(&mut token));
            }
        } else {
            token.push(ch);
        }
    }

    if !token.is_empty() {
        tokens.push(token);
    }

    tokens
}
464
/// Combines the synopsis and the long description into one text.
///
/// When both are present they are joined with a newline (synopsis first);
/// when only one is present it is returned as is; `None` when both are absent.
fn build_description(synopsis: &Option<String>, description: &Option<String>) -> Option<String> {
    match (synopsis.as_deref(), description.as_deref()) {
        (None, None) => None,
        (Some(only), None) | (None, Some(only)) => Some(only.to_string()),
        (Some(short), Some(long)) => Some(format!("{}\n{}", short, long)),
    }
}
479
480fn extract_parties(authors: &[String], maintainers: &[String]) -> Vec<Party> {
482 let mut parties = Vec::new();
483
484 for author in authors {
486 parties.push(Party {
487 r#type: Some("person".to_string()),
488 role: Some("author".to_string()),
489 name: Some(author.clone()),
490 email: None,
491 url: None,
492 organization: None,
493 organization_url: None,
494 timezone: None,
495 });
496 }
497
498 for maintainer in maintainers {
500 parties.push(Party {
501 r#type: Some("person".to_string()),
502 role: Some("maintainer".to_string()),
503 name: None,
504 email: Some(maintainer.clone()),
505 url: None,
506 organization: None,
507 organization_url: None,
508 timezone: None,
509 });
510 }
511
512 parties
513}
514
515fn extract_dependencies(deps: &[(String, String)]) -> Vec<Dependency> {
517 deps.iter()
518 .map(|(name, version_constraint)| Dependency {
519 purl: Some(format!("pkg:opam/{}", name)),
520 extracted_requirement: Some(version_constraint.clone()),
521 scope: Some("dependency".to_string()),
522 is_runtime: Some(true),
523 is_optional: Some(false),
524 is_pinned: Some(false),
525 is_direct: Some(true),
526 resolved_package: None,
527 extra_data: None,
528 })
529 .collect()
530}
531
/// Unit tests for file matching and the line-level parsing helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsers::PackageParser;

    // A file name ending in `.opam` is recognised.
    #[test]
    fn test_is_match_with_opam_extension() {
        let path = Path::new("sample.opam");
        assert!(OpamParser::is_match(path));
    }

    // A file named exactly `opam` is recognised.
    #[test]
    fn test_is_match_with_opam_name() {
        let path = Path::new("opam");
        assert!(OpamParser::is_match(path));
    }

    // Unrelated file names are rejected.
    #[test]
    fn test_is_match_with_non_opam() {
        let path = Path::new("sample.txt");
        assert!(!OpamParser::is_match(path));
    }

    // The value keeps its quotes; only surrounding whitespace is trimmed.
    #[test]
    fn test_parse_key_value() {
        let (key, value) = parse_key_value("name: \"js_of_ocaml\"").unwrap();
        assert_eq!(key, "name");
        assert_eq!(value, "\"js_of_ocaml\"");
    }

    // Quotes are stripped, and an empty quoted string becomes `None`.
    #[test]
    fn test_clean_value() {
        assert_eq!(
            clean_value("\"js_of_ocaml\""),
            Some("js_of_ocaml".to_string())
        );
        assert_eq!(clean_value("\"\""), None);
    }

    // A simple `{op "version"}` filter is flattened to `op version`.
    #[test]
    fn test_extract_version_constraint() {
        let result = extract_version_constraint(r#"{>= "4.02.0"}"#);
        assert_eq!(result, ">= 4.02.0");
    }

    // Name and constraint are both extracted from a full dependency line.
    #[test]
    fn test_parse_dependency_line() {
        let (name, version) = parse_dependency_line(r#""ocaml" {>= "4.02.0"}"#).unwrap();
        assert_eq!(name, "ocaml");
        assert_eq!(version, ">= 4.02.0");
    }

    // A dependency without a filter yields an empty constraint.
    #[test]
    fn test_parse_dependency_line_without_version() {
        let (name, version) = parse_dependency_line(r#""uchar""#).unwrap();
        assert_eq!(name, "uchar");
        assert_eq!(version, "");
    }

    // Space-separated quoted strings are split and unquoted.
    #[test]
    fn test_split_quoted_strings() {
        let parts = split_quoted_strings(r#""str1" "str2""#);
        assert_eq!(parts, vec!["str1", "str2"]);
    }

    // Synopsis and description are joined with a newline, synopsis first.
    #[test]
    fn test_build_description() {
        let synopsis = Some("Short description".to_string());
        let description = Some("Long description".to_string());
        let result = build_description(&synopsis, &description);
        assert_eq!(
            result,
            Some("Short description\nLong description".to_string())
        );
    }

    // Authors are stored by name, maintainers by email, authors first.
    #[test]
    fn test_extract_parties() {
        let authors = vec!["Author One".to_string()];
        let maintainers = vec!["maintainer@example.com".to_string()];
        let parties = extract_parties(&authors, &maintainers);

        assert_eq!(parties.len(), 2);
        assert_eq!(parties[0].name, Some("Author One".to_string()));
        assert_eq!(parties[0].role, Some("author".to_string()));
        assert_eq!(parties[1].email, Some("maintainer@example.com".to_string()));
        assert_eq!(parties[1].role, Some("maintainer".to_string()));
    }
}
621
// Registers metadata for the OPAM parser via the crate's `register_parser!` macro.
// NOTE(review): the arguments look like (description, path glob patterns,
// package type, primary language, documentation URL) — confirm against the
// macro definition, which is not visible in this file.
crate::register_parser!(
    "OCaml OPAM package manifest",
    &["**/*.opam", "**/opam"],
    "opam",
    "OCaml",
    Some("https://opam.ocaml.org/doc/Manual.html"),
);