Skip to main content

provenant/parsers/
erlang_otp.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use packageurl::PackageUrl;
8use serde_json::Value as JsonValue;
9
10use crate::models::{
11    DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage, Sha256Digest,
12};
13use crate::parser_warn as warn;
14use crate::parsers::utils::{
15    MAX_ITERATION_COUNT, MAX_RECURSION_DEPTH, read_file_to_string, truncate_field,
16};
17
18use super::PackageParser;
19
20// ── Parser structs ──
21
/// Parser for OTP application resource files whose name ends in `.app.src`.
pub struct ErlangAppSrcParser;
/// Parser for `rebar.config` files.
pub struct RebarConfigParser;
/// Parser for rebar lock files.
// NOTE(review): the `PackageParser` impl for this type is outside this view — confirm the matched file name.
pub struct RebarLockParser;
25
26// ── Erlang term AST ──
27
/// A parsed Erlang term, restricted to the literal forms the parser in this file understands.
#[derive(Clone, Debug)]
enum ErlTerm {
    /// Bare or single-quoted atom (`foo`, `'Foo bar'`); `true`/`false` are atoms too.
    Atom(String),
    /// Double-quoted string literal with escapes already resolved.
    String(String),
    /// Binary literal `<<"...">>`; holds the inner string, or an empty string when there is none.
    Binary(String),
    /// Integer literal that fits in an `i64`.
    Integer(i64),
    /// Floating-point literal.
    Float(f64),
    /// `{A, B, ...}`.
    Tuple(Vec<ErlTerm>),
    /// `[A, B, ...]`.
    List(Vec<ErlTerm>),
    /// `#{K => V, ...}` as key/value pairs in source order.
    Map(Vec<(ErlTerm, ErlTerm)>),
}
39
40// ── Erlang term parser ──
41
/// Hand-written recursive-descent parser over Erlang term syntax.
struct ErlParser {
    /// The whole input, decoded to `char`s so positions can be indexed directly.
    chars: Vec<char>,
    /// Index into `chars` of the next character to read.
    pos: usize,
    /// Number of `parse_term` calls currently on the stack.
    depth: usize,
}
47
48impl ErlParser {
49    fn new(source: &str) -> Self {
50        Self {
51            chars: source.chars().collect(),
52            pos: 0,
53            depth: 0,
54        }
55    }
56
57    fn parse_term(&mut self) -> Result<ErlTerm, String> {
58        if self.depth >= MAX_RECURSION_DEPTH {
59            return Err("recursion depth exceeded".to_string());
60        }
61        self.depth += 1;
62        let result = self.parse_term_inner();
63        self.depth -= 1;
64        result
65    }
66
67    fn parse_term_inner(&mut self) -> Result<ErlTerm, String> {
68        self.skip_whitespace_and_comments();
69        match self.peek() {
70            Some('{') => self.parse_tuple(),
71            Some('[') => self.parse_list(),
72            Some('#') if self.peek_n(1) == Some('{') => self.parse_map(),
73            Some('"') => self.parse_string().map(ErlTerm::String),
74            Some('<') if self.peek_n(1) == Some('<') => self.parse_binary().map(ErlTerm::Binary),
75            Some('\'') => self.parse_quoted_atom().map(ErlTerm::Atom),
76            Some(c) if c.is_ascii_digit() || c == '-' => self.parse_number(),
77            Some(c) if c.is_ascii_lowercase() || c == '_' => self.parse_atom_or_bool(),
78            Some(c) => Err(format!(
79                "Unexpected character '{}' at position {}",
80                c, self.pos
81            )),
82            None => Err("Unexpected end of input".to_string()),
83        }
84    }
85
86    fn parse_tuple(&mut self) -> Result<ErlTerm, String> {
87        self.expect('{')?;
88        let items = self.parse_comma_separated('}')?;
89        Ok(ErlTerm::Tuple(items))
90    }
91
92    fn parse_list(&mut self) -> Result<ErlTerm, String> {
93        self.expect('[')?;
94        let items = self.parse_comma_separated(']')?;
95        Ok(ErlTerm::List(items))
96    }
97
98    fn parse_map(&mut self) -> Result<ErlTerm, String> {
99        self.expect('#')?;
100        self.expect('{')?;
101
102        let mut entries = Vec::new();
103        let mut count = 0usize;
104
105        loop {
106            self.skip_whitespace_and_comments();
107            if self.peek() == Some('}') {
108                self.pos += 1;
109                break;
110            }
111
112            if count >= MAX_ITERATION_COUNT {
113                return Err("too many map entries".to_string());
114            }
115
116            let key = self.parse_term()?;
117            self.skip_whitespace_and_comments();
118
119            match (self.peek(), self.peek_n(1)) {
120                (Some('='), Some('>')) | (Some(':'), Some('=')) => {
121                    self.pos += 2;
122                }
123                _ => {
124                    return Err(format!(
125                        "Expected map association operator at position {}",
126                        self.pos
127                    ));
128                }
129            }
130
131            let value = self.parse_term()?;
132            entries.push((key, value));
133            count += 1;
134
135            self.skip_whitespace_and_comments();
136            match self.peek() {
137                Some(',') => {
138                    self.pos += 1;
139                }
140                Some('}') => {
141                    self.pos += 1;
142                    break;
143                }
144                Some(c) => {
145                    return Err(format!(
146                        "Expected ',' or '}}' in map but found '{}' at position {}",
147                        c, self.pos
148                    ));
149                }
150                None => return Err("Unterminated map literal".to_string()),
151            }
152        }
153
154        Ok(ErlTerm::Map(entries))
155    }
156
157    fn parse_comma_separated(&mut self, closing: char) -> Result<Vec<ErlTerm>, String> {
158        let mut items = Vec::new();
159        let mut count = 0usize;
160        loop {
161            self.skip_whitespace_and_comments();
162            if self.peek() == Some(closing) {
163                self.pos += 1;
164                break;
165            }
166            if count >= MAX_ITERATION_COUNT {
167                return Err("too many items".to_string());
168            }
169            items.push(self.parse_term()?);
170            count += 1;
171            self.skip_whitespace_and_comments();
172            if self.peek() == Some(',') {
173                self.pos += 1;
174            } else if self.peek() == Some('|') {
175                // list tail syntax: [H | T] — skip rest
176                self.pos += 1;
177                self.parse_term()?;
178                self.skip_whitespace_and_comments();
179                if self.peek() == Some(closing) {
180                    self.pos += 1;
181                }
182                break;
183            }
184        }
185        Ok(items)
186    }
187
188    fn parse_string(&mut self) -> Result<String, String> {
189        self.expect('"')?;
190        let mut out = String::new();
191        while let Some(c) = self.peek() {
192            self.pos += 1;
193            match c {
194                '"' => return Ok(out),
195                '\\' => {
196                    let escaped = self
197                        .peek()
198                        .ok_or_else(|| "Unterminated string escape".to_string())?;
199                    self.pos += 1;
200                    out.push(match escaped {
201                        'n' => '\n',
202                        'r' => '\r',
203                        't' => '\t',
204                        '"' => '"',
205                        '\\' => '\\',
206                        other => other,
207                    });
208                }
209                other => out.push(other),
210            }
211        }
212        Err("Unterminated string literal".to_string())
213    }
214
215    fn parse_binary(&mut self) -> Result<String, String> {
216        self.expect('<')?;
217        self.expect('<')?;
218        self.skip_whitespace_and_comments();
219        let value = if self.peek() == Some('"') {
220            self.parse_string()?
221        } else {
222            String::new()
223        };
224        self.skip_whitespace_and_comments();
225        self.expect('>')?;
226        self.expect('>')?;
227        Ok(value)
228    }
229
230    fn parse_quoted_atom(&mut self) -> Result<String, String> {
231        self.expect('\'')?;
232        let mut out = String::new();
233        while let Some(c) = self.peek() {
234            self.pos += 1;
235            match c {
236                '\'' => return Ok(out),
237                '\\' => {
238                    if let Some(escaped) = self.peek() {
239                        self.pos += 1;
240                        out.push(escaped);
241                    }
242                }
243                other => out.push(other),
244            }
245        }
246        Err("Unterminated quoted atom".to_string())
247    }
248
249    fn parse_atom_or_bool(&mut self) -> Result<ErlTerm, String> {
250        let atom = self.parse_bare_atom()?;
251        match atom.as_str() {
252            "true" => Ok(ErlTerm::Atom("true".to_string())),
253            "false" => Ok(ErlTerm::Atom("false".to_string())),
254            _ => Ok(ErlTerm::Atom(atom)),
255        }
256    }
257
258    fn parse_bare_atom(&mut self) -> Result<String, String> {
259        let start = self.pos;
260        while let Some(c) = self.peek() {
261            if c.is_ascii_alphanumeric() || c == '_' || c == '@' {
262                self.pos += 1;
263            } else {
264                break;
265            }
266        }
267        if self.pos == start {
268            return Err("Expected atom".to_string());
269        }
270        Ok(self.chars[start..self.pos].iter().collect())
271    }
272
273    fn parse_number(&mut self) -> Result<ErlTerm, String> {
274        let start = self.pos;
275        if self.peek() == Some('-') {
276            self.pos += 1;
277        }
278        while let Some(c) = self.peek() {
279            if c.is_ascii_digit() {
280                self.pos += 1;
281            } else {
282                break;
283            }
284        }
285        if self.peek() == Some('.') && self.peek_n(1).is_some_and(|c| c.is_ascii_digit()) {
286            self.pos += 1;
287            while let Some(c) = self.peek() {
288                if c.is_ascii_digit() {
289                    self.pos += 1;
290                } else {
291                    break;
292                }
293            }
294            let s: String = self.chars[start..self.pos].iter().collect();
295            return s
296                .parse::<f64>()
297                .map(ErlTerm::Float)
298                .map_err(|e| format!("Invalid float: {}", e));
299        }
300        let s: String = self.chars[start..self.pos].iter().collect();
301        s.parse::<i64>()
302            .map(ErlTerm::Integer)
303            .map_err(|e| format!("Invalid integer: {}", e))
304    }
305
306    fn skip_whitespace_and_comments(&mut self) {
307        loop {
308            match self.peek() {
309                Some(c) if c.is_whitespace() => {
310                    self.pos += 1;
311                }
312                Some('%') => {
313                    while let Some(c) = self.peek() {
314                        self.pos += 1;
315                        if c == '\n' {
316                            break;
317                        }
318                    }
319                }
320                _ => break,
321            }
322        }
323    }
324
325    fn expect(&mut self, expected: char) -> Result<(), String> {
326        self.skip_whitespace_and_comments();
327        match self.peek() {
328            Some(c) if c == expected => {
329                self.pos += 1;
330                Ok(())
331            }
332            Some(c) => Err(format!(
333                "Expected '{}' but found '{}' at position {}",
334                expected, c, self.pos
335            )),
336            None => Err(format!("Expected '{}' but reached end of input", expected)),
337        }
338    }
339
340    fn peek(&self) -> Option<char> {
341        self.chars.get(self.pos).copied()
342    }
343
344    fn peek_n(&self, n: usize) -> Option<char> {
345        self.chars.get(self.pos + n).copied()
346    }
347
348    fn is_eof(&self) -> bool {
349        self.pos >= self.chars.len()
350    }
351}
352
353fn parse_dotted_terms(content: &str) -> Result<Vec<ErlTerm>, String> {
354    let normalized = strip_template_placeholders(content);
355    let mut parser = ErlParser::new(&normalized);
356    let mut terms = Vec::new();
357    let mut count = 0usize;
358    loop {
359        parser.skip_whitespace_and_comments();
360        if parser.is_eof() {
361            break;
362        }
363        if parser.peek() == Some('.') {
364            parser.pos += 1;
365            continue;
366        }
367        if count >= MAX_ITERATION_COUNT {
368            break;
369        }
370        let term = parser.parse_term()?;
371        terms.push(term);
372        count += 1;
373        parser.skip_whitespace_and_comments();
374        if parser.peek() == Some('.') {
375            parser.pos += 1;
376        }
377    }
378    Ok(terms)
379}
380
/// Removes build-time template placeholders such as `%MODULES%` from Erlang source text.
///
/// A placeholder is a `%...%` span that
/// * is outside string literals, quoted atoms and comments,
/// * has a non-empty body made only of ASCII uppercase letters, digits, `_`, `,` and `%`, and
/// * is followed on its line only by whitespace, `,`, `]`, `}` or `)`.
///
/// Any other `%` (including `%%`) starts a comment, which is copied verbatim up to the end
/// of the line. Treating it as a comment matters: otherwise a `'` or `"` inside comment text
/// would be mistaken for the start of a literal and hide later placeholders from this pass.
fn strip_template_placeholders(source: &str) -> String {
    let chars: Vec<char> = source.chars().collect();
    let mut result = String::with_capacity(source.len());
    let mut i = 0usize;
    let mut in_string = false;
    let mut in_quoted_atom = false;

    while i < chars.len() {
        let c = chars[i];

        if in_string || in_quoted_atom {
            let closing = if in_string { '"' } else { '\'' };
            result.push(c);
            i += 1;
            if c == '\\' && i < chars.len() {
                // Copy the escaped character so an escaped quote does not end the literal.
                result.push(chars[i]);
                i += 1;
                continue;
            }
            if c == closing {
                in_string = false;
                in_quoted_atom = false;
            }
            continue;
        }

        match c {
            '"' => {
                in_string = true;
                result.push(c);
                i += 1;
            }
            '\'' => {
                in_quoted_atom = true;
                result.push(c);
                i += 1;
            }
            '%' => {
                let line_end = chars[i..]
                    .iter()
                    .position(|&ch| ch == '\n')
                    .map_or(chars.len(), |offset| i + offset);

                // `%%` is the conventional comment prefix and never a placeholder.
                let placeholder_end = if chars.get(i + 1) == Some(&'%') {
                    None
                } else {
                    chars[i + 1..line_end]
                        .iter()
                        .rposition(|&ch| ch == '%')
                        .map(|offset| i + 1 + offset)
                        .filter(|&last_percent| {
                            let body = &chars[i + 1..last_percent];
                            let trailing = &chars[last_percent + 1..line_end];
                            !body.is_empty()
                                && body.iter().all(|ch| {
                                    ch.is_ascii_uppercase()
                                        || ch.is_ascii_digit()
                                        || matches!(ch, '_' | ',' | '%')
                                })
                                && trailing.iter().all(|ch| {
                                    ch.is_whitespace() || matches!(ch, ',' | ']' | '}' | ')')
                                })
                        })
                        .map(|last_percent| last_percent + 1)
                };

                match placeholder_end {
                    Some(end) => i = end,
                    None => {
                        // Ordinary comment: keep it untouched through the end of the line.
                        result.extend(chars[i..line_end].iter());
                        i = line_end;
                    }
                }
            }
            _ => {
                result.push(c);
                i += 1;
            }
        }
    }

    result
}
473
474// ── Helpers ──
475
476fn term_to_str(term: &ErlTerm) -> Option<String> {
477    match term {
478        ErlTerm::String(s) | ErlTerm::Binary(s) | ErlTerm::Atom(s) => Some(s.clone()),
479        ErlTerm::Integer(n) => Some(n.to_string()),
480        ErlTerm::Float(f) => Some(f.to_string()),
481        _ => None,
482    }
483}
484
485fn term_to_proplist(term: &ErlTerm) -> Option<Vec<(String, ErlTerm)>> {
486    let items = match term {
487        ErlTerm::List(items) => items,
488        _ => return None,
489    };
490    let mut result = Vec::new();
491    for item in items {
492        if let ErlTerm::Tuple(fields) = item
493            && fields.len() == 2
494            && let Some(key) = term_to_str(&fields[0])
495        {
496            result.push((key, fields[1].clone()));
497        }
498    }
499    Some(result)
500}
501
502fn term_to_key_value_pairs(term: &ErlTerm) -> Option<Vec<(String, ErlTerm)>> {
503    match term {
504        ErlTerm::Map(entries) => Some(
505            entries
506                .iter()
507                .filter_map(|(key, value)| term_to_str(key).map(|key| (key, value.clone())))
508                .collect(),
509        ),
510        _ => term_to_proplist(term),
511    }
512}
513
514fn term_to_atom_list(term: &ErlTerm) -> Vec<String> {
515    match term {
516        ErlTerm::List(items) => items.iter().filter_map(term_to_str).collect(),
517        _ => Vec::new(),
518    }
519}
520
521fn build_hex_purl(name: &str, version: Option<&str>) -> Option<String> {
522    let mut purl = PackageUrl::new("hex", name).ok()?;
523    if let Some(version) = version {
524        purl.with_version(version).ok()?;
525    }
526    Some(purl.to_string())
527}
528
529// ── ErlangAppSrcParser ──
530
531impl PackageParser for ErlangAppSrcParser {
532    const PACKAGE_TYPE: PackageType = PackageType::Hex;
533
534    fn is_match(path: &Path) -> bool {
535        path.extension()
536            .and_then(|e| e.to_str())
537            .is_some_and(|ext| ext == "src")
538            && path
539                .file_stem()
540                .and_then(|s| s.to_str())
541                .is_some_and(|stem| stem.ends_with(".app"))
542    }
543
544    fn extract_packages(path: &Path) -> Vec<PackageData> {
545        let content = match read_file_to_string(path, None) {
546            Ok(c) => c,
547            Err(e) => {
548                warn!("Failed to read {:?}: {}", path, e);
549                return vec![default_app_src_package()];
550            }
551        };
552
553        match parse_app_src(&content) {
554            Ok(pkg) => vec![pkg],
555            Err(e) => {
556                warn!("Failed to parse {:?}: {}", path, e);
557                vec![default_app_src_package()]
558            }
559        }
560    }
561}
562
563fn default_app_src_package() -> PackageData {
564    PackageData {
565        package_type: Some(PackageType::Hex),
566        primary_language: Some("Erlang".to_string()),
567        datasource_id: Some(DatasourceId::ErlangOtpAppSrc),
568        ..Default::default()
569    }
570}
571
572fn parse_app_src(content: &str) -> Result<PackageData, String> {
573    let terms = parse_dotted_terms(content)?;
574
575    let app_tuple = terms
576        .into_iter()
577        .find_map(|term| {
578            if let ErlTerm::Tuple(fields) = &term
579                && fields.len() == 3
580                && term_to_str(&fields[0]).as_deref() == Some("application")
581            {
582                Some(term)
583            } else {
584                None
585            }
586        })
587        .ok_or_else(|| "No {application, _, _} tuple found".to_string())?;
588
589    let fields = match app_tuple {
590        ErlTerm::Tuple(fields) => fields,
591        _ => unreachable!(),
592    };
593
594    let app_name = term_to_str(&fields[1]);
595    let props = term_to_proplist(&fields[2]).unwrap_or_default();
596
597    let mut package = default_app_src_package();
598    package.name = app_name.map(truncate_field);
599
600    let mut extra_data = HashMap::new();
601
602    for (key, value) in &props {
603        match key.as_str() {
604            "vsn" => {
605                if let Some(v) = term_to_str(value)
606                    && !v.contains('%')
607                {
608                    package.version = Some(truncate_field(v));
609                }
610            }
611            "description" => {
612                package.description = term_to_str(value).map(truncate_field);
613            }
614            "licenses" => {
615                let licenses = term_to_atom_list(value);
616                if !licenses.is_empty() {
617                    package.extracted_license_statement = Some(truncate_field(licenses.join(", ")));
618                }
619            }
620            "links" => {
621                if let Some(link_props) = term_to_key_value_pairs(value) {
622                    for (link_name, link_val) in &link_props {
623                        if let Some(url) = term_to_str(link_val) {
624                            let lower = link_name.to_lowercase();
625                            if lower.contains("github")
626                                || lower.contains("source")
627                                || lower.contains("repo")
628                            {
629                                package.vcs_url = Some(truncate_field(url.clone()));
630                            }
631                            if package.homepage_url.is_none() {
632                                package.homepage_url = Some(truncate_field(url));
633                            }
634                        }
635                    }
636                }
637            }
638            "applications" => {
639                let apps = term_to_atom_list(value);
640                for app in apps {
641                    if is_otp_stdlib(&app) {
642                        continue;
643                    }
644                    package.dependencies.push(Dependency {
645                        purl: build_hex_purl(&app, None).map(truncate_field),
646                        extracted_requirement: None,
647                        scope: Some("dependencies".to_string()),
648                        is_runtime: Some(true),
649                        is_optional: None,
650                        is_pinned: None,
651                        is_direct: None,
652                        resolved_package: None,
653                        extra_data: None,
654                    });
655                }
656            }
657            "runtime_dependencies" => {
658                let deps = term_to_atom_list(value);
659                for dep_str in deps {
660                    if let Some((name, version)) = dep_str.split_once('-') {
661                        if is_otp_stdlib(name) {
662                            continue;
663                        }
664                        let version_str = if version.starts_with('@') {
665                            None
666                        } else {
667                            Some(version)
668                        };
669                        package.dependencies.push(Dependency {
670                            purl: build_hex_purl(name, version_str).map(truncate_field),
671                            extracted_requirement: version_str
672                                .map(|v| truncate_field(v.to_string())),
673                            scope: Some("dependencies".to_string()),
674                            is_runtime: Some(true),
675                            is_optional: None,
676                            is_pinned: None,
677                            is_direct: None,
678                            resolved_package: None,
679                            extra_data: None,
680                        });
681                    }
682                }
683            }
684            "maintainers" => {
685                let maintainers = term_to_atom_list(value);
686                if !maintainers.is_empty() {
687                    extra_data.insert(
688                        "maintainers".to_string(),
689                        JsonValue::Array(
690                            maintainers
691                                .into_iter()
692                                .map(|m| JsonValue::String(truncate_field(m)))
693                                .collect(),
694                        ),
695                    );
696                }
697            }
698            "keywords" => {
699                let keywords = term_to_atom_list(value);
700                if !keywords.is_empty() {
701                    package.keywords = keywords.into_iter().map(truncate_field).collect();
702                }
703            }
704            _ => {}
705        }
706    }
707
708    if let Some(ref name) = package.name {
709        package.purl = build_hex_purl(name, package.version.as_deref()).map(truncate_field);
710        package.repository_homepage_url =
711            Some(truncate_field(format!("https://hex.pm/packages/{}", name)));
712        package.api_data_url = Some(truncate_field(format!(
713            "https://hex.pm/api/packages/{}",
714            name
715        )));
716    }
717
718    if !extra_data.is_empty() {
719        package.extra_data = Some(extra_data);
720    }
721
722    Ok(package)
723}
724
/// Reports whether `name` is one of the application names listed below.
///
/// The comparison is exact and case-sensitive.
fn is_otp_stdlib(name: &str) -> bool {
    const OTP_APPLICATIONS: &[&str] = &[
        "kernel",
        "stdlib",
        "sasl",
        "erts",
        "compiler",
        "crypto",
        "inets",
        "ssl",
        "public_key",
        "asn1",
        "syntax_tools",
        "tools",
        "os_mon",
        "runtime_tools",
        "mnesia",
        "observer",
        "wx",
        "debugger",
        "reltool",
        "xmerl",
        "edoc",
        "eunit",
        "common_test",
        "dialyzer",
        "et",
        "megaco",
        "parsetools",
        "snmp",
        "ssh",
        "tftp",
        "ftp",
        "erl_interface",
        "jinterface",
        "odbc",
        "eldap",
        "diameter",
    ];

    OTP_APPLICATIONS.contains(&name)
}
766
767// ── RebarConfigParser ──
768
769impl PackageParser for RebarConfigParser {
770    const PACKAGE_TYPE: PackageType = PackageType::Hex;
771
772    fn is_match(path: &Path) -> bool {
773        path.file_name().and_then(|n| n.to_str()) == Some("rebar.config")
774    }
775
776    fn extract_packages(path: &Path) -> Vec<PackageData> {
777        let content = match read_file_to_string(path, None) {
778            Ok(c) => c,
779            Err(e) => {
780                warn!("Failed to read {:?}: {}", path, e);
781                return vec![default_rebar_config_package()];
782            }
783        };
784
785        match parse_rebar_config(&content) {
786            Ok(pkg) => vec![pkg],
787            Err(e) => {
788                warn!("Failed to parse {:?}: {}", path, e);
789                vec![default_rebar_config_package()]
790            }
791        }
792    }
793}
794
795fn default_rebar_config_package() -> PackageData {
796    PackageData {
797        package_type: Some(PackageType::Hex),
798        primary_language: Some("Erlang".to_string()),
799        datasource_id: Some(DatasourceId::RebarConfig),
800        ..Default::default()
801    }
802}
803
804fn parse_rebar_config(content: &str) -> Result<PackageData, String> {
805    let terms = parse_dotted_terms(content)?;
806
807    let mut package = default_rebar_config_package();
808
809    for term in &terms {
810        if let ErlTerm::Tuple(fields) = term
811            && fields.len() == 2
812        {
813            let key = term_to_str(&fields[0]);
814            match key.as_deref() {
815                Some("deps") => {
816                    if let ErlTerm::List(deps) = &fields[1] {
817                        for dep in deps.iter().take(MAX_ITERATION_COUNT) {
818                            if let Some(d) = parse_rebar_dep(dep) {
819                                package.dependencies.push(d);
820                            }
821                        }
822                    }
823                }
824                Some("profiles") => {
825                    parse_profile_deps(&fields[1], &mut package.dependencies);
826                }
827                _ => {}
828            }
829        }
830    }
831
832    Ok(package)
833}
834
835fn parse_rebar_dep(term: &ErlTerm) -> Option<Dependency> {
836    let fields = match term {
837        ErlTerm::Tuple(fields) => fields,
838        _ => return None,
839    };
840
841    if fields.is_empty() {
842        return None;
843    }
844
845    if let Some(key) = term_to_str(&fields[0])
846        && key.starts_with("if_")
847    {
848        return None;
849    }
850
851    let app_name = term_to_str(&fields[0])?;
852
853    match fields.len() {
854        // {Name, Version} or {Name, {git, URL, Ref}}
855        2 => {
856            if let Some(version) = term_to_str(&fields[1]) {
857                // {Name, Version}
858                Some(Dependency {
859                    purl: build_hex_purl(&app_name, Some(&version)).map(truncate_field),
860                    extracted_requirement: Some(truncate_field(version)),
861                    scope: Some("dependencies".to_string()),
862                    is_runtime: None,
863                    is_optional: None,
864                    is_pinned: None,
865                    is_direct: None,
866                    resolved_package: None,
867                    extra_data: None,
868                })
869            } else {
870                let package_name = extract_rebar_package_name(&fields[1], &app_name);
871                let vcs_url = extract_git_url(&fields[1]);
872                let version = extract_git_version(&fields[1]);
873                Some(Dependency {
874                    purl: build_hex_purl(&package_name, version.as_deref()).map(truncate_field),
875                    extracted_requirement: version.map(truncate_field),
876                    scope: Some("dependencies".to_string()),
877                    is_runtime: None,
878                    is_optional: None,
879                    is_pinned: None,
880                    is_direct: None,
881                    resolved_package: None,
882                    extra_data: build_rebar_dependency_extra_data(
883                        vcs_url,
884                        app_name.as_str(),
885                        package_name.as_str(),
886                    ),
887                })
888            }
889        }
890        // {Name, Version, Source}
891        3 => {
892            if let Some(version) = term_to_str(&fields[1]) {
893                let package_name = extract_rebar_package_name(&fields[2], &app_name);
894                let vcs_url = extract_git_url(&fields[2]);
895                Some(Dependency {
896                    purl: build_hex_purl(&package_name, Some(&version)).map(truncate_field),
897                    extracted_requirement: Some(truncate_field(version)),
898                    scope: Some("dependencies".to_string()),
899                    is_runtime: None,
900                    is_optional: None,
901                    is_pinned: None,
902                    is_direct: None,
903                    resolved_package: None,
904                    extra_data: build_rebar_dependency_extra_data(
905                        vcs_url,
906                        app_name.as_str(),
907                        package_name.as_str(),
908                    ),
909                })
910            } else {
911                let package_name = extract_rebar_package_name(&fields[1], &app_name);
912                let vcs_url = extract_git_url(&fields[1]);
913                let version = extract_git_version(&fields[1]);
914                Some(Dependency {
915                    purl: build_hex_purl(&package_name, version.as_deref()).map(truncate_field),
916                    extracted_requirement: version.map(truncate_field),
917                    scope: Some("dependencies".to_string()),
918                    is_runtime: None,
919                    is_optional: None,
920                    is_pinned: None,
921                    is_direct: None,
922                    resolved_package: None,
923                    extra_data: build_rebar_dependency_extra_data(
924                        vcs_url,
925                        app_name.as_str(),
926                        package_name.as_str(),
927                    ),
928                })
929            }
930        }
931        _ => None,
932    }
933}
934
935fn extract_rebar_package_name(term: &ErlTerm, fallback_name: &str) -> String {
936    if let ErlTerm::Tuple(fields) = term
937        && fields.len() >= 2
938        && term_to_str(&fields[0]).as_deref() == Some("pkg")
939        && let Some(package_name) = term_to_str(&fields[1])
940    {
941        package_name
942    } else {
943        fallback_name.to_string()
944    }
945}
946
947fn build_rebar_dependency_extra_data(
948    vcs_url: Option<String>,
949    app_name: &str,
950    package_name: &str,
951) -> Option<HashMap<String, JsonValue>> {
952    let mut extra_data = HashMap::new();
953
954    if let Some(url) = vcs_url {
955        extra_data.insert(
956            "vcs_url".to_string(),
957            JsonValue::String(truncate_field(url)),
958        );
959    }
960
961    if app_name != package_name {
962        extra_data.insert(
963            "app_name".to_string(),
964            JsonValue::String(truncate_field(app_name.to_string())),
965        );
966    }
967
968    if extra_data.is_empty() {
969        None
970    } else {
971        Some(extra_data)
972    }
973}
974
975fn extract_git_url(term: &ErlTerm) -> Option<String> {
976    if let ErlTerm::Tuple(fields) = term
977        && fields.len() >= 2
978        && matches!(
979            term_to_str(&fields[0]).as_deref(),
980            Some("git") | Some("git_subdir")
981        )
982    {
983        term_to_str(&fields[1])
984    } else {
985        None
986    }
987}
988
989fn extract_git_version(term: &ErlTerm) -> Option<String> {
990    if let ErlTerm::Tuple(fields) = term
991        && fields.len() >= 3
992        && matches!(
993            term_to_str(&fields[0]).as_deref(),
994            Some("git") | Some("git_subdir")
995        )
996    {
997        if let ErlTerm::Tuple(ref_fields) = &fields[2]
998            && ref_fields.len() == 2
999        {
1000            let ref_type = term_to_str(&ref_fields[0])?;
1001            let ref_val = term_to_str(&ref_fields[1])?;
1002            match ref_type.as_str() {
1003                "tag" => Some(ref_val),
1004                _ => None,
1005            }
1006        } else {
1007            None
1008        }
1009    } else {
1010        None
1011    }
1012}
1013
1014fn parse_profile_deps(term: &ErlTerm, dependencies: &mut Vec<Dependency>) {
1015    let profiles = match term {
1016        ErlTerm::List(items) => items,
1017        _ => return,
1018    };
1019
1020    for profile in profiles.iter().take(MAX_ITERATION_COUNT) {
1021        if let ErlTerm::Tuple(fields) = profile
1022            && fields.len() == 2
1023        {
1024            let profile_name = term_to_str(&fields[0]).unwrap_or_default();
1025            if let ErlTerm::List(profile_opts) = &fields[1] {
1026                for opt in profile_opts {
1027                    if let ErlTerm::Tuple(opt_fields) = opt
1028                        && opt_fields.len() == 2
1029                        && term_to_str(&opt_fields[0]).as_deref() == Some("deps")
1030                        && let ErlTerm::List(deps) = &opt_fields[1]
1031                    {
1032                        for dep in deps.iter().take(MAX_ITERATION_COUNT) {
1033                            if let Some(mut d) = parse_rebar_dep(dep) {
1034                                d.scope = Some(truncate_field(profile_name.clone()));
1035                                dependencies.push(d);
1036                            }
1037                        }
1038                    }
1039                }
1040            }
1041        }
1042    }
1043}
1044
1045// ── RebarLockParser ──
1046
1047impl PackageParser for RebarLockParser {
1048    const PACKAGE_TYPE: PackageType = PackageType::Hex;
1049
1050    fn is_match(path: &Path) -> bool {
1051        path.file_name().and_then(|n| n.to_str()) == Some("rebar.lock")
1052    }
1053
1054    fn extract_packages(path: &Path) -> Vec<PackageData> {
1055        let content = match read_file_to_string(path, None) {
1056            Ok(c) => c,
1057            Err(e) => {
1058                warn!("Failed to read {:?}: {}", path, e);
1059                return vec![default_rebar_lock_package()];
1060            }
1061        };
1062
1063        match parse_rebar_lock(&content) {
1064            Ok(pkg) => vec![pkg],
1065            Err(e) => {
1066                warn!("Failed to parse {:?}: {}", path, e);
1067                vec![default_rebar_lock_package()]
1068            }
1069        }
1070    }
1071}
1072
1073fn default_rebar_lock_package() -> PackageData {
1074    PackageData {
1075        package_type: Some(PackageType::Hex),
1076        primary_language: Some("Erlang".to_string()),
1077        datasource_id: Some(DatasourceId::RebarLock),
1078        ..Default::default()
1079    }
1080}
1081
1082fn parse_rebar_lock(content: &str) -> Result<PackageData, String> {
1083    let terms = parse_dotted_terms(content)?;
1084
1085    // rebar.lock format: first term is either:
1086    // - {Version, [deps]}  (v2 format, e.g. {"1.2.0", [...]})
1087    // - [deps]             (v1 format, flat list)
1088    // Second term (if present): [{pkg_hash, [...]}, {pkg_hash_ext, [...]}]
1089
1090    let (dep_list, hash_map) = match terms.as_slice() {
1091        // v2 format: {"1.2.0", [deps]}
1092        [ErlTerm::Tuple(fields), rest @ ..] if fields.len() == 2 => {
1093            let deps = match &fields[1] {
1094                ErlTerm::List(items) => items.clone(),
1095                _ => return Err("Expected dependency list in lock tuple".to_string()),
1096            };
1097            let hashes = rest.first().map(extract_pkg_hashes).unwrap_or_default();
1098            (deps, hashes)
1099        }
1100        // v1 format: [deps]
1101        [ErlTerm::List(items), rest @ ..] => {
1102            let hashes = rest.first().map(extract_pkg_hashes).unwrap_or_default();
1103            (items.clone(), hashes)
1104        }
1105        _ => return Err("Unrecognized rebar.lock format".to_string()),
1106    };
1107
1108    let mut package = default_rebar_lock_package();
1109
1110    for dep_term in dep_list.iter().take(MAX_ITERATION_COUNT) {
1111        if let Some(dep) = parse_lock_dep(dep_term, &hash_map) {
1112            package.dependencies.push(dep);
1113        }
1114    }
1115
1116    Ok(package)
1117}
1118
1119fn parse_lock_dep(term: &ErlTerm, hashes: &HashMap<String, String>) -> Option<Dependency> {
1120    let fields = match term {
1121        ErlTerm::Tuple(fields) if fields.len() >= 3 => fields,
1122        _ => return None,
1123    };
1124
1125    let app_name = term_to_str(&fields[0])?;
1126    // fields[2] is the level (integer)
1127
1128    let (package_name, version, vcs_url) = match &fields[1] {
1129        // {pkg, <<"name">>, <<"version">>}
1130        ErlTerm::Tuple(pkg_fields)
1131            if pkg_fields.len() >= 3 && term_to_str(&pkg_fields[0]).as_deref() == Some("pkg") =>
1132        {
1133            let package_name = term_to_str(&pkg_fields[1]).unwrap_or_else(|| app_name.clone());
1134            let ver = term_to_str(&pkg_fields[2]);
1135            (package_name, ver, None)
1136        }
1137        // {git, "url", {ref, "hash"}}
1138        ErlTerm::Tuple(git_fields)
1139            if git_fields.len() >= 2
1140                && matches!(
1141                    term_to_str(&git_fields[0]).as_deref(),
1142                    Some("git") | Some("git_subdir")
1143                ) =>
1144        {
1145            let url = term_to_str(&git_fields[1]);
1146            let ver = if git_fields.len() >= 3 {
1147                extract_git_version_from_lock_ref(&git_fields[2])
1148            } else {
1149                None
1150            };
1151            (app_name.clone(), ver, url)
1152        }
1153        _ => (app_name.clone(), None, None),
1154    };
1155
1156    let sha256 = hashes
1157        .get(&app_name)
1158        .or_else(|| hashes.get(&package_name))
1159        .and_then(|h| Sha256Digest::from_hex(h).ok());
1160
1161    let resolved_package = ResolvedPackage {
1162        primary_language: Some("Erlang".to_string()),
1163        sha256,
1164        is_virtual: true,
1165        datasource_id: Some(DatasourceId::RebarLock),
1166        purl: build_hex_purl(&package_name, version.as_deref()).map(truncate_field),
1167        repository_homepage_url: Some(truncate_field(format!(
1168            "https://hex.pm/packages/{}",
1169            package_name
1170        ))),
1171        api_data_url: Some(truncate_field(format!(
1172            "https://hex.pm/api/packages/{}",
1173            package_name
1174        ))),
1175        ..ResolvedPackage::new(
1176            PackageType::Hex,
1177            String::new(),
1178            package_name.clone(),
1179            version.clone().unwrap_or_default(),
1180        )
1181    };
1182
1183    Some(Dependency {
1184        purl: build_hex_purl(&package_name, version.as_deref()).map(truncate_field),
1185        extracted_requirement: version.map(truncate_field),
1186        scope: Some("dependencies".to_string()),
1187        is_runtime: None,
1188        is_optional: None,
1189        is_pinned: Some(true),
1190        is_direct: None,
1191        resolved_package: Some(Box::new(resolved_package)),
1192        extra_data: build_rebar_dependency_extra_data(
1193            vcs_url,
1194            app_name.as_str(),
1195            package_name.as_str(),
1196        ),
1197    })
1198}
1199
1200fn extract_git_version_from_lock_ref(term: &ErlTerm) -> Option<String> {
1201    if let ErlTerm::Tuple(fields) = term
1202        && fields.len() == 2
1203        && term_to_str(&fields[0]).as_deref() == Some("ref")
1204    {
1205        term_to_str(&fields[1])
1206    } else {
1207        None
1208    }
1209}
1210
1211fn extract_pkg_hashes(term: &ErlTerm) -> HashMap<String, String> {
1212    let items = match term {
1213        ErlTerm::List(items) => items,
1214        _ => return HashMap::new(),
1215    };
1216
1217    let mut hashes = HashMap::new();
1218    for item in items {
1219        if let ErlTerm::Tuple(fields) = item
1220            && fields.len() == 2
1221            && term_to_str(&fields[0]).as_deref() == Some("pkg_hash")
1222            && let ErlTerm::List(hash_list) = &fields[1]
1223        {
1224            for entry in hash_list.iter().take(MAX_ITERATION_COUNT) {
1225                if let ErlTerm::Tuple(pair) = entry
1226                    && pair.len() == 2
1227                    && let (Some(name), Some(hash)) = (term_to_str(&pair[0]), term_to_str(&pair[1]))
1228                {
1229                    hashes.insert(name, hash);
1230                }
1231            }
1232        }
1233    }
1234    hashes
1235}
1236
1237// ── Parser metadata registration ──
1238
// Metadata entry for Erlang `.app.src` files.
// NOTE(review): the positional arguments look like (description, path globs,
// package type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.
crate::register_parser!(
    "Erlang OTP application resource file",
    &["**/*.app.src"],
    "hex",
    "Erlang",
    Some("https://www.erlang.org/doc/apps/kernel/application"),
);

// Metadata entry for `rebar.config` files.
crate::register_parser!(
    "Rebar3 configuration",
    &["**/rebar.config"],
    "hex",
    "Erlang",
    Some("https://rebar3.org/docs/configuration/configuration/"),
);

// Metadata entry for `rebar.lock` files; it uses the same documentation URL
// as the `rebar.config` entry.
crate::register_parser!(
    "Rebar3 lockfile",
    &["**/rebar.lock"],
    "hex",
    "Erlang",
    Some("https://rebar3.org/docs/configuration/configuration/"),
);