Skip to main content

provenant/parsers/
erlang_otp.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use packageurl::PackageUrl;
8use serde_json::Value as JsonValue;
9
10use crate::models::{
11    DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage, Sha256Digest,
12};
13use crate::parser_warn as warn;
14use crate::parsers::utils::{
15    MAX_ITERATION_COUNT, MAX_RECURSION_DEPTH, read_file_to_string, truncate_field,
16};
17
18use super::PackageParser;
19use super::metadata::ParserMetadata;
20
21// ── Parser structs ──
22
/// Parser for Erlang OTP application resource files (`*.app.src`).
pub struct ErlangAppSrcParser;
/// Parser for Rebar3 `rebar.config` files.
pub struct RebarConfigParser;
/// Parser marker type for rebar lock files.
// NOTE(review): presumably handles `rebar.lock`; its `PackageParser` impl is
// not visible in this part of the file — confirm.
pub struct RebarLockParser;
26
27// ── Erlang term AST ──
28
/// Minimal AST for the subset of Erlang terms understood by `ErlParser`.
#[derive(Clone, Debug)]
enum ErlTerm {
    /// Bare (`foo`) or single-quoted (`'Foo'`) atom, stored without quotes.
    Atom(String),
    /// Double-quoted string literal with escapes already resolved.
    String(String),
    /// Text carried by a `<<"...">>` binary literal.
    Binary(String),
    /// Integer literal that fits in an `i64`.
    Integer(i64),
    /// Floating-point literal.
    Float(f64),
    /// `{A, B, ...}` tuple, elements in source order.
    Tuple(Vec<ErlTerm>),
    /// `[A, B, ...]` list, elements in source order.
    List(Vec<ErlTerm>),
    /// `#{K => V, ...}` map as key/value pairs in source order.
    Map(Vec<(ErlTerm, ErlTerm)>),
}
40
41// ── Erlang term parser ──
42
/// Hand-written recursive-descent parser over a buffer of characters.
struct ErlParser {
    /// Input decoded into `char`s so positions can be indexed directly.
    chars: Vec<char>,
    /// Index into `chars` of the next unread character.
    pos: usize,
    /// Number of `parse_term` calls currently on the stack; compared against
    /// `MAX_RECURSION_DEPTH` to bound nesting.
    depth: usize,
}
48
49impl ErlParser {
50    fn new(source: &str) -> Self {
51        Self {
52            chars: source.chars().collect(),
53            pos: 0,
54            depth: 0,
55        }
56    }
57
58    fn parse_term(&mut self) -> Result<ErlTerm, String> {
59        if self.depth >= MAX_RECURSION_DEPTH {
60            return Err("recursion depth exceeded".to_string());
61        }
62        self.depth += 1;
63        let result = self.parse_term_inner();
64        self.depth -= 1;
65        result
66    }
67
68    fn parse_term_inner(&mut self) -> Result<ErlTerm, String> {
69        self.skip_whitespace_and_comments();
70        match self.peek() {
71            Some('{') => self.parse_tuple(),
72            Some('[') => self.parse_list(),
73            Some('#') if self.peek_n(1) == Some('{') => self.parse_map(),
74            Some('"') => self.parse_string().map(ErlTerm::String),
75            Some('<') if self.peek_n(1) == Some('<') => self.parse_binary().map(ErlTerm::Binary),
76            Some('\'') => self.parse_quoted_atom().map(ErlTerm::Atom),
77            Some(c) if c.is_ascii_digit() || c == '-' => self.parse_number(),
78            Some(c) if c.is_ascii_lowercase() || c == '_' => self.parse_atom_or_bool(),
79            Some(c) => Err(format!(
80                "Unexpected character '{}' at position {}",
81                c, self.pos
82            )),
83            None => Err("Unexpected end of input".to_string()),
84        }
85    }
86
87    fn parse_tuple(&mut self) -> Result<ErlTerm, String> {
88        self.expect('{')?;
89        let items = self.parse_comma_separated('}')?;
90        Ok(ErlTerm::Tuple(items))
91    }
92
93    fn parse_list(&mut self) -> Result<ErlTerm, String> {
94        self.expect('[')?;
95        let items = self.parse_comma_separated(']')?;
96        Ok(ErlTerm::List(items))
97    }
98
99    fn parse_map(&mut self) -> Result<ErlTerm, String> {
100        self.expect('#')?;
101        self.expect('{')?;
102
103        let mut entries = Vec::new();
104        let mut count = 0usize;
105
106        loop {
107            self.skip_whitespace_and_comments();
108            if self.peek() == Some('}') {
109                self.pos += 1;
110                break;
111            }
112
113            if count >= MAX_ITERATION_COUNT {
114                return Err("too many map entries".to_string());
115            }
116
117            let key = self.parse_term()?;
118            self.skip_whitespace_and_comments();
119
120            match (self.peek(), self.peek_n(1)) {
121                (Some('='), Some('>')) | (Some(':'), Some('=')) => {
122                    self.pos += 2;
123                }
124                _ => {
125                    return Err(format!(
126                        "Expected map association operator at position {}",
127                        self.pos
128                    ));
129                }
130            }
131
132            let value = self.parse_term()?;
133            entries.push((key, value));
134            count += 1;
135
136            self.skip_whitespace_and_comments();
137            match self.peek() {
138                Some(',') => {
139                    self.pos += 1;
140                }
141                Some('}') => {
142                    self.pos += 1;
143                    break;
144                }
145                Some(c) => {
146                    return Err(format!(
147                        "Expected ',' or '}}' in map but found '{}' at position {}",
148                        c, self.pos
149                    ));
150                }
151                None => return Err("Unterminated map literal".to_string()),
152            }
153        }
154
155        Ok(ErlTerm::Map(entries))
156    }
157
158    fn parse_comma_separated(&mut self, closing: char) -> Result<Vec<ErlTerm>, String> {
159        let mut items = Vec::new();
160        let mut count = 0usize;
161        loop {
162            self.skip_whitespace_and_comments();
163            if self.peek() == Some(closing) {
164                self.pos += 1;
165                break;
166            }
167            if count >= MAX_ITERATION_COUNT {
168                return Err("too many items".to_string());
169            }
170            items.push(self.parse_term()?);
171            count += 1;
172            self.skip_whitespace_and_comments();
173            if self.peek() == Some(',') {
174                self.pos += 1;
175            } else if self.peek() == Some('|') {
176                // list tail syntax: [H | T] — skip rest
177                self.pos += 1;
178                self.parse_term()?;
179                self.skip_whitespace_and_comments();
180                if self.peek() == Some(closing) {
181                    self.pos += 1;
182                }
183                break;
184            }
185        }
186        Ok(items)
187    }
188
189    fn parse_string(&mut self) -> Result<String, String> {
190        self.expect('"')?;
191        let mut out = String::new();
192        while let Some(c) = self.peek() {
193            self.pos += 1;
194            match c {
195                '"' => return Ok(out),
196                '\\' => {
197                    let escaped = self
198                        .peek()
199                        .ok_or_else(|| "Unterminated string escape".to_string())?;
200                    self.pos += 1;
201                    out.push(match escaped {
202                        'n' => '\n',
203                        'r' => '\r',
204                        't' => '\t',
205                        '"' => '"',
206                        '\\' => '\\',
207                        other => other,
208                    });
209                }
210                other => out.push(other),
211            }
212        }
213        Err("Unterminated string literal".to_string())
214    }
215
216    fn parse_binary(&mut self) -> Result<String, String> {
217        self.expect('<')?;
218        self.expect('<')?;
219        self.skip_whitespace_and_comments();
220        let value = if self.peek() == Some('"') {
221            self.parse_string()?
222        } else {
223            String::new()
224        };
225        self.skip_whitespace_and_comments();
226        self.expect('>')?;
227        self.expect('>')?;
228        Ok(value)
229    }
230
231    fn parse_quoted_atom(&mut self) -> Result<String, String> {
232        self.expect('\'')?;
233        let mut out = String::new();
234        while let Some(c) = self.peek() {
235            self.pos += 1;
236            match c {
237                '\'' => return Ok(out),
238                '\\' => {
239                    if let Some(escaped) = self.peek() {
240                        self.pos += 1;
241                        out.push(escaped);
242                    }
243                }
244                other => out.push(other),
245            }
246        }
247        Err("Unterminated quoted atom".to_string())
248    }
249
250    fn parse_atom_or_bool(&mut self) -> Result<ErlTerm, String> {
251        let atom = self.parse_bare_atom()?;
252        match atom.as_str() {
253            "true" => Ok(ErlTerm::Atom("true".to_string())),
254            "false" => Ok(ErlTerm::Atom("false".to_string())),
255            _ => Ok(ErlTerm::Atom(atom)),
256        }
257    }
258
259    fn parse_bare_atom(&mut self) -> Result<String, String> {
260        let start = self.pos;
261        while let Some(c) = self.peek() {
262            if c.is_ascii_alphanumeric() || c == '_' || c == '@' {
263                self.pos += 1;
264            } else {
265                break;
266            }
267        }
268        if self.pos == start {
269            return Err("Expected atom".to_string());
270        }
271        Ok(self.chars[start..self.pos].iter().collect())
272    }
273
274    fn parse_number(&mut self) -> Result<ErlTerm, String> {
275        let start = self.pos;
276        if self.peek() == Some('-') {
277            self.pos += 1;
278        }
279        while let Some(c) = self.peek() {
280            if c.is_ascii_digit() {
281                self.pos += 1;
282            } else {
283                break;
284            }
285        }
286        if self.peek() == Some('.') && self.peek_n(1).is_some_and(|c| c.is_ascii_digit()) {
287            self.pos += 1;
288            while let Some(c) = self.peek() {
289                if c.is_ascii_digit() {
290                    self.pos += 1;
291                } else {
292                    break;
293                }
294            }
295            let s: String = self.chars[start..self.pos].iter().collect();
296            return s
297                .parse::<f64>()
298                .map(ErlTerm::Float)
299                .map_err(|e| format!("Invalid float: {}", e));
300        }
301        let s: String = self.chars[start..self.pos].iter().collect();
302        s.parse::<i64>()
303            .map(ErlTerm::Integer)
304            .map_err(|e| format!("Invalid integer: {}", e))
305    }
306
307    fn skip_whitespace_and_comments(&mut self) {
308        loop {
309            match self.peek() {
310                Some(c) if c.is_whitespace() => {
311                    self.pos += 1;
312                }
313                Some('%') => {
314                    while let Some(c) = self.peek() {
315                        self.pos += 1;
316                        if c == '\n' {
317                            break;
318                        }
319                    }
320                }
321                _ => break,
322            }
323        }
324    }
325
326    fn expect(&mut self, expected: char) -> Result<(), String> {
327        self.skip_whitespace_and_comments();
328        match self.peek() {
329            Some(c) if c == expected => {
330                self.pos += 1;
331                Ok(())
332            }
333            Some(c) => Err(format!(
334                "Expected '{}' but found '{}' at position {}",
335                expected, c, self.pos
336            )),
337            None => Err(format!("Expected '{}' but reached end of input", expected)),
338        }
339    }
340
341    fn peek(&self) -> Option<char> {
342        self.chars.get(self.pos).copied()
343    }
344
345    fn peek_n(&self, n: usize) -> Option<char> {
346        self.chars.get(self.pos + n).copied()
347    }
348
349    fn is_eof(&self) -> bool {
350        self.pos >= self.chars.len()
351    }
352}
353
354fn parse_dotted_terms(content: &str) -> Result<Vec<ErlTerm>, String> {
355    let normalized = strip_template_placeholders(content);
356    let mut parser = ErlParser::new(&normalized);
357    let mut terms = Vec::new();
358    let mut count = 0usize;
359    loop {
360        parser.skip_whitespace_and_comments();
361        if parser.is_eof() {
362            break;
363        }
364        if parser.peek() == Some('.') {
365            parser.pos += 1;
366            continue;
367        }
368        if count >= MAX_ITERATION_COUNT {
369            break;
370        }
371        let term = parser.parse_term()?;
372        terms.push(term);
373        count += 1;
374        parser.skip_whitespace_and_comments();
375        if parser.peek() == Some('.') {
376            parser.pos += 1;
377        }
378    }
379    Ok(terms)
380}
381
/// Removes unquoted build-template placeholders such as `%VSN%` so the
/// remaining text can be parsed as Erlang terms.
///
/// A `%` outside a string or quoted atom starts a placeholder when:
/// * it is not immediately followed by another `%`,
/// * the text up to the last `%` on the line is non-empty and consists only
///   of ASCII uppercase letters, digits, `_`, `,` or `%`, and
/// * everything after that last `%` on the line is whitespace or one of
///   `,`, `]`, `}`, `)`.
///
/// Any other unquoted `%` starts an Erlang comment. The comment is copied
/// verbatim up to the end of the line, so quotes or apostrophes inside it
/// (e.g. `%% Don't edit`) cannot be mistaken for the start of a string or
/// quoted atom and hide placeholders on later lines.
///
/// Text inside `"..."` and `'...'` is always copied unchanged.
fn strip_template_placeholders(source: &str) -> String {
    let chars: Vec<char> = source.chars().collect();
    let mut result = String::with_capacity(source.len());
    let mut i = 0usize;
    // Delimiter of the string / quoted atom currently being copied, if any.
    let mut open_quote: Option<char> = None;

    while i < chars.len() {
        let c = chars[i];

        if let Some(quote) = open_quote {
            result.push(c);
            i += 1;
            if c == '\\' {
                // Copy the escaped character so an escaped delimiter does not
                // close the literal.
                if let Some(&escaped) = chars.get(i) {
                    result.push(escaped);
                    i += 1;
                }
            } else if c == quote {
                open_quote = None;
            }
            continue;
        }

        match c {
            '"' | '\'' => {
                open_quote = Some(c);
                result.push(c);
                i += 1;
            }
            '%' => {
                // `chars[i]` is '%', so `line_end` is always greater than `i`.
                let line_end = chars[i..]
                    .iter()
                    .position(|&ch| ch == '\n')
                    .map_or(chars.len(), |offset| i + offset);

                let placeholder_end = chars[i + 1..line_end]
                    .iter()
                    .rposition(|&ch| ch == '%')
                    .map(|offset| i + 1 + offset)
                    .filter(|&last_percent| {
                        let body = &chars[i + 1..last_percent];
                        let trailing = &chars[last_percent + 1..line_end];
                        // A body starting with '%' means the line began with
                        // `%%`, which is always a comment.
                        body.first().is_some_and(|&ch| ch != '%')
                            && body.iter().all(|&ch| {
                                ch.is_ascii_uppercase()
                                    || ch.is_ascii_digit()
                                    || matches!(ch, '_' | ',' | '%')
                            })
                            && trailing.iter().all(|&ch| {
                                ch.is_whitespace() || matches!(ch, ',' | ']' | '}' | ')')
                            })
                    })
                    .map(|last_percent| last_percent + 1);

                match placeholder_end {
                    Some(end) => i = end,
                    None => {
                        // Comment: keep it untouched and do not scan it for
                        // quote characters.
                        result.extend(&chars[i..line_end]);
                        i = line_end;
                    }
                }
            }
            _ => {
                result.push(c);
                i += 1;
            }
        }
    }

    result
}
474
475// ── Helpers ──
476
477fn term_to_str(term: &ErlTerm) -> Option<String> {
478    match term {
479        ErlTerm::String(s) | ErlTerm::Binary(s) | ErlTerm::Atom(s) => Some(s.clone()),
480        ErlTerm::Integer(n) => Some(n.to_string()),
481        ErlTerm::Float(f) => Some(f.to_string()),
482        _ => None,
483    }
484}
485
486fn term_to_proplist(term: &ErlTerm) -> Option<Vec<(String, ErlTerm)>> {
487    let items = match term {
488        ErlTerm::List(items) => items,
489        _ => return None,
490    };
491    let mut result = Vec::new();
492    for item in items {
493        if let ErlTerm::Tuple(fields) = item
494            && fields.len() == 2
495            && let Some(key) = term_to_str(&fields[0])
496        {
497            result.push((key, fields[1].clone()));
498        }
499    }
500    Some(result)
501}
502
503fn term_to_key_value_pairs(term: &ErlTerm) -> Option<Vec<(String, ErlTerm)>> {
504    match term {
505        ErlTerm::Map(entries) => Some(
506            entries
507                .iter()
508                .filter_map(|(key, value)| term_to_str(key).map(|key| (key, value.clone())))
509                .collect(),
510        ),
511        _ => term_to_proplist(term),
512    }
513}
514
515fn term_to_atom_list(term: &ErlTerm) -> Vec<String> {
516    match term {
517        ErlTerm::List(items) => items.iter().filter_map(term_to_str).collect(),
518        _ => Vec::new(),
519    }
520}
521
522fn build_hex_purl(name: &str, version: Option<&str>) -> Option<String> {
523    let mut purl = PackageUrl::new("hex", name).ok()?;
524    if let Some(version) = version {
525        purl.with_version(version).ok()?;
526    }
527    Some(purl.to_string())
528}
529
530// ── ErlangAppSrcParser ──
531
532impl PackageParser for ErlangAppSrcParser {
533    const PACKAGE_TYPE: PackageType = PackageType::Hex;
534
535    fn metadata() -> Vec<ParserMetadata> {
536        vec![ParserMetadata {
537            description: "Erlang OTP application resource file",
538            file_patterns: &["**/*.app.src"],
539            package_type: "hex",
540            primary_language: "Erlang",
541            documentation_url: Some("https://www.erlang.org/doc/apps/kernel/application"),
542        }]
543    }
544
545    fn is_match(path: &Path) -> bool {
546        path.extension()
547            .and_then(|e| e.to_str())
548            .is_some_and(|ext| ext == "src")
549            && path
550                .file_stem()
551                .and_then(|s| s.to_str())
552                .is_some_and(|stem| stem.ends_with(".app"))
553    }
554
555    fn extract_packages(path: &Path) -> Vec<PackageData> {
556        let content = match read_file_to_string(path, None) {
557            Ok(c) => c,
558            Err(e) => {
559                warn!("Failed to read {:?}: {}", path, e);
560                return vec![default_app_src_package()];
561            }
562        };
563
564        match parse_app_src(&content) {
565            Ok(pkg) => vec![pkg],
566            Err(e) => {
567                warn!("Failed to parse {:?}: {}", path, e);
568                vec![default_app_src_package()]
569            }
570        }
571    }
572}
573
574fn default_app_src_package() -> PackageData {
575    PackageData {
576        package_type: Some(PackageType::Hex),
577        primary_language: Some("Erlang".to_string()),
578        datasource_id: Some(DatasourceId::ErlangOtpAppSrc),
579        ..Default::default()
580    }
581}
582
583fn parse_app_src(content: &str) -> Result<PackageData, String> {
584    let terms = parse_dotted_terms(content)?;
585
586    let app_tuple = terms
587        .into_iter()
588        .find_map(|term| {
589            if let ErlTerm::Tuple(fields) = &term
590                && fields.len() == 3
591                && term_to_str(&fields[0]).as_deref() == Some("application")
592            {
593                Some(term)
594            } else {
595                None
596            }
597        })
598        .ok_or_else(|| "No {application, _, _} tuple found".to_string())?;
599
600    let fields = match app_tuple {
601        ErlTerm::Tuple(fields) => fields,
602        _ => unreachable!(),
603    };
604
605    let app_name = term_to_str(&fields[1]);
606    let props = term_to_proplist(&fields[2]).unwrap_or_default();
607
608    let mut package = default_app_src_package();
609    package.name = app_name.map(truncate_field);
610
611    let mut extra_data = HashMap::new();
612
613    for (key, value) in &props {
614        match key.as_str() {
615            "vsn" => {
616                if let Some(v) = term_to_str(value)
617                    && !v.contains('%')
618                {
619                    package.version = Some(truncate_field(v));
620                }
621            }
622            "description" => {
623                package.description = term_to_str(value).map(truncate_field);
624            }
625            "licenses" => {
626                let licenses = term_to_atom_list(value);
627                if !licenses.is_empty() {
628                    package.extracted_license_statement = Some(truncate_field(licenses.join(", ")));
629                }
630            }
631            "links" => {
632                if let Some(link_props) = term_to_key_value_pairs(value) {
633                    for (link_name, link_val) in &link_props {
634                        if let Some(url) = term_to_str(link_val) {
635                            let lower = link_name.to_lowercase();
636                            if lower.contains("github")
637                                || lower.contains("source")
638                                || lower.contains("repo")
639                            {
640                                package.vcs_url = Some(truncate_field(url.clone()));
641                            }
642                            if package.homepage_url.is_none() {
643                                package.homepage_url = Some(truncate_field(url));
644                            }
645                        }
646                    }
647                }
648            }
649            "applications" => {
650                let apps = term_to_atom_list(value);
651                for app in apps {
652                    if is_otp_stdlib(&app) {
653                        continue;
654                    }
655                    package.dependencies.push(Dependency {
656                        purl: build_hex_purl(&app, None).map(truncate_field),
657                        extracted_requirement: None,
658                        scope: Some("dependencies".to_string()),
659                        is_runtime: Some(true),
660                        is_optional: None,
661                        is_pinned: None,
662                        is_direct: None,
663                        resolved_package: None,
664                        extra_data: None,
665                    });
666                }
667            }
668            "runtime_dependencies" => {
669                let deps = term_to_atom_list(value);
670                for dep_str in deps {
671                    if let Some((name, version)) = dep_str.split_once('-') {
672                        if is_otp_stdlib(name) {
673                            continue;
674                        }
675                        let version_str = if version.starts_with('@') {
676                            None
677                        } else {
678                            Some(version)
679                        };
680                        package.dependencies.push(Dependency {
681                            purl: build_hex_purl(name, version_str).map(truncate_field),
682                            extracted_requirement: version_str
683                                .map(|v| truncate_field(v.to_string())),
684                            scope: Some("dependencies".to_string()),
685                            is_runtime: Some(true),
686                            is_optional: None,
687                            is_pinned: None,
688                            is_direct: None,
689                            resolved_package: None,
690                            extra_data: None,
691                        });
692                    }
693                }
694            }
695            "maintainers" => {
696                let maintainers = term_to_atom_list(value);
697                if !maintainers.is_empty() {
698                    extra_data.insert(
699                        "maintainers".to_string(),
700                        JsonValue::Array(
701                            maintainers
702                                .into_iter()
703                                .map(|m| JsonValue::String(truncate_field(m)))
704                                .collect(),
705                        ),
706                    );
707                }
708            }
709            "keywords" => {
710                let keywords = term_to_atom_list(value);
711                if !keywords.is_empty() {
712                    package.keywords = keywords.into_iter().map(truncate_field).collect();
713                }
714            }
715            _ => {}
716        }
717    }
718
719    if let Some(ref name) = package.name {
720        package.purl = build_hex_purl(name, package.version.as_deref()).map(truncate_field);
721        package.repository_homepage_url =
722            Some(truncate_field(format!("https://hex.pm/packages/{}", name)));
723        package.api_data_url = Some(truncate_field(format!(
724            "https://hex.pm/api/packages/{}",
725            name
726        )));
727    }
728
729    if !extra_data.is_empty() {
730        package.extra_data = Some(extra_data);
731    }
732
733    Ok(package)
734}
735
/// Reports whether `name` is one of the application names in the fixed list
/// below, which callers use to skip those applications as dependencies.
///
/// The comparison is exact and case-sensitive.
fn is_otp_stdlib(name: &str) -> bool {
    const OTP_APPLICATIONS: &[&str] = &[
        "kernel",
        "stdlib",
        "sasl",
        "erts",
        "compiler",
        "crypto",
        "inets",
        "ssl",
        "public_key",
        "asn1",
        "syntax_tools",
        "tools",
        "os_mon",
        "runtime_tools",
        "mnesia",
        "observer",
        "wx",
        "debugger",
        "reltool",
        "xmerl",
        "edoc",
        "eunit",
        "common_test",
        "dialyzer",
        "et",
        "megaco",
        "parsetools",
        "snmp",
        "ssh",
        "tftp",
        "ftp",
        "erl_interface",
        "jinterface",
        "odbc",
        "eldap",
        "diameter",
    ];

    OTP_APPLICATIONS.iter().any(|&app| app == name)
}
777
778// ── RebarConfigParser ──
779
780impl PackageParser for RebarConfigParser {
781    const PACKAGE_TYPE: PackageType = PackageType::Hex;
782
783    fn metadata() -> Vec<ParserMetadata> {
784        vec![ParserMetadata {
785            description: "Rebar3 configuration",
786            file_patterns: &["**/rebar.config"],
787            package_type: "hex",
788            primary_language: "Erlang",
789            documentation_url: Some("https://rebar3.org/docs/configuration/configuration/"),
790        }]
791    }
792
793    fn is_match(path: &Path) -> bool {
794        path.file_name().and_then(|n| n.to_str()) == Some("rebar.config")
795    }
796
797    fn extract_packages(path: &Path) -> Vec<PackageData> {
798        let content = match read_file_to_string(path, None) {
799            Ok(c) => c,
800            Err(e) => {
801                warn!("Failed to read {:?}: {}", path, e);
802                return vec![default_rebar_config_package()];
803            }
804        };
805
806        match parse_rebar_config(&content) {
807            Ok(pkg) => vec![pkg],
808            Err(e) => {
809                warn!("Failed to parse {:?}: {}", path, e);
810                vec![default_rebar_config_package()]
811            }
812        }
813    }
814}
815
816fn default_rebar_config_package() -> PackageData {
817    PackageData {
818        package_type: Some(PackageType::Hex),
819        primary_language: Some("Erlang".to_string()),
820        datasource_id: Some(DatasourceId::RebarConfig),
821        ..Default::default()
822    }
823}
824
825fn parse_rebar_config(content: &str) -> Result<PackageData, String> {
826    let terms = parse_dotted_terms(content)?;
827
828    let mut package = default_rebar_config_package();
829
830    for term in &terms {
831        if let ErlTerm::Tuple(fields) = term
832            && fields.len() == 2
833        {
834            let key = term_to_str(&fields[0]);
835            match key.as_deref() {
836                Some("deps") => {
837                    if let ErlTerm::List(deps) = &fields[1] {
838                        for dep in deps.iter().take(MAX_ITERATION_COUNT) {
839                            if let Some(d) = parse_rebar_dep(dep) {
840                                package.dependencies.push(d);
841                            }
842                        }
843                    }
844                }
845                Some("profiles") => {
846                    parse_profile_deps(&fields[1], &mut package.dependencies);
847                }
848                _ => {}
849            }
850        }
851    }
852
853    Ok(package)
854}
855
856fn parse_rebar_dep(term: &ErlTerm) -> Option<Dependency> {
857    let fields = match term {
858        ErlTerm::Tuple(fields) => fields,
859        _ => return None,
860    };
861
862    if fields.is_empty() {
863        return None;
864    }
865
866    if let Some(key) = term_to_str(&fields[0])
867        && key.starts_with("if_")
868    {
869        return None;
870    }
871
872    let app_name = term_to_str(&fields[0])?;
873
874    match fields.len() {
875        // {Name, Version} or {Name, {git, URL, Ref}}
876        2 => {
877            if let Some(version) = term_to_str(&fields[1]) {
878                // {Name, Version}
879                Some(Dependency {
880                    purl: build_hex_purl(&app_name, Some(&version)).map(truncate_field),
881                    extracted_requirement: Some(truncate_field(version)),
882                    scope: Some("dependencies".to_string()),
883                    is_runtime: None,
884                    is_optional: None,
885                    is_pinned: None,
886                    is_direct: None,
887                    resolved_package: None,
888                    extra_data: None,
889                })
890            } else {
891                let package_name = extract_rebar_package_name(&fields[1], &app_name);
892                let vcs_url = extract_git_url(&fields[1]);
893                let version = extract_git_version(&fields[1]);
894                Some(Dependency {
895                    purl: build_hex_purl(&package_name, version.as_deref()).map(truncate_field),
896                    extracted_requirement: version.map(truncate_field),
897                    scope: Some("dependencies".to_string()),
898                    is_runtime: None,
899                    is_optional: None,
900                    is_pinned: None,
901                    is_direct: None,
902                    resolved_package: None,
903                    extra_data: build_rebar_dependency_extra_data(
904                        vcs_url,
905                        app_name.as_str(),
906                        package_name.as_str(),
907                    ),
908                })
909            }
910        }
911        // {Name, Version, Source}
912        3 => {
913            if let Some(version) = term_to_str(&fields[1]) {
914                let package_name = extract_rebar_package_name(&fields[2], &app_name);
915                let vcs_url = extract_git_url(&fields[2]);
916                Some(Dependency {
917                    purl: build_hex_purl(&package_name, Some(&version)).map(truncate_field),
918                    extracted_requirement: Some(truncate_field(version)),
919                    scope: Some("dependencies".to_string()),
920                    is_runtime: None,
921                    is_optional: None,
922                    is_pinned: None,
923                    is_direct: None,
924                    resolved_package: None,
925                    extra_data: build_rebar_dependency_extra_data(
926                        vcs_url,
927                        app_name.as_str(),
928                        package_name.as_str(),
929                    ),
930                })
931            } else {
932                let package_name = extract_rebar_package_name(&fields[1], &app_name);
933                let vcs_url = extract_git_url(&fields[1]);
934                let version = extract_git_version(&fields[1]);
935                Some(Dependency {
936                    purl: build_hex_purl(&package_name, version.as_deref()).map(truncate_field),
937                    extracted_requirement: version.map(truncate_field),
938                    scope: Some("dependencies".to_string()),
939                    is_runtime: None,
940                    is_optional: None,
941                    is_pinned: None,
942                    is_direct: None,
943                    resolved_package: None,
944                    extra_data: build_rebar_dependency_extra_data(
945                        vcs_url,
946                        app_name.as_str(),
947                        package_name.as_str(),
948                    ),
949                })
950            }
951        }
952        _ => None,
953    }
954}
955
956fn extract_rebar_package_name(term: &ErlTerm, fallback_name: &str) -> String {
957    if let ErlTerm::Tuple(fields) = term
958        && fields.len() >= 2
959        && term_to_str(&fields[0]).as_deref() == Some("pkg")
960        && let Some(package_name) = term_to_str(&fields[1])
961    {
962        package_name
963    } else {
964        fallback_name.to_string()
965    }
966}
967
968fn build_rebar_dependency_extra_data(
969    vcs_url: Option<String>,
970    app_name: &str,
971    package_name: &str,
972) -> Option<HashMap<String, JsonValue>> {
973    let mut extra_data = HashMap::new();
974
975    if let Some(url) = vcs_url {
976        extra_data.insert(
977            "vcs_url".to_string(),
978            JsonValue::String(truncate_field(url)),
979        );
980    }
981
982    if app_name != package_name {
983        extra_data.insert(
984            "app_name".to_string(),
985            JsonValue::String(truncate_field(app_name.to_string())),
986        );
987    }
988
989    if extra_data.is_empty() {
990        None
991    } else {
992        Some(extra_data)
993    }
994}
995
996fn extract_git_url(term: &ErlTerm) -> Option<String> {
997    if let ErlTerm::Tuple(fields) = term
998        && fields.len() >= 2
999        && matches!(
1000            term_to_str(&fields[0]).as_deref(),
1001            Some("git") | Some("git_subdir")
1002        )
1003    {
1004        term_to_str(&fields[1])
1005    } else {
1006        None
1007    }
1008}
1009
1010fn extract_git_version(term: &ErlTerm) -> Option<String> {
1011    if let ErlTerm::Tuple(fields) = term
1012        && fields.len() >= 3
1013        && matches!(
1014            term_to_str(&fields[0]).as_deref(),
1015            Some("git") | Some("git_subdir")
1016        )
1017    {
1018        if let ErlTerm::Tuple(ref_fields) = &fields[2]
1019            && ref_fields.len() == 2
1020        {
1021            let ref_type = term_to_str(&ref_fields[0])?;
1022            let ref_val = term_to_str(&ref_fields[1])?;
1023            match ref_type.as_str() {
1024                "tag" => Some(ref_val),
1025                _ => None,
1026            }
1027        } else {
1028            None
1029        }
1030    } else {
1031        None
1032    }
1033}
1034
1035fn parse_profile_deps(term: &ErlTerm, dependencies: &mut Vec<Dependency>) {
1036    let profiles = match term {
1037        ErlTerm::List(items) => items,
1038        _ => return,
1039    };
1040
1041    for profile in profiles.iter().take(MAX_ITERATION_COUNT) {
1042        if let ErlTerm::Tuple(fields) = profile
1043            && fields.len() == 2
1044        {
1045            let profile_name = term_to_str(&fields[0]).unwrap_or_default();
1046            if let ErlTerm::List(profile_opts) = &fields[1] {
1047                for opt in profile_opts {
1048                    if let ErlTerm::Tuple(opt_fields) = opt
1049                        && opt_fields.len() == 2
1050                        && term_to_str(&opt_fields[0]).as_deref() == Some("deps")
1051                        && let ErlTerm::List(deps) = &opt_fields[1]
1052                    {
1053                        for dep in deps.iter().take(MAX_ITERATION_COUNT) {
1054                            if let Some(mut d) = parse_rebar_dep(dep) {
1055                                d.scope = Some(truncate_field(profile_name.clone()));
1056                                dependencies.push(d);
1057                            }
1058                        }
1059                    }
1060                }
1061            }
1062        }
1063    }
1064}
1065
1066// ── RebarLockParser ──
1067
1068impl PackageParser for RebarLockParser {
1069    const PACKAGE_TYPE: PackageType = PackageType::Hex;
1070
1071    fn metadata() -> Vec<ParserMetadata> {
1072        vec![ParserMetadata {
1073            description: "Rebar3 lockfile",
1074            file_patterns: &["**/rebar.lock"],
1075            package_type: "hex",
1076            primary_language: "Erlang",
1077            documentation_url: Some("https://rebar3.org/docs/configuration/configuration/"),
1078        }]
1079    }
1080
1081    fn is_match(path: &Path) -> bool {
1082        path.file_name().and_then(|n| n.to_str()) == Some("rebar.lock")
1083    }
1084
1085    fn extract_packages(path: &Path) -> Vec<PackageData> {
1086        let content = match read_file_to_string(path, None) {
1087            Ok(c) => c,
1088            Err(e) => {
1089                warn!("Failed to read {:?}: {}", path, e);
1090                return vec![default_rebar_lock_package()];
1091            }
1092        };
1093
1094        match parse_rebar_lock(&content) {
1095            Ok(pkg) => vec![pkg],
1096            Err(e) => {
1097                warn!("Failed to parse {:?}: {}", path, e);
1098                vec![default_rebar_lock_package()]
1099            }
1100        }
1101    }
1102}
1103
1104fn default_rebar_lock_package() -> PackageData {
1105    PackageData {
1106        package_type: Some(PackageType::Hex),
1107        primary_language: Some("Erlang".to_string()),
1108        datasource_id: Some(DatasourceId::RebarLock),
1109        ..Default::default()
1110    }
1111}
1112
1113fn parse_rebar_lock(content: &str) -> Result<PackageData, String> {
1114    let terms = parse_dotted_terms(content)?;
1115
1116    // rebar.lock format: first term is either:
1117    // - {Version, [deps]}  (v2 format, e.g. {"1.2.0", [...]})
1118    // - [deps]             (v1 format, flat list)
1119    // Second term (if present): [{pkg_hash, [...]}, {pkg_hash_ext, [...]}]
1120
1121    let (dep_list, hash_map) = match terms.as_slice() {
1122        // v2 format: {"1.2.0", [deps]}
1123        [ErlTerm::Tuple(fields), rest @ ..] if fields.len() == 2 => {
1124            let deps = match &fields[1] {
1125                ErlTerm::List(items) => items.clone(),
1126                _ => return Err("Expected dependency list in lock tuple".to_string()),
1127            };
1128            let hashes = rest.first().map(extract_pkg_hashes).unwrap_or_default();
1129            (deps, hashes)
1130        }
1131        // v1 format: [deps]
1132        [ErlTerm::List(items), rest @ ..] => {
1133            let hashes = rest.first().map(extract_pkg_hashes).unwrap_or_default();
1134            (items.clone(), hashes)
1135        }
1136        _ => return Err("Unrecognized rebar.lock format".to_string()),
1137    };
1138
1139    let mut package = default_rebar_lock_package();
1140
1141    for dep_term in dep_list.iter().take(MAX_ITERATION_COUNT) {
1142        if let Some(dep) = parse_lock_dep(dep_term, &hash_map) {
1143            package.dependencies.push(dep);
1144        }
1145    }
1146
1147    Ok(package)
1148}
1149
1150fn parse_lock_dep(term: &ErlTerm, hashes: &HashMap<String, String>) -> Option<Dependency> {
1151    let fields = match term {
1152        ErlTerm::Tuple(fields) if fields.len() >= 3 => fields,
1153        _ => return None,
1154    };
1155
1156    let app_name = term_to_str(&fields[0])?;
1157    // fields[2] is the level (integer)
1158
1159    let (package_name, version, vcs_url) = match &fields[1] {
1160        // {pkg, <<"name">>, <<"version">>}
1161        ErlTerm::Tuple(pkg_fields)
1162            if pkg_fields.len() >= 3 && term_to_str(&pkg_fields[0]).as_deref() == Some("pkg") =>
1163        {
1164            let package_name = term_to_str(&pkg_fields[1]).unwrap_or_else(|| app_name.clone());
1165            let ver = term_to_str(&pkg_fields[2]);
1166            (package_name, ver, None)
1167        }
1168        // {git, "url", {ref, "hash"}}
1169        ErlTerm::Tuple(git_fields)
1170            if git_fields.len() >= 2
1171                && matches!(
1172                    term_to_str(&git_fields[0]).as_deref(),
1173                    Some("git") | Some("git_subdir")
1174                ) =>
1175        {
1176            let url = term_to_str(&git_fields[1]);
1177            let ver = if git_fields.len() >= 3 {
1178                extract_git_version_from_lock_ref(&git_fields[2])
1179            } else {
1180                None
1181            };
1182            (app_name.clone(), ver, url)
1183        }
1184        _ => (app_name.clone(), None, None),
1185    };
1186
1187    let sha256 = hashes
1188        .get(&app_name)
1189        .or_else(|| hashes.get(&package_name))
1190        .and_then(|h| Sha256Digest::from_hex(h).ok());
1191
1192    let resolved_package = ResolvedPackage {
1193        primary_language: Some("Erlang".to_string()),
1194        sha256,
1195        is_virtual: true,
1196        datasource_id: Some(DatasourceId::RebarLock),
1197        purl: build_hex_purl(&package_name, version.as_deref()).map(truncate_field),
1198        repository_homepage_url: Some(truncate_field(format!(
1199            "https://hex.pm/packages/{}",
1200            package_name
1201        ))),
1202        api_data_url: Some(truncate_field(format!(
1203            "https://hex.pm/api/packages/{}",
1204            package_name
1205        ))),
1206        ..ResolvedPackage::new(
1207            PackageType::Hex,
1208            String::new(),
1209            package_name.clone(),
1210            version.clone().unwrap_or_default(),
1211        )
1212    };
1213
1214    Some(Dependency {
1215        purl: build_hex_purl(&package_name, version.as_deref()).map(truncate_field),
1216        extracted_requirement: version.map(truncate_field),
1217        scope: Some("dependencies".to_string()),
1218        is_runtime: None,
1219        is_optional: None,
1220        is_pinned: Some(true),
1221        is_direct: None,
1222        resolved_package: Some(Box::new(resolved_package)),
1223        extra_data: build_rebar_dependency_extra_data(
1224            vcs_url,
1225            app_name.as_str(),
1226            package_name.as_str(),
1227        ),
1228    })
1229}
1230
1231fn extract_git_version_from_lock_ref(term: &ErlTerm) -> Option<String> {
1232    if let ErlTerm::Tuple(fields) = term
1233        && fields.len() == 2
1234        && term_to_str(&fields[0]).as_deref() == Some("ref")
1235    {
1236        term_to_str(&fields[1])
1237    } else {
1238        None
1239    }
1240}
1241
1242fn extract_pkg_hashes(term: &ErlTerm) -> HashMap<String, String> {
1243    let items = match term {
1244        ErlTerm::List(items) => items,
1245        _ => return HashMap::new(),
1246    };
1247
1248    let mut hashes = HashMap::new();
1249    for item in items {
1250        if let ErlTerm::Tuple(fields) = item
1251            && fields.len() == 2
1252            && term_to_str(&fields[0]).as_deref() == Some("pkg_hash")
1253            && let ErlTerm::List(hash_list) = &fields[1]
1254        {
1255            for entry in hash_list.iter().take(MAX_ITERATION_COUNT) {
1256                if let ErlTerm::Tuple(pair) = entry
1257                    && pair.len() == 2
1258                    && let (Some(name), Some(hash)) = (term_to_str(&pair[0]), term_to_str(&pair[1]))
1259                {
1260                    hashes.insert(name, hash);
1261                }
1262            }
1263        }
1264    }
1265    hashes
1266}