Skip to main content

provenant/parsers/
hex_lock.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use crate::parsers::utils::{
9    MAX_ITERATION_COUNT, RecursionGuard, read_file_to_string, truncate_field,
10};
11use packageurl::PackageUrl;
12use serde_json::Value as JsonValue;
13
14use crate::models::{
15    DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage, Sha256Digest,
16};
17
18use super::PackageParser;
19
/// Parser for Elixir `mix.lock` lockfiles.
///
/// Its `PackageParser` implementation matches files named exactly `mix.lock`
/// and reports the locked `:hex` entries as pinned dependencies.
pub struct HexLockParser;
21
/// A value produced by the hand-written `mix.lock` term parser.
#[derive(Clone, Debug)]
enum Term {
    /// A `%{...}` map; entries are kept in the order they were parsed.
    Map(Vec<(Term, Term)>),
    /// A `{...}` tuple.
    Tuple(Vec<Term>),
    /// A `[...]` list (also used for lists mixing plain and `key: value` items).
    List(Vec<Term>),
    /// A non-empty `[...]` list whose elements were all written as `key: value`.
    KeywordList(Vec<(String, Term)>),
    /// A double-quoted string with escapes already resolved.
    String(String),
    /// An atom such as `:hex`, stored without the leading colon.
    Atom(String),
    /// A `true` or `false` literal.
    Bool(bool),
    /// A decimal integer literal, optionally prefixed with `-`.
    Integer(i64),
}
33
/// Cursor-based parser over the text of a `mix.lock` file.
struct Parser<'a> {
    /// The input decoded into chars so the cursor can move one char at a time.
    chars: Vec<char>,
    /// Index of the next unread element of `chars` (a char index, not a byte index).
    pos: usize,
    /// The original input text; sliced when parsing integer literals.
    source: &'a str,
    /// Guard consulted on every `parse_term` call to bound nesting depth.
    guard: RecursionGuard<()>,
}
40
41impl PackageParser for HexLockParser {
42    const PACKAGE_TYPE: PackageType = PackageType::Hex;
43
44    fn is_match(path: &Path) -> bool {
45        path.file_name().and_then(|name| name.to_str()) == Some("mix.lock")
46    }
47
48    fn extract_packages(path: &Path) -> Vec<PackageData> {
49        let content = match read_file_to_string(path, None) {
50            Ok(content) => content,
51            Err(e) => {
52                warn!("Failed to read mix.lock at {:?}: {}", path, e);
53                return vec![default_package_data()];
54            }
55        };
56
57        match parse_mix_lock(&content) {
58            Ok(package_data) => vec![package_data],
59            Err(e) => {
60                warn!("Failed to parse mix.lock at {:?}: {}", path, e);
61                vec![default_package_data()]
62            }
63        }
64    }
65}
66
67fn default_package_data() -> PackageData {
68    PackageData {
69        package_type: Some(PackageType::Hex),
70        primary_language: Some("Elixir".to_string()),
71        datasource_id: Some(DatasourceId::HexMixLock),
72        ..Default::default()
73    }
74}
75
76fn parse_mix_lock(content: &str) -> Result<PackageData, String> {
77    let mut parser = Parser::new(content);
78    let term = parser.parse_term()?;
79    parser.skip_ws();
80    if !parser.is_eof() {
81        return Err("Unexpected trailing content in mix.lock".to_string());
82    }
83
84    let entries = match term {
85        Term::Map(entries) => entries,
86        _ => return Err("mix.lock root must be a map".to_string()),
87    };
88
89    let mut dependencies = Vec::new();
90    for (key, value) in entries.into_iter().take(MAX_ITERATION_COUNT) {
91        if let Some(dep) = build_dependency_from_lock_entry(&key, &value)? {
92            dependencies.push(dep);
93        }
94    }
95
96    let mut package = default_package_data();
97    package.dependencies = dependencies;
98    Ok(package)
99}
100
101fn build_dependency_from_lock_entry(
102    key: &Term,
103    value: &Term,
104) -> Result<Option<Dependency>, String> {
105    let app_name = truncate_field(term_to_string(key)?);
106
107    let tuple = match value {
108        Term::Tuple(items) => items,
109        _ => return Ok(None),
110    };
111
112    if tuple.len() < 8 {
113        return Ok(None);
114    }
115
116    let kind = term_to_atom(&tuple[0])?;
117    if kind != "hex" {
118        return Ok(None);
119    }
120
121    let package_name = truncate_field(term_to_atom(&tuple[1])?);
122    let version = truncate_field(term_to_string(&tuple[2])?);
123    let inner_checksum = truncate_field(term_to_string(&tuple[3])?);
124    let managers = term_to_atom_list(&tuple[4])?;
125    let nested_dependencies = term_to_dependency_tuples(&tuple[5])?;
126    let repo = truncate_field(term_to_string(&tuple[6])?);
127    let outer_checksum = truncate_field(term_to_string(&tuple[7])?);
128
129    let purl = build_hex_purl(&package_name, Some(&version), Some(&repo));
130    let resolved_package = ResolvedPackage {
131        primary_language: Some("Elixir".to_string()),
132        download_url: None,
133        sha1: None,
134        sha256: Sha256Digest::from_hex(&inner_checksum).ok(),
135        sha512: None,
136        md5: None,
137        is_virtual: true,
138        extra_data: Some(HashMap::from([
139            (
140                "repo".to_string(),
141                JsonValue::String(truncate_field(repo.clone())),
142            ),
143            (
144                "outer_checksum".to_string(),
145                JsonValue::String(truncate_field(outer_checksum.clone())),
146            ),
147            (
148                "managers".to_string(),
149                JsonValue::Array(
150                    managers
151                        .into_iter()
152                        .map(|m| JsonValue::String(truncate_field(m)))
153                        .collect(),
154                ),
155            ),
156        ])),
157        dependencies: nested_dependencies
158            .into_iter()
159            .map(build_nested_dependency)
160            .collect::<Result<Vec<_>, _>>()?,
161        repository_homepage_url: Some(truncate_field(build_hexdocs_homepage(&package_name, &repo))),
162        repository_download_url: None,
163        api_data_url: Some(truncate_field(build_hex_api_url(&package_name, &repo))),
164        datasource_id: Some(DatasourceId::HexMixLock),
165        purl: build_hex_purl(&package_name, Some(&version), Some(&repo)).map(truncate_field),
166        ..ResolvedPackage::new(
167            PackageType::Hex,
168            if repo == "hexpm" {
169                String::new()
170            } else {
171                repo.clone()
172            },
173            package_name.clone(),
174            version.clone(),
175        )
176    };
177
178    Ok(Some(Dependency {
179        purl: purl.map(truncate_field),
180        extracted_requirement: Some(truncate_field(version)),
181        scope: Some("dependencies".to_string()),
182        is_runtime: None,
183        is_optional: None,
184        is_pinned: Some(true),
185        is_direct: None,
186        resolved_package: Some(Box::new(resolved_package)),
187        extra_data: Some(HashMap::from([(
188            "app".to_string(),
189            JsonValue::String(truncate_field(app_name)),
190        )])),
191    }))
192}
193
194fn build_nested_dependency(tuple: DependencyTuple) -> Result<Dependency, String> {
195    let package_name = truncate_field(
196        tuple
197            .hex_name
198            .clone()
199            .unwrap_or_else(|| tuple.app_name.clone()),
200    );
201    Ok(Dependency {
202        purl: build_hex_purl(&package_name, None, tuple.repo.as_deref()).map(truncate_field),
203        extracted_requirement: Some(truncate_field(tuple.requirement)),
204        scope: Some("dependencies".to_string()),
205        is_runtime: Some(!tuple.optional),
206        is_optional: Some(tuple.optional),
207        is_pinned: Some(false),
208        is_direct: Some(true),
209        resolved_package: None,
210        extra_data: None,
211    })
212}
213
// Registers this parser through the crate's `register_parser!` macro.
// NOTE(review): the argument meanings (description, path patterns, package
// type, primary language, documentation URL) are inferred from the values
// passed here — confirm against the macro definition.
crate::register_parser!(
    "Hex mix.lock lockfile",
    &["**/mix.lock"],
    "hex",
    "Elixir",
    Some("https://hexdocs.pm/mix/Mix.Tasks.Deps.html"),
);
221
/// One requirement of a locked package, read from a three-element
/// `{app, requirement, opts}` tuple in the lock entry's dependency list.
#[derive(Debug)]
struct DependencyTuple {
    /// The atom in the tuple's first position.
    app_name: String,
    /// The requirement in the tuple's second position, converted to a string.
    requirement: String,
    /// Value of the `hex` option, when present.
    hex_name: Option<String>,
    /// Value of the `repo` option, when present.
    repo: Option<String>,
    /// Value of the `optional` option; `false` when absent or not a boolean.
    optional: bool,
}
230
231fn term_to_dependency_tuples(term: &Term) -> Result<Vec<DependencyTuple>, String> {
232    let items = match term {
233        Term::List(items) => items,
234        _ => return Ok(Vec::new()),
235    };
236
237    let mut result = Vec::new();
238    for item in items.iter().take(MAX_ITERATION_COUNT) {
239        let tuple = match item {
240            Term::Tuple(items) if items.len() == 3 => items,
241            _ => continue,
242        };
243
244        let app_name = truncate_field(term_to_atom(&tuple[0])?);
245        let requirement = truncate_field(term_to_string(&tuple[1])?);
246        let opts = term_to_keyword_map(&tuple[2])?;
247        let hex_name = opts
248            .get("hex")
249            .map(term_to_atom)
250            .transpose()?
251            .map(truncate_field);
252        let repo = opts
253            .get("repo")
254            .map(term_to_string)
255            .transpose()?
256            .map(truncate_field);
257        let optional = opts
258            .get("optional")
259            .and_then(|term| match term {
260                Term::Bool(value) => Some(*value),
261                _ => None,
262            })
263            .unwrap_or(false);
264
265        result.push(DependencyTuple {
266            app_name,
267            requirement,
268            hex_name,
269            repo,
270            optional,
271        });
272    }
273
274    Ok(result)
275}
276
277fn term_to_keyword_map(term: &Term) -> Result<HashMap<String, Term>, String> {
278    match term {
279        Term::KeywordList(entries) => Ok(entries.iter().cloned().collect()),
280        Term::List(entries) => {
281            let mut map = HashMap::new();
282            for entry in entries {
283                if let Term::Tuple(items) = entry
284                    && items.len() == 2
285                {
286                    map.insert(term_to_atom(&items[0])?, items[1].clone());
287                }
288            }
289            Ok(map)
290        }
291        _ => Ok(HashMap::new()),
292    }
293}
294
295fn build_hex_purl(name: &str, version: Option<&str>, repo: Option<&str>) -> Option<String> {
296    let mut purl = PackageUrl::new("hex", name).ok()?;
297    if let Some(repo) = repo
298        && repo != "hexpm"
299    {
300        purl.with_namespace(repo).ok()?;
301    }
302    if let Some(version) = version {
303        purl.with_version(version).ok()?;
304    }
305    Some(purl.to_string())
306}
307
/// Returns the hex.pm package page URL for `name`.
///
/// For any repository other than `hexpm`, the repository is appended as a
/// `repo` query parameter. Neither argument is URL-encoded.
fn build_hexdocs_homepage(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/packages/{}", name),
        other => format!("https://hex.pm/packages/{}?repo={}", name, other),
    }
}
315
/// Returns the hex.pm API URL for `name`.
///
/// Packages in the `hexpm` repository use the top-level packages endpoint;
/// every other repository uses the per-repository endpoint. Neither argument
/// is URL-encoded.
fn build_hex_api_url(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/api/packages/{}", name),
        other => format!("https://hex.pm/api/repos/{}/packages/{}", other, name),
    }
}
323
324fn term_to_string(term: &Term) -> Result<String, String> {
325    match term {
326        Term::String(value) => Ok(value.clone()),
327        Term::Atom(value) => Ok(value.clone()),
328        Term::Integer(value) => Ok(value.to_string()),
329        _ => Err("Expected string-like term".to_string()),
330    }
331}
332
333fn term_to_atom(term: &Term) -> Result<String, String> {
334    match term {
335        Term::Atom(value) => Ok(value.clone()),
336        _ => Err("Expected atom".to_string()),
337    }
338}
339
340fn term_to_atom_list(term: &Term) -> Result<Vec<String>, String> {
341    let items = match term {
342        Term::List(items) => items,
343        _ => return Ok(Vec::new()),
344    };
345    items.iter().map(term_to_atom).collect()
346}
347
348impl<'a> Parser<'a> {
349    fn new(source: &'a str) -> Self {
350        Self {
351            chars: source.chars().collect(),
352            pos: 0,
353            source,
354            guard: RecursionGuard::depth_only(),
355        }
356    }
357
358    fn parse_term(&mut self) -> Result<Term, String> {
359        if self.guard.descend() {
360            return Err("recursion depth exceeded".to_string());
361        }
362        self.skip_ws();
363        let result = match self.peek() {
364            Some('%') => self.parse_map(),
365            Some('{') => self.parse_tuple(),
366            Some('[') => self.parse_list(),
367            Some('"') => self.parse_string().map(Term::String),
368            Some(':') => self.parse_atom().map(Term::Atom),
369            Some(c) if c.is_ascii_digit() || c == '-' => self.parse_integer().map(Term::Integer),
370            Some('t') | Some('f') => self.parse_bool().map(Term::Bool),
371            Some(other) => Err(format!("Unexpected character '{}' at {}", other, self.pos)),
372            None => Err("Unexpected end of mix.lock".to_string()),
373        };
374        self.guard.ascend();
375        result
376    }
377
378    fn parse_map(&mut self) -> Result<Term, String> {
379        self.expect('%')?;
380        self.expect('{')?;
381        let mut entries = Vec::new();
382        let mut count = 0usize;
383        loop {
384            self.skip_ws();
385            if self.peek() == Some('}') {
386                self.pos += 1;
387                break;
388            }
389            if count >= MAX_ITERATION_COUNT {
390                warn!("map entry count exceeded MAX_ITERATION_COUNT in mix.lock");
391                break;
392            }
393            let key = self.parse_term()?;
394            self.skip_ws();
395            if self.starts_with("=>") {
396                self.expect_sequence("=>")?;
397            } else {
398                self.expect(':')?;
399            }
400            let value = self.parse_term()?;
401            entries.push((key, value));
402            count += 1;
403            self.skip_ws();
404            if self.peek() == Some(',') {
405                self.pos += 1;
406            }
407        }
408        Ok(Term::Map(entries))
409    }
410
411    fn parse_tuple(&mut self) -> Result<Term, String> {
412        self.expect('{')?;
413        let mut items = Vec::new();
414        let mut count = 0usize;
415        loop {
416            self.skip_ws();
417            if self.peek() == Some('}') {
418                self.pos += 1;
419                break;
420            }
421            if count >= MAX_ITERATION_COUNT {
422                warn!("tuple item count exceeded MAX_ITERATION_COUNT in mix.lock");
423                break;
424            }
425            items.push(self.parse_term()?);
426            count += 1;
427            self.skip_ws();
428            if self.peek() == Some(',') {
429                self.pos += 1;
430            }
431        }
432        Ok(Term::Tuple(items))
433    }
434
435    fn parse_list(&mut self) -> Result<Term, String> {
436        self.expect('[')?;
437        let mut keyword_entries = Vec::new();
438        let mut items = Vec::new();
439        let mut saw_keyword = false;
440        let mut count = 0usize;
441
442        loop {
443            self.skip_ws();
444            if self.peek() == Some(']') {
445                self.pos += 1;
446                break;
447            }
448            if count >= MAX_ITERATION_COUNT {
449                warn!("list item count exceeded MAX_ITERATION_COUNT in mix.lock");
450                break;
451            }
452
453            if let Some(keyword) = self.try_parse_keyword_key() {
454                saw_keyword = true;
455                let value = self.parse_term()?;
456                keyword_entries.push((keyword, value));
457            } else {
458                items.push(self.parse_term()?);
459            }
460
461            count += 1;
462            self.skip_ws();
463            if self.peek() == Some(',') {
464                self.pos += 1;
465            }
466        }
467
468        if saw_keyword && items.is_empty() {
469            Ok(Term::KeywordList(keyword_entries))
470        } else if saw_keyword {
471            let mut merged = items;
472            merged.extend(
473                keyword_entries
474                    .into_iter()
475                    .map(|(k, v)| Term::Tuple(vec![Term::Atom(k), v])),
476            );
477            Ok(Term::List(merged))
478        } else {
479            Ok(Term::List(items))
480        }
481    }
482
483    fn try_parse_keyword_key(&mut self) -> Option<String> {
484        let saved = self.pos;
485        self.skip_ws();
486        let start = self.pos;
487        while let Some(c) = self.peek() {
488            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' {
489                self.pos += 1;
490            } else {
491                break;
492            }
493        }
494        if self.pos == start || self.peek() != Some(':') || self.peek_n(1) == Some(':') {
495            self.pos = saved;
496            return None;
497        }
498        let key: String = self.chars[start..self.pos].iter().collect();
499        self.pos += 1;
500        Some(key)
501    }
502
503    fn parse_string(&mut self) -> Result<String, String> {
504        self.expect('"')?;
505        let mut out = String::new();
506        while let Some(c) = self.peek() {
507            self.pos += 1;
508            match c {
509                '"' => return Ok(out),
510                '\\' => {
511                    let escaped = self
512                        .peek()
513                        .ok_or_else(|| "Unterminated string escape".to_string())?;
514                    self.pos += 1;
515                    out.push(match escaped {
516                        'n' => '\n',
517                        'r' => '\r',
518                        't' => '\t',
519                        '"' => '"',
520                        '\\' => '\\',
521                        other => other,
522                    });
523                }
524                other => out.push(other),
525            }
526        }
527        Err("Unterminated string literal".to_string())
528    }
529
530    fn parse_atom(&mut self) -> Result<String, String> {
531        self.expect(':')?;
532        let start = self.pos;
533        while let Some(c) = self.peek() {
534            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' || c == '@' {
535                self.pos += 1;
536            } else {
537                break;
538            }
539        }
540        if self.pos == start {
541            return Err("Expected atom after ':'".to_string());
542        }
543        Ok(self.chars[start..self.pos].iter().collect())
544    }
545
546    fn parse_integer(&mut self) -> Result<i64, String> {
547        let start = self.pos;
548        if self.peek() == Some('-') {
549            self.pos += 1;
550        }
551        while let Some(c) = self.peek() {
552            if c.is_ascii_digit() {
553                self.pos += 1;
554            } else {
555                break;
556            }
557        }
558        self.source[start..self.byte_index(self.pos)]
559            .parse::<i64>()
560            .map_err(|e| format!("Invalid integer: {}", e))
561    }
562
563    fn parse_bool(&mut self) -> Result<bool, String> {
564        if self.starts_with("true") {
565            self.pos += 4;
566            Ok(true)
567        } else if self.starts_with("false") {
568            self.pos += 5;
569            Ok(false)
570        } else {
571            Err("Invalid boolean".to_string())
572        }
573    }
574
575    fn skip_ws(&mut self) {
576        while let Some(c) = self.peek() {
577            if c.is_whitespace() {
578                self.pos += 1;
579            } else {
580                break;
581            }
582        }
583    }
584
585    fn expect(&mut self, expected: char) -> Result<(), String> {
586        match self.peek() {
587            Some(c) if c == expected => {
588                self.pos += 1;
589                Ok(())
590            }
591            Some(c) => Err(format!("Expected '{}' but found '{}'", expected, c)),
592            None => Err(format!("Expected '{}' but reached end of input", expected)),
593        }
594    }
595
596    fn expect_sequence(&mut self, expected: &str) -> Result<(), String> {
597        if self.starts_with(expected) {
598            self.pos += expected.chars().count();
599            Ok(())
600        } else {
601            Err(format!("Expected '{}' at {}", expected, self.pos))
602        }
603    }
604
605    fn starts_with(&self, s: &str) -> bool {
606        self.chars[self.pos..]
607            .iter()
608            .collect::<String>()
609            .starts_with(s)
610    }
611
612    fn peek(&self) -> Option<char> {
613        self.chars.get(self.pos).copied()
614    }
615
616    fn peek_n(&self, n: usize) -> Option<char> {
617        self.chars.get(self.pos + n).copied()
618    }
619
620    fn is_eof(&self) -> bool {
621        self.pos >= self.chars.len()
622    }
623
624    fn byte_index(&self, char_pos: usize) -> usize {
625        self.chars.iter().take(char_pos).map(|c| c.len_utf8()).sum()
626    }
627}