Skip to main content

provenant/parsers/
hex_lock.rs

// SPDX-FileCopyrightText: Provenant contributors
// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use crate::parsers::utils::{
9    MAX_ITERATION_COUNT, RecursionGuard, read_file_to_string, truncate_field,
10};
11use packageurl::PackageUrl;
12use serde_json::Value as JsonValue;
13
14use crate::models::{
15    DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage, Sha256Digest,
16};
17
18use super::PackageParser;
19use super::metadata::ParserMetadata;
20
/// Parser for Hex `mix.lock` lockfiles.
///
/// Stateless unit type; all behavior lives in its `PackageParser` implementation.
pub struct HexLockParser;
22
/// A value parsed from the Elixir term syntax found in `mix.lock`.
#[derive(Clone, Debug)]
enum Term {
    /// `%{key => value}` / `%{"key": value}` map; entries are kept in source order.
    Map(Vec<(Term, Term)>),
    /// `{a, b, ...}` tuple.
    Tuple(Vec<Term>),
    /// `[a, b, ...]` list. When a list mixes plain items with `key: value`
    /// entries, the keyword entries are stored here as `{atom, value}` tuples.
    List(Vec<Term>),
    /// `[key: value, ...]` list that consists solely of keyword entries.
    KeywordList(Vec<(String, Term)>),
    /// Double-quoted string with backslash escapes already resolved.
    String(String),
    /// `:name` atom, stored without the leading colon.
    Atom(String),
    /// The literals `true` and `false`.
    Bool(bool),
    /// Decimal integer with an optional leading `-`.
    Integer(i64),
}
34
35struct Parser<'a> {
36    chars: Vec<char>,
37    pos: usize,
38    source: &'a str,
39    guard: RecursionGuard<()>,
40}
41
42impl PackageParser for HexLockParser {
43    const PACKAGE_TYPE: PackageType = PackageType::Hex;
44
45    fn metadata() -> Vec<ParserMetadata> {
46        vec![ParserMetadata {
47            description: "Hex mix.lock lockfile",
48            file_patterns: &["**/mix.lock"],
49            package_type: "hex",
50            primary_language: "Elixir",
51            documentation_url: Some("https://hexdocs.pm/mix/Mix.Tasks.Deps.html"),
52        }]
53    }
54
55    fn is_match(path: &Path) -> bool {
56        path.file_name().and_then(|name| name.to_str()) == Some("mix.lock")
57    }
58
59    fn extract_packages(path: &Path) -> Vec<PackageData> {
60        let content = match read_file_to_string(path, None) {
61            Ok(content) => content,
62            Err(e) => {
63                warn!("Failed to read mix.lock at {:?}: {}", path, e);
64                return vec![default_package_data()];
65            }
66        };
67
68        match parse_mix_lock(&content) {
69            Ok(package_data) => vec![package_data],
70            Err(e) => {
71                warn!("Failed to parse mix.lock at {:?}: {}", path, e);
72                vec![default_package_data()]
73            }
74        }
75    }
76}
77
78fn default_package_data() -> PackageData {
79    PackageData {
80        package_type: Some(PackageType::Hex),
81        primary_language: Some("Elixir".to_string()),
82        datasource_id: Some(DatasourceId::HexMixLock),
83        ..Default::default()
84    }
85}
86
87fn parse_mix_lock(content: &str) -> Result<PackageData, String> {
88    let mut parser = Parser::new(content);
89    let term = parser.parse_term()?;
90    parser.skip_ws();
91    if !parser.is_eof() {
92        return Err("Unexpected trailing content in mix.lock".to_string());
93    }
94
95    let entries = match term {
96        Term::Map(entries) => entries,
97        _ => return Err("mix.lock root must be a map".to_string()),
98    };
99
100    let mut dependencies = Vec::new();
101    for (key, value) in entries.into_iter().take(MAX_ITERATION_COUNT) {
102        if let Some(dep) = build_dependency_from_lock_entry(&key, &value)? {
103            dependencies.push(dep);
104        }
105    }
106
107    let mut package = default_package_data();
108    package.dependencies = dependencies;
109    Ok(package)
110}
111
112fn build_dependency_from_lock_entry(
113    key: &Term,
114    value: &Term,
115) -> Result<Option<Dependency>, String> {
116    let app_name = truncate_field(term_to_string(key)?);
117
118    let tuple = match value {
119        Term::Tuple(items) => items,
120        _ => return Ok(None),
121    };
122
123    if tuple.len() < 8 {
124        return Ok(None);
125    }
126
127    let kind = term_to_atom(&tuple[0])?;
128    if kind != "hex" {
129        return Ok(None);
130    }
131
132    let package_name = truncate_field(term_to_atom(&tuple[1])?);
133    let version = truncate_field(term_to_string(&tuple[2])?);
134    let inner_checksum = truncate_field(term_to_string(&tuple[3])?);
135    let managers = term_to_atom_list(&tuple[4])?;
136    let nested_dependencies = term_to_dependency_tuples(&tuple[5])?;
137    let repo = truncate_field(term_to_string(&tuple[6])?);
138    let outer_checksum = truncate_field(term_to_string(&tuple[7])?);
139
140    let purl = build_hex_purl(&package_name, Some(&version), Some(&repo));
141    let resolved_package = ResolvedPackage {
142        primary_language: Some("Elixir".to_string()),
143        download_url: None,
144        sha1: None,
145        sha256: Sha256Digest::from_hex(&inner_checksum).ok(),
146        sha512: None,
147        md5: None,
148        is_virtual: true,
149        extra_data: Some(HashMap::from([
150            (
151                "repo".to_string(),
152                JsonValue::String(truncate_field(repo.clone())),
153            ),
154            (
155                "outer_checksum".to_string(),
156                JsonValue::String(truncate_field(outer_checksum.clone())),
157            ),
158            (
159                "managers".to_string(),
160                JsonValue::Array(
161                    managers
162                        .into_iter()
163                        .map(|m| JsonValue::String(truncate_field(m)))
164                        .collect(),
165                ),
166            ),
167        ])),
168        dependencies: nested_dependencies
169            .into_iter()
170            .map(build_nested_dependency)
171            .collect::<Result<Vec<_>, _>>()?,
172        repository_homepage_url: Some(truncate_field(build_hexdocs_homepage(&package_name, &repo))),
173        repository_download_url: None,
174        api_data_url: Some(truncate_field(build_hex_api_url(&package_name, &repo))),
175        datasource_id: Some(DatasourceId::HexMixLock),
176        purl: build_hex_purl(&package_name, Some(&version), Some(&repo)).map(truncate_field),
177        ..ResolvedPackage::new(
178            PackageType::Hex,
179            if repo == "hexpm" {
180                String::new()
181            } else {
182                repo.clone()
183            },
184            package_name.clone(),
185            version.clone(),
186        )
187    };
188
189    Ok(Some(Dependency {
190        purl: purl.map(truncate_field),
191        extracted_requirement: Some(truncate_field(version)),
192        scope: Some("dependencies".to_string()),
193        is_runtime: None,
194        is_optional: None,
195        is_pinned: Some(true),
196        is_direct: None,
197        resolved_package: Some(Box::new(resolved_package)),
198        extra_data: Some(HashMap::from([(
199            "app".to_string(),
200            JsonValue::String(truncate_field(app_name)),
201        )])),
202    }))
203}
204
205fn build_nested_dependency(tuple: DependencyTuple) -> Result<Dependency, String> {
206    let package_name = truncate_field(
207        tuple
208            .hex_name
209            .clone()
210            .unwrap_or_else(|| tuple.app_name.clone()),
211    );
212    Ok(Dependency {
213        purl: build_hex_purl(&package_name, None, tuple.repo.as_deref()).map(truncate_field),
214        extracted_requirement: Some(truncate_field(tuple.requirement)),
215        scope: Some("dependencies".to_string()),
216        is_runtime: Some(!tuple.optional),
217        is_optional: Some(tuple.optional),
218        is_pinned: Some(false),
219        is_direct: Some(true),
220        resolved_package: None,
221        extra_data: None,
222    })
223}
224
/// One `{app, requirement, opts}` entry from a locked package's dependency list.
#[derive(Debug)]
struct DependencyTuple {
    /// Application name: the atom in the first tuple position.
    app_name: String,
    /// Version requirement: the string-like term in the second position.
    requirement: String,
    /// Value of the `hex` option, when the options carry one.
    hex_name: Option<String>,
    /// Value of the `repo` option, when the options carry one.
    repo: Option<String>,
    /// Value of the `optional` option; `false` when absent or not a boolean.
    optional: bool,
}
233
234fn term_to_dependency_tuples(term: &Term) -> Result<Vec<DependencyTuple>, String> {
235    let items = match term {
236        Term::List(items) => items,
237        _ => return Ok(Vec::new()),
238    };
239
240    let mut result = Vec::new();
241    for item in items.iter().take(MAX_ITERATION_COUNT) {
242        let tuple = match item {
243            Term::Tuple(items) if items.len() == 3 => items,
244            _ => continue,
245        };
246
247        let app_name = truncate_field(term_to_atom(&tuple[0])?);
248        let requirement = truncate_field(term_to_string(&tuple[1])?);
249        let opts = term_to_keyword_map(&tuple[2])?;
250        let hex_name = opts
251            .get("hex")
252            .map(term_to_atom)
253            .transpose()?
254            .map(truncate_field);
255        let repo = opts
256            .get("repo")
257            .map(term_to_string)
258            .transpose()?
259            .map(truncate_field);
260        let optional = opts
261            .get("optional")
262            .and_then(|term| match term {
263                Term::Bool(value) => Some(*value),
264                _ => None,
265            })
266            .unwrap_or(false);
267
268        result.push(DependencyTuple {
269            app_name,
270            requirement,
271            hex_name,
272            repo,
273            optional,
274        });
275    }
276
277    Ok(result)
278}
279
280fn term_to_keyword_map(term: &Term) -> Result<HashMap<String, Term>, String> {
281    match term {
282        Term::KeywordList(entries) => Ok(entries.iter().cloned().collect()),
283        Term::List(entries) => {
284            let mut map = HashMap::new();
285            for entry in entries {
286                if let Term::Tuple(items) = entry
287                    && items.len() == 2
288                {
289                    map.insert(term_to_atom(&items[0])?, items[1].clone());
290                }
291            }
292            Ok(map)
293        }
294        _ => Ok(HashMap::new()),
295    }
296}
297
298fn build_hex_purl(name: &str, version: Option<&str>, repo: Option<&str>) -> Option<String> {
299    let mut purl = PackageUrl::new("hex", name).ok()?;
300    if let Some(repo) = repo
301        && repo != "hexpm"
302    {
303        purl.with_namespace(repo).ok()?;
304    }
305    if let Some(version) = version {
306        purl.with_version(version).ok()?;
307    }
308    Some(purl.to_string())
309}
310
/// Returns the hex.pm package page URL for `name`.
///
/// Despite the function name, the URL points at `hex.pm/packages`. For any
/// repo other than `hexpm`, the repo is appended as a `repo` query parameter.
/// Neither argument is escaped.
fn build_hexdocs_homepage(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/packages/{}", name),
        other => format!("https://hex.pm/packages/{}?repo={}", name, other),
    }
}
318
/// Returns the hex.pm API URL for `name`.
///
/// Packages in the `hexpm` repo live under `/api/packages/`; every other repo
/// is addressed as `/api/repos/{repo}/packages/{name}`. Neither argument is
/// escaped.
fn build_hex_api_url(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/api/packages/{}", name),
        other => format!("https://hex.pm/api/repos/{}/packages/{}", other, name),
    }
}
326
327fn term_to_string(term: &Term) -> Result<String, String> {
328    match term {
329        Term::String(value) => Ok(value.clone()),
330        Term::Atom(value) => Ok(value.clone()),
331        Term::Integer(value) => Ok(value.to_string()),
332        _ => Err("Expected string-like term".to_string()),
333    }
334}
335
336fn term_to_atom(term: &Term) -> Result<String, String> {
337    match term {
338        Term::Atom(value) => Ok(value.clone()),
339        _ => Err("Expected atom".to_string()),
340    }
341}
342
343fn term_to_atom_list(term: &Term) -> Result<Vec<String>, String> {
344    let items = match term {
345        Term::List(items) => items,
346        _ => return Ok(Vec::new()),
347    };
348    items.iter().map(term_to_atom).collect()
349}
350
351impl<'a> Parser<'a> {
352    fn new(source: &'a str) -> Self {
353        Self {
354            chars: source.chars().collect(),
355            pos: 0,
356            source,
357            guard: RecursionGuard::depth_only(),
358        }
359    }
360
361    fn parse_term(&mut self) -> Result<Term, String> {
362        if self.guard.descend() {
363            return Err("recursion depth exceeded".to_string());
364        }
365        self.skip_ws();
366        let result = match self.peek() {
367            Some('%') => self.parse_map(),
368            Some('{') => self.parse_tuple(),
369            Some('[') => self.parse_list(),
370            Some('"') => self.parse_string().map(Term::String),
371            Some(':') => self.parse_atom().map(Term::Atom),
372            Some(c) if c.is_ascii_digit() || c == '-' => self.parse_integer().map(Term::Integer),
373            Some('t') | Some('f') => self.parse_bool().map(Term::Bool),
374            Some(other) => Err(format!("Unexpected character '{}' at {}", other, self.pos)),
375            None => Err("Unexpected end of mix.lock".to_string()),
376        };
377        self.guard.ascend();
378        result
379    }
380
381    fn parse_map(&mut self) -> Result<Term, String> {
382        self.expect('%')?;
383        self.expect('{')?;
384        let mut entries = Vec::new();
385        let mut count = 0usize;
386        loop {
387            self.skip_ws();
388            if self.peek() == Some('}') {
389                self.pos += 1;
390                break;
391            }
392            if count >= MAX_ITERATION_COUNT {
393                warn!("map entry count exceeded MAX_ITERATION_COUNT in mix.lock");
394                break;
395            }
396            let key = self.parse_term()?;
397            self.skip_ws();
398            if self.starts_with("=>") {
399                self.expect_sequence("=>")?;
400            } else {
401                self.expect(':')?;
402            }
403            let value = self.parse_term()?;
404            entries.push((key, value));
405            count += 1;
406            self.skip_ws();
407            if self.peek() == Some(',') {
408                self.pos += 1;
409            }
410        }
411        Ok(Term::Map(entries))
412    }
413
414    fn parse_tuple(&mut self) -> Result<Term, String> {
415        self.expect('{')?;
416        let mut items = Vec::new();
417        let mut count = 0usize;
418        loop {
419            self.skip_ws();
420            if self.peek() == Some('}') {
421                self.pos += 1;
422                break;
423            }
424            if count >= MAX_ITERATION_COUNT {
425                warn!("tuple item count exceeded MAX_ITERATION_COUNT in mix.lock");
426                break;
427            }
428            items.push(self.parse_term()?);
429            count += 1;
430            self.skip_ws();
431            if self.peek() == Some(',') {
432                self.pos += 1;
433            }
434        }
435        Ok(Term::Tuple(items))
436    }
437
438    fn parse_list(&mut self) -> Result<Term, String> {
439        self.expect('[')?;
440        let mut keyword_entries = Vec::new();
441        let mut items = Vec::new();
442        let mut saw_keyword = false;
443        let mut count = 0usize;
444
445        loop {
446            self.skip_ws();
447            if self.peek() == Some(']') {
448                self.pos += 1;
449                break;
450            }
451            if count >= MAX_ITERATION_COUNT {
452                warn!("list item count exceeded MAX_ITERATION_COUNT in mix.lock");
453                break;
454            }
455
456            if let Some(keyword) = self.try_parse_keyword_key() {
457                saw_keyword = true;
458                let value = self.parse_term()?;
459                keyword_entries.push((keyword, value));
460            } else {
461                items.push(self.parse_term()?);
462            }
463
464            count += 1;
465            self.skip_ws();
466            if self.peek() == Some(',') {
467                self.pos += 1;
468            }
469        }
470
471        if saw_keyword && items.is_empty() {
472            Ok(Term::KeywordList(keyword_entries))
473        } else if saw_keyword {
474            let mut merged = items;
475            merged.extend(
476                keyword_entries
477                    .into_iter()
478                    .map(|(k, v)| Term::Tuple(vec![Term::Atom(k), v])),
479            );
480            Ok(Term::List(merged))
481        } else {
482            Ok(Term::List(items))
483        }
484    }
485
486    fn try_parse_keyword_key(&mut self) -> Option<String> {
487        let saved = self.pos;
488        self.skip_ws();
489        let start = self.pos;
490        while let Some(c) = self.peek() {
491            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' {
492                self.pos += 1;
493            } else {
494                break;
495            }
496        }
497        if self.pos == start || self.peek() != Some(':') || self.peek_n(1) == Some(':') {
498            self.pos = saved;
499            return None;
500        }
501        let key: String = self.chars[start..self.pos].iter().collect();
502        self.pos += 1;
503        Some(key)
504    }
505
506    fn parse_string(&mut self) -> Result<String, String> {
507        self.expect('"')?;
508        let mut out = String::new();
509        while let Some(c) = self.peek() {
510            self.pos += 1;
511            match c {
512                '"' => return Ok(out),
513                '\\' => {
514                    let escaped = self
515                        .peek()
516                        .ok_or_else(|| "Unterminated string escape".to_string())?;
517                    self.pos += 1;
518                    out.push(match escaped {
519                        'n' => '\n',
520                        'r' => '\r',
521                        't' => '\t',
522                        '"' => '"',
523                        '\\' => '\\',
524                        other => other,
525                    });
526                }
527                other => out.push(other),
528            }
529        }
530        Err("Unterminated string literal".to_string())
531    }
532
533    fn parse_atom(&mut self) -> Result<String, String> {
534        self.expect(':')?;
535        let start = self.pos;
536        while let Some(c) = self.peek() {
537            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' || c == '@' {
538                self.pos += 1;
539            } else {
540                break;
541            }
542        }
543        if self.pos == start {
544            return Err("Expected atom after ':'".to_string());
545        }
546        Ok(self.chars[start..self.pos].iter().collect())
547    }
548
549    fn parse_integer(&mut self) -> Result<i64, String> {
550        let start = self.pos;
551        if self.peek() == Some('-') {
552            self.pos += 1;
553        }
554        while let Some(c) = self.peek() {
555            if c.is_ascii_digit() {
556                self.pos += 1;
557            } else {
558                break;
559            }
560        }
561        self.source[start..self.byte_index(self.pos)]
562            .parse::<i64>()
563            .map_err(|e| format!("Invalid integer: {}", e))
564    }
565
566    fn parse_bool(&mut self) -> Result<bool, String> {
567        if self.starts_with("true") {
568            self.pos += 4;
569            Ok(true)
570        } else if self.starts_with("false") {
571            self.pos += 5;
572            Ok(false)
573        } else {
574            Err("Invalid boolean".to_string())
575        }
576    }
577
578    fn skip_ws(&mut self) {
579        while let Some(c) = self.peek() {
580            if c.is_whitespace() {
581                self.pos += 1;
582            } else {
583                break;
584            }
585        }
586    }
587
588    fn expect(&mut self, expected: char) -> Result<(), String> {
589        match self.peek() {
590            Some(c) if c == expected => {
591                self.pos += 1;
592                Ok(())
593            }
594            Some(c) => Err(format!("Expected '{}' but found '{}'", expected, c)),
595            None => Err(format!("Expected '{}' but reached end of input", expected)),
596        }
597    }
598
599    fn expect_sequence(&mut self, expected: &str) -> Result<(), String> {
600        if self.starts_with(expected) {
601            self.pos += expected.chars().count();
602            Ok(())
603        } else {
604            Err(format!("Expected '{}' at {}", expected, self.pos))
605        }
606    }
607
608    fn starts_with(&self, s: &str) -> bool {
609        self.chars[self.pos..]
610            .iter()
611            .collect::<String>()
612            .starts_with(s)
613    }
614
615    fn peek(&self) -> Option<char> {
616        self.chars.get(self.pos).copied()
617    }
618
619    fn peek_n(&self, n: usize) -> Option<char> {
620        self.chars.get(self.pos + n).copied()
621    }
622
623    fn is_eof(&self) -> bool {
624        self.pos >= self.chars.len()
625    }
626
627    fn byte_index(&self, char_pos: usize) -> usize {
628        self.chars.iter().take(char_pos).map(|c| c.len_utf8()).sum()
629    }
630}