Skip to main content

provenant/parsers/
hex_lock.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4
5use crate::parser_warn as warn;
6use packageurl::PackageUrl;
7use serde_json::Value as JsonValue;
8
9use crate::models::{DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage};
10
11use super::PackageParser;
12
/// Parser for Elixir `mix.lock` lockfiles.
///
/// This is a stateless unit struct; all behaviour lives in its `PackageParser`
/// implementation, which matches files named exactly `mix.lock`.
pub struct HexLockParser;
14
/// A value read from the Elixir term syntax used by `mix.lock`.
#[derive(Clone, Debug)]
enum Term {
    /// `%{key => value, ...}` (or `"key": value`), kept as pairs in source order.
    Map(Vec<(Term, Term)>),
    /// `{a, b, ...}`.
    Tuple(Vec<Term>),
    /// `[a, b, ...]`. Also used for lists mixing plain items and `key: value` entries,
    /// in which case the keyword entries are stored as two-element tuples.
    List(Vec<Term>),
    /// A list made up solely of `key: value` entries, e.g. `[hex: :plug, optional: false]`.
    KeywordList(Vec<(String, Term)>),
    /// A double-quoted string with escapes already resolved.
    String(String),
    /// An atom such as `:hex`, stored without the leading colon.
    Atom(String),
    /// The literals `true` / `false`.
    Bool(bool),
    /// A (possibly negative) decimal integer.
    Integer(i64),
}
26
/// Hand-written recursive-descent parser state over a `mix.lock` source text.
struct Parser<'a> {
    /// The source decoded into individual characters, so `pos` can step per character.
    chars: Vec<char>,
    /// Index into `chars` of the next unread character (a char index, not a byte offset).
    pos: usize,
    /// The original text; sliced by byte offset when parsing integers.
    source: &'a str,
}
32
33impl PackageParser for HexLockParser {
34    const PACKAGE_TYPE: PackageType = PackageType::Hex;
35
36    fn is_match(path: &Path) -> bool {
37        path.file_name().and_then(|name| name.to_str()) == Some("mix.lock")
38    }
39
40    fn extract_packages(path: &Path) -> Vec<PackageData> {
41        let content = match fs::read_to_string(path) {
42            Ok(content) => content,
43            Err(e) => {
44                warn!("Failed to read mix.lock at {:?}: {}", path, e);
45                return vec![default_package_data()];
46            }
47        };
48
49        match parse_mix_lock(&content) {
50            Ok(package_data) => vec![package_data],
51            Err(e) => {
52                warn!("Failed to parse mix.lock at {:?}: {}", path, e);
53                vec![default_package_data()]
54            }
55        }
56    }
57}
58
59fn default_package_data() -> PackageData {
60    PackageData {
61        package_type: Some(PackageType::Hex),
62        primary_language: Some("Elixir".to_string()),
63        datasource_id: Some(DatasourceId::HexMixLock),
64        ..Default::default()
65    }
66}
67
68fn parse_mix_lock(content: &str) -> Result<PackageData, String> {
69    let mut parser = Parser::new(content);
70    let term = parser.parse_term()?;
71    parser.skip_ws();
72    if !parser.is_eof() {
73        return Err("Unexpected trailing content in mix.lock".to_string());
74    }
75
76    let entries = match term {
77        Term::Map(entries) => entries,
78        _ => return Err("mix.lock root must be a map".to_string()),
79    };
80
81    let mut dependencies = Vec::new();
82    for (key, value) in entries {
83        if let Some(dep) = build_dependency_from_lock_entry(&key, &value)? {
84            dependencies.push(dep);
85        }
86    }
87
88    let mut package = default_package_data();
89    package.dependencies = dependencies;
90    Ok(package)
91}
92
93fn build_dependency_from_lock_entry(
94    key: &Term,
95    value: &Term,
96) -> Result<Option<Dependency>, String> {
97    let app_name = term_to_string(key)?;
98
99    let tuple = match value {
100        Term::Tuple(items) => items,
101        _ => return Ok(None),
102    };
103
104    if tuple.len() < 8 {
105        return Ok(None);
106    }
107
108    let kind = term_to_atom(&tuple[0])?;
109    if kind != "hex" {
110        return Ok(None);
111    }
112
113    let package_name = term_to_atom(&tuple[1])?;
114    let version = term_to_string(&tuple[2])?;
115    let inner_checksum = term_to_string(&tuple[3])?;
116    let managers = term_to_atom_list(&tuple[4])?;
117    let nested_dependencies = term_to_dependency_tuples(&tuple[5])?;
118    let repo = term_to_string(&tuple[6])?;
119    let outer_checksum = term_to_string(&tuple[7])?;
120
121    let purl = build_hex_purl(&package_name, Some(&version), Some(&repo));
122    let resolved_package = ResolvedPackage {
123        primary_language: Some("Elixir".to_string()),
124        download_url: None,
125        sha1: None,
126        sha256: Some(inner_checksum),
127        sha512: None,
128        md5: None,
129        is_virtual: true,
130        extra_data: Some(HashMap::from([
131            ("repo".to_string(), JsonValue::String(repo.clone())),
132            (
133                "outer_checksum".to_string(),
134                JsonValue::String(outer_checksum.clone()),
135            ),
136            (
137                "managers".to_string(),
138                JsonValue::Array(managers.into_iter().map(JsonValue::String).collect()),
139            ),
140        ])),
141        dependencies: nested_dependencies
142            .into_iter()
143            .map(build_nested_dependency)
144            .collect::<Result<Vec<_>, _>>()?,
145        repository_homepage_url: Some(build_hexdocs_homepage(&package_name, &repo)),
146        repository_download_url: None,
147        api_data_url: Some(build_hex_api_url(&package_name, &repo)),
148        datasource_id: Some(DatasourceId::HexMixLock),
149        purl: build_hex_purl(&package_name, Some(&version), Some(&repo)),
150        ..ResolvedPackage::new(
151            PackageType::Hex,
152            if repo == "hexpm" {
153                String::new()
154            } else {
155                repo.clone()
156            },
157            package_name.clone(),
158            version.clone(),
159        )
160    };
161
162    Ok(Some(Dependency {
163        purl,
164        extracted_requirement: Some(version),
165        scope: Some("dependencies".to_string()),
166        is_runtime: Some(true),
167        is_optional: Some(false),
168        is_pinned: Some(true),
169        is_direct: Some(false),
170        resolved_package: Some(Box::new(resolved_package)),
171        extra_data: Some(HashMap::from([(
172            "app".to_string(),
173            JsonValue::String(app_name),
174        )])),
175    }))
176}
177
178fn build_nested_dependency(tuple: DependencyTuple) -> Result<Dependency, String> {
179    let package_name = tuple
180        .hex_name
181        .clone()
182        .unwrap_or_else(|| tuple.app_name.clone());
183    Ok(Dependency {
184        purl: build_hex_purl(&package_name, None, tuple.repo.as_deref()),
185        extracted_requirement: Some(tuple.requirement),
186        scope: Some("dependencies".to_string()),
187        is_runtime: Some(!tuple.optional),
188        is_optional: Some(tuple.optional),
189        is_pinned: Some(false),
190        is_direct: Some(true),
191        resolved_package: None,
192        extra_data: None,
193    })
194}
195
// Parser registration metadata. NOTE(review): the arguments look like, in order, a
// human-readable description, the path globs handled, the package type, the primary
// language, and an optional documentation URL — confirm against the definition of
// `register_parser!`, which is not visible in this file.
crate::register_parser!(
    "Hex mix.lock lockfile",
    &["**/mix.lock"],
    "hex",
    "Elixir",
    Some("https://hexdocs.pm/mix/Mix.Tasks.Deps.html"),
);
203
/// One nested dependency requirement of a locked package, decoded from a
/// three-element tuple `{app, requirement, opts}`.
#[derive(Debug)]
struct DependencyTuple {
    /// First tuple element: the application name atom.
    app_name: String,
    /// Second tuple element: the requirement, converted to a string.
    requirement: String,
    /// Value of the `hex` option, when present.
    hex_name: Option<String>,
    /// Value of the `repo` option, when present.
    repo: Option<String>,
    /// Value of the `optional` option; `false` when absent or not a boolean.
    optional: bool,
}
212
213fn term_to_dependency_tuples(term: &Term) -> Result<Vec<DependencyTuple>, String> {
214    let items = match term {
215        Term::List(items) => items,
216        _ => return Ok(Vec::new()),
217    };
218
219    let mut result = Vec::new();
220    for item in items {
221        let tuple = match item {
222            Term::Tuple(items) if items.len() == 3 => items,
223            _ => continue,
224        };
225
226        let app_name = term_to_atom(&tuple[0])?;
227        let requirement = term_to_string(&tuple[1])?;
228        let opts = term_to_keyword_map(&tuple[2])?;
229        let hex_name = opts.get("hex").map(term_to_atom).transpose()?;
230        let repo = opts.get("repo").map(term_to_string).transpose()?;
231        let optional = opts
232            .get("optional")
233            .and_then(|term| match term {
234                Term::Bool(value) => Some(*value),
235                _ => None,
236            })
237            .unwrap_or(false);
238
239        result.push(DependencyTuple {
240            app_name,
241            requirement,
242            hex_name,
243            repo,
244            optional,
245        });
246    }
247
248    Ok(result)
249}
250
251fn term_to_keyword_map(term: &Term) -> Result<HashMap<String, Term>, String> {
252    match term {
253        Term::KeywordList(entries) => Ok(entries.iter().cloned().collect()),
254        Term::List(entries) => {
255            let mut map = HashMap::new();
256            for entry in entries {
257                if let Term::Tuple(items) = entry
258                    && items.len() == 2
259                {
260                    map.insert(term_to_atom(&items[0])?, items[1].clone());
261                }
262            }
263            Ok(map)
264        }
265        _ => Ok(HashMap::new()),
266    }
267}
268
269fn build_hex_purl(name: &str, version: Option<&str>, repo: Option<&str>) -> Option<String> {
270    let mut purl = PackageUrl::new("hex", name).ok()?;
271    if let Some(repo) = repo
272        && repo != "hexpm"
273    {
274        purl.with_namespace(repo).ok()?;
275    }
276    if let Some(version) = version {
277        purl.with_version(version).ok()?;
278    }
279    Some(purl.to_string())
280}
281
/// Returns the hex.pm package page URL for `name`.
///
/// Packages from a repository other than `hexpm` get a `?repo=<repo>` query suffix.
fn build_hexdocs_homepage(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/packages/{}", name),
        other => format!("https://hex.pm/packages/{}?repo={}", name, other),
    }
}
289
/// Returns the hex.pm API URL for package `name`.
///
/// Packages from a repository other than `hexpm` use the
/// `/api/repos/<repo>/packages/<name>` form.
fn build_hex_api_url(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/api/packages/{}", name),
        other => format!("https://hex.pm/api/repos/{}/packages/{}", other, name),
    }
}
297
298fn term_to_string(term: &Term) -> Result<String, String> {
299    match term {
300        Term::String(value) => Ok(value.clone()),
301        Term::Atom(value) => Ok(value.clone()),
302        Term::Integer(value) => Ok(value.to_string()),
303        _ => Err("Expected string-like term".to_string()),
304    }
305}
306
307fn term_to_atom(term: &Term) -> Result<String, String> {
308    match term {
309        Term::Atom(value) => Ok(value.clone()),
310        _ => Err("Expected atom".to_string()),
311    }
312}
313
314fn term_to_atom_list(term: &Term) -> Result<Vec<String>, String> {
315    let items = match term {
316        Term::List(items) => items,
317        _ => return Ok(Vec::new()),
318    };
319    items.iter().map(term_to_atom).collect()
320}
321
322impl<'a> Parser<'a> {
323    fn new(source: &'a str) -> Self {
324        Self {
325            chars: source.chars().collect(),
326            pos: 0,
327            source,
328        }
329    }
330
331    fn parse_term(&mut self) -> Result<Term, String> {
332        self.skip_ws();
333        match self.peek() {
334            Some('%') => self.parse_map(),
335            Some('{') => self.parse_tuple(),
336            Some('[') => self.parse_list(),
337            Some('"') => self.parse_string().map(Term::String),
338            Some(':') => self.parse_atom().map(Term::Atom),
339            Some(c) if c.is_ascii_digit() || c == '-' => self.parse_integer().map(Term::Integer),
340            Some('t') | Some('f') => self.parse_bool().map(Term::Bool),
341            Some(other) => Err(format!("Unexpected character '{}' at {}", other, self.pos)),
342            None => Err("Unexpected end of mix.lock".to_string()),
343        }
344    }
345
346    fn parse_map(&mut self) -> Result<Term, String> {
347        self.expect('%')?;
348        self.expect('{')?;
349        let mut entries = Vec::new();
350        loop {
351            self.skip_ws();
352            if self.peek() == Some('}') {
353                self.pos += 1;
354                break;
355            }
356            let key = self.parse_term()?;
357            self.skip_ws();
358            if self.starts_with("=>") {
359                self.expect_sequence("=>")?;
360            } else {
361                self.expect(':')?;
362            }
363            let value = self.parse_term()?;
364            entries.push((key, value));
365            self.skip_ws();
366            if self.peek() == Some(',') {
367                self.pos += 1;
368            }
369        }
370        Ok(Term::Map(entries))
371    }
372
373    fn parse_tuple(&mut self) -> Result<Term, String> {
374        self.expect('{')?;
375        let mut items = Vec::new();
376        loop {
377            self.skip_ws();
378            if self.peek() == Some('}') {
379                self.pos += 1;
380                break;
381            }
382            items.push(self.parse_term()?);
383            self.skip_ws();
384            if self.peek() == Some(',') {
385                self.pos += 1;
386            }
387        }
388        Ok(Term::Tuple(items))
389    }
390
391    fn parse_list(&mut self) -> Result<Term, String> {
392        self.expect('[')?;
393        let mut keyword_entries = Vec::new();
394        let mut items = Vec::new();
395        let mut saw_keyword = false;
396
397        loop {
398            self.skip_ws();
399            if self.peek() == Some(']') {
400                self.pos += 1;
401                break;
402            }
403
404            if let Some(keyword) = self.try_parse_keyword_key() {
405                saw_keyword = true;
406                let value = self.parse_term()?;
407                keyword_entries.push((keyword, value));
408            } else {
409                items.push(self.parse_term()?);
410            }
411
412            self.skip_ws();
413            if self.peek() == Some(',') {
414                self.pos += 1;
415            }
416        }
417
418        if saw_keyword && items.is_empty() {
419            Ok(Term::KeywordList(keyword_entries))
420        } else if saw_keyword {
421            let mut merged = items;
422            merged.extend(
423                keyword_entries
424                    .into_iter()
425                    .map(|(k, v)| Term::Tuple(vec![Term::Atom(k), v])),
426            );
427            Ok(Term::List(merged))
428        } else {
429            Ok(Term::List(items))
430        }
431    }
432
433    fn try_parse_keyword_key(&mut self) -> Option<String> {
434        let saved = self.pos;
435        self.skip_ws();
436        let start = self.pos;
437        while let Some(c) = self.peek() {
438            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' {
439                self.pos += 1;
440            } else {
441                break;
442            }
443        }
444        if self.pos == start || self.peek() != Some(':') || self.peek_n(1) == Some(':') {
445            self.pos = saved;
446            return None;
447        }
448        let key: String = self.chars[start..self.pos].iter().collect();
449        self.pos += 1;
450        Some(key)
451    }
452
453    fn parse_string(&mut self) -> Result<String, String> {
454        self.expect('"')?;
455        let mut out = String::new();
456        while let Some(c) = self.peek() {
457            self.pos += 1;
458            match c {
459                '"' => return Ok(out),
460                '\\' => {
461                    let escaped = self
462                        .peek()
463                        .ok_or_else(|| "Unterminated string escape".to_string())?;
464                    self.pos += 1;
465                    out.push(match escaped {
466                        'n' => '\n',
467                        'r' => '\r',
468                        't' => '\t',
469                        '"' => '"',
470                        '\\' => '\\',
471                        other => other,
472                    });
473                }
474                other => out.push(other),
475            }
476        }
477        Err("Unterminated string literal".to_string())
478    }
479
480    fn parse_atom(&mut self) -> Result<String, String> {
481        self.expect(':')?;
482        let start = self.pos;
483        while let Some(c) = self.peek() {
484            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' || c == '@' {
485                self.pos += 1;
486            } else {
487                break;
488            }
489        }
490        if self.pos == start {
491            return Err("Expected atom after ':'".to_string());
492        }
493        Ok(self.chars[start..self.pos].iter().collect())
494    }
495
496    fn parse_integer(&mut self) -> Result<i64, String> {
497        let start = self.pos;
498        if self.peek() == Some('-') {
499            self.pos += 1;
500        }
501        while let Some(c) = self.peek() {
502            if c.is_ascii_digit() {
503                self.pos += 1;
504            } else {
505                break;
506            }
507        }
508        self.source[start..self.byte_index(self.pos)]
509            .parse::<i64>()
510            .map_err(|e| format!("Invalid integer: {}", e))
511    }
512
513    fn parse_bool(&mut self) -> Result<bool, String> {
514        if self.starts_with("true") {
515            self.pos += 4;
516            Ok(true)
517        } else if self.starts_with("false") {
518            self.pos += 5;
519            Ok(false)
520        } else {
521            Err("Invalid boolean".to_string())
522        }
523    }
524
525    fn skip_ws(&mut self) {
526        while let Some(c) = self.peek() {
527            if c.is_whitespace() {
528                self.pos += 1;
529            } else {
530                break;
531            }
532        }
533    }
534
535    fn expect(&mut self, expected: char) -> Result<(), String> {
536        match self.peek() {
537            Some(c) if c == expected => {
538                self.pos += 1;
539                Ok(())
540            }
541            Some(c) => Err(format!("Expected '{}' but found '{}'", expected, c)),
542            None => Err(format!("Expected '{}' but reached end of input", expected)),
543        }
544    }
545
546    fn expect_sequence(&mut self, expected: &str) -> Result<(), String> {
547        if self.starts_with(expected) {
548            self.pos += expected.chars().count();
549            Ok(())
550        } else {
551            Err(format!("Expected '{}' at {}", expected, self.pos))
552        }
553    }
554
555    fn starts_with(&self, s: &str) -> bool {
556        self.chars[self.pos..]
557            .iter()
558            .collect::<String>()
559            .starts_with(s)
560    }
561
562    fn peek(&self) -> Option<char> {
563        self.chars.get(self.pos).copied()
564    }
565
566    fn peek_n(&self, n: usize) -> Option<char> {
567        self.chars.get(self.pos + n).copied()
568    }
569
570    fn is_eof(&self) -> bool {
571        self.pos >= self.chars.len()
572    }
573
574    fn byte_index(&self, char_pos: usize) -> usize {
575        self.chars.iter().take(char_pos).map(|c| c.len_utf8()).sum()
576    }
577}