Skip to main content

provenant/parsers/
hex_lock.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4
5use crate::parser_warn as warn;
6use packageurl::PackageUrl;
7use serde_json::Value as JsonValue;
8
9use crate::models::{
10    DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage, Sha256Digest,
11};
12
13use super::PackageParser;
14
/// Parser for Elixir `mix.lock` lockfiles (Hex package type).
///
/// Carries no state; all behavior lives in its `PackageParser` implementation.
pub struct HexLockParser;
16
/// A single value read from a `mix.lock` file by the hand-written term parser.
#[derive(Debug, Clone)]
enum Term {
    /// `%{key => value, ...}` or `%{"key": value, ...}`; entries keep source order.
    Map(Vec<(Term, Term)>),
    /// `{a, b, ...}`.
    Tuple(Vec<Term>),
    /// `[a, b, ...]` containing at least one positional element (or nothing at all).
    List(Vec<Term>),
    /// `[key: value, ...]` made up exclusively of keyword entries.
    KeywordList(Vec<(String, Term)>),
    /// Double-quoted string literal with escapes already resolved.
    String(String),
    /// `:name` atom, stored without the leading colon.
    Atom(String),
    /// `true` or `false`.
    Bool(bool),
    /// Optionally negative decimal integer.
    Integer(i64),
}
28
/// Cursor-based parser over the text of a `mix.lock` file.
struct Parser<'a> {
    /// The input decoded into characters so the cursor can move one `char` at a time.
    chars: Vec<char>,
    /// Index of the next unread character in `chars`.
    pos: usize,
    /// The original input text that `chars` was built from.
    source: &'a str,
}
34
35impl PackageParser for HexLockParser {
36    const PACKAGE_TYPE: PackageType = PackageType::Hex;
37
38    fn is_match(path: &Path) -> bool {
39        path.file_name().and_then(|name| name.to_str()) == Some("mix.lock")
40    }
41
42    fn extract_packages(path: &Path) -> Vec<PackageData> {
43        let content = match fs::read_to_string(path) {
44            Ok(content) => content,
45            Err(e) => {
46                warn!("Failed to read mix.lock at {:?}: {}", path, e);
47                return vec![default_package_data()];
48            }
49        };
50
51        match parse_mix_lock(&content) {
52            Ok(package_data) => vec![package_data],
53            Err(e) => {
54                warn!("Failed to parse mix.lock at {:?}: {}", path, e);
55                vec![default_package_data()]
56            }
57        }
58    }
59}
60
61fn default_package_data() -> PackageData {
62    PackageData {
63        package_type: Some(PackageType::Hex),
64        primary_language: Some("Elixir".to_string()),
65        datasource_id: Some(DatasourceId::HexMixLock),
66        ..Default::default()
67    }
68}
69
70fn parse_mix_lock(content: &str) -> Result<PackageData, String> {
71    let mut parser = Parser::new(content);
72    let term = parser.parse_term()?;
73    parser.skip_ws();
74    if !parser.is_eof() {
75        return Err("Unexpected trailing content in mix.lock".to_string());
76    }
77
78    let entries = match term {
79        Term::Map(entries) => entries,
80        _ => return Err("mix.lock root must be a map".to_string()),
81    };
82
83    let mut dependencies = Vec::new();
84    for (key, value) in entries {
85        if let Some(dep) = build_dependency_from_lock_entry(&key, &value)? {
86            dependencies.push(dep);
87        }
88    }
89
90    let mut package = default_package_data();
91    package.dependencies = dependencies;
92    Ok(package)
93}
94
95fn build_dependency_from_lock_entry(
96    key: &Term,
97    value: &Term,
98) -> Result<Option<Dependency>, String> {
99    let app_name = term_to_string(key)?;
100
101    let tuple = match value {
102        Term::Tuple(items) => items,
103        _ => return Ok(None),
104    };
105
106    if tuple.len() < 8 {
107        return Ok(None);
108    }
109
110    let kind = term_to_atom(&tuple[0])?;
111    if kind != "hex" {
112        return Ok(None);
113    }
114
115    let package_name = term_to_atom(&tuple[1])?;
116    let version = term_to_string(&tuple[2])?;
117    let inner_checksum = term_to_string(&tuple[3])?;
118    let managers = term_to_atom_list(&tuple[4])?;
119    let nested_dependencies = term_to_dependency_tuples(&tuple[5])?;
120    let repo = term_to_string(&tuple[6])?;
121    let outer_checksum = term_to_string(&tuple[7])?;
122
123    let purl = build_hex_purl(&package_name, Some(&version), Some(&repo));
124    let resolved_package = ResolvedPackage {
125        primary_language: Some("Elixir".to_string()),
126        download_url: None,
127        sha1: None,
128        sha256: Sha256Digest::from_hex(&inner_checksum).ok(),
129        sha512: None,
130        md5: None,
131        is_virtual: true,
132        extra_data: Some(HashMap::from([
133            ("repo".to_string(), JsonValue::String(repo.clone())),
134            (
135                "outer_checksum".to_string(),
136                JsonValue::String(outer_checksum.clone()),
137            ),
138            (
139                "managers".to_string(),
140                JsonValue::Array(managers.into_iter().map(JsonValue::String).collect()),
141            ),
142        ])),
143        dependencies: nested_dependencies
144            .into_iter()
145            .map(build_nested_dependency)
146            .collect::<Result<Vec<_>, _>>()?,
147        repository_homepage_url: Some(build_hexdocs_homepage(&package_name, &repo)),
148        repository_download_url: None,
149        api_data_url: Some(build_hex_api_url(&package_name, &repo)),
150        datasource_id: Some(DatasourceId::HexMixLock),
151        purl: build_hex_purl(&package_name, Some(&version), Some(&repo)),
152        ..ResolvedPackage::new(
153            PackageType::Hex,
154            if repo == "hexpm" {
155                String::new()
156            } else {
157                repo.clone()
158            },
159            package_name.clone(),
160            version.clone(),
161        )
162    };
163
164    Ok(Some(Dependency {
165        purl,
166        extracted_requirement: Some(version),
167        scope: Some("dependencies".to_string()),
168        is_runtime: Some(true),
169        is_optional: Some(false),
170        is_pinned: Some(true),
171        is_direct: Some(false),
172        resolved_package: Some(Box::new(resolved_package)),
173        extra_data: Some(HashMap::from([(
174            "app".to_string(),
175            JsonValue::String(app_name),
176        )])),
177    }))
178}
179
180fn build_nested_dependency(tuple: DependencyTuple) -> Result<Dependency, String> {
181    let package_name = tuple
182        .hex_name
183        .clone()
184        .unwrap_or_else(|| tuple.app_name.clone());
185    Ok(Dependency {
186        purl: build_hex_purl(&package_name, None, tuple.repo.as_deref()),
187        extracted_requirement: Some(tuple.requirement),
188        scope: Some("dependencies".to_string()),
189        is_runtime: Some(!tuple.optional),
190        is_optional: Some(tuple.optional),
191        is_pinned: Some(false),
192        is_direct: Some(true),
193        resolved_package: None,
194        extra_data: None,
195    })
196}
197
198crate::register_parser!(
199    "Hex mix.lock lockfile",
200    &["**/mix.lock"],
201    "hex",
202    "Elixir",
203    Some("https://hexdocs.pm/mix/Mix.Tasks.Deps.html"),
204);
205
/// One `{app, requirement, opts}` tuple from the nested dependency list of a lock entry.
#[derive(Debug)]
struct DependencyTuple {
    /// First tuple element: the dependency's atom name.
    app_name: String,
    /// Second tuple element: the requirement text.
    requirement: String,
    /// Value of the `hex:` option, when given.
    hex_name: Option<String>,
    /// Value of the `repo:` option, when given.
    repo: Option<String>,
    /// Value of the `optional:` option; `false` when absent or not a boolean.
    optional: bool,
}
214
215fn term_to_dependency_tuples(term: &Term) -> Result<Vec<DependencyTuple>, String> {
216    let items = match term {
217        Term::List(items) => items,
218        _ => return Ok(Vec::new()),
219    };
220
221    let mut result = Vec::new();
222    for item in items {
223        let tuple = match item {
224            Term::Tuple(items) if items.len() == 3 => items,
225            _ => continue,
226        };
227
228        let app_name = term_to_atom(&tuple[0])?;
229        let requirement = term_to_string(&tuple[1])?;
230        let opts = term_to_keyword_map(&tuple[2])?;
231        let hex_name = opts.get("hex").map(term_to_atom).transpose()?;
232        let repo = opts.get("repo").map(term_to_string).transpose()?;
233        let optional = opts
234            .get("optional")
235            .and_then(|term| match term {
236                Term::Bool(value) => Some(*value),
237                _ => None,
238            })
239            .unwrap_or(false);
240
241        result.push(DependencyTuple {
242            app_name,
243            requirement,
244            hex_name,
245            repo,
246            optional,
247        });
248    }
249
250    Ok(result)
251}
252
253fn term_to_keyword_map(term: &Term) -> Result<HashMap<String, Term>, String> {
254    match term {
255        Term::KeywordList(entries) => Ok(entries.iter().cloned().collect()),
256        Term::List(entries) => {
257            let mut map = HashMap::new();
258            for entry in entries {
259                if let Term::Tuple(items) = entry
260                    && items.len() == 2
261                {
262                    map.insert(term_to_atom(&items[0])?, items[1].clone());
263                }
264            }
265            Ok(map)
266        }
267        _ => Ok(HashMap::new()),
268    }
269}
270
271fn build_hex_purl(name: &str, version: Option<&str>, repo: Option<&str>) -> Option<String> {
272    let mut purl = PackageUrl::new("hex", name).ok()?;
273    if let Some(repo) = repo
274        && repo != "hexpm"
275    {
276        purl.with_namespace(repo).ok()?;
277    }
278    if let Some(version) = version {
279        purl.with_version(version).ok()?;
280    }
281    Some(purl.to_string())
282}
283
/// Returns the hex.pm package page URL for `name`; a `repo` other than `hexpm` is
/// appended as a `?repo=` query parameter.
fn build_hexdocs_homepage(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/packages/{}", name),
        other => format!("https://hex.pm/packages/{}?repo={}", name, other),
    }
}
291
/// Returns the hex.pm API URL for `name`; a `repo` other than `hexpm` is inserted
/// as a `repos/<repo>` path segment.
fn build_hex_api_url(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/api/packages/{}", name),
        other => format!("https://hex.pm/api/repos/{}/packages/{}", other, name),
    }
}
299
300fn term_to_string(term: &Term) -> Result<String, String> {
301    match term {
302        Term::String(value) => Ok(value.clone()),
303        Term::Atom(value) => Ok(value.clone()),
304        Term::Integer(value) => Ok(value.to_string()),
305        _ => Err("Expected string-like term".to_string()),
306    }
307}
308
309fn term_to_atom(term: &Term) -> Result<String, String> {
310    match term {
311        Term::Atom(value) => Ok(value.clone()),
312        _ => Err("Expected atom".to_string()),
313    }
314}
315
316fn term_to_atom_list(term: &Term) -> Result<Vec<String>, String> {
317    let items = match term {
318        Term::List(items) => items,
319        _ => return Ok(Vec::new()),
320    };
321    items.iter().map(term_to_atom).collect()
322}
323
324impl<'a> Parser<'a> {
325    fn new(source: &'a str) -> Self {
326        Self {
327            chars: source.chars().collect(),
328            pos: 0,
329            source,
330        }
331    }
332
333    fn parse_term(&mut self) -> Result<Term, String> {
334        self.skip_ws();
335        match self.peek() {
336            Some('%') => self.parse_map(),
337            Some('{') => self.parse_tuple(),
338            Some('[') => self.parse_list(),
339            Some('"') => self.parse_string().map(Term::String),
340            Some(':') => self.parse_atom().map(Term::Atom),
341            Some(c) if c.is_ascii_digit() || c == '-' => self.parse_integer().map(Term::Integer),
342            Some('t') | Some('f') => self.parse_bool().map(Term::Bool),
343            Some(other) => Err(format!("Unexpected character '{}' at {}", other, self.pos)),
344            None => Err("Unexpected end of mix.lock".to_string()),
345        }
346    }
347
348    fn parse_map(&mut self) -> Result<Term, String> {
349        self.expect('%')?;
350        self.expect('{')?;
351        let mut entries = Vec::new();
352        loop {
353            self.skip_ws();
354            if self.peek() == Some('}') {
355                self.pos += 1;
356                break;
357            }
358            let key = self.parse_term()?;
359            self.skip_ws();
360            if self.starts_with("=>") {
361                self.expect_sequence("=>")?;
362            } else {
363                self.expect(':')?;
364            }
365            let value = self.parse_term()?;
366            entries.push((key, value));
367            self.skip_ws();
368            if self.peek() == Some(',') {
369                self.pos += 1;
370            }
371        }
372        Ok(Term::Map(entries))
373    }
374
375    fn parse_tuple(&mut self) -> Result<Term, String> {
376        self.expect('{')?;
377        let mut items = Vec::new();
378        loop {
379            self.skip_ws();
380            if self.peek() == Some('}') {
381                self.pos += 1;
382                break;
383            }
384            items.push(self.parse_term()?);
385            self.skip_ws();
386            if self.peek() == Some(',') {
387                self.pos += 1;
388            }
389        }
390        Ok(Term::Tuple(items))
391    }
392
393    fn parse_list(&mut self) -> Result<Term, String> {
394        self.expect('[')?;
395        let mut keyword_entries = Vec::new();
396        let mut items = Vec::new();
397        let mut saw_keyword = false;
398
399        loop {
400            self.skip_ws();
401            if self.peek() == Some(']') {
402                self.pos += 1;
403                break;
404            }
405
406            if let Some(keyword) = self.try_parse_keyword_key() {
407                saw_keyword = true;
408                let value = self.parse_term()?;
409                keyword_entries.push((keyword, value));
410            } else {
411                items.push(self.parse_term()?);
412            }
413
414            self.skip_ws();
415            if self.peek() == Some(',') {
416                self.pos += 1;
417            }
418        }
419
420        if saw_keyword && items.is_empty() {
421            Ok(Term::KeywordList(keyword_entries))
422        } else if saw_keyword {
423            let mut merged = items;
424            merged.extend(
425                keyword_entries
426                    .into_iter()
427                    .map(|(k, v)| Term::Tuple(vec![Term::Atom(k), v])),
428            );
429            Ok(Term::List(merged))
430        } else {
431            Ok(Term::List(items))
432        }
433    }
434
435    fn try_parse_keyword_key(&mut self) -> Option<String> {
436        let saved = self.pos;
437        self.skip_ws();
438        let start = self.pos;
439        while let Some(c) = self.peek() {
440            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' {
441                self.pos += 1;
442            } else {
443                break;
444            }
445        }
446        if self.pos == start || self.peek() != Some(':') || self.peek_n(1) == Some(':') {
447            self.pos = saved;
448            return None;
449        }
450        let key: String = self.chars[start..self.pos].iter().collect();
451        self.pos += 1;
452        Some(key)
453    }
454
455    fn parse_string(&mut self) -> Result<String, String> {
456        self.expect('"')?;
457        let mut out = String::new();
458        while let Some(c) = self.peek() {
459            self.pos += 1;
460            match c {
461                '"' => return Ok(out),
462                '\\' => {
463                    let escaped = self
464                        .peek()
465                        .ok_or_else(|| "Unterminated string escape".to_string())?;
466                    self.pos += 1;
467                    out.push(match escaped {
468                        'n' => '\n',
469                        'r' => '\r',
470                        't' => '\t',
471                        '"' => '"',
472                        '\\' => '\\',
473                        other => other,
474                    });
475                }
476                other => out.push(other),
477            }
478        }
479        Err("Unterminated string literal".to_string())
480    }
481
482    fn parse_atom(&mut self) -> Result<String, String> {
483        self.expect(':')?;
484        let start = self.pos;
485        while let Some(c) = self.peek() {
486            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' || c == '@' {
487                self.pos += 1;
488            } else {
489                break;
490            }
491        }
492        if self.pos == start {
493            return Err("Expected atom after ':'".to_string());
494        }
495        Ok(self.chars[start..self.pos].iter().collect())
496    }
497
498    fn parse_integer(&mut self) -> Result<i64, String> {
499        let start = self.pos;
500        if self.peek() == Some('-') {
501            self.pos += 1;
502        }
503        while let Some(c) = self.peek() {
504            if c.is_ascii_digit() {
505                self.pos += 1;
506            } else {
507                break;
508            }
509        }
510        self.source[start..self.byte_index(self.pos)]
511            .parse::<i64>()
512            .map_err(|e| format!("Invalid integer: {}", e))
513    }
514
515    fn parse_bool(&mut self) -> Result<bool, String> {
516        if self.starts_with("true") {
517            self.pos += 4;
518            Ok(true)
519        } else if self.starts_with("false") {
520            self.pos += 5;
521            Ok(false)
522        } else {
523            Err("Invalid boolean".to_string())
524        }
525    }
526
527    fn skip_ws(&mut self) {
528        while let Some(c) = self.peek() {
529            if c.is_whitespace() {
530                self.pos += 1;
531            } else {
532                break;
533            }
534        }
535    }
536
537    fn expect(&mut self, expected: char) -> Result<(), String> {
538        match self.peek() {
539            Some(c) if c == expected => {
540                self.pos += 1;
541                Ok(())
542            }
543            Some(c) => Err(format!("Expected '{}' but found '{}'", expected, c)),
544            None => Err(format!("Expected '{}' but reached end of input", expected)),
545        }
546    }
547
548    fn expect_sequence(&mut self, expected: &str) -> Result<(), String> {
549        if self.starts_with(expected) {
550            self.pos += expected.chars().count();
551            Ok(())
552        } else {
553            Err(format!("Expected '{}' at {}", expected, self.pos))
554        }
555    }
556
557    fn starts_with(&self, s: &str) -> bool {
558        self.chars[self.pos..]
559            .iter()
560            .collect::<String>()
561            .starts_with(s)
562    }
563
564    fn peek(&self) -> Option<char> {
565        self.chars.get(self.pos).copied()
566    }
567
568    fn peek_n(&self, n: usize) -> Option<char> {
569        self.chars.get(self.pos + n).copied()
570    }
571
572    fn is_eof(&self) -> bool {
573        self.pos >= self.chars.len()
574    }
575
576    fn byte_index(&self, char_pos: usize) -> usize {
577        self.chars.iter().take(char_pos).map(|c| c.len_utf8()).sum()
578    }
579}