Skip to main content

provenant/parsers/
hex_lock.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4
5use crate::parser_warn as warn;
6use packageurl::PackageUrl;
7use serde_json::Value as JsonValue;
8
9use crate::models::{DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage};
10
11use super::PackageParser;
12
/// Parser for Elixir `mix.lock` lockfiles.
///
/// Carries no state; it only serves as the type the `PackageParser`
/// implementation is attached to.
pub struct HexLockParser;
14
/// A parsed Elixir term, restricted to the syntax this file's `Parser` understands.
#[derive(Clone, Debug)]
enum Term {
    /// `%{key => value, ...}` or `%{key: value, ...}`; entries keep their source order.
    Map(Vec<(Term, Term)>),
    /// `{a, b, ...}`.
    Tuple(Vec<Term>),
    /// `[a, b, ...]` containing at least one positional element (or no elements at all).
    List(Vec<Term>),
    /// `[key: value, ...]` — a list made up exclusively of `key: value` entries.
    KeywordList(Vec<(String, Term)>),
    /// A double-quoted string with escapes already resolved.
    String(String),
    /// An atom written as `:name`; the stored text omits the leading colon.
    Atom(String),
    /// The literal `true` or `false`.
    Bool(bool),
    /// An optionally negative decimal integer.
    Integer(i64),
}
26
/// Cursor over the lockfile text, used to read `Term` values.
struct Parser<'a> {
    /// The input split into characters so the cursor can peek by index.
    chars: Vec<char>,
    /// Index of the next unread character in `chars`.
    pos: usize,
    /// The original input text; `parse_integer` slices it to parse number literals.
    source: &'a str,
}
32
33impl PackageParser for HexLockParser {
34    const PACKAGE_TYPE: PackageType = PackageType::Hex;
35
36    fn is_match(path: &Path) -> bool {
37        path.file_name().and_then(|name| name.to_str()) == Some("mix.lock")
38    }
39
40    fn extract_packages(path: &Path) -> Vec<PackageData> {
41        let content = match fs::read_to_string(path) {
42            Ok(content) => content,
43            Err(e) => {
44                warn!("Failed to read mix.lock at {:?}: {}", path, e);
45                return vec![default_package_data()];
46            }
47        };
48
49        match parse_mix_lock(&content) {
50            Ok(package_data) => vec![package_data],
51            Err(e) => {
52                warn!("Failed to parse mix.lock at {:?}: {}", path, e);
53                vec![default_package_data()]
54            }
55        }
56    }
57}
58
59fn default_package_data() -> PackageData {
60    PackageData {
61        package_type: Some(PackageType::Hex),
62        primary_language: Some("Elixir".to_string()),
63        datasource_id: Some(DatasourceId::HexMixLock),
64        ..Default::default()
65    }
66}
67
68fn parse_mix_lock(content: &str) -> Result<PackageData, String> {
69    let mut parser = Parser::new(content);
70    let term = parser.parse_term()?;
71    parser.skip_ws();
72    if !parser.is_eof() {
73        return Err("Unexpected trailing content in mix.lock".to_string());
74    }
75
76    let entries = match term {
77        Term::Map(entries) => entries,
78        _ => return Err("mix.lock root must be a map".to_string()),
79    };
80
81    let mut dependencies = Vec::new();
82    for (key, value) in entries {
83        if let Some(dep) = build_dependency_from_lock_entry(&key, &value)? {
84            dependencies.push(dep);
85        }
86    }
87
88    let mut package = default_package_data();
89    package.dependencies = dependencies;
90    Ok(package)
91}
92
93fn build_dependency_from_lock_entry(
94    key: &Term,
95    value: &Term,
96) -> Result<Option<Dependency>, String> {
97    let app_name = term_to_string(key)?;
98
99    let tuple = match value {
100        Term::Tuple(items) => items,
101        _ => return Ok(None),
102    };
103
104    if tuple.len() < 8 {
105        return Ok(None);
106    }
107
108    let kind = term_to_atom(&tuple[0])?;
109    if kind != "hex" {
110        return Ok(None);
111    }
112
113    let package_name = term_to_atom(&tuple[1])?;
114    let version = term_to_string(&tuple[2])?;
115    let inner_checksum = term_to_string(&tuple[3])?;
116    let managers = term_to_atom_list(&tuple[4])?;
117    let nested_dependencies = term_to_dependency_tuples(&tuple[5])?;
118    let repo = term_to_string(&tuple[6])?;
119    let outer_checksum = term_to_string(&tuple[7])?;
120
121    let purl = build_hex_purl(&package_name, Some(&version), Some(&repo));
122    let resolved_package = ResolvedPackage {
123        package_type: PackageType::Hex,
124        namespace: if repo == "hexpm" {
125            String::new()
126        } else {
127            repo.clone()
128        },
129        name: package_name.clone(),
130        version: version.clone(),
131        primary_language: Some("Elixir".to_string()),
132        download_url: None,
133        sha1: None,
134        sha256: Some(inner_checksum),
135        sha512: None,
136        md5: None,
137        is_virtual: true,
138        extra_data: Some(HashMap::from([
139            ("repo".to_string(), JsonValue::String(repo.clone())),
140            (
141                "outer_checksum".to_string(),
142                JsonValue::String(outer_checksum.clone()),
143            ),
144            (
145                "managers".to_string(),
146                JsonValue::Array(managers.into_iter().map(JsonValue::String).collect()),
147            ),
148        ])),
149        dependencies: nested_dependencies
150            .into_iter()
151            .map(build_nested_dependency)
152            .collect::<Result<Vec<_>, _>>()?,
153        repository_homepage_url: Some(build_hexdocs_homepage(&package_name, &repo)),
154        repository_download_url: None,
155        api_data_url: Some(build_hex_api_url(&package_name, &repo)),
156        datasource_id: Some(DatasourceId::HexMixLock),
157        purl: build_hex_purl(&package_name, Some(&version), Some(&repo)),
158    };
159
160    Ok(Some(Dependency {
161        purl,
162        extracted_requirement: Some(version),
163        scope: Some("dependencies".to_string()),
164        is_runtime: Some(true),
165        is_optional: Some(false),
166        is_pinned: Some(true),
167        is_direct: Some(false),
168        resolved_package: Some(Box::new(resolved_package)),
169        extra_data: Some(HashMap::from([(
170            "app".to_string(),
171            JsonValue::String(app_name),
172        )])),
173    }))
174}
175
176fn build_nested_dependency(tuple: DependencyTuple) -> Result<Dependency, String> {
177    let package_name = tuple
178        .hex_name
179        .clone()
180        .unwrap_or_else(|| tuple.app_name.clone());
181    Ok(Dependency {
182        purl: build_hex_purl(&package_name, None, tuple.repo.as_deref()),
183        extracted_requirement: Some(tuple.requirement),
184        scope: Some("dependencies".to_string()),
185        is_runtime: Some(!tuple.optional),
186        is_optional: Some(tuple.optional),
187        is_pinned: Some(false),
188        is_direct: Some(true),
189        resolved_package: None,
190        extra_data: None,
191    })
192}
193
// Registration metadata for this parser, passed positionally to the crate's
// `register_parser!` macro: a human-readable description, the path glob(s),
// the package type, the primary language, and a documentation URL.
// NOTE(review): the macro is defined elsewhere in the crate; the meaning of
// each positional argument is inferred from the values — confirm against it.
crate::register_parser!(
    "Hex mix.lock lockfile",
    &["**/mix.lock"],
    "hex",
    "Elixir",
    Some("https://hexdocs.pm/mix/Mix.Tasks.Deps.html"),
);
201
/// One `{app, requirement, opts}` entry from a locked package's dependency list,
/// as extracted by `term_to_dependency_tuples`.
#[derive(Debug)]
struct DependencyTuple {
    /// First tuple element (an atom).
    app_name: String,
    /// Second tuple element, the requirement text.
    requirement: String,
    /// Value of the `hex` option, when present.
    hex_name: Option<String>,
    /// Value of the `repo` option, when present.
    repo: Option<String>,
    /// Value of the `optional` option; `false` when absent or not a boolean.
    optional: bool,
}
210
211fn term_to_dependency_tuples(term: &Term) -> Result<Vec<DependencyTuple>, String> {
212    let items = match term {
213        Term::List(items) => items,
214        _ => return Ok(Vec::new()),
215    };
216
217    let mut result = Vec::new();
218    for item in items {
219        let tuple = match item {
220            Term::Tuple(items) if items.len() == 3 => items,
221            _ => continue,
222        };
223
224        let app_name = term_to_atom(&tuple[0])?;
225        let requirement = term_to_string(&tuple[1])?;
226        let opts = term_to_keyword_map(&tuple[2])?;
227        let hex_name = opts.get("hex").map(term_to_atom).transpose()?;
228        let repo = opts.get("repo").map(term_to_string).transpose()?;
229        let optional = opts
230            .get("optional")
231            .and_then(|term| match term {
232                Term::Bool(value) => Some(*value),
233                _ => None,
234            })
235            .unwrap_or(false);
236
237        result.push(DependencyTuple {
238            app_name,
239            requirement,
240            hex_name,
241            repo,
242            optional,
243        });
244    }
245
246    Ok(result)
247}
248
249fn term_to_keyword_map(term: &Term) -> Result<HashMap<String, Term>, String> {
250    match term {
251        Term::KeywordList(entries) => Ok(entries.iter().cloned().collect()),
252        Term::List(entries) => {
253            let mut map = HashMap::new();
254            for entry in entries {
255                if let Term::Tuple(items) = entry
256                    && items.len() == 2
257                {
258                    map.insert(term_to_atom(&items[0])?, items[1].clone());
259                }
260            }
261            Ok(map)
262        }
263        _ => Ok(HashMap::new()),
264    }
265}
266
267fn build_hex_purl(name: &str, version: Option<&str>, repo: Option<&str>) -> Option<String> {
268    let mut purl = PackageUrl::new("hex", name).ok()?;
269    if let Some(repo) = repo
270        && repo != "hexpm"
271    {
272        purl.with_namespace(repo).ok()?;
273    }
274    if let Some(version) = version {
275        purl.with_version(version).ok()?;
276    }
277    Some(purl.to_string())
278}
279
/// Returns the hex.pm package page URL for `name`, adding a `repo` query
/// parameter when the package does not come from the `hexpm` repository.
fn build_hexdocs_homepage(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/packages/{}", name),
        other => format!("https://hex.pm/packages/{}?repo={}", name, other),
    }
}
287
/// Returns the hex.pm API URL for `name`; packages outside the `hexpm`
/// repository are addressed under `/api/repos/<repo>/packages/`.
fn build_hex_api_url(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/api/packages/{}", name),
        other => format!("https://hex.pm/api/repos/{}/packages/{}", other, name),
    }
}
295
296fn term_to_string(term: &Term) -> Result<String, String> {
297    match term {
298        Term::String(value) => Ok(value.clone()),
299        Term::Atom(value) => Ok(value.clone()),
300        Term::Integer(value) => Ok(value.to_string()),
301        _ => Err("Expected string-like term".to_string()),
302    }
303}
304
305fn term_to_atom(term: &Term) -> Result<String, String> {
306    match term {
307        Term::Atom(value) => Ok(value.clone()),
308        _ => Err("Expected atom".to_string()),
309    }
310}
311
312fn term_to_atom_list(term: &Term) -> Result<Vec<String>, String> {
313    let items = match term {
314        Term::List(items) => items,
315        _ => return Ok(Vec::new()),
316    };
317    items.iter().map(term_to_atom).collect()
318}
319
320impl<'a> Parser<'a> {
321    fn new(source: &'a str) -> Self {
322        Self {
323            chars: source.chars().collect(),
324            pos: 0,
325            source,
326        }
327    }
328
329    fn parse_term(&mut self) -> Result<Term, String> {
330        self.skip_ws();
331        match self.peek() {
332            Some('%') => self.parse_map(),
333            Some('{') => self.parse_tuple(),
334            Some('[') => self.parse_list(),
335            Some('"') => self.parse_string().map(Term::String),
336            Some(':') => self.parse_atom().map(Term::Atom),
337            Some(c) if c.is_ascii_digit() || c == '-' => self.parse_integer().map(Term::Integer),
338            Some('t') | Some('f') => self.parse_bool().map(Term::Bool),
339            Some(other) => Err(format!("Unexpected character '{}' at {}", other, self.pos)),
340            None => Err("Unexpected end of mix.lock".to_string()),
341        }
342    }
343
344    fn parse_map(&mut self) -> Result<Term, String> {
345        self.expect('%')?;
346        self.expect('{')?;
347        let mut entries = Vec::new();
348        loop {
349            self.skip_ws();
350            if self.peek() == Some('}') {
351                self.pos += 1;
352                break;
353            }
354            let key = self.parse_term()?;
355            self.skip_ws();
356            if self.starts_with("=>") {
357                self.expect_sequence("=>")?;
358            } else {
359                self.expect(':')?;
360            }
361            let value = self.parse_term()?;
362            entries.push((key, value));
363            self.skip_ws();
364            if self.peek() == Some(',') {
365                self.pos += 1;
366            }
367        }
368        Ok(Term::Map(entries))
369    }
370
371    fn parse_tuple(&mut self) -> Result<Term, String> {
372        self.expect('{')?;
373        let mut items = Vec::new();
374        loop {
375            self.skip_ws();
376            if self.peek() == Some('}') {
377                self.pos += 1;
378                break;
379            }
380            items.push(self.parse_term()?);
381            self.skip_ws();
382            if self.peek() == Some(',') {
383                self.pos += 1;
384            }
385        }
386        Ok(Term::Tuple(items))
387    }
388
389    fn parse_list(&mut self) -> Result<Term, String> {
390        self.expect('[')?;
391        let mut keyword_entries = Vec::new();
392        let mut items = Vec::new();
393        let mut saw_keyword = false;
394
395        loop {
396            self.skip_ws();
397            if self.peek() == Some(']') {
398                self.pos += 1;
399                break;
400            }
401
402            if let Some(keyword) = self.try_parse_keyword_key() {
403                saw_keyword = true;
404                let value = self.parse_term()?;
405                keyword_entries.push((keyword, value));
406            } else {
407                items.push(self.parse_term()?);
408            }
409
410            self.skip_ws();
411            if self.peek() == Some(',') {
412                self.pos += 1;
413            }
414        }
415
416        if saw_keyword && items.is_empty() {
417            Ok(Term::KeywordList(keyword_entries))
418        } else if saw_keyword {
419            let mut merged = items;
420            merged.extend(
421                keyword_entries
422                    .into_iter()
423                    .map(|(k, v)| Term::Tuple(vec![Term::Atom(k), v])),
424            );
425            Ok(Term::List(merged))
426        } else {
427            Ok(Term::List(items))
428        }
429    }
430
431    fn try_parse_keyword_key(&mut self) -> Option<String> {
432        let saved = self.pos;
433        self.skip_ws();
434        let start = self.pos;
435        while let Some(c) = self.peek() {
436            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' {
437                self.pos += 1;
438            } else {
439                break;
440            }
441        }
442        if self.pos == start || self.peek() != Some(':') || self.peek_n(1) == Some(':') {
443            self.pos = saved;
444            return None;
445        }
446        let key: String = self.chars[start..self.pos].iter().collect();
447        self.pos += 1;
448        Some(key)
449    }
450
451    fn parse_string(&mut self) -> Result<String, String> {
452        self.expect('"')?;
453        let mut out = String::new();
454        while let Some(c) = self.peek() {
455            self.pos += 1;
456            match c {
457                '"' => return Ok(out),
458                '\\' => {
459                    let escaped = self
460                        .peek()
461                        .ok_or_else(|| "Unterminated string escape".to_string())?;
462                    self.pos += 1;
463                    out.push(match escaped {
464                        'n' => '\n',
465                        'r' => '\r',
466                        't' => '\t',
467                        '"' => '"',
468                        '\\' => '\\',
469                        other => other,
470                    });
471                }
472                other => out.push(other),
473            }
474        }
475        Err("Unterminated string literal".to_string())
476    }
477
478    fn parse_atom(&mut self) -> Result<String, String> {
479        self.expect(':')?;
480        let start = self.pos;
481        while let Some(c) = self.peek() {
482            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' || c == '@' {
483                self.pos += 1;
484            } else {
485                break;
486            }
487        }
488        if self.pos == start {
489            return Err("Expected atom after ':'".to_string());
490        }
491        Ok(self.chars[start..self.pos].iter().collect())
492    }
493
494    fn parse_integer(&mut self) -> Result<i64, String> {
495        let start = self.pos;
496        if self.peek() == Some('-') {
497            self.pos += 1;
498        }
499        while let Some(c) = self.peek() {
500            if c.is_ascii_digit() {
501                self.pos += 1;
502            } else {
503                break;
504            }
505        }
506        self.source[start..self.byte_index(self.pos)]
507            .parse::<i64>()
508            .map_err(|e| format!("Invalid integer: {}", e))
509    }
510
511    fn parse_bool(&mut self) -> Result<bool, String> {
512        if self.starts_with("true") {
513            self.pos += 4;
514            Ok(true)
515        } else if self.starts_with("false") {
516            self.pos += 5;
517            Ok(false)
518        } else {
519            Err("Invalid boolean".to_string())
520        }
521    }
522
523    fn skip_ws(&mut self) {
524        while let Some(c) = self.peek() {
525            if c.is_whitespace() {
526                self.pos += 1;
527            } else {
528                break;
529            }
530        }
531    }
532
533    fn expect(&mut self, expected: char) -> Result<(), String> {
534        match self.peek() {
535            Some(c) if c == expected => {
536                self.pos += 1;
537                Ok(())
538            }
539            Some(c) => Err(format!("Expected '{}' but found '{}'", expected, c)),
540            None => Err(format!("Expected '{}' but reached end of input", expected)),
541        }
542    }
543
544    fn expect_sequence(&mut self, expected: &str) -> Result<(), String> {
545        if self.starts_with(expected) {
546            self.pos += expected.chars().count();
547            Ok(())
548        } else {
549            Err(format!("Expected '{}' at {}", expected, self.pos))
550        }
551    }
552
553    fn starts_with(&self, s: &str) -> bool {
554        self.chars[self.pos..]
555            .iter()
556            .collect::<String>()
557            .starts_with(s)
558    }
559
560    fn peek(&self) -> Option<char> {
561        self.chars.get(self.pos).copied()
562    }
563
564    fn peek_n(&self, n: usize) -> Option<char> {
565        self.chars.get(self.pos + n).copied()
566    }
567
568    fn is_eof(&self) -> bool {
569        self.pos >= self.chars.len()
570    }
571
572    fn byte_index(&self, char_pos: usize) -> usize {
573        self.chars.iter().take(char_pos).map(|c| c.len_utf8()).sum()
574    }
575}