Skip to main content

provenant/parsers/
hex_lock.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
6use packageurl::PackageUrl;
7use serde_json::Value as JsonValue;
8
9use crate::models::{
10    DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage, Sha256Digest,
11};
12
13use super::PackageParser;
14
/// Deepest term nesting `Parser::parse_term` accepts before it reports an error.
const MAX_RECURSION_DEPTH: usize = 50;
16
/// Stateless marker type that carries the `PackageParser` implementation for `mix.lock` files.
pub struct HexLockParser;
18
/// A single value produced by [`Parser`] while reading a lockfile.
#[derive(Debug, Clone)]
enum Term {
    /// `%{...}` literal, kept as key/value pairs in source order.
    Map(Vec<(Term, Term)>),
    /// `{...}` literal.
    Tuple(Vec<Term>),
    /// `[...]` literal with positional items; `key: value` entries that appear next to
    /// positional items are stored as two-element `{atom, value}` tuples.
    List(Vec<Term>),
    /// `[...]` literal made up exclusively of `key: value` entries.
    KeywordList(Vec<(String, Term)>),
    /// Double-quoted string with its escape sequences already resolved.
    String(String),
    /// `:name` atom, stored without the leading colon.
    Atom(String),
    /// `true` or `false`.
    Bool(bool),
    /// Optionally negative decimal integer.
    Integer(i64),
}
30
/// Cursor-based reader that turns lockfile text into [`Term`] values.
struct Parser<'a> {
    /// The input decoded into characters; `pos` indexes into this vector.
    chars: Vec<char>,
    /// Character offset of the next unread character.
    pos: usize,
    /// The original input text, borrowed for the parser's lifetime.
    source: &'a str,
    /// Current nesting level of the term being parsed.
    depth: usize,
}
37
38impl PackageParser for HexLockParser {
39    const PACKAGE_TYPE: PackageType = PackageType::Hex;
40
41    fn is_match(path: &Path) -> bool {
42        path.file_name().and_then(|name| name.to_str()) == Some("mix.lock")
43    }
44
45    fn extract_packages(path: &Path) -> Vec<PackageData> {
46        let content = match read_file_to_string(path, None) {
47            Ok(content) => content,
48            Err(e) => {
49                warn!("Failed to read mix.lock at {:?}: {}", path, e);
50                return vec![default_package_data()];
51            }
52        };
53
54        match parse_mix_lock(&content) {
55            Ok(package_data) => vec![package_data],
56            Err(e) => {
57                warn!("Failed to parse mix.lock at {:?}: {}", path, e);
58                vec![default_package_data()]
59            }
60        }
61    }
62}
63
64fn default_package_data() -> PackageData {
65    PackageData {
66        package_type: Some(PackageType::Hex),
67        primary_language: Some("Elixir".to_string()),
68        datasource_id: Some(DatasourceId::HexMixLock),
69        ..Default::default()
70    }
71}
72
73fn parse_mix_lock(content: &str) -> Result<PackageData, String> {
74    let mut parser = Parser::new(content);
75    let term = parser.parse_term()?;
76    parser.skip_ws();
77    if !parser.is_eof() {
78        return Err("Unexpected trailing content in mix.lock".to_string());
79    }
80
81    let entries = match term {
82        Term::Map(entries) => entries,
83        _ => return Err("mix.lock root must be a map".to_string()),
84    };
85
86    let mut dependencies = Vec::new();
87    for (key, value) in entries.into_iter().take(MAX_ITERATION_COUNT) {
88        if let Some(dep) = build_dependency_from_lock_entry(&key, &value)? {
89            dependencies.push(dep);
90        }
91    }
92
93    let mut package = default_package_data();
94    package.dependencies = dependencies;
95    Ok(package)
96}
97
98fn build_dependency_from_lock_entry(
99    key: &Term,
100    value: &Term,
101) -> Result<Option<Dependency>, String> {
102    let app_name = truncate_field(term_to_string(key)?);
103
104    let tuple = match value {
105        Term::Tuple(items) => items,
106        _ => return Ok(None),
107    };
108
109    if tuple.len() < 8 {
110        return Ok(None);
111    }
112
113    let kind = term_to_atom(&tuple[0])?;
114    if kind != "hex" {
115        return Ok(None);
116    }
117
118    let package_name = truncate_field(term_to_atom(&tuple[1])?);
119    let version = truncate_field(term_to_string(&tuple[2])?);
120    let inner_checksum = truncate_field(term_to_string(&tuple[3])?);
121    let managers = term_to_atom_list(&tuple[4])?;
122    let nested_dependencies = term_to_dependency_tuples(&tuple[5])?;
123    let repo = truncate_field(term_to_string(&tuple[6])?);
124    let outer_checksum = truncate_field(term_to_string(&tuple[7])?);
125
126    let purl = build_hex_purl(&package_name, Some(&version), Some(&repo));
127    let resolved_package = ResolvedPackage {
128        primary_language: Some("Elixir".to_string()),
129        download_url: None,
130        sha1: None,
131        sha256: Sha256Digest::from_hex(&inner_checksum).ok(),
132        sha512: None,
133        md5: None,
134        is_virtual: true,
135        extra_data: Some(HashMap::from([
136            (
137                "repo".to_string(),
138                JsonValue::String(truncate_field(repo.clone())),
139            ),
140            (
141                "outer_checksum".to_string(),
142                JsonValue::String(truncate_field(outer_checksum.clone())),
143            ),
144            (
145                "managers".to_string(),
146                JsonValue::Array(
147                    managers
148                        .into_iter()
149                        .map(|m| JsonValue::String(truncate_field(m)))
150                        .collect(),
151                ),
152            ),
153        ])),
154        dependencies: nested_dependencies
155            .into_iter()
156            .map(build_nested_dependency)
157            .collect::<Result<Vec<_>, _>>()?,
158        repository_homepage_url: Some(truncate_field(build_hexdocs_homepage(&package_name, &repo))),
159        repository_download_url: None,
160        api_data_url: Some(truncate_field(build_hex_api_url(&package_name, &repo))),
161        datasource_id: Some(DatasourceId::HexMixLock),
162        purl: build_hex_purl(&package_name, Some(&version), Some(&repo)).map(truncate_field),
163        ..ResolvedPackage::new(
164            PackageType::Hex,
165            if repo == "hexpm" {
166                String::new()
167            } else {
168                repo.clone()
169            },
170            package_name.clone(),
171            version.clone(),
172        )
173    };
174
175    Ok(Some(Dependency {
176        purl: purl.map(truncate_field),
177        extracted_requirement: Some(truncate_field(version)),
178        scope: Some("dependencies".to_string()),
179        is_runtime: None,
180        is_optional: None,
181        is_pinned: Some(true),
182        is_direct: None,
183        resolved_package: Some(Box::new(resolved_package)),
184        extra_data: Some(HashMap::from([(
185            "app".to_string(),
186            JsonValue::String(truncate_field(app_name)),
187        )])),
188    }))
189}
190
191fn build_nested_dependency(tuple: DependencyTuple) -> Result<Dependency, String> {
192    let package_name = truncate_field(
193        tuple
194            .hex_name
195            .clone()
196            .unwrap_or_else(|| tuple.app_name.clone()),
197    );
198    Ok(Dependency {
199        purl: build_hex_purl(&package_name, None, tuple.repo.as_deref()).map(truncate_field),
200        extracted_requirement: Some(truncate_field(tuple.requirement)),
201        scope: Some("dependencies".to_string()),
202        is_runtime: Some(!tuple.optional),
203        is_optional: Some(tuple.optional),
204        is_pinned: Some(false),
205        is_direct: Some(true),
206        resolved_package: None,
207        extra_data: None,
208    })
209}
210
// Registers this parser through the crate's `register_parser!` macro.
// NOTE(review): the arguments look like (description, path globs, package type,
// primary language, documentation URL) — confirm against the macro definition.
211crate::register_parser!(
212    "Hex mix.lock lockfile",
213    &["**/mix.lock"],
214    "hex",
215    "Elixir",
216    Some("https://hexdocs.pm/mix/Mix.Tasks.Deps.html"),
217);
218
/// One `{app, requirement, options}` entry from a locked package's dependency list.
#[derive(Debug)]
struct DependencyTuple {
    /// Atom in the first tuple position.
    app_name: String,
    /// String-like term in the second tuple position.
    requirement: String,
    /// Value of the `hex` option, when given.
    hex_name: Option<String>,
    /// Value of the `repo` option, when given.
    repo: Option<String>,
    /// Value of the `optional` option; `false` when absent or not a boolean.
    optional: bool,
}
227
228fn term_to_dependency_tuples(term: &Term) -> Result<Vec<DependencyTuple>, String> {
229    let items = match term {
230        Term::List(items) => items,
231        _ => return Ok(Vec::new()),
232    };
233
234    let mut result = Vec::new();
235    for item in items.iter().take(MAX_ITERATION_COUNT) {
236        let tuple = match item {
237            Term::Tuple(items) if items.len() == 3 => items,
238            _ => continue,
239        };
240
241        let app_name = truncate_field(term_to_atom(&tuple[0])?);
242        let requirement = truncate_field(term_to_string(&tuple[1])?);
243        let opts = term_to_keyword_map(&tuple[2])?;
244        let hex_name = opts
245            .get("hex")
246            .map(term_to_atom)
247            .transpose()?
248            .map(truncate_field);
249        let repo = opts
250            .get("repo")
251            .map(term_to_string)
252            .transpose()?
253            .map(truncate_field);
254        let optional = opts
255            .get("optional")
256            .and_then(|term| match term {
257                Term::Bool(value) => Some(*value),
258                _ => None,
259            })
260            .unwrap_or(false);
261
262        result.push(DependencyTuple {
263            app_name,
264            requirement,
265            hex_name,
266            repo,
267            optional,
268        });
269    }
270
271    Ok(result)
272}
273
274fn term_to_keyword_map(term: &Term) -> Result<HashMap<String, Term>, String> {
275    match term {
276        Term::KeywordList(entries) => Ok(entries.iter().cloned().collect()),
277        Term::List(entries) => {
278            let mut map = HashMap::new();
279            for entry in entries {
280                if let Term::Tuple(items) = entry
281                    && items.len() == 2
282                {
283                    map.insert(term_to_atom(&items[0])?, items[1].clone());
284                }
285            }
286            Ok(map)
287        }
288        _ => Ok(HashMap::new()),
289    }
290}
291
292fn build_hex_purl(name: &str, version: Option<&str>, repo: Option<&str>) -> Option<String> {
293    let mut purl = PackageUrl::new("hex", name).ok()?;
294    if let Some(repo) = repo
295        && repo != "hexpm"
296    {
297        purl.with_namespace(repo).ok()?;
298    }
299    if let Some(version) = version {
300        purl.with_version(version).ok()?;
301    }
302    Some(purl.to_string())
303}
304
/// Returns the hex.pm package page URL for `name`, adding a `repo` query parameter
/// for any repository other than `hexpm`.
fn build_hexdocs_homepage(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/packages/{}", name),
        other => format!("https://hex.pm/packages/{}?repo={}", name, other),
    }
}
312
/// Returns the hex.pm API URL for `name`, routed through `/api/repos/<repo>/` for
/// any repository other than `hexpm`.
fn build_hex_api_url(name: &str, repo: &str) -> String {
    match repo {
        "hexpm" => format!("https://hex.pm/api/packages/{}", name),
        other => format!("https://hex.pm/api/repos/{}/packages/{}", other, name),
    }
}
320
321fn term_to_string(term: &Term) -> Result<String, String> {
322    match term {
323        Term::String(value) => Ok(value.clone()),
324        Term::Atom(value) => Ok(value.clone()),
325        Term::Integer(value) => Ok(value.to_string()),
326        _ => Err("Expected string-like term".to_string()),
327    }
328}
329
330fn term_to_atom(term: &Term) -> Result<String, String> {
331    match term {
332        Term::Atom(value) => Ok(value.clone()),
333        _ => Err("Expected atom".to_string()),
334    }
335}
336
337fn term_to_atom_list(term: &Term) -> Result<Vec<String>, String> {
338    let items = match term {
339        Term::List(items) => items,
340        _ => return Ok(Vec::new()),
341    };
342    items.iter().map(term_to_atom).collect()
343}
344
345impl<'a> Parser<'a> {
346    fn new(source: &'a str) -> Self {
347        Self {
348            chars: source.chars().collect(),
349            pos: 0,
350            source,
351            depth: 0,
352        }
353    }
354
355    fn parse_term(&mut self) -> Result<Term, String> {
356        if self.depth > MAX_RECURSION_DEPTH {
357            return Err("recursion depth exceeded".to_string());
358        }
359        self.skip_ws();
360        match self.peek() {
361            Some('%') => self.parse_map(),
362            Some('{') => self.parse_tuple(),
363            Some('[') => self.parse_list(),
364            Some('"') => self.parse_string().map(Term::String),
365            Some(':') => self.parse_atom().map(Term::Atom),
366            Some(c) if c.is_ascii_digit() || c == '-' => self.parse_integer().map(Term::Integer),
367            Some('t') | Some('f') => self.parse_bool().map(Term::Bool),
368            Some(other) => Err(format!("Unexpected character '{}' at {}", other, self.pos)),
369            None => Err("Unexpected end of mix.lock".to_string()),
370        }
371    }
372
373    fn parse_map(&mut self) -> Result<Term, String> {
374        self.expect('%')?;
375        self.expect('{')?;
376        let mut entries = Vec::new();
377        let mut count = 0usize;
378        loop {
379            self.skip_ws();
380            if self.peek() == Some('}') {
381                self.pos += 1;
382                break;
383            }
384            if count >= MAX_ITERATION_COUNT {
385                warn!("map entry count exceeded MAX_ITERATION_COUNT in mix.lock");
386                break;
387            }
388            self.depth += 1;
389            let key = self.parse_term()?;
390            self.depth -= 1;
391            self.skip_ws();
392            if self.starts_with("=>") {
393                self.expect_sequence("=>")?;
394            } else {
395                self.expect(':')?;
396            }
397            self.depth += 1;
398            let value = self.parse_term()?;
399            self.depth -= 1;
400            entries.push((key, value));
401            count += 1;
402            self.skip_ws();
403            if self.peek() == Some(',') {
404                self.pos += 1;
405            }
406        }
407        Ok(Term::Map(entries))
408    }
409
410    fn parse_tuple(&mut self) -> Result<Term, String> {
411        self.expect('{')?;
412        let mut items = Vec::new();
413        let mut count = 0usize;
414        loop {
415            self.skip_ws();
416            if self.peek() == Some('}') {
417                self.pos += 1;
418                break;
419            }
420            if count >= MAX_ITERATION_COUNT {
421                warn!("tuple item count exceeded MAX_ITERATION_COUNT in mix.lock");
422                break;
423            }
424            self.depth += 1;
425            items.push(self.parse_term()?);
426            self.depth -= 1;
427            count += 1;
428            self.skip_ws();
429            if self.peek() == Some(',') {
430                self.pos += 1;
431            }
432        }
433        Ok(Term::Tuple(items))
434    }
435
436    fn parse_list(&mut self) -> Result<Term, String> {
437        self.expect('[')?;
438        let mut keyword_entries = Vec::new();
439        let mut items = Vec::new();
440        let mut saw_keyword = false;
441        let mut count = 0usize;
442
443        loop {
444            self.skip_ws();
445            if self.peek() == Some(']') {
446                self.pos += 1;
447                break;
448            }
449            if count >= MAX_ITERATION_COUNT {
450                warn!("list item count exceeded MAX_ITERATION_COUNT in mix.lock");
451                break;
452            }
453
454            if let Some(keyword) = self.try_parse_keyword_key() {
455                saw_keyword = true;
456                self.depth += 1;
457                let value = self.parse_term()?;
458                self.depth -= 1;
459                keyword_entries.push((keyword, value));
460            } else {
461                self.depth += 1;
462                items.push(self.parse_term()?);
463                self.depth -= 1;
464            }
465
466            count += 1;
467            self.skip_ws();
468            if self.peek() == Some(',') {
469                self.pos += 1;
470            }
471        }
472
473        if saw_keyword && items.is_empty() {
474            Ok(Term::KeywordList(keyword_entries))
475        } else if saw_keyword {
476            let mut merged = items;
477            merged.extend(
478                keyword_entries
479                    .into_iter()
480                    .map(|(k, v)| Term::Tuple(vec![Term::Atom(k), v])),
481            );
482            Ok(Term::List(merged))
483        } else {
484            Ok(Term::List(items))
485        }
486    }
487
488    fn try_parse_keyword_key(&mut self) -> Option<String> {
489        let saved = self.pos;
490        self.skip_ws();
491        let start = self.pos;
492        while let Some(c) = self.peek() {
493            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' {
494                self.pos += 1;
495            } else {
496                break;
497            }
498        }
499        if self.pos == start || self.peek() != Some(':') || self.peek_n(1) == Some(':') {
500            self.pos = saved;
501            return None;
502        }
503        let key: String = self.chars[start..self.pos].iter().collect();
504        self.pos += 1;
505        Some(key)
506    }
507
508    fn parse_string(&mut self) -> Result<String, String> {
509        self.expect('"')?;
510        let mut out = String::new();
511        while let Some(c) = self.peek() {
512            self.pos += 1;
513            match c {
514                '"' => return Ok(out),
515                '\\' => {
516                    let escaped = self
517                        .peek()
518                        .ok_or_else(|| "Unterminated string escape".to_string())?;
519                    self.pos += 1;
520                    out.push(match escaped {
521                        'n' => '\n',
522                        'r' => '\r',
523                        't' => '\t',
524                        '"' => '"',
525                        '\\' => '\\',
526                        other => other,
527                    });
528                }
529                other => out.push(other),
530            }
531        }
532        Err("Unterminated string literal".to_string())
533    }
534
535    fn parse_atom(&mut self) -> Result<String, String> {
536        self.expect(':')?;
537        let start = self.pos;
538        while let Some(c) = self.peek() {
539            if c.is_ascii_alphanumeric() || c == '_' || c == '?' || c == '!' || c == '@' {
540                self.pos += 1;
541            } else {
542                break;
543            }
544        }
545        if self.pos == start {
546            return Err("Expected atom after ':'".to_string());
547        }
548        Ok(self.chars[start..self.pos].iter().collect())
549    }
550
551    fn parse_integer(&mut self) -> Result<i64, String> {
552        let start = self.pos;
553        if self.peek() == Some('-') {
554            self.pos += 1;
555        }
556        while let Some(c) = self.peek() {
557            if c.is_ascii_digit() {
558                self.pos += 1;
559            } else {
560                break;
561            }
562        }
563        self.source[start..self.byte_index(self.pos)]
564            .parse::<i64>()
565            .map_err(|e| format!("Invalid integer: {}", e))
566    }
567
568    fn parse_bool(&mut self) -> Result<bool, String> {
569        if self.starts_with("true") {
570            self.pos += 4;
571            Ok(true)
572        } else if self.starts_with("false") {
573            self.pos += 5;
574            Ok(false)
575        } else {
576            Err("Invalid boolean".to_string())
577        }
578    }
579
580    fn skip_ws(&mut self) {
581        while let Some(c) = self.peek() {
582            if c.is_whitespace() {
583                self.pos += 1;
584            } else {
585                break;
586            }
587        }
588    }
589
590    fn expect(&mut self, expected: char) -> Result<(), String> {
591        match self.peek() {
592            Some(c) if c == expected => {
593                self.pos += 1;
594                Ok(())
595            }
596            Some(c) => Err(format!("Expected '{}' but found '{}'", expected, c)),
597            None => Err(format!("Expected '{}' but reached end of input", expected)),
598        }
599    }
600
601    fn expect_sequence(&mut self, expected: &str) -> Result<(), String> {
602        if self.starts_with(expected) {
603            self.pos += expected.chars().count();
604            Ok(())
605        } else {
606            Err(format!("Expected '{}' at {}", expected, self.pos))
607        }
608    }
609
610    fn starts_with(&self, s: &str) -> bool {
611        self.chars[self.pos..]
612            .iter()
613            .collect::<String>()
614            .starts_with(s)
615    }
616
617    fn peek(&self) -> Option<char> {
618        self.chars.get(self.pos).copied()
619    }
620
621    fn peek_n(&self, n: usize) -> Option<char> {
622        self.chars.get(self.pos + n).copied()
623    }
624
625    fn is_eof(&self) -> bool {
626        self.pos >= self.chars.len()
627    }
628
629    fn byte_index(&self, char_pos: usize) -> usize {
630        self.chars.iter().take(char_pos).map(|c| c.len_utf8()).sum()
631    }
632}