// provenant/parsers/clojure.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4
5use log::warn;
6use packageurl::PackageUrl;
7use serde_json::Value as JsonValue;
8
9use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
10
11use super::PackageParser;
12
13pub struct ClojureDepsEdnParser;
14
15impl PackageParser for ClojureDepsEdnParser {
16    const PACKAGE_TYPE: PackageType = PackageType::Maven;
17
18    fn is_match(path: &Path) -> bool {
19        path.file_name().is_some_and(|name| name == "deps.edn")
20    }
21
22    fn extract_packages(path: &Path) -> Vec<PackageData> {
23        let content = match fs::read_to_string(path) {
24            Ok(content) => content,
25            Err(error) => {
26                warn!("Failed to read deps.edn at {:?}: {}", path, error);
27                return vec![default_package_data(Some(DatasourceId::ClojureDepsEdn))];
28            }
29        };
30
31        match parse_forms(&content)
32            .and_then(|forms| {
33                forms
34                    .into_iter()
35                    .next()
36                    .ok_or_else(|| "deps.edn contained no readable forms".to_string())
37            })
38            .and_then(|form| parse_deps_edn_form(&form))
39        {
40            Ok(package) => vec![package],
41            Err(error) => {
42                warn!("Failed to parse deps.edn at {:?}: {}", path, error);
43                vec![default_package_data(Some(DatasourceId::ClojureDepsEdn))]
44            }
45        }
46    }
47}
48
49pub struct ClojureProjectCljParser;
50
51impl PackageParser for ClojureProjectCljParser {
52    const PACKAGE_TYPE: PackageType = PackageType::Maven;
53
54    fn is_match(path: &Path) -> bool {
55        path.file_name().is_some_and(|name| name == "project.clj")
56    }
57
58    fn extract_packages(path: &Path) -> Vec<PackageData> {
59        let content = match fs::read_to_string(path) {
60            Ok(content) => content,
61            Err(error) => {
62                warn!("Failed to read project.clj at {:?}: {}", path, error);
63                return vec![default_package_data(Some(DatasourceId::ClojureProjectClj))];
64            }
65        };
66
67        match parse_forms(&content)
68            .and_then(|forms| {
69                forms.into_iter().find(|form| {
70                    matches!(
71                        form,
72                        Form::List(items) if matches!(items.first(), Some(Form::Symbol(symbol)) if symbol == "defproject")
73                    )
74                }).ok_or_else(|| "project.clj did not contain a defproject form".to_string())
75            })
76            .and_then(|form| parse_project_clj_form(&form))
77        {
78            Ok(package) => vec![package],
79            Err(error) => {
80                warn!("Failed to parse project.clj at {:?}: {}", path, error);
81                vec![default_package_data(Some(DatasourceId::ClojureProjectClj))]
82            }
83        }
84    }
85}
86
/// A single form produced by [`Reader`].
///
/// The reader is deliberately lossy: it keeps only the distinctions the
/// manifest extractors need and folds several reader syntaxes into the same
/// variant.
#[derive(Clone, Debug)]
enum Form {
    /// The literal `nil`.
    Nil,
    /// The literals `true` and `false`.
    Bool(bool),
    /// A string literal with escapes resolved; `#"regex"` literals are stored
    /// here as well.
    String(String),
    /// Keyword text without its leading `:`.
    Keyword(String),
    /// Any other bare token: symbols, numbers and character literals.
    Symbol(String),
    /// `[...]` vectors and `#{...}` sets.
    Vector(Vec<Form>),
    /// `(...)` lists and `#(...)` anonymous functions.
    List(Vec<Form>),
    /// `{...}` maps as key/value pairs in source order (duplicates are kept).
    Map(Vec<(Form, Form)>),
    /// A form introduced by a reader prefix (`'`, `` ` ``, `~`, `@`, `#'`,
    /// `#=`, `#?`, `#?@`) or by a tagged-literal tag such as `#inst`.
    Prefixed(Box<Form>),
}

/// Minimal recursive-descent reader for the EDN / Clojure subset found in
/// `deps.edn` and `project.clj`.
struct Reader {
    /// The whole input, pre-split into chars for O(1) lookahead.
    chars: Vec<char>,
    /// Position of the next unread char in `chars`.
    index: usize,
    /// Number of `parse_form` calls currently on the stack.
    depth: usize,
}

impl Reader {
    /// Upper bound on form nesting. Manifests are untrusted input and the
    /// reader is recursive, so unbounded nesting could overflow the stack.
    const MAX_DEPTH: usize = 256;

    /// Creates a reader positioned at the start of `input`.
    fn new(input: &str) -> Self {
        Self {
            chars: input.chars().collect(),
            index: 0,
            depth: 0,
        }
    }

    /// Reads every top-level form until end of input.
    ///
    /// # Errors
    /// Returns a message describing the first malformed form.
    fn parse_all(mut self) -> Result<Vec<Form>, String> {
        let mut forms = Vec::new();
        while self.skip_trivia()? {
            forms.push(self.parse_form()?);
        }
        Ok(forms)
    }

    /// Skips whitespace, commas and `;` line comments.
    ///
    /// Returns `true` when input remains afterwards.
    fn skip_ws_and_comments(&mut self) -> bool {
        loop {
            while self
                .peek()
                .is_some_and(|ch| ch.is_whitespace() || ch == ',')
            {
                self.index += 1;
            }
            if self.peek() == Some(';') {
                // Consume through the end of the line (or the input).
                while let Some(ch) = self.peek() {
                    self.index += 1;
                    if ch == '\n' {
                        break;
                    }
                }
                continue;
            }
            return self.peek().is_some();
        }
    }

    /// Skips everything `skip_ws_and_comments` does plus `#_` discarded forms.
    ///
    /// Handling discards here, rather than as a form, is what lets a discard
    /// be the last thing in a collection or file (`[a #_b]`): the caller still
    /// sees the closing delimiter or end of input afterwards.
    ///
    /// Returns `Ok(true)` when input remains afterwards.
    fn skip_trivia(&mut self) -> Result<bool, String> {
        loop {
            self.skip_ws_and_comments();
            if self.peek() == Some('#') && self.peek_n(1) == Some('_') {
                self.index += 2;
                // The discarded form must still be well-formed.
                let _ = self.parse_form()?;
                continue;
            }
            return Ok(self.peek().is_some());
        }
    }

    /// Reads exactly one form, enforcing [`Self::MAX_DEPTH`].
    ///
    /// # Errors
    /// Fails on end of input, a stray closing delimiter, malformed syntax or
    /// excessive nesting.
    fn parse_form(&mut self) -> Result<Form, String> {
        if self.depth >= Self::MAX_DEPTH {
            return Err(format!(
                "forms nested deeper than {} levels",
                Self::MAX_DEPTH
            ));
        }
        self.depth += 1;
        let result = self.parse_form_unguarded();
        self.depth -= 1;
        result
    }

    /// Dispatches on the next significant char; depth accounting is done by
    /// `parse_form`.
    fn parse_form_unguarded(&mut self) -> Result<Form, String> {
        if !self.skip_trivia()? {
            return Err("unexpected end of input".to_string());
        }
        match self.peek() {
            Some('"') => self.parse_string().map(Form::String),
            Some(':') => self.parse_keyword().map(Form::Keyword),
            Some('[') => self.parse_collection('[', ']').map(Form::Vector),
            Some('(') => self.parse_collection('(', ')').map(Form::List),
            Some('{') => self.parse_map(),
            Some(ch @ (']' | ')' | '}')) => Err(format!("unexpected '{ch}'")),
            Some('^') => {
                // Metadata: read and drop it, then return the annotated form.
                self.index += 1;
                let _ = self.parse_form()?;
                self.parse_form()
            }
            Some('~' | '\'' | '`' | '@') => self.parse_prefixed(1),
            Some('#') => self.parse_dispatch(),
            Some('\\') => self.parse_char_literal(),
            Some(_) => self.parse_atom(),
            None => Err("unexpected end of input".to_string()),
        }
    }

    /// Skips `prefix_len` chars of reader prefix and wraps the following form
    /// in [`Form::Prefixed`].
    fn parse_prefixed(&mut self, prefix_len: usize) -> Result<Form, String> {
        self.index += prefix_len;
        let form = self.parse_form()?;
        Ok(Form::Prefixed(Box::new(form)))
    }

    /// Reads a `#`-dispatch form so that it always yields ONE form.
    ///
    /// Producing a stray `#` symbol followed by a second form would misalign
    /// key/value pairs in the enclosing map. `#_` never reaches this method
    /// because `skip_trivia` consumes it first. Unrecognized dispatch syntax
    /// (e.g. `##Inf`, `#:ns{...}`) falls back to plain atom reading.
    fn parse_dispatch(&mut self) -> Result<Form, String> {
        match self.peek_n(1) {
            Some('{') => {
                self.index += 1;
                self.parse_collection('{', '}').map(Form::Vector)
            }
            Some('(') => {
                self.index += 1;
                self.parse_collection('(', ')').map(Form::List)
            }
            Some('"') => {
                self.index += 1;
                self.parse_string().map(Form::String)
            }
            Some('\'' | '=') => self.parse_prefixed(2),
            Some('?') => {
                // `#?(...)` or the splicing variant `#?@(...)`.
                let prefix_len = if self.peek_n(2) == Some('@') { 3 } else { 2 };
                self.parse_prefixed(prefix_len)
            }
            Some(ch) if ch.is_alphabetic() => {
                // Tagged literal: `#tag value`. Stay lenient when no value
                // follows and hand the tag back as a plain symbol.
                let tag = self.parse_atom()?;
                self.skip_ws_and_comments();
                match self.peek() {
                    None | Some(']' | ')' | '}') => Ok(tag),
                    Some(_) => self.parse_prefixed(0),
                }
            }
            _ => self.parse_atom(),
        }
    }

    /// Reads a character literal such as `\a`, `\newline` or `\(` as a symbol
    /// that keeps its backslash.
    ///
    /// The char right after the backslash is always consumed, even when it is
    /// a delimiter, so `\(` does not open a list.
    fn parse_char_literal(&mut self) -> Result<Form, String> {
        let start = self.index;
        self.index += 1;
        if self.peek().is_none() {
            return Err("unterminated character literal".to_string());
        }
        self.index += 1;
        while let Some(ch) = self.peek() {
            if is_delimiter(ch) {
                break;
            }
            self.index += 1;
        }
        Ok(Form::Symbol(self.chars[start..self.index].iter().collect()))
    }

    /// Reads a double-quoted string and resolves backslash escapes.
    ///
    /// `\n`, `\r` and `\t` map to their control chars; any other escaped char
    /// is kept verbatim.
    fn parse_string(&mut self) -> Result<String, String> {
        self.expect('"')?;
        let mut result = String::new();
        let mut escaped = false;
        while let Some(ch) = self.peek() {
            self.index += 1;
            if escaped {
                result.push(match ch {
                    'n' => '\n',
                    'r' => '\r',
                    't' => '\t',
                    other => other,
                });
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                return Ok(result);
            } else {
                result.push(ch);
            }
        }
        Err("unterminated string".to_string())
    }

    /// Reads a keyword and returns its text without the leading `:`.
    fn parse_keyword(&mut self) -> Result<String, String> {
        self.expect(':')?;
        let start = self.index;
        while let Some(ch) = self.peek() {
            if is_delimiter(ch) {
                break;
            }
            self.index += 1;
        }
        if self.index == start {
            return Err("empty keyword".to_string());
        }
        Ok(self.chars[start..self.index].iter().collect())
    }

    /// Reads forms between `open` and `close`; the reader must be positioned
    /// on `open`.
    fn parse_collection(&mut self, open: char, close: char) -> Result<Vec<Form>, String> {
        self.expect(open)?;
        let mut forms = Vec::new();
        loop {
            if !self.skip_trivia()? {
                return Err(format!("unterminated collection starting with {open}"));
            }
            if self.peek() == Some(close) {
                self.index += 1;
                return Ok(forms);
            }
            forms.push(self.parse_form()?);
        }
    }

    /// Reads a `{...}` map as key/value pairs in source order.
    fn parse_map(&mut self) -> Result<Form, String> {
        self.expect('{')?;
        let mut entries = Vec::new();
        loop {
            if !self.skip_trivia()? {
                return Err("unterminated map".to_string());
            }
            if self.peek() == Some('}') {
                self.index += 1;
                return Ok(Form::Map(entries));
            }
            let key = self.parse_form()?;
            if self.skip_trivia()? && self.peek() == Some('}') {
                return Err("map missing value".to_string());
            }
            let value = self.parse_form()?;
            entries.push((key, value));
        }
    }

    /// Reads a bare token up to the next delimiter.
    ///
    /// `nil`, `true` and `false` become their own variants; everything else
    /// (including numbers) is a [`Form::Symbol`].
    fn parse_atom(&mut self) -> Result<Form, String> {
        let start = self.index;
        while let Some(ch) = self.peek() {
            if is_delimiter(ch) {
                break;
            }
            self.index += 1;
        }
        let token: String = self.chars[start..self.index].iter().collect();
        if token.is_empty() {
            return Err("empty token".to_string());
        }
        Ok(match token.as_str() {
            "nil" => Form::Nil,
            "true" => Form::Bool(true),
            "false" => Form::Bool(false),
            _ => Form::Symbol(token),
        })
    }

    /// Consumes `expected` or reports what was found instead.
    fn expect(&mut self, expected: char) -> Result<(), String> {
        match self.peek() {
            Some(ch) if ch == expected => {
                self.index += 1;
                Ok(())
            }
            Some(ch) => Err(format!("expected '{expected}', found '{ch}'")),
            None => Err(format!("expected '{expected}', found end of input")),
        }
    }

    /// Returns the next unread char without consuming it.
    fn peek(&self) -> Option<char> {
        self.chars.get(self.index).copied()
    }

    /// Returns the char `offset` positions past the next unread char.
    fn peek_n(&self, offset: usize) -> Option<char> {
        self.chars.get(self.index + offset).copied()
    }
}

/// Returns `true` for chars that terminate a bare token or keyword.
fn is_delimiter(ch: char) -> bool {
    ch.is_whitespace()
        || ch == ','
        || matches!(
            ch,
            '[' | ']' | '{' | '}' | '(' | ')' | '"' | ';' | '\'' | '`' | '~' | '@'
        )
}

/// Parses `input` into its top-level forms.
///
/// # Errors
/// Returns a human-readable message for the first syntax error encountered.
fn parse_forms(input: &str) -> Result<Vec<Form>, String> {
    Reader::new(input).parse_all()
}
302
303fn parse_deps_edn_form(form: &Form) -> Result<PackageData, String> {
304    let Form::Map(entries) = form else {
305        return Err("deps.edn root is not a map".to_string());
306    };
307
308    let mut package = default_package_data(Some(DatasourceId::ClojureDepsEdn));
309    let mut dependencies = Vec::new();
310    let mut extra_data = HashMap::new();
311
312    if let Some(Form::Map(dep_map)) = map_get_keyword(entries, "deps") {
313        dependencies.extend(extract_deps_map(dep_map, None, true));
314    }
315
316    if let Some(Form::Map(alias_map)) = map_get_keyword(entries, "aliases") {
317        for (alias_key, alias_value) in alias_map {
318            let Some(alias_name) = keyword_or_symbol_name(alias_key) else {
319                continue;
320            };
321            let Form::Map(alias_entries) = alias_value else {
322                continue;
323            };
324            for dep_key in [
325                "extra-deps",
326                "override-deps",
327                "default-deps",
328                "deps",
329                "replace-deps",
330            ] {
331                if let Some(Form::Map(dep_map)) = map_get_keyword(alias_entries, dep_key) {
332                    dependencies.extend(extract_deps_map(dep_map, Some(&alias_name), false));
333                }
334            }
335        }
336        if let Some(json) = form_to_json(&Form::Map(alias_map.clone())) {
337            extra_data.insert("aliases".to_string(), json);
338        }
339    }
340
341    if let Some(value) = map_get_keyword(entries, "paths").and_then(form_to_json) {
342        extra_data.insert("paths".to_string(), value);
343    }
344    if let Some(value) = map_get_keyword(entries, "mvn/repos").and_then(form_to_json) {
345        extra_data.insert("mvn_repos".to_string(), value);
346    }
347
348    package.dependencies = dependencies;
349    package.extra_data = (!extra_data.is_empty()).then_some(extra_data);
350    Ok(package)
351}
352
353fn parse_project_clj_form(form: &Form) -> Result<PackageData, String> {
354    let Form::List(items) = form else {
355        return Err("project.clj root is not a list".to_string());
356    };
357    if !matches!(items.first(), Some(Form::Symbol(symbol)) if symbol == "defproject") {
358        return Err("project.clj root is not defproject".to_string());
359    }
360
361    let Some((namespace, name)) = items.get(1).and_then(parse_lib_form) else {
362        return Err("defproject missing project identifier".to_string());
363    };
364    let Some(version) = items.get(2).and_then(form_as_string) else {
365        return Err("defproject missing project version".to_string());
366    };
367
368    let mut package = default_package_data(Some(DatasourceId::ClojureProjectClj));
369    package.namespace = namespace.clone();
370    package.name = Some(name.clone());
371    package.version = Some(version.to_string());
372    package.purl = build_maven_purl(namespace.as_deref(), &name, Some(version));
373
374    let mut index = 3usize;
375    while index + 1 < items.len() {
376        let Some(key) = form_as_keyword(&items[index]) else {
377            index += 1;
378            continue;
379        };
380        let value = &items[index + 1];
381
382        match key {
383            "description" => package.description = form_as_string(value).map(ToOwned::to_owned),
384            "url" => package.homepage_url = form_as_string(value).map(ToOwned::to_owned),
385            "license" => {
386                package.extracted_license_statement = format_license(value);
387            }
388            "scm" => {
389                if let Form::Map(entries) = value {
390                    package.vcs_url = map_get_keyword(entries, "url")
391                        .and_then(form_as_string)
392                        .map(ToOwned::to_owned);
393                }
394            }
395            "dependencies" => {
396                if let Form::Vector(deps) = value {
397                    package
398                        .dependencies
399                        .extend(extract_project_dependencies(deps, None));
400                }
401            }
402            "profiles" => {
403                if let Form::Map(entries) = value {
404                    for (profile_key, profile_value) in entries {
405                        let Some(profile_name) = keyword_or_symbol_name(profile_key) else {
406                            continue;
407                        };
408                        let Form::Map(profile_entries) = profile_value else {
409                            continue;
410                        };
411                        if let Some(Form::Vector(deps)) =
412                            map_get_keyword(profile_entries, "dependencies")
413                        {
414                            package
415                                .dependencies
416                                .extend(extract_project_dependencies(deps, Some(&profile_name)));
417                        }
418                    }
419                }
420            }
421            _ => {}
422        }
423        index += 2;
424    }
425
426    Ok(package)
427}
428
429fn extract_deps_map(
430    entries: &[(Form, Form)],
431    scope: Option<&str>,
432    runtime: bool,
433) -> Vec<Dependency> {
434    entries
435        .iter()
436        .filter_map(|(lib, coord)| build_deps_edn_dependency(lib, coord, scope, runtime))
437        .collect()
438}
439
440fn build_deps_edn_dependency(
441    lib: &Form,
442    coord: &Form,
443    scope: Option<&str>,
444    runtime: bool,
445) -> Option<Dependency> {
446    let (namespace, name) = parse_lib_form(lib)?;
447    let mut extra_data = HashMap::new();
448    let mut requirement = None;
449    let mut pinned = false;
450
451    if let Form::Map(entries) = coord {
452        if let Some(version) = map_get_keyword(entries, "mvn/version").and_then(form_as_string) {
453            requirement = Some(version.to_string());
454            pinned = is_exact_version(version);
455        }
456        for (key, data_key) in [
457            ("git/url", "git_url"),
458            ("git/tag", "git_tag"),
459            ("git/sha", "git_sha"),
460            ("deps/root", "deps_root"),
461            ("deps/manifest", "deps_manifest"),
462            ("local/root", "local_root"),
463            ("exclusions", "exclusions"),
464        ] {
465            if let Some(value) = map_get_keyword(entries, key).and_then(form_to_json) {
466                extra_data.insert(data_key.to_string(), value);
467            }
468        }
469    }
470
471    Some(Dependency {
472        purl: build_maven_purl(
473            namespace.as_deref(),
474            &name,
475            requirement.as_deref().map(strip_exact_prefix),
476        ),
477        extracted_requirement: requirement,
478        scope: scope.map(ToOwned::to_owned),
479        is_runtime: Some(runtime),
480        is_optional: Some(scope.is_some()),
481        is_pinned: Some(pinned),
482        is_direct: Some(true),
483        resolved_package: None,
484        extra_data: (!extra_data.is_empty()).then_some(extra_data),
485    })
486}
487
488fn extract_project_dependencies(entries: &[Form], scope: Option<&str>) -> Vec<Dependency> {
489    entries
490        .iter()
491        .filter_map(|entry| {
492            let Form::Vector(parts) = entry else {
493                return None;
494            };
495            let (namespace, name) = parse_lib_form(parts.first()?)?;
496            let version = form_as_string(parts.get(1)?)?;
497
498            let mut extra_data = HashMap::new();
499            let mut index = 2usize;
500            while index + 1 < parts.len() {
501                if let Some(key) = form_as_keyword(&parts[index])
502                    && let Some(value) = form_to_json(&parts[index + 1])
503                {
504                    extra_data.insert(key.replace('-', "_"), value);
505                }
506                index += 2;
507            }
508
509            let (is_runtime, is_optional) = match scope {
510                Some("dev") | Some("test") => (false, true),
511                Some("provided") => (false, false),
512                Some(_) => (false, true),
513                None => (true, false),
514            };
515
516            Some(Dependency {
517                purl: build_maven_purl(
518                    namespace.as_deref(),
519                    &name,
520                    Some(strip_exact_prefix(version)),
521                ),
522                extracted_requirement: Some(version.to_string()),
523                scope: scope.map(ToOwned::to_owned),
524                is_runtime: Some(is_runtime),
525                is_optional: Some(is_optional),
526                is_pinned: Some(is_exact_version(version)),
527                is_direct: Some(true),
528                resolved_package: None,
529                extra_data: (!extra_data.is_empty()).then_some(extra_data),
530            })
531        })
532        .collect()
533}
534
535fn parse_lib_form(form: &Form) -> Option<(Option<String>, String)> {
536    let raw = match form {
537        Form::Symbol(value) | Form::String(value) => value,
538        _ => return None,
539    };
540
541    if let Some((namespace, name)) = raw.split_once('/') {
542        Some((Some(namespace.to_string()), name.to_string()))
543    } else {
544        Some((Some(raw.to_string()), raw.to_string()))
545    }
546}
547
548fn map_get_keyword<'a>(entries: &'a [(Form, Form)], key: &str) -> Option<&'a Form> {
549    entries.iter().find_map(|(entry_key, entry_value)| {
550        if form_as_keyword(entry_key) == Some(key) {
551            Some(entry_value)
552        } else {
553            None
554        }
555    })
556}
557
558fn form_as_keyword(form: &Form) -> Option<&str> {
559    match form {
560        Form::Keyword(value) => Some(value.as_str()),
561        _ => None,
562    }
563}
564
565fn form_as_string(form: &Form) -> Option<&str> {
566    match form {
567        Form::String(value) => Some(value.as_str()),
568        _ => None,
569    }
570}
571
572fn keyword_or_symbol_name(form: &Form) -> Option<String> {
573    match form {
574        Form::Keyword(value) | Form::Symbol(value) => Some(value.clone()),
575        _ => None,
576    }
577}
578
579fn map_key_name(form: &Form) -> Option<String> {
580    match form {
581        Form::Keyword(value) | Form::Symbol(value) | Form::String(value) => Some(value.clone()),
582        _ => None,
583    }
584}
585
586fn form_to_json(form: &Form) -> Option<JsonValue> {
587    Some(match form {
588        Form::Nil => JsonValue::Null,
589        Form::Bool(value) => JsonValue::Bool(*value),
590        Form::String(value) => JsonValue::String(value.clone()),
591        Form::Keyword(value) => JsonValue::String(format!(":{value}")),
592        Form::Symbol(value) => JsonValue::String(value.clone()),
593        Form::Vector(values) | Form::List(values) => {
594            JsonValue::Array(values.iter().filter_map(form_to_json).collect())
595        }
596        Form::Map(entries) => {
597            let mut map = serde_json::Map::new();
598            for (key, value) in entries {
599                let Some(key_name) = map_key_name(key) else {
600                    continue;
601                };
602                if let Some(json) = form_to_json(value) {
603                    map.insert(key_name, json);
604                }
605            }
606            JsonValue::Object(map)
607        }
608        Form::Prefixed(value) => form_to_json(value)?,
609    })
610}
611
612fn format_license(form: &Form) -> Option<String> {
613    match form {
614        Form::Map(entries) => format_license_map(entries),
615        Form::Vector(values) | Form::List(values) => {
616            let licenses: Vec<String> = values.iter().filter_map(format_license).collect();
617            if licenses.is_empty() {
618                None
619            } else {
620                Some(licenses.join("\n"))
621            }
622        }
623        _ => None,
624    }
625}
626
627fn format_license_map(entries: &[(Form, Form)]) -> Option<String> {
628    let name = map_get_keyword(entries, "name").and_then(form_as_string)?;
629    let mut rendered = format!("- license:\n    name: {name}\n");
630    if let Some(url) = map_get_keyword(entries, "url").and_then(form_as_string) {
631        rendered.push_str(&format!("    url: {url}\n"));
632    }
633    Some(rendered)
634}
635
636fn build_maven_purl(namespace: Option<&str>, name: &str, version: Option<&str>) -> Option<String> {
637    let mut purl = PackageUrl::new(PackageType::Maven.as_str(), name).ok()?;
638    if let Some(namespace) = namespace {
639        purl.with_namespace(namespace).ok()?;
640    }
641    if let Some(version) = version {
642        purl.with_version(version).ok()?;
643    }
644    Some(purl.to_string())
645}
646
/// Reports whether `version` names exactly one version, i.e. whether a
/// dependency on it is pinned.
///
/// Leading `=` characters and surrounding whitespace are ignored. A version
/// is NOT exact when it is empty, contains a range or wildcard marker
/// (`*`, `^`, `~`, `>`, `<`, `|`, `,` or a space), or is one of the Maven
/// meta-versions `LATEST` / `RELEASE`, which float to whatever the repository
/// currently publishes.
fn is_exact_version(version: &str) -> bool {
    /// Characters that only occur in ranges, wildcards or compound
    /// requirements.
    const RANGE_MARKERS: [char; 8] = ['*', '^', '~', '>', '<', '|', ',', ' '];
    /// Maven meta-versions resolved at build time rather than fixed.
    const FLOATING_VERSIONS: [&str; 2] = ["LATEST", "RELEASE"];

    // Same normalization as `strip_exact_prefix`, inlined so this check does
    // not depend on other items.
    let normalized = version.trim_start_matches('=').trim();

    !normalized.is_empty()
        && !FLOATING_VERSIONS.contains(&normalized)
        && !normalized.contains(&RANGE_MARKERS[..])
}
659
/// Removes every leading `=` from `version`, leaving the rest untouched.
fn strip_exact_prefix(version: &str) -> &str {
    let mut rest = version;
    while let Some(stripped) = rest.strip_prefix('=') {
        rest = stripped;
    }
    rest
}
663
664fn default_package_data(datasource_id: Option<DatasourceId>) -> PackageData {
665    PackageData {
666        package_type: Some(PackageType::Maven),
667        primary_language: Some("Clojure".to_string()),
668        datasource_id,
669        ..Default::default()
670    }
671}
672
// Registers this module's parsers with the crate-wide parser registry.
// NOTE(review): the macro definition is not visible here. The arguments appear
// to be, in order: a human-readable description, the path globs handled, the
// package type, the primary language and a documentation URL. Confirm against
// `register_parser!`.
crate::register_parser!(
    "Clojure deps.edn and project.clj manifests",
    &["**/deps.edn", "**/project.clj"],
    "maven",
    "Clojure",
    Some("https://clojure.org/reference/deps_edn"),
);