Skip to main content

provenant/parsers/
clojure.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::Path;
4
5use crate::parser_warn as warn;
6use packageurl::PackageUrl;
7use serde_json::Value as JsonValue;
8
9use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
10
11use super::PackageParser;
12
13pub struct ClojureDepsEdnParser;
14
15impl PackageParser for ClojureDepsEdnParser {
16    const PACKAGE_TYPE: PackageType = PackageType::Maven;
17
18    fn is_match(path: &Path) -> bool {
19        path.file_name().is_some_and(|name| name == "deps.edn")
20    }
21
22    fn extract_packages(path: &Path) -> Vec<PackageData> {
23        let content = match fs::read_to_string(path) {
24            Ok(content) => content,
25            Err(error) => {
26                warn!("Failed to read deps.edn at {:?}: {}", path, error);
27                return vec![default_package_data(Some(DatasourceId::ClojureDepsEdn))];
28            }
29        };
30
31        match parse_forms(&content)
32            .and_then(|forms| {
33                forms
34                    .into_iter()
35                    .next()
36                    .ok_or_else(|| "deps.edn contained no readable forms".to_string())
37            })
38            .and_then(|form| parse_deps_edn_form(&form))
39        {
40            Ok(package) => vec![package],
41            Err(error) => {
42                warn!("Failed to parse deps.edn at {:?}: {}", path, error);
43                vec![default_package_data(Some(DatasourceId::ClojureDepsEdn))]
44            }
45        }
46    }
47}
48
49pub struct ClojureProjectCljParser;
50
51impl PackageParser for ClojureProjectCljParser {
52    const PACKAGE_TYPE: PackageType = PackageType::Maven;
53
54    fn is_match(path: &Path) -> bool {
55        path.file_name().is_some_and(|name| name == "project.clj")
56    }
57
58    fn extract_packages(path: &Path) -> Vec<PackageData> {
59        let content = match fs::read_to_string(path) {
60            Ok(content) => content,
61            Err(error) => {
62                warn!("Failed to read project.clj at {:?}: {}", path, error);
63                return vec![default_package_data(Some(DatasourceId::ClojureProjectClj))];
64            }
65        };
66
67        if looks_like_template_project_clj(&content) {
68            return vec![default_package_data(Some(DatasourceId::ClojureProjectClj))];
69        }
70
71        if !content.contains("(defproject") {
72            return vec![default_package_data(Some(DatasourceId::ClojureProjectClj))];
73        }
74
75        let forms = match parse_forms(&content) {
76            Ok(forms) => forms,
77            Err(error) => {
78                warn!("Failed to parse project.clj at {:?}: {}", path, error);
79                return vec![default_package_data(Some(DatasourceId::ClojureProjectClj))];
80            }
81        };
82
83        let Some(form) = forms.into_iter().find(|form| {
84            matches!(
85                form,
86                Form::List(items) if matches!(items.first(), Some(Form::Symbol(symbol)) if symbol == "defproject")
87            )
88        }) else {
89            return vec![default_package_data(Some(DatasourceId::ClojureProjectClj))];
90        };
91
92        match parse_project_clj_form(&form) {
93            Ok(package) => vec![package],
94            Err(error) => {
95                warn!("Failed to parse project.clj at {:?}: {}", path, error);
96                vec![default_package_data(Some(DatasourceId::ClojureProjectClj))]
97            }
98        }
99    }
100}
101
/// A single form produced by [`Reader`], covering the subset of EDN/Clojure syntax this
/// module inspects.
#[derive(Clone, Debug)]
enum Form {
    /// The literal `nil`.
    Nil,
    /// The literals `true` and `false`.
    Bool(bool),
    /// A double-quoted string with escapes resolved; regex literals are read into this
    /// variant as well.
    String(String),
    /// A keyword, stored without its leading `:`.
    Keyword(String),
    /// Any other bare token (symbols, numbers, ...), stored verbatim.
    Symbol(String),
    /// A `[...]` vector; `#{...}` set literals are read into this variant as well.
    Vector(Vec<Form>),
    /// A `(...)` list; `#(...)` function literals are read into this variant as well.
    List(Vec<Form>),
    /// A `{...}` map, kept as key/value pairs in source order.
    Map(Vec<(Form, Form)>),
    /// A form that was preceded by one of the prefix characters `~`, `'`, `` ` `` or `@`.
    Prefixed(Box<Form>),
}
114
115struct Reader {
116    chars: Vec<char>,
117    index: usize,
118}
119
120impl Reader {
121    fn new(input: &str) -> Self {
122        Self {
123            chars: input.chars().collect(),
124            index: 0,
125        }
126    }
127
128    fn parse_all(mut self) -> Result<Vec<Form>, String> {
129        let mut forms = Vec::new();
130        while self.skip_ws_and_comments() {
131            forms.push(self.parse_form()?);
132        }
133        Ok(forms)
134    }
135
136    fn skip_ws_and_comments(&mut self) -> bool {
137        loop {
138            while self
139                .peek()
140                .is_some_and(|ch| ch.is_whitespace() || ch == ',')
141            {
142                self.index += 1;
143            }
144            if self.peek() == Some(';') {
145                while let Some(ch) = self.peek() {
146                    self.index += 1;
147                    if ch == '\n' {
148                        break;
149                    }
150                }
151                continue;
152            }
153            return self.peek().is_some();
154        }
155    }
156
157    fn parse_form(&mut self) -> Result<Form, String> {
158        self.skip_ws_and_comments();
159        match self.peek() {
160            Some('"') => self.parse_string().map(Form::String),
161            Some(':') => self.parse_keyword().map(Form::Keyword),
162            Some('[') => self.parse_collection('[', ']').map(Form::Vector),
163            Some('(') => self.parse_collection('(', ')').map(Form::List),
164            Some('{') => self.parse_map(),
165            Some('^') => {
166                self.index += 1;
167                let _ = self.parse_form()?;
168                self.parse_form()
169            }
170            Some('~') | Some('\'') | Some('`') | Some('@') => {
171                self.index += 1;
172                let form = self.parse_form()?;
173                Ok(Form::Prefixed(Box::new(form)))
174            }
175            Some('#') => self.parse_dispatch_form(),
176            Some(_) => self.parse_atom(),
177            None => Err("unexpected end of input".to_string()),
178        }
179    }
180
181    fn parse_dispatch_form(&mut self) -> Result<Form, String> {
182        self.expect('#')?;
183        match self.peek() {
184            Some('_') => {
185                self.index += 1;
186                let _ = self.parse_form()?;
187                self.parse_form()
188            }
189            Some('=') => Err("unsupported reader eval dispatch".to_string()),
190            Some('"') => {
191                // Tolerate regex literals in ignored fields without implementing reader semantics.
192                self.parse_string().map(Form::String)
193            }
194            Some('{') => {
195                // Tolerate set literals in ignored fields by treating them as plain collections.
196                self.parse_collection('{', '}').map(Form::Vector)
197            }
198            Some('(') => {
199                // Tolerate function literals in ignored fields without implementing reader semantics.
200                self.parse_collection('(', ')').map(Form::List)
201            }
202            Some('?') => {
203                // Tolerate reader conditionals by skipping the dispatch token and
204                // returning the selected readable form without evaluating features.
205                self.index += 1;
206                if self.peek() == Some('@') {
207                    self.index += 1;
208                }
209                let _ = self.parse_form()?;
210                self.parse_form()
211            }
212            Some(ch) if !is_delimiter(ch) => {
213                // Tolerate tagged literals in ignored fields by ignoring the tag and
214                // parsing the following readable form as plain data.
215                let _ = self.parse_atom()?;
216                self.parse_form()
217            }
218            Some(ch) => Err(format!("unsupported reader dispatch '#{ch}'")),
219            None => Err("unexpected end of input after '#'".to_string()),
220        }
221    }
222
223    fn parse_string(&mut self) -> Result<String, String> {
224        self.expect('"')?;
225        let mut result = String::new();
226        let mut escaped = false;
227        while let Some(ch) = self.peek() {
228            self.index += 1;
229            if escaped {
230                result.push(match ch {
231                    'n' => '\n',
232                    'r' => '\r',
233                    't' => '\t',
234                    '"' => '"',
235                    '\\' => '\\',
236                    other => other,
237                });
238                escaped = false;
239            } else if ch == '\\' {
240                escaped = true;
241            } else if ch == '"' {
242                return Ok(result);
243            } else {
244                result.push(ch);
245            }
246        }
247        Err("unterminated string".to_string())
248    }
249
250    fn parse_keyword(&mut self) -> Result<String, String> {
251        self.expect(':')?;
252        let start = self.index;
253        while let Some(ch) = self.peek() {
254            if is_delimiter(ch) {
255                break;
256            }
257            self.index += 1;
258        }
259        if self.index == start {
260            return Err("empty keyword".to_string());
261        }
262        Ok(self.chars[start..self.index].iter().collect())
263    }
264
265    fn parse_collection(&mut self, open: char, close: char) -> Result<Vec<Form>, String> {
266        self.expect(open)?;
267        let mut forms = Vec::new();
268        loop {
269            self.skip_ws_and_comments();
270            if self.peek() == Some(close) {
271                self.index += 1;
272                return Ok(forms);
273            }
274            if self.peek().is_none() {
275                return Err(format!("unterminated collection starting with {open}"));
276            }
277            forms.push(self.parse_form()?);
278        }
279    }
280
281    fn parse_map(&mut self) -> Result<Form, String> {
282        self.expect('{')?;
283        let mut entries = Vec::new();
284        loop {
285            self.skip_ws_and_comments();
286            if self.peek() == Some('}') {
287                self.index += 1;
288                return Ok(Form::Map(entries));
289            }
290            if self.peek().is_none() {
291                return Err("unterminated map".to_string());
292            }
293            let key = self.parse_form()?;
294            self.skip_ws_and_comments();
295            if self.peek() == Some('}') {
296                return Err("map missing value".to_string());
297            }
298            let value = self.parse_form()?;
299            entries.push((key, value));
300        }
301    }
302
303    fn parse_atom(&mut self) -> Result<Form, String> {
304        let start = self.index;
305        while let Some(ch) = self.peek() {
306            if is_delimiter(ch) {
307                break;
308            }
309            self.index += 1;
310        }
311        let token: String = self.chars[start..self.index].iter().collect();
312        if token.is_empty() {
313            return Err("empty token".to_string());
314        }
315        Ok(match token.as_str() {
316            "nil" => Form::Nil,
317            "true" => Form::Bool(true),
318            "false" => Form::Bool(false),
319            _ => Form::Symbol(token),
320        })
321    }
322
323    fn expect(&mut self, expected: char) -> Result<(), String> {
324        match self.peek() {
325            Some(ch) if ch == expected => {
326                self.index += 1;
327                Ok(())
328            }
329            Some(ch) => Err(format!("expected '{expected}', found '{ch}'")),
330            None => Err(format!("expected '{expected}', found end of input")),
331        }
332    }
333
334    fn peek(&self) -> Option<char> {
335        self.chars.get(self.index).copied()
336    }
337}
338
/// Reports whether `ch` ends a bare token (keyword, symbol, number, ...).
///
/// Delimiters are whitespace, the comma, all bracket characters, the string quote, the
/// comment marker and the reader prefix characters.
fn is_delimiter(ch: char) -> bool {
    match ch {
        ',' | '[' | ']' | '{' | '}' | '(' | ')' | '"' | ';' | '\'' | '`' | '~' | '@' => true,
        other => other.is_whitespace(),
    }
}
347
348fn parse_forms(input: &str) -> Result<Vec<Form>, String> {
349    Reader::new(input).parse_all()
350}
351
352fn parse_deps_edn_form(form: &Form) -> Result<PackageData, String> {
353    let Form::Map(entries) = form else {
354        return Err("deps.edn root is not a map".to_string());
355    };
356
357    let mut package = default_package_data(Some(DatasourceId::ClojureDepsEdn));
358    let mut dependencies = Vec::new();
359    let mut extra_data = HashMap::new();
360
361    if let Some(Form::Map(dep_map)) = map_get_keyword(entries, "deps") {
362        dependencies.extend(extract_deps_map(dep_map, None, true));
363    }
364
365    if let Some(Form::Map(alias_map)) = map_get_keyword(entries, "aliases") {
366        for (alias_key, alias_value) in alias_map {
367            let Some(alias_name) = keyword_or_symbol_name(alias_key) else {
368                continue;
369            };
370            let Form::Map(alias_entries) = alias_value else {
371                continue;
372            };
373            for dep_key in [
374                "extra-deps",
375                "override-deps",
376                "default-deps",
377                "deps",
378                "replace-deps",
379            ] {
380                if let Some(Form::Map(dep_map)) = map_get_keyword(alias_entries, dep_key) {
381                    dependencies.extend(extract_deps_map(dep_map, Some(&alias_name), false));
382                }
383            }
384        }
385        if let Some(json) = form_to_json(&Form::Map(alias_map.clone())) {
386            extra_data.insert("aliases".to_string(), json);
387        }
388    }
389
390    if let Some(value) = map_get_keyword(entries, "paths").and_then(form_to_json) {
391        extra_data.insert("paths".to_string(), value);
392    }
393    if let Some(value) = map_get_keyword(entries, "mvn/repos").and_then(form_to_json) {
394        extra_data.insert("mvn_repos".to_string(), value);
395    }
396
397    package.dependencies = dependencies;
398    package.extra_data = (!extra_data.is_empty()).then_some(extra_data);
399    Ok(package)
400}
401
402fn parse_project_clj_form(form: &Form) -> Result<PackageData, String> {
403    let Form::List(items) = form else {
404        return Err("project.clj root is not a list".to_string());
405    };
406    if !matches!(items.first(), Some(Form::Symbol(symbol)) if symbol == "defproject") {
407        return Err("project.clj root is not defproject".to_string());
408    }
409
410    let Some((namespace, name)) = items.get(1).and_then(parse_lib_form) else {
411        return Err("defproject missing project identifier".to_string());
412    };
413    let Some(version) = items.get(2).and_then(form_as_string) else {
414        return Err("defproject missing project version".to_string());
415    };
416
417    let mut package = default_package_data(Some(DatasourceId::ClojureProjectClj));
418    package.namespace = namespace.clone();
419    package.name = Some(name.clone());
420    package.version = Some(version.to_string());
421    package.purl = build_maven_purl(namespace.as_deref(), &name, Some(version));
422
423    let mut index = 3usize;
424    while index + 1 < items.len() {
425        let Some(key) = form_as_keyword(&items[index]) else {
426            index += 1;
427            continue;
428        };
429        let value = &items[index + 1];
430
431        match key {
432            "description" => package.description = form_as_string(value).map(ToOwned::to_owned),
433            "url" => package.homepage_url = form_as_string(value).map(ToOwned::to_owned),
434            "license" => {
435                package.extracted_license_statement = format_license(value);
436            }
437            "scm" => {
438                if let Form::Map(entries) = value {
439                    package.vcs_url = map_get_keyword(entries, "url")
440                        .and_then(form_as_string)
441                        .map(ToOwned::to_owned);
442                }
443            }
444            "dependencies" => {
445                if let Form::Vector(deps) = value {
446                    package
447                        .dependencies
448                        .extend(extract_project_dependencies(deps, None));
449                }
450            }
451            "profiles" => {
452                if let Form::Map(entries) = value {
453                    for (profile_key, profile_value) in entries {
454                        let Some(profile_name) = keyword_or_symbol_name(profile_key) else {
455                            continue;
456                        };
457                        let Form::Map(profile_entries) = profile_value else {
458                            continue;
459                        };
460                        if let Some(Form::Vector(deps)) =
461                            map_get_keyword(profile_entries, "dependencies")
462                        {
463                            package
464                                .dependencies
465                                .extend(extract_project_dependencies(deps, Some(&profile_name)));
466                        }
467                    }
468                }
469            }
470            _ => {}
471        }
472        index += 2;
473    }
474
475    Ok(package)
476}
477
478fn extract_deps_map(
479    entries: &[(Form, Form)],
480    scope: Option<&str>,
481    runtime: bool,
482) -> Vec<Dependency> {
483    entries
484        .iter()
485        .filter_map(|(lib, coord)| build_deps_edn_dependency(lib, coord, scope, runtime))
486        .collect()
487}
488
489fn build_deps_edn_dependency(
490    lib: &Form,
491    coord: &Form,
492    scope: Option<&str>,
493    runtime: bool,
494) -> Option<Dependency> {
495    let (namespace, name) = parse_lib_form(lib)?;
496    let mut extra_data = HashMap::new();
497    let mut requirement = None;
498    let mut pinned = false;
499
500    if let Form::Map(entries) = coord {
501        if let Some(version) = map_get_keyword(entries, "mvn/version").and_then(form_as_string) {
502            requirement = Some(version.to_string());
503            pinned = is_exact_version(version);
504        }
505        for (key, data_key) in [
506            ("git/url", "git_url"),
507            ("git/tag", "git_tag"),
508            ("git/sha", "git_sha"),
509            ("deps/root", "deps_root"),
510            ("deps/manifest", "deps_manifest"),
511            ("local/root", "local_root"),
512            ("exclusions", "exclusions"),
513        ] {
514            if let Some(value) = map_get_keyword(entries, key).and_then(form_to_json) {
515                extra_data.insert(data_key.to_string(), value);
516            }
517        }
518    }
519
520    Some(Dependency {
521        purl: build_maven_purl(
522            namespace.as_deref(),
523            &name,
524            requirement.as_deref().map(strip_exact_prefix),
525        ),
526        extracted_requirement: requirement,
527        scope: scope.map(ToOwned::to_owned),
528        is_runtime: Some(runtime),
529        is_optional: Some(scope.is_some()),
530        is_pinned: Some(pinned),
531        is_direct: Some(true),
532        resolved_package: None,
533        extra_data: (!extra_data.is_empty()).then_some(extra_data),
534    })
535}
536
537fn extract_project_dependencies(entries: &[Form], scope: Option<&str>) -> Vec<Dependency> {
538    entries
539        .iter()
540        .filter_map(|entry| {
541            let Form::Vector(parts) = entry else {
542                return None;
543            };
544            let (namespace, name) = parse_lib_form(parts.first()?)?;
545            let version = form_as_string(parts.get(1)?)?;
546
547            let mut extra_data = HashMap::new();
548            let mut index = 2usize;
549            while index + 1 < parts.len() {
550                if let Some(key) = form_as_keyword(&parts[index])
551                    && let Some(value) = form_to_json(&parts[index + 1])
552                {
553                    extra_data.insert(key.replace('-', "_"), value);
554                }
555                index += 2;
556            }
557
558            let (is_runtime, is_optional) = match scope {
559                Some("dev") | Some("test") => (false, true),
560                Some("provided") => (false, false),
561                Some(_) => (false, true),
562                None => (true, false),
563            };
564
565            Some(Dependency {
566                purl: build_maven_purl(
567                    namespace.as_deref(),
568                    &name,
569                    Some(strip_exact_prefix(version)),
570                ),
571                extracted_requirement: Some(version.to_string()),
572                scope: scope.map(ToOwned::to_owned),
573                is_runtime: Some(is_runtime),
574                is_optional: Some(is_optional),
575                is_pinned: Some(is_exact_version(version)),
576                is_direct: Some(true),
577                resolved_package: None,
578                extra_data: (!extra_data.is_empty()).then_some(extra_data),
579            })
580        })
581        .collect()
582}
583
584fn parse_lib_form(form: &Form) -> Option<(Option<String>, String)> {
585    let raw = match form {
586        Form::Symbol(value) | Form::String(value) => value,
587        _ => return None,
588    };
589
590    if let Some((namespace, name)) = raw.split_once('/') {
591        Some((Some(namespace.to_string()), name.to_string()))
592    } else {
593        Some((Some(raw.to_string()), raw.to_string()))
594    }
595}
596
597fn map_get_keyword<'a>(entries: &'a [(Form, Form)], key: &str) -> Option<&'a Form> {
598    entries.iter().find_map(|(entry_key, entry_value)| {
599        if form_as_keyword(entry_key) == Some(key) {
600            Some(entry_value)
601        } else {
602            None
603        }
604    })
605}
606
607fn form_as_keyword(form: &Form) -> Option<&str> {
608    match form {
609        Form::Keyword(value) => Some(value.as_str()),
610        _ => None,
611    }
612}
613
614fn form_as_string(form: &Form) -> Option<&str> {
615    match form {
616        Form::String(value) => Some(value.as_str()),
617        _ => None,
618    }
619}
620
621fn keyword_or_symbol_name(form: &Form) -> Option<String> {
622    match form {
623        Form::Keyword(value) | Form::Symbol(value) => Some(value.clone()),
624        _ => None,
625    }
626}
627
628fn map_key_name(form: &Form) -> Option<String> {
629    match form {
630        Form::Keyword(value) | Form::Symbol(value) | Form::String(value) => Some(value.clone()),
631        _ => None,
632    }
633}
634
635fn form_to_json(form: &Form) -> Option<JsonValue> {
636    Some(match form {
637        Form::Nil => JsonValue::Null,
638        Form::Bool(value) => JsonValue::Bool(*value),
639        Form::String(value) => JsonValue::String(value.clone()),
640        Form::Keyword(value) => JsonValue::String(format!(":{value}")),
641        Form::Symbol(value) => JsonValue::String(value.clone()),
642        Form::Vector(values) | Form::List(values) => {
643            JsonValue::Array(values.iter().filter_map(form_to_json).collect())
644        }
645        Form::Map(entries) => {
646            let mut map = serde_json::Map::new();
647            for (key, value) in entries {
648                let Some(key_name) = map_key_name(key) else {
649                    continue;
650                };
651                if let Some(json) = form_to_json(value) {
652                    map.insert(key_name, json);
653                }
654            }
655            JsonValue::Object(map)
656        }
657        Form::Prefixed(value) => form_to_json(value)?,
658    })
659}
660
661fn format_license(form: &Form) -> Option<String> {
662    match form {
663        Form::Map(entries) => format_license_map(entries),
664        Form::Vector(values) | Form::List(values) => {
665            let licenses: Vec<String> = values.iter().filter_map(format_license).collect();
666            if licenses.is_empty() {
667                None
668            } else {
669                Some(licenses.join("\n"))
670            }
671        }
672        _ => None,
673    }
674}
675
676fn format_license_map(entries: &[(Form, Form)]) -> Option<String> {
677    let name = map_get_keyword(entries, "name").and_then(form_as_string)?;
678    let mut rendered = format!("- license:\n    name: {name}\n");
679    if let Some(url) = map_get_keyword(entries, "url").and_then(form_as_string) {
680        rendered.push_str(&format!("    url: {url}\n"));
681    }
682    Some(rendered)
683}
684
685fn build_maven_purl(namespace: Option<&str>, name: &str, version: Option<&str>) -> Option<String> {
686    let mut purl = PackageUrl::new(PackageType::Maven.as_str(), name).ok()?;
687    if let Some(namespace) = namespace {
688        purl.with_namespace(namespace).ok()?;
689    }
690    if let Some(version) = version {
691        purl.with_version(version).ok()?;
692    }
693    Some(purl.to_string())
694}
695
696fn is_exact_version(version: &str) -> bool {
697    let normalized = strip_exact_prefix(version).trim();
698    !normalized.is_empty()
699        && !normalized.contains('*')
700        && !normalized.contains('^')
701        && !normalized.contains('~')
702        && !normalized.contains('>')
703        && !normalized.contains('<')
704        && !normalized.contains('|')
705        && !normalized.contains(',')
706        && !normalized.contains(' ')
707}
708
/// Removes every leading `=` from `version`.
fn strip_exact_prefix(version: &str) -> &str {
    let mut rest = version;
    while let Some(stripped) = rest.strip_prefix('=') {
        rest = stripped;
    }
    rest
}
712
/// Heuristically detects an unrendered project template: a `project.clj` whose
/// `(defproject` header still contains `{{ ... }}` placeholders.
///
/// Only the first 256 bytes starting at `(defproject` are inspected. Returns `false`
/// when the content has no `(defproject` at all.
fn looks_like_template_project_clj(content: &str) -> bool {
    const WINDOW_BYTES: usize = 256;

    let Some(defproject_index) = content.find("(defproject") else {
        return false;
    };

    // Slicing a `str` at an offset inside a multi-byte character panics, so pull the end
    // of the window back to the nearest character boundary. The loop always terminates:
    // `defproject_index` is itself a boundary and lies below `window_end`.
    let mut window_end = content
        .len()
        .min(defproject_index.saturating_add(WINDOW_BYTES));
    while !content.is_char_boundary(window_end) {
        window_end -= 1;
    }

    let manifest_window = &content[defproject_index..window_end];
    manifest_window.contains("{{") && manifest_window.contains("}}")
}
721
722fn default_package_data(datasource_id: Option<DatasourceId>) -> PackageData {
723    PackageData {
724        package_type: Some(PackageType::Maven),
725        primary_language: Some("Clojure".to_string()),
726        datasource_id,
727        ..Default::default()
728    }
729}
730
// Registers this module's parsers through the crate-level `register_parser!` macro.
// NOTE(review): judging by their values, the arguments look like a description, the
// matched path globs, the package type, the primary language and a documentation URL —
// confirm against the macro definition.
crate::register_parser!(
    "Clojure deps.edn and project.clj manifests",
    &["**/deps.edn", "**/project.clj"],
    "maven",
    "Clojure",
    Some("https://clojure.org/reference/deps_edn"),
);