Skip to main content

provenant/parsers/
bun_lock.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use serde_json::{Map, Value as JsonValue};
9
10use crate::models::{
11    DatasourceId, Dependency, Md5Digest, PackageData, PackageType, ResolvedPackage, Sha1Digest,
12    Sha256Digest, Sha512Digest,
13};
14use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, parse_sri, truncate_field};
15
16use super::PackageParser;
17
/// Parser for Bun's text lockfile (`bun.lock`).
///
/// Stateless marker type; all behavior lives in its [`PackageParser`] implementation.
pub struct BunLockParser;
19
/// Classification of a dependency declared in a workspace manifest section.
#[derive(Clone, Debug)]
struct ManifestDependencyInfo {
    /// Name of the manifest section the dependency was declared in
    /// (for example `"dependencies"` or `"devDependencies"`).
    scope: &'static str,
    /// Runtime flag reported for dependencies coming from that section.
    is_runtime: bool,
    /// Optional flag reported for dependencies coming from that section.
    is_optional: bool,
}
26
/// Data gathered from the lockfile's `workspaces` object before package entries are parsed.
struct WorkspaceContext {
    /// `name` of the root workspace (the entry stored under the empty key), if present.
    root_name: Option<String>,
    /// `version` of the root workspace, if present.
    root_version: Option<String>,
    /// Dependencies declared by any workspace, keyed by dependency name.
    direct_deps: HashMap<String, ManifestDependencyInfo>,
    /// Workspace name -> version, for workspaces that declare both fields.
    workspace_versions: HashMap<String, String>,
    /// Workspace name -> that workspace's raw JSON entry.
    workspace_entries: HashMap<String, JsonValue>,
}
34
35impl PackageParser for BunLockParser {
36    const PACKAGE_TYPE: PackageType = PackageType::Npm;
37
38    fn is_match(path: &Path) -> bool {
39        path.file_name()
40            .and_then(|name| name.to_str())
41            .is_some_and(|name| name == "bun.lock")
42    }
43
44    fn extract_packages(path: &Path) -> Vec<PackageData> {
45        let content = match crate::parsers::utils::read_file_to_string(path, None) {
46            Ok(content) => content,
47            Err(e) => {
48                warn!("Failed to read bun.lock at {:?}: {}", path, e);
49                return vec![default_package_data()];
50            }
51        };
52
53        let root: JsonValue = match json5::from_str(&content) {
54            Ok(root) => root,
55            Err(e) => {
56                warn!("Failed to parse bun.lock at {:?}: {}", path, e);
57                return vec![default_package_data()];
58            }
59        };
60
61        vec![parse_bun_lockfile(&root)]
62    }
63
64    fn metadata() -> Vec<super::metadata::ParserMetadata> {
65        vec![super::metadata::ParserMetadata {
66            description: "Bun lockfile",
67            file_patterns: &["**/bun.lock"],
68            package_type: "npm",
69            primary_language: "JavaScript",
70            documentation_url: Some("https://bun.sh/docs/pm/lockfile"),
71        }]
72    }
73}
74
75fn default_package_data() -> PackageData {
76    PackageData {
77        package_type: Some(BunLockParser::PACKAGE_TYPE),
78        primary_language: Some(truncate_field("JavaScript".to_string())),
79        datasource_id: Some(DatasourceId::BunLock),
80        extra_data: Some(HashMap::new()),
81        ..Default::default()
82    }
83}
84
85fn parse_bun_lockfile(root: &JsonValue) -> PackageData {
86    let mut result = default_package_data();
87
88    let workspace_context = extract_workspace_info(root);
89    let (namespace, name) = workspace_context
90        .root_name
91        .as_deref()
92        .map(split_namespace_name)
93        .unwrap_or((None, None));
94
95    result.namespace = namespace.map(truncate_field);
96    result.name = name.map(truncate_field);
97    result.version = workspace_context.root_version.clone().map(truncate_field);
98    result.purl = result
99        .name
100        .as_ref()
101        .map(|name| qualify_name(&result.namespace, name))
102        .and_then(|full_name| npm_purl(&full_name, workspace_context.root_version.as_deref()));
103
104    let extra_data = result.extra_data.get_or_insert_with(HashMap::new);
105    if let Some(lockfile_version) = root.get("lockfileVersion").and_then(|value| value.as_i64()) {
106        extra_data.insert(
107            "lockfileVersion".to_string(),
108            JsonValue::from(lockfile_version),
109        );
110    }
111    if let Some(config_version) = root.get("configVersion").and_then(|value| value.as_i64()) {
112        extra_data.insert("configVersion".to_string(), JsonValue::from(config_version));
113    }
114    if let Some(trusted) = root.get("trustedDependencies") {
115        extra_data.insert("trustedDependencies".to_string(), trusted.clone());
116    }
117
118    let Some(packages) = root.get("packages").and_then(|value| value.as_object()) else {
119        warn!("No packages field found in bun.lock");
120        if extra_data.is_empty() {
121            result.extra_data = None;
122        }
123        return result;
124    };
125
126    let mut dependencies = Vec::new();
127    for (key, value) in packages.iter().take(MAX_ITERATION_COUNT) {
128        if let Some(dependency) = parse_package_entry(
129            key,
130            value,
131            &workspace_context.direct_deps,
132            &workspace_context.workspace_versions,
133            &workspace_context.workspace_entries,
134        ) {
135            dependencies.push(dependency);
136        }
137    }
138
139    result.dependencies = dependencies;
140    if result
141        .extra_data
142        .as_ref()
143        .is_some_and(|data| data.is_empty())
144    {
145        result.extra_data = None;
146    }
147
148    result
149}
150
151fn extract_workspace_info(root: &JsonValue) -> WorkspaceContext {
152    let mut direct_deps = HashMap::new();
153    let mut workspace_versions = HashMap::new();
154    let mut workspace_entries = HashMap::new();
155
156    let workspaces = root.get("workspaces").and_then(|value| value.as_object());
157    let root_workspace = workspaces.and_then(|workspaces| workspaces.get(""));
158    let root_name = root_workspace
159        .and_then(|value| value.get("name"))
160        .and_then(|value| value.as_str())
161        .map(|s| truncate_field(s.to_owned()));
162    let root_version = root_workspace
163        .and_then(|value| value.get("version"))
164        .and_then(|value| value.as_str())
165        .map(|s| truncate_field(s.to_owned()));
166
167    if let Some(workspaces) = workspaces {
168        for workspace in workspaces.values().take(MAX_ITERATION_COUNT) {
169            if let Some(name) = workspace.get("name").and_then(|value| value.as_str())
170                && let Some(version) = workspace.get("version").and_then(|value| value.as_str())
171            {
172                workspace_versions.insert(
173                    truncate_field(name.to_string()),
174                    truncate_field(version.to_string()),
175                );
176            }
177            if let Some(name) = workspace.get("name").and_then(|value| value.as_str()) {
178                workspace_entries.insert(truncate_field(name.to_string()), workspace.clone());
179            }
180        }
181    }
182
183    if let Some(workspaces) = workspaces {
184        for workspace in workspaces.values().take(MAX_ITERATION_COUNT) {
185            insert_manifest_dependency_info(
186                workspace.get("dependencies"),
187                "dependencies",
188                true,
189                false,
190                &mut direct_deps,
191            );
192            insert_manifest_dependency_info(
193                workspace.get("devDependencies"),
194                "devDependencies",
195                false,
196                true,
197                &mut direct_deps,
198            );
199            insert_manifest_dependency_info(
200                workspace.get("optionalDependencies"),
201                "optionalDependencies",
202                true,
203                true,
204                &mut direct_deps,
205            );
206            insert_manifest_dependency_info(
207                workspace.get("peerDependencies"),
208                "peerDependencies",
209                true,
210                false,
211                &mut direct_deps,
212            );
213        }
214    }
215
216    WorkspaceContext {
217        root_name,
218        root_version,
219        direct_deps,
220        workspace_versions,
221        workspace_entries,
222    }
223}
224
225fn insert_manifest_dependency_info(
226    value: Option<&JsonValue>,
227    scope: &'static str,
228    is_runtime: bool,
229    is_optional: bool,
230    out: &mut HashMap<String, ManifestDependencyInfo>,
231) {
232    let Some(map) = value.and_then(|value| value.as_object()) else {
233        return;
234    };
235
236    for name in map.keys().take(MAX_ITERATION_COUNT) {
237        out.insert(
238            truncate_field(name.clone()),
239            ManifestDependencyInfo {
240                scope,
241                is_runtime,
242                is_optional,
243            },
244        );
245    }
246}
247
248fn parse_package_entry(
249    key: &str,
250    value: &JsonValue,
251    direct_deps: &HashMap<String, ManifestDependencyInfo>,
252    workspace_versions: &HashMap<String, String>,
253    workspace_entries: &HashMap<String, JsonValue>,
254) -> Option<Dependency> {
255    let tuple = value.as_array()?;
256    let resolution = tuple.first()?.as_str()?;
257    let (package_name, locator) = split_locator(resolution)?;
258    let package_name = truncate_field(package_name);
259    let locator = truncate_field(locator);
260    let package_version = resolve_locator_version(&package_name, &locator, workspace_versions);
261
262    let manifest_info = direct_deps
263        .get(key)
264        .or_else(|| direct_deps.get(&package_name));
265    let (scope, is_runtime, is_optional, is_direct) = manifest_info
266        .map(|info| {
267            (
268                truncate_field(info.scope.to_string()),
269                info.is_runtime,
270                info.is_optional,
271                true,
272            )
273        })
274        .unwrap_or_else(|| {
275            (
276                truncate_field("dependencies".to_string()),
277                true,
278                false,
279                false,
280            )
281        });
282
283    let purl = npm_purl(&package_name, package_version.as_deref()).map(truncate_field);
284    let resolved_download_url =
285        resolved_download_url(&package_name, &locator, tuple, package_version.as_deref())
286            .map(truncate_field);
287    let (sha1, sha256, sha512, md5) = parse_integrity_tuple(tuple);
288    let nested_dependencies =
289        extract_nested_dependencies(&package_name, tuple, workspace_versions, workspace_entries);
290
291    let (namespace, name) = split_namespace_name(&package_name);
292    let namespace = namespace.map(truncate_field);
293    let name = name.map(truncate_field);
294    let resolved_package = ResolvedPackage {
295        primary_language: Some(truncate_field("JavaScript".to_string())),
296        download_url: resolved_download_url,
297        sha1: sha1.and_then(|h| Sha1Digest::from_hex(&h).ok()),
298        sha256: sha256.and_then(|h| Sha256Digest::from_hex(&h).ok()),
299        sha512: sha512.and_then(|h| Sha512Digest::from_hex(&h).ok()),
300        md5: md5.and_then(|h| Md5Digest::from_hex(&h).ok()),
301        is_virtual: true,
302        extra_data: None,
303        dependencies: nested_dependencies,
304        repository_homepage_url: None,
305        repository_download_url: None,
306        api_data_url: None,
307        datasource_id: Some(DatasourceId::BunLock),
308        purl: None,
309        ..ResolvedPackage::new(
310            BunLockParser::PACKAGE_TYPE,
311            namespace.unwrap_or_default(),
312            name.unwrap_or_else(|| package_name.clone()),
313            truncate_field(package_version.clone().unwrap_or_default()),
314        )
315    };
316
317    Some(Dependency {
318        purl,
319        extracted_requirement: Some(truncate_field(
320            package_version.clone().unwrap_or(locator.clone()),
321        )),
322        scope: Some(scope),
323        is_runtime: Some(is_runtime),
324        is_optional: Some(is_optional),
325        is_pinned: Some(true),
326        is_direct: Some(is_direct),
327        resolved_package: Some(Box::new(resolved_package)),
328        extra_data: None,
329    })
330}
331
332fn split_locator(resolution: &str) -> Option<(String, String)> {
333    let (name, locator) = resolution.rsplit_once('@')?;
334    if name.is_empty() || locator.is_empty() {
335        return None;
336    }
337    Some((
338        truncate_field(name.to_string()),
339        truncate_field(locator.to_string()),
340    ))
341}
342
343fn resolve_locator_version(
344    package_name: &str,
345    locator: &str,
346    workspace_versions: &HashMap<String, String>,
347) -> Option<String> {
348    if let Some(path) = locator.strip_prefix("workspace:") {
349        return workspace_versions
350            .get(package_name)
351            .cloned()
352            .or_else(|| workspace_versions.get(path).cloned());
353    }
354
355    if locator.starts_with("file:")
356        || locator.starts_with("link:")
357        || locator.starts_with("github:")
358        || locator.starts_with("git+")
359        || locator.starts_with("http://")
360        || locator.starts_with("https://")
361    {
362        return None;
363    }
364
365    Some(truncate_field(locator.to_string()))
366}
367
368fn resolved_download_url(
369    package_name: &str,
370    locator: &str,
371    tuple: &[JsonValue],
372    version: Option<&str>,
373) -> Option<String> {
374    if let Some(url) = tuple.get(1).and_then(|value| value.as_str())
375        && !url.is_empty()
376    {
377        return Some(truncate_field(url.to_string()));
378    }
379
380    if locator.starts_with("workspace:")
381        || locator.starts_with("file:")
382        || locator.starts_with("link:")
383    {
384        return None;
385    }
386
387    if locator.starts_with("http://")
388        || locator.starts_with("https://")
389        || locator.starts_with("git+")
390        || locator.starts_with("github:")
391    {
392        return Some(truncate_field(locator.to_string()));
393    }
394
395    version.and_then(|version| default_registry_download_url(package_name, version))
396}
397
398fn default_registry_download_url(package_name: &str, version: &str) -> Option<String> {
399    let (namespace, name) = split_namespace_name(package_name);
400    let name = name?;
401    let package_path = qualify_name(&namespace, &name);
402    Some(truncate_field(format!(
403        "https://registry.npmjs.org/{}/-/{}-{}.tgz",
404        package_path, name, version
405    )))
406}
407
408fn parse_integrity_tuple(
409    tuple: &[JsonValue],
410) -> (
411    Option<String>,
412    Option<String>,
413    Option<String>,
414    Option<String>,
415) {
416    let integrity = tuple.iter().rev().find_map(|value| {
417        value.as_str().filter(|value| {
418            value.starts_with("sha1-")
419                || value.starts_with("sha256-")
420                || value.starts_with("sha512-")
421                || value.starts_with("md5-")
422        })
423    });
424
425    let Some(integrity) = integrity else {
426        return (None, None, None, None);
427    };
428
429    match parse_sri(integrity) {
430        Some((algo, hash)) if algo == "sha1" => (Some(hash), None, None, None),
431        Some((algo, hash)) if algo == "sha256" => (None, Some(hash), None, None),
432        Some((algo, hash)) if algo == "sha512" => (None, None, Some(hash), None),
433        Some((algo, hash)) if algo == "md5" => (None, None, None, Some(hash)),
434        _ => (None, None, None, None),
435    }
436}
437
438fn extract_nested_dependencies(
439    package_name: &str,
440    tuple: &[JsonValue],
441    workspace_versions: &HashMap<String, String>,
442    workspace_entries: &HashMap<String, JsonValue>,
443) -> Vec<Dependency> {
444    let info = tuple
445        .iter()
446        .find_map(|value| value.as_object())
447        .or_else(|| {
448            workspace_entries
449                .get(package_name)
450                .and_then(|value| value.as_object())
451        });
452    let Some(info) = info else {
453        return Vec::new();
454    };
455
456    let mut dependencies = Vec::new();
457    dependencies.extend(build_nested_dependencies(
458        info.get("dependencies").and_then(|value| value.as_object()),
459        "dependencies",
460        true,
461        false,
462        workspace_versions,
463    ));
464    dependencies.extend(build_nested_dependencies(
465        info.get("optionalDependencies")
466            .and_then(|value| value.as_object()),
467        "optionalDependencies",
468        true,
469        true,
470        workspace_versions,
471    ));
472    dependencies.extend(build_nested_dependencies(
473        info.get("peerDependencies")
474            .and_then(|value| value.as_object()),
475        "peerDependencies",
476        true,
477        false,
478        workspace_versions,
479    ));
480    dependencies
481}
482
483fn build_nested_dependencies(
484    deps: Option<&Map<String, JsonValue>>,
485    scope: &str,
486    is_runtime: bool,
487    is_optional: bool,
488    workspace_versions: &HashMap<String, String>,
489) -> Vec<Dependency> {
490    let Some(deps) = deps else {
491        return Vec::new();
492    };
493
494    deps.iter()
495        .take(MAX_ITERATION_COUNT)
496        .filter_map(|(name, value)| {
497            let requirement = value.as_str()?;
498            let version = if requirement.starts_with("workspace:") {
499                workspace_versions.get(name).map(String::as_str)
500            } else {
501                None
502            };
503
504            Some(Dependency {
505                purl: npm_purl(name, version).map(truncate_field),
506                extracted_requirement: Some(truncate_field(requirement.to_string())),
507                scope: Some(truncate_field(scope.to_string())),
508                is_runtime: Some(is_runtime),
509                is_optional: Some(is_optional),
510                is_pinned: Some(false),
511                is_direct: Some(false),
512                resolved_package: None,
513                extra_data: None,
514            })
515        })
516        .collect()
517}
518
519fn split_namespace_name(full_name: &str) -> (Option<String>, Option<String>) {
520    if full_name.starts_with('@') {
521        let mut parts = full_name.splitn(2, '/');
522        let namespace = parts.next().map(|s| truncate_field(s.to_owned()));
523        let name = parts.next().map(|s| truncate_field(s.to_owned()));
524        (namespace, name)
525    } else {
526        (
527            Some(String::new()),
528            Some(truncate_field(full_name.to_string())),
529        )
530    }
531}
532
/// Joins a namespace and a name into a full package name.
///
/// A missing or empty namespace yields just `name`; otherwise the result is
/// `namespace/name`.
fn qualify_name(namespace: &Option<String>, name: &str) -> String {
    let scope = namespace.as_deref().filter(|scope| !scope.is_empty());
    if let Some(scope) = scope {
        format!("{}/{}", scope, name)
    } else {
        name.to_string()
    }
}