Skip to main content

provenant/parsers/
bun_lock.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use serde_json::{Map, Value as JsonValue};
9
10use crate::models::{
11    DatasourceId, Dependency, Md5Digest, PackageData, PackageType, ResolvedPackage, Sha1Digest,
12    Sha256Digest, Sha512Digest,
13};
14use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, parse_sri, truncate_field};
15
16use super::PackageParser;
17
/// Parser for Bun's text lockfile (`bun.lock`).
///
/// This is a stateless unit struct; all behaviour lives in its
/// `PackageParser` implementation.
pub struct BunLockParser;
19
/// How a dependency was declared in a workspace manifest section.
#[derive(Debug, Clone)]
struct ManifestDependencyInfo {
    /// Name of the manifest section the dependency was listed under,
    /// e.g. `"dependencies"` or `"devDependencies"`.
    scope: &'static str,
    /// Runtime flag assigned to that manifest section.
    is_runtime: bool,
    /// Optional flag assigned to that manifest section.
    is_optional: bool,
}
26
27struct WorkspaceContext {
28    root_name: Option<String>,
29    root_version: Option<String>,
30    direct_deps: HashMap<String, ManifestDependencyInfo>,
31    workspace_versions: HashMap<String, String>,
32    workspace_entries: HashMap<String, JsonValue>,
33}
34
35impl PackageParser for BunLockParser {
36    const PACKAGE_TYPE: PackageType = PackageType::Npm;
37
38    fn is_match(path: &Path) -> bool {
39        path.file_name()
40            .and_then(|name| name.to_str())
41            .is_some_and(|name| name == "bun.lock")
42    }
43
44    fn extract_packages(path: &Path) -> Vec<PackageData> {
45        let content = match crate::parsers::utils::read_file_to_string(path, None) {
46            Ok(content) => content,
47            Err(e) => {
48                warn!("Failed to read bun.lock at {:?}: {}", path, e);
49                return vec![default_package_data()];
50            }
51        };
52
53        let root: JsonValue = match json5::from_str(&content) {
54            Ok(root) => root,
55            Err(e) => {
56                warn!("Failed to parse bun.lock at {:?}: {}", path, e);
57                return vec![default_package_data()];
58            }
59        };
60
61        vec![parse_bun_lockfile(&root)]
62    }
63}
64
65fn default_package_data() -> PackageData {
66    PackageData {
67        package_type: Some(BunLockParser::PACKAGE_TYPE),
68        primary_language: Some(truncate_field("JavaScript".to_string())),
69        datasource_id: Some(DatasourceId::BunLock),
70        extra_data: Some(HashMap::new()),
71        ..Default::default()
72    }
73}
74
75fn parse_bun_lockfile(root: &JsonValue) -> PackageData {
76    let mut result = default_package_data();
77
78    let workspace_context = extract_workspace_info(root);
79    let (namespace, name) = workspace_context
80        .root_name
81        .as_deref()
82        .map(split_namespace_name)
83        .unwrap_or((None, None));
84
85    result.namespace = namespace.map(truncate_field);
86    result.name = name.map(truncate_field);
87    result.version = workspace_context.root_version.clone().map(truncate_field);
88    result.purl = result
89        .name
90        .as_ref()
91        .map(|name| qualify_name(&result.namespace, name))
92        .and_then(|full_name| npm_purl(&full_name, workspace_context.root_version.as_deref()));
93
94    let extra_data = result.extra_data.get_or_insert_with(HashMap::new);
95    if let Some(lockfile_version) = root.get("lockfileVersion").and_then(|value| value.as_i64()) {
96        extra_data.insert(
97            "lockfileVersion".to_string(),
98            JsonValue::from(lockfile_version),
99        );
100    }
101    if let Some(config_version) = root.get("configVersion").and_then(|value| value.as_i64()) {
102        extra_data.insert("configVersion".to_string(), JsonValue::from(config_version));
103    }
104    if let Some(trusted) = root.get("trustedDependencies") {
105        extra_data.insert("trustedDependencies".to_string(), trusted.clone());
106    }
107
108    let Some(packages) = root.get("packages").and_then(|value| value.as_object()) else {
109        warn!("No packages field found in bun.lock");
110        if extra_data.is_empty() {
111            result.extra_data = None;
112        }
113        return result;
114    };
115
116    let mut dependencies = Vec::new();
117    for (key, value) in packages.iter().take(MAX_ITERATION_COUNT) {
118        if let Some(dependency) = parse_package_entry(
119            key,
120            value,
121            &workspace_context.direct_deps,
122            &workspace_context.workspace_versions,
123            &workspace_context.workspace_entries,
124        ) {
125            dependencies.push(dependency);
126        }
127    }
128
129    result.dependencies = dependencies;
130    if result
131        .extra_data
132        .as_ref()
133        .is_some_and(|data| data.is_empty())
134    {
135        result.extra_data = None;
136    }
137
138    result
139}
140
141fn extract_workspace_info(root: &JsonValue) -> WorkspaceContext {
142    let mut direct_deps = HashMap::new();
143    let mut workspace_versions = HashMap::new();
144    let mut workspace_entries = HashMap::new();
145
146    let workspaces = root.get("workspaces").and_then(|value| value.as_object());
147    let root_workspace = workspaces.and_then(|workspaces| workspaces.get(""));
148    let root_name = root_workspace
149        .and_then(|value| value.get("name"))
150        .and_then(|value| value.as_str())
151        .map(|s| truncate_field(s.to_owned()));
152    let root_version = root_workspace
153        .and_then(|value| value.get("version"))
154        .and_then(|value| value.as_str())
155        .map(|s| truncate_field(s.to_owned()));
156
157    if let Some(workspaces) = workspaces {
158        for workspace in workspaces.values().take(MAX_ITERATION_COUNT) {
159            if let Some(name) = workspace.get("name").and_then(|value| value.as_str())
160                && let Some(version) = workspace.get("version").and_then(|value| value.as_str())
161            {
162                workspace_versions.insert(
163                    truncate_field(name.to_string()),
164                    truncate_field(version.to_string()),
165                );
166            }
167            if let Some(name) = workspace.get("name").and_then(|value| value.as_str()) {
168                workspace_entries.insert(truncate_field(name.to_string()), workspace.clone());
169            }
170        }
171    }
172
173    if let Some(workspaces) = workspaces {
174        for workspace in workspaces.values().take(MAX_ITERATION_COUNT) {
175            insert_manifest_dependency_info(
176                workspace.get("dependencies"),
177                "dependencies",
178                true,
179                false,
180                &mut direct_deps,
181            );
182            insert_manifest_dependency_info(
183                workspace.get("devDependencies"),
184                "devDependencies",
185                false,
186                true,
187                &mut direct_deps,
188            );
189            insert_manifest_dependency_info(
190                workspace.get("optionalDependencies"),
191                "optionalDependencies",
192                true,
193                true,
194                &mut direct_deps,
195            );
196            insert_manifest_dependency_info(
197                workspace.get("peerDependencies"),
198                "peerDependencies",
199                true,
200                false,
201                &mut direct_deps,
202            );
203        }
204    }
205
206    WorkspaceContext {
207        root_name,
208        root_version,
209        direct_deps,
210        workspace_versions,
211        workspace_entries,
212    }
213}
214
215fn insert_manifest_dependency_info(
216    value: Option<&JsonValue>,
217    scope: &'static str,
218    is_runtime: bool,
219    is_optional: bool,
220    out: &mut HashMap<String, ManifestDependencyInfo>,
221) {
222    let Some(map) = value.and_then(|value| value.as_object()) else {
223        return;
224    };
225
226    for name in map.keys().take(MAX_ITERATION_COUNT) {
227        out.insert(
228            truncate_field(name.clone()),
229            ManifestDependencyInfo {
230                scope,
231                is_runtime,
232                is_optional,
233            },
234        );
235    }
236}
237
238fn parse_package_entry(
239    key: &str,
240    value: &JsonValue,
241    direct_deps: &HashMap<String, ManifestDependencyInfo>,
242    workspace_versions: &HashMap<String, String>,
243    workspace_entries: &HashMap<String, JsonValue>,
244) -> Option<Dependency> {
245    let tuple = value.as_array()?;
246    let resolution = tuple.first()?.as_str()?;
247    let (package_name, locator) = split_locator(resolution)?;
248    let package_name = truncate_field(package_name);
249    let locator = truncate_field(locator);
250    let package_version = resolve_locator_version(&package_name, &locator, workspace_versions);
251
252    let manifest_info = direct_deps
253        .get(key)
254        .or_else(|| direct_deps.get(&package_name));
255    let (scope, is_runtime, is_optional, is_direct) = manifest_info
256        .map(|info| {
257            (
258                truncate_field(info.scope.to_string()),
259                info.is_runtime,
260                info.is_optional,
261                true,
262            )
263        })
264        .unwrap_or_else(|| {
265            (
266                truncate_field("dependencies".to_string()),
267                true,
268                false,
269                false,
270            )
271        });
272
273    let purl = npm_purl(&package_name, package_version.as_deref()).map(truncate_field);
274    let resolved_download_url =
275        resolved_download_url(&package_name, &locator, tuple, package_version.as_deref())
276            .map(truncate_field);
277    let (sha1, sha256, sha512, md5) = parse_integrity_tuple(tuple);
278    let nested_dependencies =
279        extract_nested_dependencies(&package_name, tuple, workspace_versions, workspace_entries);
280
281    let (namespace, name) = split_namespace_name(&package_name);
282    let namespace = namespace.map(truncate_field);
283    let name = name.map(truncate_field);
284    let resolved_package = ResolvedPackage {
285        primary_language: Some(truncate_field("JavaScript".to_string())),
286        download_url: resolved_download_url,
287        sha1: sha1.and_then(|h| Sha1Digest::from_hex(&h).ok()),
288        sha256: sha256.and_then(|h| Sha256Digest::from_hex(&h).ok()),
289        sha512: sha512.and_then(|h| Sha512Digest::from_hex(&h).ok()),
290        md5: md5.and_then(|h| Md5Digest::from_hex(&h).ok()),
291        is_virtual: true,
292        extra_data: None,
293        dependencies: nested_dependencies,
294        repository_homepage_url: None,
295        repository_download_url: None,
296        api_data_url: None,
297        datasource_id: Some(DatasourceId::BunLock),
298        purl: None,
299        ..ResolvedPackage::new(
300            BunLockParser::PACKAGE_TYPE,
301            namespace.unwrap_or_default(),
302            name.unwrap_or_else(|| package_name.clone()),
303            truncate_field(package_version.clone().unwrap_or_default()),
304        )
305    };
306
307    Some(Dependency {
308        purl,
309        extracted_requirement: Some(truncate_field(
310            package_version.clone().unwrap_or(locator.clone()),
311        )),
312        scope: Some(scope),
313        is_runtime: Some(is_runtime),
314        is_optional: Some(is_optional),
315        is_pinned: Some(true),
316        is_direct: Some(is_direct),
317        resolved_package: Some(Box::new(resolved_package)),
318        extra_data: None,
319    })
320}
321
322fn split_locator(resolution: &str) -> Option<(String, String)> {
323    let (name, locator) = resolution.rsplit_once('@')?;
324    if name.is_empty() || locator.is_empty() {
325        return None;
326    }
327    Some((
328        truncate_field(name.to_string()),
329        truncate_field(locator.to_string()),
330    ))
331}
332
333fn resolve_locator_version(
334    package_name: &str,
335    locator: &str,
336    workspace_versions: &HashMap<String, String>,
337) -> Option<String> {
338    if let Some(path) = locator.strip_prefix("workspace:") {
339        return workspace_versions
340            .get(package_name)
341            .cloned()
342            .or_else(|| workspace_versions.get(path).cloned());
343    }
344
345    if locator.starts_with("file:")
346        || locator.starts_with("link:")
347        || locator.starts_with("github:")
348        || locator.starts_with("git+")
349        || locator.starts_with("http://")
350        || locator.starts_with("https://")
351    {
352        return None;
353    }
354
355    Some(truncate_field(locator.to_string()))
356}
357
358fn resolved_download_url(
359    package_name: &str,
360    locator: &str,
361    tuple: &[JsonValue],
362    version: Option<&str>,
363) -> Option<String> {
364    if let Some(url) = tuple.get(1).and_then(|value| value.as_str())
365        && !url.is_empty()
366    {
367        return Some(truncate_field(url.to_string()));
368    }
369
370    if locator.starts_with("workspace:")
371        || locator.starts_with("file:")
372        || locator.starts_with("link:")
373    {
374        return None;
375    }
376
377    if locator.starts_with("http://")
378        || locator.starts_with("https://")
379        || locator.starts_with("git+")
380        || locator.starts_with("github:")
381    {
382        return Some(truncate_field(locator.to_string()));
383    }
384
385    version.and_then(|version| default_registry_download_url(package_name, version))
386}
387
388fn default_registry_download_url(package_name: &str, version: &str) -> Option<String> {
389    let (namespace, name) = split_namespace_name(package_name);
390    let name = name?;
391    let package_path = qualify_name(&namespace, &name);
392    Some(truncate_field(format!(
393        "https://registry.npmjs.org/{}/-/{}-{}.tgz",
394        package_path, name, version
395    )))
396}
397
398fn parse_integrity_tuple(
399    tuple: &[JsonValue],
400) -> (
401    Option<String>,
402    Option<String>,
403    Option<String>,
404    Option<String>,
405) {
406    let integrity = tuple.iter().rev().find_map(|value| {
407        value.as_str().filter(|value| {
408            value.starts_with("sha1-")
409                || value.starts_with("sha256-")
410                || value.starts_with("sha512-")
411                || value.starts_with("md5-")
412        })
413    });
414
415    let Some(integrity) = integrity else {
416        return (None, None, None, None);
417    };
418
419    match parse_sri(integrity) {
420        Some((algo, hash)) if algo == "sha1" => (Some(hash), None, None, None),
421        Some((algo, hash)) if algo == "sha256" => (None, Some(hash), None, None),
422        Some((algo, hash)) if algo == "sha512" => (None, None, Some(hash), None),
423        Some((algo, hash)) if algo == "md5" => (None, None, None, Some(hash)),
424        _ => (None, None, None, None),
425    }
426}
427
428fn extract_nested_dependencies(
429    package_name: &str,
430    tuple: &[JsonValue],
431    workspace_versions: &HashMap<String, String>,
432    workspace_entries: &HashMap<String, JsonValue>,
433) -> Vec<Dependency> {
434    let info = tuple
435        .iter()
436        .find_map(|value| value.as_object())
437        .or_else(|| {
438            workspace_entries
439                .get(package_name)
440                .and_then(|value| value.as_object())
441        });
442    let Some(info) = info else {
443        return Vec::new();
444    };
445
446    let mut dependencies = Vec::new();
447    dependencies.extend(build_nested_dependencies(
448        info.get("dependencies").and_then(|value| value.as_object()),
449        "dependencies",
450        true,
451        false,
452        workspace_versions,
453    ));
454    dependencies.extend(build_nested_dependencies(
455        info.get("optionalDependencies")
456            .and_then(|value| value.as_object()),
457        "optionalDependencies",
458        true,
459        true,
460        workspace_versions,
461    ));
462    dependencies.extend(build_nested_dependencies(
463        info.get("peerDependencies")
464            .and_then(|value| value.as_object()),
465        "peerDependencies",
466        true,
467        false,
468        workspace_versions,
469    ));
470    dependencies
471}
472
473fn build_nested_dependencies(
474    deps: Option<&Map<String, JsonValue>>,
475    scope: &str,
476    is_runtime: bool,
477    is_optional: bool,
478    workspace_versions: &HashMap<String, String>,
479) -> Vec<Dependency> {
480    let Some(deps) = deps else {
481        return Vec::new();
482    };
483
484    deps.iter()
485        .take(MAX_ITERATION_COUNT)
486        .filter_map(|(name, value)| {
487            let requirement = value.as_str()?;
488            let version = if requirement.starts_with("workspace:") {
489                workspace_versions.get(name).map(String::as_str)
490            } else {
491                None
492            };
493
494            Some(Dependency {
495                purl: npm_purl(name, version).map(truncate_field),
496                extracted_requirement: Some(truncate_field(requirement.to_string())),
497                scope: Some(truncate_field(scope.to_string())),
498                is_runtime: Some(is_runtime),
499                is_optional: Some(is_optional),
500                is_pinned: Some(false),
501                is_direct: Some(false),
502                resolved_package: None,
503                extra_data: None,
504            })
505        })
506        .collect()
507}
508
509fn split_namespace_name(full_name: &str) -> (Option<String>, Option<String>) {
510    if full_name.starts_with('@') {
511        let mut parts = full_name.splitn(2, '/');
512        let namespace = parts.next().map(|s| truncate_field(s.to_owned()));
513        let name = parts.next().map(|s| truncate_field(s.to_owned()));
514        (namespace, name)
515    } else {
516        (
517            Some(String::new()),
518            Some(truncate_field(full_name.to_string())),
519        )
520    }
521}
522
/// Joins a namespace and a name into a full package name.
///
/// A missing or empty namespace yields just `name`; otherwise the result is
/// `namespace/name`.
fn qualify_name(namespace: &Option<String>, name: &str) -> String {
    let scope = namespace.as_deref().unwrap_or("");
    if scope.is_empty() {
        name.to_string()
    } else {
        format!("{}/{}", scope, name)
    }
}
529
530crate::register_parser!(
531    "Bun lockfile",
532    &["**/bun.lock"],
533    "npm",
534    "JavaScript",
535    Some("https://bun.sh/docs/pm/lockfile"),
536);