//! Source file: `provenant/parsers/bun_lockb.rs`.

1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use base64::Engine;
6use serde_json::Value as JsonValue;
7
8use crate::models::{DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage};
9use crate::parsers::utils::{npm_purl, parse_sri};
10
11use super::PackageParser;
12
/// Parser for the binary `bun.lockb` lockfile; see its [`PackageParser`] impl
/// for file matching and package extraction.
pub struct BunLockbParser;
14
/// Exact byte sequence every accepted lockfile must start with.
const HEADER_BYTES: &[u8] = b"#!/usr/bin/env bun\nbun-lockfile-format-v0\n";
/// The only format version (the `u32` following the header) this parser accepts.
const SUPPORTED_FORMAT_VERSION: u32 = 2;
/// Per-package byte width of each of the eight columns in the package table,
/// in on-disk order. Their sum is the number of bytes one package occupies.
const PACKAGE_FIELD_LENGTHS: [usize; 8] = [8, 8, 64, 8, 8, 88, 20, 48];
/// Byte size of one record in the dependencies buffer.
const DEPENDENCY_ENTRY_SIZE: usize = 26;
19
/// Offset/length pair decoded from an 8-byte package column (two little-endian
/// `u32`s). It selects a run of elements in the decoded dependency-entry list
/// or the resolution-id list.
#[derive(Clone, Copy)]
struct SliceRef {
    /// Index of the first element.
    off: usize,
    /// Number of elements.
    len: usize,
}
25
/// One row of the package table. The raw fields are filled by `parse_packages`;
/// `name` and `resolution` are decoded later by `materialize_packages`, once
/// the string buffer is available.
#[derive(Clone)]
struct BunLockbPackage {
    /// Raw 8-byte string reference for the package name.
    name_ref: [u8; 8],
    /// Decoded package name (empty until materialized).
    name: String,
    /// Raw 64-byte resolution record.
    resolution_raw: [u8; 64],
    /// Decoded resolution (all `None` until materialized).
    resolution: BunLockbResolution,
    /// Range of this package's entries in the dependency list.
    dependencies: SliceRef,
    /// Range of this package's entries in the resolution-id list.
    resolutions: SliceRef,
    /// Integrity string in `<algorithm>-<base64>` form, if the tag was recognised.
    integrity: Option<String>,
}
36
/// Decoded form of a 64-byte resolution record (see `parse_resolution`).
#[derive(Clone)]
struct BunLockbResolution {
    /// `major.minor.patch` plus optional suffix; only set for resolution tag 2.
    version: Option<String>,
    /// String stored in the record (prefixed with `workspace:` for tag 72);
    /// `None` when the stored string is empty or the tag carries none.
    resolved: Option<String>,
}
42
/// One decoded record of the dependencies buffer.
#[derive(Clone)]
struct BunLockbDependencyEntry {
    /// Dependency name (record bytes 0..8).
    name: String,
    /// Requirement literal (record bytes 18..26), reported as the extracted requirement.
    literal: String,
    /// Behaviour bit flags (record byte 16), interpreted by `behavior_to_manifest`.
    behavior: u8,
}
49
/// The three trailing buffers this parser consumes, borrowed from the input bytes.
struct BunLockbBuffers<'a> {
    /// Little-endian `u32` package ids, four bytes each.
    resolutions: &'a [u8],
    /// Fixed-size dependency records (`DEPENDENCY_ENTRY_SIZE` bytes each).
    dependencies: &'a [u8],
    /// Backing storage for strings that are not stored inline.
    string_bytes: &'a [u8],
}
55
/// Bounds-checked sequential reader over the lockfile bytes.
struct LockbCursor<'a> {
    /// Whole input being read.
    bytes: &'a [u8],
    /// Offset of the next byte to read; also reassigned directly when jumping
    /// to an absolute position.
    pos: usize,
}
60
61impl PackageParser for BunLockbParser {
62    const PACKAGE_TYPE: PackageType = PackageType::Npm;
63
64    fn is_match(path: &Path) -> bool {
65        path.file_name()
66            .and_then(|name| name.to_str())
67            .is_some_and(|name| name == "bun.lockb")
68            && !path.with_file_name("bun.lock").exists()
69    }
70
71    fn extract_packages(path: &Path) -> Vec<PackageData> {
72        let bytes = match std::fs::read(path) {
73            Ok(bytes) => bytes,
74            Err(e) => {
75                warn!("Failed to read bun.lockb at {:?}: {}", path, e);
76                return vec![default_package_data()];
77            }
78        };
79
80        match parse_bun_lockb(&bytes) {
81            Ok(package_data) => vec![package_data],
82            Err(e) => {
83                warn!("Failed to parse bun.lockb at {:?}: {}", path, e);
84                vec![default_package_data()]
85            }
86        }
87    }
88}
89
90fn default_package_data() -> PackageData {
91    PackageData {
92        package_type: Some(BunLockbParser::PACKAGE_TYPE),
93        primary_language: Some("JavaScript".to_string()),
94        datasource_id: Some(DatasourceId::BunLockb),
95        extra_data: Some(HashMap::new()),
96        ..Default::default()
97    }
98}
99
100pub(crate) fn parse_bun_lockb(bytes: &[u8]) -> Result<PackageData, String> {
101    let mut cursor = LockbCursor::new(bytes);
102    cursor.expect_bytes(HEADER_BYTES)?;
103
104    let format_version = cursor.read_u32()?;
105    if format_version != SUPPORTED_FORMAT_VERSION {
106        return Err(format!(
107            "Unsupported bun.lockb format version {} (supported: {})",
108            format_version, SUPPORTED_FORMAT_VERSION
109        ));
110    }
111
112    let meta_hash = cursor.read_bytes(32)?;
113    let total_buffer_size = cursor.read_u64()? as usize;
114    if total_buffer_size > bytes.len() {
115        return Err("Lockfile is missing data".to_string());
116    }
117
118    let list_len = cursor.read_u64()? as usize;
119    let input_alignment = cursor.read_u64()?;
120    if input_alignment != 8 {
121        return Err(format!(
122            "Unexpected bun.lockb package alignment {}",
123            input_alignment
124        ));
125    }
126
127    let field_count = cursor.read_u64()? as usize;
128    if field_count != PACKAGE_FIELD_LENGTHS.len() {
129        return Err(format!(
130            "Unexpected bun.lockb package field count {}",
131            field_count
132        ));
133    }
134
135    let packages_begin = cursor.read_u64()? as usize;
136    let packages_end = cursor.read_u64()? as usize;
137    if packages_begin > total_buffer_size
138        || packages_end > total_buffer_size
139        || packages_begin > packages_end
140    {
141        return Err("Invalid bun.lockb package section bounds".to_string());
142    }
143
144    let mut packages = parse_packages(bytes, list_len, packages_begin, packages_end)?;
145    cursor.pos = packages_end;
146    let buffers = parse_buffers(bytes, &mut cursor, total_buffer_size)?;
147    materialize_packages(&mut packages, buffers.string_bytes)?;
148
149    build_package_data_from_lockb(format_version, meta_hash, &packages, &buffers)
150}
151
152fn parse_packages(
153    bytes: &[u8],
154    list_len: usize,
155    packages_begin: usize,
156    packages_end: usize,
157) -> Result<Vec<BunLockbPackage>, String> {
158    let mut packages = vec![
159        BunLockbPackage {
160            name_ref: [0; 8],
161            name: String::new(),
162            resolution_raw: [0; 64],
163            resolution: BunLockbResolution {
164                version: None,
165                resolved: None,
166            },
167            dependencies: SliceRef { off: 0, len: 0 },
168            resolutions: SliceRef { off: 0, len: 0 },
169            integrity: None,
170        };
171        list_len
172    ];
173
174    let package_region = bytes
175        .get(packages_begin..packages_end)
176        .ok_or_else(|| "Invalid bun.lockb package region".to_string())?;
177
178    let expected_size: usize = PACKAGE_FIELD_LENGTHS.iter().sum::<usize>() * list_len;
179    if package_region.len() < expected_size {
180        return Err("bun.lockb package region is truncated".to_string());
181    }
182
183    let mut field_offset = 0usize;
184
185    for package in &mut packages {
186        package
187            .name_ref
188            .copy_from_slice(&package_region[field_offset..field_offset + 8]);
189        field_offset += 8;
190    }
191
192    field_offset += 8 * list_len;
193
194    for package in &mut packages {
195        package
196            .resolution_raw
197            .copy_from_slice(&package_region[field_offset..field_offset + 64]);
198        field_offset += 64;
199    }
200
201    for package in &mut packages {
202        package.dependencies = parse_slice_ref(&package_region[field_offset..field_offset + 8])?;
203        field_offset += 8;
204    }
205
206    for package in &mut packages {
207        package.resolutions = parse_slice_ref(&package_region[field_offset..field_offset + 8])?;
208        field_offset += 8;
209    }
210
211    for package in &mut packages {
212        package.integrity = parse_integrity(&package_region[field_offset + 20..field_offset + 85]);
213        field_offset += 88;
214    }
215
216    let _ = field_offset + 20 * list_len + 48 * list_len;
217
218    Ok(packages)
219}
220
221fn materialize_packages(
222    packages: &mut [BunLockbPackage],
223    string_bytes: &[u8],
224) -> Result<(), String> {
225    for package in packages {
226        package.name = decode_bun_string(&package.name_ref, string_bytes)?;
227        package.resolution = parse_resolution(&package.resolution_raw, string_bytes)?;
228    }
229    Ok(())
230}
231
232fn parse_buffers<'a>(
233    bytes: &'a [u8],
234    cursor: &mut LockbCursor<'a>,
235    total_buffer_size: usize,
236) -> Result<BunLockbBuffers<'a>, String> {
237    let _trees = parse_buffer_range(bytes, cursor, total_buffer_size)?;
238    let _hoisted_dependencies = parse_buffer_range(bytes, cursor, total_buffer_size)?;
239    let resolutions = parse_buffer_range(bytes, cursor, total_buffer_size)?;
240    let dependencies = parse_buffer_range(bytes, cursor, total_buffer_size)?;
241    let _extern_strings = parse_buffer_range(bytes, cursor, total_buffer_size)?;
242    let string_bytes = parse_buffer_range(bytes, cursor, total_buffer_size)?;
243
244    Ok(BunLockbBuffers {
245        resolutions,
246        dependencies,
247        string_bytes,
248    })
249}
250
251fn parse_buffer_range<'a>(
252    bytes: &'a [u8],
253    cursor: &mut LockbCursor<'a>,
254    total_buffer_size: usize,
255) -> Result<&'a [u8], String> {
256    let start = cursor.read_u64()? as usize;
257    let end = cursor.read_u64()? as usize;
258    if start > total_buffer_size || end > total_buffer_size || start > end {
259        return Err("Invalid bun.lockb buffer range".to_string());
260    }
261    cursor.pos = start;
262    let slice = cursor.read_bytes(end - start)?;
263    cursor.pos = end;
264    bytes
265        .get(start..end)
266        .or(Some(slice))
267        .ok_or_else(|| "Invalid bun.lockb buffer slice".to_string())
268}
269
270fn build_package_data_from_lockb(
271    format_version: u32,
272    meta_hash: &[u8],
273    packages: &[BunLockbPackage],
274    buffers: &BunLockbBuffers<'_>,
275) -> Result<PackageData, String> {
276    let root_package = packages
277        .first()
278        .ok_or_else(|| "bun.lockb contains no packages".to_string())?;
279
280    let mut package_data = default_package_data();
281    package_data.name = Some(root_package.name.clone());
282    package_data.purl = npm_purl(&root_package.name, None);
283
284    let extra_data = package_data.extra_data.get_or_insert_with(HashMap::new);
285    extra_data.insert(
286        "lockfileVersion".to_string(),
287        JsonValue::from(format_version as i64),
288    );
289    extra_data.insert(
290        "meta_hash".to_string(),
291        JsonValue::from(encode_hex(meta_hash)),
292    );
293
294    let dependency_entries = parse_dependency_entries(buffers.dependencies, buffers.string_bytes)?;
295    let resolution_ids = parse_resolution_ids(buffers.resolutions)?;
296
297    package_data.dependencies = build_dependencies_for_package(
298        root_package,
299        packages,
300        &dependency_entries,
301        &resolution_ids,
302        buffers.string_bytes,
303        true,
304    )?;
305
306    Ok(package_data)
307}
308
309fn parse_dependency_entries(
310    bytes: &[u8],
311    string_bytes: &[u8],
312) -> Result<Vec<BunLockbDependencyEntry>, String> {
313    if !bytes.len().is_multiple_of(DEPENDENCY_ENTRY_SIZE) {
314        return Err("bun.lockb dependency buffer is malformed".to_string());
315    }
316
317    bytes
318        .chunks_exact(DEPENDENCY_ENTRY_SIZE)
319        .map(|entry| {
320            Ok(BunLockbDependencyEntry {
321                name: decode_bun_string(&entry[0..8], string_bytes)?,
322                behavior: entry[16],
323                literal: decode_bun_string(&entry[18..26], string_bytes)?,
324            })
325        })
326        .collect()
327}
328
/// Interprets the resolutions buffer as a sequence of little-endian `u32`
/// package ids.
///
/// # Errors
/// Fails when the buffer length is not a multiple of four.
fn parse_resolution_ids(bytes: &[u8]) -> Result<Vec<u32>, String> {
    let words = bytes.chunks_exact(4);
    if !words.remainder().is_empty() {
        return Err("bun.lockb resolution buffer is malformed".to_string());
    }

    Ok(words
        .map(|word| u32::from_le_bytes([word[0], word[1], word[2], word[3]]))
        .collect())
}
339
340fn build_dependencies_for_package(
341    package: &BunLockbPackage,
342    packages: &[BunLockbPackage],
343    dependency_entries: &[BunLockbDependencyEntry],
344    resolution_ids: &[u32],
345    string_bytes: &[u8],
346    is_direct: bool,
347) -> Result<Vec<Dependency>, String> {
348    let dep_slice = dependency_entries
349        .get(package.dependencies.off..package.dependencies.off + package.dependencies.len)
350        .ok_or_else(|| "bun.lockb dependency slice is out of bounds".to_string())?;
351    let res_slice = resolution_ids
352        .get(package.resolutions.off..package.resolutions.off + package.resolutions.len)
353        .ok_or_else(|| "bun.lockb resolution slice is out of bounds".to_string())?;
354
355    dep_slice
356        .iter()
357        .zip(res_slice.iter())
358        .map(|(entry, package_id)| {
359            let manifest = behavior_to_manifest(entry.behavior);
360            let resolved_package = if (*package_id as usize) < packages.len() {
361                let resolved = &packages[*package_id as usize];
362                Some(Box::new(build_resolved_package(
363                    resolved,
364                    packages,
365                    dependency_entries,
366                    resolution_ids,
367                    string_bytes,
368                )?))
369            } else {
370                None
371            };
372
373            let version = resolved_package
374                .as_ref()
375                .and_then(|pkg| (!pkg.version.is_empty()).then_some(pkg.version.as_str()));
376
377            Ok(Dependency {
378                purl: npm_purl(&entry.name, version),
379                extracted_requirement: Some(entry.literal.clone()),
380                scope: Some(manifest.scope.to_string()),
381                is_runtime: Some(manifest.is_runtime),
382                is_optional: Some(manifest.is_optional),
383                is_pinned: version.map(|_| true).or(Some(false)),
384                is_direct: Some(is_direct),
385                resolved_package,
386                extra_data: None,
387            })
388        })
389        .collect()
390}
391
392fn build_resolved_package(
393    package: &BunLockbPackage,
394    packages: &[BunLockbPackage],
395    dependency_entries: &[BunLockbDependencyEntry],
396    resolution_ids: &[u32],
397    string_bytes: &[u8],
398) -> Result<ResolvedPackage, String> {
399    let (namespace, name) = split_namespace_name(&package.name);
400
401    Ok(ResolvedPackage {
402        primary_language: Some("JavaScript".to_string()),
403        download_url: package.resolution.resolved.clone(),
404        sha1: None,
405        sha256: None,
406        sha512: package
407            .integrity
408            .as_ref()
409            .and_then(|s| parse_sri(s).and_then(|(alg, hash)| (alg == "sha512").then_some(hash))),
410        md5: None,
411        is_virtual: true,
412        extra_data: None,
413        dependencies: build_dependencies_for_package(
414            package,
415            packages,
416            dependency_entries,
417            resolution_ids,
418            string_bytes,
419            false,
420        )?,
421        repository_homepage_url: None,
422        repository_download_url: None,
423        api_data_url: None,
424        datasource_id: Some(DatasourceId::BunLockb),
425        purl: None,
426        ..ResolvedPackage::new(
427            PackageType::Npm,
428            namespace.unwrap_or_default(),
429            name.unwrap_or_else(|| package.name.clone()),
430            package.resolution.version.clone().unwrap_or_default(),
431        )
432    })
433}
434
435fn parse_slice_ref(bytes: &[u8]) -> Result<SliceRef, String> {
436    if bytes.len() != 8 {
437        return Err("Invalid bun.lockb slice length".to_string());
438    }
439    let off = u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as usize;
440    let len = u32::from_le_bytes(bytes[4..8].try_into().unwrap()) as usize;
441    Ok(SliceRef { off, len })
442}
443
444fn parse_resolution(bytes: &[u8], string_bytes: &[u8]) -> Result<BunLockbResolution, String> {
445    if bytes.len() != 64 {
446        return Err("Invalid bun.lockb resolution length".to_string());
447    }
448
449    let tag = bytes[0];
450    match tag {
451        1 => Ok(BunLockbResolution {
452            version: None,
453            resolved: Some(String::new()).filter(|s| !s.is_empty()),
454        }),
455        2 => {
456            let resolved = decode_bun_string(&bytes[8..16], string_bytes)?;
457            let major = u32::from_le_bytes(bytes[16..20].try_into().unwrap());
458            let minor = u32::from_le_bytes(bytes[20..24].try_into().unwrap());
459            let patch = u32::from_le_bytes(bytes[24..28].try_into().unwrap());
460            let tag_suffix = decode_version_suffix(&bytes[32..64], string_bytes)?;
461            let version = if let Some(suffix) = tag_suffix {
462                format!("{}.{}.{}{}", major, minor, patch, suffix)
463            } else {
464                format!("{}.{}.{}", major, minor, patch)
465            };
466
467            Ok(BunLockbResolution {
468                version: Some(version),
469                resolved: (!resolved.is_empty()).then_some(resolved),
470            })
471        }
472        72 => {
473            let workspace = decode_bun_string(&bytes[8..16], string_bytes)?;
474            Ok(BunLockbResolution {
475                version: None,
476                resolved: Some(format!("workspace:{}", workspace)),
477            })
478        }
479        4 | 8 | 16 | 24 | 32 | 64 | 80 | 100 => {
480            let resolved = decode_bun_string(&bytes[8..16], string_bytes)?;
481            Ok(BunLockbResolution {
482                version: None,
483                resolved: (!resolved.is_empty()).then_some(resolved),
484            })
485        }
486        _ => Err(format!("Unsupported bun.lockb resolution tag {}", tag)),
487    }
488}
489
490fn decode_version_suffix(bytes: &[u8], string_bytes: &[u8]) -> Result<Option<String>, String> {
491    if bytes.len() != 32 {
492        return Err("Invalid bun.lockb version tag length".to_string());
493    }
494    let pre = decode_bun_string(&bytes[0..8], string_bytes)?;
495    let build = decode_bun_string(&bytes[16..24], string_bytes)?;
496
497    let mut suffix = String::new();
498    if !pre.is_empty() {
499        suffix.push('-');
500        suffix.push_str(&pre);
501    }
502    if !build.is_empty() {
503        suffix.push('+');
504        suffix.push_str(&build);
505    }
506
507    Ok((!suffix.is_empty()).then_some(suffix))
508}
509
/// Decodes an 8-byte string reference.
///
/// When the top bit of the last byte is clear, the eight bytes hold the text
/// itself, terminated by the first zero byte (or using all eight). Otherwise
/// bytes 0..4 are a little-endian offset into `string_bytes` and bytes 4..8
/// are a little-endian length whose top bit is the marker and is masked off.
///
/// # Errors
/// Fails when `bytes` is not eight bytes long, when the referenced range is
/// outside `string_bytes`, or when the text is not valid UTF-8.
fn decode_bun_string(bytes: &[u8], string_bytes: &[u8]) -> Result<String, String> {
    if bytes.len() != 8 {
        return Err("Invalid bun.lockb string width".to_string());
    }

    let is_external = bytes[7] & 0x80 != 0;
    let text = if is_external {
        let off = u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as usize;
        let raw_len = u32::from_le_bytes([bytes[4], bytes[5], bytes[6], bytes[7]]);
        let len = (raw_len & 0x7fff_ffff) as usize;
        match string_bytes.get(off..off + len) {
            Some(slice) => std::str::from_utf8(slice)
                .map_err(|e| format!("Invalid external bun.lockb UTF-8: {}", e))?,
            None => return Err("bun.lockb string offset out of bounds".to_string()),
        }
    } else {
        let end = bytes.iter().position(|&b| b == 0).unwrap_or(bytes.len());
        std::str::from_utf8(&bytes[..end])
            .map_err(|e| format!("Invalid inline bun.lockb UTF-8: {}", e))?
    };

    Ok(text.to_owned())
}
532
533fn parse_integrity(bytes: &[u8]) -> Option<String> {
534    if bytes.is_empty() {
535        return None;
536    }
537
538    let algorithm = match bytes[0] {
539        1 => "sha1",
540        2 => "sha256",
541        3 => "sha384",
542        4 => "sha512",
543        _ => return None,
544    };
545
546    Some(format!(
547        "{}-{}",
548        algorithm,
549        base64::engine::general_purpose::STANDARD.encode(&bytes[1..])
550    ))
551}
552
/// Renders `bytes` as lowercase hexadecimal, two characters per byte.
fn encode_hex(bytes: &[u8]) -> String {
    const DIGITS: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
    ];

    bytes
        .iter()
        .flat_map(|&byte| {
            [
                DIGITS[usize::from(byte >> 4)],
                DIGITS[usize::from(byte & 0x0f)],
            ]
        })
        .collect()
}
562
/// Splits a package name into `(namespace, name)`.
///
/// A name starting with `@` is split at the first `/`; the namespace keeps its
/// leading `@`, and the name is `None` when there is no `/`. Any other name
/// gets an empty namespace and is returned whole.
fn split_namespace_name(full_name: &str) -> (Option<String>, Option<String>) {
    if !full_name.starts_with('@') {
        return (Some(String::new()), Some(full_name.to_string()));
    }

    match full_name.split_once('/') {
        Some((scope, rest)) => (Some(scope.to_owned()), Some(rest.to_owned())),
        None => (Some(full_name.to_owned()), None),
    }
}
573
/// How a dependency's behaviour flags map onto manifest terms.
struct ManifestBehavior {
    /// Manifest section name, e.g. `dependencies` or `devDependencies`.
    scope: &'static str,
    /// Whether the dependency is reported as a runtime dependency.
    is_runtime: bool,
    /// Whether the dependency is reported as optional.
    is_optional: bool,
}

/// Classifies a behaviour bit set. The first matching rule wins, checked in
/// this order: workspace, dev, peer (optional when the optional bit is also
/// set), optional. Anything else — including the plain "normal" bit (`0b10`)
/// or no bits at all — is a regular dependency.
fn behavior_to_manifest(behavior: u8) -> ManifestBehavior {
    const OPTIONAL: u8 = 0b100;
    const DEV: u8 = 0b1000;
    const PEER: u8 = 0b1_0000;
    const WORKSPACE: u8 = 0b10_0000;

    let has = |flag: u8| behavior & flag != 0;

    let (scope, is_runtime, is_optional) = if has(WORKSPACE) {
        ("workspaces", false, false)
    } else if has(DEV) {
        ("devDependencies", false, true)
    } else if has(PEER) {
        ("peerDependencies", true, has(OPTIONAL))
    } else if has(OPTIONAL) {
        ("optionalDependencies", true, true)
    } else {
        ("dependencies", true, false)
    };

    ManifestBehavior {
        scope,
        is_runtime,
        is_optional,
    }
}
636
637impl<'a> LockbCursor<'a> {
638    fn new(bytes: &'a [u8]) -> Self {
639        Self { bytes, pos: 0 }
640    }
641
642    fn read_bytes(&mut self, len: usize) -> Result<&'a [u8], String> {
643        let end = self
644            .pos
645            .checked_add(len)
646            .ok_or_else(|| "bun.lockb offset overflow".to_string())?;
647        let slice = self
648            .bytes
649            .get(self.pos..end)
650            .ok_or_else(|| "bun.lockb is truncated".to_string())?;
651        self.pos = end;
652        Ok(slice)
653    }
654
655    fn expect_bytes(&mut self, expected: &[u8]) -> Result<(), String> {
656        let actual = self.read_bytes(expected.len())?;
657        if actual == expected {
658            Ok(())
659        } else {
660            Err("Invalid bun.lockb header".to_string())
661        }
662    }
663
664    fn read_u32(&mut self) -> Result<u32, String> {
665        let bytes: [u8; 4] = self
666            .read_bytes(4)?
667            .try_into()
668            .map_err(|_| "Invalid bun.lockb u32".to_string())?;
669        Ok(u32::from_le_bytes(bytes))
670    }
671
672    fn read_u64(&mut self) -> Result<u64, String> {
673        let bytes: [u8; 8] = self
674            .read_bytes(8)?
675            .try_into()
676            .map_err(|_| "Invalid bun.lockb u64".to_string())?;
677        Ok(u64::from_le_bytes(bytes))
678    }
679}
680
// Registers this parser's metadata. The arguments appear to be, in order:
// description, path glob(s), package type, primary language, documentation
// URL — TODO confirm against the `register_parser!` macro definition.
crate::register_parser!(
    "Legacy Bun binary lockfile",
    &["**/bun.lockb"],
    "npm",
    "JavaScript",
    Some("https://bun.sh/docs/pm/lockfile"),
);