Skip to main content

provenant/parsers/
bun_lockb.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::parser_warn as warn;
5use base64::Engine;
6use serde_json::Value as JsonValue;
7
8use crate::models::{
9    DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage, Sha512Digest,
10};
11use crate::parsers::utils::{npm_purl, parse_sri};
12
13use super::PackageParser;
14
/// Parser for the legacy binary Bun lockfile (`bun.lockb`).
///
/// Carries no state; all behaviour lives in its `PackageParser` implementation.
15pub struct BunLockbParser;
16
/// Magic bytes a lockfile must start with; `parse_bun_lockb` checks them before reading
/// anything else.
17const HEADER_BYTES: &[u8] = b"#!/usr/bin/env bun\nbun-lockfile-format-v0\n";
/// The only value of the format-version field that `parse_bun_lockb` accepts.
18const SUPPORTED_FORMAT_VERSION: u32 = 2;
/// Accepted per-package field count when the trailing 48-byte column is absent.
19const FIELD_COUNT_WITHOUT_SCRIPTS: usize = 7;
/// Accepted per-package field count when the trailing 48-byte column is present.
20const FIELD_COUNT_WITH_SCRIPTS: usize = 8;
/// Byte width of each per-package column, in on-disk order. `parse_packages` sums the first
/// `field_count` entries to compute the expected size of the package region.
21const PACKAGE_FIELD_LENGTHS: [usize; 8] = [8, 8, 64, 8, 8, 88, 20, 48];
/// Size in bytes of one record in the dependencies buffer (see `parse_dependency_entries`).
22const DEPENDENCY_ENTRY_SIZE: usize = 26;
23
/// Offset/length pair decoded from two little-endian `u32` values by `parse_slice_ref`.
///
/// It is used to select a sub-range of the parsed dependency entries or resolution ids,
/// so both fields count elements of those lists rather than bytes.
24#[derive(Clone, Copy)]
25struct SliceRef {
    /// Index of the first element of the range.
26    off: usize,
    /// Number of elements in the range.
27    len: usize,
28}
29
/// One row of the package table, holding both the raw bytes and their decoded form.
///
/// `parse_packages` fills the raw fields and slice references; `materialize_packages`
/// later decodes `name` and `resolution` once the string buffer is available.
30#[derive(Clone)]
31struct BunLockbPackage {
    /// Raw 8-byte string reference for the package name.
32    name_ref: [u8; 8],
    /// Decoded package name; empty until `materialize_packages` runs.
33    name: String,
    /// Raw 64-byte resolution record.
34    resolution_raw: [u8; 64],
    /// Decoded resolution; both fields are `None` until `materialize_packages` runs.
35    resolution: BunLockbResolution,
    /// Range of this package's entries in the parsed dependency list.
36    dependencies: SliceRef,
    /// Range of this package's entries in the parsed resolution-id list; paired
    /// element-by-element with `dependencies`.
37    resolutions: SliceRef,
    /// `<algorithm>-<base64>` string built by `parse_integrity`, or `None` when the
    /// algorithm tag is not recognised.
38    integrity: Option<String>,
39}
40
/// Decoded form of a 64-byte resolution record (see `parse_resolution`).
41#[derive(Clone)]
42struct BunLockbResolution {
    /// `major.minor.patch` plus an optional pre-release/build suffix; only set for
    /// resolution tag 2.
43    version: Option<String>,
    /// String decoded from the record payload (prefixed with `workspace:` for tag 72);
    /// `None` when the record carries no non-empty string.
44    resolved: Option<String>,
45}
46
/// One decoded record of the dependencies buffer (see `parse_dependency_entries`).
47#[derive(Clone)]
48struct BunLockbDependencyEntry {
    /// Name of the dependency, used to build its purl.
49    name: String,
    /// Requirement string as stored in the lockfile; reported as `extracted_requirement`.
50    literal: String,
    /// Bit flags interpreted by `behavior_to_manifest`.
51    behavior: u8,
52}
53
/// The three trailing buffers this parser consumes, borrowed from the lockfile bytes.
/// `parse_buffers` reads six buffer ranges and keeps only these.
54struct BunLockbBuffers<'a> {
    /// Raw resolution ids, decoded as little-endian `u32` values by `parse_resolution_ids`.
55    resolutions: &'a [u8],
    /// Raw dependency records of `DEPENDENCY_ENTRY_SIZE` bytes each.
56    dependencies: &'a [u8],
    /// Backing storage for strings referenced by offset and length (see `decode_bun_string`).
57    string_bytes: &'a [u8],
58}
59
/// Forward-reading cursor over the lockfile bytes.
60struct LockbCursor<'a> {
    /// The complete input being read.
61    bytes: &'a [u8],
    /// Offset of the next byte to read; also assigned directly by the parsing functions
    /// when they jump to a section.
62    pos: usize,
63}
64
65impl PackageParser for BunLockbParser {
66    const PACKAGE_TYPE: PackageType = PackageType::Npm;
67
68    fn is_match(path: &Path) -> bool {
69        path.file_name()
70            .and_then(|name| name.to_str())
71            .is_some_and(|name| name == "bun.lockb")
72            && !path.with_file_name("bun.lock").exists()
73    }
74
75    fn extract_packages(path: &Path) -> Vec<PackageData> {
76        let bytes = match std::fs::read(path) {
77            Ok(bytes) => bytes,
78            Err(e) => {
79                warn!("Failed to read bun.lockb at {:?}: {}", path, e);
80                return vec![default_package_data()];
81            }
82        };
83
84        match parse_bun_lockb(&bytes) {
85            Ok(package_data) => vec![package_data],
86            Err(e) => {
87                warn!("Failed to parse bun.lockb at {:?}: {}", path, e);
88                vec![default_package_data()]
89            }
90        }
91    }
92}
93
94fn default_package_data() -> PackageData {
95    PackageData {
96        package_type: Some(BunLockbParser::PACKAGE_TYPE),
97        primary_language: Some("JavaScript".to_string()),
98        datasource_id: Some(DatasourceId::BunLockb),
99        extra_data: Some(HashMap::new()),
100        ..Default::default()
101    }
102}
103
104pub(crate) fn parse_bun_lockb(bytes: &[u8]) -> Result<PackageData, String> {
105    let mut cursor = LockbCursor::new(bytes);
106    cursor.expect_bytes(HEADER_BYTES)?;
107
108    let format_version = cursor.read_u32()?;
109    if format_version != SUPPORTED_FORMAT_VERSION {
110        return Err(format!(
111            "Unsupported bun.lockb format version {} (supported: {})",
112            format_version, SUPPORTED_FORMAT_VERSION
113        ));
114    }
115
116    let meta_hash = cursor.read_bytes(32)?;
117    let total_buffer_size = cursor.read_u64()? as usize;
118    if total_buffer_size > bytes.len() {
119        return Err("Lockfile is missing data".to_string());
120    }
121
122    let list_len = cursor.read_u64()? as usize;
123    let input_alignment = cursor.read_u64()?;
124    if input_alignment != 8 {
125        return Err(format!(
126            "Unexpected bun.lockb package alignment {}",
127            input_alignment
128        ));
129    }
130
131    let field_count = cursor.read_u64()? as usize;
132    if field_count != FIELD_COUNT_WITHOUT_SCRIPTS && field_count != FIELD_COUNT_WITH_SCRIPTS {
133        return Err(format!(
134            "Unexpected bun.lockb package field count {} (supported: {} or {})",
135            field_count, FIELD_COUNT_WITHOUT_SCRIPTS, FIELD_COUNT_WITH_SCRIPTS
136        ));
137    }
138
139    let packages_begin = cursor.read_u64()? as usize;
140    let packages_end = cursor.read_u64()? as usize;
141    if packages_begin > total_buffer_size
142        || packages_end > total_buffer_size
143        || packages_begin > packages_end
144    {
145        return Err("Invalid bun.lockb package section bounds".to_string());
146    }
147
148    let mut packages = parse_packages(bytes, list_len, field_count, packages_begin, packages_end)?;
149    cursor.pos = packages_end;
150    let buffers = parse_buffers(bytes, &mut cursor, total_buffer_size)?;
151    materialize_packages(&mut packages, buffers.string_bytes)?;
152
153    build_package_data_from_lockb(format_version, meta_hash, &packages, &buffers)
154}
155
156fn parse_packages(
157    bytes: &[u8],
158    list_len: usize,
159    field_count: usize,
160    packages_begin: usize,
161    packages_end: usize,
162) -> Result<Vec<BunLockbPackage>, String> {
163    let mut packages = vec![
164        BunLockbPackage {
165            name_ref: [0; 8],
166            name: String::new(),
167            resolution_raw: [0; 64],
168            resolution: BunLockbResolution {
169                version: None,
170                resolved: None,
171            },
172            dependencies: SliceRef { off: 0, len: 0 },
173            resolutions: SliceRef { off: 0, len: 0 },
174            integrity: None,
175        };
176        list_len
177    ];
178
179    let package_region = bytes
180        .get(packages_begin..packages_end)
181        .ok_or_else(|| "Invalid bun.lockb package region".to_string())?;
182
183    let expected_size: usize =
184        PACKAGE_FIELD_LENGTHS[..field_count].iter().sum::<usize>() * list_len;
185    if package_region.len() < expected_size {
186        return Err("bun.lockb package region is truncated".to_string());
187    }
188
189    let mut field_offset = 0usize;
190
191    for package in &mut packages {
192        package
193            .name_ref
194            .copy_from_slice(&package_region[field_offset..field_offset + 8]);
195        field_offset += 8;
196    }
197
198    field_offset += 8 * list_len;
199
200    for package in &mut packages {
201        package
202            .resolution_raw
203            .copy_from_slice(&package_region[field_offset..field_offset + 64]);
204        field_offset += 64;
205    }
206
207    for package in &mut packages {
208        package.dependencies = parse_slice_ref(&package_region[field_offset..field_offset + 8])?;
209        field_offset += 8;
210    }
211
212    for package in &mut packages {
213        package.resolutions = parse_slice_ref(&package_region[field_offset..field_offset + 8])?;
214        field_offset += 8;
215    }
216
217    for package in &mut packages {
218        package.integrity = parse_integrity(&package_region[field_offset + 20..field_offset + 85]);
219        field_offset += 88;
220    }
221
222    field_offset += 20 * list_len;
223    if field_count == FIELD_COUNT_WITH_SCRIPTS {
224        field_offset += 48 * list_len;
225    }
226
227    if field_offset != expected_size {
228        return Err("bun.lockb package region layout is malformed".to_string());
229    }
230
231    Ok(packages)
232}
233
234fn materialize_packages(
235    packages: &mut [BunLockbPackage],
236    string_bytes: &[u8],
237) -> Result<(), String> {
238    for package in packages {
239        package.name = decode_bun_string(&package.name_ref, string_bytes)?;
240        package.resolution = parse_resolution(&package.resolution_raw, string_bytes)?;
241    }
242    Ok(())
243}
244
245fn parse_buffers<'a>(
246    bytes: &'a [u8],
247    cursor: &mut LockbCursor<'a>,
248    total_buffer_size: usize,
249) -> Result<BunLockbBuffers<'a>, String> {
250    let _trees = parse_buffer_range(bytes, cursor, total_buffer_size)?;
251    let _hoisted_dependencies = parse_buffer_range(bytes, cursor, total_buffer_size)?;
252    let resolutions = parse_buffer_range(bytes, cursor, total_buffer_size)?;
253    let dependencies = parse_buffer_range(bytes, cursor, total_buffer_size)?;
254    let _extern_strings = parse_buffer_range(bytes, cursor, total_buffer_size)?;
255    let string_bytes = parse_buffer_range(bytes, cursor, total_buffer_size)?;
256
257    Ok(BunLockbBuffers {
258        resolutions,
259        dependencies,
260        string_bytes,
261    })
262}
263
264fn parse_buffer_range<'a>(
265    bytes: &'a [u8],
266    cursor: &mut LockbCursor<'a>,
267    total_buffer_size: usize,
268) -> Result<&'a [u8], String> {
269    let start = cursor.read_u64()? as usize;
270    let end = cursor.read_u64()? as usize;
271    if start > total_buffer_size || end > total_buffer_size || start > end {
272        return Err("Invalid bun.lockb buffer range".to_string());
273    }
274    cursor.pos = start;
275    let slice = cursor.read_bytes(end - start)?;
276    cursor.pos = end;
277    bytes
278        .get(start..end)
279        .or(Some(slice))
280        .ok_or_else(|| "Invalid bun.lockb buffer slice".to_string())
281}
282
283fn build_package_data_from_lockb(
284    format_version: u32,
285    meta_hash: &[u8],
286    packages: &[BunLockbPackage],
287    buffers: &BunLockbBuffers<'_>,
288) -> Result<PackageData, String> {
289    let root_package = packages
290        .first()
291        .ok_or_else(|| "bun.lockb contains no packages".to_string())?;
292
293    let mut package_data = default_package_data();
294    package_data.name = Some(root_package.name.clone());
295    package_data.purl = npm_purl(&root_package.name, None);
296
297    let extra_data = package_data.extra_data.get_or_insert_with(HashMap::new);
298    extra_data.insert(
299        "lockfileVersion".to_string(),
300        JsonValue::from(i64::from(format_version)),
301    );
302    extra_data.insert(
303        "meta_hash".to_string(),
304        JsonValue::from(encode_hex(meta_hash)),
305    );
306
307    let dependency_entries = parse_dependency_entries(buffers.dependencies, buffers.string_bytes)?;
308    let resolution_ids = parse_resolution_ids(buffers.resolutions)?;
309
310    package_data.dependencies = build_dependencies_for_package(
311        root_package,
312        packages,
313        &dependency_entries,
314        &resolution_ids,
315        buffers.string_bytes,
316        true,
317    )?;
318
319    Ok(package_data)
320}
321
322fn parse_dependency_entries(
323    bytes: &[u8],
324    string_bytes: &[u8],
325) -> Result<Vec<BunLockbDependencyEntry>, String> {
326    if !bytes.len().is_multiple_of(DEPENDENCY_ENTRY_SIZE) {
327        return Err("bun.lockb dependency buffer is malformed".to_string());
328    }
329
330    bytes
331        .chunks_exact(DEPENDENCY_ENTRY_SIZE)
332        .map(|entry| {
333            Ok(BunLockbDependencyEntry {
334                name: decode_bun_string(&entry[0..8], string_bytes)?,
335                behavior: entry[16],
336                literal: decode_bun_string(&entry[18..26], string_bytes)?,
337            })
338        })
339        .collect()
340}
341
/// Decodes the resolutions buffer as a sequence of little-endian `u32` values.
///
/// # Errors
///
/// Fails when the buffer length is not a multiple of four bytes.
fn parse_resolution_ids(bytes: &[u8]) -> Result<Vec<u32>, String> {
    if !bytes.len().is_multiple_of(4) {
        return Err("bun.lockb resolution buffer is malformed".to_string());
    }

    let mut ids = Vec::with_capacity(bytes.len() / 4);
    for word in bytes.chunks_exact(4) {
        ids.push(u32::from_le_bytes([word[0], word[1], word[2], word[3]]));
    }
    Ok(ids)
}
352
353fn build_dependencies_for_package(
354    package: &BunLockbPackage,
355    packages: &[BunLockbPackage],
356    dependency_entries: &[BunLockbDependencyEntry],
357    resolution_ids: &[u32],
358    string_bytes: &[u8],
359    is_direct: bool,
360) -> Result<Vec<Dependency>, String> {
361    let dep_slice = dependency_entries
362        .get(package.dependencies.off..package.dependencies.off + package.dependencies.len)
363        .ok_or_else(|| "bun.lockb dependency slice is out of bounds".to_string())?;
364    let res_slice = resolution_ids
365        .get(package.resolutions.off..package.resolutions.off + package.resolutions.len)
366        .ok_or_else(|| "bun.lockb resolution slice is out of bounds".to_string())?;
367
368    dep_slice
369        .iter()
370        .zip(res_slice.iter())
371        .map(|(entry, package_id)| {
372            let manifest = behavior_to_manifest(entry.behavior);
373            let resolved_package = if (*package_id as usize) < packages.len() {
374                let resolved = &packages[*package_id as usize];
375                Some(Box::new(build_resolved_package(
376                    resolved,
377                    packages,
378                    dependency_entries,
379                    resolution_ids,
380                    string_bytes,
381                )?))
382            } else {
383                None
384            };
385
386            let version = resolved_package
387                .as_ref()
388                .and_then(|pkg| (!pkg.version.is_empty()).then_some(pkg.version.as_str()));
389
390            Ok(Dependency {
391                purl: npm_purl(&entry.name, version),
392                extracted_requirement: Some(entry.literal.clone()),
393                scope: Some(manifest.scope.to_string()),
394                is_runtime: Some(manifest.is_runtime),
395                is_optional: Some(manifest.is_optional),
396                is_pinned: version.map(|_| true).or(Some(false)),
397                is_direct: Some(is_direct),
398                resolved_package,
399                extra_data: None,
400            })
401        })
402        .collect()
403}
404
405fn build_resolved_package(
406    package: &BunLockbPackage,
407    packages: &[BunLockbPackage],
408    dependency_entries: &[BunLockbDependencyEntry],
409    resolution_ids: &[u32],
410    string_bytes: &[u8],
411) -> Result<ResolvedPackage, String> {
412    let (namespace, name) = split_namespace_name(&package.name);
413
414    Ok(ResolvedPackage {
415        primary_language: Some("JavaScript".to_string()),
416        download_url: package.resolution.resolved.clone(),
417        sha1: None,
418        sha256: None,
419        sha512: package
420            .integrity
421            .as_ref()
422            .and_then(|s| parse_sri(s).and_then(|(alg, hash)| (alg == "sha512").then_some(hash)))
423            .and_then(|h| Sha512Digest::from_hex(&h).ok()),
424        md5: None,
425        is_virtual: true,
426        extra_data: None,
427        dependencies: build_dependencies_for_package(
428            package,
429            packages,
430            dependency_entries,
431            resolution_ids,
432            string_bytes,
433            false,
434        )?,
435        repository_homepage_url: None,
436        repository_download_url: None,
437        api_data_url: None,
438        datasource_id: Some(DatasourceId::BunLockb),
439        purl: None,
440        ..ResolvedPackage::new(
441            PackageType::Npm,
442            namespace.unwrap_or_default(),
443            name.unwrap_or_else(|| package.name.clone()),
444            package.resolution.version.clone().unwrap_or_default(),
445        )
446    })
447}
448
449fn parse_slice_ref(bytes: &[u8]) -> Result<SliceRef, String> {
450    if bytes.len() != 8 {
451        return Err("Invalid bun.lockb slice length".to_string());
452    }
453    let off = u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as usize;
454    let len = u32::from_le_bytes(bytes[4..8].try_into().unwrap()) as usize;
455    Ok(SliceRef { off, len })
456}
457
458fn parse_resolution(bytes: &[u8], string_bytes: &[u8]) -> Result<BunLockbResolution, String> {
459    if bytes.len() != 64 {
460        return Err("Invalid bun.lockb resolution length".to_string());
461    }
462
463    let tag = bytes[0];
464    match tag {
465        1 => Ok(BunLockbResolution {
466            version: None,
467            resolved: Some(String::new()).filter(|s| !s.is_empty()),
468        }),
469        2 => {
470            let resolved = decode_bun_string(&bytes[8..16], string_bytes)?;
471            let major = u32::from_le_bytes(bytes[16..20].try_into().unwrap());
472            let minor = u32::from_le_bytes(bytes[20..24].try_into().unwrap());
473            let patch = u32::from_le_bytes(bytes[24..28].try_into().unwrap());
474            let tag_suffix = decode_version_suffix(&bytes[32..64], string_bytes)?;
475            let version = if let Some(suffix) = tag_suffix {
476                format!("{}.{}.{}{}", major, minor, patch, suffix)
477            } else {
478                format!("{}.{}.{}", major, minor, patch)
479            };
480
481            Ok(BunLockbResolution {
482                version: Some(version),
483                resolved: (!resolved.is_empty()).then_some(resolved),
484            })
485        }
486        72 => {
487            let workspace = decode_bun_string(&bytes[8..16], string_bytes)?;
488            Ok(BunLockbResolution {
489                version: None,
490                resolved: Some(format!("workspace:{}", workspace)),
491            })
492        }
493        4 | 8 | 16 | 24 | 32 | 64 | 80 | 100 => {
494            let resolved = decode_bun_string(&bytes[8..16], string_bytes)?;
495            Ok(BunLockbResolution {
496                version: None,
497                resolved: (!resolved.is_empty()).then_some(resolved),
498            })
499        }
500        _ => Err(format!("Unsupported bun.lockb resolution tag {}", tag)),
501    }
502}
503
504fn decode_version_suffix(bytes: &[u8], string_bytes: &[u8]) -> Result<Option<String>, String> {
505    if bytes.len() != 32 {
506        return Err("Invalid bun.lockb version tag length".to_string());
507    }
508    let pre = decode_bun_string(&bytes[0..8], string_bytes)?;
509    let build = decode_bun_string(&bytes[16..24], string_bytes)?;
510
511    let mut suffix = String::new();
512    if !pre.is_empty() {
513        suffix.push('-');
514        suffix.push_str(&pre);
515    }
516    if !build.is_empty() {
517        suffix.push('+');
518        suffix.push_str(&build);
519    }
520
521    Ok((!suffix.is_empty()).then_some(suffix))
522}
523
/// Decodes an 8-byte string reference.
///
/// When the top bit of the last byte is clear, the eight bytes hold the string inline,
/// terminated by the first zero byte (or using all eight bytes if there is none).
/// Otherwise bytes 0..4 are a little-endian `u32` offset into `string_bytes` and bytes
/// 4..8 a little-endian `u32` length with the top bit masked off.
///
/// # Errors
///
/// Fails when `bytes` is not exactly eight bytes long, when the referenced range lies
/// outside `string_bytes` (including when `offset + length` overflows), or when the
/// selected bytes are not valid UTF-8.
fn decode_bun_string(bytes: &[u8], string_bytes: &[u8]) -> Result<String, String> {
    let &[b0, b1, b2, b3, b4, b5, b6, b7] = bytes else {
        return Err("Invalid bun.lockb string width".to_string());
    };

    if b7 & 0x80 == 0 {
        let end = bytes.iter().position(|b| *b == 0).unwrap_or(bytes.len());
        return std::str::from_utf8(&bytes[..end])
            .map(|s| s.to_string())
            .map_err(|e| format!("Invalid inline bun.lockb UTF-8: {}", e));
    }

    let off = u32::from_le_bytes([b0, b1, b2, b3]) as usize;
    let len = (u32::from_le_bytes([b4, b5, b6, b7]) & 0x7fff_ffff) as usize;
    // Checked addition: both values come from the file and their sum may not fit `usize`
    // on 32-bit targets.
    let slice = off
        .checked_add(len)
        .and_then(|end| string_bytes.get(off..end))
        .ok_or_else(|| "bun.lockb string offset out of bounds".to_string())?;

    std::str::from_utf8(slice)
        .map(|s| s.to_string())
        .map_err(|e| format!("Invalid external bun.lockb UTF-8: {}", e))
}
546
547fn parse_integrity(bytes: &[u8]) -> Option<String> {
548    if bytes.is_empty() {
549        return None;
550    }
551
552    let algorithm = match bytes[0] {
553        1 => "sha1",
554        2 => "sha256",
555        3 => "sha384",
556        4 => "sha512",
557        _ => return None,
558    };
559
560    Some(format!(
561        "{}-{}",
562        algorithm,
563        base64::engine::general_purpose::STANDARD.encode(&bytes[1..])
564    ))
565}
566
/// Renders `bytes` as lowercase hexadecimal, two characters per byte.
fn encode_hex(bytes: &[u8]) -> String {
    const DIGITS: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
    ];

    bytes
        .iter()
        .flat_map(|&byte| {
            [
                DIGITS[usize::from(byte >> 4)],
                DIGITS[usize::from(byte & 0x0f)],
            ]
        })
        .collect()
}
576
/// Splits a package name into `(namespace, name)`.
///
/// A name starting with `@` is split at its first `/`: the part before it (including the
/// `@`) is the namespace and the rest is the name. If such a name has no `/`, the whole
/// string is returned as the namespace and the name is `None`. Any other name yields an
/// empty namespace and the full string as the name.
fn split_namespace_name(full_name: &str) -> (Option<String>, Option<String>) {
    if !full_name.starts_with('@') {
        return (Some(String::new()), Some(full_name.to_owned()));
    }

    match full_name.split_once('/') {
        Some((scope, rest)) => (Some(scope.to_owned()), Some(rest.to_owned())),
        None => (Some(full_name.to_owned()), None),
    }
}
587
/// How a dependency is reported, derived from its behaviour flags.
struct ManifestBehavior {
    /// Manifest section name, e.g. `dependencies` or `devDependencies`.
    scope: &'static str,
    /// Whether the dependency is reported as a runtime dependency.
    is_runtime: bool,
    /// Whether the dependency is reported as optional.
    is_optional: bool,
}

/// Maps a dependency's behaviour bit flags to its scope and runtime/optional markers.
///
/// Flags are checked in priority order — workspace, dev, peer (optional when the
/// optional bit is also set), optional — and anything else, including no flags at all,
/// is reported as a regular runtime dependency.
fn behavior_to_manifest(behavior: u8) -> ManifestBehavior {
    const OPTIONAL: u8 = 0b100;
    const DEV: u8 = 0b1000;
    const PEER: u8 = 0b1_0000;
    const WORKSPACE: u8 = 0b10_0000;

    let has = |flag: u8| behavior & flag != 0;

    let (scope, is_runtime, is_optional) = if has(WORKSPACE) {
        ("workspaces", false, false)
    } else if has(DEV) {
        ("devDependencies", false, true)
    } else if has(PEER) {
        ("peerDependencies", true, has(OPTIONAL))
    } else if has(OPTIONAL) {
        ("optionalDependencies", true, true)
    } else {
        // Covers both the "normal" flag (0b10) and an empty flag set.
        ("dependencies", true, false)
    };

    ManifestBehavior {
        scope,
        is_runtime,
        is_optional,
    }
}
650
651impl<'a> LockbCursor<'a> {
652    fn new(bytes: &'a [u8]) -> Self {
653        Self { bytes, pos: 0 }
654    }
655
656    fn read_bytes(&mut self, len: usize) -> Result<&'a [u8], String> {
657        let end = self
658            .pos
659            .checked_add(len)
660            .ok_or_else(|| "bun.lockb offset overflow".to_string())?;
661        let slice = self
662            .bytes
663            .get(self.pos..end)
664            .ok_or_else(|| "bun.lockb is truncated".to_string())?;
665        self.pos = end;
666        Ok(slice)
667    }
668
669    fn expect_bytes(&mut self, expected: &[u8]) -> Result<(), String> {
670        let actual = self.read_bytes(expected.len())?;
671        if actual == expected {
672            Ok(())
673        } else {
674            Err("Invalid bun.lockb header".to_string())
675        }
676    }
677
678    fn read_u32(&mut self) -> Result<u32, String> {
679        let bytes: [u8; 4] = self
680            .read_bytes(4)?
681            .try_into()
682            .map_err(|_| "Invalid bun.lockb u32".to_string())?;
683        Ok(u32::from_le_bytes(bytes))
684    }
685
686    fn read_u64(&mut self) -> Result<u64, String> {
687        let bytes: [u8; 8] = self
688            .read_bytes(8)?
689            .try_into()
690            .map_err(|_| "Invalid bun.lockb u64".to_string())?;
691        Ok(u64::from_le_bytes(bytes))
692    }
693}
694
// Parser metadata handed to the crate's `register_parser!` macro: description, path
// glob, package type, primary language and a documentation URL.
// NOTE(review): the macro is defined elsewhere; presumably it adds this entry to the
// crate's list of supported parsers — confirm.
695crate::register_parser!(
696    "Legacy Bun binary lockfile",
697    &["**/bun.lockb"],
698    "npm",
699    "JavaScript",
700    Some("https://bun.sh/docs/pm/lockfile"),
701);