Skip to main content

lance_core/utils/
blob.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use object_store::path::Path;
5
6/// Format a blob sidecar path for a data file.
7///
8/// Layout: `<base>/<data_file_key>/<obfuscated_blob_id>.blob`
9/// - `base` is typically the dataset's data directory.
10/// - `data_file_key` is the stem of the data file (without extension).
11/// - `blob_id` is transformed via `reverse_bits()` before binary formatting.
12pub fn blob_path(base: &Path, data_file_key: &str, blob_id: u32) -> Path {
13    let file_name = format!("{:032b}.blob", blob_id.reverse_bits());
14    base.child(data_file_key).child(file_name.as_str())
15}
16
#[cfg(test)]
mod tests {
    use super::*;

    /// Id 2 (`0b10`) bit-reversed over 32 bits leaves a single `1` in the
    /// second position of the file name.
    #[test]
    fn test_blob_path_formatting() {
        let root = Path::from("base");
        let rendered = blob_path(&root, "deadbeef", 2).to_string();
        assert_eq!(rendered, "base/deadbeef/01000000000000000000000000000000.blob");
    }

    /// Consecutive ids must produce distinct paths whose file names already
    /// differ in their leading digits.
    #[test]
    fn test_blob_path_scattered_prefixes_for_sequential_ids() {
        let root = Path::from("base");
        let first = blob_path(&root, "deadbeef", 1).to_string();
        let second = blob_path(&root, "deadbeef", 2).to_string();

        assert_ne!(first, second);
        assert_eq!(first, "base/deadbeef/10000000000000000000000000000000.blob");
        assert_eq!(second, "base/deadbeef/01000000000000000000000000000000.blob");
    }
}