blockless_car/utils/
pack.rs

1use std::{
2    collections::{HashMap, VecDeque},
3    fs, io,
4    path::{Path, PathBuf},
5    rc::Rc,
6};
7
8use crate::{
9    codec::Encoder,
10    error::CarError,
11    header::CarHeaderV1,
12    unixfs::{FileType, Link, UnixFs},
13    writer::{CarWriter, CarWriterV1, WriteStream},
14    CarHeader, Ipld,
15};
16use cid::{
17    multihash::{Blake2b256, Code, Hasher, Multihash, MultihashDigest, Sha2_256},
18    Cid,
19};
20use ipld::{pb::DagPbCodec, prelude::Codec, raw::RawCodec};
21
/// One directory scheduled for processing: its path plus, when it was discovered through a
/// parent directory, the index of its link inside that parent's link list.
type WalkPath = (Rc<PathBuf>, Option<usize>);
/// Maps every walked directory path to the `UnixFs` node built for it.
type WalkPathCache = HashMap<Rc<PathBuf>, UnixFs>;
/// Size in bytes, as returned next to a CID by `process_file`.
type Size = usize;
25
/// Largest number of bytes stored in a single raw section (256 KiB).
const MAX_SECTION_SIZE: usize = 256 * 1024;
/// Largest number of links a single file node holds before another tree level is added.
const MAX_LINK_COUNT: usize = 174;
28
29/// archive the directory to the target CAR format file
30/// `path` is the directory archived in to the CAR file.
31/// `to_carfile` is the target file.
32pub fn pack_files<T>(
33    path: impl AsRef<Path>,
34    to_carfile: T,
35    hasher_codec: multicodec::Codec,
36    no_wrap_file: bool,
37) -> Result<Cid, CarError>
38where
39    T: std::io::Write + std::io::Seek,
40{
41    let src_path = path.as_ref().to_path_buf();
42    if !src_path.exists() {
43        return Err(CarError::IO(io::ErrorKind::NotFound.into()));
44    }
45    // ensure sufficient file block size for head, after the root cid generated using the content, fill back the head.
46    let mut root_cid = empty_pb_cid(hasher_codec);
47    let header = CarHeader::new_v1(vec![root_cid]);
48    let mut writer = CarWriterV1::new(to_carfile, header);
49
50    if src_path.is_file() {
51        // if the source is a file then do not walk directory tree, process the file directly
52        let mut file = fs::OpenOptions::new().read(true).open(&src_path)?;
53        let file_size = file.metadata()?.len() as usize;
54        let (hash, size) = process_file(&mut file, &mut writer, file_size, hasher_codec)?;
55        if no_wrap_file {
56            root_cid = hash;
57        } else {
58            // wrap file into a directory entry
59            let link = Link {
60                hash,
61                file_type: FileType::Directory,
62                name: src_path.file_name().unwrap().to_str().unwrap().to_owned(),
63                tsize: size as u64,
64            };
65            let unix_fs = UnixFs {
66                links: vec![link],
67                file_type: FileType::Directory,
68                ..Default::default()
69            };
70            let dir_ipld = unix_fs.encode()?;
71            let bs = DagPbCodec
72                .encode(&dir_ipld)
73                .map_err(|e| CarError::Parsing(e.to_string()))?;
74            let cid = pb_cid(&bs, hasher_codec);
75            writer.write_block(cid, bs)?;
76            root_cid = cid;
77        }
78    } else {
79        //source is a directory, walk the directory tree
80        let (walk_paths, mut path_cache) = walk_path(&path)?;
81        for walk_path in &walk_paths {
82            process_path(
83                &src_path,
84                &mut root_cid,
85                &mut writer,
86                walk_path,
87                &mut path_cache,
88                hasher_codec,
89            )?;
90        }
91        // add an additional top node like in go-car
92        let root_node = path_cache.get(&src_path).unwrap();
93        let tsize: u64 = DagPbCodec
94            .encode(&root_node.encode()?)
95            .map_err(|e| CarError::Parsing(e.to_string()))?
96            .len() as u64
97            + root_node.links.iter().map(|link| link.tsize).sum::<u64>();
98        let unix_fs = UnixFs {
99            links: vec![Link {
100                hash: root_cid,
101                file_type: FileType::Directory,
102                name: src_path.file_name().unwrap().to_str().unwrap().to_string(),
103                tsize,
104            }],
105            file_type: FileType::Directory,
106            file_size: None,
107            ..Default::default()
108        };
109        let ipld = unix_fs.encode()?;
110        let bs = DagPbCodec
111            .encode(&ipld)
112            .map_err(|e| CarError::Parsing(e.to_string()))?;
113        root_cid = pb_cid(&bs, hasher_codec);
114        writer.write_block(root_cid, bs)?;
115    }
116    let header = CarHeader::V1(CarHeaderV1::new(vec![root_cid]));
117    writer.rewrite_header(header)?;
118    Ok(root_cid)
119}
120
121pub fn pack_buffer<W, R>(
122    reader: &mut R,
123    writer: W,
124    size: usize,
125    hasher_codec: multicodec::Codec,
126) -> Result<Cid, CarError>
127where
128    W: std::io::Write + std::io::Seek,
129    R: std::io::Read + std::io::Seek,
130{
131    let header = CarHeader::new_v1(vec![empty_pb_cid(hasher_codec)]);
132    let mut writer = CarWriterV1::new(writer, header);
133    let (hash, _) = process_file(reader, &mut writer, size, hasher_codec)?;
134    let header = CarHeader::V1(CarHeaderV1::new(vec![hash]));
135    writer.rewrite_header(header)?;
136    Ok(hash)
137}
138
/// Associates a hasher type with the multicodec identifier of the hash it computes.
trait HasherCodec {
    /// Returns the multicodec identifier for this hasher.
    fn codec(&self) -> multicodec::Codec;
}

// The SHA2-256 hasher reports the `Sha2_256` multicodec.
impl HasherCodec for Sha2_256 {
    fn codec(&self) -> multicodec::Codec {
        multicodec::Codec::Sha2_256
    }
}

// The BLAKE2b-256 hasher reports the `Blake2b_256` multicodec.
impl HasherCodec for Blake2b256 {
    fn codec(&self) -> multicodec::Codec {
        multicodec::Codec::Blake2b_256
    }
}
154
155fn cid_gen<H: Hasher + Default + HasherCodec>(
156) -> impl FnMut(WriteStream) -> Option<Result<Cid, CarError>> {
157    let mut hasher = H::default();
158    move |w: WriteStream| match w {
159        WriteStream::Bytes(bs) => {
160            hasher.update(bs);
161            None
162        }
163        WriteStream::End => {
164            let code = hasher.codec();
165            let bs = hasher.finalize();
166            let h = match Multihash::wrap(code.code() as u64, bs) {
167                Ok(h) => h,
168                Err(e) => return Some(Err(CarError::Parsing(e.to_string()))),
169            };
170            Some(Ok(Cid::new_v1(RawCodec.into(), h)))
171        }
172    }
173}
174
175fn stream_block<R, W>(
176    writer: &mut CarWriterV1<W>,
177    stream_len: usize,
178    r: &mut R,
179    hasher_codec: multicodec::Codec,
180) -> Result<Cid, CarError>
181where
182    W: std::io::Write + std::io::Seek,
183    R: std::io::Read + std::io::Seek,
184{
185    match hasher_codec {
186        multicodec::Codec::Sha2_256 => writer.stream_block(cid_gen::<Sha2_256>(), stream_len, r),
187        multicodec::Codec::Blake2b_256 => {
188            writer.stream_block(cid_gen::<Blake2b256>(), stream_len, r)
189        }
190        _ => unimplemented!(),
191    }
192}
193
194fn process_file<W, R>(
195    reader: &mut R,
196    writer: &mut CarWriterV1<W>,
197    size: usize,
198    hasher_codec: multicodec::Codec,
199) -> Result<(Cid, Size), CarError>
200where
201    W: std::io::Write + std::io::Seek,
202    R: std::io::Read + std::io::Seek,
203{
204    if size < MAX_SECTION_SIZE {
205        Ok((stream_block(writer, size, reader, hasher_codec)?, size))
206    } else {
207        let mut secs = size / MAX_SECTION_SIZE;
208        if size % MAX_SECTION_SIZE > 0 {
209            secs += 1;
210        }
211        let mut block_sizes = vec![];
212        let mut links = (0..secs)
213            .map(|i| {
214                let size = if i < secs - 1 {
215                    MAX_SECTION_SIZE
216                } else {
217                    size % MAX_SECTION_SIZE
218                };
219                block_sizes.push(size as u64);
220                let cid = stream_block(writer, size, reader, hasher_codec);
221                cid.map(|cid| Link {
222                    hash: cid,
223                    file_type: FileType::Raw,
224                    name: String::default(),
225                    tsize: size as u64,
226                })
227            })
228            .collect::<Result<Vec<Link>, CarError>>()?;
229        while links.len() > MAX_LINK_COUNT {
230            let mut new_links = vec![];
231            let mut new_block_sizes = vec![];
232            let mut link_count = links.len() / MAX_LINK_COUNT;
233            if links.len() % MAX_LINK_COUNT > 0 {
234                link_count += 1;
235            }
236            for _ in 0..link_count {
237                let len = if links.len() >= MAX_LINK_COUNT {
238                    MAX_LINK_COUNT
239                } else {
240                    links.len()
241                };
242                let links_size = block_sizes.as_slice()[0..len].iter().sum();
243                let unix_fs = UnixFs {
244                    links: links.drain(0..len).collect(),
245                    file_type: FileType::File,
246                    file_size: Some(links_size),
247                    block_sizes: block_sizes.drain(0..len).collect(),
248                    ..Default::default()
249                };
250                let ipld = unix_fs.encode()?;
251                let bs = DagPbCodec
252                    .encode(&ipld)
253                    .map_err(|e| CarError::Parsing(e.to_string()))?;
254                let size = links_size + bs.len() as u64;
255                let cid = pb_cid(&bs, hasher_codec);
256                writer.write_block(cid, bs)?;
257                let new_link = Link {
258                    hash: cid,
259                    file_type: FileType::File,
260                    name: String::default(),
261                    tsize: size,
262                };
263                new_links.push(new_link);
264                new_block_sizes.push(links_size);
265            }
266            links = new_links;
267            block_sizes = new_block_sizes;
268        }
269        let links_size = links.iter().map(|link| link.tsize as usize).sum::<usize>();
270        let unix_fs = UnixFs {
271            file_size: Some(block_sizes.iter().sum()),
272            links,
273            file_type: FileType::File,
274            block_sizes,
275            ..Default::default()
276        };
277        let file_ipld = unix_fs.encode()?;
278        let bs = DagPbCodec
279            .encode(&file_ipld)
280            .map_err(|e| CarError::Parsing(e.to_string()))?;
281        let size = links_size + bs.len();
282        let cid = pb_cid(&bs, hasher_codec);
283        writer.write_block(cid, bs)?;
284        Ok((cid, size))
285    }
286}
287
288fn process_path<W: std::io::Write + std::io::Seek>(
289    root_path: impl AsRef<Path>,
290    root_cid: &mut Cid,
291    writer: &mut CarWriterV1<W>,
292    (abs_path, parent_idx): &(Rc<PathBuf>, Option<usize>),
293    path_cache: &mut WalkPathCache,
294    hasher_codec: multicodec::Codec,
295) -> Result<(), CarError> {
296    let unix_fs = path_cache.get_mut(abs_path).unwrap();
297    let mut parent_tsize = 0;
298    for link in unix_fs.links.iter_mut() {
299        if let FileType::File = link.file_type {
300            let mut file = fs::OpenOptions::new()
301                .read(true)
302                .open(&abs_path.join(&link.name))?;
303            let file_size = file.metadata()?.len() as usize;
304            let (hash, size) = process_file(&mut file, writer, file_size, hasher_codec)?;
305            link.hash = hash;
306            link.tsize = size as u64;
307        }
308        parent_tsize += link.tsize;
309    }
310    // sort links correctly for pb-dag standard https://ipld.io/specs/codecs/dag-pb/spec/#link-sorting
311    unix_fs
312        .links
313        .sort_by(|a, b| match a.name.as_bytes() > b.name.as_bytes() {
314            true => std::cmp::Ordering::Greater,
315            false => std::cmp::Ordering::Less,
316        });
317    let fs_ipld: Ipld = unix_fs.encode()?;
318    let bs = DagPbCodec
319        .encode(&fs_ipld)
320        .map_err(|e| CarError::Parsing(e.to_string()))?;
321    parent_tsize += bs.len() as u64;
322    let cid = pb_cid(&bs, hasher_codec);
323    if root_path.as_ref() == abs_path.as_ref() {
324        *root_cid = cid;
325    }
326    writer.write_block(cid, bs)?;
327    unix_fs.cid = Some(cid);
328    match abs_path.parent() {
329        Some(parent) => {
330            let parent = Rc::new(parent.to_path_buf());
331            if let Some((p, pos)) = path_cache.get_mut(&parent).zip(*parent_idx) {
332                p.links[pos].hash = cid;
333                p.links[pos].tsize = parent_tsize;
334            }
335        }
336        None => unimplemented!("should not happen"),
337    }
338    Ok(())
339}
340
341fn digest(data: &[u8], hasher_codec: multicodec::Codec) -> Multihash {
342    match hasher_codec {
343        multicodec::Codec::Sha2_256 => Code::Sha2_256.digest(data),
344        multicodec::Codec::Blake2b_256 => Code::Blake2b256.digest(data),
345        _ => unimplemented!(),
346    }
347}
348
349#[inline(always)]
350pub fn empty_pb_cid(hasher_codec: multicodec::Codec) -> Cid {
351    pb_cid(&[], hasher_codec)
352}
353
354#[inline(always)]
355pub fn pb_cid(data: &[u8], hasher_codec: multicodec::Codec) -> Cid {
356    Cid::new_v1(DagPbCodec.into(), digest(data, hasher_codec))
357}
358
359#[inline(always)]
360pub fn raw_cid(data: &[u8], hasher_codec: multicodec::Codec) -> Cid {
361    Cid::new_v1(RawCodec.into(), digest(data, hasher_codec))
362}
363
364/// walk all directory, and record the directory informations.
365/// `WalkPath` contain the index in children.
366pub fn walk_path(path: impl AsRef<Path>) -> Result<(Vec<WalkPath>, WalkPathCache), CarError> {
367    let root_path: Rc<PathBuf> = Rc::new(path.as_ref().into());
368    let mut queue = VecDeque::from(vec![root_path.clone()]);
369    let mut path_cache = HashMap::new();
370    let mut walk_paths = Vec::new();
371    while let Some(dir_path) = queue.pop_back() {
372        let mut unix_dir = UnixFs::new_directory();
373        for entry in fs::read_dir(&*dir_path)? {
374            let entry = entry?;
375            let file_type = entry.file_type()?;
376            let name = entry.file_name().to_str().unwrap_or("").to_string();
377            if file_type.is_file() {
378                unix_dir.add_link(Link {
379                    name,
380                    file_type: FileType::File,
381                    ..Default::default()
382                });
383            } else if file_type.is_dir() {
384                let abs_path = entry.path().to_path_buf();
385                let rc_abs_path = Rc::new(abs_path);
386                let idx = unix_dir.add_link(Link {
387                    name,
388                    tsize: 0,
389                    file_type: FileType::Directory,
390                    ..Default::default()
391                });
392                walk_paths.push((rc_abs_path.clone(), Some(idx)));
393                queue.push_back(rc_abs_path);
394            }
395        }
396        path_cache.insert(dir_path, unix_dir);
397    }
398
399    walk_paths.reverse();
400    walk_paths.push((root_path, None));
401
402    Ok((walk_paths, path_cache))
403}
404
// Tests for `pack_files` and `pack_buffer`. Each test compares the produced root CID with a
// reference obtained from the `car` binary in `$HOME/go/bin` when it exists, and with a
// hard-coded CID otherwise.
#[cfg(test)]
mod test {
    use super::*;
    use rand::prelude::*;
    use rand_chacha::ChaCha8Rng;
    use std::{
        cmp,
        fs::File,
        io::{BufWriter, Cursor, Write},
        str::FromStr,
    };
    use tempdir::TempDir;

    /// Creates `path` and fills it with `size` bytes from a fixed-seed random generator, so
    /// the content is the same on every run.
    fn write_large_file(path: &PathBuf, size: usize) {
        let file = File::create(path).unwrap();
        let mut writer = BufWriter::new(file);
        let mut buffer: [u8; 1000] = [0; 1000];
        let mut remaining_size = size;
        // use seeded random data to fill
        let mut rng = ChaCha8Rng::seed_from_u64(1);
        while remaining_size > 0 {
            let to_write = cmp::min(remaining_size, buffer.len());
            let buffer = &mut buffer[..to_write];
            rng.fill(buffer);
            // `write` may accept fewer bytes than offered; only what was written is counted
            let amount = writer.write(buffer).unwrap();
            remaining_size -= amount;
        }
        writer.flush().unwrap();
    }

    /// Computes a reference root CID for `source_path` with the `car` binary.
    ///
    /// Returns `None` when `$HOME/go/bin/car` does not exist. Otherwise a CARv1 file is
    /// created in `output_dir` (with `--no-wrap` when `no_wrap` is set) and its root is read
    /// back with `car root`.
    fn get_reference_cid(
        source_path: &impl AsRef<Path>,
        output_dir: &impl AsRef<Path>,
        no_wrap: bool,
    ) -> Option<Cid> {
        if !home::home_dir().unwrap().join("go/bin/car").exists() {
            return None;
        }
        let temp_reference_file = output_dir.as_ref().join("test-reference.car");
        // NOTE(review): the paths are interpolated into a shell command unquoted and the exit
        // status of the command is not checked.
        std::process::Command::new("sh")
            .arg("-c")
            .arg(format!(
                "$HOME/go/bin/car create --version 1 {} --file {} {}",
                if no_wrap { "--no-wrap" } else { "" },
                temp_reference_file.to_str().unwrap(),
                source_path.as_ref().to_str().unwrap()
            ))
            .output()
            .expect("failed to execute process");
        let result = String::from_utf8(
            std::process::Command::new("sh")
                .arg("-c")
                .arg(format!(
                    "$HOME/go/bin/car root {}",
                    temp_reference_file.to_str().unwrap(),
                ))
                .output()
                .expect("failed to execute process")
                .stdout,
        )
        .unwrap();
        let reference = Cid::from_str(result.trim()).unwrap();
        println!("Reference CID: {}", reference);
        Some(reference)
    }

    // Packs a single small file wrapped into a directory node.
    #[test]
    fn test_pack_files_small_file_no_wrap_false() {
        let temp_dir = TempDir::new("blockless-car-temp-dir").unwrap();

        // create a root dir with a fixed name (temp_dir name has a random suffix)
        // NOTE(review): this directory is created but not used by the rest of the test.
        let root_dir = temp_dir.path().join("root");
        std::fs::create_dir_all(root_dir).unwrap();

        let temp_file = temp_dir.path().join("test.txt");

        let mut file = File::create(&temp_file).unwrap();
        file.write_all(b"hello world").unwrap();

        let temp_output_dir = TempDir::new("blockless-car-temp-output-dir").unwrap();
        let temp_output_file = temp_output_dir.path().join("test.car");
        let car_file = std::fs::File::create(temp_output_file.as_ref() as &Path).unwrap();

        let reference = match get_reference_cid(&temp_file, &temp_output_dir, false) {
            Some(reference) => reference,
            None => Cid::from_str("bafybeifotw2dmp73obnbhg6uffdrjshvone2jkkp3rlw3fot2vne5zvymu")
                .unwrap(),
        };

        let test_cid =
            pack_files(&temp_file, &car_file, multicodec::Codec::Sha2_256, false).unwrap();
        assert_eq!(test_cid, reference);
    }

    // Packs a single small file without the wrapping directory node.
    #[test]
    fn test_pack_files_small_file_no_wrap_true() {
        let temp_dir = TempDir::new("blockless-car-temp-dir").unwrap();
        let temp_file = temp_dir.path().join("test.txt");
        let mut file = File::create(&temp_file).unwrap();
        file.write_all(b"hello world").unwrap();

        let temp_output_dir = TempDir::new("blockless-car-temp-output-dir").unwrap();
        let temp_output_file = temp_output_dir.path().join("test.car");
        let car_file = std::fs::File::create(temp_output_file.as_ref() as &Path).unwrap();

        let reference = match get_reference_cid(&temp_file, &temp_output_dir, true) {
            Some(reference) => reference,
            None => Cid::from_str("bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e")
                .unwrap(),
        };

        let test_cid =
            pack_files(&temp_file, &car_file, multicodec::Codec::Sha2_256, true).unwrap();
        assert_eq!(test_cid, reference);
    }

    // Packs a single 1,000,000-byte file (larger than one section) wrapped into a directory
    // node.
    #[test]
    fn test_pack_files_large_file_no_wrap_false() {
        let temp_dir = TempDir::new("blockless-car-temp-dir").unwrap();

        // create a root dir with a fixed name (temp_dir name has a random suffix)
        // NOTE(review): this directory is created but not used by the rest of the test.
        let root_dir = temp_dir.path().join("root");
        std::fs::create_dir_all(root_dir).unwrap();

        let temp_file = temp_dir.path().join("data.bin");
        write_large_file(&temp_file, 1000000);

        let temp_output_dir = TempDir::new("blockless-car-temp-output-dir").unwrap();
        let temp_output_file = temp_output_dir.path().join("test.car");
        let car_file = std::fs::File::create(temp_output_file.as_ref() as &Path).unwrap();

        let reference = match get_reference_cid(&temp_file, &temp_output_dir, false) {
            Some(reference) => reference,
            None => Cid::from_str("bafybeibdndwligqskbbklvjhq32fuugwfuzt3i242u2yd2ih6hddgmilkm")
                .unwrap(),
        };

        let test_cid =
            pack_files(&temp_file, &car_file, multicodec::Codec::Sha2_256, false).unwrap();
        assert_eq!(test_cid, reference);
    }

    // Packs a single 1,000,000-byte file without the wrapping directory node.
    #[test]
    fn test_pack_files_large_file_no_wrap_true() {
        let temp_dir = TempDir::new("blockless-car-temp-dir").unwrap();
        let temp_file = temp_dir.path().join("data.bin");
        write_large_file(&temp_file, 1000000);

        let temp_output_dir = TempDir::new("blockless-car-temp-output-dir").unwrap();
        let temp_output_file = temp_output_dir.path().join("test.car");
        let car_file = std::fs::File::create(temp_output_file.as_ref() as &Path).unwrap();

        let reference = match get_reference_cid(&temp_file, &temp_output_dir, true) {
            Some(reference) => reference,
            None => Cid::from_str("bafybeigr5o3jbe2biam6pskvjhbaczjfdlmnjwlzovpgbzctiwqtpkvhee")
                .unwrap(),
        };

        let test_cid =
            pack_files(&temp_file, &car_file, multicodec::Codec::Sha2_256, true).unwrap();
        assert_eq!(test_cid, reference);
    }

    // Packs the `root` directory.
    #[test]
    fn test_pack_files_dir_small_file() {
        let temp_dir = TempDir::new("blockless-car-temp-dir").unwrap();

        // create a root dir with a fixed name (temp_dir name has a random suffix)
        let root_dir = temp_dir.path().join("root");
        std::fs::create_dir_all(&root_dir).unwrap();

        // NOTE(review): `test.txt` is created next to `root`, not inside it, so the packed
        // directory is empty. The hard-coded reference CID below matches this layout; confirm
        // whether the file was meant to live in `root_dir`.
        let temp_file = temp_dir.path().join("test.txt");
        let mut file = File::create(temp_file).unwrap();
        file.write_all(b"hello world").unwrap();

        let temp_output_dir = TempDir::new("blockless-car-temp-output-dir").unwrap();
        let temp_output_file = temp_output_dir.path().join("test.car");
        let car_file = std::fs::File::create(temp_output_file.as_ref() as &Path).unwrap();

        let reference = match get_reference_cid(&root_dir, &temp_output_dir, false) {
            Some(reference) => reference,
            None => Cid::from_str("bafybeifp6fbcoaq3px3ha22ddltu3itl5ek3secgtmbwm4ui7ru74ndwkm")
                .unwrap(),
        };

        let test_cid =
            pack_files(&root_dir, &car_file, multicodec::Codec::Sha2_256, false).unwrap();
        assert_eq!(test_cid, reference);
    }

    // Packs a directory holding one 1,000,000,000-byte file; this test writes about 1 GB of
    // fixture data plus the archive.
    #[test]
    fn test_pack_files_dir_big_file() {
        let temp_dir = TempDir::new("blockless-car-temp-dir").unwrap();

        // create a root dir with a fixed name (temp_dir name has a random suffix)
        let root_dir = temp_dir.path().join("root");
        std::fs::create_dir_all(&root_dir).unwrap();

        let temp_file = root_dir.join("data.bin");
        write_large_file(&temp_file, 1000000000);

        let temp_output_dir = TempDir::new("blockless-car-temp-output-dir").unwrap();
        let temp_output_file = temp_output_dir.path().join("test.car");
        let car_file = std::fs::File::create(temp_output_file.as_ref() as &Path).unwrap();

        let reference = match get_reference_cid(&root_dir, &temp_output_dir, false) {
            Some(reference) => reference,
            None => Cid::from_str("bafybeidvyeyyss53sab3i43utmznutnise2h7ptvv3ftccvyfqc6r5sv74")
                .unwrap(),
        };

        let test_cid =
            pack_files(&root_dir, &car_file, multicodec::Codec::Sha2_256, false).unwrap();
        assert_eq!(test_cid, reference);
    }

    // Packs a nested directory tree mixing empty directories, small files and large files.
    #[test]
    fn test_pack_files_dir_tree() {
        let temp_dir = TempDir::new("blockless-car-temp-dir").unwrap();

        // create a root dir with a fixed name (temp_dir name has a random suffix)
        let root_dir = temp_dir.path().join("root");

        std::fs::create_dir_all(root_dir.join("level1A/level2A/level3A")).unwrap();
        std::fs::create_dir_all(root_dir.join("level1A/level2B/level3A")).unwrap();
        std::fs::create_dir_all(root_dir.join("level1A/level2C/level3A")).unwrap();
        std::fs::create_dir_all(root_dir.join("level1B/level2A/level3A")).unwrap();

        let temp_file = temp_dir
            .path()
            .join("root/level1A/level2A/level3A/test.txt");
        let mut file = File::create(temp_file).unwrap();
        file.write_all(b"hello world").unwrap();

        let temp_file = root_dir.join("level1A/level2A/test.txt");
        let mut file = File::create(temp_file).unwrap();
        file.write_all(b"hello world").unwrap();

        let temp_file = temp_dir
            .path()
            .join("root/level1A/level2B/level3A/data.bin");
        write_large_file(&temp_file, 1000000);

        let temp_file = temp_dir
            .path()
            .join("root/level1A/level2C/level3A/data.bin");
        write_large_file(&temp_file, 100000000);

        let temp_file = temp_dir
            .path()
            .join("root/level1A/level2C/level3A/test.txt");
        let mut file = File::create(temp_file).unwrap();
        file.write_all(b"hello world").unwrap();

        let temp_output_dir = TempDir::new("blockless-car-temp-output-dir").unwrap();
        let temp_output_file = temp_output_dir.path().join("test.car");
        let car_file = std::fs::File::create(temp_output_file.as_ref() as &Path).unwrap();

        let reference = match get_reference_cid(&root_dir, &temp_output_dir, false) {
            Some(reference) => reference,
            None => Cid::from_str("bafybeicidmis4mrywfe4almb473raq7upvacl2hk6lxqsi2zggvrj7demi")
                .unwrap(),
        };

        let test_cid =
            pack_files(&root_dir, &car_file, multicodec::Codec::Sha2_256, false).unwrap();
        assert_eq!(test_cid, reference);
    }

    // Packs an in-memory buffer and expects the same root CID as packing the file it was read
    // from without a wrapping directory.
    #[test]
    fn test_pack_buffer() {
        let temp_dir = TempDir::new("blockless-car-temp-dir").unwrap();

        // create a large file
        let temp_file = temp_dir.path().join("data.bin");
        write_large_file(&temp_file, 10000000);

        // read file into a buffer which implements std::io::{Read, Seek}
        let mut reader = Cursor::new(fs::read(&temp_file).unwrap());

        // create a target buffer that implements std::io::{Write, Seek}
        let mut writer = Cursor::new(vec![]);
        let size = reader.get_ref().len();

        let test_cid =
            pack_buffer(&mut reader, &mut writer, size, multicodec::Codec::Sha2_256).unwrap();

        let temp_output_dir = TempDir::new("blockless-car-temp-output-dir").unwrap();
        let reference = match get_reference_cid(&temp_file, &temp_output_dir, true) {
            Some(reference) => reference,
            None => Cid::from_str("bafybeies2czmisuexy2mfx5vizfs34xdtiwsvyqwuy4fdqsfdv2vouo35i")
                .unwrap(),
        };

        assert_eq!(test_cid, reference);
    }
}