dexter_ipfs_car/
writer.rs

1use std::fs::File;
2use std::io::{Write, BufWriter, Seek, SeekFrom, Cursor};
3use std::path::Path;
4use anyhow::Result;
5use cid::Cid;
6use serde::Serialize;
7use serde_cbor::to_vec;
8
9use crate::{RowKey, RowData};
10use crate::encoding::encode_row;
11
12/// An index entry that maps a row_key to its (offset, length) in the CAR file.
13#[derive(Debug)]
14pub struct BlockIndexEntry {
15    pub row_key: RowKey,
16    pub offset: u64,
17    pub length: u64,
18}
19
20/// CAR file writer
21pub struct CarWriter {
22    writer: BufWriter<File>,
23    cids: Vec<(RowKey, Cid, Vec<u8>)>,
24}
25
26impl CarWriter {
27    /// Create a new CarWriter that writes to the given path on local filesystem.
28    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self> {
29        let file = File::create(path)?;
30        Ok(CarWriter {
31            writer: BufWriter::new(file),
32            cids: Vec::new(),
33        })
34    }
35
36    /// Add a row to the CAR file's internal buffer (not yet written).
37    pub fn add_row(&mut self, key: &RowKey, data: &RowData) -> Result<()> {
38        let (cid, block_data) = encode_row(key, data)?;
39        self.cids.push((key.clone(), cid, block_data));
40        Ok(())
41    }
42
43    /// Finalize and write the CAR file to disk.
44    /// Returns an index of `(row_key, offset, length)` for each block.
45    pub fn finalize(mut self) -> Result<Vec<BlockIndexEntry>> {
46        self.write_header()?;
47
48        let mut index = Vec::new();
49        let mut current_offset = self.writer.seek(SeekFrom::Current(0))?;
50
51        for (row_key, cid, block_data) in self.cids {
52            // The start offset before writing this block
53            let block_start_offset = current_offset;
54
55            // Prepare block data: [CID bytes | row data]
56            let mut block_buf = Vec::new();
57            block_buf.extend_from_slice(&cid.to_bytes());
58            block_buf.extend_from_slice(&block_data);
59
60            // Write varint (block length) + block contents
61            let length_bytes = write_varint_to_vec(block_buf.len() as u64);
62            self.writer.write_all(&length_bytes)?;
63            self.writer.write_all(&block_buf)?;
64
65            let block_total_length = length_bytes.len() as u64 + block_buf.len() as u64;
66
67            // Update offset
68            current_offset += block_total_length;
69
70            index.push(BlockIndexEntry {
71                row_key,
72                offset: block_start_offset,
73                length: block_total_length,
74            });
75        }
76
77        self.writer.flush()?;
78        Ok(index)
79    }
80
81    fn write_header(&mut self) -> Result<()> {
82        #[derive(Serialize)]
83        struct CarHeader {
84            roots: Vec<String>,
85            version: u64,
86        }
87
88        // Each block is considered a "root" for simplicity
89        let root_strings: Vec<String> =
90            self.cids.iter().map(|(_, cid, _)| cid.to_string()).collect();
91
92        let header = CarHeader {
93            roots: root_strings,
94            version: 1,
95        };
96
97        let header_bytes = to_vec(&header)?;
98        let length_bytes = write_varint_to_vec(header_bytes.len() as u64);
99
100        // varint(header_len) + header
101        self.writer.write_all(&length_bytes)?;
102        self.writer.write_all(&header_bytes)?;
103        Ok(())
104    }
105}
106
107/// Build CAR file in memory
108pub struct InMemoryCarBuilder {
109    buffer: Cursor<Vec<u8>>,
110    cids: Vec<(RowKey, Cid, Vec<u8>)>,
111}
112
113impl InMemoryCarBuilder {
114    /// Create an in-memory CarBuilder that writes to a buffer (Vec<u8>).
115    pub fn new() -> Self {
116        InMemoryCarBuilder {
117            buffer: Cursor::new(Vec::new()),
118            cids: Vec::new(),
119        }
120    }
121
122    /// Add a row to the in-memory buffer (not yet written).
123    pub fn add_row(&mut self, key: &RowKey, data: &RowData) -> Result<()> {
124        let (cid, block_data) = encode_row(key, data)?;
125        self.cids.push((key.clone(), cid, block_data));
126        Ok(())
127    }
128
129    /// Finalize and build the CAR file in memory.
130    /// Returns `(car_bytes, index)`.
131    pub fn finalize(mut self) -> Result<(Vec<u8>, Vec<BlockIndexEntry>)> {
132        self.write_header()?;
133
134        let mut index = Vec::new();
135        let mut current_offset = self.buffer.seek(SeekFrom::Current(0))?;
136
137        for (row_key, cid, block_data) in self.cids {
138            let block_start_offset = current_offset;
139
140            // Build the block
141            let mut block_buf = Vec::new();
142            block_buf.extend_from_slice(&cid.to_bytes());
143            block_buf.extend_from_slice(&block_data);
144
145            // varint length
146            let length_bytes = write_varint_to_vec(block_buf.len() as u64);
147            self.buffer.write_all(&length_bytes)?;
148            self.buffer.write_all(&block_buf)?;
149
150            let block_total_length = length_bytes.len() as u64 + block_buf.len() as u64;
151            current_offset += block_total_length;
152
153            index.push(BlockIndexEntry {
154                row_key,
155                offset: block_start_offset,
156                length: block_total_length,
157            });
158        }
159
160        self.buffer.flush()?;
161        let final_data = self.buffer.into_inner();
162        Ok((final_data, index))
163    }
164
165    fn write_header(&mut self) -> Result<()> {
166        #[derive(Serialize)]
167        struct CarHeader {
168            roots: Vec<String>,
169            version: u64,
170        }
171
172        let root_strings: Vec<String> =
173            self.cids.iter().map(|(_, cid, _)| cid.to_string()).collect();
174
175        let header = CarHeader {
176            roots: root_strings,
177            version: 1,
178        };
179
180        let header_bytes = to_vec(&header)?;
181        let length_bytes = write_varint_to_vec(header_bytes.len() as u64);
182
183        // varint(header_len) + header
184        self.buffer.write_all(&length_bytes)?;
185        self.buffer.write_all(&header_bytes)?;
186        Ok(())
187    }
188}
189
190
191/// A convenience function for writing multiple rows directly to a file.
192pub fn write_multiple_rows_as_car<P: AsRef<std::path::Path>>(
193    path: P,
194    rows: &[(RowKey, RowData)],
195) -> Result<Vec<BlockIndexEntry>> {
196    let mut writer = CarWriter::new(path)?;
197    for (key, data) in rows {
198        writer.add_row(key, data)?;
199    }
200    let index = writer.finalize()?;
201    Ok(index)
202}
203
204/// A convenience function for building a CAR file entirely in memory
205pub fn build_in_memory_car(
206    rows: &[(RowKey, RowData)],
207) -> Result<(Vec<u8>, Vec<BlockIndexEntry>)> {
208    let mut builder = InMemoryCarBuilder::new();
209    for (key, data) in rows {
210        builder.add_row(key, data)?;
211    }
212    let (car_bytes, index) = builder.finalize()?;
213    Ok((car_bytes, index))
214}
215
/// Encodes `value` as an unsigned varint and returns the bytes.
///
/// The value is emitted seven bits at a time, least-significant group first.
/// Every byte except the last has its high bit (`0x80`) set to signal that
/// more bytes follow. A `u64` needs at most ten bytes.
fn write_varint_to_vec(mut value: u64) -> Vec<u8> {
    let mut encoded = Vec::with_capacity(10);
    loop {
        let low_bits = (value & 0x7F) as u8;
        value >>= 7;
        if value == 0 {
            // Final group: continuation bit stays clear.
            encoded.push(low_bits);
            return encoded;
        }
        encoded.push(low_bits | 0x80);
    }
}