Skip to main content

void_core/index/
io.rs

1//! Read/write operations for the workspace index with sharded encryption.
2//!
3//! On-disk format (sharded):
4//!   .void/index/
5//!     index.bin      — header: version, commit_cid, shard_count, per-shard metadata
6//!     shard_0000.bin — ~4096 entries each, independently zstd + AES-GCM encrypted
7//!     shard_0001.bin
8//!     ...
9//!
10//! Each shard contains a CBOR-serialized Vec<IndexEntry>, compressed with zstd level 3,
11//! then encrypted with AES-256-GCM. Shards are read/written in parallel with rayon.
12
13use std::fs;
14use std::path::{Path, PathBuf};
15
16use rayon::prelude::*;
17use serde::{Deserialize, Serialize};
18use void_crypto::EncryptedIndex;
19use void_crypto::CommitCid;
20
21use crate::crypto::SecretKey;
22use crate::{Result, VoidError};
23
24use super::types::IndexEntry;
25use super::workspace::WorkspaceIndex;
26
/// File name of the encrypted header inside the index directory.
const INDEX_FILENAME: &str = "index.bin";
/// Compression level passed to zstd by `compress`.
const ZSTD_LEVEL: i32 = 3;
/// Maximum number of entries stored in one shard; only the last shard may hold fewer.
const SHARD_TARGET_ENTRIES: usize = 4_096;
30
/// Returns the directory that holds the index header and shard files: `<void_dir>/index`.
fn index_dir(void_dir: &Path) -> PathBuf {
    let mut dir = void_dir.to_path_buf();
    dir.push("index");
    dir
}
34
35fn index_path(void_dir: &Path) -> PathBuf {
36    index_dir(void_dir).join(INDEX_FILENAME)
37}
38
39fn shard_path(void_dir: &Path, shard_index: usize) -> PathBuf {
40    index_dir(void_dir).join(format!("shard_{:04}.bin", shard_index))
41}
42
43/// Compress bytes with zstd.
44fn compress(data: &[u8]) -> Result<Vec<u8>> {
45    zstd::encode_all(data, ZSTD_LEVEL).map_err(VoidError::Io)
46}
47
48/// Decompress zstd bytes.
49fn decompress(data: &[u8]) -> Result<Vec<u8>> {
50    zstd::decode_all(data).map_err(VoidError::Io)
51}
52
53// ============================================================================
54// Sharded index header
55// ============================================================================
56
57#[derive(Serialize, Deserialize)]
58struct ShardedIndexHeader {
59    version: u32,
60    commit_cid: Option<CommitCid>,
61    shard_count: u32,
62    entries_per_shard: u32,
63    total_entries: u64,
64    shard_meta: Vec<ShardMeta>,
65}
66
67#[derive(Serialize, Deserialize)]
68struct ShardMeta {
69    first_path: String,
70    last_path: String,
71    entry_count: u32,
72}
73
/// Version number stored in the `version` field of every header written by this module.
const SHARDED_INDEX_VERSION: u32 = 2;
75
76// ============================================================================
77// Write
78// ============================================================================
79
80/// Writes the encrypted workspace index to disk (sharded).
81pub fn write_index(
82    void_dir: impl AsRef<Path>,
83    key: &SecretKey,
84    commit_cid: Option<CommitCid>,
85    entries: Vec<IndexEntry>,
86) -> Result<()> {
87    let index = WorkspaceIndex::new(commit_cid, entries);
88    write_workspace_index(void_dir, key, &index)
89}
90
91/// Writes a WorkspaceIndex directly to disk (sharded).
92pub fn write_workspace_index(
93    void_dir: impl AsRef<Path>,
94    key: &SecretKey,
95    index: &WorkspaceIndex,
96) -> Result<()> {
97    let void_dir = void_dir.as_ref();
98    let dir = index_dir(void_dir);
99    fs::create_dir_all(&dir)?;
100
101    let entries = &index.entries;
102    let chunks: Vec<&[IndexEntry]> = if entries.is_empty() {
103        Vec::new()
104    } else {
105        entries.chunks(SHARD_TARGET_ENTRIES).collect()
106    };
107    let shard_count = chunks.len();
108
109    // Write shard files in parallel
110    let shard_meta: Vec<ShardMeta> = chunks
111        .par_iter()
112        .enumerate()
113        .map(|(i, chunk)| {
114            let cbor = crate::support::cbor_to_vec(chunk)?;
115            let compressed = compress(&cbor)?;
116            let blob = EncryptedIndex::encrypt(key.as_bytes(), &compressed)?;
117
118            let path = shard_path(void_dir, i);
119            let temp = path.with_extension("tmp");
120            fs::write(&temp, blob.as_bytes())?;
121            fs::rename(&temp, &path)?;
122
123            Ok(ShardMeta {
124                first_path: chunk.first().map(|e| e.path.clone()).unwrap_or_default(),
125                last_path: chunk.last().map(|e| e.path.clone()).unwrap_or_default(),
126                entry_count: chunk.len() as u32,
127            })
128        })
129        .collect::<Result<Vec<_>>>()?;
130
131    // Write header
132    let header = ShardedIndexHeader {
133        version: SHARDED_INDEX_VERSION,
134        commit_cid: index.commit_cid.clone(),
135        shard_count: shard_count as u32,
136        entries_per_shard: SHARD_TARGET_ENTRIES as u32,
137        total_entries: entries.len() as u64,
138        shard_meta,
139    };
140    let cbor = crate::support::cbor_to_vec(&header)?;
141    let compressed = compress(&cbor)?;
142    let blob = EncryptedIndex::encrypt(key.as_bytes(), &compressed)?;
143
144    let header_path = index_path(void_dir);
145    let temp = header_path.with_extension("tmp");
146    fs::write(&temp, blob.as_bytes())?;
147    fs::rename(&temp, &header_path)?;
148
149    // Clean up stale shard files
150    for i in shard_count.. {
151        let stale = shard_path(void_dir, i);
152        if stale.exists() {
153            let _ = fs::remove_file(&stale);
154        } else {
155            break;
156        }
157    }
158
159    Ok(())
160}
161
162// ============================================================================
163// Read
164// ============================================================================
165
166/// Reads the encrypted workspace index from disk (sharded).
167pub fn read_index(void_dir: impl AsRef<Path>, key: &SecretKey) -> Result<WorkspaceIndex> {
168    let void_dir = void_dir.as_ref();
169    let header_path = index_path(void_dir);
170
171    let raw = fs::read(&header_path).map_err(|e| {
172        if e.kind() == std::io::ErrorKind::NotFound {
173            VoidError::NotFound("index".into())
174        } else {
175            VoidError::Io(e)
176        }
177    })?;
178
179    let blob = EncryptedIndex::from_bytes(raw);
180    let decrypted = blob.decrypt(key.as_bytes())?;
181    let decompressed = decompress(&decrypted).map_err(|_| {
182        VoidError::Serialization(
183            "index format not recognized — delete .void/index/ and re-run void add".into(),
184        )
185    })?;
186
187    let header: ShardedIndexHeader = ciborium::from_reader(&decompressed[..]).map_err(|_| {
188        VoidError::Serialization(
189            "index format not recognized — delete .void/index/ and re-run void add".into(),
190        )
191    })?;
192
193    if header.shard_count == 0 {
194        return Ok(WorkspaceIndex::new(header.commit_cid, Vec::new()));
195    }
196
197    // Read shard files in parallel
198    let shard_entries: Vec<Vec<IndexEntry>> = (0..header.shard_count as usize)
199        .into_par_iter()
200        .map(|i| {
201            let path = shard_path(void_dir, i);
202            let raw = fs::read(&path).map_err(|e| {
203                if e.kind() == std::io::ErrorKind::NotFound {
204                    VoidError::NotFound(format!("index shard {}", i))
205                } else {
206                    VoidError::Io(e)
207                }
208            })?;
209            let blob = EncryptedIndex::from_bytes(raw);
210            let decrypted = blob.decrypt(key.as_bytes())?;
211            let decompressed = decompress(&decrypted)?;
212            let entries: Vec<IndexEntry> = ciborium::from_reader(&decompressed[..])
213                .map_err(|e| VoidError::Serialization(e.to_string()))?;
214            Ok(entries)
215        })
216        .collect::<Result<Vec<_>>>()?;
217
218    // Concatenate in order — shards are contiguous sorted path ranges
219    let total = header.total_entries as usize;
220    let mut entries = Vec::with_capacity(total);
221    for shard in shard_entries {
222        entries.extend(shard);
223    }
224
225    Ok(WorkspaceIndex::new(header.commit_cid, entries))
226}