void_core/shard/writer.rs
1//! Shard writing and serialization
2
3use rand::RngCore;
4
5use crate::{Result, VoidError};
6
/// Selects how a shard's size is rounded up before it is handed to the server.
///
/// Rounding shard sizes to a small set of values hides the true size from the
/// server, which improves privacy by making size-based inference harder.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum PaddingStrategy {
    /// Leave the shard untouched (original behavior).
    None,
    /// Round the size, including the padding footer, up to the next power of
    /// two (e.g. 1 KB, 2 KB, 4 KB, 8 KB, ...). This is the default strategy.
    #[default]
    PowerOfTwo,
    /// Round the size, including the padding footer, up to the smallest fixed
    /// bucket that fits (1 KB, 4 KB, 16 KB, 64 KB, 256 KB, 1 MB, 4 MB).
    /// Shards larger than the biggest bucket receive no padding bytes.
    Buckets,
    /// Pad every shard up to the given size in bytes. A shard that is already
    /// larger than this (footer included) keeps its own size.
    Fixed(usize),
}
23
/// Bucket sizes in bytes, in ascending order, used by the `Buckets` padding strategy.
///
/// Each bucket is four times the previous one, from 1 KB up to 4 MB.
pub const DEFAULT_BUCKETS: &[usize] = &[
    1 << 10, // 1 KB
    1 << 12, // 4 KB
    1 << 14, // 16 KB
    1 << 16, // 64 KB
    1 << 18, // 256 KB
    1 << 20, // 1 MB
    1 << 22, // 4 MB
];
34
/// Width in bytes of the trailing padding-length field (a little-endian `u64`).
const PADDING_SUFFIX_SIZE: usize = std::mem::size_of::<u64>();

/// Marker bytes `"VOIDPAD\0"` that identify a padded shard.
/// Their presence distinguishes padded shards from old format shards.
const PADDING_MAGIC: [u8; 8] = *b"VOIDPAD\0";

/// Total size of the padding footer: magic (8 bytes) + padding size (8 bytes).
const PADDING_FOOTER_SIZE: usize = PADDING_MAGIC.len() + PADDING_SUFFIX_SIZE;
44
45/// Calculates the target padded size for a given data size.
46pub fn calculate_padded_size(size: usize, strategy: PaddingStrategy) -> usize {
47 match strategy {
48 PaddingStrategy::None => size,
49 PaddingStrategy::PowerOfTwo => {
50 // Account for padding footer (magic + size)
51 let total = size.saturating_add(PADDING_FOOTER_SIZE);
52 if total == 0 {
53 PADDING_FOOTER_SIZE
54 } else {
55 total.next_power_of_two()
56 }
57 }
58 PaddingStrategy::Buckets => {
59 let total = size.saturating_add(PADDING_FOOTER_SIZE);
60 DEFAULT_BUCKETS
61 .iter()
62 .find(|&&b| b >= total)
63 .copied()
64 .unwrap_or(total) // If larger than all buckets, no padding
65 }
66 PaddingStrategy::Fixed(max) => max.max(size.saturating_add(PADDING_FOOTER_SIZE)),
67 }
68}
69
70/// Reads the padding size from the end of shard data.
71///
72/// Returns `None` if this is an old format shard (no padding footer).
73/// Returns `Some(padding_size)` if a valid padding footer is found.
74/// Padded shards have a footer with: magic (8 bytes) + padding_size (8 bytes)
75pub fn read_padding_info(data: &[u8]) -> Option<usize> {
76 if data.len() < PADDING_FOOTER_SIZE {
77 return None;
78 }
79
80 // Check for magic bytes before the padding size
81 let magic_start = data.len() - PADDING_FOOTER_SIZE;
82 let magic_end = magic_start + 8;
83 if &data[magic_start..magic_end] != &PADDING_MAGIC {
84 return None; // No padding (old format)
85 }
86
87 let padding = u64::from_le_bytes(
88 data[data.len() - PADDING_SUFFIX_SIZE..]
89 .try_into()
90 .unwrap_or([0u8; 8]),
91 ) as usize;
92
93 // Sanity check: padding can't be larger than data minus the footer
94 if padding.saturating_add(PADDING_FOOTER_SIZE) > data.len() {
95 return None; // Invalid padding
96 }
97 Some(padding)
98}
99
/// Accumulates file contents and turns them into a shard.
///
/// A shard is a zstd-compressed blob of concatenated file contents. The writer
/// records no per-file paths or offsets; file indexing is handled by the
/// TreeManifest, not the shard itself. Shards are opaque blocks, much like
/// filesystem blocks.
pub struct ShardWriter {
    /// Uncompressed bytes of every file added so far, back to back.
    body: Vec<u8>,
    /// Number of files added so far.
    file_count: u32,
}
109
110impl ShardWriter {
111 /// Creates a new shard writer.
112 pub fn new() -> Self {
113 Self {
114 file_count: 0,
115 body: Vec::new(),
116 }
117 }
118
119 /// Adds a file to the shard.
120 ///
121 /// # Arguments
122 /// * `path` - File path (validated for safety)
123 /// * `content` - File content bytes
124 ///
125 /// # Errors
126 /// Returns `VoidError::Shard` if the path is invalid.
127 pub fn add_file(&mut self, path: &str, content: &[u8]) -> Result<()> {
128 if path.is_empty() {
129 return Err(VoidError::Shard("empty path".into()));
130 }
131
132 // Normalize path (forward slashes, no leading slash)
133 let normalized = path.replace('\\', "/").trim_start_matches('/').to_string();
134
135 // Reject path traversal attempts (defense-in-depth)
136 let path_check = std::path::Path::new(&normalized);
137 for component in path_check.components() {
138 match component {
139 std::path::Component::ParentDir => {
140 return Err(VoidError::Shard("path contains '..'".into()));
141 }
142 std::path::Component::RootDir | std::path::Component::Prefix(_) => {
143 return Err(VoidError::Shard("absolute path not allowed".into()));
144 }
145 _ => {}
146 }
147 }
148
149 self.file_count += 1;
150 self.body.extend_from_slice(content);
151
152 Ok(())
153 }
154
155 /// Finishes building and returns the zstd-compressed shard bytes.
156 ///
157 /// # Errors
158 /// Returns `VoidError::Compression` if compression fails.
159 pub fn finish(self, compression_level: i32) -> Result<Vec<u8>> {
160 zstd::encode_all(self.body.as_slice(), compression_level)
161 .map_err(|e| VoidError::Compression(e.to_string()))
162 }
163
164 /// Returns the number of files added.
165 pub fn file_count(&self) -> u32 {
166 self.file_count
167 }
168
169 /// Returns the uncompressed body size.
170 pub fn body_size(&self) -> usize {
171 self.body.len()
172 }
173
174 /// Returns true if this shard has no content.
175 pub fn is_empty(&self) -> bool {
176 self.file_count == 0
177 }
178
179 /// Finishes building and returns serialized shard bytes with optional size padding.
180 ///
181 /// Padding is applied to hide the true shard size from the server.
182 /// The padding size is stored as a little-endian u64 at the end of the shard.
183 ///
184 /// # Errors
185 /// Returns `VoidError::Compression` if compression fails.
186 pub fn finish_padded(self, strategy: PaddingStrategy, compression_level: i32) -> Result<Vec<u8>> {
187 let mut data = self.finish(compression_level)?;
188
189 if matches!(strategy, PaddingStrategy::None) {
190 return Ok(data);
191 }
192
193 let original_size = data.len();
194 let target_size = calculate_padded_size(original_size, strategy);
195 // Account for the full footer size (magic + padding size)
196 let padding_size = target_size.saturating_sub(original_size + PADDING_FOOTER_SIZE);
197
198 if padding_size > 0 {
199 // Generate random padding
200 let mut padding = vec![0u8; padding_size];
201 rand::thread_rng().fill_bytes(&mut padding);
202 data.extend(padding);
203 }
204
205 // Append magic bytes to identify this as a padded shard
206 data.extend(&PADDING_MAGIC);
207 // Append padding size as little-endian u64
208 data.extend(&(padding_size as u64).to_le_bytes());
209
210 Ok(data)
211 }
212}
213
214impl Default for ShardWriter {
215 fn default() -> Self {
216 Self::new()
217 }
218}