Skip to main content

void_core/shard/
writer.rs

1//! Shard writing and serialization
2
3use rand::RngCore;
4
5use crate::{Result, VoidError};
6
/// How a shard's stored size is rounded up before upload.
///
/// Rounding shard sizes to a small set of coarse values keeps the server
/// from learning the true payload length, which blocks size-based
/// inference attacks. Every padded size includes the padding footer.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum PaddingStrategy {
    /// Leave the shard unpadded (original behavior, no footer).
    None,
    /// Round the total size up to the next power of two. This is the default.
    #[default]
    PowerOfTwo,
    /// Round up to the smallest fitting bucket
    /// (1KB, 4KB, 16KB, 64KB, 256KB, 1MB, 4MB).
    Buckets,
    /// Pad every shard up to the given total size in bytes; a shard that
    /// does not fit is stored at its own size plus the footer.
    Fixed(usize),
}
23
/// Bucket sizes, in ascending order, used by the Buckets padding strategy.
pub const DEFAULT_BUCKETS: &[usize] = &[
    1 << 10, // 1 KB
    1 << 12, // 4 KB
    1 << 14, // 16 KB
    1 << 16, // 64 KB
    1 << 18, // 256 KB
    1 << 20, // 1 MB
    1 << 22, // 4 MB
];

/// Marker written in front of the padding length: "VOIDPAD\0".
/// Its presence tells padded shards apart from old format shards.
const PADDING_MAGIC: [u8; 8] = *b"VOIDPAD\0";

/// Width in bytes of the padding length field (a little-endian u64).
const PADDING_SUFFIX_SIZE: usize = std::mem::size_of::<u64>();

/// Full footer length: magic (8) followed by the padding length (8).
const PADDING_FOOTER_SIZE: usize = PADDING_MAGIC.len() + PADDING_SUFFIX_SIZE;
44
45/// Calculates the target padded size for a given data size.
46pub fn calculate_padded_size(size: usize, strategy: PaddingStrategy) -> usize {
47    match strategy {
48        PaddingStrategy::None => size,
49        PaddingStrategy::PowerOfTwo => {
50            // Account for padding footer (magic + size)
51            let total = size.saturating_add(PADDING_FOOTER_SIZE);
52            if total == 0 {
53                PADDING_FOOTER_SIZE
54            } else {
55                total.next_power_of_two()
56            }
57        }
58        PaddingStrategy::Buckets => {
59            let total = size.saturating_add(PADDING_FOOTER_SIZE);
60            DEFAULT_BUCKETS
61                .iter()
62                .find(|&&b| b >= total)
63                .copied()
64                .unwrap_or(total) // If larger than all buckets, no padding
65        }
66        PaddingStrategy::Fixed(max) => max.max(size.saturating_add(PADDING_FOOTER_SIZE)),
67    }
68}
69
70/// Reads the padding size from the end of shard data.
71///
72/// Returns `None` if this is an old format shard (no padding footer).
73/// Returns `Some(padding_size)` if a valid padding footer is found.
74/// Padded shards have a footer with: magic (8 bytes) + padding_size (8 bytes)
75pub fn read_padding_info(data: &[u8]) -> Option<usize> {
76    if data.len() < PADDING_FOOTER_SIZE {
77        return None;
78    }
79
80    // Check for magic bytes before the padding size
81    let magic_start = data.len() - PADDING_FOOTER_SIZE;
82    let magic_end = magic_start + 8;
83    if &data[magic_start..magic_end] != &PADDING_MAGIC {
84        return None; // No padding (old format)
85    }
86
87    let padding = u64::from_le_bytes(
88        data[data.len() - PADDING_SUFFIX_SIZE..]
89            .try_into()
90            .unwrap_or([0u8; 8]),
91    ) as usize;
92
93    // Sanity check: padding can't be larger than data minus the footer
94    if padding.saturating_add(PADDING_FOOTER_SIZE) > data.len() {
95        return None; // Invalid padding
96    }
97    Some(padding)
98}
99
/// Accumulates file contents into a single shard.
///
/// The finished shard is one zstd-compressed blob holding the added file
/// contents laid end to end. The shard carries no file index of its own;
/// that is the TreeManifest's job. Shards are opaque blocks, much like
/// filesystem blocks.
pub struct ShardWriter {
    body: Vec<u8>,
    file_count: u32,
}
109
110impl ShardWriter {
111    /// Creates a new shard writer.
112    pub fn new() -> Self {
113        Self {
114            file_count: 0,
115            body: Vec::new(),
116        }
117    }
118
119    /// Adds a file to the shard.
120    ///
121    /// # Arguments
122    /// * `path` - File path (validated for safety)
123    /// * `content` - File content bytes
124    ///
125    /// # Errors
126    /// Returns `VoidError::Shard` if the path is invalid.
127    pub fn add_file(&mut self, path: &str, content: &[u8]) -> Result<()> {
128        if path.is_empty() {
129            return Err(VoidError::Shard("empty path".into()));
130        }
131
132        // Normalize path (forward slashes, no leading slash)
133        let normalized = path.replace('\\', "/").trim_start_matches('/').to_string();
134
135        // Reject path traversal attempts (defense-in-depth)
136        let path_check = std::path::Path::new(&normalized);
137        for component in path_check.components() {
138            match component {
139                std::path::Component::ParentDir => {
140                    return Err(VoidError::Shard("path contains '..'".into()));
141                }
142                std::path::Component::RootDir | std::path::Component::Prefix(_) => {
143                    return Err(VoidError::Shard("absolute path not allowed".into()));
144                }
145                _ => {}
146            }
147        }
148
149        self.file_count += 1;
150        self.body.extend_from_slice(content);
151
152        Ok(())
153    }
154
155    /// Finishes building and returns the zstd-compressed shard bytes.
156    ///
157    /// # Errors
158    /// Returns `VoidError::Compression` if compression fails.
159    pub fn finish(self, compression_level: i32) -> Result<Vec<u8>> {
160        zstd::encode_all(self.body.as_slice(), compression_level)
161            .map_err(|e| VoidError::Compression(e.to_string()))
162    }
163
164    /// Returns the number of files added.
165    pub fn file_count(&self) -> u32 {
166        self.file_count
167    }
168
169    /// Returns the uncompressed body size.
170    pub fn body_size(&self) -> usize {
171        self.body.len()
172    }
173
174    /// Returns true if this shard has no content.
175    pub fn is_empty(&self) -> bool {
176        self.file_count == 0
177    }
178
179    /// Finishes building and returns serialized shard bytes with optional size padding.
180    ///
181    /// Padding is applied to hide the true shard size from the server.
182    /// The padding size is stored as a little-endian u64 at the end of the shard.
183    ///
184    /// # Errors
185    /// Returns `VoidError::Compression` if compression fails.
186    pub fn finish_padded(self, strategy: PaddingStrategy, compression_level: i32) -> Result<Vec<u8>> {
187        let mut data = self.finish(compression_level)?;
188
189        if matches!(strategy, PaddingStrategy::None) {
190            return Ok(data);
191        }
192
193        let original_size = data.len();
194        let target_size = calculate_padded_size(original_size, strategy);
195        // Account for the full footer size (magic + padding size)
196        let padding_size = target_size.saturating_sub(original_size + PADDING_FOOTER_SIZE);
197
198        if padding_size > 0 {
199            // Generate random padding
200            let mut padding = vec![0u8; padding_size];
201            rand::thread_rng().fill_bytes(&mut padding);
202            data.extend(padding);
203        }
204
205        // Append magic bytes to identify this as a padded shard
206        data.extend(&PADDING_MAGIC);
207        // Append padding size as little-endian u64
208        data.extend(&(padding_size as u64).to_le_bytes());
209
210        Ok(data)
211    }
212}
213
214impl Default for ShardWriter {
215    fn default() -> Self {
216        Self::new()
217    }
218}