Skip to main content

gtp/
attachment.rs

//! GTP attachment support: chunking, manifest, and integrity verification
//! (gtp_rfc §6).
//!
//! Flow:
//! 1. The sender splits the data into chunks via [`AttachmentSender::new`] and
//!    sends the manifest message followed by the individual
//!    [`AttachmentChunk`] messages.
//! 2. The receiver feeds the chunks to an [`AttachmentAssembler`]; once all
//!    chunks have arrived, [`AttachmentAssembler::assemble`] verifies the
//!    SHA-256 hash and returns the complete payload.
9
10use gbp::CodecError;
11use serde::{Deserialize, Serialize};
12use serde_bytes::ByteBuf;
13use sha2::{Digest, Sha256};
14use std::collections::HashMap;
15
/// Default chunk size in bytes: 64 KiB (65 536).
pub const DEFAULT_CHUNK_SIZE: usize = 64 << 10;
18
/// Manifest sent as the body of a `ContentType::AttachmentRef` message.
///
/// Describes the full attachment (identity, declared size, chunk count and
/// SHA-256 digest) so the receiver knows what to expect and can verify the
/// reassembled payload.
///
/// Every field is serialized under a short key given by its
/// `#[serde(rename = ...)]` attribute.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AttachmentManifest {
    /// Unique attachment identifier (sender-scoped). Serialized as `aid`.
    #[serde(rename = "aid")]
    pub attachment_id: u64,
    /// Original filename (UTF-8, no path components). Serialized as `name`.
    ///
    /// This type does not itself validate the "no path components" rule.
    #[serde(rename = "name")]
    pub filename: String,
    /// MIME type string (e.g. `"image/png"`). Serialized as `mime`.
    #[serde(rename = "mime")]
    pub mime_type: String,
    /// Total byte length of the reassembled payload, as declared by the
    /// sender. Serialized as `size`.
    #[serde(rename = "size")]
    pub total_size: u64,
    /// Number of chunks the payload was split into. Serialized as `nc`.
    #[serde(rename = "nc")]
    pub chunk_count: u32,
    /// SHA-256 hash of the complete payload (32 bytes). Serialized as `hash`.
    #[serde(rename = "hash")]
    pub sha256: ByteBuf,
}
42
43impl AttachmentManifest {
44    /// CBOR-encodes the manifest.
45    pub fn to_cbor(&self) -> Vec<u8> {
46        let mut buf = Vec::new();
47        ciborium::into_writer(self, &mut buf).expect("cbor encode");
48        buf
49    }
50
51    /// Decodes a CBOR manifest.
52    pub fn from_cbor(data: &[u8]) -> Result<Self, CodecError> {
53        ciborium::from_reader(data).map_err(|e| CodecError::Decode(e.to_string()))
54    }
55}
56
/// One chunk of an attachment payload.
///
/// Every field is serialized under a short key given by its
/// `#[serde(rename = ...)]` attribute.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct AttachmentChunk {
    /// Attachment this chunk belongs to. Serialized as `aid`.
    #[serde(rename = "aid")]
    pub attachment_id: u64,
    /// Zero-based chunk index. Serialized as `idx`.
    #[serde(rename = "idx")]
    pub chunk_index: u32,
    /// Total number of chunks (redundant with the manifest, but useful for
    /// validation). Serialized as `nc`.
    #[serde(rename = "nc")]
    pub chunk_count: u32,
    /// Chunk payload bytes. Serialized as `data`.
    #[serde(rename = "data")]
    pub data: ByteBuf,
}
73
74impl AttachmentChunk {
75    /// CBOR-encodes the chunk.
76    pub fn to_cbor(&self) -> Vec<u8> {
77        let mut buf = Vec::new();
78        ciborium::into_writer(self, &mut buf).expect("cbor encode");
79        buf
80    }
81
82    /// Decodes a CBOR chunk.
83    pub fn from_cbor(data: &[u8]) -> Result<Self, CodecError> {
84        ciborium::from_reader(data).map_err(|e| CodecError::Decode(e.to_string()))
85    }
86}
87
/// Errors from attachment assembly.
///
/// `Display` text for each variant comes from its `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum AttachmentError {
    /// CBOR decode failed.
    ///
    /// The `#[from]` attribute generates `From<CodecError>`, so a
    /// [`CodecError`] can be converted into this variant with `?`.
    #[error("decode: {0}")]
    Decode(#[from] CodecError),
    /// `chunk_index` is out of range for the declared `chunk_count`.
    ///
    /// Returned by [`AttachmentAssembler::push`] when the index is not below
    /// the manifest's chunk count.
    #[error("chunk index {idx} out of range (count={count})")]
    ChunkOutOfRange {
        /// The out-of-range index.
        idx: u32,
        /// Declared total chunk count.
        count: u32,
    },
    /// The reassembled payload's SHA-256 does not match the manifest.
    #[error("integrity check failed: hash mismatch")]
    HashMismatch,
    /// Not all chunks have arrived yet.
    ///
    /// Returned by [`AttachmentAssembler::assemble`] when it is called before
    /// every chunk has been pushed.
    #[error("incomplete: {received}/{total} chunks received")]
    Incomplete {
        /// Number of chunks received so far.
        received: u32,
        /// Total expected chunks.
        total: u32,
    },
}
114
/// Splits a payload into chunks and produces the manifest.
///
/// Built by [`AttachmentSender::new`]; both fields are public so the caller
/// can send the manifest and then each encoded chunk.
pub struct AttachmentSender {
    /// The manifest to broadcast first.
    pub manifest: AttachmentManifest,
    /// Ready-to-send CBOR-encoded chunks, in ascending chunk-index order
    /// (one inner `Vec<u8>` per encoded [`AttachmentChunk`]).
    pub chunks: Vec<Vec<u8>>,
}
122
123impl AttachmentSender {
124    /// Splits `data` into chunks of `chunk_size` bytes (last chunk may be
125    /// smaller), computes the SHA-256 manifest, and encodes everything.
126    pub fn new(
127        attachment_id: u64,
128        filename: impl Into<String>,
129        mime_type: impl Into<String>,
130        data: &[u8],
131        chunk_size: usize,
132    ) -> Self {
133        let hash = Sha256::digest(data);
134        let chunk_size = chunk_size.max(1);
135        let raw_chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
136        let chunk_count = raw_chunks.len() as u32;
137
138        let manifest = AttachmentManifest {
139            attachment_id,
140            filename: filename.into(),
141            mime_type: mime_type.into(),
142            total_size: data.len() as u64,
143            chunk_count,
144            sha256: ByteBuf::from(hash.as_slice().to_vec()),
145        };
146
147        let chunks = raw_chunks
148            .into_iter()
149            .enumerate()
150            .map(|(i, slice)| {
151                AttachmentChunk {
152                    attachment_id,
153                    chunk_index: i as u32,
154                    chunk_count,
155                    data: ByteBuf::from(slice.to_vec()),
156                }
157                .to_cbor()
158            })
159            .collect();
160
161        Self { manifest, chunks }
162    }
163}
164
/// Reassembles incoming chunks and verifies integrity when complete.
///
/// Chunks may arrive in any order; they are keyed by chunk index until the
/// payload is assembled.
pub struct AttachmentAssembler {
    /// Manifest describing the attachment being received.
    manifest: AttachmentManifest,
    /// Payload bytes of each chunk received so far, keyed by chunk index.
    received: HashMap<u32, Vec<u8>>,
}
170
171impl AttachmentAssembler {
172    /// Creates an assembler for the given manifest.
173    pub fn new(manifest: AttachmentManifest) -> Self {
174        Self {
175            manifest,
176            received: HashMap::new(),
177        }
178    }
179
180    /// Returns the manifest.
181    pub fn manifest(&self) -> &AttachmentManifest {
182        &self.manifest
183    }
184
185    /// How many chunks have been received so far.
186    pub fn received_count(&self) -> u32 {
187        self.received.len() as u32
188    }
189
190    /// Returns `true` when all chunks have arrived.
191    pub fn is_complete(&self) -> bool {
192        self.received.len() as u32 == self.manifest.chunk_count
193    }
194
195    /// Feeds a decoded chunk. Duplicate indices are silently ignored.
196    pub fn push(&mut self, chunk: AttachmentChunk) -> Result<(), AttachmentError> {
197        if chunk.chunk_index >= self.manifest.chunk_count {
198            return Err(AttachmentError::ChunkOutOfRange {
199                idx: chunk.chunk_index,
200                count: self.manifest.chunk_count,
201            });
202        }
203        self.received
204            .entry(chunk.chunk_index)
205            .or_insert_with(|| chunk.data.into_vec());
206        Ok(())
207    }
208
209    /// Assembles the payload once all chunks have arrived and verifies the
210    /// SHA-256 hash against the manifest. Returns the complete byte vector.
211    pub fn assemble(self) -> Result<Vec<u8>, AttachmentError> {
212        let total = self.manifest.chunk_count;
213        let received = self.received.len() as u32;
214        if received < total {
215            return Err(AttachmentError::Incomplete { received, total });
216        }
217        let mut payload = Vec::with_capacity(self.manifest.total_size as usize);
218        for i in 0..total {
219            payload.extend_from_slice(self.received.get(&i).unwrap());
220        }
221        let hash = Sha256::digest(&payload);
222        if hash.as_slice() != self.manifest.sha256.as_ref() {
223            return Err(AttachmentError::HashMismatch);
224        }
225        Ok(payload)
226    }
227}
228
#[cfg(test)]
mod tests {
    use super::*;

    /// MIME type shared by the binary fixtures below.
    const OCTET_STREAM: &str = "application/octet-stream";

    /// Builds `n` deterministic bytes cycling through `0..251`.
    fn sample_data(n: usize) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(n);
        for i in 0..n {
            bytes.push((i % 251) as u8);
        }
        bytes
    }

    /// Decodes one CBOR-encoded chunk, panicking on failure.
    fn decode(cbor: &[u8]) -> AttachmentChunk {
        AttachmentChunk::from_cbor(cbor).unwrap()
    }

    /// Decodes and pushes every encoded chunk, in the order given.
    fn feed_all<'a>(
        asm: &mut AttachmentAssembler,
        encoded: impl IntoIterator<Item = &'a Vec<u8>>,
    ) {
        for cbor in encoded {
            asm.push(decode(cbor)).unwrap();
        }
    }

    #[test]
    fn round_trip_small_payload() {
        let data = sample_data(100);
        let AttachmentSender { manifest, chunks } =
            AttachmentSender::new(1, "file.bin", OCTET_STREAM, &data, DEFAULT_CHUNK_SIZE);
        assert_eq!(manifest.chunk_count, 1);

        let mut asm = AttachmentAssembler::new(manifest);
        feed_all(&mut asm, &chunks);

        assert_eq!(asm.assemble().unwrap(), data);
    }

    #[test]
    fn round_trip_multi_chunk() {
        let data = sample_data(300);
        let AttachmentSender { manifest, chunks } =
            AttachmentSender::new(2, "multi.bin", OCTET_STREAM, &data, 100);
        assert_eq!(manifest.chunk_count, 3);

        let mut asm = AttachmentAssembler::new(manifest);
        feed_all(&mut asm, &chunks);
        assert!(asm.is_complete());

        assert_eq!(asm.assemble().unwrap(), data);
    }

    #[test]
    fn out_of_order_chunks_reassemble_correctly() {
        let data = sample_data(250);
        let AttachmentSender { manifest, chunks } =
            AttachmentSender::new(3, "ooo.bin", OCTET_STREAM, &data, 100);

        let mut asm = AttachmentAssembler::new(manifest);
        // Deliver the chunks last-to-first.
        feed_all(&mut asm, chunks.iter().rev());

        assert_eq!(asm.assemble().unwrap(), data);
    }

    #[test]
    fn duplicate_chunk_ignored() {
        let data = sample_data(100);
        let AttachmentSender { manifest, chunks } =
            AttachmentSender::new(4, "dup.bin", OCTET_STREAM, &data, DEFAULT_CHUNK_SIZE);

        let mut asm = AttachmentAssembler::new(manifest);
        let chunk = decode(&chunks[0]);
        asm.push(chunk.clone()).unwrap();
        // The second delivery of the same index must not be an error.
        asm.push(chunk).unwrap();

        assert_eq!(asm.assemble().unwrap(), data);
    }

    #[test]
    fn hash_mismatch_detected() {
        let data = sample_data(100);
        let AttachmentSender {
            mut manifest,
            chunks,
        } = AttachmentSender::new(5, "bad.bin", OCTET_STREAM, &data, DEFAULT_CHUNK_SIZE);
        // Flip every bit of the first digest byte so verification must fail.
        manifest.sha256[0] ^= 0xFF;

        let mut asm = AttachmentAssembler::new(manifest);
        asm.push(decode(&chunks[0])).unwrap();

        assert!(matches!(asm.assemble(), Err(AttachmentError::HashMismatch)));
    }

    #[test]
    fn incomplete_returns_error() {
        let data = sample_data(300);
        let AttachmentSender { manifest, chunks } =
            AttachmentSender::new(6, "inc.bin", OCTET_STREAM, &data, 100);

        let mut asm = AttachmentAssembler::new(manifest);
        // Only the first of three chunks is delivered.
        asm.push(decode(&chunks[0])).unwrap();

        assert!(matches!(
            asm.assemble(),
            Err(AttachmentError::Incomplete { .. })
        ));
    }

    #[test]
    fn chunk_out_of_range_rejected() {
        let data = sample_data(100);
        let AttachmentSender { manifest, .. } =
            AttachmentSender::new(7, "oor.bin", OCTET_STREAM, &data, DEFAULT_CHUNK_SIZE);

        let mut asm = AttachmentAssembler::new(manifest);
        let bad_chunk = AttachmentChunk {
            attachment_id: 7,
            chunk_index: 99,
            chunk_count: 1,
            data: ByteBuf::new(),
        };

        assert!(matches!(
            asm.push(bad_chunk),
            Err(AttachmentError::ChunkOutOfRange { .. })
        ));
    }

    #[test]
    fn manifest_cbor_round_trip() {
        let data = sample_data(50);
        let AttachmentSender { manifest, .. } =
            AttachmentSender::new(8, "rt.bin", "text/plain", &data, DEFAULT_CHUNK_SIZE);

        let encoded = manifest.to_cbor();
        let decoded = AttachmentManifest::from_cbor(&encoded).unwrap();

        assert_eq!(decoded.attachment_id, 8);
        assert_eq!(decoded.filename, "rt.bin");
        assert_eq!(decoded.chunk_count, 1);
    }
}
370}