Skip to main content

phasm_core/stego/
payload.rs

1// Copyright (c) 2026 Christoph Gaffga
2// SPDX-License-Identifier: GPL-3.0-only
3// https://github.com/cgaffga/phasmcore
4
5//! Payload serialization, compression, and file embedding.
6//!
7//! The payload format wraps the plaintext before encryption:
8//!
9//! ```text
10//! [1 byte ] flags
11//! [M bytes] inner payload (raw or Brotli-compressed depending on flags)
12//! ```
13//!
14//! The inner payload (after decompression) uses a `0x00` separator:
15//!
16//! ```text
//! [text bytes]     UTF-8 message (may be empty; must not contain 0x00)
18//! [0x00]           separator (only present if files follow)
19//! [file entry]*    zero or more file entries
20//! ```
21//!
22//! File entry format:
23//!
24//! ```text
25//! [1 byte ] filename_len (1–255)
26//! [N bytes] filename (UTF-8)
27//! [4 bytes] content_len (u32 BE)
28//! [M bytes] file content
29//! ```
30
31use crate::stego::error::StegoError;
32use std::io::{Read, Write};
33
34/// Compression algorithm flags (bits 0-1 of flags byte).
35const COMPRESS_NONE: u8 = 0b00;
36const COMPRESS_BROTLI: u8 = 0b01;
37const COMPRESS_MASK: u8 = 0b11;
38
39/// Brotli compression quality (0-11). 11 = max compression. Our payloads are
40/// small (<64 KB) so even max quality compresses in milliseconds.
41const BROTLI_QUALITY: u32 = 11;
42
43/// Brotli LG_WINDOW_SIZE. 22 is the default (4 MB window).
44/// For small payloads this is fine — Brotli auto-adjusts.
45const BROTLI_LG_WINDOW_SIZE: u32 = 22;
46
47/// Maximum raw file size before compression (hard reject).
48pub const MAX_RAW_FILE_SIZE: usize = 2 * 1024 * 1024; // 2 MB
49
50/// A file entry embedded in the payload.
51#[derive(Debug, Clone, PartialEq, Eq)]
52pub struct FileEntry {
53    pub filename: String,
54    pub content: Vec<u8>,
55}
56
57/// Decoded payload containing text and optional files.
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct PayloadData {
60    pub text: String,
61    pub files: Vec<FileEntry>,
62}
63
64/// Encode a payload (text + optional files) into bytes ready for encryption.
65///
66/// Returns `[flags byte][maybe_compressed_inner]`. The caller encrypts this
67/// as the "plaintext" in the frame format.
68///
69/// Tries Brotli compression and uses it only if it produces a smaller result.
70///
71/// Returns an error if any file exceeds `MAX_RAW_FILE_SIZE` or has a filename
72/// longer than 255 bytes.
73pub fn encode_payload(text: &str, files: &[FileEntry]) -> Result<Vec<u8>, StegoError> {
74    for file in files {
75        if file.content.len() > MAX_RAW_FILE_SIZE {
76            return Err(StegoError::MessageTooLarge);
77        }
78        if file.filename.len() > 255 {
79            return Err(StegoError::MessageTooLarge);
80        }
81    }
82    let inner = serialize_inner(text, files);
83    Ok(try_compress(&inner))
84}
85
86/// Compute the compressed payload size (in bytes) for the given text and files.
87///
88/// This is the exact size that `encode_payload` would produce, without actually
89/// encrypting or embedding. Useful for showing a live "used bytes" count in the
90/// UI that reflects Brotli compression savings.
91///
92/// Falls back to `text.as_bytes().len() + 1` (raw text + flags byte) if
93/// `encode_payload` fails for any reason (e.g. file too large).
94pub fn compressed_payload_size(text: &str, files: &[FileEntry]) -> usize {
95    encode_payload(text, files).map_or_else(|_| text.len() + 1, |v| v.len())
96}
97
98/// Decode a payload from decrypted bytes.
99///
100/// Input is `[flags byte][maybe_compressed_inner]` as produced by `encode_payload`.
101pub fn decode_payload(data: &[u8]) -> Result<PayloadData, StegoError> {
102    if data.is_empty() {
103        return Err(StegoError::FrameCorrupted);
104    }
105
106    let flags = data[0];
107    let compressed_data = &data[1..];
108
109    let inner = match flags & COMPRESS_MASK {
110        COMPRESS_NONE => compressed_data.to_vec(),
111        COMPRESS_BROTLI => decompress_brotli(compressed_data)?,
112        _ => return Err(StegoError::FrameCorrupted),
113    };
114
115    parse_inner(&inner)
116}
117
118/// Serialize the inner payload: `[text][0x00][file entries...]`
119fn serialize_inner(text: &str, files: &[FileEntry]) -> Vec<u8> {
120    let mut buf = Vec::new();
121    buf.extend_from_slice(text.as_bytes());
122
123    if !files.is_empty() {
124        buf.push(0x00); // separator
125        for file in files {
126            let name_bytes = file.filename.as_bytes();
127            let name_len = name_bytes.len().min(255) as u8;
128            buf.push(name_len);
129            buf.extend_from_slice(&name_bytes[..name_len as usize]);
130            buf.extend_from_slice(&(file.content.len() as u32).to_be_bytes());
131            buf.extend_from_slice(&file.content);
132        }
133    }
134
135    buf
136}
137
138/// Parse the inner payload after decompression.
139fn parse_inner(data: &[u8]) -> Result<PayloadData, StegoError> {
140    // Find first 0x00 byte — everything before is text, after is file entries.
141    let separator_pos = data.iter().position(|&b| b == 0x00);
142
143    match separator_pos {
144        None => {
145            // No separator — entire payload is text (text-only, no files).
146            let text = std::str::from_utf8(data)
147                .map_err(|_| StegoError::InvalidUtf8)?
148                .to_string();
149            Ok(PayloadData { text, files: vec![] })
150        }
151        Some(pos) => {
152            // Text before separator.
153            let text = std::str::from_utf8(&data[..pos])
154                .map_err(|_| StegoError::InvalidUtf8)?
155                .to_string();
156
157            // Parse file entries after separator.
158            let mut files = Vec::new();
159            let mut cursor = pos + 1;
160
161            while cursor < data.len() {
162                // filename_len (1 byte)
163                let name_len = data[cursor] as usize;
164                cursor += 1;
165                if name_len == 0 || cursor + name_len > data.len() {
166                    return Err(StegoError::FrameCorrupted);
167                }
168
169                // filename (UTF-8)
170                let filename = std::str::from_utf8(&data[cursor..cursor + name_len])
171                    .map_err(|_| StegoError::InvalidUtf8)?
172                    .to_string();
173                cursor += name_len;
174
175                // content_len (u32 BE)
176                if cursor + 4 > data.len() {
177                    return Err(StegoError::FrameCorrupted);
178                }
179                let content_len = u32::from_be_bytes([
180                    data[cursor],
181                    data[cursor + 1],
182                    data[cursor + 2],
183                    data[cursor + 3],
184                ]) as usize;
185                cursor += 4;
186
187                // file content
188                if cursor + content_len > data.len() {
189                    return Err(StegoError::FrameCorrupted);
190                }
191                let content = data[cursor..cursor + content_len].to_vec();
192                cursor += content_len;
193
194                files.push(FileEntry { filename, content });
195            }
196
197            Ok(PayloadData { text, files })
198        }
199    }
200}
201
202/// Try Brotli compression; return `[flags][data]` using whichever is smaller.
203///
204/// Skips Brotli entirely for tiny payloads (< 32 bytes) where compression
205/// overhead exceeds any possible savings.
206fn try_compress(inner: &[u8]) -> Vec<u8> {
207    // Skip Brotli for tiny payloads — the framing overhead exceeds any savings.
208    if inner.len() < 32 {
209        let mut result = Vec::with_capacity(1 + inner.len());
210        result.push(COMPRESS_NONE);
211        result.extend_from_slice(inner);
212        return result;
213    }
214
215    let compressed = compress_brotli(inner);
216
217    // Use compressed only if it's strictly smaller.
218    // Both paths include the 1-byte flags prefix.
219    if compressed.len() < inner.len() {
220        let mut result = Vec::with_capacity(1 + compressed.len());
221        result.push(COMPRESS_BROTLI);
222        result.extend_from_slice(&compressed);
223        result
224    } else {
225        let mut result = Vec::with_capacity(1 + inner.len());
226        result.push(COMPRESS_NONE);
227        result.extend_from_slice(inner);
228        result
229    }
230}
231
232/// Compress data with Brotli.
233fn compress_brotli(data: &[u8]) -> Vec<u8> {
234    let mut output = Vec::new();
235    {
236        let mut compressor = brotli::CompressorWriter::new(
237            &mut output,
238            4096, // buffer size
239            BROTLI_QUALITY,
240            BROTLI_LG_WINDOW_SIZE,
241        );
242        compressor.write_all(data).expect("Brotli compression should not fail");
243        // CompressorWriter flushes on drop
244    }
245    output
246}
247
248/// Decompress Brotli data.
249fn decompress_brotli(data: &[u8]) -> Result<Vec<u8>, StegoError> {
250    let mut output = Vec::new();
251    let decompressor = brotli::Decompressor::new(data, 4096);
252    // Limit decompressed size to prevent decompression bombs
253    let limit = 128 * 1024; // 128 KB generous limit
254    decompressor.take(limit as u64).read_to_end(&mut output)
255        .map_err(|_| StegoError::FrameCorrupted)?;
256    Ok(output)
257}
258
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a `FileEntry` from a name and a byte slice.
    fn entry(name: &str, bytes: &[u8]) -> FileEntry {
        FileEntry {
            filename: name.to_string(),
            content: bytes.to_vec(),
        }
    }

    /// Encode and immediately decode, panicking if either direction fails.
    fn roundtrip(text: &str, files: &[FileEntry]) -> PayloadData {
        let wire = encode_payload(text, files).unwrap();
        decode_payload(&wire).unwrap()
    }

    /// Extract the compression-algorithm bits from an encoded payload.
    fn algorithm(wire: &[u8]) -> u8 {
        wire[0] & COMPRESS_MASK
    }

    #[test]
    fn text_only_roundtrip() {
        let back = roundtrip("Hello, world!", &[]);
        assert_eq!(back.text, "Hello, world!");
        assert!(back.files.is_empty());
    }

    #[test]
    fn empty_text_roundtrip() {
        let back = roundtrip("", &[]);
        assert_eq!(back.text, "");
        assert!(back.files.is_empty());
    }

    #[test]
    fn text_with_one_file() {
        let back = roundtrip("hello", &[entry("test.txt", b"file content here")]);
        assert_eq!(back.text, "hello");
        assert_eq!(back.files.len(), 1);
        assert_eq!(back.files[0].filename, "test.txt");
        assert_eq!(back.files[0].content, b"file content here");
    }

    #[test]
    fn text_with_multiple_files() {
        let attachments = [
            entry("a.bin", &[0xDE, 0xAD, 0xBE, 0xEF]),
            entry("readme.md", b"# Hello\nWorld"),
        ];
        let back = roundtrip("msg", &attachments);
        assert_eq!(back.text, "msg");
        assert_eq!(back.files.len(), 2);
        assert_eq!(back.files[0].filename, "a.bin");
        assert_eq!(back.files[0].content, vec![0xDE, 0xAD, 0xBE, 0xEF]);
        assert_eq!(back.files[1].filename, "readme.md");
        assert_eq!(back.files[1].content, b"# Hello\nWorld");
    }

    #[test]
    fn empty_text_with_files() {
        let back = roundtrip("", &[entry("data.bin", &[1, 2, 3])]);
        assert_eq!(back.text, "");
        assert_eq!(back.files.len(), 1);
    }

    #[test]
    fn short_message_not_compressed() {
        let wire = encode_payload("hi", &[]).unwrap();
        // Short messages must be stored raw.
        assert_eq!(algorithm(&wire), COMPRESS_NONE);
    }

    #[test]
    fn long_repetitive_text_compressed() {
        // 1000 bytes of highly repetitive text.
        let message = "abcdefghij".repeat(100);
        let wire = encode_payload(&message, &[]).unwrap();
        // Brotli must have been selected...
        assert_eq!(algorithm(&wire), COMPRESS_BROTLI);
        // ...and must actually have saved space.
        assert!(wire.len() < message.len());
        // The text must survive the roundtrip.
        let back = decode_payload(&wire).unwrap();
        assert_eq!(back.text, message);
    }

    #[test]
    fn large_compressible_file() {
        let attachment = entry("big.txt", &b"Hello World! ".repeat(1000));
        let wire = encode_payload("", &[attachment]).unwrap();
        assert_eq!(algorithm(&wire), COMPRESS_BROTLI);
        let back = decode_payload(&wire).unwrap();
        assert_eq!(back.files[0].content.len(), 13000);
    }

    #[test]
    fn incompressible_data_stays_raw() {
        // Pseudo-random bytes that should resist compression.
        let noise: Vec<u8> = (0u16..200)
            .map(|i| (i.wrapping_mul(7919) % 256) as u8)
            .collect();
        // Whether Brotli wins here depends on its framing overhead, so only
        // the roundtrip is asserted, not the chosen algorithm.
        let back = roundtrip("", &[entry("rand.bin", &noise)]);
        assert_eq!(back.files[0].content, noise);
    }

    #[test]
    fn unicode_text_and_filename() {
        let back = roundtrip("Ünïcödé 🎉", &[entry("daten-übersicht.pdf", &[0xFF])]);
        assert_eq!(back.text, "Ünïcödé 🎉");
        assert_eq!(back.files[0].filename, "daten-übersicht.pdf");
    }

    #[test]
    fn empty_payload_error() {
        assert!(decode_payload(&[]).is_err());
    }

    #[test]
    fn truncated_file_entry_error() {
        // flags, text "A", separator, then filename_len = 5 with no bytes after it.
        let wire = [COMPRESS_NONE, b'A', 0x00, 5];
        assert!(decode_payload(&wire).is_err());
    }

    #[test]
    fn zero_length_filename_error() {
        // flags, separator, then filename_len = 0, which is invalid.
        let wire = [COMPRESS_NONE, 0x00, 0];
        assert!(decode_payload(&wire).is_err());
    }
}