hyperstack_server/
compression.rs

1//! Application-level compression for WebSocket payloads.
2//!
3//! Since tokio-tungstenite doesn't support permessage-deflate, we implement
4//! application-level gzip compression for large payloads (like snapshots).
5//!
6//! Compressed payloads are sent as raw binary gzip data. Clients detect
7//! compression by checking for the gzip magic bytes (0x1f, 0x8b) at the
8//! start of binary WebSocket frames.
9//!
10//! This approach eliminates the ~33% overhead of base64 encoding that was
11//! previously used with JSON-wrapped compressed data.
12
13use bytes::Bytes;
14use flate2::{write::GzEncoder, Compression};
15use std::io::Write;
16
/// Size cutoff, in bytes, that decides whether a payload is worth gzipping.
///
/// Anything strictly shorter than this is passed through untouched; a payload
/// of exactly this length (or longer) is handed to the compressor.
const COMPRESSION_THRESHOLD: usize = 1024; // 1 KiB
20
21/// Result of attempting to compress a payload.
22#[derive(Debug)]
23pub enum CompressedPayload {
24    /// Payload was compressed - contains raw gzip bytes.
25    /// Should be sent as a binary WebSocket frame.
26    Compressed(Bytes),
27    /// Payload was not compressed - contains original JSON bytes.
28    /// Should be sent as a text WebSocket frame (or binary, both work).
29    Uncompressed(Bytes),
30}
31
32impl CompressedPayload {
33    /// Returns true if the payload is compressed.
34    pub fn is_compressed(&self) -> bool {
35        matches!(self, CompressedPayload::Compressed(_))
36    }
37
38    /// Consumes self and returns the inner bytes.
39    pub fn into_bytes(self) -> Bytes {
40        match self {
41            CompressedPayload::Compressed(b) => b,
42            CompressedPayload::Uncompressed(b) => b,
43        }
44    }
45
46    /// Returns a reference to the inner bytes.
47    pub fn as_bytes(&self) -> &Bytes {
48        match self {
49            CompressedPayload::Compressed(b) => b,
50            CompressedPayload::Uncompressed(b) => b,
51        }
52    }
53}
54
55/// Compress a payload if it exceeds the threshold.
56///
57/// Returns `CompressedPayload::Uncompressed` if:
58/// - Payload is below threshold
59/// - Compression fails
60/// - Compressed size is larger than original (unlikely for JSON)
61///
62/// Returns `CompressedPayload::Compressed` with raw gzip bytes if compression
63/// is beneficial. The gzip data starts with magic bytes 0x1f 0x8b which clients
64/// use to detect compression.
65pub fn maybe_compress(payload: &[u8]) -> CompressedPayload {
66    if payload.len() < COMPRESSION_THRESHOLD {
67        return CompressedPayload::Uncompressed(Bytes::copy_from_slice(payload));
68    }
69
70    match compress_gzip(payload) {
71        Ok(compressed) => {
72            // Only use compression if it actually reduces size
73            if compressed.len() < payload.len() {
74                CompressedPayload::Compressed(Bytes::from(compressed))
75            } else {
76                CompressedPayload::Uncompressed(Bytes::copy_from_slice(payload))
77            }
78        }
79        Err(_) => CompressedPayload::Uncompressed(Bytes::copy_from_slice(payload)),
80    }
81}
82
83fn compress_gzip(data: &[u8]) -> std::io::Result<Vec<u8>> {
84    let mut encoder = GzEncoder::new(Vec::new(), Compression::fast());
85    encoder.write_all(data)?;
86    encoder.finish()
87}
88
/// The two-byte gzip signature; clients look for it at the start of a frame
/// to recognise compressed data.
pub const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];

/// Reports whether `data` begins with [`GZIP_MAGIC`].
///
/// Inputs shorter than two bytes (including the empty slice) are never
/// considered gzip.
pub fn is_gzip(data: &[u8]) -> bool {
    data.starts_with(&GZIP_MAGIC)
}
96
#[cfg(test)]
mod tests {
    use super::*;
    use flate2::read::GzDecoder;
    use serde_json::json;
    use std::io::Read;

    /// Builds a realistic, highly repetitive JSON snapshot that is well above
    /// the compression threshold.
    fn sample_snapshot() -> Vec<u8> {
        let entities: Vec<_> = (0..100)
            .map(|i| {
                json!({
                    "key": format!("entity_{}", i),
                    "data": {
                        "id": i,
                        "name": format!("Entity number {}", i),
                        "description": "This is a test entity with some data that will be repeated",
                        "values": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                        "nested": {
                            "field1": "value1",
                            "field2": "value2",
                            "field3": "value3",
                        }
                    }
                })
            })
            .collect();

        serde_json::to_vec(&entities).unwrap()
    }

    /// Tiny payloads are passed through byte-for-byte.
    #[test]
    fn test_small_payload_not_compressed() {
        let small = b"hello";
        let result = maybe_compress(small);
        assert!(!result.is_compressed());
        assert_eq!(result.as_bytes().as_ref(), small);
    }

    /// Large JSON payloads come back as a smaller, raw gzip stream.
    #[test]
    fn test_large_payload_compressed_as_raw_gzip() {
        let payload = sample_snapshot();
        let original_size = payload.len();
        let result = maybe_compress(&payload);

        assert!(result.is_compressed());

        let bytes = result.as_bytes();
        assert!(
            is_gzip(bytes),
            "Compressed data should start with gzip magic bytes"
        );

        assert!(
            bytes.len() < original_size,
            "Compressed {} should be < original {}",
            bytes.len(),
            original_size
        );

        println!(
            "Original: {} bytes, Compressed: {} bytes, Ratio: {:.1}%",
            original_size,
            bytes.len(),
            (bytes.len() as f64 / original_size as f64) * 100.0
        );
    }

    /// The compressed bytes must decode back to exactly the original payload;
    /// checking only the magic bytes would not catch a truncated stream.
    #[test]
    fn test_compressed_payload_round_trips() {
        let payload = sample_snapshot();
        let result = maybe_compress(&payload);
        assert!(result.is_compressed());

        let mut decoded = Vec::new();
        GzDecoder::new(&result.as_bytes()[..])
            .read_to_end(&mut decoded)
            .unwrap();
        assert_eq!(decoded, payload);
    }

    /// Magic-byte detection handles matching, non-matching, short and empty
    /// inputs.
    #[test]
    fn test_gzip_magic_detection() {
        assert!(is_gzip(&[0x1f, 0x8b, 0x08]));
        assert!(!is_gzip(&[0x7b, 0x22]));
        assert!(!is_gzip(&[0x1f]));
        assert!(!is_gzip(&[]));
    }

    /// The threshold is inclusive: one byte below it is left alone, while a
    /// compressible payload of exactly the threshold length is gzipped.
    #[test]
    fn test_threshold_boundary() {
        let just_under = vec![b'a'; COMPRESSION_THRESHOLD - 1];
        let result = maybe_compress(&just_under);
        assert!(!result.is_compressed());
        assert_eq!(&result.as_bytes()[..], &just_under[..]);

        let at_threshold = vec![b'a'; COMPRESSION_THRESHOLD];
        let result = maybe_compress(&at_threshold);
        assert!(result.is_compressed());
        assert!(is_gzip(result.as_bytes()));
    }

    /// High-entropy input cannot be shrunk by gzip, so the original bytes must
    /// be returned even though the payload is above the threshold.
    #[test]
    fn test_incompressible_payload_sent_uncompressed() {
        // Deterministic xorshift64 noise; avoids pulling in a RNG crate.
        let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
        let noise: Vec<u8> = (0..4096)
            .map(|_| {
                state ^= state << 13;
                state ^= state >> 7;
                state ^= state << 17;
                // Truncation is intentional: keep one well-mixed byte per step.
                (state >> 32) as u8
            })
            .collect();

        let result = maybe_compress(&noise);
        assert!(!result.is_compressed());
        assert_eq!(&result.as_bytes()[..], &noise[..]);
    }
}