// hyperstack_server/compression.rs

1//! Application-level compression for WebSocket payloads.
2//!
3//! Since tokio-tungstenite doesn't support permessage-deflate, we implement
4//! application-level gzip compression for large payloads (like snapshots).
5//!
6//! The compressed payload is sent as a JSON wrapper:
7//! ```json
8//! { "compressed": "gzip", "data": "<base64-encoded-gzip-data>" }
9//! ```
10//!
11//! Clients detect the `compressed` field and decompress accordingly.
12
13use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
14use bytes::Bytes;
15use flate2::{write::GzEncoder, Compression};
16use serde::Serialize;
17use std::io::Write;
18
/// Minimum payload size (in bytes) before compression is applied.
/// Payloads smaller than this are sent uncompressed.
const COMPRESSION_THRESHOLD: usize = 1024; // 1KB

/// Wrapper for compressed payloads sent over WebSocket.
///
/// Serializes to `{"compressed": "gzip", "data": "<base64>"}`; clients
/// detect compression by the presence of the `compressed` field.
#[derive(Serialize)]
struct CompressedFrame {
    // Compression algorithm identifier; always "gzip" in this module.
    compressed: &'static str,
    // Standard-alphabet base64 encoding of the gzip-compressed payload.
    data: String,
}
29
30/// Compress a payload if it exceeds the threshold.
31///
32/// Returns the original bytes if:
33/// - Payload is below threshold
34/// - Compression fails
35/// - Compressed size is larger than original (unlikely for JSON)
36///
37/// Returns a compressed wrapper JSON if compression is beneficial.
38pub fn maybe_compress(payload: &[u8]) -> Bytes {
39    if payload.len() < COMPRESSION_THRESHOLD {
40        return Bytes::copy_from_slice(payload);
41    }
42
43    match compress_gzip(payload) {
44        Ok(compressed) => {
45            // Only use compression if it actually reduces size
46            // Account for base64 overhead (~33%) and JSON wrapper (~30 bytes)
47            let estimated_compressed_size = (compressed.len() * 4 / 3) + 40;
48            if estimated_compressed_size < payload.len() {
49                let frame = CompressedFrame {
50                    compressed: "gzip",
51                    data: BASE64.encode(&compressed),
52                };
53                match serde_json::to_vec(&frame) {
54                    Ok(json) => Bytes::from(json),
55                    Err(_) => Bytes::copy_from_slice(payload),
56                }
57            } else {
58                Bytes::copy_from_slice(payload)
59            }
60        }
61        Err(_) => Bytes::copy_from_slice(payload),
62    }
63}
64
65/// Compress data using gzip.
66fn compress_gzip(data: &[u8]) -> std::io::Result<Vec<u8>> {
67    let mut encoder = GzEncoder::new(Vec::new(), Compression::fast());
68    encoder.write_all(data)?;
69    encoder.finish()
70}
71
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn test_small_payload_not_compressed() {
        // Below COMPRESSION_THRESHOLD: must pass through untouched.
        let small = b"hello";
        let result = maybe_compress(small);
        assert_eq!(result.as_ref(), small);
    }

    #[test]
    fn test_large_payload_compressed() {
        // Create a large JSON payload similar to snapshots
        let entities: Vec<_> = (0..100)
            .map(|i| {
                json!({
                    "key": format!("entity_{}", i),
                    "data": {
                        "id": i,
                        "name": format!("Entity number {}", i),
                        "description": "This is a test entity with some data that will be repeated",
                        "values": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                        "nested": {
                            "field1": "value1",
                            "field2": "value2",
                            "field3": "value3",
                        }
                    }
                })
            })
            .collect();

        let payload = serde_json::to_vec(&entities).unwrap();
        let original_size = payload.len();
        let result = maybe_compress(&payload);

        // Should be compressed: output must be the JSON wrapper frame.
        let result_str = std::str::from_utf8(&result).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(result_str).unwrap();

        assert_eq!(parsed["compressed"], "gzip");
        assert!(parsed["data"].is_string());

        // Compressed should be smaller
        assert!(
            result.len() < original_size,
            "Compressed {} should be < original {}",
            result.len(),
            original_size
        );

        println!(
            "Original: {} bytes, Compressed: {} bytes, Ratio: {:.1}%",
            original_size,
            result.len(),
            (result.len() as f64 / original_size as f64) * 100.0
        );
    }

    #[test]
    fn test_incompressible_data_not_wrapped() {
        // Generate genuinely incompressible pseudo-random bytes with a simple
        // LCG. The previous version used `(i % 256)`, a perfectly repetitive
        // cycle that gzip compresses extremely well — so it never actually
        // exercised the "compression didn't help" path, and its assertion
        // (`!result.is_empty()`) was vacuous.
        let mut state: u64 = 0x2545_F491_4F6C_DD1D;
        let data: Vec<u8> = (0..2000)
            .map(|_| {
                // Knuth's MMIX LCG constants; high bits are well mixed.
                state = state
                    .wrapping_mul(6364136223846793005)
                    .wrapping_add(1442695040888963407);
                (state >> 33) as u8
            })
            .collect();

        let result = maybe_compress(&data);

        // Incompressible data must come back unchanged (no wrapper): gzip +
        // base64 + JSON framing could only make it bigger.
        assert_eq!(result.as_ref(), data.as_slice());
    }
}