Skip to main content

shape_runtime/stdlib/
compress.rs

1//! Native `compress` module for data compression and decompression.
2//!
3//! Exports: compress.gzip, compress.gunzip, compress.zstd, compress.unzstd,
4//!          compress.deflate, compress.inflate
5
6use crate::module_exports::{ModuleContext, ModuleExports, ModuleFunction, ModuleParam};
7use shape_value::ValueWord;
8use std::sync::Arc;
9
10/// Extract a byte array (Array<int>) from a ValueWord into a Vec<u8>.
11fn bytes_from_array(val: &ValueWord) -> Result<Vec<u8>, String> {
12    let arr = val
13        .as_any_array()
14        .ok_or_else(|| "expected an Array<int> of bytes".to_string())?
15        .to_generic();
16    let mut bytes = Vec::with_capacity(arr.len());
17    for item in arr.iter() {
18        let byte_val = item
19            .as_i64()
20            .or_else(|| item.as_f64().map(|n| n as i64))
21            .ok_or_else(|| "array elements must be integers (0-255)".to_string())?;
22        if !(0..=255).contains(&byte_val) {
23            return Err(format!("byte value out of range: {}", byte_val));
24        }
25        bytes.push(byte_val as u8);
26    }
27    Ok(bytes)
28}
29
30/// Convert a Vec<u8> into a ValueWord Array<int>.
31fn bytes_to_array(bytes: &[u8]) -> ValueWord {
32    let items: Vec<ValueWord> = bytes
33        .iter()
34        .map(|&b| ValueWord::from_i64(b as i64))
35        .collect();
36    ValueWord::from_array(Arc::new(items))
37}
38
39/// Create the `compress` module with compression/decompression functions.
40pub fn create_compress_module() -> ModuleExports {
41    let mut module = ModuleExports::new("compress");
42    module.description = "Data compression and decompression (gzip, zstd, deflate)".to_string();
43
44    // compress.gzip(data: string) -> Array<int>
45    module.add_function_with_schema(
46        "gzip",
47        |args: &[ValueWord], _ctx: &ModuleContext| {
48            use flate2::Compression;
49            use flate2::write::GzEncoder;
50            use std::io::Write;
51
52            let data = args
53                .first()
54                .and_then(|a| a.as_str())
55                .ok_or_else(|| "compress.gzip() requires a string argument".to_string())?;
56
57            let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
58            encoder
59                .write_all(data.as_bytes())
60                .map_err(|e| format!("compress.gzip() failed: {}", e))?;
61            let compressed = encoder
62                .finish()
63                .map_err(|e| format!("compress.gzip() failed: {}", e))?;
64
65            Ok(bytes_to_array(&compressed))
66        },
67        ModuleFunction {
68            description: "Compress a string using gzip, returning a byte array".to_string(),
69            params: vec![ModuleParam {
70                name: "data".to_string(),
71                type_name: "string".to_string(),
72                required: true,
73                description: "String data to compress".to_string(),
74                ..Default::default()
75            }],
76            return_type: Some("Array<int>".to_string()),
77        },
78    );
79
80    // compress.gunzip(data: Array<int>) -> string
81    module.add_function_with_schema(
82        "gunzip",
83        |args: &[ValueWord], _ctx: &ModuleContext| {
84            use flate2::read::GzDecoder;
85            use std::io::Read;
86
87            let input = args
88                .first()
89                .ok_or_else(|| "compress.gunzip() requires an Array<int> argument".to_string())?;
90            let bytes = bytes_from_array(input).map_err(|e| format!("compress.gunzip(): {}", e))?;
91
92            let mut decoder = GzDecoder::new(&bytes[..]);
93            let mut output = String::new();
94            decoder
95                .read_to_string(&mut output)
96                .map_err(|e| format!("compress.gunzip() failed: {}", e))?;
97
98            Ok(ValueWord::from_string(Arc::new(output)))
99        },
100        ModuleFunction {
101            description: "Decompress a gzip byte array back to a string".to_string(),
102            params: vec![ModuleParam {
103                name: "data".to_string(),
104                type_name: "Array<int>".to_string(),
105                required: true,
106                description: "Gzip-compressed byte array".to_string(),
107                ..Default::default()
108            }],
109            return_type: Some("string".to_string()),
110        },
111    );
112
113    // compress.zstd(data: string, level?: int) -> Array<int>
114    module.add_function_with_schema(
115        "zstd",
116        |args: &[ValueWord], _ctx: &ModuleContext| {
117            let data = args
118                .first()
119                .and_then(|a| a.as_str())
120                .ok_or_else(|| "compress.zstd() requires a string argument".to_string())?;
121
122            let level = args
123                .get(1)
124                .and_then(|a| a.as_i64().or_else(|| a.as_f64().map(|n| n as i64)))
125                .unwrap_or(3) as i32;
126
127            let compressed = zstd::encode_all(data.as_bytes(), level)
128                .map_err(|e| format!("compress.zstd() failed: {}", e))?;
129
130            Ok(bytes_to_array(&compressed))
131        },
132        ModuleFunction {
133            description: "Compress a string using Zstandard, returning a byte array".to_string(),
134            params: vec![
135                ModuleParam {
136                    name: "data".to_string(),
137                    type_name: "string".to_string(),
138                    required: true,
139                    description: "String data to compress".to_string(),
140                    ..Default::default()
141                },
142                ModuleParam {
143                    name: "level".to_string(),
144                    type_name: "int".to_string(),
145                    required: false,
146                    description: "Compression level (default: 3)".to_string(),
147                    default_snippet: Some("3".to_string()),
148                    ..Default::default()
149                },
150            ],
151            return_type: Some("Array<int>".to_string()),
152        },
153    );
154
155    // compress.unzstd(data: Array<int>) -> string
156    module.add_function_with_schema(
157        "unzstd",
158        |args: &[ValueWord], _ctx: &ModuleContext| {
159            let input = args
160                .first()
161                .ok_or_else(|| "compress.unzstd() requires an Array<int> argument".to_string())?;
162            let bytes = bytes_from_array(input).map_err(|e| format!("compress.unzstd(): {}", e))?;
163
164            let decompressed = zstd::decode_all(&bytes[..])
165                .map_err(|e| format!("compress.unzstd() failed: {}", e))?;
166
167            let output = String::from_utf8(decompressed)
168                .map_err(|e| format!("compress.unzstd() invalid UTF-8: {}", e))?;
169
170            Ok(ValueWord::from_string(Arc::new(output)))
171        },
172        ModuleFunction {
173            description: "Decompress a Zstandard byte array back to a string".to_string(),
174            params: vec![ModuleParam {
175                name: "data".to_string(),
176                type_name: "Array<int>".to_string(),
177                required: true,
178                description: "Zstd-compressed byte array".to_string(),
179                ..Default::default()
180            }],
181            return_type: Some("string".to_string()),
182        },
183    );
184
185    // compress.deflate(data: string) -> Array<int>
186    module.add_function_with_schema(
187        "deflate",
188        |args: &[ValueWord], _ctx: &ModuleContext| {
189            use flate2::Compression;
190            use flate2::write::DeflateEncoder;
191            use std::io::Write;
192
193            let data = args
194                .first()
195                .and_then(|a| a.as_str())
196                .ok_or_else(|| "compress.deflate() requires a string argument".to_string())?;
197
198            let mut encoder = DeflateEncoder::new(Vec::new(), Compression::default());
199            encoder
200                .write_all(data.as_bytes())
201                .map_err(|e| format!("compress.deflate() failed: {}", e))?;
202            let compressed = encoder
203                .finish()
204                .map_err(|e| format!("compress.deflate() failed: {}", e))?;
205
206            Ok(bytes_to_array(&compressed))
207        },
208        ModuleFunction {
209            description: "Compress a string using raw deflate, returning a byte array".to_string(),
210            params: vec![ModuleParam {
211                name: "data".to_string(),
212                type_name: "string".to_string(),
213                required: true,
214                description: "String data to compress".to_string(),
215                ..Default::default()
216            }],
217            return_type: Some("Array<int>".to_string()),
218        },
219    );
220
221    // compress.inflate(data: Array<int>) -> string
222    module.add_function_with_schema(
223        "inflate",
224        |args: &[ValueWord], _ctx: &ModuleContext| {
225            use flate2::read::DeflateDecoder;
226            use std::io::Read;
227
228            let input = args
229                .first()
230                .ok_or_else(|| "compress.inflate() requires an Array<int> argument".to_string())?;
231            let bytes =
232                bytes_from_array(input).map_err(|e| format!("compress.inflate(): {}", e))?;
233
234            let mut decoder = DeflateDecoder::new(&bytes[..]);
235            let mut output = String::new();
236            decoder
237                .read_to_string(&mut output)
238                .map_err(|e| format!("compress.inflate() failed: {}", e))?;
239
240            Ok(ValueWord::from_string(Arc::new(output)))
241        },
242        ModuleFunction {
243            description: "Decompress a raw deflate byte array back to a string".to_string(),
244            params: vec![ModuleParam {
245                name: "data".to_string(),
246                type_name: "Array<int>".to_string(),
247                required: true,
248                description: "Deflate-compressed byte array".to_string(),
249                ..Default::default()
250            }],
251            return_type: Some("string".to_string()),
252        },
253    );
254
255    module
256}
257
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a context with every optional hook unset, backed by a fresh
    /// schema registry that is leaked to obtain a `'static` reference.
    fn test_ctx() -> ModuleContext<'static> {
        let registry = Box::leak(Box::new(crate::type_schema::TypeSchemaRegistry::new()));
        ModuleContext {
            schemas: registry,
            invoke_callable: None,
            raw_invoker: None,
            function_hashes: None,
            vm_state: None,
            granted_permissions: None,
            scope_constraints: None,
            set_pending_resume: None,
            set_pending_frame_resume: None,
        }
    }

    /// Wrap a string slice in a string-typed `ValueWord`.
    fn text(s: &str) -> ValueWord {
        ValueWord::from_string(Arc::new(s.to_string()))
    }

    /// Compress `payload` with `encode`, check that the result is an array,
    /// then decompress with `decode` and expect the original text back.
    fn assert_roundtrip(encode: &str, decode: &str, payload: &str) {
        let module = create_compress_module();
        let ctx = test_ctx();
        let encode_fn = module.get_export(encode).unwrap();
        let decode_fn = module.get_export(decode).unwrap();

        let packed = encode_fn(&[text(payload)], &ctx).unwrap();
        assert!(packed.as_any_array().is_some());

        let unpacked = decode_fn(&[packed], &ctx).unwrap();
        assert_eq!(unpacked.as_str(), Some(payload));
    }

    #[test]
    fn test_compress_module_creation() {
        let module = create_compress_module();
        assert_eq!(module.name, "compress");
        for export in ["gzip", "gunzip", "zstd", "unzstd", "deflate", "inflate"] {
            assert!(module.has_export(export));
        }
    }

    #[test]
    fn test_gzip_roundtrip() {
        assert_roundtrip("gzip", "gunzip", "hello world");
    }

    #[test]
    fn test_zstd_roundtrip() {
        assert_roundtrip("zstd", "unzstd", "hello zstd compression");
    }

    #[test]
    fn test_zstd_with_level() {
        let module = create_compress_module();
        let ctx = test_ctx();
        let encode_fn = module.get_export("zstd").unwrap();
        let decode_fn = module.get_export("unzstd").unwrap();

        // Pass an explicit compression level as the second argument.
        let call_args = [text("level test"), ValueWord::from_i64(1)];
        let packed = encode_fn(&call_args, &ctx).unwrap();

        let unpacked = decode_fn(&[packed], &ctx).unwrap();
        assert_eq!(unpacked.as_str(), Some("level test"));
    }

    #[test]
    fn test_deflate_roundtrip() {
        assert_roundtrip("deflate", "inflate", "deflate test data");
    }

    #[test]
    fn test_gzip_requires_string() {
        let module = create_compress_module();
        let ctx = test_ctx();
        let gzip_fn = module.get_export("gzip").unwrap();

        // An integer is not a valid payload for gzip.
        assert!(gzip_fn(&[ValueWord::from_i64(42)], &ctx).is_err());
    }

    #[test]
    fn test_gunzip_invalid_data() {
        let module = create_compress_module();
        let ctx = test_ctx();
        let gunzip_fn = module.get_export("gunzip").unwrap();

        // The bytes [1, 2, 3] are not a gzip stream.
        let junk: Vec<ValueWord> = (1..=3_i64).map(ValueWord::from_i64).collect();
        let bad_data = ValueWord::from_array(Arc::new(junk));
        assert!(gunzip_fn(&[bad_data], &ctx).is_err());
    }

    #[test]
    fn test_empty_string_roundtrip() {
        let module = create_compress_module();
        let ctx = test_ctx();
        let gzip_fn = module.get_export("gzip").unwrap();
        let gunzip_fn = module.get_export("gunzip").unwrap();

        let empty = ValueWord::from_string(Arc::new(String::new()));
        let packed = gzip_fn(&[empty], &ctx).unwrap();
        let unpacked = gunzip_fn(&[packed], &ctx).unwrap();
        assert_eq!(unpacked.as_str(), Some(""));
    }

    #[test]
    fn test_large_data_roundtrip() {
        let module = create_compress_module();
        let ctx = test_ctx();
        let gzip_fn = module.get_export("gzip").unwrap();
        let gunzip_fn = module.get_export("gunzip").unwrap();

        let large = "a".repeat(10_000);
        let packed = gzip_fn(&[ValueWord::from_string(Arc::new(large.clone()))], &ctx).unwrap();

        // Highly repetitive input must shrink below its original length.
        let packed_len = packed.as_any_array().unwrap().to_generic().len();
        assert!(packed_len < 10_000);

        let unpacked = gunzip_fn(&[packed], &ctx).unwrap();
        assert_eq!(unpacked.as_str(), Some(large.as_str()));
    }

    #[test]
    fn test_schemas() {
        let module = create_compress_module();

        // gzip: one required parameter, returns a byte array.
        let gzip = module.get_schema("gzip").unwrap();
        assert_eq!(gzip.params.len(), 1);
        assert!(gzip.params[0].required);
        assert_eq!(gzip.return_type.as_deref(), Some("Array<int>"));

        // zstd: required data plus an optional level.
        let zstd = module.get_schema("zstd").unwrap();
        assert_eq!(zstd.params.len(), 2);
        assert!(zstd.params[0].required);
        assert!(!zstd.params[1].required);

        // gunzip: returns a string.
        let gunzip = module.get_schema("gunzip").unwrap();
        assert_eq!(gunzip.return_type.as_deref(), Some("string"));
    }
}