Skip to main content

shape_runtime/stdlib/
archive.rs

//! Native `archive` module for creating and extracting zip/tar archives.
//!
//! Exports: archive.zip_create, archive.zip_extract, archive.tar_create, archive.tar_extract
4
5use crate::module_exports::{ModuleContext, ModuleExports, ModuleFunction, ModuleParam};
6use shape_value::ValueWord;
7use shape_value::heap_value::HeapValue;
8use std::sync::Arc;
9
10/// Extract a byte array (Array<int>) from a ValueWord into a Vec<u8>.
11fn bytes_from_array(val: &ValueWord) -> Result<Vec<u8>, String> {
12    let arr = val
13        .as_any_array()
14        .ok_or_else(|| "expected an Array<int> of bytes".to_string())?
15        .to_generic();
16    let mut bytes = Vec::with_capacity(arr.len());
17    for item in arr.iter() {
18        let byte_val = item
19            .as_i64()
20            .or_else(|| item.as_f64().map(|n| n as i64))
21            .ok_or_else(|| "array elements must be integers (0-255)".to_string())?;
22        if !(0..=255).contains(&byte_val) {
23            return Err(format!("byte value out of range: {}", byte_val));
24        }
25        bytes.push(byte_val as u8);
26    }
27    Ok(bytes)
28}
29
30/// Convert a Vec<u8> into a ValueWord Array<int>.
31fn bytes_to_array(bytes: &[u8]) -> ValueWord {
32    let items: Vec<ValueWord> = bytes
33        .iter()
34        .map(|&b| ValueWord::from_i64(b as i64))
35        .collect();
36    ValueWord::from_array(Arc::new(items))
37}
38
39/// Extract entries from an Array of {name: string, data: string} objects.
40/// Supports both TypedObject and HashMap representations.
41fn extract_entries(val: &ValueWord) -> Result<Vec<(String, String)>, String> {
42    let arr = val
43        .as_any_array()
44        .ok_or_else(|| "expected an Array of entry objects".to_string())?
45        .to_generic();
46
47    let mut entries = Vec::with_capacity(arr.len());
48    for (i, item) in arr.iter().enumerate() {
49        let (name, data) =
50            extract_entry_fields(item).map_err(|e| format!("entry [{}]: {}", i, e))?;
51        entries.push((name, data));
52    }
53    Ok(entries)
54}
55
56/// Extract `name` and `data` fields from a single entry (TypedObject or HashMap).
57fn extract_entry_fields(val: &ValueWord) -> Result<(String, String), String> {
58    // Try TypedObject first
59    if let Some(HeapValue::TypedObject {
60        slots, heap_mask, ..
61    }) = val.as_heap_ref()
62    {
63        // Convention: slot 0 = name, slot 1 = data (both heap/string)
64        if slots.len() >= 2 {
65            let name_nb = if heap_mask & 1 != 0 {
66                slots[0].as_heap_nb()
67            } else {
68                unsafe { ValueWord::clone_from_bits(slots[0].raw()) }
69            };
70            let data_nb = if heap_mask & 2 != 0 {
71                slots[1].as_heap_nb()
72            } else {
73                unsafe { ValueWord::clone_from_bits(slots[1].raw()) }
74            };
75            if let (Some(name), Some(data)) = (name_nb.as_str(), data_nb.as_str()) {
76                return Ok((name.to_string(), data.to_string()));
77            }
78        }
79    }
80
81    // Try HashMap
82    if let Some((keys, values, _)) = val.as_hashmap() {
83        let mut name = None;
84        let mut data = None;
85        for (k, v) in keys.iter().zip(values.iter()) {
86            if let Some(key_str) = k.as_str() {
87                match key_str {
88                    "name" => name = v.as_str().map(|s| s.to_string()),
89                    "data" => data = v.as_str().map(|s| s.to_string()),
90                    _ => {}
91                }
92            }
93        }
94        if let (Some(n), Some(d)) = (name, data) {
95            return Ok((n, d));
96        }
97    }
98
99    Err("entry must have 'name' (string) and 'data' (string) fields".to_string())
100}
101
102/// Build an entry object as a HashMap with `name` and `data` keys.
103fn make_entry(name: &str, data: &str) -> ValueWord {
104    let keys = vec![
105        ValueWord::from_string(Arc::new("name".to_string())),
106        ValueWord::from_string(Arc::new("data".to_string())),
107    ];
108    let values = vec![
109        ValueWord::from_string(Arc::new(name.to_string())),
110        ValueWord::from_string(Arc::new(data.to_string())),
111    ];
112    ValueWord::from_hashmap_pairs(keys, values)
113}
114
115/// Create the `archive` module with zip/tar creation and extraction functions.
116pub fn create_archive_module() -> ModuleExports {
117    let mut module = ModuleExports::new("archive");
118    module.description = "Archive creation and extraction (zip, tar)".to_string();
119
120    // archive.zip_create(entries: Array<{name: string, data: string}>) -> Array<int>
121    module.add_function_with_schema(
122        "zip_create",
123        |args: &[ValueWord], _ctx: &ModuleContext| {
124            use std::io::{Cursor, Write};
125
126            let entries_val = args
127                .first()
128                .ok_or_else(|| "archive.zip_create() requires an entries array".to_string())?;
129            let entries =
130                extract_entries(entries_val).map_err(|e| format!("archive.zip_create(): {}", e))?;
131
132            let buf = Cursor::new(Vec::new());
133            let mut zip_writer = zip::ZipWriter::new(buf);
134
135            let options = zip::write::SimpleFileOptions::default()
136                .compression_method(zip::CompressionMethod::Deflated);
137
138            for (name, data) in &entries {
139                zip_writer.start_file(name.as_str(), options).map_err(|e| {
140                    format!(
141                        "archive.zip_create() failed to start file '{}': {}",
142                        name, e
143                    )
144                })?;
145                zip_writer.write_all(data.as_bytes()).map_err(|e| {
146                    format!("archive.zip_create() failed to write '{}': {}", name, e)
147                })?;
148            }
149
150            let cursor = zip_writer
151                .finish()
152                .map_err(|e| format!("archive.zip_create() failed to finish: {}", e))?;
153
154            Ok(bytes_to_array(&cursor.into_inner()))
155        },
156        ModuleFunction {
157            description: "Create a zip archive in memory from an array of entries".to_string(),
158            params: vec![ModuleParam {
159                name: "entries".to_string(),
160                type_name: "Array<{name: string, data: string}>".to_string(),
161                required: true,
162                description: "Array of objects with 'name' and 'data' fields".to_string(),
163                ..Default::default()
164            }],
165            return_type: Some("Array<int>".to_string()),
166        },
167    );
168
169    // archive.zip_extract(data: Array<int>) -> Array<{name: string, data: string}>
170    module.add_function_with_schema(
171        "zip_extract",
172        |args: &[ValueWord], _ctx: &ModuleContext| {
173            use std::io::{Cursor, Read};
174
175            let input = args.first().ok_or_else(|| {
176                "archive.zip_extract() requires an Array<int> argument".to_string()
177            })?;
178            let bytes =
179                bytes_from_array(input).map_err(|e| format!("archive.zip_extract(): {}", e))?;
180
181            let cursor = Cursor::new(bytes);
182            let mut archive = zip::ZipArchive::new(cursor)
183                .map_err(|e| format!("archive.zip_extract() invalid zip: {}", e))?;
184
185            let mut entries = Vec::new();
186            for i in 0..archive.len() {
187                let mut file = archive.by_index(i).map_err(|e| {
188                    format!("archive.zip_extract() failed to read entry {}: {}", i, e)
189                })?;
190
191                if file.is_dir() {
192                    continue;
193                }
194
195                let name = file.name().to_string();
196                let mut contents = String::new();
197                file.read_to_string(&mut contents).map_err(|e| {
198                    format!("archive.zip_extract() failed to read '{}': {}", name, e)
199                })?;
200
201                entries.push(make_entry(&name, &contents));
202            }
203
204            Ok(ValueWord::from_array(Arc::new(entries)))
205        },
206        ModuleFunction {
207            description: "Extract a zip archive from a byte array into an array of entries"
208                .to_string(),
209            params: vec![ModuleParam {
210                name: "data".to_string(),
211                type_name: "Array<int>".to_string(),
212                required: true,
213                description: "Zip archive as byte array".to_string(),
214                ..Default::default()
215            }],
216            return_type: Some("Array<{name: string, data: string}>".to_string()),
217        },
218    );
219
220    // archive.tar_create(entries: Array<{name: string, data: string}>) -> Array<int>
221    module.add_function_with_schema(
222        "tar_create",
223        |args: &[ValueWord], _ctx: &ModuleContext| {
224            let entries_val = args
225                .first()
226                .ok_or_else(|| "archive.tar_create() requires an entries array".to_string())?;
227            let entries =
228                extract_entries(entries_val).map_err(|e| format!("archive.tar_create(): {}", e))?;
229
230            let mut builder = tar::Builder::new(Vec::new());
231
232            for (name, data) in &entries {
233                let data_bytes = data.as_bytes();
234                let mut header = tar::Header::new_gnu();
235                header.set_size(data_bytes.len() as u64);
236                header.set_mode(0o644);
237                header.set_cksum();
238
239                builder
240                    .append_data(&mut header, name.as_str(), data_bytes)
241                    .map_err(|e| format!("archive.tar_create() failed for '{}': {}", name, e))?;
242            }
243
244            let tar_bytes = builder
245                .into_inner()
246                .map_err(|e| format!("archive.tar_create() failed to finish: {}", e))?;
247
248            Ok(bytes_to_array(&tar_bytes))
249        },
250        ModuleFunction {
251            description: "Create a tar archive in memory from an array of entries".to_string(),
252            params: vec![ModuleParam {
253                name: "entries".to_string(),
254                type_name: "Array<{name: string, data: string}>".to_string(),
255                required: true,
256                description: "Array of objects with 'name' and 'data' fields".to_string(),
257                ..Default::default()
258            }],
259            return_type: Some("Array<int>".to_string()),
260        },
261    );
262
263    // archive.tar_extract(data: Array<int>) -> Array<{name: string, data: string}>
264    module.add_function_with_schema(
265        "tar_extract",
266        |args: &[ValueWord], _ctx: &ModuleContext| {
267            use std::io::{Cursor, Read};
268
269            let input = args.first().ok_or_else(|| {
270                "archive.tar_extract() requires an Array<int> argument".to_string()
271            })?;
272            let bytes =
273                bytes_from_array(input).map_err(|e| format!("archive.tar_extract(): {}", e))?;
274
275            let cursor = Cursor::new(bytes);
276            let mut archive = tar::Archive::new(cursor);
277
278            let mut entries = Vec::new();
279            for entry_result in archive
280                .entries()
281                .map_err(|e| format!("archive.tar_extract() invalid tar: {}", e))?
282            {
283                let mut entry = entry_result
284                    .map_err(|e| format!("archive.tar_extract() failed to read entry: {}", e))?;
285
286                // Skip directories
287                if entry.header().entry_type().is_dir() {
288                    continue;
289                }
290
291                let name = entry
292                    .path()
293                    .map_err(|e| format!("archive.tar_extract() invalid path: {}", e))?
294                    .to_string_lossy()
295                    .to_string();
296
297                let mut contents = String::new();
298                entry.read_to_string(&mut contents).map_err(|e| {
299                    format!("archive.tar_extract() failed to read '{}': {}", name, e)
300                })?;
301
302                entries.push(make_entry(&name, &contents));
303            }
304
305            Ok(ValueWord::from_array(Arc::new(entries)))
306        },
307        ModuleFunction {
308            description: "Extract a tar archive from a byte array into an array of entries"
309                .to_string(),
310            params: vec![ModuleParam {
311                name: "data".to_string(),
312                type_name: "Array<int>".to_string(),
313                required: true,
314                description: "Tar archive as byte array".to_string(),
315                ..Default::default()
316            }],
317            return_type: Some("Array<{name: string, data: string}>".to_string()),
318        },
319    );
320
321    module
322}
323
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `ModuleContext` with every optional hook set to `None`.
    ///
    /// The schema registry is leaked with `Box::leak` so the returned context
    /// can carry the `'static` lifetime.
    fn test_ctx() -> crate::module_exports::ModuleContext<'static> {
        let leaked_registry = Box::leak(Box::new(crate::type_schema::TypeSchemaRegistry::new()));
        crate::module_exports::ModuleContext {
            schemas: leaked_registry,
            invoke_callable: None,
            raw_invoker: None,
            function_hashes: None,
            vm_state: None,
            granted_permissions: None,
            scope_constraints: None,
            set_pending_resume: None,
            set_pending_frame_resume: None,
        }
    }

    /// Two sample entries: one at the archive root and one in a subdirectory.
    fn make_test_entries() -> ValueWord {
        ValueWord::from_array(Arc::new(vec![
            make_entry("hello.txt", "Hello, World!"),
            make_entry("data/numbers.txt", "1 2 3 4 5"),
        ]))
    }

    /// Number of elements in an array-valued `ValueWord`; panics if it is not an array.
    fn entry_count(value: &ValueWord) -> usize {
        value.as_any_array().unwrap().to_generic().len()
    }

    /// Decode every element of an extracted-entries array into a `(name, data)` pair.
    fn entry_pairs(value: &ValueWord) -> Vec<(String, String)> {
        let items = value.as_any_array().unwrap().to_generic();
        items
            .iter()
            .map(|item| extract_entry_fields(item).unwrap())
            .collect()
    }

    /// Assert that `extracted` holds exactly the entries from `make_test_entries`, in order.
    fn assert_sample_entries(extracted: &ValueWord) {
        assert_eq!(entry_count(extracted), 2);
        let pairs = entry_pairs(extracted);

        let (first_name, first_data) = &pairs[0];
        assert_eq!(first_name, "hello.txt");
        assert_eq!(first_data, "Hello, World!");

        let (second_name, second_data) = &pairs[1];
        assert_eq!(second_name, "data/numbers.txt");
        assert_eq!(second_data, "1 2 3 4 5");
    }

    #[test]
    fn test_archive_module_creation() {
        let archive = create_archive_module();
        assert_eq!(archive.name, "archive");
        for export in ["zip_create", "zip_extract", "tar_create", "tar_extract"] {
            assert!(archive.has_export(export));
        }
    }

    #[test]
    fn test_zip_roundtrip() {
        let archive = create_archive_module();
        let ctx = test_ctx();
        let create = archive.get_export("zip_create").unwrap();
        let extract = archive.get_export("zip_extract").unwrap();

        let packed = create(&[make_test_entries()], &ctx).unwrap();

        // The archive must come back as a byte array.
        assert!(packed.as_any_array().is_some());

        let unpacked = extract(&[packed], &ctx).unwrap();
        assert_sample_entries(&unpacked);
    }

    #[test]
    fn test_tar_roundtrip() {
        let archive = create_archive_module();
        let ctx = test_ctx();
        let create = archive.get_export("tar_create").unwrap();
        let extract = archive.get_export("tar_extract").unwrap();

        let packed = create(&[make_test_entries()], &ctx).unwrap();

        assert!(packed.as_any_array().is_some());

        let unpacked = extract(&[packed], &ctx).unwrap();
        assert_sample_entries(&unpacked);
    }

    #[test]
    fn test_zip_create_empty() {
        let archive = create_archive_module();
        let ctx = test_ctx();
        let create = archive.get_export("zip_create").unwrap();
        let extract = archive.get_export("zip_extract").unwrap();

        let no_entries = ValueWord::from_array(Arc::new(Vec::new()));
        let packed = create(&[no_entries], &ctx).unwrap();

        let unpacked = extract(&[packed], &ctx).unwrap();
        assert_eq!(entry_count(&unpacked), 0);
    }

    #[test]
    fn test_tar_create_empty() {
        let archive = create_archive_module();
        let ctx = test_ctx();
        let create = archive.get_export("tar_create").unwrap();
        let extract = archive.get_export("tar_extract").unwrap();

        let no_entries = ValueWord::from_array(Arc::new(Vec::new()));
        let packed = create(&[no_entries], &ctx).unwrap();

        let unpacked = extract(&[packed], &ctx).unwrap();
        assert_eq!(entry_count(&unpacked), 0);
    }

    #[test]
    fn test_zip_extract_invalid_data() {
        let archive = create_archive_module();
        let ctx = test_ctx();
        let extract = archive.get_export("zip_extract").unwrap();

        let garbage = ValueWord::from_array(Arc::new(vec![
            ValueWord::from_i64(1),
            ValueWord::from_i64(2),
        ]));
        assert!(extract(&[garbage], &ctx).is_err());
    }

    #[test]
    fn test_tar_extract_invalid_data() {
        let archive = create_archive_module();
        let ctx = test_ctx();
        let extract = archive.get_export("tar_extract").unwrap();

        let garbage = ValueWord::from_array(Arc::new(vec![
            ValueWord::from_i64(1),
            ValueWord::from_i64(2),
        ]));
        // Two bytes are too short for a tar header, so the extractor may either
        // report an error or return no entries; both outcomes are accepted.
        if let Ok(value) = extract(&[garbage], &ctx) {
            assert_eq!(entry_count(&value), 0);
        }
    }

    #[test]
    fn test_zip_create_requires_array() {
        let archive = create_archive_module();
        let ctx = test_ctx();
        let create = archive.get_export("zip_create").unwrap();

        let outcome = create(&[ValueWord::from_i64(42)], &ctx);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_schemas() {
        let archive = create_archive_module();
        let entries_type = "Array<{name: string, data: string}>";

        let zip_create = archive.get_schema("zip_create").unwrap();
        assert_eq!(zip_create.params.len(), 1);
        assert!(zip_create.params[0].required);
        assert_eq!(zip_create.return_type.as_deref(), Some("Array<int>"));

        let zip_extract = archive.get_schema("zip_extract").unwrap();
        assert_eq!(zip_extract.return_type.as_deref(), Some(entries_type));

        let tar_create = archive.get_schema("tar_create").unwrap();
        assert_eq!(tar_create.params.len(), 1);

        let tar_extract = archive.get_schema("tar_extract").unwrap();
        assert_eq!(tar_extract.return_type.as_deref(), Some(entries_type));
    }

    #[test]
    fn test_zip_unicode_content() {
        let archive = create_archive_module();
        let ctx = test_ctx();
        let create = archive.get_export("zip_create").unwrap();
        let extract = archive.get_export("zip_extract").unwrap();

        let input = ValueWord::from_array(Arc::new(vec![make_entry(
            "unicode.txt",
            "Hello \u{1F600} World \u{00E9}",
        )]));
        let packed = create(&[input], &ctx).unwrap();

        let unpacked = extract(&[packed], &ctx).unwrap();
        let items = unpacked.as_any_array().unwrap().to_generic();
        let (_, data) = extract_entry_fields(&items[0]).unwrap();
        assert_eq!(data, "Hello \u{1F600} World \u{00E9}");
    }
}