Skip to main content

heyo_sdk/
archive.rs

1//! Build a `tar.gz` archive from a local directory and upload it as a sandbox
2//! archive. Mirrors `sdk-ts/src/archive.ts`.
3//!
4//! ```no_run
5//! use heyo_sdk::{archive_dir, ArchiveDirOptions, HeyoClientOptions};
6//! # async fn run() -> Result<(), heyo_sdk::HeyoError> {
7//! let archive = archive_dir("./my-app", ArchiveDirOptions::default(), HeyoClientOptions::default()).await?;
8//! println!("archive id: {}", archive.id);
9//! # Ok(()) }
10//! ```
11//!
12//! Minimal v0: excludes a default deny-list of build/dep directories. Does
13//! **not** parse `.gitignore` — that's a follow-up.
14
15use std::collections::BTreeSet;
16use std::io::Write;
17use std::path::Path;
18use std::time::{SystemTime, UNIX_EPOCH};
19
20use flate2::write::GzEncoder;
21use flate2::Compression;
22use reqwest::Method;
23use serde::{Deserialize, Serialize};
24
25use crate::client::{HeyoClient, HeyoClientOptions, RequestOptions};
26use crate::errors::HeyoError;
27
/// Mount-path prefix used for archived files when the caller supplies none.
const DEFAULT_MOUNT_PATH: &str = "/workspace";

/// Directory names left out of the archive by default (build output,
/// dependency caches, VCS metadata). Compared against a single directory
/// name, never against a full path.
const EXCLUDED_DIRS: &[&str] = &[
    "node_modules", ".git", "target", "__pycache__", ".cache",
    ".npm", ".cargo", "dist", ".next", ".nuxt",
    "build", "vendor", ".venv", "venv", ".tox",
];
47
/// Caller-tunable settings for [`archive_dir`]. `Default` leaves every
/// field unset/empty.
#[derive(Clone, Debug, Default)]
pub struct ArchiveDirOptions {
    /// Optional display name sent along when the archive is finalized.
    pub name: Option<String>,
    /// Prefix placed in front of every file path inside the archive.
    /// Falls back to `/workspace` when `None`.
    pub mount_path: Option<String>,
    /// When `true`, the default exclude list (`.git`, `node_modules`, …) is
    /// not applied.
    pub no_ignore: bool,
    /// Additional directory names to leave out. Each one is compared with a
    /// single path segment, not with a whole path.
    pub extra_excludes: Vec<String>,
}
59
/// Outcome of a successful [`archive_dir`] call, copied from the finalize
/// response.
#[derive(Clone, Debug)]
pub struct ArchiveResult {
    /// Identifier of the finalized archive.
    pub id: String,
    /// Archive size in bytes as reported by the server.
    pub size_bytes: u64,
    /// Creation timestamp string as reported by the server.
    pub created_at: String,
}
66
67#[derive(Deserialize)]
68struct PresignResponse {
69    archive_id: String,
70    upload_url: String,
71}
72
73#[derive(Serialize)]
74struct FinalizeRequest<'a> {
75    sandbox_id: &'a str,
76    #[serde(skip_serializing_if = "Option::is_none")]
77    name: Option<&'a str>,
78}
79
80#[derive(Deserialize)]
81struct FinalizeResponse {
82    id: String,
83    #[serde(default)]
84    size_bytes: u64,
85    #[serde(default)]
86    created_at: String,
87}
88
89/// Tar+gzip `local_path` and upload it as a sandbox archive.
90pub async fn archive_dir(
91    local_path: impl AsRef<Path>,
92    options: ArchiveDirOptions,
93    client_options: HeyoClientOptions,
94) -> Result<ArchiveResult, HeyoError> {
95    let dir = local_path
96        .as_ref()
97        .canonicalize()
98        .map_err(|e| HeyoError::invalid(format!("invalid path '{}': {}", local_path.as_ref().display(), e)))?;
99    if !dir.is_dir() {
100        return Err(HeyoError::invalid(format!(
101            "'{}' is not a directory",
102            dir.display()
103        )));
104    }
105    let prefix = options
106        .mount_path
107        .as_deref()
108        .unwrap_or(DEFAULT_MOUNT_PATH)
109        .trim_start_matches('/');
110    let entries = collect_files(&dir, prefix, options.no_ignore, &options.extra_excludes)?;
111    if entries.is_empty() {
112        return Err(HeyoError::invalid("No files found in directory to archive"));
113    }
114
115    let tar_gz = build_tar_gz(&entries)?;
116
117    let client = HeyoClient::new(client_options)?;
118    let presign: PresignResponse = client
119        .request(
120            Method::POST,
121            "/sandbox-archives/presign",
122            None::<&()>,
123            RequestOptions::default(),
124        )
125        .await?;
126
127    upload_to_presigned(&presign.upload_url, tar_gz, &client).await?;
128
129    let basename = dir
130        .file_name()
131        .and_then(|s| s.to_str())
132        .unwrap_or("archive");
133    let body = FinalizeRequest {
134        sandbox_id: basename,
135        name: options.name.as_deref(),
136    };
137    let finalized: FinalizeResponse = client
138        .request(
139            Method::POST,
140            &format!("/sandbox-archives/{}/finalize", presign.archive_id),
141            Some(&body),
142            RequestOptions::default(),
143        )
144        .await?;
145
146    Ok(ArchiveResult {
147        id: finalized.id,
148        size_bytes: finalized.size_bytes,
149        created_at: finalized.created_at,
150    })
151}
152
/// One regular file queued for the archive, held fully in memory.
struct TarFileEntry {
    /// `/`-separated path the file gets inside the tarball (mount prefix
    /// included).
    path_in_archive: String,
    /// Raw bytes of the file.
    content: Vec<u8>,
}
157
158fn collect_files(
159    root: &Path,
160    prefix: &str,
161    no_ignore: bool,
162    extra_excludes: &[String],
163) -> Result<Vec<TarFileEntry>, HeyoError> {
164    let excluded: BTreeSet<&str> = if no_ignore {
165        BTreeSet::new()
166    } else {
167        let mut s: BTreeSet<&str> = EXCLUDED_DIRS.iter().copied().collect();
168        for e in extra_excludes {
169            s.insert(e.as_str());
170        }
171        s
172    };
173    let mut out = Vec::new();
174    walk(root, root, prefix, &excluded, &mut out)?;
175    Ok(out)
176}
177
178fn walk(
179    root: &Path,
180    cur: &Path,
181    prefix: &str,
182    excluded: &BTreeSet<&str>,
183    out: &mut Vec<TarFileEntry>,
184) -> Result<(), HeyoError> {
185    let entries = std::fs::read_dir(cur)
186        .map_err(|e| HeyoError::invalid(format!("read_dir {}: {}", cur.display(), e)))?;
187    for entry in entries {
188        let entry = entry
189            .map_err(|e| HeyoError::invalid(format!("read_dir entry in {}: {}", cur.display(), e)))?;
190        let file_type = entry
191            .file_type()
192            .map_err(|e| HeyoError::invalid(format!("file_type for {}: {}", entry.path().display(), e)))?;
193        let name = entry.file_name();
194        let name_str = match name.to_str() {
195            Some(s) => s,
196            None => continue,
197        };
198        if file_type.is_dir() {
199            if excluded.contains(name_str) {
200                continue;
201            }
202            walk(root, &entry.path(), prefix, excluded, out)?;
203        } else if file_type.is_file() {
204            let abs = entry.path();
205            let rel = abs
206                .strip_prefix(root)
207                .map_err(|e| HeyoError::invalid(format!("strip_prefix {}: {}", abs.display(), e)))?;
208            let rel_str = rel
209                .to_str()
210                .ok_or_else(|| HeyoError::invalid(format!("non-utf8 path: {}", rel.display())))?
211                .replace('\\', "/");
212            let content = std::fs::read(&abs)
213                .map_err(|e| HeyoError::invalid(format!("read {}: {}", abs.display(), e)))?;
214            let path_in_archive = if prefix.is_empty() {
215                rel_str
216            } else {
217                format!("{}/{}", prefix, rel_str)
218            };
219            out.push(TarFileEntry {
220                path_in_archive,
221                content,
222            });
223        }
224        // Symlinks intentionally skipped, matching the TS implementation.
225    }
226    Ok(())
227}
228
229fn build_tar_gz(entries: &[TarFileEntry]) -> Result<Vec<u8>, HeyoError> {
230    let mtime = SystemTime::now()
231        .duration_since(UNIX_EPOCH)
232        .map(|d| d.as_secs())
233        .unwrap_or(0);
234    let mut blocks: Vec<Vec<u8>> = Vec::new();
235
236    // Directory entries (sorted, deduped) so the archive plays nicely with
237    // tools that need explicit dirs.
238    let mut dirs: BTreeSet<String> = BTreeSet::new();
239    for entry in entries {
240        let mut acc = String::new();
241        let mut parts: Vec<&str> = entry.path_in_archive.split('/').collect();
242        parts.pop(); // drop filename
243        for part in parts {
244            if !acc.is_empty() {
245                acc.push('/');
246            }
247            acc.push_str(part);
248            dirs.insert(acc.clone());
249        }
250    }
251    for dir in dirs {
252        let path = format!("{}/", dir);
253        append_entry(&mut blocks, &path, &[], 0o755, mtime, b'5')?;
254    }
255    for entry in entries {
256        append_entry(
257            &mut blocks,
258            &entry.path_in_archive,
259            &entry.content,
260            0o644,
261            mtime,
262            b'0',
263        )?;
264    }
265    // Two trailing 512-byte zero blocks.
266    blocks.push(vec![0; 1024]);
267
268    let tar: Vec<u8> = blocks.into_iter().flatten().collect();
269    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
270    encoder
271        .write_all(&tar)
272        .map_err(|e| HeyoError::api(0, format!("gzip write: {}", e)))?;
273    encoder
274        .finish()
275        .map_err(|e| HeyoError::api(0, format!("gzip finish: {}", e)))
276}
277
278fn append_entry(
279    out: &mut Vec<Vec<u8>>,
280    name: &str,
281    content: &[u8],
282    mode: u32,
283    mtime: u64,
284    typeflag: u8,
285) -> Result<(), HeyoError> {
286    let name_bytes = name.as_bytes();
287    if name_bytes.len() > 100 {
288        // GNU LongLink extension.
289        let mut long_name = name_bytes.to_vec();
290        long_name.push(0);
291        let header = build_header(
292            b"././@LongLink",
293            0,
294            0,
295            0,
296            long_name.len() as u64,
297            0,
298            b'L',
299            *b"ustar ",
300            *b" \0",
301        );
302        out.push(header.to_vec());
303        let padded_len = pad_to_512(long_name.len());
304        let mut padded = long_name;
305        padded.resize(padded_len, 0);
306        out.push(padded);
307    }
308    let truncated = if name_bytes.len() > 100 {
309        &name_bytes[..100]
310    } else {
311        name_bytes
312    };
313    let header = build_header(
314        truncated,
315        mode,
316        1000,
317        1000,
318        content.len() as u64,
319        mtime,
320        typeflag,
321        *b"ustar\0",
322        *b"00",
323    );
324    out.push(header.to_vec());
325    if !content.is_empty() {
326        let padded_len = pad_to_512(content.len());
327        let mut padded = content.to_vec();
328        padded.resize(padded_len, 0);
329        out.push(padded);
330    }
331    Ok(())
332}
333
/// Rounds `len` up to the next multiple of 512; exact multiples (including
/// `0`) are returned unchanged.
fn pad_to_512(len: usize) -> usize {
    match len % 512 {
        0 => len,
        remainder => len + (512 - remainder),
    }
}
341
342#[allow(clippy::too_many_arguments)]
343fn build_header(
344    name: &[u8],
345    mode: u32,
346    uid: u32,
347    gid: u32,
348    size: u64,
349    mtime: u64,
350    typeflag: u8,
351    magic: [u8; 6],
352    version: [u8; 2],
353) -> [u8; 512] {
354    let mut buf = [0u8; 512];
355    let nlen = name.len().min(100);
356    buf[..nlen].copy_from_slice(&name[..nlen]);
357    write_octal(&mut buf, 100, 8, mode as u64, false);
358    write_octal(&mut buf, 108, 8, uid as u64, false);
359    write_octal(&mut buf, 116, 8, gid as u64, false);
360    write_octal(&mut buf, 124, 12, size, true);
361    write_octal(&mut buf, 136, 12, mtime, true);
362    for b in &mut buf[148..156] {
363        *b = b' ';
364    }
365    buf[156] = typeflag;
366    buf[257..263].copy_from_slice(&magic);
367    buf[263..265].copy_from_slice(&version);
368
369    let mut sum: u32 = 0;
370    for b in buf.iter() {
371        sum += *b as u32;
372    }
373    let sum_str = format!("{:06o}", sum);
374    let sum_bytes = sum_str.as_bytes();
375    let take = sum_bytes.len().min(6);
376    buf[148..148 + take].copy_from_slice(&sum_bytes[..take]);
377    buf[154] = 0;
378    buf[155] = b' ';
379    buf
380}
381
/// Writes `value` into the `width`-byte numeric header field starting at
/// `buf[offset]`.
///
/// Normally the field holds `width - 1` zero-padded octal digits followed by
/// a terminator: a space when `trailing_space` is true, a NUL otherwise.
///
/// If `value` needs more than `width - 1` octal digits, it is not truncated
/// (which would store a wrong number). Instead the field uses the GNU tar
/// base-256 form: the first byte is `0x80` and the remaining `width - 1`
/// bytes hold the value as a big-endian binary integer. No terminator is
/// written in that case.
///
/// # Panics
///
/// Panics if `width` is `0` or the field does not fit inside `buf`.
fn write_octal(buf: &mut [u8], offset: usize, width: usize, value: u64, trailing_space: bool) {
    let digits = width - 1;
    let field = &mut buf[offset..offset + width];
    let text = format!("{:0>w$o}", value, w = digits);

    if text.len() == digits {
        field[..digits].copy_from_slice(text.as_bytes());
        field[digits] = if trailing_space { b' ' } else { 0 };
        return;
    }

    // Overflow: too many octal digits for the field. Switch to base-256.
    field.fill(0);
    field[0] = 0x80;
    let be = value.to_be_bytes();
    let n = be.len().min(digits);
    field[width - n..].copy_from_slice(&be[be.len() - n..]);
}
390
391async fn upload_to_presigned(url: &str, body: Vec<u8>, client: &HeyoClient) -> Result<(), HeyoError> {
392    // The presigned URL belongs to the object store, not our cloud — don't
393    // attach the bearer header. Build a one-shot request directly via reqwest.
394    let http = reqwest::Client::new();
395    let len = body.len();
396    let resp = http
397        .put(url)
398        .header("Content-Type", "application/gzip")
399        .header("Content-Length", len.to_string())
400        .body(body)
401        .send()
402        .await
403        .map_err(|e| HeyoError::api(0, format!("archive PUT to presigned URL: {}", e)))?;
404    let _ = client; // keep parameter for symmetry with TS sig.
405    if !resp.status().is_success() {
406        let status = resp.status().as_u16();
407        let body = resp.text().await.unwrap_or_default();
408        return Err(HeyoError::api(status, format!("archive upload failed: {}", body)));
409    }
410    Ok(())
411}