1use std::collections::BTreeSet;
16use std::io::Write;
17use std::path::Path;
18use std::time::{SystemTime, UNIX_EPOCH};
19
20use flate2::write::GzEncoder;
21use flate2::Compression;
22use reqwest::Method;
23use serde::{Deserialize, Serialize};
24
25use crate::client::{HeyoClient, HeyoClientOptions, RequestOptions};
26use crate::errors::HeyoError;
27
/// Mount path used as the archive path prefix when
/// `ArchiveDirOptions::mount_path` is `None`.
const DEFAULT_MOUNT_PATH: &str = "/workspace";
29
/// Directory names that are skipped while walking the source tree unless the
/// caller sets `no_ignore`. Matching is done on the bare directory name, at
/// any depth.
const EXCLUDED_DIRS: &[&str] = &[
    // Dependency and package-manager directories.
    "node_modules",
    ".git",
    "target",
    "__pycache__",
    ".cache",
    ".npm",
    ".cargo",
    // Build output.
    "dist",
    ".next",
    ".nuxt",
    "build",
    "vendor",
    // Python environments and tooling.
    ".venv",
    "venv",
    ".tox",
];
47
/// Caller-supplied settings for `archive_dir`.
#[derive(Clone, Debug, Default)]
pub struct ArchiveDirOptions {
    /// Optional archive name sent with the finalize request; left out of the
    /// request body when `None`.
    pub name: Option<String>,
    /// Path prefix given to every entry inside the archive. Leading `/`
    /// characters are removed; `/workspace` is used when `None`.
    pub mount_path: Option<String>,
    /// When `true`, no directory is excluded from the walk: the built-in
    /// exclusion list and `extra_excludes` are both ignored.
    pub no_ignore: bool,
    /// Extra directory names to skip in addition to the built-in list.
    /// Only consulted when `no_ignore` is `false`.
    pub extra_excludes: Vec<String>,
}
59
/// Metadata returned by `archive_dir`, copied from the finalize response.
#[derive(Clone, Debug)]
pub struct ArchiveResult {
    /// Identifier reported by the finalize response.
    pub id: String,
    /// Size reported by the finalize response (`0` when the response omits it).
    pub size_bytes: u64,
    /// Creation timestamp string reported by the finalize response (empty
    /// when the response omits it).
    pub created_at: String,
}
66
67#[derive(Deserialize)]
68struct PresignResponse {
69 archive_id: String,
70 upload_url: String,
71}
72
73#[derive(Serialize)]
74struct FinalizeRequest<'a> {
75 sandbox_id: &'a str,
76 #[serde(skip_serializing_if = "Option::is_none")]
77 name: Option<&'a str>,
78}
79
80#[derive(Deserialize)]
81struct FinalizeResponse {
82 id: String,
83 #[serde(default)]
84 size_bytes: u64,
85 #[serde(default)]
86 created_at: String,
87}
88
89pub async fn archive_dir(
91 local_path: impl AsRef<Path>,
92 options: ArchiveDirOptions,
93 client_options: HeyoClientOptions,
94) -> Result<ArchiveResult, HeyoError> {
95 let dir = local_path
96 .as_ref()
97 .canonicalize()
98 .map_err(|e| HeyoError::invalid(format!("invalid path '{}': {}", local_path.as_ref().display(), e)))?;
99 if !dir.is_dir() {
100 return Err(HeyoError::invalid(format!(
101 "'{}' is not a directory",
102 dir.display()
103 )));
104 }
105 let prefix = options
106 .mount_path
107 .as_deref()
108 .unwrap_or(DEFAULT_MOUNT_PATH)
109 .trim_start_matches('/');
110 let entries = collect_files(&dir, prefix, options.no_ignore, &options.extra_excludes)?;
111 if entries.is_empty() {
112 return Err(HeyoError::invalid("No files found in directory to archive"));
113 }
114
115 let tar_gz = build_tar_gz(&entries)?;
116
117 let client = HeyoClient::new(client_options)?;
118 let presign: PresignResponse = client
119 .request(
120 Method::POST,
121 "/sandbox-archives/presign",
122 None::<&()>,
123 RequestOptions::default(),
124 )
125 .await?;
126
127 upload_to_presigned(&presign.upload_url, tar_gz, &client).await?;
128
129 let basename = dir
130 .file_name()
131 .and_then(|s| s.to_str())
132 .unwrap_or("archive");
133 let body = FinalizeRequest {
134 sandbox_id: basename,
135 name: options.name.as_deref(),
136 };
137 let finalized: FinalizeResponse = client
138 .request(
139 Method::POST,
140 &format!("/sandbox-archives/{}/finalize", presign.archive_id),
141 Some(&body),
142 RequestOptions::default(),
143 )
144 .await?;
145
146 Ok(ArchiveResult {
147 id: finalized.id,
148 size_bytes: finalized.size_bytes,
149 created_at: finalized.created_at,
150 })
151}
152
/// One regular file staged for inclusion in the tarball.
struct TarFileEntry {
    /// `/`-separated member name inside the archive (mount prefix included).
    path_in_archive: String,
    /// Complete file contents, read into memory.
    content: Vec<u8>,
}
157
158fn collect_files(
159 root: &Path,
160 prefix: &str,
161 no_ignore: bool,
162 extra_excludes: &[String],
163) -> Result<Vec<TarFileEntry>, HeyoError> {
164 let excluded: BTreeSet<&str> = if no_ignore {
165 BTreeSet::new()
166 } else {
167 let mut s: BTreeSet<&str> = EXCLUDED_DIRS.iter().copied().collect();
168 for e in extra_excludes {
169 s.insert(e.as_str());
170 }
171 s
172 };
173 let mut out = Vec::new();
174 walk(root, root, prefix, &excluded, &mut out)?;
175 Ok(out)
176}
177
178fn walk(
179 root: &Path,
180 cur: &Path,
181 prefix: &str,
182 excluded: &BTreeSet<&str>,
183 out: &mut Vec<TarFileEntry>,
184) -> Result<(), HeyoError> {
185 let entries = std::fs::read_dir(cur)
186 .map_err(|e| HeyoError::invalid(format!("read_dir {}: {}", cur.display(), e)))?;
187 for entry in entries {
188 let entry = entry
189 .map_err(|e| HeyoError::invalid(format!("read_dir entry in {}: {}", cur.display(), e)))?;
190 let file_type = entry
191 .file_type()
192 .map_err(|e| HeyoError::invalid(format!("file_type for {}: {}", entry.path().display(), e)))?;
193 let name = entry.file_name();
194 let name_str = match name.to_str() {
195 Some(s) => s,
196 None => continue,
197 };
198 if file_type.is_dir() {
199 if excluded.contains(name_str) {
200 continue;
201 }
202 walk(root, &entry.path(), prefix, excluded, out)?;
203 } else if file_type.is_file() {
204 let abs = entry.path();
205 let rel = abs
206 .strip_prefix(root)
207 .map_err(|e| HeyoError::invalid(format!("strip_prefix {}: {}", abs.display(), e)))?;
208 let rel_str = rel
209 .to_str()
210 .ok_or_else(|| HeyoError::invalid(format!("non-utf8 path: {}", rel.display())))?
211 .replace('\\', "/");
212 let content = std::fs::read(&abs)
213 .map_err(|e| HeyoError::invalid(format!("read {}: {}", abs.display(), e)))?;
214 let path_in_archive = if prefix.is_empty() {
215 rel_str
216 } else {
217 format!("{}/{}", prefix, rel_str)
218 };
219 out.push(TarFileEntry {
220 path_in_archive,
221 content,
222 });
223 }
224 }
226 Ok(())
227}
228
229fn build_tar_gz(entries: &[TarFileEntry]) -> Result<Vec<u8>, HeyoError> {
230 let mtime = SystemTime::now()
231 .duration_since(UNIX_EPOCH)
232 .map(|d| d.as_secs())
233 .unwrap_or(0);
234 let mut blocks: Vec<Vec<u8>> = Vec::new();
235
236 let mut dirs: BTreeSet<String> = BTreeSet::new();
239 for entry in entries {
240 let mut acc = String::new();
241 let mut parts: Vec<&str> = entry.path_in_archive.split('/').collect();
242 parts.pop(); for part in parts {
244 if !acc.is_empty() {
245 acc.push('/');
246 }
247 acc.push_str(part);
248 dirs.insert(acc.clone());
249 }
250 }
251 for dir in dirs {
252 let path = format!("{}/", dir);
253 append_entry(&mut blocks, &path, &[], 0o755, mtime, b'5')?;
254 }
255 for entry in entries {
256 append_entry(
257 &mut blocks,
258 &entry.path_in_archive,
259 &entry.content,
260 0o644,
261 mtime,
262 b'0',
263 )?;
264 }
265 blocks.push(vec![0; 1024]);
267
268 let tar: Vec<u8> = blocks.into_iter().flatten().collect();
269 let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
270 encoder
271 .write_all(&tar)
272 .map_err(|e| HeyoError::api(0, format!("gzip write: {}", e)))?;
273 encoder
274 .finish()
275 .map_err(|e| HeyoError::api(0, format!("gzip finish: {}", e)))
276}
277
278fn append_entry(
279 out: &mut Vec<Vec<u8>>,
280 name: &str,
281 content: &[u8],
282 mode: u32,
283 mtime: u64,
284 typeflag: u8,
285) -> Result<(), HeyoError> {
286 let name_bytes = name.as_bytes();
287 if name_bytes.len() > 100 {
288 let mut long_name = name_bytes.to_vec();
290 long_name.push(0);
291 let header = build_header(
292 b"././@LongLink",
293 0,
294 0,
295 0,
296 long_name.len() as u64,
297 0,
298 b'L',
299 *b"ustar ",
300 *b" \0",
301 );
302 out.push(header.to_vec());
303 let padded_len = pad_to_512(long_name.len());
304 let mut padded = long_name;
305 padded.resize(padded_len, 0);
306 out.push(padded);
307 }
308 let truncated = if name_bytes.len() > 100 {
309 &name_bytes[..100]
310 } else {
311 name_bytes
312 };
313 let header = build_header(
314 truncated,
315 mode,
316 1000,
317 1000,
318 content.len() as u64,
319 mtime,
320 typeflag,
321 *b"ustar\0",
322 *b"00",
323 );
324 out.push(header.to_vec());
325 if !content.is_empty() {
326 let padded_len = pad_to_512(content.len());
327 let mut padded = content.to_vec();
328 padded.resize(padded_len, 0);
329 out.push(padded);
330 }
331 Ok(())
332}
333
/// Rounds `len` up to the next multiple of 512 (the tar block size).
/// A value that is already a multiple of 512, including `0`, is returned
/// unchanged.
fn pad_to_512(len: usize) -> usize {
    match len % 512 {
        0 => len,
        remainder => len + (512 - remainder),
    }
}
341
342#[allow(clippy::too_many_arguments)]
343fn build_header(
344 name: &[u8],
345 mode: u32,
346 uid: u32,
347 gid: u32,
348 size: u64,
349 mtime: u64,
350 typeflag: u8,
351 magic: [u8; 6],
352 version: [u8; 2],
353) -> [u8; 512] {
354 let mut buf = [0u8; 512];
355 let nlen = name.len().min(100);
356 buf[..nlen].copy_from_slice(&name[..nlen]);
357 write_octal(&mut buf, 100, 8, mode as u64, false);
358 write_octal(&mut buf, 108, 8, uid as u64, false);
359 write_octal(&mut buf, 116, 8, gid as u64, false);
360 write_octal(&mut buf, 124, 12, size, true);
361 write_octal(&mut buf, 136, 12, mtime, true);
362 for b in &mut buf[148..156] {
363 *b = b' ';
364 }
365 buf[156] = typeflag;
366 buf[257..263].copy_from_slice(&magic);
367 buf[263..265].copy_from_slice(&version);
368
369 let mut sum: u32 = 0;
370 for b in buf.iter() {
371 sum += *b as u32;
372 }
373 let sum_str = format!("{:06o}", sum);
374 let sum_bytes = sum_str.as_bytes();
375 let take = sum_bytes.len().min(6);
376 buf[148..148 + take].copy_from_slice(&sum_bytes[..take]);
377 buf[154] = 0;
378 buf[155] = b' ';
379 buf
380}
381
/// Writes `value` into the numeric tar header field `buf[offset..offset + width]`.
///
/// * When `value` fits in `width - 1` octal digits, the field receives the
///   zero-padded octal digits followed by one terminator byte: a space when
///   `trailing_space` is `true`, otherwise a NUL.
/// * When it does not fit, the field is written in GNU base-256 form
///   instead: the high bit of the first byte is set and the value is stored
///   big-endian in the remaining bits. `trailing_space` is not used in this
///   case because the whole field is binary. (Previously the octal string
///   was cut to its leading digits, silently recording a wrong number.)
///
/// A base-256 field can hold `8 * width - 1` bits; every `u64` fits once
/// `width >= 9`. For narrower fields, bits that do not fit are dropped.
///
/// # Panics
///
/// Panics if the field lies outside `buf` or if `width` is `0`.
fn write_octal(buf: &mut [u8], offset: usize, width: usize, value: u64, trailing_space: bool) {
    let digits = width - 1;
    let field = &mut buf[offset..offset + width];

    // `digits` octal digits hold 3 * digits bits; 22 digits cover any u64.
    let fits_in_octal = digits >= 22 || value < (1u64 << (3 * digits));
    if fits_in_octal {
        let text = format!("{:0>pad$o}", value, pad = digits);
        field[..digits].copy_from_slice(text.as_bytes());
        field[digits] = if trailing_space { b' ' } else { 0 };
        return;
    }

    // GNU base-256 fallback for values too large for the octal form.
    debug_assert!(
        width >= 9 || value < (1u64 << (8 * width - 1)),
        "value does not fit in a {}-byte base-256 tar field",
        width
    );
    field.fill(0);
    let big_endian = value.to_be_bytes();
    let copied = width.min(big_endian.len());
    field[width - copied..].copy_from_slice(&big_endian[big_endian.len() - copied..]);
    field[0] |= 0x80;
}
390
391async fn upload_to_presigned(url: &str, body: Vec<u8>, client: &HeyoClient) -> Result<(), HeyoError> {
392 let http = reqwest::Client::new();
395 let len = body.len();
396 let resp = http
397 .put(url)
398 .header("Content-Type", "application/gzip")
399 .header("Content-Length", len.to_string())
400 .body(body)
401 .send()
402 .await
403 .map_err(|e| HeyoError::api(0, format!("archive PUT to presigned URL: {}", e)))?;
404 let _ = client; if !resp.status().is_success() {
406 let status = resp.status().as_u16();
407 let body = resp.text().await.unwrap_or_default();
408 return Err(HeyoError::api(status, format!("archive upload failed: {}", body)));
409 }
410 Ok(())
411}