// shopify_approver_opencode/packager.rs
use crate::error::{OpenCodeError, Result};
7use flate2::write::GzEncoder;
8use flate2::Compression;
9use ignore::WalkBuilder;
10use std::fs::File;
11use std::path::{Path, PathBuf};
12use tar::Builder;
13use tempfile::NamedTempFile;
14use tracing::{debug, info, warn};
15
/// Patterns that are excluded from every package unless an include pattern
/// matches the path first.
///
/// Each entry is matched with `glob_match` against the path relative to the
/// codebase root.
const DEFAULT_EXCLUDES: &[&str] = &[
    // Version-control metadata
    ".git", ".svn", ".hg",
    // Third-party dependencies and Rust build output
    "node_modules", "vendor", "target",
    // Python virtual environments and caches
    ".venv", "venv", "__pycache__", ".pytest_cache",
    // Build output directories
    "dist", "build", "out", ".next", ".nuxt",
    // Editor / IDE state
    ".idea", ".vscode", "*.swp", "*.swo",
    // Operating-system droppings
    ".DS_Store", "Thumbs.db",
    // Logs
    "*.log", "logs",
    // Test coverage reports
    "coverage", ".nyc_output",
    // Temporary files and caches
    "*.tmp", "*.temp", ".cache",
];
55
/// Packs a codebase directory into a gzip-compressed tar archive.
///
/// Configure it with the `with_*` builder methods and run it with `package`.
#[derive(Debug, Clone)]
pub struct Packager {
    /// Largest size, in bytes, a single file may have and still be archived.
    max_file_size: usize,
    /// Upper bound, in bytes, on the summed uncompressed size of archived files.
    max_total_size: usize,
    /// Caller-supplied patterns for paths to leave out, checked in addition to
    /// the built-in default excludes.
    exclude_patterns: Vec<String>,
    /// Patterns for paths that must not be dropped by any exclude pattern.
    include_patterns: Vec<String>,
}
67
68#[derive(Debug)]
70pub struct PackageResult {
71 pub archive_path: PathBuf,
73 pub file_count: usize,
75 pub total_size: usize,
77 pub compressed_size: usize,
79 pub skipped_files: Vec<SkippedFile>,
81}
82
83#[derive(Debug)]
85pub struct SkippedFile {
86 pub path: PathBuf,
87 pub reason: SkipReason,
88}
89
/// Why a file was left out of the archive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SkipReason {
    /// The file is larger than the per-file limit; carries its size in bytes.
    TooLarge(usize),
    /// The path matched a default or caller-supplied exclude pattern.
    ExcludedPattern,
    /// The file's metadata could not be read, or adding it to the archive failed.
    NotReadable,
    /// The file was classified as binary by extension or content.
    Binary,
}
97
98impl Packager {
99 pub fn new() -> Self {
101 Self {
102 max_file_size: 1024 * 1024, max_total_size: 100 * 1024 * 1024, exclude_patterns: Vec::new(),
105 include_patterns: Vec::new(),
106 }
107 }
108
109 pub fn with_max_file_size(mut self, size: usize) -> Self {
111 self.max_file_size = size;
112 self
113 }
114
115 pub fn with_max_total_size(mut self, size: usize) -> Self {
117 self.max_total_size = size;
118 self
119 }
120
121 pub fn with_excludes(mut self, patterns: Vec<String>) -> Self {
123 self.exclude_patterns = patterns;
124 self
125 }
126
127 pub fn with_includes(mut self, patterns: Vec<String>) -> Self {
129 self.include_patterns = patterns;
130 self
131 }
132
133 pub fn package(&self, codebase_path: &Path) -> Result<PackageResult> {
135 if !codebase_path.exists() {
136 return Err(OpenCodeError::Packaging(format!(
137 "Path does not exist: {}",
138 codebase_path.display()
139 )));
140 }
141
142 if !codebase_path.is_dir() {
143 return Err(OpenCodeError::Packaging(
144 "Path must be a directory".to_string(),
145 ));
146 }
147
148 info!("Packaging codebase from: {}", codebase_path.display());
149
150 let temp_file = NamedTempFile::new()?;
152 let archive_path = temp_file.path().to_path_buf();
153
154 let file = File::create(&archive_path)?;
156 let encoder = GzEncoder::new(file, Compression::default());
157 let mut archive = Builder::new(encoder);
158
159 let mut file_count = 0;
160 let mut total_size = 0;
161 let mut skipped_files = Vec::new();
162
163 let walker = WalkBuilder::new(codebase_path)
165 .hidden(false) .git_ignore(true) .git_exclude(true) .ignore(true) .build();
170
171 for entry in walker.flatten() {
172 let path = entry.path();
173
174 if path.is_dir() {
176 continue;
177 }
178
179 let relative_path = path
181 .strip_prefix(codebase_path)
182 .map_err(|e| OpenCodeError::Packaging(e.to_string()))?;
183
184 if self.should_exclude(relative_path) {
186 skipped_files.push(SkippedFile {
187 path: relative_path.to_path_buf(),
188 reason: SkipReason::ExcludedPattern,
189 });
190 continue;
191 }
192
193 let metadata = match path.metadata() {
195 Ok(m) => m,
196 Err(_) => {
197 skipped_files.push(SkippedFile {
198 path: relative_path.to_path_buf(),
199 reason: SkipReason::NotReadable,
200 });
201 continue;
202 }
203 };
204
205 let file_size = metadata.len() as usize;
206
207 if file_size > self.max_file_size {
208 debug!("Skipping large file: {} ({} bytes)", relative_path.display(), file_size);
209 skipped_files.push(SkippedFile {
210 path: relative_path.to_path_buf(),
211 reason: SkipReason::TooLarge(file_size),
212 });
213 continue;
214 }
215
216 if self.is_binary(path) {
218 debug!("Skipping binary file: {}", relative_path.display());
219 skipped_files.push(SkippedFile {
220 path: relative_path.to_path_buf(),
221 reason: SkipReason::Binary,
222 });
223 continue;
224 }
225
226 if total_size + file_size > self.max_total_size {
228 warn!("Reached maximum total size limit");
229 break;
230 }
231
232 match archive.append_path_with_name(path, relative_path) {
234 Ok(_) => {
235 file_count += 1;
236 total_size += file_size;
237 debug!("Added: {}", relative_path.display());
238 }
239 Err(e) => {
240 warn!("Failed to add file {}: {}", relative_path.display(), e);
241 skipped_files.push(SkippedFile {
242 path: relative_path.to_path_buf(),
243 reason: SkipReason::NotReadable,
244 });
245 }
246 }
247 }
248
249 let encoder = archive.into_inner()?;
251 encoder.finish()?;
252
253 let compressed_size = std::fs::metadata(&archive_path)?.len() as usize;
255
256 let (_, archive_path) = temp_file
258 .keep()
259 .map_err(|e| OpenCodeError::Packaging(format!("Failed to persist temp file: {}", e)))?;
260
261 info!(
262 "Packaged {} files ({} bytes -> {} bytes compressed)",
263 file_count, total_size, compressed_size
264 );
265
266 Ok(PackageResult {
267 archive_path,
268 file_count,
269 total_size,
270 compressed_size,
271 skipped_files,
272 })
273 }
274
275 fn should_exclude(&self, path: &Path) -> bool {
277 let path_str = path.to_string_lossy();
278
279 for pattern in &self.include_patterns {
281 if glob_match(pattern, &path_str) {
282 return false;
283 }
284 }
285
286 for pattern in DEFAULT_EXCLUDES {
288 if glob_match(pattern, &path_str) {
289 return true;
290 }
291 }
292
293 for pattern in &self.exclude_patterns {
295 if glob_match(pattern, &path_str) {
296 return true;
297 }
298 }
299
300 false
301 }
302
303 fn is_binary(&self, path: &Path) -> bool {
305 if let Some(ext) = path.extension() {
307 let ext = ext.to_string_lossy().to_lowercase();
308 let binary_extensions = [
309 "png", "jpg", "jpeg", "gif", "ico", "svg", "webp", "bmp",
310 "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
311 "zip", "tar", "gz", "rar", "7z",
312 "exe", "dll", "so", "dylib",
313 "woff", "woff2", "ttf", "otf", "eot",
314 "mp3", "mp4", "avi", "mov", "wav",
315 "pyc", "pyo", "class",
316 ];
317 if binary_extensions.contains(&ext.as_str()) {
318 return true;
319 }
320 }
321
322 if let Ok(file) = File::open(path) {
324 use std::io::Read;
325 let mut buffer = [0u8; 8192];
326 let mut reader = std::io::BufReader::new(file);
327 if let Ok(n) = reader.read(&mut buffer) {
328 if buffer[..n].contains(&0) {
330 return true;
331 }
332 }
333 }
334
335 false
336 }
337}
338
339impl Default for Packager {
340 fn default() -> Self {
341 Self::new()
342 }
343}
344
/// Splits `s` into its non-empty path components.
///
/// Both `/` and `\` count as separators so that paths rendered on Windows are
/// handled the same way as Unix-style ones.
fn path_components<'a>(s: &'a str) -> impl Iterator<Item = &'a str> + 'a {
    s.split(|c: char| c == '/' || c == '\\')
        .filter(|part| !part.is_empty())
}

/// Returns `true` when the leading components of `path` equal the components
/// of `prefix`. An empty `prefix` matches every path.
fn starts_with_components(path: &str, prefix: &str) -> bool {
    let mut path_parts = path_components(path);
    path_components(prefix).all(|wanted| path_parts.next() == Some(wanted))
}

/// Returns `true` when the components of `needle` occur as a contiguous run
/// anywhere in the components of `path`. A `needle` without any component
/// matches every path.
fn contains_components(path: &str, needle: &str) -> bool {
    let wanted: Vec<&str> = path_components(needle).collect();
    if wanted.is_empty() {
        return true;
    }
    let parts: Vec<&str> = path_components(path).collect();
    parts
        .windows(wanted.len())
        .any(|run| run == wanted.as_slice())
}

/// Simplified glob matching of `pattern` against a relative `path`.
///
/// Supported forms, checked in this order:
///
/// * `**/rest` - matches whatever `rest` matches (matching is not anchored to
///   the start of the path anyway).
/// * `prefix/**/rest` or `prefix/**` (exactly one `**`) - the path must start
///   with the components of `prefix`, and `rest` must match the path.
/// * `*.ext` - the path must end with `.ext`.
/// * any other pattern containing `*` - the literal pieces between the stars
///   must appear in the path in order.
/// * a literal such as `node_modules` or `src/generated` - its components must
///   appear as whole, consecutive path components at any depth, so `.git`
///   matches `.git/config` but not `.github/ci.yml`, and `out` does not match
///   `src/layout.rs`.
///
/// A pattern that is empty (or only separators) matches every path.
fn glob_match(pattern: &str, path: &str) -> bool {
    // A leading `**/` adds nothing beyond "at any depth".
    if let Some(rest) = pattern.strip_prefix("**/") {
        return glob_match(rest, path);
    }

    // Exactly one `**`: anchored directory prefix plus an optional tail pattern.
    if let Some((head, tail)) = pattern.split_once("**") {
        if !tail.contains("**") {
            let prefix = head.trim_end_matches('/');
            let suffix = tail.trim_start_matches('/');
            return starts_with_components(path, prefix) && glob_match(suffix, path);
        }
    }

    // `*.ext`: plain extension test.
    if let Some(ext) = pattern.strip_prefix('*').filter(|rest| rest.starts_with('.')) {
        return path.ends_with(ext);
    }

    // General wildcard: every literal piece must be found, left to right.
    if pattern.contains('*') {
        let mut remaining = path;
        for piece in pattern.split('*').filter(|piece| !piece.is_empty()) {
            match remaining.find(piece) {
                Some(at) => remaining = &remaining[at + piece.len()..],
                None => return false,
            }
        }
        return true;
    }

    // Literal name(s): compare whole path components, never raw substrings.
    contains_components(path, pattern)
}
408
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Spot-checks literal, extension and `**/` patterns.
    #[test]
    fn test_glob_match() {
        let expected_matches = [
            ("node_modules", "node_modules/foo"),
            ("*.log", "app.log"),
            (".git", ".git/config"),
            ("**/*.js", "src/app.js"),
        ];
        for &(pattern, path) in expected_matches.iter() {
            assert!(glob_match(pattern, path));
        }

        assert!(!glob_match("node_modules", "src/node.js"));
    }

    /// Packages a small tree and checks that `node_modules` is left out.
    #[test]
    fn test_packager() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();

        // Directory layout: `src/` holds real code, `node_modules/` must be excluded.
        fs::create_dir(root.join("src")).unwrap();
        fs::create_dir(root.join("node_modules")).unwrap();

        let fixtures = [
            ("index.js", "console.log('hello');"),
            ("app.ts", "export const foo = 1;"),
            ("src/util.js", "// util"),
            ("node_modules/pkg.js", "// pkg"),
        ];
        for &(name, contents) in fixtures.iter() {
            fs::write(root.join(name), contents).unwrap();
        }

        let outcome = Packager::new().package(root).unwrap();

        assert_eq!(outcome.file_count, 3);
        assert!(outcome.compressed_size > 0);

        // The archive is persisted by `package`, so remove it ourselves.
        let _ = fs::remove_file(&outcome.archive_path);
    }
}