shopify_approver_opencode/
packager.rs

1//! Codebase packager for upload to OpenCode
2//!
3//! Packages a codebase into a compressed tarball, excluding
4//! common non-essential files (node_modules, .git, etc.)
5
6use crate::error::{OpenCodeError, Result};
7use flate2::write::GzEncoder;
8use flate2::Compression;
9use ignore::WalkBuilder;
10use std::fs::File;
11use std::path::{Path, PathBuf};
12use tar::Builder;
13use tempfile::NamedTempFile;
14use tracing::{debug, info, warn};
15
/// Default patterns to exclude when packaging.
///
/// `Packager::should_exclude` tests every entry, via `glob_match`, against a
/// file's path relative to the codebase root. These defaults are consulted
/// before the caller-supplied exclude patterns, and a matching include
/// pattern takes precedence over both.
const DEFAULT_EXCLUDES: &[&str] = &[
    // Version control
    ".git",
    ".svn",
    ".hg",
    // Dependencies
    "node_modules",
    "vendor",
    "target",
    ".venv",
    "venv",
    "__pycache__",
    ".pytest_cache",
    // Build outputs
    "dist",
    "build",
    "out",
    ".next",
    ".nuxt",
    // IDE
    ".idea",
    ".vscode",
    "*.swp",
    "*.swo",
    // OS
    ".DS_Store",
    "Thumbs.db",
    // Logs
    "*.log",
    "logs",
    // Test coverage
    "coverage",
    ".nyc_output",
    // Temporary files
    "*.tmp",
    "*.temp",
    ".cache",
];
55
/// Codebase packager.
///
/// Holds the size limits and the include/exclude patterns that
/// `Packager::package` applies while building the archive.
#[derive(Debug, Clone)]
pub struct Packager {
    /// Maximum size, in bytes, of a single file to include
    max_file_size: usize,
    /// Maximum combined (uncompressed) size, in bytes, of all included files
    max_total_size: usize,
    /// Additional patterns to exclude (on top of the built-in defaults)
    exclude_patterns: Vec<String>,
    /// Patterns to always include (override excludes)
    include_patterns: Vec<String>,
}
67
/// Result of packaging operation
///
/// Returned by `Packager::package` once the archive has been written.
#[derive(Debug)]
pub struct PackageResult {
    /// Path to the created tarball. The file is a persisted temporary file,
    /// so it is not removed automatically; the caller owns its cleanup.
    pub archive_path: PathBuf,
    /// Number of files included
    pub file_count: usize,
    /// Total uncompressed size: the sum, in bytes, of the on-disk sizes of
    /// the files that were added to the archive
    pub total_size: usize,
    /// Compressed size: the size in bytes of the archive file itself
    pub compressed_size: usize,
    /// Files that were skipped. Files that were never reached because the
    /// total size limit stopped the walk are not listed here.
    pub skipped_files: Vec<SkippedFile>,
}
82
/// A file that was skipped during packaging
#[derive(Debug)]
pub struct SkippedFile {
    /// Path of the file, relative to the root of the packaged codebase
    pub path: PathBuf,
    /// Why the file was left out of the archive
    pub reason: SkipReason,
}
89
/// Why a file was left out of the archive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SkipReason {
    /// The file exceeds the per-file size limit; carries the file size in bytes
    TooLarge(usize),
    /// The file's relative path matched an exclude pattern
    ExcludedPattern,
    /// The file's metadata could not be read, or adding it to the archive failed
    NotReadable,
    /// The file was classified as binary (by extension or by content)
    Binary,
}
97
98impl Packager {
99    /// Create a new packager with default settings
100    pub fn new() -> Self {
101        Self {
102            max_file_size: 1024 * 1024,      // 1MB
103            max_total_size: 100 * 1024 * 1024, // 100MB
104            exclude_patterns: Vec::new(),
105            include_patterns: Vec::new(),
106        }
107    }
108
109    /// Set maximum file size
110    pub fn with_max_file_size(mut self, size: usize) -> Self {
111        self.max_file_size = size;
112        self
113    }
114
115    /// Set maximum total size
116    pub fn with_max_total_size(mut self, size: usize) -> Self {
117        self.max_total_size = size;
118        self
119    }
120
121    /// Add exclude patterns
122    pub fn with_excludes(mut self, patterns: Vec<String>) -> Self {
123        self.exclude_patterns = patterns;
124        self
125    }
126
127    /// Add include patterns (override excludes)
128    pub fn with_includes(mut self, patterns: Vec<String>) -> Self {
129        self.include_patterns = patterns;
130        self
131    }
132
133    /// Package a codebase into a compressed tarball
134    pub fn package(&self, codebase_path: &Path) -> Result<PackageResult> {
135        if !codebase_path.exists() {
136            return Err(OpenCodeError::Packaging(format!(
137                "Path does not exist: {}",
138                codebase_path.display()
139            )));
140        }
141
142        if !codebase_path.is_dir() {
143            return Err(OpenCodeError::Packaging(
144                "Path must be a directory".to_string(),
145            ));
146        }
147
148        info!("Packaging codebase from: {}", codebase_path.display());
149
150        // Create temporary file for the archive
151        let temp_file = NamedTempFile::new()?;
152        let archive_path = temp_file.path().to_path_buf();
153
154        // Create gzipped tar archive
155        let file = File::create(&archive_path)?;
156        let encoder = GzEncoder::new(file, Compression::default());
157        let mut archive = Builder::new(encoder);
158
159        let mut file_count = 0;
160        let mut total_size = 0;
161        let mut skipped_files = Vec::new();
162
163        // Walk directory respecting .gitignore
164        let walker = WalkBuilder::new(codebase_path)
165            .hidden(false) // Include hidden files
166            .git_ignore(true) // Respect .gitignore
167            .git_exclude(true) // Respect .git/info/exclude
168            .ignore(true) // Respect .ignore files
169            .build();
170
171        for entry in walker.flatten() {
172            let path = entry.path();
173
174            // Skip directories
175            if path.is_dir() {
176                continue;
177            }
178
179            // Get relative path
180            let relative_path = path
181                .strip_prefix(codebase_path)
182                .map_err(|e| OpenCodeError::Packaging(e.to_string()))?;
183
184            // Check if excluded
185            if self.should_exclude(relative_path) {
186                skipped_files.push(SkippedFile {
187                    path: relative_path.to_path_buf(),
188                    reason: SkipReason::ExcludedPattern,
189                });
190                continue;
191            }
192
193            // Check file size
194            let metadata = match path.metadata() {
195                Ok(m) => m,
196                Err(_) => {
197                    skipped_files.push(SkippedFile {
198                        path: relative_path.to_path_buf(),
199                        reason: SkipReason::NotReadable,
200                    });
201                    continue;
202                }
203            };
204
205            let file_size = metadata.len() as usize;
206
207            if file_size > self.max_file_size {
208                debug!("Skipping large file: {} ({} bytes)", relative_path.display(), file_size);
209                skipped_files.push(SkippedFile {
210                    path: relative_path.to_path_buf(),
211                    reason: SkipReason::TooLarge(file_size),
212                });
213                continue;
214            }
215
216            // Check if binary
217            if self.is_binary(path) {
218                debug!("Skipping binary file: {}", relative_path.display());
219                skipped_files.push(SkippedFile {
220                    path: relative_path.to_path_buf(),
221                    reason: SkipReason::Binary,
222                });
223                continue;
224            }
225
226            // Check total size limit
227            if total_size + file_size > self.max_total_size {
228                warn!("Reached maximum total size limit");
229                break;
230            }
231
232            // Add file to archive
233            match archive.append_path_with_name(path, relative_path) {
234                Ok(_) => {
235                    file_count += 1;
236                    total_size += file_size;
237                    debug!("Added: {}", relative_path.display());
238                }
239                Err(e) => {
240                    warn!("Failed to add file {}: {}", relative_path.display(), e);
241                    skipped_files.push(SkippedFile {
242                        path: relative_path.to_path_buf(),
243                        reason: SkipReason::NotReadable,
244                    });
245                }
246            }
247        }
248
249        // Finish the archive
250        let encoder = archive.into_inner()?;
251        encoder.finish()?;
252
253        // Get compressed size
254        let compressed_size = std::fs::metadata(&archive_path)?.len() as usize;
255
256        // Keep the temp file (don't delete on drop)
257        let (_, archive_path) = temp_file
258            .keep()
259            .map_err(|e| OpenCodeError::Packaging(format!("Failed to persist temp file: {}", e)))?;
260
261        info!(
262            "Packaged {} files ({} bytes -> {} bytes compressed)",
263            file_count, total_size, compressed_size
264        );
265
266        Ok(PackageResult {
267            archive_path,
268            file_count,
269            total_size,
270            compressed_size,
271            skipped_files,
272        })
273    }
274
275    /// Check if a path should be excluded
276    fn should_exclude(&self, path: &Path) -> bool {
277        let path_str = path.to_string_lossy();
278
279        // Check include patterns first (they override excludes)
280        for pattern in &self.include_patterns {
281            if glob_match(pattern, &path_str) {
282                return false;
283            }
284        }
285
286        // Check default excludes
287        for pattern in DEFAULT_EXCLUDES {
288            if glob_match(pattern, &path_str) {
289                return true;
290            }
291        }
292
293        // Check custom excludes
294        for pattern in &self.exclude_patterns {
295            if glob_match(pattern, &path_str) {
296                return true;
297            }
298        }
299
300        false
301    }
302
303    /// Check if a file is binary
304    fn is_binary(&self, path: &Path) -> bool {
305        // Check by extension first
306        if let Some(ext) = path.extension() {
307            let ext = ext.to_string_lossy().to_lowercase();
308            let binary_extensions = [
309                "png", "jpg", "jpeg", "gif", "ico", "svg", "webp", "bmp",
310                "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
311                "zip", "tar", "gz", "rar", "7z",
312                "exe", "dll", "so", "dylib",
313                "woff", "woff2", "ttf", "otf", "eot",
314                "mp3", "mp4", "avi", "mov", "wav",
315                "pyc", "pyo", "class",
316            ];
317            if binary_extensions.contains(&ext.as_str()) {
318                return true;
319            }
320        }
321
322        // Read first bytes to check for binary content
323        if let Ok(file) = File::open(path) {
324            use std::io::Read;
325            let mut buffer = [0u8; 8192];
326            let mut reader = std::io::BufReader::new(file);
327            if let Ok(n) = reader.read(&mut buffer) {
328                // Check for null bytes (common in binary files)
329                if buffer[..n].contains(&0) {
330                    return true;
331                }
332            }
333        }
334
335        false
336    }
337}
338
339impl Default for Packager {
340    fn default() -> Self {
341        Self::new()
342    }
343}
344
/// Gitignore-style glob matching on whole path components (supports `*` and `**`).
///
/// Rules:
/// - `path` is split into components on the platform's path separators;
///   `pattern` is split on `/`.
/// - `*` matches any run of characters inside a single component.
/// - A `**` component matches zero or more whole components.
/// - A pattern without `/` (for example `node_modules` or `*.log`) may match
///   a component at any depth. A trailing `/` is ignored.
/// - A pattern containing `/` is anchored at the start of `path`; begin it
///   with `**/` to match at any depth.
/// - Once the whole pattern has matched, any remaining components are
///   accepted, so a pattern naming a directory matches everything below it.
/// - An empty pattern matches nothing.
///
/// Matching whole components (rather than substrings) means `out` matches
/// `out/main.js` but not `src/layout.rs`.
fn glob_match(pattern: &str, path: &str) -> bool {
    let pattern = pattern.trim_end_matches('/');
    if pattern.is_empty() {
        return false;
    }

    let path_parts: Vec<&str> = path
        .split(std::path::is_separator)
        .filter(|part| !part.is_empty())
        .collect();

    let mut pattern_parts: Vec<&str> = Vec::new();
    if !pattern.contains('/') {
        // Bare names float: behave as if written `**/name`.
        pattern_parts.push("**");
    }
    pattern_parts.extend(pattern.split('/').filter(|part| !part.is_empty()));

    glob_match_components(&pattern_parts, &path_parts)
}

/// Match pattern components against the leading components of `path`.
fn glob_match_components(pattern: &[&str], path: &[&str]) -> bool {
    let (head, rest) = match pattern.split_first() {
        Some(split) => split,
        // Pattern exhausted: whatever is left of the path lies below the match.
        None => return true,
    };

    if *head == "**" {
        // Try every possible number of components for `**` to swallow.
        return (0..=path.len()).any(|skip| glob_match_components(rest, &path[skip..]));
    }

    match path.split_first() {
        Some((component, remaining)) => {
            glob_match_segment(head, component) && glob_match_components(rest, remaining)
        }
        None => false,
    }
}

/// Match one path component against one pattern component containing optional `*` wildcards.
fn glob_match_segment(pattern: &str, segment: &str) -> bool {
    let (prefix, tail) = match pattern.split_once('*') {
        Some(halves) => halves,
        None => return pattern == segment,
    };
    let (middle, suffix) = match tail.rsplit_once('*') {
        Some(halves) => halves,
        None => ("", tail),
    };

    // The literal text before the first `*` and after the last `*` is
    // anchored to the two ends of the segment and must not overlap.
    let mut remaining = match segment
        .strip_prefix(prefix)
        .and_then(|inner| inner.strip_suffix(suffix))
    {
        Some(inner) => inner,
        None => return false,
    };

    // Literal pieces between wildcards must appear in order in what is left.
    for piece in middle.split('*') {
        match remaining.find(piece) {
            Some(at) => remaining = &remaining[at + piece.len()..],
            None => return false,
        }
    }
    true
}
408
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Spot-checks `glob_match` on bare names, extension globs and `**/` patterns.
    #[test]
    fn test_glob_match() {
        let should_match = [
            ("node_modules", "node_modules/foo"),
            ("*.log", "app.log"),
            (".git", ".git/config"),
            ("**/*.js", "src/app.js"),
        ];
        for &(pattern, path) in should_match.iter() {
            assert!(glob_match(pattern, path));
        }

        assert!(!glob_match("node_modules", "src/node.js"));
    }

    /// Packages a small tree and checks that `node_modules` content is left out.
    #[test]
    fn test_packager() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();

        // Directory layout: `src/` is packaged, `node_modules/` should be excluded.
        fs::create_dir(root.join("src")).unwrap();
        fs::create_dir(root.join("node_modules")).unwrap();

        let fixtures = [
            ("index.js", "console.log('hello');"),
            ("app.ts", "export const foo = 1;"),
            ("src/util.js", "// util"),
            ("node_modules/pkg.js", "// pkg"),
        ];
        for &(name, contents) in fixtures.iter() {
            fs::write(root.join(name), contents).unwrap();
        }

        let outcome = Packager::new().package(root).unwrap();

        assert_eq!(outcome.file_count, 3);
        assert!(outcome.compressed_size > 0);

        // Cleanup: the archive is a persisted temp file, so remove it by hand.
        fs::remove_file(&outcome.archive_path).ok();
    }
}
448}