srcpack/
lib.rs

1use anyhow::{Context, Result};
2use ignore::overrides::OverrideBuilder;
3use ignore::WalkBuilder;
4use std::fs::File;
5use std::io::BufWriter;
6#[cfg(unix)]
7use std::os::unix::fs::PermissionsExt;
8use std::path::PathBuf;
9use zip::write::SimpleFileOptions;
10use zip::CompressionMethod;
11
/// Configuration for the file scanning process.
///
/// Consumed by `scan_files`, which walks `root_path` and applies
/// `exclude_patterns` on top of the ignore rules found on disk.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScanConfig {
    /// The root directory from which the scan will start.
    pub root_path: PathBuf,
    /// Glob patterns supplied by the user.
    ///
    /// A plain pattern (e.g. `"*.mp4"`) excludes matching paths. A pattern
    /// prefixed with `!` (e.g. `"!special-include.mp4"`) is treated by
    /// `scan_files` as a request to force-include matching paths.
    pub exclude_patterns: Vec<String>,
}

impl ScanConfig {
    /// Creates a new `ScanConfig`.
    ///
    /// # Arguments
    ///
    /// * `path` - The root directory to scan; anything convertible into a `PathBuf`.
    /// * `excludes` - User patterns, stored as-is in `exclude_patterns`.
    pub fn new(path: impl Into<PathBuf>, excludes: Vec<String>) -> Self {
        Self {
            root_path: path.into(),
            exclude_patterns: excludes,
        }
    }
}
29
30pub struct PackConfig {
31    pub root_path: PathBuf,
32    pub output_path: PathBuf,
33    pub compression_method: CompressionMethod,
34    // None Use the default, some(0-9) to specify the level
35    pub compression_level: Option<i64>,
36}
37
38/// Scans the directory specified in the configuration and returns a list of files to include.
39///
40/// This function utilizes the `ignore` crate to respect `.gitignore` rules.
41/// It also performs additional filtering to exclude common build artifacts
42/// (such as `node_modules`, `target`, `.git`, etc.) regardless of gitignore settings.
43///
44/// # Arguments
45///
46/// * `config` - The configuration object containing the root path.
47///
48/// # Returns
49///
50/// * `Result<Vec<PathBuf>>` - A vector containing absolute paths to the valid files found.
51/// # Example
52///
53/// ```no_run
54/// use srcpack::{ScanConfig, scan_files};
55///
56/// let config = ScanConfig::new(".", vec![String::from("*.mp4"), String::from("!special-include.mp4")]);
57/// match scan_files(&config) {
58///     Ok(files) => println!("Found {} files respecting .gitignore", files.len()),
59///     Err(e) => eprintln!("Error scanning directory: {}", e),
60/// }
61/// ```
62pub fn scan_files(config: &ScanConfig) -> Result<Vec<PathBuf>> {
63    let mut files = Vec::new();
64
65    let mut overrides = OverrideBuilder::new(&config.root_path);
66    for pattern in &config.exclude_patterns {
67        if let Some(whitelist_pattern) = pattern.strip_prefix('!') {
68            // Scenario A: User enters "file.txt" (intent: force inclusion/whitelisting)
69            // Action: Remove "!", pass directly to the builder.
70            overrides
71                .add(whitelist_pattern)
72                .context("Invalid include pattern")?;
73        } else {
74            // Scenario B: User enters "file.txt" (intent: exclude/ignore)
75            // Action: Manually add "!".
76            overrides
77                .add(&format!("!{}", pattern))
78                .context("Invalid exclude pattern")?;
79        }
80    }
81    let override_matched = overrides.build()?;
82
83    // WalkBuilder is the core builder from the ignore crate
84    let walker = WalkBuilder::new(&config.root_path)
85        .standard_filters(true) // Automatically read .gitignore, .git/info/exclude, etc.
86        .overrides(override_matched) // Apply user-defined exclude patterns
87        .require_git(false) // Do not require a git repository to work
88        .hidden(false) // Include hidden files (like .env), though specific ones are filtered later
89        .build();
90
91    for result in walker {
92        match result {
93            Ok(entry) => {
94                let path = entry.path();
95
96                // Filter out directories; we only collect files
97                if path.is_file() {
98                    files.push(path.to_path_buf());
99                }
100            }
101            Err(err) => {
102                eprintln!("Scan warning: {}", err);
103            }
104        }
105    }
106
107    Ok(files)
108}
109
110/// Compresses the provided list of files into a ZIP archive.
111///
112/// This function supports **ZIP64** extensions, allowing it to handle files larger than 4GB.
113/// It uses stream-based copying (`std::io::copy`) to keep memory usage low.
114///
115/// # Arguments
116///
117/// * `files` - A slice of file paths to be compressed.
118/// * `root_path` - The base path used to calculate relative paths inside the ZIP archive.
119/// * `output_path` - The destination path for the generated ZIP file.
120/// * `on_progress` - A closure called after each file is processed.
121///     * Arguments: `(path: &PathBuf, current_file_size: u64, total_processed_size: u64)`
122///
123/// # Returns
124///
125/// * `Result<()>` - Returns Ok if the operation completes successfully.
126///
127/// # Example
128///
129/// ```no_run
130/// use srcpack::{pack_files, ScanConfig, scan_files, PackConfig};
131/// use std::path::Path;
132///
133/// let root = Path::new(".");
134/// let config = ScanConfig::new(root, vec![]);
135/// let files = scan_files(&config).unwrap(); // Get list of files first
136/// let output = Path::new("backup.zip");
137/// let pack_config = PackConfig {
138///    root_path: root.to_path_buf(),
139///    output_path: output.to_path_buf(),
140///    compression_method: zip::CompressionMethod::Deflated,
141///    compression_level: None,
142/// };
143///
144/// // Pack the files with a simple progress closure
145/// pack_files(&files, &pack_config, |path, size, total| {
146///     println!("Packed {:?} ({} bytes)", path, size);
147/// }).expect("Failed to pack files");
148/// ```
149pub fn pack_files<F>(files: &[PathBuf], config: &PackConfig, mut on_progress: F) -> Result<()>
150where
151    F: FnMut(&PathBuf, u64, u64) -> (),
152{
153    let file = File::create(&config.output_path)
154        .with_context(|| format!("Failed to create output file: {:?}", &config.output_path))?;
155
156    // Use a buffered writer to improve file I/O performance
157    let buf_writer = BufWriter::with_capacity(1024 * 1024, file);
158    let mut zip = zip::ZipWriter::new(buf_writer);
159
160    // Set compression options: Default to Deflated (standard compression)
161    let options = SimpleFileOptions::default()
162        .compression_method(CompressionMethod::Deflated)
163        .compression_level(config.compression_level)
164        .large_file(true); // Enable ZIP64 for large files
165
166    let mut total_processed_size: u64 = 0;
167
168    for path in files {
169        // Calculate relative path (e.g., "src/main.rs")
170        // If calculation fails (edge case), fallback to the full path
171        let relative_path = path.strip_prefix(&config.root_path).unwrap_or(path);
172
173        // Normalize path separators (Windows "\" -> Zip "/")
174        // Crucial for cross-platform compatibility
175        let path_str = relative_path.to_string_lossy().replace('\\', "/");
176
177        // Read file content and stream it into the Zip
178        let mut f = File::open(path)?;
179        let metadata = f.metadata()?;
180
181        // Preserve original file permissions if possible
182        let permissions = if cfg!(unix) {
183            #[cfg(unix)]
184            {
185                metadata.permissions().mode()
186            }
187            #[cfg(not(unix))]
188            {
189                0o644 // Windows/Other fallback
190            }
191        } else {
192            0o644
193        };
194
195        // Start a new file in the Zip archive
196        zip.start_file(path_str, options.clone().unix_permissions(permissions))?;
197
198        let current_file_size = metadata.len();
199
200        // Stream copy: reads from file and writes to zip buffer directly
201        std::io::copy(&mut f, &mut zip)?;
202
203        total_processed_size += current_file_size;
204        on_progress(path, current_file_size, total_processed_size);
205    }
206
207    // Finalize the zip file structure
208    zip.finish()?;
209
210    Ok(())
211}
212
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::{create_dir_all, File};
    use std::io::{Read, Write};
    use std::path::Path;
    use tempfile::tempdir;
    use zip::ZipArchive;

    /// Creates `dir/name` (and any missing parent directories) with `content`.
    fn create_test_file(dir: &Path, name: &str, content: &[u8]) {
        let path = dir.join(name);
        if let Some(parent) = path.parent() {
            create_dir_all(parent).unwrap();
        }
        let mut f = File::create(path).unwrap();
        f.write_all(content).unwrap();
    }

    /// Converts scanned paths into `/`-separated strings relative to `root`,
    /// which makes assertions independent of the temp directory location and
    /// of the platform's path separator.
    fn relative_strings(files: &[PathBuf], root: &Path) -> Vec<String> {
        files
            .iter()
            .map(|p| {
                p.strip_prefix(root)
                    .unwrap()
                    .to_string_lossy()
                    .replace('\\', "/")
            })
            .collect()
    }

    /// Returns true when `paths` contains exactly `wanted`.
    fn has_path(paths: &[String], wanted: &str) -> bool {
        paths.iter().any(|p| p.as_str() == wanted)
    }

    #[test]
    fn test_scan_filtering_logic() {
        // 1. Setup a temporary environment
        let temp_dir = tempdir().unwrap();
        let root = temp_dir.path();

        // 2. Create a mixed file structure (valid source code vs artifacts)

        // Valid files
        create_test_file(root, "src/main.rs", b"fn main() {}");
        create_test_file(root, "README.md", b"# Hello");
        // Hidden file that should be kept (unless ignored by gitignore)
        create_test_file(root, ".env", b"SECRET=123");

        // Tool/build directories, excluded by the user patterns passed below
        create_test_file(root, "target/debug/app.exe", b"binary");
        create_test_file(root, "node_modules/react/index.js", b"module");
        create_test_file(root, ".git/HEAD", b"ref: refs/heads/main");
        create_test_file(root, ".vscode/settings.json", b"{}");

        // Gitignore logic
        create_test_file(root, ".gitignore", b"*.log\n/temp/");
        create_test_file(root, "error.log", b"error content"); // Should be ignored by *.log
        create_test_file(root, "temp/cache.bin", b"cache"); // Should be ignored by /temp/

        // 3. Execute Scan
        let config = ScanConfig::new(
            root,
            vec![
                String::from(".git"),
                String::from("node_modules"),
                String::from("target"),
                String::from(".vscode"),
            ],
        );
        let files = scan_files(&config).expect("Scan failed");

        // 4. Verification
        let relative_paths = relative_strings(&files, root);

        // SHOULD contain:
        assert!(has_path(&relative_paths, "src/main.rs"), "Missing src/main.rs");
        assert!(has_path(&relative_paths, "README.md"), "Missing README.md");
        assert!(has_path(&relative_paths, ".env"), "Missing .env");
        // Hidden files are allowed, so .gitignore itself should be listed
        assert!(has_path(&relative_paths, ".gitignore"), "Missing .gitignore");

        // SHOULD NOT contain (user exclude patterns):
        assert!(
            !relative_paths.iter().any(|p| p.contains("target")),
            "Should exclude target"
        );
        assert!(
            !relative_paths.iter().any(|p| p.contains("node_modules")),
            "Should exclude node_modules"
        );
        assert!(
            !relative_paths.iter().any(|p| p.contains(".git/")),
            "Should exclude .git"
        );
        assert!(
            !relative_paths.iter().any(|p| p.contains(".vscode")),
            "Should exclude .vscode"
        );

        // SHOULD NOT contain (Gitignore filters):
        assert!(
            !has_path(&relative_paths, "error.log"),
            "Should respect *.log in gitignore"
        );
        assert!(
            !has_path(&relative_paths, "temp/cache.bin"),
            "Should respect /temp/ in gitignore"
        );
    }

    #[test]
    fn test_scan_whitelist_overrides() {
        // Setup
        let temp_dir = tempdir().unwrap();
        let root = temp_dir.path();

        // Create files
        create_test_file(root, "include_me.mp4", b"video content");
        create_test_file(root, "ignore_me.mp4", b"video content");

        // .gitignore ignores all mp4 files
        create_test_file(root, ".gitignore", b"*.mp4");

        // Execute Scan
        // User intuition: Use "!" to indicate "I want this file, don't care what gitignore says"
        let config = ScanConfig::new(root, vec!["!include_me.mp4".to_string()]);

        let files = scan_files(&config).expect("Scan failed");

        // Verification
        let relative_paths = relative_strings(&files, root);

        assert!(
            has_path(&relative_paths, "include_me.mp4"),
            "Failed to include whitelisted file"
        );

        assert!(
            !has_path(&relative_paths, "ignore_me.mp4"),
            "Should not include other mp4 files"
        );
    }

    #[test]
    fn test_pack_integrity_and_round_trip() {
        // 1. Setup
        let temp_dir = tempdir().unwrap();
        let root = temp_dir.path();
        let output_zip_path = temp_dir.path().join("test_archive.zip");

        // Create some files with distinct content
        let file1_content = "Rust is awesome!";
        let file2_content = vec![0u8; 1024 * 10]; // 10KB dummy binary data

        create_test_file(root, "src/lib.rs", file1_content.as_bytes());
        create_test_file(root, "assets/data.bin", &file2_content);

        // Create a deep directory structure
        create_test_file(root, "a/b/c/d/deep.txt", b"Deep file");

        // 2. Scan (runs before the archive exists, so only the 3 inputs are found)
        let config = ScanConfig::new(root, vec![]);
        let files = scan_files(&config).unwrap();
        assert_eq!(files.len(), 3);

        // 3. Pack (Test the pack_files function)
        pack_files(
            &files,
            &PackConfig {
                root_path: root.to_path_buf(),
                output_path: output_zip_path.clone(),
                compression_method: CompressionMethod::Deflated,
                compression_level: None,
            },
            |_, _, _| {}, // Empty progress callback
        )
        .expect("Packing failed");

        assert!(output_zip_path.exists(), "Zip file was not created");

        // 4. Verify Integrity (Unzip and Compare)
        let zip_file = File::open(&output_zip_path).unwrap();
        let mut archive = ZipArchive::new(zip_file).unwrap();

        // Check if correct number of files are in zip
        assert_eq!(archive.len(), 3);

        // Check file 1: Content match
        let mut f1 = archive
            .by_name("src/lib.rs")
            .expect("src/lib.rs missing in zip");
        let mut buffer = String::new();
        f1.read_to_string(&mut buffer).unwrap();
        assert_eq!(buffer, file1_content, "Content mismatch for src/lib.rs");
        drop(f1); // Release borrow

        // Check file 2: Binary size match
        let f2 = archive
            .by_name("assets/data.bin")
            .expect("assets/data.bin missing");
        assert_eq!(
            f2.size(),
            file2_content.len() as u64,
            "Size mismatch for binary file"
        );
        drop(f2);

        // Check file 3: Path normalization (entry names must use forward slashes)
        let filenames: Vec<_> = archive.file_names().collect();
        assert!(
            filenames.contains(&"a/b/c/d/deep.txt"),
            "Deep path not preserved or normalized incorrectly"
        );

        // 5. Cleanup
        // The `temp_dir` object (from tempfile crate) automatically deletes
        // the directory and all contents when it goes out of scope here.
        // No manual deletion needed.
    }

    #[test]
    fn test_manual_exclude_patterns() {
        // 1. Setup
        let temp_dir = tempdir().unwrap();
        let root = temp_dir.path();

        // 2. Create a mixed environment
        // Files that should REMAIN
        create_test_file(root, "src/main.rs", b"code");
        create_test_file(root, "assets/logo.png", b"image");
        create_test_file(root, "docs/readme.txt", b"docs");

        // Files that should be EXCLUDED
        create_test_file(root, "assets/demo.mp4", b"heavy video"); // Exclude by extension
        create_test_file(root, "secrets/api_key.txt", b"super secret"); // Exclude by directory
        create_test_file(root, "secrets/nested/config.yaml", b"nested secret"); // Exclude by directory (deep)
        create_test_file(root, "backup.log", b"log file"); // Exclude by extension

        // 3. Configure with Excludes
        // logic: user passes "pattern", code converts to "!pattern" for ignore crate
        let excludes = vec![
            "*.mp4".to_string(),   // Pattern 1: Glob extension
            "secrets".to_string(), // Pattern 2: Directory name
            "*.log".to_string(),   // Pattern 3: Glob extension
        ];

        let config = ScanConfig::new(root, excludes);

        // 4. Execute Scan
        let files = scan_files(&config).expect("Scan failed");

        // 5. Verify results
        let relative_paths = relative_strings(&files, root);

        // --- Positive Assertions (What should be there) ---
        assert!(
            has_path(&relative_paths, "src/main.rs"),
            "Standard file should be present"
        );
        assert!(
            has_path(&relative_paths, "assets/logo.png"),
            "Non-excluded asset should be present"
        );
        assert!(
            has_path(&relative_paths, "docs/readme.txt"),
            "Docs should be present"
        );

        // --- Negative Assertions (What should be gone) ---
        // Verify *.mp4 is gone
        assert!(
            !relative_paths.iter().any(|p| p.ends_with(".mp4")),
            "Failed to exclude .mp4 files"
        );

        // Verify secrets directory is gone (including nested files)
        assert!(
            !relative_paths.iter().any(|p| p.starts_with("secrets/")),
            "Failed to exclude secrets directory"
        );

        // Verify *.log is gone
        assert!(
            !relative_paths.iter().any(|p| p.ends_with(".log")),
            "Failed to exclude .log files"
        );
    }
}