totebag 0.8.14

An API for extracting/archiving files and directories in multiple formats.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
//! # Totebag
//!
//! `totebag` is an archiving utilities that can archive and extract files supported several formats.
//!
pub mod archiver;
pub mod extractor;
pub mod format;
pub(crate) mod outputs;

use clap::ValueEnum;
use ignore::WalkBuilder;
use std::collections::HashSet;
use std::fmt::Display;
use std::path::{Path, PathBuf};

use typed_builder::TypedBuilder;

use crate::archiver::ArchiveEntries;
use crate::extractor::Entries;
use crate::format::{default_format_detector, FormatDetector};

/// Define the result type for this library.
pub type Result<T> = std::result::Result<T, Error>;

/// Define the ignore types for directory traversing.
#[derive(Debug, Clone, ValueEnum, PartialEq, Copy, Hash, Eq)]
pub enum IgnoreType {
    /// use `git-ignore`, `.gitglobal`, `.gitexclude`, and `.ignore`.
    Default,
    /// ignore hidden files and directories.
    Hidden,
    /// ignore files and directories that are listed in `.gitignore`.
    GitIgnore,
    /// ignore files and directories that are listed in `.gitglobal`.
    GitGlobal,
    /// ignore files and directories that are listed in `.gitexclude`.
    GitExclude,
    /// ignore files and directories that are listed in `.ignore`.
    Ignore,
}

/// Errors that can occur when using this library.
///
/// This enum represents all possible errors that can be returned
/// from archive and extraction operations.
#[derive(Debug)]
pub enum Error {
    /// Error from archiving operation with a descriptive message
    Archiver(String),
    /// Multiple errors occurred during an operation
    Array(Vec<Error>),
    /// The destination path is a directory when a file was expected
    DestIsDir(PathBuf),
    /// The target directory already exists
    DirExists(PathBuf),
    /// Error from extraction operation with a descriptive message
    Extractor(String),
    /// A fatal error from an underlying library
    Fatal(Box<dyn std::error::Error>),
    /// The specified file was not found
    FileNotFound(PathBuf),
    /// The file already exists when it shouldn't be overwritten
    FileExists(PathBuf),
    /// Standard I/O error
    IO(std::io::Error),
    /// JSON serialization/deserialization error
    Json(serde_json::Error),
    /// No arguments were provided when some were required
    NoArgumentsGiven,
    /// A warning message that doesn't halt execution
    Warn(String),
    /// The archive format could not be determined
    UnknownFormat(String),
    /// The format is recognized but not supported for the operation
    UnsupportedFormat(String),
    /// XML serialization/deserialization error
    Xml(serde_xml_rs::Error),
}

impl Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::Archiver(s) => write!(f, "Archiver error: {s}"),
            Error::Array(errs) => {
                errs.iter()
                    .map(std::string::ToString::to_string)
                    .collect::<Vec<_>>()
                    .join("\n")
                    .fmt(f)
            },
            Error::DestIsDir(p) => write!(f, "{}: Destination is a directory", p.to_str().unwrap()),
            Error::DirExists(p) => write!(f, "{}: Directory already exists", p.to_str().unwrap()),
            Error::Extractor(s) => write!(f, "Extractor error: {s}"),
            Error::Fatal(e) => write!(f, "Error: {e}"),
            Error::FileNotFound(p) => write!(f, "{}: File not found", p.to_str().unwrap()),
            Error::FileExists(p) => write!(f, "{}: File already exists", p.to_str().unwrap()),
            Error::IO(e) => write!(f, "IO error: {e}"),
            Error::Json(e) => write!(f, "Json error: {e}"),
            Error::NoArgumentsGiven => write!(f, "No arguments given. Use --help for usage."),
            Error::Warn(s) => write!(f, "Unknown error: {s}"),
            Error::UnknownFormat(s) => write!(f, "{s}: Unknown format"),
            Error::UnsupportedFormat(s) => write!(f, "{s}: Unsupported format"),
            Error::Xml(e) => write!(f, "Xml error: {e}"),
        }
    }
}

impl Error {
    /// Returns `Ok(ok)` if there are no errors, otherwise returns an appropriate error.
    ///
    /// This is a helper method to consolidate multiple errors into a single result.
    ///
    /// # Arguments
    ///
    /// * `ok` - The success value to return if no errors occurred
    /// * `errs` - A vector of errors that may have occurred
    ///
    /// # Returns
    ///
    /// * `Ok(ok)` if `errs` is empty
    /// * `Err(error)` if `errs` contains a single error
    /// * `Err(Error::Array(errs))` if `errs` contains multiple errors
    pub fn error_or<T>(ok: T, errs: Vec<Self>) -> Result<T> {
        if errs.is_empty() {
            Ok(ok)
        } else if errs.len() == 1 {
            Err(errs.into_iter().next().unwrap())
        } else {
            Err(Error::Array(errs))
        }
    }

    /// Returns `Ok(ok())` if there are no errors, otherwise returns an appropriate error.
    /// see [`Error::error_or`] for details of error handling.
    pub fn error_or_else<F, O>(ok: F, errs: Vec<Self>) -> Result<O>
    where
        F: FnOnce() -> O,
    {
        if errs.is_empty() {
            Ok(ok())
        } else if errs.len() == 1 {
            Err(errs.into_iter().next().unwrap())
        } else {
            Err(Error::Array(errs))
        }
    }
}

/// Extract an archive file to the specified destination directory.
///
/// # Arguments
///
/// * `archive_file` - The path to the archive file to extract
/// * `config` - The extraction configuration
///
/// # Examples
///
/// ```
/// use totebag::{extract, ExtractConfig};
///
/// let config = ExtractConfig::builder()
///     .dest("output")
///     .overwrite(true)
///     .build();
/// match extract("../testdata/test.zip", &config) {
///     Ok(_) => println!("Extraction successful"),
///     Err(e) => eprintln!("Error: {:?}", e),
/// }
/// ```
pub fn extract<P: AsRef<Path>>(archive_file: P, config: &ExtractConfig) -> Result<()> {
    let archive_file = archive_file.as_ref();
    let base_dir = config.dest(archive_file)?;
    let extractor = config.extractor(archive_file)?;
    extractor.perform(archive_file.to_path_buf(), base_dir)
}

/// Configuration for extracting archive files.
///
/// This struct holds all the options needed to extract an archive file.
/// Use the builder pattern to create an instance.
///
/// # Examples
///
/// ```
/// use totebag::ExtractConfig;
/// use std::path::PathBuf;
///
/// let config = ExtractConfig::builder()
///     .dest(PathBuf::from("output"))
///     .overwrite(true)
///     .use_archive_name_dir(false)
///     .build();
/// ```
#[derive(TypedBuilder)]
pub struct ExtractConfig {
    /// The destination directory for extraction.
    #[builder(setter(into), default = PathBuf::from("."))]
    pub dest: PathBuf,
    /// Overwrite flag, if `true`, overwrite the files.
    #[builder(default = false)]
    pub overwrite: bool,
    /// If `true`, the destination path becomes `{dest}/{archive_file.file_stem()}`.
    #[builder(default = false)]
    pub use_archive_name_dir: bool,
    /// The format detector to use for determining archive format.
    #[builder(default = default_format_detector())]
    pub format_detector: Box<dyn FormatDetector>,
}

impl ExtractConfig {
    /// Determines the destination path for extraction based on configuration.
    ///
    /// This internal method calculates the final destination path,
    /// taking into account the `use_archive_name_dir` flag.
    pub(crate) fn dest(&self, archive_file: &Path) -> Result<PathBuf> {
        let dest = if self.use_archive_name_dir {
            let stem = archive_file
                .file_stem()
                .unwrap_or_else(|| std::ffi::OsStr::new("archive"));
            self.dest.join(stem)
        } else {
            self.dest.clone()
        };
        if dest.exists() && !self.overwrite {
            if dest == PathBuf::from(".") || dest == PathBuf::from("..") {
                Ok(dest)
            } else {
                Err(Error::DirExists(dest))
            }
        } else {
            Ok(dest)
        }
    }

    /// Creates an extractor for the given archive file.
    ///
    /// # Arguments
    ///
    /// * `archive_file` - Path to the archive file
    ///
    /// # Returns
    ///
    /// Returns a boxed [`ToteExtractor`](crate::extractor::ToteExtractor) for the detected format.
    pub fn extractor(&self, archive_file: &Path) -> Result<Box<dyn crate::extractor::ToteExtractor>> {
        let format = self.format_detector.detect(archive_file);
        crate::extractor::create_with(archive_file, format)
    }
}

/// Returns the entries (file list) in the given archive file.
///
/// # Arguments
///
/// * `archive_file` - The path to the archive file
/// * `format_detector` - The format detector to determine archive type
///
/// # Returns
///
/// Returns a [`Result`] containing [`Entries`] which holds the list of files in the archive.
///
/// # Examples
///
/// ```
/// use totebag::{entries, format::default_format_detector};
///
/// let detector = default_format_detector();
/// match entries("../testdata/test.zip", detector.as_ref()) {
///     Ok(entries) => {
///         for entry in entries.iter() {
///             println!("{}", entry.name);
///         }
///     }
///     Err(e) => eprintln!("Error: {:?}", e),
/// }
/// ```
pub fn entries<P: AsRef<Path>>(archive_file: P, format_detector: &dyn FormatDetector) -> Result<Entries> {
    let archive_file = archive_file.as_ref();
    let format = format_detector.detect(archive_file);
    let extractor = crate::extractor::create_with(archive_file, format)?;
    extractor.list(archive_file.to_path_buf())
}

/// Returns a formatted string representation of the entries in the given archive file.
///
/// # Arguments
///
/// * `archive_file` - The path to the archive file
/// * `config` - The list configuration including output format
///
/// # Returns
///
/// Returns a [`Result`] containing a formatted string of the archive entries.
/// The format depends on the [`OutputFormat`] specified in the config.
///
/// # Examples
///
/// ```
/// use totebag::{list, ListConfig, OutputFormat, format::default_format_detector};
///
/// let config = ListConfig::new(OutputFormat::Long, default_format_detector());
/// match list("../testdata/test.zip", &config) {
///     Ok(output) => println!("{}", output),
///     Err(e) => eprintln!("Error: {:?}", e),
/// }
/// ```
pub fn list<P: AsRef<Path>>(archive_file: P, config: &ListConfig) -> Result<String> {
    match entries(archive_file, config.format_detector.as_ref()) {
        Err(e) => Err(e),
        Ok(entries) => format_for_output(entries, &config.format),
    }
}

fn format_for_output(entries: Entries, f: &OutputFormat) -> Result<String> {
    use OutputFormat::*;
    match f {
        Default => outputs::to_string(&entries),
        Long => outputs::to_string_long(&entries),
        Json => serde_json::to_string(&entries).map_err(Error::Json),
        PrettyJson => serde_json::to_string_pretty(&entries).map_err(Error::Json),
        Xml => serde_xml_rs::to_string(&entries).map_err(Error::Xml),
    }
}

/// Configuration for listing archive file contents.
///
/// This struct holds the options for displaying archive entries.
///
/// # Examples
///
/// ```
/// use totebag::{ListConfig, OutputFormat, format::default_format_detector};
///
/// let config = ListConfig::new(OutputFormat::Json, default_format_detector());
/// ```
pub struct ListConfig {
    /// Specify the output format for listing.
    pub format: OutputFormat,
    format_detector: Box<dyn FormatDetector>,
}

impl ListConfig {
    pub fn new(format: OutputFormat, format_detector: Box<dyn FormatDetector>) -> Self {
        Self { format, format_detector }
    }
}

/// Output format options for listing archive contents.
///
/// # Variants
///
/// * `Default` - Simple list of file names, one per line
/// * `Long` - Detailed format with permissions, sizes, and dates
/// * `Json` - Compact JSON format
/// * `PrettyJson` - Human-readable JSON format with indentation
/// * `Xml` - XML format
#[derive(ValueEnum, Debug, Clone)]
pub enum OutputFormat {
    Default,
    Long,
    Json,
    PrettyJson,
    Xml,
}

/// Create an archive file from the specified targets.
/// 
/// # Arguments
/// 
/// * `archive_targets` - A slice of paths to files or directories to archive
/// * `config` - The archive configuration
/// 
/// # Returns
/// 
/// Returns a [`Result`] containing [`ArchiveEntries`] which holds details about the created archive.
/// 
/// # Examples
/// ```
/// use totebag::{archive, ArchiveConfig};
/// use std::path::PathBuf;
/// let config = ArchiveConfig::builder()
///     .dest("output.tar.gz")  // Destination archive file and its format (by file extension).
///     .level(9)               // Maximum compression level
///     .overwrite(true)        // set overwrite flag of the destination file.
///     // .no_recursive(false) // Default is false.
///     .build();
/// let targets = vec!["src", "Cargo.toml"].iter() // files to be archived.
///    .map(|s| PathBuf::from(s)).collect::<Vec<PathBuf>>();
/// archive(&targets, &config)
///     .expect("Archiving should succeed");
/// ``` 
pub fn archive<P: AsRef<Path>>(
    archive_targets: &[P],
    config: &ArchiveConfig,
) -> Result<ArchiveEntries> {
    let dest_file = config.dest_file()?;
    log::info!("{:?}: {}", dest_file, dest_file.exists());
    let archiver = archiver::create(&dest_file)?;
    if let Some(parent) = dest_file.parent() {
        if !parent.exists() {
            if let Err(e) = std::fs::create_dir_all(parent) {
                return Err(Error::IO(e));
            }
        }
    }
    let targets = prepare_targets(archive_targets);
    match std::fs::File::create(&dest_file) {
        Ok(file) => match archiver.perform(file, &targets, config) {
            Ok(entries) => {
                let compressed = dest_file.metadata().map(|m| m.len()).unwrap_or(0);
                Ok(ArchiveEntries::new(dest_file, entries, compressed))
            }
            Err(e) => Err(e),
        },
        Err(e) => Err(Error::IO(e)),
    }
}

fn prepare_targets<P: AsRef<Path>>(targets: &[P]) -> Vec<PathBuf> {
    targets
        .iter()
        .map(|p| p.as_ref().to_path_buf())
        .collect()
}

/// Configuration for creating archive files.
///
/// This struct holds all the options needed to create an archive file.
/// Use the builder pattern to create an instance.
///
/// # Examples
///
/// ```
/// use totebag::{ArchiveConfig, IgnoreType};
/// use std::path::PathBuf;
///
/// let config = ArchiveConfig::builder()
///     .dest("output.tar.gz")
///     .level(9)  // Maximum compression
///     .rebase_dir(PathBuf::from("root"))
///     .overwrite(true)
///     .no_recursive(false)
///     .ignore(vec![IgnoreType::GitIgnore, IgnoreType::Hidden])
///     .build();
/// ```
#[derive(TypedBuilder, Debug, Clone)]
pub struct ArchiveConfig {
    /// The destination file for archiving.
    #[builder(setter(into), default = PathBuf::from("totebag.zip"))]
    pub dest: PathBuf,

    /// The compression level (available: 0 to 9, 0 is none and 9 is finest).
    #[builder(default = 5)]
    pub level: u8,

    /// the prefix directory for the each file into the archive files when `Some`
    #[builder(default = None, setter(strip_option, into))]
    pub rebase_dir: Option<PathBuf>,

    /// Overwrite flag for archive file. Default is false.
    #[builder(default = false)]
    pub overwrite: bool,

    /// By default (`false`), read files by traversing the each `targets`.
    /// If `true`, it archives the specified files in `targets`.
    #[builder(default = false)]
    pub no_recursive: bool,

    /// specifies the ignore types for traversing.
    #[builder(default = vec![IgnoreType::Default], setter(into))]
    pub ignore: Vec<IgnoreType>,
}

impl ArchiveConfig {
    /// Validates and returns the destination file path.
    ///
    /// # Returns
    ///
    /// Returns the destination path if valid, or an error if:
    /// - The file exists and overwrite is false
    /// - The path is a directory
    pub fn dest_file(&self) -> Result<PathBuf> {
        let dest_path = self.dest.clone();
        if dest_path.exists() {
            if dest_path.is_file() && !self.overwrite {
                Err(Error::FileExists(dest_path))
            } else if self.dest.is_dir() {
                Err(Error::DestIsDir(dest_path))
            } else {
                Ok(dest_path)
            }
        } else {
            Ok(dest_path)
        }
    }

    /// Transforms the given path to its representation inside the archive.
    ///
    /// If `rebase_dir` is set, the path will be prefixed with it.
    ///
    /// # Arguments
    ///
    /// * `path` - The file path to transform
    ///
    /// # Returns
    ///
    /// The path as it should appear in the archive.
    pub fn path_in_archive<P: AsRef<Path>>(&self, path: P) -> PathBuf {
        let from_path = path.as_ref();
        let to_path = if let Some(rebase) = &self.rebase_dir {
            rebase.join(from_path)
        } else {
            from_path.to_path_buf()
        };
        log::debug!("dest_path({from_path:?}) -> {to_path:?}");
        to_path
    }

    /// Creates an iterator over directory entries for the given path.
    ///
    /// The iterator respects the ignore settings configured in this config.
    ///
    /// # Arguments
    ///
    /// * `path` - The path to iterate over
    ///
    /// # Returns
    ///
    /// An iterator over directory entries.
    pub fn iter<P: AsRef<Path>>(&self, path: P) -> impl Iterator<Item = ignore::DirEntry> {
        let mut builder = WalkBuilder::new(path);
        build_walker_impl(self, &mut builder);
        builder.build().flatten()
    }

    /// Returns the list of ignore types to use.
    ///
    /// If the ignore list is empty, returns a default set.
    /// The `Default` ignore type expands to multiple individual types.
    ///
    /// # Returns
    ///
    /// A vector of ignore types.
    pub fn ignore_types(&self) -> Vec<IgnoreType> {
        if self.ignore.is_empty() {
            vec![
                IgnoreType::Ignore,
                IgnoreType::GitIgnore,
                IgnoreType::GitGlobal,
                IgnoreType::GitExclude,
            ]
        } else {
            let mut r = HashSet::<IgnoreType>::new();
            for &it in &self.ignore {
                if it == IgnoreType::Default {
                    r.insert(IgnoreType::Ignore);
                    r.insert(IgnoreType::GitIgnore);
                    r.insert(IgnoreType::GitGlobal);
                    r.insert(IgnoreType::GitExclude);
                } else {
                    r.insert(it);
                }
            }
            r.into_iter().collect()
        }
    }
}

fn build_walker_impl(opts: &ArchiveConfig, w: &mut WalkBuilder) {
    for it in opts.ignore_types() {
        match it {
            IgnoreType::Default => w
                .ignore(true)
                .git_ignore(true)
                .git_global(true)
                .git_exclude(true),
            IgnoreType::GitIgnore => w.git_ignore(true),
            IgnoreType::GitGlobal => w.git_global(true),
            IgnoreType::GitExclude => w.git_exclude(true),
            IgnoreType::Hidden => w.hidden(true),
            IgnoreType::Ignore => w.ignore(true),
        };
    }
}

mod tests {

    #[test]
    fn test_error_message() {
        use crate::Error;
        use std::path::PathBuf;

        assert_eq!(
            Error::Archiver("hoge".into()).to_string(),
            "Archiver error: hoge"
        );
        assert_eq!(
            Error::Extractor("hoge".into()).to_string(),
            "Extractor error: hoge"
        );
        assert_eq!(
            Error::DestIsDir(PathBuf::from("hoge")).to_string(),
            "hoge: Destination is a directory"
        );
        assert_eq!(
            Error::DirExists(PathBuf::from("hoge")).to_string(),
            "hoge: Directory already exists"
        );
        assert_eq!(
            Error::Fatal(Box::new(std::io::Error::new(std::io::ErrorKind::Other, "hoge"))).to_string(),
            "Error: hoge"
        );        
        assert_eq!(
            Error::Json(serde::de::Error::custom("hoge")).to_string(),
            "Json error: hoge"
        );        
        assert_eq!(
            Error::Xml(serde_xml_rs::Error::Custom("hoge".into())).to_string(),
            "Xml error: Custom: hoge"
        );        
        assert_eq!(
            Error::IO(std::io::Error::new(std::io::ErrorKind::NotFound, "hoge")).to_string(),
            "IO error: hoge"
        );
        assert_eq!(
            Error::FileNotFound("hoge".into()).to_string(),
            "hoge: File not found"
        );
        assert_eq!(
            Error::FileExists("hoge".into()).to_string(),
            "hoge: File already exists"
        );
        assert_eq!(
            Error::UnknownFormat("hoge".to_string()).to_string(),
            "hoge: Unknown format"
        );
        assert_eq!(
            Error::UnsupportedFormat("hoge".to_string()).to_string(),
            "hoge: Unsupported format"
        );
        assert_eq!(
            Error::Warn("message".to_string()).to_string(),
            "Unknown error: message"
        );
        assert_eq!(
            Error::NoArgumentsGiven.to_string(),
            "No arguments given. Use --help for usage."
        );
        assert_eq!(
            Error::Array(vec![
                Error::Warn("hoge1".to_string()),
                Error::Warn("hoge2".to_string())
            ])
            .to_string(),
            "Unknown error: hoge1\nUnknown error: hoge2"
        );
    }
}