copyrite 0.3.2

A CLI tool for efficient checksum and copy operations across object stores
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
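//! Structs related to output statistics.
//!
//! Each of the `generate`, `check` and `copy` commands has a serializable stats struct, which
//! also records any unrecoverable error that stopped execution.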

use crate::checksum::Ctx;
use crate::checksum::file::{Checksum, SumsFile};
use crate::cli::CopyMode;
use crate::error::{ApiError, Error};
use crate::task::check::{CheckTask, CheckTaskError, GroupBy};
use crate::task::copy::{CopyTask, CopyTaskError};
use crate::task::generate::{GenerateTask, GenerateTaskError, GenerateTaskResult};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, HashSet};
use std::time::Duration;

/// The result type for stats.
///
/// The error variant holds the same type as the success variant, boxed. This lets a failed
/// operation hand back the stats it has, rather than only an error value.
pub type Result<T> = std::result::Result<T, Box<T>>;

/// Stats from running a `generate` command.
///
/// Collections that are omitted from the serialized output when empty are also marked
/// `#[serde(default)]`, so that output which omitted them can be deserialized again instead of
/// failing with a missing-field error.
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct GenerateStats {
    /// Time taken in seconds.
    pub(crate) elapsed_seconds: f64,
    /// The stats for individual file objects.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub(crate) stats: Vec<GenerateFileStats>,
    /// Stats from running `check` for comparability when computing sums with `--missing`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) check_stats: Option<Box<CheckStats>>,
    /// The API errors if there were permission issues for object attributes.
    #[serde(default, skip_serializing_if = "HashSet::is_empty")]
    pub(crate) recoverable_errors: HashSet<ApiError>,
    /// An unrecoverable error occurred, causing the execution to stop.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) unrecoverable_error: Option<Error>,
    /// The sums files attached to these stats, each paired with a `String`. This is never
    /// serialized or deserialized.
    // NOTE(review): the `String` presumably identifies the location of the sums file - confirm.
    #[serde(skip)]
    pub(crate) sums: Option<Vec<(String, SumsFile)>>,
}

impl From<Error> for Box<GenerateStats> {
    fn from(err: Error) -> Self {
        Box::new(GenerateStats {
            unrecoverable_error: Some(err),
            ..Default::default()
        })
    }
}

impl From<GenerateTaskError> for Box<GenerateStats> {
    fn from(err: GenerateTaskError) -> Self {
        let stats = err.error.into();
        GenerateStats::default().push_task(err.task);
        stats
    }
}

impl GenerateStats {
    /// Create new generate stats from per-file stats and optional check stats.
    ///
    /// File stats that contain no generated checksums are not kept. The elapsed time starts at
    /// zero and can be set using [`GenerateStats::with_elapsed`].
    pub fn new(stats: Vec<GenerateFileStats>, check_stats: Option<CheckStats>) -> Self {
        let mut result = Self::default();

        stats.into_iter().for_each(|stat| result.push_stats(stat));
        result.set_check_stats(check_stats);

        result
    }

    /// Create stats from a list of sums files, leaving every other field at its default.
    pub fn from_sums(sums: Vec<(String, SumsFile)>) -> Self {
        Self {
            sums: Some(sums),
            ..Default::default()
        }
    }

    /// Record the stats of a file, unless no checksums were generated for it.
    fn push_stats(&mut self, stats: GenerateFileStats) {
        if !stats.checksums_generated.0.is_empty() {
            self.stats.push(stats);
        }
    }

    /// Record the stats of a generate task, unless no checksums were generated by it.
    fn push_task(&mut self, task: GenerateTask) {
        self.push_stats(GenerateFileStats::from_task(task));
    }

    /// Add generate stats for a file.
    ///
    /// # Errors
    ///
    /// If the task failed, the accumulated stats are returned in the `Err` variant. They include
    /// the file stats of the failed task and the error that stopped it, stored as the
    /// unrecoverable error.
    pub fn add_stats(mut self, task: GenerateTaskResult) -> Result<Self> {
        match task {
            Ok(task) => {
                self.push_task(task);
                Ok(self)
            }
            Err(err) => {
                self.push_task(err.task);
                // Keep the cause of the failure, otherwise the returned stats would not show
                // why execution stopped.
                self.unrecoverable_error = Some(err.error);
                Err(Box::new(self))
            }
        }
    }

    /// Set the sums files attached to these stats.
    pub fn set_sums_files(&mut self, sums: Vec<(String, SumsFile)>) {
        self.sums = Some(sums);
    }

    /// Set the check stats.
    pub fn set_check_stats(&mut self, check_stats: Option<CheckStats>) {
        self.check_stats = check_stats.map(Box::new);
    }

    /// Set the recoverable errors, replacing any that were previously set.
    pub fn set_recoverable_errors(&mut self, recoverable_errors: HashSet<ApiError>) {
        self.recoverable_errors = recoverable_errors;
    }

    /// Set the number of elapsed seconds.
    pub fn with_elapsed(mut self, elapsed: Duration) -> Self {
        self.elapsed_seconds = elapsed.as_secs_f64();
        self
    }
}

/// A checksum pair: the kind of a checksum together with its value.
///
/// When used as the `reason` of a check comparison or of copy stats, it represents the reason
/// that a check command succeeded.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ChecksumPair {
    /// The kind of checksum, e.g. `md5`.
    pub(crate) kind: Ctx,
    /// The value of the checksum.
    pub(crate) value: Checksum,
}

impl ChecksumPair {
    /// Create a new checksum pair.
    pub fn new(kind: Ctx, value: Checksum) -> Self {
        Self { kind, value }
    }
}

impl From<&CheckStats> for Option<ChecksumPair> {
    /// Take a copy of the reason of the first comparison, or `None` if nothing was compared.
    fn from(stats: &CheckStats) -> Self {
        let first = stats.compared.first()?;

        Some(first.reason.clone())
    }
}

/// A list of checksum pairs, each holding the kind of a checksum and its value.
///
/// It can be built from a `BTreeMap<Ctx, Checksum>`, in which case the pairs follow the order of
/// the map's keys.
#[derive(Serialize, Deserialize, Debug)]
pub struct ChecksumStats(Vec<ChecksumPair>);

impl From<BTreeMap<Ctx, Checksum>> for ChecksumStats {
    /// Turn each entry of the map into a checksum pair, keeping the order of the map.
    fn from(map: BTreeMap<Ctx, Checksum>) -> Self {
        let mut pairs = Vec::with_capacity(map.len());
        for (kind, value) in map {
            pairs.push(ChecksumPair::new(kind, value));
        }

        Self(pairs)
    }
}

/// Generate stats for an individual file.
#[derive(Serialize, Deserialize, Debug)]
pub struct GenerateFileStats {
    /// The location of the file that the checksums were generated for.
    pub(crate) input: String,
    /// Whether the .sums file was updated. This might be false if `--verify` was used and no
    /// sums needed to be updated.
    pub(crate) updated: bool,
    /// The checksums that were generated, as a list of kind and value pairs.
    pub(crate) checksums_generated: ChecksumStats,
}

impl GenerateFileStats {
    /// Create new generate stats.
    pub fn new(input: String, updated: bool, checksums_generated: ChecksumStats) -> Self {
        Self {
            input,
            updated,
            checksums_generated,
        }
    }

    /// Create generate stats from a task.
    pub fn from_task(task: GenerateTask) -> Self {
        let (_, object, updated, checksums_generated) = task.into_inner();

        Self::new(object.location(), updated, checksums_generated.into())
    }
}

/// Represents stats from a `check` operation.
///
/// Collections that are omitted from the serialized output when empty are also marked
/// `#[serde(default)]`, so that output which omitted them can be deserialized again instead of
/// failing with a missing-field error.
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct CheckStats {
    /// The time taken in seconds.
    pub(crate) elapsed_seconds: f64,
    /// Whether the check compared for equality or comparability. Equality ensures that there is
    /// at least one checksum with the same value. Comparability only ensures that there is at
    /// least one checksum that is the same type, but not necessarily that they are the same.
    pub(crate) comparison_type: GroupBy,
    /// The set of compared sums.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub(crate) compared: Vec<CheckComparison>,
    /// Comparison groups. Files in the same group are considered equal or comparable depending
    /// on the comparison type.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub(crate) groups: Vec<Vec<String>>,
    /// The set of sums that were updated if using `--update`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub(crate) updated: Vec<String>,
    /// Any generate stats computed if using `--missing`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) generate_stats: Option<GenerateStats>,
    /// The API errors if there were permission issues for object attributes.
    #[serde(default, skip_serializing_if = "HashSet::is_empty")]
    pub(crate) api_errors: HashSet<ApiError>,
    /// An unrecoverable error occurred, causing the execution to stop.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) unrecoverable_error: Option<Error>,
}

impl From<Error> for Box<CheckStats> {
    fn from(err: Error) -> Self {
        Box::new(CheckStats {
            unrecoverable_error: Some(err),
            ..Default::default()
        })
    }
}

impl From<CheckTaskError> for Box<CheckStats> {
    fn from(err: CheckTaskError) -> Self {
        let mut stats = CheckStats::from_task(err.task, None);
        stats.unrecoverable_error = Some(err.error);
        Box::new(stats)
    }
}

impl CheckStats {
    /// Create new check stats.
    pub fn new(
        comparison_type: GroupBy,
        compared: Vec<CheckComparison>,
        groups: Vec<Vec<String>>,
        updated: Vec<String>,
        generate_stats: Option<GenerateStats>,
        api_errors: HashSet<ApiError>,
    ) -> Self {
        Self {
            elapsed_seconds: 0.0,
            comparison_type,
            compared,
            groups,
            updated,
            generate_stats,
            api_errors,
            unrecoverable_error: None,
        }
    }

    /// Create check stats from a generate task.
    pub fn from_generate_task(group_by: GroupBy, generate_stats: GenerateStats) -> Self {
        Self::new(
            group_by,
            vec![],
            vec![],
            vec![],
            Some(generate_stats),
            Default::default(),
        )
    }

    /// Create check stats from a task.
    pub fn from_task(task: CheckTask, generate_stats: Option<GenerateStats>) -> Self {
        let group_by = task.group_by();
        let (objects, compared, updated, api_errors) = task.into_inner();

        Self::new(
            group_by,
            compared,
            objects.to_groups(),
            updated,
            generate_stats,
            api_errors,
        )
    }

    /// Set the number of elapsed seconds.
    pub fn with_elapsed(mut self, elapsed: Duration) -> Self {
        self.elapsed_seconds = elapsed.as_secs_f64();
        self
    }
}

/// Represents stats from a `copy` operation.
///
/// The API errors are omitted from the serialized output when empty and are also marked
/// `#[serde(default)]`, so that output which omitted them can be deserialized again instead of
/// failing with a missing-field error.
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct CopyStats {
    /// Time taken in seconds.
    pub(crate) elapsed_seconds: f64,
    /// The source of the copy.
    pub(crate) source: String,
    /// The destination of the copy.
    pub(crate) destination: String,
    /// The total bytes transferred to the destination.
    pub(crate) bytes_transferred: u64,
    /// Whether the copy was skipped because the destination already has the file with
    /// matching sums.
    pub(crate) skipped: bool,
    /// Whether the copy occurred because the sums at the destination did not match the source sums.
    /// This will be true if the destination file existed but the sums do not match, thus forcing
    /// a re-copy. It will be false if the destination did not exist in the first place.
    pub(crate) sums_mismatch: bool,
    /// The mode of the copy, either server-side or download-upload.
    pub(crate) copy_mode: CopyMode,
    /// The reason a copy was considered successful. This shows the matching checksum that
    /// determines that the copy completed correctly. If the copy was skipped, this shows the
    /// matching checksum.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) reason: Option<ChecksumPair>,
    /// The number of retries if there were permission issues for copying metadata or tags.
    pub(crate) n_retries: u64,
    /// Stats from checking sums to ensure that the copy was successful.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) check_stats: Option<CheckStats>,
    /// The API errors if there were permission issues for copying metadata or tags.
    #[serde(default, skip_serializing_if = "HashSet::is_empty")]
    pub(crate) api_errors: HashSet<ApiError>,
    /// An unrecoverable error occurred, causing the execution to stop.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) unrecoverable_error: Option<Error>,
}

impl From<Error> for Box<CopyStats> {
    fn from(err: Error) -> Self {
        Box::new(CopyStats {
            unrecoverable_error: Some(err),
            ..Default::default()
        })
    }
}

impl From<CopyTaskError> for Box<CopyStats> {
    fn from(err: CopyTaskError) -> Self {
        let mut stats = CopyStats::from_task(err.task, None, false, false);
        stats.unrecoverable_error = Some(err.error);
        Box::new(stats)
    }
}

impl CopyStats {
    /// Create check stats from a generate task.
    pub fn from_check_stats(
        source: String,
        destination: String,
        copy_mode: CopyMode,
        check_stats: CheckStats,
        skipped: bool,
        sums_mismatch: bool,
    ) -> Self {
        Self {
            elapsed_seconds: 0.0,
            source,
            destination,
            bytes_transferred: 0,
            skipped,
            sums_mismatch,
            copy_mode,
            reason: Option::<ChecksumPair>::from(&check_stats),
            n_retries: 0,
            api_errors: Default::default(),
            check_stats: Some(check_stats),
            unrecoverable_error: None,
        }
    }

    /// Create copy stats from a task.
    pub fn from_task(
        copy_task: CopyTask,
        check_stats: Option<CheckStats>,
        skipped: bool,
        sums_mismatch: bool,
    ) -> Self {
        Self {
            elapsed_seconds: 0.0,
            source: copy_task.source().format(),
            destination: copy_task.destination().format(),
            bytes_transferred: copy_task.bytes_transferred(),
            skipped,
            sums_mismatch,
            copy_mode: copy_task.copy_mode(),
            reason: check_stats.as_ref().and_then(Option::<ChecksumPair>::from),
            n_retries: copy_task.n_retries(),
            api_errors: copy_task.api_errors(),
            check_stats,
            unrecoverable_error: None,
        }
    }

    /// Set the number of elapsed seconds.
    pub fn with_elapsed(mut self, elapsed: Duration) -> Self {
        self.elapsed_seconds = elapsed.as_secs_f64();
        self
    }
}

/// The specific comparison that a `check` performed.
///
/// The locations are omitted from the serialized output when empty and are also marked
/// `#[serde(default)]`, so that output which omitted them can be deserialized again instead of
/// failing with a missing-field error.
#[derive(Serialize, Deserialize, Debug)]
pub struct CheckComparison {
    /// The location of files that were affected by this check.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub(crate) locations: Vec<String>,
    /// The reason that the check was successful.
    pub(crate) reason: ChecksumPair,
}

impl CheckComparison {
    /// Create a new check comparison.
    pub fn new(locations: Vec<String>, reason: ChecksumPair) -> Self {
        Self { locations, reason }
    }
}