Skip to main content

s3_unspool/
report.rs

1use std::collections::BTreeMap;
2
3use serde::{Deserialize, Serialize};
4
5use crate::options::RetryJitter;
6use crate::s3_uri::{S3Object, S3Prefix};
7
8/// Summary returned by [`crate::upload_directory_zip_to_s3`].
9#[derive(Clone, Debug, Serialize, Deserialize)]
10pub struct UploadReport {
11    /// Local directory that was uploaded.
12    pub source_dir: String,
13    /// Destination ZIP object.
14    pub destination: S3Object,
15    /// Number of regular files included in the ZIP.
16    pub files: usize,
17    /// Number of preserved directory entries included in the ZIP.
18    #[serde(default)]
19    pub directories: usize,
20    /// Whether the embedded update catalog was included.
21    #[serde(default = "default_include_catalog")]
22    pub include_catalog: bool,
23    /// Total uncompressed payload bytes.
24    pub uncompressed_bytes: u64,
25    /// Total uploaded ZIP object bytes.
26    pub zip_bytes: u64,
27}
28
29/// Summary returned by [`crate::zip_s3_prefix_to_s3`].
30#[derive(Clone, Debug, Serialize, Deserialize)]
31pub struct S3PrefixUploadReport {
32    /// Source prefix that was uploaded.
33    pub source: S3Prefix,
34    /// Destination ZIP object.
35    pub destination: S3Object,
36    /// Number of regular source objects included as ZIP file entries.
37    pub files: usize,
38    /// Number of zero-byte trailing-slash source objects included as ZIP directories.
39    pub directories: usize,
40    /// Whether the embedded update catalog was included.
41    #[serde(default = "default_include_catalog")]
42    pub include_catalog: bool,
43    /// Total number of ZIP entries written, excluding the embedded catalog.
44    pub entries: usize,
45    /// Total uncompressed payload bytes across regular file entries.
46    pub uncompressed_bytes: u64,
47    /// Total uploaded ZIP object bytes.
48    pub zip_bytes: u64,
49}
50
51/// Summary returned by local ZIP creation helpers.
52#[derive(Clone, Debug, Serialize, Deserialize)]
53pub struct LocalZipReport {
54    /// Source tree that was zipped.
55    pub source: String,
56    /// Destination ZIP file path.
57    pub destination_zip: String,
58    /// Number of regular file entries included in the ZIP.
59    pub files: usize,
60    /// Number of preserved directory entries included in the ZIP.
61    pub directories: usize,
62    /// Whether the embedded update catalog was included.
63    #[serde(default = "default_include_catalog")]
64    pub include_catalog: bool,
65    /// Total number of ZIP entries written, excluding the embedded catalog.
66    pub entries: usize,
67    /// Total uncompressed payload bytes across regular file entries.
68    pub uncompressed_bytes: u64,
69    /// Size of the generated ZIP file.
70    pub zip_bytes: u64,
71}
72
73/// Summary returned by ZIP dry-run helpers.
74#[derive(Clone, Debug, Serialize, Deserialize)]
75pub struct ZipDryRunReport {
76    /// Source tree that would be zipped.
77    pub source: String,
78    /// Destination ZIP file path or S3 object URI.
79    pub destination: String,
80    /// Number of regular file entries that would be included.
81    pub files: usize,
82    /// Number of directory entries that would be included.
83    pub directories: usize,
84    /// Total number of ZIP entries that would be written, excluding the embedded catalog.
85    pub entries: usize,
86    /// Total uncompressed payload bytes across regular file entries.
87    pub uncompressed_bytes: u64,
88    /// Whether the embedded update catalog would be included.
89    pub include_catalog: bool,
90}
91
/// Serde fallback for `include_catalog` fields that are missing from the
/// input: the catalog is treated as included.
fn default_include_catalog() -> bool {
    true
}
95
96/// Aggregate counters for an extract run.
97#[derive(Clone, Debug, Default, Serialize, Deserialize)]
98pub struct SyncSummary {
99    /// Number of source ZIP entries found, excluding the embedded catalog.
100    pub zip_files: usize,
101    /// Number of destination objects listed before extraction.
102    pub destination_objects: usize,
103    /// Number of missing destination objects uploaded.
104    pub uploaded_new: usize,
105    /// Number of changed destination objects uploaded.
106    pub uploaded_changed: usize,
107    /// Number of unchanged destination objects skipped.
108    pub skipped_unchanged: usize,
109    /// Number of conditional write conflicts.
110    pub conditional_conflicts: usize,
111    /// Number of extra destination objects deleted.
112    pub deleted_extra: usize,
113    /// Number of per-object errors.
114    pub errors: usize,
115}
116
117/// Aggregate counters for an extract dry run.
118#[derive(Clone, Debug, Default, Serialize, Deserialize)]
119pub struct UnzipDryRunSummary {
120    /// Number of source ZIP entries found, excluding the embedded catalog.
121    pub zip_files: usize,
122    /// Number of destination objects listed before extraction, for S3 destinations.
123    pub destination_objects: usize,
124    /// Number of missing destination entries that would be uploaded or created.
125    pub would_upload_new: usize,
126    /// Number of existing destination entries that would be replaced.
127    pub would_upload_changed: usize,
128    /// Number of destination entries that already match the source entry.
129    pub skipped_unchanged: usize,
130    /// Number of extra destination objects that would be deleted.
131    pub would_delete_extra: usize,
132    /// Number of per-entry errors found while planning.
133    pub errors: usize,
134}
135
136/// Full report returned by [`crate::sync_zip_to_s3`].
137#[derive(Clone, Debug, Serialize, Deserialize)]
138pub struct SyncReport {
139    /// Source ZIP object.
140    pub source: S3Object,
141    /// Destination prefix.
142    pub destination: S3Prefix,
143    /// Aggregate extract counters.
144    pub summary: SyncSummary,
145    /// Optional source scheduler and destination `PutObject` diagnostics.
146    #[serde(skip_serializing_if = "Option::is_none")]
147    pub diagnostics: Option<SyncDiagnostics>,
148    /// Per-object operation records.
149    pub operations: Vec<ObjectReport>,
150}
151
152impl SyncReport {
153    /// Returns `true` when one or more object operations failed.
154    pub fn has_errors(&self) -> bool {
155        self.summary.errors > 0
156    }
157}
158
159/// Full report returned when extracting a local ZIP into an S3 prefix.
160#[derive(Clone, Debug, Serialize, Deserialize)]
161pub struct LocalZipToS3Report {
162    /// Source ZIP file path.
163    pub source_zip: String,
164    /// Destination prefix.
165    pub destination: S3Prefix,
166    /// Aggregate extract counters.
167    pub summary: SyncSummary,
168    /// Per-entry operation records.
169    pub operations: Vec<ObjectReport>,
170}
171
172impl LocalZipToS3Report {
173    /// Returns `true` when one or more entry operations failed.
174    pub fn has_errors(&self) -> bool {
175        self.summary.errors > 0
176    }
177}
178
179/// Full report returned when extracting a ZIP into a local directory.
180#[derive(Clone, Debug, Serialize, Deserialize)]
181pub struct LocalUnzipReport {
182    /// Source ZIP object URI or local file path.
183    pub source_zip: String,
184    /// Destination local directory.
185    pub destination_dir: String,
186    /// Aggregate extract counters.
187    pub summary: SyncSummary,
188    /// Optional source scheduler diagnostics for S3 ZIP sources.
189    #[serde(skip_serializing_if = "Option::is_none")]
190    pub diagnostics: Option<LocalUnzipDiagnostics>,
191    /// Per-entry operation records.
192    pub operations: Vec<ObjectReport>,
193}
194
195impl LocalUnzipReport {
196    /// Returns `true` when one or more entry operations failed.
197    pub fn has_errors(&self) -> bool {
198        self.summary.errors > 0
199    }
200}
201
202/// Full report returned by extract dry-run helpers.
203#[derive(Clone, Debug, Serialize, Deserialize)]
204pub struct UnzipDryRunReport {
205    /// Source ZIP object URI or local file path.
206    pub source_zip: String,
207    /// Destination prefix URI or local directory.
208    pub destination: String,
209    /// Aggregate dry-run counters.
210    pub summary: UnzipDryRunSummary,
211    /// Optional source scheduler diagnostics for S3 ZIP sources.
212    #[serde(skip_serializing_if = "Option::is_none")]
213    pub diagnostics: Option<DryRunDiagnostics>,
214    /// Per-entry dry-run operation records.
215    pub operations: Vec<DryRunObjectReport>,
216}
217
218impl UnzipDryRunReport {
219    /// Returns `true` when one or more planned operations failed.
220    pub fn has_errors(&self) -> bool {
221        self.summary.errors > 0
222    }
223}
224
225/// Effective extract settings and aggregate diagnostics.
226#[derive(Clone, Debug, Serialize, Deserialize)]
227pub struct SyncDiagnostics {
228    /// Effective entry concurrency.
229    pub concurrency: usize,
230    /// Effective destination `PutObject` concurrency.
231    pub put_concurrency: usize,
232    /// Effective destination `PutObject` retry policy.
233    pub put_retry: PutRetryDiagnostics,
234    /// Effective source block size in bytes.
235    pub source_block_size: usize,
236    /// Effective source block merge gap in bytes.
237    pub source_block_merge_gap: usize,
238    /// Effective source ranged `GetObject` concurrency.
239    pub source_get_concurrency: usize,
240    /// Effective source block window capacity in bytes.
241    pub source_window_capacity: usize,
242    /// Aggregate source scheduler counters.
243    pub source: SourceDiagnostics,
244    /// Aggregate destination `PutObject` counters.
245    pub put: PutDiagnostics,
246}
247
248/// Effective local unzip settings and aggregate source diagnostics.
249#[derive(Clone, Debug, Serialize, Deserialize)]
250pub struct LocalUnzipDiagnostics {
251    /// Effective entry concurrency.
252    pub concurrency: usize,
253    /// Effective source block size in bytes.
254    pub source_block_size: usize,
255    /// Effective source block merge gap in bytes.
256    pub source_block_merge_gap: usize,
257    /// Effective source ranged `GetObject` concurrency.
258    pub source_get_concurrency: usize,
259    /// Effective source block window capacity in bytes.
260    pub source_window_capacity: usize,
261    /// Aggregate source scheduler counters.
262    pub source: SourceDiagnostics,
263}
264
265/// Effective dry-run source settings and aggregate diagnostics.
266#[derive(Clone, Debug, Serialize, Deserialize)]
267pub struct DryRunDiagnostics {
268    /// Effective entry concurrency.
269    pub concurrency: usize,
270    /// Effective source block size in bytes.
271    pub source_block_size: usize,
272    /// Effective source block merge gap in bytes.
273    pub source_block_merge_gap: usize,
274    /// Effective source ranged `GetObject` concurrency.
275    pub source_get_concurrency: usize,
276    /// Effective source block window capacity in bytes.
277    pub source_window_capacity: usize,
278    /// Aggregate source scheduler counters.
279    pub source: SourceDiagnostics,
280}
281
282/// Source scheduler and ranged `GetObject` counters.
283#[derive(Clone, Debug, Serialize, Deserialize)]
284pub struct SourceDiagnostics {
285    /// Source ZIP object size in bytes.
286    pub source_zip_bytes: u64,
287    /// Number of ZIP entries included in source plans.
288    pub planned_entries: u64,
289    /// Number of source blocks included in source plans.
290    pub planned_blocks: u64,
291    /// Number of source blocks fetched successfully.
292    pub fetched_blocks: u64,
293    /// Total ranged `GetObject` attempts, including retries.
294    pub source_get_attempts: u64,
295    /// Total ranged `GetObject` retries.
296    pub source_get_retries: u64,
297    /// Ranged `GetObject` request errors.
298    pub source_get_request_errors: u64,
299    /// Ranged `GetObject` response body errors.
300    pub source_get_body_errors: u64,
301    /// Ranged `GetObject` responses that ended before the requested bytes were read.
302    pub source_get_short_body_errors: u64,
303    /// Source block fetches that failed after all retry attempts.
304    pub source_get_errors: u64,
305    /// Sum of planned source block sizes.
306    pub planned_source_bytes: u64,
307    /// Sum of fetched source block sizes.
308    pub fetched_source_bytes: u64,
309    /// Unique source bytes covered by fetched ranges.
310    pub unique_source_bytes: u64,
311    /// Ratio of fetched source bytes to unique fetched source bytes.
312    pub source_amplification: f64,
313    /// Number of block read requests served from ready blocks.
314    pub block_hits: u64,
315    /// Number of block read requests that waited for scheduled data.
316    pub block_waits: u64,
317    /// Number of ready source blocks released from the resident window after all
318    /// planned claims consumed them.
319    pub block_releases: u64,
320    /// Number of reader cache misses. This should remain zero for the planned
321    /// source scheduler.
322    pub block_misses: u64,
323    /// Number of explicit replay fetches for blocks that had already been
324    /// released from the resident window.
325    pub block_refetches: u64,
326    /// Highest number of concurrent ranged `GetObject` requests.
327    pub active_gets_high_water: u64,
328}
329
330/// Effective destination `PutObject` retry settings.
331#[derive(Clone, Debug, Serialize, Deserialize)]
332pub struct PutRetryDiagnostics {
333    /// Maximum application-level `PutObject` attempts per object.
334    pub max_attempts: usize,
335    /// Base delay for retryable non-throttling failures, in milliseconds.
336    pub base_delay_ms: u64,
337    /// Maximum delay for retryable non-throttling failures, in milliseconds.
338    pub max_delay_ms: u64,
339    /// Base delay for throttling failures such as S3 `SlowDown`, in milliseconds.
340    pub slowdown_base_delay_ms: u64,
341    /// Maximum delay for throttling failures such as S3 `SlowDown`, in milliseconds.
342    pub slowdown_max_delay_ms: u64,
343    /// Jitter mode applied to computed retry delays.
344    pub jitter: RetryJitter,
345}
346
347/// Destination `PutObject` failure counters.
348#[derive(Clone, Debug, Default, Serialize, Deserialize)]
349pub struct PutDiagnostics {
350    /// Number of failed `PutObject` attempts, including retryable attempts that
351    /// later succeeded.
352    pub failed_attempts: u64,
353    /// Failed `PutObject` attempts grouped by AWS error code or SDK failure kind.
354    pub failures_by_error_code: BTreeMap<String, u64>,
355    /// Application-level retry attempts scheduled after failed `PutObject` attempts.
356    pub retry_attempts: u64,
357    /// Failed `PutObject` attempts classified as throttling.
358    pub throttled_attempts: u64,
359    /// Number of waits on the shared destination PUT throttle.
360    pub throttle_waits: u64,
361    /// Total milliseconds spent waiting on the shared destination PUT throttle.
362    pub throttle_wait_millis: u64,
363}
364
365/// Status for a single destination object operation.
366#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
367#[serde(rename_all = "snake_case")]
368pub enum OperationStatus {
369    /// The destination key was absent and was uploaded.
370    UploadedNew,
371    /// The destination key existed and was overwritten.
372    UploadedChanged,
373    /// The destination key existed and already matched the source entry.
374    SkippedUnchanged,
375    /// A conditional write failed because the destination changed after listing.
376    ConditionalConflict,
377    /// The destination key was extra and was deleted.
378    DeletedExtra,
379    /// The object operation failed.
380    Error,
381}
382
383/// Status for a single dry-run destination operation.
384#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
385#[serde(rename_all = "snake_case")]
386pub enum DryRunOperationStatus {
387    /// The destination entry is absent and would be uploaded or created.
388    WouldUploadNew,
389    /// The destination entry exists and would be replaced.
390    WouldUploadChanged,
391    /// The destination entry already matches the source entry.
392    SkippedUnchanged,
393    /// The destination object is extra and would be deleted.
394    WouldDeleteExtra,
395    /// The object operation failed while planning.
396    Error,
397}
398
399/// Per-object operation result.
400#[derive(Clone, Debug, Serialize, Deserialize)]
401pub struct ObjectReport {
402    /// Operation status.
403    pub status: OperationStatus,
404    /// Destination object key or local path.
405    pub key: String,
406    /// Source ZIP path when the operation corresponds to a ZIP entry.
407    #[serde(skip_serializing_if = "Option::is_none")]
408    pub zip_path: Option<String>,
409    /// Source entry size in bytes when known.
410    #[serde(skip_serializing_if = "Option::is_none")]
411    pub size: Option<u64>,
412    /// Source MD5 digest when known.
413    #[serde(skip_serializing_if = "Option::is_none")]
414    pub md5: Option<String>,
415    /// Destination ETag observed during the initial listing when available.
416    #[serde(skip_serializing_if = "Option::is_none")]
417    pub destination_etag: Option<String>,
418    /// Error or conflict message when present.
419    #[serde(skip_serializing_if = "Option::is_none")]
420    pub message: Option<String>,
421}
422
423/// Per-object dry-run operation result.
424#[derive(Clone, Debug, Serialize, Deserialize)]
425pub struct DryRunObjectReport {
426    /// Planned operation status.
427    pub status: DryRunOperationStatus,
428    /// Destination object key or local path.
429    pub key: String,
430    /// Source ZIP path when the operation corresponds to a ZIP entry.
431    #[serde(skip_serializing_if = "Option::is_none")]
432    pub zip_path: Option<String>,
433    /// Source entry size in bytes when known.
434    #[serde(skip_serializing_if = "Option::is_none")]
435    pub size: Option<u64>,
436    /// Source MD5 digest when known.
437    #[serde(skip_serializing_if = "Option::is_none")]
438    pub md5: Option<String>,
439    /// Destination ETag observed during the initial listing when available.
440    #[serde(skip_serializing_if = "Option::is_none")]
441    pub destination_etag: Option<String>,
442    /// Error message when present.
443    #[serde(skip_serializing_if = "Option::is_none")]
444    pub message: Option<String>,
445}
446
/// Test-only helper: tallies every record in `report.operations` into
/// `report.summary`.
///
/// The counters are incremented on top of whatever `report.summary` already
/// holds; nothing is reset first.
#[cfg(test)]
pub(crate) fn summarize(report: &mut SyncReport) {
    // Split the borrow so the summary can be mutated while the operations
    // are being read.
    let SyncReport {
        summary,
        operations,
        ..
    } = report;
    for record in operations.iter() {
        summarize_operation(summary, record);
    }
}
453
454pub(crate) fn summarize_dry_run_operation(
455    summary: &mut UnzipDryRunSummary,
456    operation: &DryRunObjectReport,
457) {
458    match operation.status {
459        DryRunOperationStatus::WouldUploadNew => summary.would_upload_new += 1,
460        DryRunOperationStatus::WouldUploadChanged => summary.would_upload_changed += 1,
461        DryRunOperationStatus::SkippedUnchanged => summary.skipped_unchanged += 1,
462        DryRunOperationStatus::WouldDeleteExtra => summary.would_delete_extra += 1,
463        DryRunOperationStatus::Error => summary.errors += 1,
464    }
465}
466
467pub(crate) fn summarize_operation(summary: &mut SyncSummary, operation: &ObjectReport) {
468    match operation.status {
469        OperationStatus::UploadedNew => summary.uploaded_new += 1,
470        OperationStatus::UploadedChanged => summary.uploaded_changed += 1,
471        OperationStatus::SkippedUnchanged => summary.skipped_unchanged += 1,
472        OperationStatus::ConditionalConflict => summary.conditional_conflicts += 1,
473        OperationStatus::DeletedExtra => summary.deleted_extra += 1,
474        OperationStatus::Error => summary.errors += 1,
475    }
476}