s3_unspool/report.rs
1use std::collections::BTreeMap;
2
3use serde::{Deserialize, Serialize};
4
5use crate::options::RetryJitter;
6use crate::s3_uri::{S3Object, S3Prefix};
7
/// Summary returned by [`crate::upload_directory_zip_to_s3`].
///
/// The report can be serialized and deserialized with serde. On input, a
/// missing `directories` field falls back to `0` and a missing
/// `include_catalog` field falls back to `true`; every other field is required.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct UploadReport {
    /// Local directory that was uploaded.
    pub source_dir: String,
    /// Destination ZIP object.
    pub destination: S3Object,
    /// Number of regular files included in the ZIP.
    pub files: usize,
    /// Number of preserved directory entries included in the ZIP.
    ///
    /// Optional on input: `#[serde(default)]` yields `0` when the field is absent.
    #[serde(default)]
    pub directories: usize,
    /// Whether the embedded update catalog was included.
    ///
    /// Optional on input: an absent field deserializes as `true`
    /// (see `default_include_catalog`).
    #[serde(default = "default_include_catalog")]
    pub include_catalog: bool,
    /// Total uncompressed payload bytes.
    pub uncompressed_bytes: u64,
    /// Total uploaded ZIP object bytes.
    pub zip_bytes: u64,
}
28
/// Summary returned by [`crate::zip_s3_prefix_to_s3`].
///
/// The report can be serialized and deserialized with serde. On input, only
/// `include_catalog` is optional (it falls back to `true`); every other field,
/// including `directories`, must be present.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct S3PrefixUploadReport {
    /// Source prefix that was uploaded.
    pub source: S3Prefix,
    /// Destination ZIP object.
    pub destination: S3Object,
    /// Number of regular source objects included as ZIP file entries.
    pub files: usize,
    /// Number of zero-byte trailing-slash source objects included as ZIP directories.
    pub directories: usize,
    /// Whether the embedded update catalog was included.
    ///
    /// Optional on input: an absent field deserializes as `true`
    /// (see `default_include_catalog`).
    #[serde(default = "default_include_catalog")]
    pub include_catalog: bool,
    /// Total number of ZIP entries written, excluding the embedded catalog.
    pub entries: usize,
    /// Total uncompressed payload bytes across regular file entries.
    pub uncompressed_bytes: u64,
    /// Total uploaded ZIP object bytes.
    pub zip_bytes: u64,
}
50
/// Summary returned by local ZIP creation helpers.
///
/// The report can be serialized and deserialized with serde. On input, only
/// `include_catalog` is optional (it falls back to `true`); every other field
/// must be present.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LocalZipReport {
    /// Source tree that was zipped.
    pub source: String,
    /// Destination ZIP file path.
    pub destination_zip: String,
    /// Number of regular file entries included in the ZIP.
    pub files: usize,
    /// Number of preserved directory entries included in the ZIP.
    pub directories: usize,
    /// Whether the embedded update catalog was included.
    ///
    /// Optional on input: an absent field deserializes as `true`
    /// (see `default_include_catalog`).
    #[serde(default = "default_include_catalog")]
    pub include_catalog: bool,
    /// Total number of ZIP entries written, excluding the embedded catalog.
    pub entries: usize,
    /// Total uncompressed payload bytes across regular file entries.
    pub uncompressed_bytes: u64,
    /// Size of the generated ZIP file.
    pub zip_bytes: u64,
}
72
/// Summary returned by ZIP dry-run helpers.
///
/// The report can be serialized and deserialized with serde. No field declares
/// a serde default, so every field, including `include_catalog`, must be
/// present on input.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ZipDryRunReport {
    /// Source tree that would be zipped.
    pub source: String,
    /// Destination ZIP file path or S3 object URI.
    pub destination: String,
    /// Number of regular file entries that would be included.
    pub files: usize,
    /// Number of directory entries that would be included.
    pub directories: usize,
    /// Total number of ZIP entries that would be written, excluding the embedded catalog.
    pub entries: usize,
    /// Total uncompressed payload bytes across regular file entries.
    pub uncompressed_bytes: u64,
    /// Whether the embedded update catalog would be included.
    pub include_catalog: bool,
}
91
/// Serde fallback for `include_catalog` fields that are absent from the input.
///
/// Referenced by `#[serde(default = "default_include_catalog")]` on the upload
/// and local ZIP report structs; always returns `true`.
fn default_include_catalog() -> bool {
    const INCLUDE_CATALOG_WHEN_ABSENT: bool = true;
    INCLUDE_CATALOG_WHEN_ABSENT
}
95
/// Aggregate counters for an extract run.
///
/// Derives `Default`, so a fresh summary starts with every counter at zero.
/// `summarize_operation` increments exactly one of the per-status counters
/// (`uploaded_new`, `uploaded_changed`, `skipped_unchanged`,
/// `conditional_conflicts`, `deleted_extra`, `errors`) per operation; it does
/// not touch `zip_files` or `destination_objects`.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct SyncSummary {
    /// Number of source ZIP entries found, excluding the embedded catalog.
    pub zip_files: usize,
    /// Number of destination objects listed before extraction.
    pub destination_objects: usize,
    /// Number of missing destination objects uploaded.
    pub uploaded_new: usize,
    /// Number of changed destination objects uploaded.
    pub uploaded_changed: usize,
    /// Number of unchanged destination objects skipped.
    pub skipped_unchanged: usize,
    /// Number of conditional write conflicts.
    pub conditional_conflicts: usize,
    /// Number of extra destination objects deleted.
    pub deleted_extra: usize,
    /// Number of per-object errors.
    ///
    /// The `has_errors` helpers on the report types test this counter.
    pub errors: usize,
}
116
/// Aggregate counters for an extract dry run.
///
/// Derives `Default`, so a fresh summary starts with every counter at zero.
/// `summarize_dry_run_operation` increments exactly one of the per-status
/// counters (`would_upload_new`, `would_upload_changed`, `skipped_unchanged`,
/// `would_delete_extra`, `errors`) per planned operation; it does not touch
/// `zip_files` or `destination_objects`.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct UnzipDryRunSummary {
    /// Number of source ZIP entries found, excluding the embedded catalog.
    pub zip_files: usize,
    /// Number of destination objects listed before extraction, for S3 destinations.
    pub destination_objects: usize,
    /// Number of missing destination entries that would be uploaded or created.
    pub would_upload_new: usize,
    /// Number of existing destination entries that would be replaced.
    pub would_upload_changed: usize,
    /// Number of destination entries that already match the source entry.
    pub skipped_unchanged: usize,
    /// Number of extra destination objects that would be deleted.
    pub would_delete_extra: usize,
    /// Number of per-entry errors found while planning.
    ///
    /// `UnzipDryRunReport::has_errors` tests this counter.
    pub errors: usize,
}
135
/// Full report returned by [`crate::sync_zip_to_s3`].
///
/// Combines the aggregate `summary` with the per-object `operations` list.
/// When serialized, the `diagnostics` field is omitted entirely if it is `None`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SyncReport {
    /// Source ZIP object.
    pub source: S3Object,
    /// Destination prefix.
    pub destination: S3Prefix,
    /// Aggregate extract counters.
    pub summary: SyncSummary,
    /// Optional source scheduler and destination `PutObject` diagnostics.
    ///
    /// Skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub diagnostics: Option<SyncDiagnostics>,
    /// Per-object operation records.
    pub operations: Vec<ObjectReport>,
}
151
152impl SyncReport {
153 /// Returns `true` when one or more object operations failed.
154 pub fn has_errors(&self) -> bool {
155 self.summary.errors > 0
156 }
157}
158
/// Full report returned when extracting a local ZIP into an S3 prefix.
///
/// Combines the aggregate `summary` with the per-entry `operations` list.
/// All fields are required when deserializing; none declares a serde default.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LocalZipToS3Report {
    /// Source ZIP file path.
    pub source_zip: String,
    /// Destination prefix.
    pub destination: S3Prefix,
    /// Aggregate extract counters.
    pub summary: SyncSummary,
    /// Per-entry operation records.
    pub operations: Vec<ObjectReport>,
}
171
172impl LocalZipToS3Report {
173 /// Returns `true` when one or more entry operations failed.
174 pub fn has_errors(&self) -> bool {
175 self.summary.errors > 0
176 }
177}
178
/// Full report returned when extracting a ZIP into a local directory.
///
/// Combines the aggregate `summary` with the per-entry `operations` list.
/// When serialized, the `diagnostics` field is omitted entirely if it is `None`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LocalUnzipReport {
    /// Source ZIP object URI or local file path.
    pub source_zip: String,
    /// Destination local directory.
    pub destination_dir: String,
    /// Aggregate extract counters.
    pub summary: SyncSummary,
    /// Optional source scheduler diagnostics for S3 ZIP sources.
    ///
    /// Skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub diagnostics: Option<LocalUnzipDiagnostics>,
    /// Per-entry operation records.
    pub operations: Vec<ObjectReport>,
}
194
195impl LocalUnzipReport {
196 /// Returns `true` when one or more entry operations failed.
197 pub fn has_errors(&self) -> bool {
198 self.summary.errors > 0
199 }
200}
201
/// Full report returned by extract dry-run helpers.
///
/// Combines the aggregate dry-run `summary` with the per-entry `operations`
/// list. When serialized, the `diagnostics` field is omitted entirely if it is
/// `None`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct UnzipDryRunReport {
    /// Source ZIP object URI or local file path.
    pub source_zip: String,
    /// Destination prefix URI or local directory.
    pub destination: String,
    /// Aggregate dry-run counters.
    pub summary: UnzipDryRunSummary,
    /// Optional source scheduler diagnostics for S3 ZIP sources.
    ///
    /// Skipped during serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub diagnostics: Option<DryRunDiagnostics>,
    /// Per-entry dry-run operation records.
    pub operations: Vec<DryRunObjectReport>,
}
217
218impl UnzipDryRunReport {
219 /// Returns `true` when one or more planned operations failed.
220 pub fn has_errors(&self) -> bool {
221 self.summary.errors > 0
222 }
223}
224
/// Effective extract settings and aggregate diagnostics.
///
/// Carried in the optional `diagnostics` field of [`SyncReport`]. The scalar
/// fields record the effective settings; `source` and `put` hold the
/// aggregate counters. No field declares a serde default, so all are required
/// when deserializing.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SyncDiagnostics {
    /// Effective entry concurrency.
    pub concurrency: usize,
    /// Effective destination `PutObject` concurrency.
    pub put_concurrency: usize,
    /// Effective destination `PutObject` retry policy.
    pub put_retry: PutRetryDiagnostics,
    /// Effective source block size in bytes.
    pub source_block_size: usize,
    /// Effective source block merge gap in bytes.
    pub source_block_merge_gap: usize,
    /// Effective source ranged `GetObject` concurrency.
    pub source_get_concurrency: usize,
    /// Effective source block window capacity in bytes.
    pub source_window_capacity: usize,
    /// Aggregate source scheduler counters.
    pub source: SourceDiagnostics,
    /// Aggregate destination `PutObject` counters.
    pub put: PutDiagnostics,
}
247
/// Effective local unzip settings and aggregate source diagnostics.
///
/// Carried in the optional `diagnostics` field of [`LocalUnzipReport`]. It has
/// the same field set as [`DryRunDiagnostics`] and, unlike [`SyncDiagnostics`],
/// no `PutObject` fields. All fields are required when deserializing.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LocalUnzipDiagnostics {
    /// Effective entry concurrency.
    pub concurrency: usize,
    /// Effective source block size in bytes.
    pub source_block_size: usize,
    /// Effective source block merge gap in bytes.
    pub source_block_merge_gap: usize,
    /// Effective source ranged `GetObject` concurrency.
    pub source_get_concurrency: usize,
    /// Effective source block window capacity in bytes.
    pub source_window_capacity: usize,
    /// Aggregate source scheduler counters.
    pub source: SourceDiagnostics,
}
264
/// Effective dry-run source settings and aggregate diagnostics.
///
/// Carried in the optional `diagnostics` field of [`UnzipDryRunReport`]. It
/// has the same field set as [`LocalUnzipDiagnostics`]. All fields are
/// required when deserializing.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DryRunDiagnostics {
    /// Effective entry concurrency.
    pub concurrency: usize,
    /// Effective source block size in bytes.
    pub source_block_size: usize,
    /// Effective source block merge gap in bytes.
    pub source_block_merge_gap: usize,
    /// Effective source ranged `GetObject` concurrency.
    pub source_get_concurrency: usize,
    /// Effective source block window capacity in bytes.
    pub source_window_capacity: usize,
    /// Aggregate source scheduler counters.
    pub source: SourceDiagnostics,
}
281
/// Source scheduler and ranged `GetObject` counters.
///
/// Embedded as the `source` field of [`SyncDiagnostics`],
/// [`LocalUnzipDiagnostics`], and [`DryRunDiagnostics`]. All counters are
/// `u64` except `source_amplification`, which is an `f64` ratio. The struct
/// does not derive `Default`, and every field is required when deserializing.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SourceDiagnostics {
    /// Source ZIP object size in bytes.
    pub source_zip_bytes: u64,
    /// Number of ZIP entries included in source plans.
    pub planned_entries: u64,
    /// Number of source blocks included in source plans.
    pub planned_blocks: u64,
    /// Number of source blocks fetched successfully.
    pub fetched_blocks: u64,
    /// Total ranged `GetObject` attempts, including retries.
    pub source_get_attempts: u64,
    /// Total ranged `GetObject` retries.
    pub source_get_retries: u64,
    /// Ranged `GetObject` request errors.
    pub source_get_request_errors: u64,
    /// Ranged `GetObject` response body errors.
    pub source_get_body_errors: u64,
    /// Ranged `GetObject` responses that ended before the requested bytes were read.
    pub source_get_short_body_errors: u64,
    /// Source block fetches that failed after all retry attempts.
    pub source_get_errors: u64,
    /// Sum of planned source block sizes.
    pub planned_source_bytes: u64,
    /// Sum of fetched source block sizes.
    pub fetched_source_bytes: u64,
    /// Unique source bytes covered by fetched ranges.
    pub unique_source_bytes: u64,
    /// Ratio of fetched source bytes to unique fetched source bytes.
    // NOTE(review): the value when no bytes were fetched is decided by the
    // producer, which is not visible in this file — confirm before relying on it.
    pub source_amplification: f64,
    /// Number of block read requests served from ready blocks.
    pub block_hits: u64,
    /// Number of block read requests that waited for scheduled data.
    pub block_waits: u64,
    /// Number of ready source blocks released from the resident window after all
    /// planned claims consumed them.
    pub block_releases: u64,
    /// Number of reader cache misses. This should remain zero for the planned
    /// source scheduler.
    pub block_misses: u64,
    /// Number of explicit replay fetches for blocks that had already been
    /// released from the resident window.
    pub block_refetches: u64,
    /// Highest number of concurrent ranged `GetObject` requests.
    pub active_gets_high_water: u64,
}
329
/// Effective destination `PutObject` retry settings.
///
/// Embedded as the `put_retry` field of [`SyncDiagnostics`]. Delay fields are
/// expressed in milliseconds, with separate base/maximum pairs for
/// non-throttling and throttling failures. All fields are required when
/// deserializing.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PutRetryDiagnostics {
    /// Maximum application-level `PutObject` attempts per object.
    pub max_attempts: usize,
    /// Base delay for retryable non-throttling failures, in milliseconds.
    pub base_delay_ms: u64,
    /// Maximum delay for retryable non-throttling failures, in milliseconds.
    pub max_delay_ms: u64,
    /// Base delay for throttling failures such as S3 `SlowDown`, in milliseconds.
    pub slowdown_base_delay_ms: u64,
    /// Maximum delay for throttling failures such as S3 `SlowDown`, in milliseconds.
    pub slowdown_max_delay_ms: u64,
    /// Jitter mode applied to computed retry delays.
    pub jitter: RetryJitter,
}
346
/// Destination `PutObject` failure counters.
///
/// Embedded as the `put` field of [`SyncDiagnostics`]. Derives `Default`, so a
/// fresh value has every counter at zero and an empty
/// `failures_by_error_code` map.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct PutDiagnostics {
    /// Number of failed `PutObject` attempts, including retryable attempts that
    /// later succeeded.
    pub failed_attempts: u64,
    /// Failed `PutObject` attempts grouped by AWS error code or SDK failure kind.
    ///
    /// Stored in a `BTreeMap`, so iteration (and therefore serialized output)
    /// is ordered by key.
    pub failures_by_error_code: BTreeMap<String, u64>,
    /// Application-level retry attempts scheduled after failed `PutObject` attempts.
    pub retry_attempts: u64,
    /// Failed `PutObject` attempts classified as throttling.
    pub throttled_attempts: u64,
    /// Number of waits on the shared destination PUT throttle.
    pub throttle_waits: u64,
    /// Total milliseconds spent waiting on the shared destination PUT throttle.
    pub throttle_wait_millis: u64,
}
364
/// Status for a single destination object operation.
///
/// Variant names are serialized in `snake_case` (for example `UploadedNew`
/// becomes `uploaded_new`). Each variant maps to one counter in
/// [`SyncSummary`] via `summarize_operation`.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OperationStatus {
    /// The destination key was absent and was uploaded.
    UploadedNew,
    /// The destination key existed and was overwritten.
    UploadedChanged,
    /// The destination key existed and already matched the source entry.
    SkippedUnchanged,
    /// A conditional write failed because the destination changed after listing.
    ConditionalConflict,
    /// The destination key was extra and was deleted.
    DeletedExtra,
    /// The object operation failed.
    Error,
}
382
/// Status for a single dry-run destination operation.
///
/// Variant names are serialized in `snake_case` (for example `WouldUploadNew`
/// becomes `would_upload_new`). Each variant maps to one counter in
/// [`UnzipDryRunSummary`] via `summarize_dry_run_operation`.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DryRunOperationStatus {
    /// The destination entry is absent and would be uploaded or created.
    WouldUploadNew,
    /// The destination entry exists and would be replaced.
    WouldUploadChanged,
    /// The destination entry already matches the source entry.
    SkippedUnchanged,
    /// The destination object is extra and would be deleted.
    WouldDeleteExtra,
    /// The object operation failed while planning.
    Error,
}
398
/// Per-object operation result.
///
/// `status` and `key` are always present. Every `Option` field is omitted from
/// serialized output when it is `None`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ObjectReport {
    /// Operation status.
    pub status: OperationStatus,
    /// Destination object key or local path.
    pub key: String,
    /// Source ZIP path when the operation corresponds to a ZIP entry.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub zip_path: Option<String>,
    /// Source entry size in bytes when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size: Option<u64>,
    /// Source MD5 digest when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub md5: Option<String>,
    /// Destination ETag observed during the initial listing when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub destination_etag: Option<String>,
    /// Error or conflict message when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
}
422
/// Per-object dry-run operation result.
///
/// Mirrors the field layout of [`ObjectReport`] but carries a
/// [`DryRunOperationStatus`]. `status` and `key` are always present; every
/// `Option` field is omitted from serialized output when it is `None`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DryRunObjectReport {
    /// Planned operation status.
    pub status: DryRunOperationStatus,
    /// Destination object key or local path.
    pub key: String,
    /// Source ZIP path when the operation corresponds to a ZIP entry.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub zip_path: Option<String>,
    /// Source entry size in bytes when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size: Option<u64>,
    /// Source MD5 digest when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub md5: Option<String>,
    /// Destination ETag observed during the initial listing when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub destination_etag: Option<String>,
    /// Error message when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
}
446
/// Folds every recorded operation of `report` into `report.summary`.
///
/// Counters are added on top of whatever the summary already holds; the
/// summary is not reset first. Test-only helper.
#[cfg(test)]
pub(crate) fn summarize(report: &mut SyncReport) {
    // Destructure so the summary and the operations list are borrowed
    // independently of each other.
    let SyncReport {
        summary,
        operations,
        ..
    } = report;
    for entry in operations.iter() {
        summarize_operation(summary, entry);
    }
}
453
454pub(crate) fn summarize_dry_run_operation(
455 summary: &mut UnzipDryRunSummary,
456 operation: &DryRunObjectReport,
457) {
458 match operation.status {
459 DryRunOperationStatus::WouldUploadNew => summary.would_upload_new += 1,
460 DryRunOperationStatus::WouldUploadChanged => summary.would_upload_changed += 1,
461 DryRunOperationStatus::SkippedUnchanged => summary.skipped_unchanged += 1,
462 DryRunOperationStatus::WouldDeleteExtra => summary.would_delete_extra += 1,
463 DryRunOperationStatus::Error => summary.errors += 1,
464 }
465}
466
467pub(crate) fn summarize_operation(summary: &mut SyncSummary, operation: &ObjectReport) {
468 match operation.status {
469 OperationStatus::UploadedNew => summary.uploaded_new += 1,
470 OperationStatus::UploadedChanged => summary.uploaded_changed += 1,
471 OperationStatus::SkippedUnchanged => summary.skipped_unchanged += 1,
472 OperationStatus::ConditionalConflict => summary.conditional_conflicts += 1,
473 OperationStatus::DeletedExtra => summary.deleted_extra += 1,
474 OperationStatus::Error => summary.errors += 1,
475 }
476}