Skip to main content

s3_unspool/
lib.rs

1//! Streaming ZIP upload and extraction for Amazon S3.
2//!
3//! `s3-unspool` zips local directories and existing S3 prefixes into local or
4//! S3 ZIP files, and unzips local or S3 ZIP files into local directories or S3
5//! prefixes.
6//!
7//! Local directory and S3-prefix zip operations generate the ZIP once. S3 ZIP
8//! destinations are streamed with multipart upload, and local ZIP destinations
9//! are written through a temporary sibling file before being renamed into place.
10//! Empty local directories and zero-byte S3 marker objects are preserved as ZIP
11//! directory entries.
12//!
13//! Extraction compares ZIP entries with destination objects listed by
14//! `ListObjectsV2`. Missing objects are uploaded with `If-None-Match: *`, and
15//! changed objects are uploaded with `If-Match` against the listed destination
16//! ETag so newer destination data is not overwritten accidentally.
17//! Conditional write conflicts are recorded and skipped by default; set
18//! [`SyncOptions::fail_on_conflict`] or [`LocalZipSyncOptions::fail_on_conflict`]
19//! to return an error on the first observed conflict.
20//!
21//! Conditional overwrites require `s3:GetObject` permission on destination
22//! objects as well as `s3:PutObject`. `s3-unspool` does not issue per-file
23//! destination `HeadObject` requests or read destination object bodies, but S3
24//! checks object-read permission when authorizing `If-Match` writes.
25//!
26//! Destination `PutObject` bodies are fed by a source-range scheduler, so a body
27//! can pause while waiting for planned ZIP bytes. For high-concurrency extracts,
28//! consider relaxing or disabling AWS SDK upload stalled-stream protection on
29//! the destination client. Keep download stalled-stream protection enabled for
30//! source reads. The repository CLI and Lambda example configure this split.
31//! Library users can call [`sync_zip_to_s3_with_clients`] when source reads and
32//! destination writes need separate S3 client configuration.
33//!
34//! [`inspect_s3_zip`] reads source ZIP size and file count without downloading
35//! the archive. It is useful before choosing memory-sensitive scheduler options
36//! with [`AdaptiveSourceWindow`] or
37//! [`SyncOptions::with_source_window_memory_budget_mb`].
38//!
39//! ZIPs created with [`upload_directory_zip_to_s3`], [`zip_directory_to_file`],
40//! [`zip_s3_prefix_to_s3`], or [`zip_s3_prefix_to_file`] include an embedded catalog at
41//! [`EMBEDDED_CATALOG_PATH`] by default. The catalog stores each file path and
42//! MD5 digest so later extracts can skip unchanged files before decompressing
43//! them. Set [`SyncOptions::force_hash_comparison`] when you need to measure
44//! or force the fallback extract-and-hash path.
45//!
46//! Use [`SyncOptions::with_selection`] or the equivalent local unzip option
47//! when only a subset of ZIP paths should be restored. Selection patterns use
48//! gitignore-style syntax and are applied before source range planning, so
49//! ranged `GetObject` requests are planned only for selected entries that still
50//! need source bytes. Exclude-only selections restore every non-excluded ZIP
51//! path. Selected extracts reject [`SyncOptions::delete_extra_objects`] because
52//! unselected destination objects are outside the restore scope.
53//!
54//! ZIP directory entries round-trip as zero-byte S3 marker objects whose keys
55//! end in `/`. Local upload preserves empty directories as ZIP directory
56//! entries, and S3-prefix upload preserves zero-byte S3 marker objects as ZIP
57//! directory entries while rejecting non-empty S3 objects whose keys end in
58//! `/` as ambiguous.
59//!
60//! # Extract an S3 ZIP to a Destination Prefix
61//!
62//! ```no_run
63//! use aws_config::BehaviorVersion;
64//! use aws_sdk_s3::Client;
65//! use s3_unspool::{S3Object, S3Prefix, SyncOptions, sync_zip_to_s3};
66//!
67//! # async fn run() -> s3_unspool::Result<()> {
68//! let config = aws_config::load_defaults(BehaviorVersion::latest()).await;
69//! let client = Client::new(&config);
70//!
71//! let extract = SyncOptions::new(
72//!     S3Object::parse("s3://my-bucket/releases/site.zip")?,
73//!     S3Prefix::parse("s3://my-bucket/www/")?,
74//! )
75//! .delete_extra_objects();
76//!
77//! let report = sync_zip_to_s3(&client, extract).await?;
78//! println!("uploaded changed files: {}", report.summary.uploaded_changed);
79//! # Ok(())
80//! # }
81//! ```
82//!
83//! # Extract Selected Entries from an S3 ZIP
84//!
85//! ```no_run
86//! use aws_config::BehaviorVersion;
87//! use aws_sdk_s3::Client;
88//! use s3_unspool::{S3Object, S3Prefix, SyncOptions, UnzipSelection, sync_zip_to_s3};
89//!
90//! # async fn run() -> s3_unspool::Result<()> {
91//! let config = aws_config::load_defaults(BehaviorVersion::latest()).await;
92//! let client = Client::new(&config);
93//!
94//! let extract = SyncOptions::new(
95//!     S3Object::parse("s3://my-bucket/releases/site.zip")?,
96//!     S3Prefix::parse("s3://my-bucket/www/")?,
97//! )
98//! .with_selection(
99//!     UnzipSelection::new()
100//!         .include("index.md")
101//!         .include("docs/**/*.md")
102//!         .exclude("docs/drafts/**"),
103//! );
104//!
105//! let report = sync_zip_to_s3(&client, extract).await?;
106//! println!("processed entries: {}", report.summary.zip_files);
107//! # Ok(())
108//! # }
109//! ```
110//!
111//! # Upload a Directory as a Cataloged ZIP
112//!
113//! ```no_run
114//! use aws_config::BehaviorVersion;
115//! use aws_sdk_s3::Client;
116//! use s3_unspool::{S3Object, UploadOptions, upload_directory_zip_to_s3};
117//!
118//! # async fn run() -> s3_unspool::Result<()> {
119//! let config = aws_config::load_defaults(BehaviorVersion::latest()).await;
120//! let client = Client::new(&config);
121//!
122//! let upload = UploadOptions::new(
123//!     "./site",
124//!     S3Object::parse("s3://my-bucket/releases/site.zip")?,
125//! );
126//! let report = upload_directory_zip_to_s3(&client, upload).await?;
127//! println!("uploaded files: {}", report.files);
128//! # Ok(())
129//! # }
130//! ```
131//!
132//! # Upload an S3 Prefix as a Cataloged ZIP
133//!
134//! ```no_run
135//! use aws_config::BehaviorVersion;
136//! use aws_sdk_s3::Client;
137//! use s3_unspool::{S3Object, S3Prefix, S3PrefixUploadOptions, zip_s3_prefix_to_s3};
138//!
139//! # async fn run() -> s3_unspool::Result<()> {
140//! let config = aws_config::load_defaults(BehaviorVersion::latest()).await;
141//! let client = Client::new(&config);
142//!
143//! let upload = S3PrefixUploadOptions::new(
144//!     S3Prefix::parse("s3://my-bucket/www/")?,
145//!     S3Object::parse("s3://my-bucket/releases/site.zip")?,
146//! );
147//! let report = zip_s3_prefix_to_s3(&client, upload).await?;
148//! println!("uploaded files: {}", report.files);
149//! # Ok(())
150//! # }
151//! ```
152//!
153//! # Assumptions
154//!
155//! The crate assumes destination objects use single-part S3 ETags that match the
156//! MD5 digest of the object body. Multipart destination objects and SSE-C
157//! destination ETags are intentionally out of scope for comparison.
158
159#![deny(missing_docs)]
160
161mod catalog;
162mod constants;
163mod entry_reader;
164mod error;
165mod extract;
166mod inspect;
167mod options;
168mod range;
169mod report;
170mod s3_uri;
171mod source;
172mod upload;
173mod zip_manifest;
174
175pub use constants::EMBEDDED_CATALOG_PATH;
176pub use error::{Error, Result};
177pub use extract::{
178    dry_run_sync_zip_to_s3, dry_run_sync_zip_to_s3_with_clients, dry_run_unzip_file_to_local,
179    dry_run_unzip_file_to_s3, dry_run_unzip_s3_zip_to_local, sync_zip_to_s3,
180    sync_zip_to_s3_with_clients, unzip_file_to_local, unzip_file_to_s3, unzip_s3_zip_to_local,
181};
182pub use inspect::{S3ZipInfo, inspect_s3_zip};
183pub use options::{
184    AdaptiveSourceWindow, ComparisonMode, ConflictPolicy, DestinationCleanup, LocalUnzipOptions,
185    LocalZipOptions, LocalZipSyncOptions, PutRetryPolicy, RetryJitter, S3PrefixLocalZipOptions,
186    S3PrefixUploadOptions, S3ZipLocalUnzipOptions, SyncOptions, UnzipSelection, UploadOptions,
187    UploadProgress, UploadProgressHandler, ZipCompression, adaptive_source_get_concurrency,
188};
189pub use report::{
190    DryRunDiagnostics, DryRunObjectReport, DryRunOperationStatus, LocalUnzipDiagnostics,
191    LocalUnzipReport, LocalZipReport, LocalZipToS3Report, ObjectReport, OperationStatus,
192    PutDiagnostics, PutRetryDiagnostics, S3PrefixUploadReport, SourceDiagnostics, SyncDiagnostics,
193    SyncReport, SyncSummary, UnzipDryRunReport, UnzipDryRunSummary, UploadReport, ZipDryRunReport,
194};
195pub use s3_uri::{S3Object, S3Prefix};
196pub use upload::{
197    dry_run_upload_directory_zip_to_s3, dry_run_zip_directory_to_file,
198    dry_run_zip_s3_prefix_to_file, dry_run_zip_s3_prefix_to_s3, upload_directory_zip_to_s3,
199    zip_directory_to_file, zip_s3_prefix_to_file, zip_s3_prefix_to_s3,
200};
201
202#[cfg(test)]
203mod tests;