Skip to main content

s3rm_rs/config/args/
mod.rs

1use crate::callback::event_manager::EventManager;
2use crate::callback::filter_manager::FilterManager;
3use crate::config::{
4    CLITimeoutConfig, ClientConfig, Config, FilterConfig, ForceRetryConfig, RetryConfig,
5    TracingConfig,
6};
7use crate::types::{AccessKeys, ClientConfigLocation, S3Credentials, StoragePath};
8use aws_sdk_s3::types::RequestPayer;
9use aws_smithy_types::checksum_config::RequestChecksumCalculation;
10use chrono::{DateTime, Utc};
11use clap::Parser;
12use clap::builder::NonEmptyStringValueParser;
13use clap_verbosity_flag::{Verbosity, WarnLevel};
14use fancy_regex::Regex;
15use std::ffi::OsString;
16use std::path::PathBuf;
17
18#[cfg(feature = "version")]
19use shadow_rs::shadow;
20
21#[cfg(feature = "version")]
22shadow!(build);
23
24mod value_parser;
25
26#[cfg(test)]
27mod tests;
28
29// ---------------------------------------------------------------------------
30// Default constants (aligned with s3sync)
31// ---------------------------------------------------------------------------
32
// Bucket-name suffix associated with S3 Express One Zone storage.
// NOTE(review): presumably consumed by `is_express_onezone_storage`, which is
// defined outside this section — confirm.
const EXPRESS_ONEZONE_STORAGE_SUFFIX: &str = "--x-s3";

// Default values for the command-line arguments declared on `CLIArgs`.
// Each constant is wired in through `default_value_t` / `default_value` on the
// field of the matching name, so changing a value here changes the CLI default.
const DEFAULT_WORKER_SIZE: u16 = 16;
const DEFAULT_BATCH_SIZE: u16 = 200;
const DEFAULT_AWS_MAX_ATTEMPTS: u32 = 10;
const DEFAULT_FORCE_RETRY_COUNT: u32 = 0;
const DEFAULT_FORCE_RETRY_INTERVAL_MILLISECONDS: u64 = 1000;
const DEFAULT_INITIAL_BACKOFF_MILLISECONDS: u64 = 100;
const DEFAULT_JSON_TRACING: bool = false;
const DEFAULT_AWS_SDK_TRACING: bool = false;
const DEFAULT_SPAN_EVENTS_TRACING: bool = false;
const DEFAULT_DISABLE_COLOR_TRACING: bool = false;
const DEFAULT_WARN_AS_ERROR: bool = false;
const DEFAULT_FORCE_PATH_STYLE: bool = false;
const DEFAULT_DRY_RUN: bool = false;
const DEFAULT_MAX_KEYS: i32 = 1000;
const DEFAULT_DISABLE_STALLED_STREAM_PROTECTION: bool = false;
const DEFAULT_MAX_PARALLEL_LISTINGS: u16 = 16;
const DEFAULT_OBJECT_LISTING_QUEUE_SIZE: u32 = 200000;
const DEFAULT_PARALLEL_LISTING_MAX_DEPTH: u16 = 2;
const DEFAULT_ALLOW_PARALLEL_LISTINGS_IN_EXPRESS_ONE_ZONE: bool = false;
const DEFAULT_ACCELERATE: bool = false;
const DEFAULT_REQUEST_PAYER: bool = false;
const DEFAULT_SHOW_NO_PROGRESS: bool = false;
const DEFAULT_IF_MATCH: bool = false;
// The four Lua defaults below are referenced by `CLIArgs` fields that only
// exist when the `lua_support` feature is enabled; `allow(dead_code)`
// presumably keeps builds without that feature free of unused-constant warnings.
#[allow(dead_code)]
const DEFAULT_ALLOW_LUA_OS_LIBRARY: bool = false;
#[allow(dead_code)]
const DEFAULT_ALLOW_LUA_UNSAFE_VM: bool = false;
#[allow(dead_code)]
const DEFAULT_LUA_VM_MEMORY_LIMIT: &str = "64MiB";
// Milliseconds (see the `--lua-callback-timeout` help text on `CLIArgs`).
#[allow(dead_code)]
const DEFAULT_LUA_CALLBACK_TIMEOUT: u64 = 10_000;
const DEFAULT_DELETE_ALL_VERSIONS: bool = false;
const DEFAULT_KEEP_LATEST_ONLY: bool = false;
const DEFAULT_FORCE: bool = false;
69
70// ---------------------------------------------------------------------------
71// Error messages
72// ---------------------------------------------------------------------------
73
/// Message returned by `check_s3_target` and `CLIArgs::parse_target` when the
/// target argument is not a usable `s3://` path.
const ERROR_MESSAGE_INVALID_TARGET: &str = "target must be an S3 path (e.g. s3://bucket/prefix)";
75
76// ---------------------------------------------------------------------------
77// Value parser helpers
78// ---------------------------------------------------------------------------
79
80fn check_s3_target(s: &str) -> Result<String, String> {
81    if s.starts_with("s3://") && s.len() > 5 {
82        Ok(s.to_string())
83    } else {
84        Err(ERROR_MESSAGE_INVALID_TARGET.to_string())
85    }
86}
87
88fn parse_human_bytes(s: &str) -> Result<u64, String> {
89    value_parser::human_bytes::parse_human_bytes(s)
90}
91
92// ---------------------------------------------------------------------------
93// CLIArgs (clap-derived argument struct)
94// ---------------------------------------------------------------------------
95
// Command-line interface of s3rm, declared with clap's derive API.
//
// NOTE: clap's derive macro turns the `///` comments on the fields below into
// help text (unless a field sets `help = ...` explicitly), so editing them
// changes the `--help` output. Maintainer-only notes are written as `//`.
// Every option is also declared with `env`, i.e. it can be supplied through an
// environment variable as well as on the command line.
/// s3rm - Fast Amazon S3 object deletion tool.
///
/// Delete objects from S3 buckets with powerful filtering,
/// safety features, and versioning support.
///
/// Example:
///   s3rm s3://my-bucket/logs/2023/ --dry-run
///   s3rm s3://my-bucket/temp/ --filter-include-regex '.*\.tmp$' --force
///   s3rm s3://my-bucket/old-data/ --delete-all-versions -vv
#[derive(Parser, Clone, Debug)]
// With the `version` feature the version string is assembled from the
// shadow-rs `build` module; otherwise clap's default version is used.
#[cfg_attr(feature = "version", command(version = format!("{} ({} {}), {}", build::PKG_VERSION, build::SHORT_COMMIT, build::BUILD_TARGET, build::RUST_VERSION)))]
#[cfg_attr(not(feature = "version"), command(version))]
#[command(name = "s3rm", about, long_about = None)]
pub struct CLIArgs {
    // Validated by `check_s3_target`. When `--auto-complete-shell` is present
    // the placeholder "s3://ignored" is filled in, presumably so completions
    // can be generated without naming a real target.
    /// S3 target path: s3://<BUCKET_NAME>[/prefix]
    #[arg(
        env,
        help = "s3://<BUCKET_NAME>[/prefix]",
        value_parser = check_s3_target,
        default_value_if("auto_complete_shell", clap::builder::ArgPredicate::IsPresent, "s3://ignored"),
        required = false,
    )]
    pub target: String,

    // -----------------------------------------------------------------------
    // General options
    // -----------------------------------------------------------------------
    /// List objects that would be deleted without actually deleting them
    #[arg(short = 'd', long, env, default_value_t = DEFAULT_DRY_RUN, help_heading = "General")]
    pub dry_run: bool,

    /// Skip the confirmation prompt before deleting
    #[arg(short = 'f', long, env, default_value_t = DEFAULT_FORCE, help_heading = "General")]
    pub force: bool,

    /// Hide the progress bar
    #[arg(long, env, default_value_t = DEFAULT_SHOW_NO_PROGRESS, help_heading = "General")]
    pub show_no_progress: bool,

    /// Delete all versions of matching objects, including delete markers
    #[arg(long, env, default_value_t = DEFAULT_DELETE_ALL_VERSIONS, help_heading = "General")]
    pub delete_all_versions: bool,

    // Must be at least 1 when given; absent means no limit is configured here.
    /// Stop deleting after this many objects have been deleted
    #[arg(long, env, value_parser = clap::value_parser!(u64).range(1..), help_heading = "General")]
    pub max_delete: Option<u64>,

    // Only accepted together with --delete-all-versions, and mutually
    // exclusive with the content-type, metadata, tag, size and mtime filters
    // listed below (the key-regex filters are not in the conflict list).
    /// Keep only the latest version of each object, deleting all older versions
    #[arg(
        long,
        env,
        default_value_t = DEFAULT_KEEP_LATEST_ONLY,
        requires = "delete_all_versions",
        conflicts_with_all = [
            "filter_include_content_type_regex",
            "filter_exclude_content_type_regex",
            "filter_include_metadata_regex",
            "filter_exclude_metadata_regex",
            "filter_include_tag_regex",
            "filter_exclude_tag_regex",
            "filter_larger_size",
            "filter_smaller_size",
            "filter_mtime_before",
            "filter_mtime_after",
        ],
        help_heading = "General",
    )]
    // The Lua filter callback only exists as an argument when `lua_support`
    // is compiled in, so its conflict is added conditionally.
    #[cfg_attr(
        feature = "lua_support",
        arg(conflicts_with = "filter_callback_lua_script")
    )]
    pub keep_latest_only: bool,

    // -----------------------------------------------------------------------
    // Filter options (same as s3sync)
    // -----------------------------------------------------------------------
    // The regex filters are stored as strings after being checked by
    // `value_parser::regex::parse_regex`; they are compiled into `Regex`
    // values later, in `build_filter_config`.
    /// Delete only objects whose key matches this regex
    #[arg(long, env, value_parser = value_parser::regex::parse_regex, help_heading = "Filtering")]
    pub filter_include_regex: Option<String>,

    /// Skip objects whose key matches this regex
    #[arg(long, env, value_parser = value_parser::regex::parse_regex, help_heading = "Filtering")]
    pub filter_exclude_regex: Option<String>,

    /// Delete only objects whose content type matches this regex
    #[arg(long, env, value_parser = value_parser::regex::parse_regex, help_heading = "Filtering",
        long_help = r#"Delete only objects whose content type matches this regular expression.
This filter is applied after key, size, and time filters.
May require an extra API call per object to retrieve content type."#)]
    pub filter_include_content_type_regex: Option<String>,

    /// Skip objects whose content type matches this regex
    #[arg(long, env, value_parser = value_parser::regex::parse_regex, help_heading = "Filtering",
        long_help = r#"Skip objects whose content type matches this regular expression.
This filter is applied after key, size, and time filters.
May require an extra API call per object to retrieve content type."#)]
    pub filter_exclude_content_type_regex: Option<String>,

    /// Delete only objects whose user-defined metadata matches this regex
    #[arg(long, env, value_parser = value_parser::regex::parse_regex, help_heading = "Filtering",
        long_help = r#"Delete only objects whose user-defined metadata matches this regular expression.
Keys (lowercase) must be sorted alphabetically and separated by commas.
This filter is applied after all other filters except tag filters.
May require an extra API call per object to retrieve metadata.

Example: "key1=(value1|value2),key2=value2""#)]
    pub filter_include_metadata_regex: Option<String>,

    /// Skip objects whose user-defined metadata matches this regex
    #[arg(long, env, value_parser = value_parser::regex::parse_regex, help_heading = "Filtering",
        long_help = r#"Skip objects whose user-defined metadata matches this regular expression.
Keys (lowercase) must be sorted alphabetically and separated by commas.
This filter is applied after all other filters except tag filters.
May require an extra API call per object to retrieve metadata.

Example: "key1=(value1|value2),key2=value2""#)]
    pub filter_exclude_metadata_regex: Option<String>,

    /// Delete only objects whose tags match this regex
    #[arg(long, env, value_parser = value_parser::regex::parse_regex, help_heading = "Filtering",
        long_help = r#"Delete only objects whose tags match this regular expression.
Keys must be sorted alphabetically and separated by '&'.
This filter is applied after all other filters.
Requires an extra API call per object to retrieve tags.

Example: "key1=(value1|value2)&key2=value2""#)]
    pub filter_include_tag_regex: Option<String>,

    /// Skip objects whose tags match this regex
    #[arg(long, env, value_parser = value_parser::regex::parse_regex, help_heading = "Filtering",
        long_help = r#"Skip objects whose tags match this regular expression.
Keys must be sorted alphabetically and separated by '&'.
This filter is applied after all other filters.
Requires an extra API call per object to retrieve tags.

Example: "key1=(value1|value2)&key2=value2""#)]
    pub filter_exclude_tag_regex: Option<String>,

    /// Delete only objects modified before this time
    #[arg(
        long,
        env,
        help_heading = "Filtering",
        long_help = r#"Delete only objects older than the given time (RFC 3339 format).
Example: 2023-02-19T12:00:00Z"#
    )]
    pub filter_mtime_before: Option<DateTime<Utc>>,

    /// Delete only objects modified at or after this time
    #[arg(
        long,
        env,
        help_heading = "Filtering",
        long_help = r#"Delete only objects newer than or equal to the given time (RFC 3339 format).
Example: 2023-02-19T12:00:00Z"#
    )]
    pub filter_mtime_after: Option<DateTime<Utc>>,

    // Size filters stay as the user-supplied strings (checked by
    // `check_human_bytes`); `build_filter_config` converts them to byte counts.
    /// Delete only objects smaller than this size
    #[arg(
        long,
        env,
        value_parser = value_parser::human_bytes::check_human_bytes,
        help_heading = "Filtering",
        long_help = r#"Delete only objects smaller than the given size.
Supported suffixes: KB, KiB, MB, MiB, GB, GiB, TB, TiB"#
    )]
    pub filter_smaller_size: Option<String>,

    /// Delete only objects larger than or equal to this size
    #[arg(
        long,
        env,
        value_parser = value_parser::human_bytes::check_human_bytes,
        help_heading = "Filtering",
        long_help = r#"Delete only objects larger than or equal to the given size.
Supported suffixes: KB, KiB, MB, MiB, GB, GiB, TB, TiB"#
    )]
    pub filter_larger_size: Option<String>,

    // -----------------------------------------------------------------------
    // Tracing/Logging options (same as s3sync)
    // -----------------------------------------------------------------------
    /// Verbosity level (-q quiet, default normal, -v, -vv, -vvv)
    #[command(flatten)]
    pub verbosity: Verbosity<WarnLevel>,

    /// Output structured logs in JSON format (requires -f/--force)
    #[arg(long, env, default_value_t = DEFAULT_JSON_TRACING, requires = "force", help_heading = "Tracing/Logging")]
    pub json_tracing: bool,

    /// Include AWS SDK internal traces in log output
    #[arg(long, env, default_value_t = DEFAULT_AWS_SDK_TRACING, help_heading = "Tracing/Logging")]
    pub aws_sdk_tracing: bool,

    /// Include span open/close events in log output
    #[arg(long, env, default_value_t = DEFAULT_SPAN_EVENTS_TRACING, help_heading = "Tracing/Logging")]
    pub span_events_tracing: bool,

    /// Disable colored output in logs
    #[arg(long, env, default_value_t = DEFAULT_DISABLE_COLOR_TRACING, help_heading = "Tracing/Logging")]
    pub disable_color_tracing: bool,

    // -----------------------------------------------------------------------
    // AWS configuration (target-only, adapted from s3sync)
    // -----------------------------------------------------------------------
    /// Path to the AWS config file
    #[arg(long, env, help_heading = "AWS Configuration")]
    pub aws_config_file: Option<PathBuf>,

    /// Path to the AWS shared credentials file
    #[arg(long, env, help_heading = "AWS Configuration")]
    pub aws_shared_credentials_file: Option<PathBuf>,

    // A named profile and explicit keys are mutually exclusive ways of
    // supplying credentials.
    /// Target AWS CLI profile
    #[arg(long, env, conflicts_with_all = ["target_access_key", "target_secret_access_key", "target_session_token"], value_parser = NonEmptyStringValueParser::new(), help_heading = "AWS Configuration")]
    pub target_profile: Option<String>,

    // Access key and secret access key require each other.
    /// Target access key
    #[arg(long, env, conflicts_with_all = ["target_profile"], requires = "target_secret_access_key", value_parser = NonEmptyStringValueParser::new(), help_heading = "AWS Configuration")]
    pub target_access_key: Option<String>,

    /// Target secret access key
    #[arg(long, env, conflicts_with_all = ["target_profile"], requires = "target_access_key", value_parser = NonEmptyStringValueParser::new(), help_heading = "AWS Configuration")]
    pub target_secret_access_key: Option<String>,

    // Optional, but only meaningful alongside an access key.
    /// Target session token
    #[arg(long, env, conflicts_with_all = ["target_profile"], requires = "target_access_key", value_parser = NonEmptyStringValueParser::new(), help_heading = "AWS Configuration")]
    pub target_session_token: Option<String>,

    /// AWS region for the target
    #[arg(long, env, value_parser = NonEmptyStringValueParser::new(), help_heading = "AWS Configuration")]
    pub target_region: Option<String>,

    /// Custom S3-compatible endpoint URL (e.g. MinIO, Wasabi)
    #[arg(long, env, value_parser = value_parser::url::check_scheme, help_heading = "AWS Configuration")]
    pub target_endpoint_url: Option<String>,

    /// Use path-style access (required by some S3-compatible services)
    #[arg(long, env, default_value_t = DEFAULT_FORCE_PATH_STYLE, help_heading = "AWS Configuration")]
    pub target_force_path_style: bool,

    /// Enable S3 Transfer Acceleration
    #[arg(long, env, default_value_t = DEFAULT_ACCELERATE, help_heading = "AWS Configuration")]
    pub target_accelerate: bool,

    /// Enable requester-pays for the target bucket
    #[arg(long, env, default_value_t = DEFAULT_REQUEST_PAYER, help_heading = "AWS Configuration")]
    pub target_request_payer: bool,

    /// Disable stalled stream protection
    #[arg(long, env, default_value_t = DEFAULT_DISABLE_STALLED_STREAM_PROTECTION, help_heading = "AWS Configuration")]
    pub disable_stalled_stream_protection: bool,

    // -----------------------------------------------------------------------
    // Performance options (same as s3sync)
    // -----------------------------------------------------------------------
    /// Number of concurrent deletion workers (1-65535)
    #[arg(long, env, default_value_t = DEFAULT_WORKER_SIZE, value_parser = clap::value_parser!(u16).range(1..), help_heading = "Performance")]
    pub worker_size: u16,

    /// Objects per batch deletion request (1-1000; 1 uses single-object deletion)
    #[arg(long, env, default_value_t = DEFAULT_BATCH_SIZE, value_parser = clap::value_parser!(u16).range(1..=1000), help_heading = "Performance")]
    pub batch_size: u16,

    /// Number of concurrent listing operations
    #[arg(long, env, default_value_t = DEFAULT_MAX_PARALLEL_LISTINGS, value_parser = clap::value_parser!(u16).range(1..), help_heading = "Performance")]
    pub max_parallel_listings: u16,

    /// Maximum depth for parallel listing operations
    #[arg(long, env, default_value_t = DEFAULT_PARALLEL_LISTING_MAX_DEPTH, value_parser = clap::value_parser!(u16).range(1..), help_heading = "Performance")]
    pub max_parallel_listing_max_depth: u16,

    // Minimum accepted value is 10; `Config::try_from` additionally rejects
    // values smaller than the effective batch size.
    /// Maximum objects per second (rate limiting)
    #[arg(long, env, value_parser = clap::value_parser!(u32).range(10..), help_heading = "Performance")]
    pub rate_limit_objects: Option<u32>,

    /// Internal queue size for object listing
    #[arg(long, env, default_value_t = DEFAULT_OBJECT_LISTING_QUEUE_SIZE, value_parser = clap::value_parser!(u32).range(1..), help_heading = "Performance")]
    pub object_listing_queue_size: u32,

    /// Allow parallel listings in Express One Zone storage
    #[arg(long, env, default_value_t = DEFAULT_ALLOW_PARALLEL_LISTINGS_IN_EXPRESS_ONE_ZONE, help_heading = "Performance")]
    pub allow_parallel_listings_in_express_one_zone: bool,

    // -----------------------------------------------------------------------
    // Retry options (same as s3sync)
    // -----------------------------------------------------------------------
    /// Maximum retry attempts for AWS SDK operations
    #[arg(long, env, default_value_t = DEFAULT_AWS_MAX_ATTEMPTS, help_heading = "Retry Options")]
    pub aws_max_attempts: u32,

    /// Initial backoff in milliseconds for retries
    #[arg(long, env, default_value_t = DEFAULT_INITIAL_BACKOFF_MILLISECONDS, help_heading = "Retry Options")]
    pub initial_backoff_milliseconds: u64,

    /// Number of application-level retries after SDK retries are exhausted
    #[arg(long, env, default_value_t = DEFAULT_FORCE_RETRY_COUNT, help_heading = "Retry Options")]
    pub force_retry_count: u32,

    /// Interval in milliseconds between application-level retries
    #[arg(long, env, default_value_t = DEFAULT_FORCE_RETRY_INTERVAL_MILLISECONDS, help_heading = "Retry Options")]
    pub force_retry_interval_milliseconds: u64,

    // -----------------------------------------------------------------------
    // Timeout options (same as s3sync)
    // -----------------------------------------------------------------------
    // All timeouts are optional and have no default declared here.
    /// Overall operation timeout in milliseconds
    #[arg(long, env, help_heading = "Timeout Options")]
    pub operation_timeout_milliseconds: Option<u64>,

    /// Per-attempt operation timeout in milliseconds
    #[arg(long, env, help_heading = "Timeout Options")]
    pub operation_attempt_timeout_milliseconds: Option<u64>,

    /// Connection timeout in milliseconds
    #[arg(long, env, help_heading = "Timeout Options")]
    pub connect_timeout_milliseconds: Option<u64>,

    /// Read timeout in milliseconds
    #[arg(long, env, help_heading = "Timeout Options")]
    pub read_timeout_milliseconds: Option<u64>,

    // -----------------------------------------------------------------------
    // Lua scripting support (same as s3sync)
    // -----------------------------------------------------------------------
    // Every field in this section exists only when the crate is built with
    // the `lua_support` feature.
    /// Path to a Lua filter callback script
    #[cfg(feature = "lua_support")]
    #[arg(
        long,
        env,
        value_parser = value_parser::file_exist::is_file_exist,
        help_heading = "Lua scripting support",
        long_help = "Path to a Lua script used as a filter callback.\nThe script is called for each object and must return true to delete the object."
    )]
    pub filter_callback_lua_script: Option<String>,

    /// Path to a Lua event callback script
    #[cfg(feature = "lua_support")]
    #[arg(
        long,
        env,
        value_parser = value_parser::file_exist::is_file_exist,
        help_heading = "Lua scripting support",
        long_help = "Path to a Lua script used as an event callback.\nThe script receives deletion events such as progress, errors, and completion."
    )]
    pub event_callback_lua_script: Option<String>,

    // Mutually exclusive with --allow-lua-unsafe-vm (declared on both sides).
    /// Allow Lua OS and I/O library access in the Lua script
    #[cfg(feature = "lua_support")]
    #[arg(
        long,
        env,
        conflicts_with_all = ["allow_lua_unsafe_vm"],
        default_value_t = DEFAULT_ALLOW_LUA_OS_LIBRARY,
        help_heading = "Lua scripting support",
        long_help = "Allow Lua OS and I/O library access in the Lua script"
    )]
    pub allow_lua_os_library: bool,

    // Kept as the validated string form; the conversion to a byte count is
    // not part of this struct.
    /// Memory limit for the Lua VM
    #[cfg(feature = "lua_support")]
    #[arg(
        long,
        env,
        default_value = DEFAULT_LUA_VM_MEMORY_LIMIT,
        value_parser = value_parser::human_bytes::check_human_bytes,
        help_heading = "Lua scripting support",
        long_help = "Memory limit for the Lua VM.\nSupported suffixes: KB, KiB, MB, MiB, GB, GiB.\nSet to 0 for no limit. Exceeding this limit terminates the process."
    )]
    pub lua_vm_memory_limit: String,

    /// Timeout in milliseconds for each Lua callback invocation (0 = no timeout)
    #[cfg(feature = "lua_support")]
    #[arg(
        long,
        env,
        default_value_t = DEFAULT_LUA_CALLBACK_TIMEOUT,
        value_parser = clap::value_parser!(u64),
        help_heading = "Lua scripting support",
        long_help = "Timeout in milliseconds for each Lua callback invocation.\nSet to 0 to disable the timeout.\nIf a filter callback times out, the pipeline is cancelled.\nIf an event callback times out, a warning is logged and execution continues."
    )]
    pub lua_callback_timeout: u64,

    // -----------------------------------------------------------------------
    // Advanced options (same as s3sync)
    // -----------------------------------------------------------------------
    // Cannot be combined with --delete-all-versions.
    /// Use ETag-based conditional deletion to prevent race conditions
    #[arg(long, env, default_value_t = DEFAULT_IF_MATCH, conflicts_with = "delete_all_versions", help_heading = "Advanced")]
    pub if_match: bool,

    /// Treat warnings as errors (exit code 1 instead of 3)
    #[arg(long, env, default_value_t = DEFAULT_WARN_AS_ERROR, help_heading = "Advanced")]
    pub warn_as_error: bool,

    /// Maximum number of objects returned in a single list object request
    #[arg(long, env, default_value_t = DEFAULT_MAX_KEYS, value_parser = clap::value_parser!(i32).range(1..=32767), help_heading = "Advanced")]
    pub max_keys: i32,

    // Presence of this option also triggers the placeholder default on `target`.
    /// Generate shell completions for the given shell
    #[arg(long, env, help_heading = "Advanced")]
    pub auto_complete_shell: Option<clap_complete::shells::Shell>,

    // -----------------------------------------------------------------------
    // Dangerous options
    // -----------------------------------------------------------------------
    // Mutually exclusive with --allow-lua-os-library (declared on both sides).
    /// Allow loading unsafe Lua standard libraries and C modules
    #[cfg(feature = "lua_support")]
    #[arg(
        long,
        env,
        conflicts_with_all = ["allow_lua_os_library"],
        default_value_t = DEFAULT_ALLOW_LUA_UNSAFE_VM,
        help_heading = "Dangerous",
        long_help = "Allow loading unsafe Lua standard libraries and C modules.\nThis removes all sandbox restrictions from the Lua VM."
    )]
    pub allow_lua_unsafe_vm: bool,
}
514
515// ---------------------------------------------------------------------------
516// parse_from_args (public API)
517// ---------------------------------------------------------------------------
518
519/// Parse command-line arguments into a `CLIArgs` struct.
520///
521/// This is the primary entry point for argument parsing, following s3sync's pattern.
522///
523/// # Example
524///
525/// ```
526/// use s3rm_rs::config::args::parse_from_args;
527///
528/// let args = vec!["s3rm", "s3://my-bucket/prefix/", "--dry-run"];
529/// let cli_args = parse_from_args(args).unwrap();
530/// assert!(cli_args.dry_run);
531/// ```
532pub fn parse_from_args<I, T>(args: I) -> Result<CLIArgs, clap::Error>
533where
534    I: IntoIterator<Item = T>,
535    T: Into<OsString> + Clone,
536{
537    CLIArgs::try_parse_from(args)
538}
539
540/// Parse arguments and build a Config in one step.
541///
542/// Convenience function that combines `parse_from_args` and `Config::try_from`.
543pub fn build_config_from_args<I, T>(args: I) -> Result<Config, String>
544where
545    I: IntoIterator<Item = T>,
546    T: Into<OsString> + Clone,
547{
548    let cli_args = CLIArgs::try_parse_from(args).map_err(|e| e.to_string())?;
549    Config::try_from(cli_args)
550}
551
552// ---------------------------------------------------------------------------
553// Validation and Config conversion
554// ---------------------------------------------------------------------------
555
556impl CLIArgs {
557    fn build_filter_config(&self) -> Result<FilterConfig, String> {
558        // value_parser already validated regexes at parse time
559        let compile_regex = |pattern: &Option<String>| -> Option<Regex> {
560            pattern
561                .as_ref()
562                .map(|p| Regex::new(p).expect("regex was already validated by value_parser"))
563        };
564
565        let larger_size = self
566            .filter_larger_size
567            .as_deref()
568            .map(parse_human_bytes)
569            .transpose()
570            .map_err(|e| format!("Invalid filter-larger-size: {e}"))?;
571        let smaller_size = self
572            .filter_smaller_size
573            .as_deref()
574            .map(parse_human_bytes)
575            .transpose()
576            .map_err(|e| format!("Invalid filter-smaller-size: {e}"))?;
577
578        Ok(FilterConfig {
579            before_time: self.filter_mtime_before,
580            after_time: self.filter_mtime_after,
581            include_regex: compile_regex(&self.filter_include_regex),
582            exclude_regex: compile_regex(&self.filter_exclude_regex),
583            include_content_type_regex: compile_regex(&self.filter_include_content_type_regex),
584            exclude_content_type_regex: compile_regex(&self.filter_exclude_content_type_regex),
585            include_metadata_regex: compile_regex(&self.filter_include_metadata_regex),
586            exclude_metadata_regex: compile_regex(&self.filter_exclude_metadata_regex),
587            include_tag_regex: compile_regex(&self.filter_include_tag_regex),
588            exclude_tag_regex: compile_regex(&self.filter_exclude_tag_regex),
589            larger_size,
590            smaller_size,
591            keep_latest_only: self.keep_latest_only,
592        })
593    }
594
595    fn build_client_config(&self) -> Option<ClientConfig> {
596        let credential = if let Some(ref profile) = self.target_profile {
597            S3Credentials::Profile(profile.clone())
598        } else if let Some(ref access_key) = self.target_access_key {
599            let secret_key = self.target_secret_access_key.clone().unwrap_or_default();
600            S3Credentials::Credentials {
601                access_keys: AccessKeys {
602                    access_key: access_key.clone(),
603                    secret_access_key: secret_key,
604                    session_token: self.target_session_token.clone(),
605                },
606            }
607        } else {
608            S3Credentials::FromEnvironment
609        };
610
611        let request_payer = if self.target_request_payer {
612            Some(RequestPayer::Requester)
613        } else {
614            None
615        };
616
617        Some(ClientConfig {
618            client_config_location: ClientConfigLocation {
619                aws_config_file: self.aws_config_file.clone(),
620                aws_shared_credentials_file: self.aws_shared_credentials_file.clone(),
621            },
622            credential,
623            region: self.target_region.clone(),
624            endpoint_url: self.target_endpoint_url.clone(),
625            force_path_style: self.target_force_path_style,
626            accelerate: self.target_accelerate,
627            request_payer,
628            retry_config: RetryConfig {
629                aws_max_attempts: self.aws_max_attempts,
630                initial_backoff_milliseconds: self.initial_backoff_milliseconds,
631            },
632            cli_timeout_config: CLITimeoutConfig {
633                operation_timeout_milliseconds: self.operation_timeout_milliseconds,
634                operation_attempt_timeout_milliseconds: self.operation_attempt_timeout_milliseconds,
635                connect_timeout_milliseconds: self.connect_timeout_milliseconds,
636                read_timeout_milliseconds: self.read_timeout_milliseconds,
637            },
638            disable_stalled_stream_protection: self.disable_stalled_stream_protection,
639            request_checksum_calculation: RequestChecksumCalculation::WhenRequired,
640        })
641    }
642
643    fn build_tracing_config(&self, dry_run: bool) -> Option<TracingConfig> {
644        let mut tracing_config = self.verbosity.log_level().map(|log_level| TracingConfig {
645            tracing_level: log_level,
646            json_tracing: self.json_tracing,
647            aws_sdk_tracing: self.aws_sdk_tracing,
648            span_events_tracing: self.span_events_tracing,
649            disable_color_tracing: self.disable_color_tracing,
650        });
651
652        // In dry-run mode, boost the default level (Warn) to Info so dry-run
653        // output is visible without requiring -v. But respect explicit -q flags —
654        // if the user asked for quieter output, don't override their choice.
655        if dry_run {
656            if let Some(ref mut config) = tracing_config {
657                if config.tracing_level == log::Level::Warn {
658                    config.tracing_level = log::Level::Info;
659                }
660            }
661        }
662
663        tracing_config
664    }
665
666    fn parse_target(&self) -> Result<StoragePath, String> {
667        let uri = &self.target;
668        // Remove "s3://" prefix
669        let without_scheme = &uri[5..];
670
671        let (bucket, prefix) = match without_scheme.find('/') {
672            Some(idx) => {
673                let bucket = &without_scheme[..idx];
674                let prefix = &without_scheme[idx + 1..];
675                (bucket.to_string(), prefix.to_string())
676            }
677            None => (without_scheme.to_string(), String::new()),
678        };
679
680        if bucket.is_empty() {
681            return Err(ERROR_MESSAGE_INVALID_TARGET.to_string());
682        }
683
684        Ok(StoragePath::S3 { bucket, prefix })
685    }
686}
687
688impl TryFrom<CLIArgs> for Config {
689    type Error = String;
690
691    #[allow(clippy::needless_late_init)]
692    fn try_from(args: CLIArgs) -> Result<Self, Self::Error> {
693        let target = args.parse_target()?;
694        let filter_config = args.build_filter_config()?;
695        let target_client_config = args.build_client_config();
696        let tracing_config = args.build_tracing_config(args.dry_run);
697
698        // Express One Zone: override batch_size to 1 unless parallel listings allowed
699        let mut batch_size = args.batch_size;
700        let StoragePath::S3 { ref bucket, .. } = target;
701        if is_express_onezone_storage(bucket) && !args.allow_parallel_listings_in_express_one_zone {
702            if batch_size != DEFAULT_BATCH_SIZE {
703                tracing::warn!(
704                    "--batch-size={} is overridden to 1 for Express One Zone storage. \
705                     Use --allow-parallel-listings-in-express-one-zone to keep the specified value.",
706                    batch_size,
707                );
708            }
709            batch_size = 1;
710        }
711
712        // Validate rate limit vs batch size
713        if let Some(rate_limit) = args.rate_limit_objects {
714            if rate_limit < batch_size as u32 {
715                return Err(format!(
716                    "--rate-limit-objects ({}) must be greater than or equal to --batch-size ({}).",
717                    rate_limit, batch_size,
718                ));
719            }
720        }
721
722        // Handle Lua script loading
723        let filter_callback_lua_script: Option<String>;
724        let event_callback_lua_script: Option<String>;
725        let allow_lua_os_library: bool;
726        let allow_lua_unsafe_vm: bool;
727        let lua_vm_memory_limit: usize;
728        let lua_callback_timeout_milliseconds: u64;
729
730        cfg_if::cfg_if! {
731            if #[cfg(feature = "lua_support")] {
732                filter_callback_lua_script = args.filter_callback_lua_script.clone();
733                event_callback_lua_script = args.event_callback_lua_script.clone();
734                allow_lua_os_library = args.allow_lua_os_library;
735                allow_lua_unsafe_vm = args.allow_lua_unsafe_vm;
736                lua_vm_memory_limit = parse_human_bytes(&args.lua_vm_memory_limit)
737                    .and_then(|v| usize::try_from(v).map_err(|e| e.to_string()))
738                    .map_err(|e| format!("Invalid lua-vm-memory-limit: {e}"))?;
739                lua_callback_timeout_milliseconds = args.lua_callback_timeout;
740            } else {
741                filter_callback_lua_script = None;
742                event_callback_lua_script = None;
743                allow_lua_os_library = false;
744                allow_lua_unsafe_vm = false;
745                lua_vm_memory_limit = 64 * 1024 * 1024;
746                lua_callback_timeout_milliseconds = 10_000;
747            }
748        }
749
750        // Build callback managers and register Lua callbacks (like s3sync)
751        #[allow(unused_mut)]
752        let mut filter_manager = FilterManager::new();
753        cfg_if::cfg_if! {
754            if #[cfg(feature = "lua_support")] {
755                if let Some(ref script_path) = filter_callback_lua_script {
756                    let mut lua_filter_callback =
757                        crate::lua::filter::LuaFilterCallback::new(
758                            lua_vm_memory_limit,
759                            allow_lua_os_library,
760                            allow_lua_unsafe_vm,
761                            lua_callback_timeout_milliseconds,
762                        );
763                    lua_filter_callback
764                        .load_and_compile(script_path.as_str())
765                        .map_err(|e| format!("Failed to load filter Lua script: {e}"))?;
766                    filter_manager.register_callback(lua_filter_callback);
767                }
768            }
769        }
770
771        #[allow(unused_mut)]
772        let mut event_manager = EventManager::new();
773        cfg_if::cfg_if! {
774            if #[cfg(feature = "lua_support")] {
775                if let Some(ref script_path) = event_callback_lua_script {
776                    let mut lua_event_callback =
777                        crate::lua::event::LuaEventCallback::new(
778                            lua_vm_memory_limit,
779                            allow_lua_os_library,
780                            allow_lua_unsafe_vm,
781                            lua_callback_timeout_milliseconds,
782                        );
783                    lua_event_callback
784                        .load_and_compile(script_path.as_str())
785                        .map_err(|e| format!("Failed to load event Lua script: {e}"))?;
786                    event_manager.register_callback(
787                        crate::types::event_callback::EventType::ALL_EVENTS,
788                        lua_event_callback,
789                        args.dry_run,
790                    );
791                }
792            }
793        }
794
795        Ok(Config {
796            target,
797            show_no_progress: args.show_no_progress,
798            target_client_config,
799            force_retry_config: ForceRetryConfig {
800                force_retry_count: args.force_retry_count,
801                force_retry_interval_milliseconds: args.force_retry_interval_milliseconds,
802            },
803            tracing_config,
804            worker_size: args.worker_size,
805            warn_as_error: args.warn_as_error,
806            dry_run: args.dry_run,
807            rate_limit_objects: args.rate_limit_objects,
808            max_parallel_listings: args.max_parallel_listings,
809            object_listing_queue_size: args.object_listing_queue_size,
810            max_parallel_listing_max_depth: args.max_parallel_listing_max_depth,
811            allow_parallel_listings_in_express_one_zone: args
812                .allow_parallel_listings_in_express_one_zone,
813            filter_config,
814            max_keys: args.max_keys,
815            auto_complete_shell: args.auto_complete_shell,
816            event_callback_lua_script,
817            filter_callback_lua_script,
818            allow_lua_os_library,
819            allow_lua_unsafe_vm,
820            lua_vm_memory_limit,
821            lua_callback_timeout_milliseconds,
822            if_match: args.if_match,
823            max_delete: args.max_delete,
824            filter_manager,
825            event_manager,
826            batch_size,
827            delete_all_versions: args.delete_all_versions,
828            force: args.force,
829            test_user_defined_callback: false,
830        })
831    }
832}
833
834fn is_express_onezone_storage(bucket: &str) -> bool {
835    bucket.ends_with(EXPRESS_ONEZONE_STORAGE_SUFFIX)
836}