Skip to main content

rusty_sponge/
lib.rs

1//! # rusty-sponge
2//!
3//! A Rust port of the moreutils `sponge` utility: soak up all of stdin and
4//! write it atomically to a file. The library is the canonical surface; the
5//! CLI binary is a thin wrapper around [`run`].
6//!
7//! ## Quick start
8//!
9//! ```no_run
10//! use rusty_sponge::{SpongeBuilder, Target, CompatibilityMode};
11//! use std::io::Cursor;
12//! use std::path::PathBuf;
13//!
14//! let mut sponge = SpongeBuilder::new()
15//!     .target(Target::File(PathBuf::from("output.txt")))
16//!     .append(false)
17//!     .compat(CompatibilityMode::Default)
18//!     .build()?;
19//!
20//! sponge.run(Cursor::new(b"hello\nworld\n"))?;
21//! # Ok::<(), rusty_sponge::Error>(())
22//! ```
23//!
24//! ## Stability (lockstep SemVer)
25//!
26//! The library and binary share a single crate version. Within the `0.x`
27//! series, minor version bumps may introduce breaking changes per standard
28//! Cargo semantics. Every public enum and struct is `#[non_exhaustive]` so
29//! that variant additions are not breaking changes once `1.0` lands.
30//!
31//! ## Atomic-safety guarantee
32//!
33//! When writing to a regular non-symlink file, [`Sponge::run`] writes to a
34//! sibling tempfile in the target's parent directory and atomically renames
35//! into place. Mid-write failures (panic, IO error, signal) leave the
36//! original file untouched. **Symlink targets and the cross-volume fallback
37//! path explicitly forgo this guarantee** — see the crate README for the full
38//! compatibility statement.
39
40pub mod buffer;
41pub mod error;
42
43pub use error::Error;
44
45use std::path::PathBuf;
46
/// Destination for the bytes soaked up from the reader.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum Target {
    /// Deliver to standard output; this is the case when no file argument
    /// was given.
    Stdout,
    /// Replace the file at the given path. A regular, non-symlink target is
    /// written via a sibling tempfile followed by a rename; a symlink or
    /// reparse-point target is written through non-atomically instead.
    File(PathBuf),
}
58
/// Chooses between the ergonomic Default-mode extensions and Strict
/// moreutils parity.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CompatibilityMode {
    /// Default mode: `--help`, `--version`, `completions`, and the env-var
    /// threshold are all honored.
    #[default]
    Default,
    /// Strict mode: byte-equal with moreutils `sponge` for documented inputs;
    /// every Default-mode addition is rejected.
    Strict,
}
70
71/// Runtime engine for one sponge invocation. Constructed via [`SpongeBuilder`].
72#[non_exhaustive]
73#[derive(Debug)]
74pub struct Sponge {
75    target: Target,
76    append: bool,
77    spill_threshold: usize,
78    /// Held for Phase 7 Strict-mode logic (e.g., FR-025 cross-volume fallback
79    /// warning suppression). Not yet read on the MVP path.
80    #[allow(dead_code)]
81    compat: CompatibilityMode,
82}
83
/// Spill threshold used when none is configured: 128 MiB, in bytes.
pub const DEFAULT_SPILL_THRESHOLD: usize = 128 << 20;
86
87/// Builder for [`Sponge`]. All chain methods are `#[must_use]`.
88#[non_exhaustive]
89#[derive(Debug, Clone)]
90pub struct SpongeBuilder {
91    target: Target,
92    append: bool,
93    spill_threshold: usize,
94    compat: CompatibilityMode,
95}
96
97impl Default for SpongeBuilder {
98    fn default() -> Self {
99        Self::new()
100    }
101}
102
103impl SpongeBuilder {
104    /// Construct a new builder defaulting to `Target::Stdout`, no-append,
105    /// 128 MiB spill threshold, Default mode.
106    #[must_use]
107    pub fn new() -> Self {
108        Self {
109            target: Target::Stdout,
110            append: false,
111            spill_threshold: DEFAULT_SPILL_THRESHOLD,
112            compat: CompatibilityMode::Default,
113        }
114    }
115
116    /// Set the target.
117    #[must_use]
118    pub fn target(mut self, target: Target) -> Self {
119        self.target = target;
120        self
121    }
122
123    /// Enable `-a` append mode. Reads existing file contents into the buffer
124    /// before stdin. Requires `Target::File`; otherwise `build()` returns
125    /// [`Error::InvalidBuilderConfiguration`].
126    #[must_use]
127    pub fn append(mut self, append: bool) -> Self {
128        self.append = append;
129        self
130    }
131
132    /// Set the spill threshold in bytes.
133    #[must_use]
134    pub fn spill_threshold(mut self, bytes: usize) -> Self {
135        self.spill_threshold = bytes;
136        self
137    }
138
139    /// Set the compatibility mode.
140    #[must_use]
141    pub fn compat(mut self, compat: CompatibilityMode) -> Self {
142        self.compat = compat;
143        self
144    }
145
146    /// Validate the configuration and build a [`Sponge`].
147    pub fn build(self) -> Result<Sponge, Error> {
148        // Validation: append requires a file target.
149        if self.append && matches!(self.target, Target::Stdout) {
150            return Err(Error::InvalidBuilderConfiguration(
151                "append requires a file target",
152            ));
153        }
154        // Validation: Strict mode does not honor explicit spill-threshold overrides.
155        if self.compat == CompatibilityMode::Strict
156            && self.spill_threshold != DEFAULT_SPILL_THRESHOLD
157        {
158            return Err(Error::CompatibilityViolation(
159                "explicit spill threshold not honored in Strict mode",
160            ));
161        }
162        Ok(Sponge {
163            target: self.target,
164            append: self.append,
165            spill_threshold: self.spill_threshold,
166            compat: self.compat,
167        })
168    }
169}
170
171impl Sponge {
172    /// Drain the reader, write the buffered bytes to the configured target.
173    /// On the regular-file path this performs sibling-tempfile + atomic rename;
174    /// on the symlink/reparse path the write-through fallback (FR-010) is
175    /// pending Polish phase — the MVP returns an error there.
176    pub fn run<R: std::io::Read>(&mut self, reader: R) -> Result<(), Error> {
177        match &self.target {
178            Target::Stdout => {
179                // Pipeline-batching mode (US2): drain stdin into the buffer,
180                // then emit to stdout in one shot.
181                let mut buf = buffer::Buffer::new();
182                // For Stdout target, no on-disk spill dir is meaningful — use
183                // the system temp dir as a fallback.
184                let spill_dir = std::env::temp_dir();
185                buf.drain_reader(reader, self.spill_threshold, &spill_dir)?;
186                let stdout = std::io::stdout();
187                let mut locked = stdout.lock();
188                buf.write_to(&mut locked)?;
189                Ok(())
190            }
191            Target::File(path) => {
192                validate_target_path(path)?;
193
194                // Spill directory MUST be the target's parent (HINT-002) so
195                // that the eventual atomic-rename in `atomic::write_atomic`
196                // works without crossing a filesystem boundary.
197                let spill_dir = path
198                    .parent()
199                    .filter(|p| !p.as_os_str().is_empty())
200                    .map(std::path::PathBuf::from)
201                    .unwrap_or_else(|| std::path::PathBuf::from("."));
202
203                let mut buf = buffer::Buffer::new();
204                buf.drain_reader(reader, self.spill_threshold, &spill_dir)?;
205
206                // FR-010: symlink and reparse-point targets get the
207                // non-atomic write-through path; the atomic-safety guarantee
208                // does NOT apply on this branch.
209                if writethrough::requires_write_through(path) {
210                    writethrough::write_through(buf, path, self.append)?;
211                } else {
212                    atomic::write_atomic(buf, path, self.append)?;
213                }
214                Ok(())
215            }
216        }
217    }
218}
219
220// Internal atomic-write module. Public for integration tests in our own
221// `tests/` directory; library consumers should use the [`Sponge`] runtime.
222pub mod atomic;
223
224// Non-atomic write-through path for symlink and reparse-point targets (FR-010).
225pub mod writethrough;
226
227/// Pre-write validation: reject directory targets (FR-014). Available without
228/// the `cli` feature so library consumers can call it.
229fn validate_target_path(target: &std::path::Path) -> Result<(), Error> {
230    if let Ok(meta) = std::fs::symlink_metadata(target) {
231        if meta.is_dir() {
232            return Err(Error::TargetIsDirectory(target.to_path_buf()));
233        }
234    }
235    Ok(())
236}
237
// CLI / mode / signal / strict internals are gated behind `cli` because
// they pull clap, signal-hook, and other binary-only deps. Library callers
// configure compat mode via the builder.
241#[cfg(feature = "cli")]
242pub mod cli;
243#[cfg(feature = "cli")]
244pub mod mode;
245#[cfg(feature = "cli")]
246pub mod signal;
247#[cfg(feature = "cli")]
248pub mod strict;
249
/// Binary entry-point helper used by both `src/main.rs` and `src/bin/sponge.rs`.
///
/// Library consumers should use [`SpongeBuilder`] directly; this helper exists
/// only to share the binary entry shape between the default `rusty-sponge`
/// binary and the optional `sponge` alias.
///
/// # Exit codes
///
/// * `0` — success, and also `--help`, `--version`, and `completions`.
/// * `1` — the builder rejected the configuration, or the run failed.
/// * `2` — clap reported an argument parse error.
/// * `130` — the run failed with an I/O error of kind `Interrupted`
///   (signal-driven cancellation, FR-011).
///
/// When Strict mode is selected before clap parsing, the exit code is
/// whatever `strict::run` returns.
#[cfg(feature = "cli")]
pub fn run() -> std::process::ExitCode {
    use clap::Parser;
    use std::process::ExitCode;

    // Install signal handlers as early as possible so that a Ctrl-C / SIGTERM
    // during stdin reading triggers the cancel-flag path and the in-progress
    // tempfile is dropped before exit (FR-011). Errors here are non-fatal.
    if let Err(e) = signal::install_handlers() {
        eprintln!("warning: could not install signal handlers: {e}");
    }

    // The inputs to mode resolution are read exactly once and shared by the
    // pre-clap and post-clap resolutions below, so both see the same values.
    let raw_argv: Vec<std::ffi::OsString> = std::env::args_os().collect();
    let argv0 = raw_argv.first().cloned();
    let env_strict = std::env::var_os("RUSTY_SPONGE_STRICT");

    // Pre-clap Strict-mode detection: if --strict / RUSTY_SPONGE_STRICT=1 /
    // argv[0]=sponge select Strict, dispatch to the byte-equal-moreutils
    // path before clap gets a chance to emit its own help/version text.
    let pre_strict = strict::pre_scan_strict_flag(&raw_argv);
    let early_mode = mode::resolve(pre_strict, env_strict.as_deref(), argv0.as_deref());
    if early_mode == CompatibilityMode::Strict {
        return strict::run(&raw_argv);
    }

    let cli_args = match cli::Cli::try_parse() {
        Ok(args) => args,
        Err(e) => {
            // clap handles --help / --version / parse-error printing itself.
            e.print().ok();
            // --help / --version surface as DisplayHelp / DisplayVersion and
            // count as success; every other kind is a usage error.
            return match e.kind() {
                clap::error::ErrorKind::DisplayHelp | clap::error::ErrorKind::DisplayVersion => {
                    ExitCode::SUCCESS
                }
                _ => ExitCode::from(2),
            };
        }
    };

    // Handle subcommands (currently only `completions`).
    if let Some(cli::Subcommand::Completions { shell }) = cli_args.command {
        use clap::CommandFactory;
        let mut cmd = cli::Cli::command();
        let name = cmd.get_name().to_string();
        clap_complete::generate(shell, &mut cmd, name, &mut std::io::stdout());
        return ExitCode::SUCCESS;
    }

    // Resolve compatibility mode (precedence: --strict > env > argv[0] > Default),
    // this time with the strict flag as clap parsed it.
    let compat = mode::resolve(
        cli::strict_flag(&cli_args),
        env_strict.as_deref(),
        argv0.as_deref(),
    );

    // Resolve spill threshold: env var honored only in Default mode.
    let spill_threshold = resolve_spill_threshold(&cli_args, compat);

    // Build the target.
    let target = match cli_args.target {
        Some(path) => Target::File(path),
        None => Target::Stdout,
    };

    // Construct the runtime via the builder so validation goes through the
    // same code path that library consumers use.
    let result = SpongeBuilder::new()
        .target(target)
        .append(cli_args.append)
        .spill_threshold(spill_threshold)
        .compat(compat)
        .build();

    let mut sponge = match result {
        Ok(s) => s,
        Err(e) => {
            eprintln!("rusty-sponge: {e}");
            return ExitCode::from(1);
        }
    };

    let stdin = std::io::stdin();
    let locked = stdin.lock();
    match sponge.run(locked) {
        Ok(()) => ExitCode::SUCCESS,
        Err(Error::Io(io_err)) if io_err.kind() == std::io::ErrorKind::Interrupted => {
            // FR-011: signal-driven cancellation. Conventional Unix exit code
            // for SIGINT is 130 (128 + SIGINT=2). Tempfile cleanup has already
            // happened via Drop in the error path.
            eprintln!("rusty-sponge: cancelled");
            ExitCode::from(130)
        }
        Err(e) => {
            eprintln!("rusty-sponge: {e}");
            ExitCode::from(1)
        }
    }
}
356
/// Work out the spill threshold, in bytes, that this invocation should use
/// (FR-016 / FR-017).
///
/// Strict mode always yields [`DEFAULT_SPILL_THRESHOLD`] without looking at
/// `cli_args.spill_mb`. Otherwise the value is read as a whole number of
/// MiB; a missing value yields the default, and a zero or unparsable value
/// prints a warning to stderr and yields the default as well. The MiB-to-byte
/// conversion saturates rather than overflowing.
#[cfg(feature = "cli")]
fn resolve_spill_threshold(cli_args: &cli::Cli, compat: CompatibilityMode) -> usize {
    if matches!(compat, CompatibilityMode::Strict) {
        return DEFAULT_SPILL_THRESHOLD;
    }
    match cli_args.spill_mb.as_deref() {
        None => DEFAULT_SPILL_THRESHOLD,
        Some(raw) => match raw.trim().parse::<usize>() {
            Ok(mb) if mb != 0 => mb.saturating_mul(1024 * 1024),
            _ => {
                eprintln!(
                    "warning: invalid RUSTY_SPONGE_SPILL_MB value '{raw}'; using default 128 MiB"
                );
                DEFAULT_SPILL_THRESHOLD
            }
        },
    }
}
377}