rusty_sponge/lib.rs
1//! # rusty-sponge
2//!
3//! A Rust port of the moreutils `sponge` utility: soak up all of stdin and
4//! write it atomically to a file. The library is the canonical surface; the
5//! CLI binary is a thin wrapper around [`run`].
6//!
7//! ## Quick start
8//!
9//! ```no_run
10//! use rusty_sponge::{SpongeBuilder, Target, CompatibilityMode};
11//! use std::io::Cursor;
12//! use std::path::PathBuf;
13//!
14//! let mut sponge = SpongeBuilder::new()
15//! .target(Target::File(PathBuf::from("output.txt")))
16//! .append(false)
17//! .compat(CompatibilityMode::Default)
18//! .build()?;
19//!
20//! sponge.run(Cursor::new(b"hello\nworld\n"))?;
21//! # Ok::<(), rusty_sponge::Error>(())
22//! ```
23//!
24//! ## Stability (lockstep SemVer)
25//!
26//! The library and binary share a single crate version. Within the `0.x`
27//! series, minor version bumps may introduce breaking changes per standard
28//! Cargo semantics. Every public enum and struct is `#[non_exhaustive]` so
29//! that variant additions are not breaking changes once `1.0` lands.
30//!
31//! ## Atomic-safety guarantee
32//!
33//! When writing to a regular non-symlink file, [`Sponge::run`] writes to a
34//! sibling tempfile in the target's parent directory and atomically renames
35//! into place. Mid-write failures (panic, IO error, signal) leave the
36//! original file untouched. **Symlink targets and the cross-volume fallback
37//! path explicitly forgo this guarantee** — see the crate README for the full
38//! compatibility statement.
39
40pub mod buffer;
41pub mod error;
42
43pub use error::Error;
44
45use std::path::PathBuf;
46
/// Destination that receives the fully buffered input.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub enum Target {
    /// Emit the buffered bytes on standard output; this is the case where no
    /// file argument is supplied.
    Stdout,
    /// Replace the named file. A regular, non-symlink path takes the
    /// sibling-tempfile + rename route; a symlink or reparse point is routed
    /// to the non-atomic write-through path instead.
    File(PathBuf),
}
58
/// Selects between the ergonomic Default behaviour and Strict moreutils parity.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[non_exhaustive]
pub enum CompatibilityMode {
    /// Default mode: honors `--help`, `--version`, `completions`, and the
    /// env-var spill threshold.
    #[default]
    Default,
    /// Strict mode: byte-equal to moreutils `sponge` for documented inputs;
    /// every Default-mode addition is rejected.
    Strict,
}
70
71/// Runtime engine for one sponge invocation. Constructed via [`SpongeBuilder`].
72#[non_exhaustive]
73#[derive(Debug)]
74pub struct Sponge {
75 target: Target,
76 append: bool,
77 spill_threshold: usize,
78 /// Held for Phase 7 Strict-mode logic (e.g., FR-025 cross-volume fallback
79 /// warning suppression). Not yet read on the MVP path.
80 #[allow(dead_code)]
81 compat: CompatibilityMode,
82}
83
/// Spill threshold used when none is configured: 128 MiB, expressed in bytes
/// (128 × 2^20).
pub const DEFAULT_SPILL_THRESHOLD: usize = 128 << 20;
86
87/// Builder for [`Sponge`]. All chain methods are `#[must_use]`.
88#[non_exhaustive]
89#[derive(Debug, Clone)]
90pub struct SpongeBuilder {
91 target: Target,
92 append: bool,
93 spill_threshold: usize,
94 compat: CompatibilityMode,
95}
96
97impl Default for SpongeBuilder {
98 fn default() -> Self {
99 Self::new()
100 }
101}
102
103impl SpongeBuilder {
104 /// Construct a new builder defaulting to `Target::Stdout`, no-append,
105 /// 128 MiB spill threshold, Default mode.
106 #[must_use]
107 pub fn new() -> Self {
108 Self {
109 target: Target::Stdout,
110 append: false,
111 spill_threshold: DEFAULT_SPILL_THRESHOLD,
112 compat: CompatibilityMode::Default,
113 }
114 }
115
116 /// Set the target.
117 #[must_use]
118 pub fn target(mut self, target: Target) -> Self {
119 self.target = target;
120 self
121 }
122
123 /// Enable `-a` append mode. Reads existing file contents into the buffer
124 /// before stdin. Requires `Target::File`; otherwise `build()` returns
125 /// [`Error::InvalidBuilderConfiguration`].
126 #[must_use]
127 pub fn append(mut self, append: bool) -> Self {
128 self.append = append;
129 self
130 }
131
132 /// Set the spill threshold in bytes.
133 #[must_use]
134 pub fn spill_threshold(mut self, bytes: usize) -> Self {
135 self.spill_threshold = bytes;
136 self
137 }
138
139 /// Set the compatibility mode.
140 #[must_use]
141 pub fn compat(mut self, compat: CompatibilityMode) -> Self {
142 self.compat = compat;
143 self
144 }
145
146 /// Validate the configuration and build a [`Sponge`].
147 pub fn build(self) -> Result<Sponge, Error> {
148 // Validation: append requires a file target.
149 if self.append && matches!(self.target, Target::Stdout) {
150 return Err(Error::InvalidBuilderConfiguration(
151 "append requires a file target",
152 ));
153 }
154 // Validation: Strict mode does not honor explicit spill-threshold overrides.
155 if self.compat == CompatibilityMode::Strict
156 && self.spill_threshold != DEFAULT_SPILL_THRESHOLD
157 {
158 return Err(Error::CompatibilityViolation(
159 "explicit spill threshold not honored in Strict mode",
160 ));
161 }
162 Ok(Sponge {
163 target: self.target,
164 append: self.append,
165 spill_threshold: self.spill_threshold,
166 compat: self.compat,
167 })
168 }
169}
170
171impl Sponge {
172 /// Drain the reader, write the buffered bytes to the configured target.
173 /// On the regular-file path this performs sibling-tempfile + atomic rename;
174 /// on the symlink/reparse path the write-through fallback (FR-010) is
175 /// pending Polish phase — the MVP returns an error there.
176 pub fn run<R: std::io::Read>(&mut self, reader: R) -> Result<(), Error> {
177 match &self.target {
178 Target::Stdout => {
179 // Pipeline-batching mode (US2): drain stdin into the buffer,
180 // then emit to stdout in one shot.
181 let mut buf = buffer::Buffer::new();
182 // For Stdout target, no on-disk spill dir is meaningful — use
183 // the system temp dir as a fallback.
184 let spill_dir = std::env::temp_dir();
185 buf.drain_reader(reader, self.spill_threshold, &spill_dir)?;
186 let stdout = std::io::stdout();
187 let mut locked = stdout.lock();
188 buf.write_to(&mut locked)?;
189 Ok(())
190 }
191 Target::File(path) => {
192 validate_target_path(path)?;
193
194 // Spill directory MUST be the target's parent (HINT-002) so
195 // that the eventual atomic-rename in `atomic::write_atomic`
196 // works without crossing a filesystem boundary.
197 let spill_dir = path
198 .parent()
199 .filter(|p| !p.as_os_str().is_empty())
200 .map(std::path::PathBuf::from)
201 .unwrap_or_else(|| std::path::PathBuf::from("."));
202
203 let mut buf = buffer::Buffer::new();
204 buf.drain_reader(reader, self.spill_threshold, &spill_dir)?;
205
206 // FR-010: symlink and reparse-point targets get the
207 // non-atomic write-through path; the atomic-safety guarantee
208 // does NOT apply on this branch.
209 if writethrough::requires_write_through(path) {
210 writethrough::write_through(buf, path, self.append)?;
211 } else {
212 atomic::write_atomic(buf, path, self.append)?;
213 }
214 Ok(())
215 }
216 }
217 }
218}
219
220// Internal atomic-write module. Public for integration tests in our own
221// `tests/` directory; library consumers should use the [`Sponge`] runtime.
222pub mod atomic;
223
224// Non-atomic write-through path for symlink and reparse-point targets (FR-010).
225pub mod writethrough;
226
227/// Pre-write validation: reject directory targets (FR-014). Available without
228/// the `cli` feature so library consumers can call it.
229fn validate_target_path(target: &std::path::Path) -> Result<(), Error> {
230 if let Ok(meta) = std::fs::symlink_metadata(target) {
231 if meta.is_dir() {
232 return Err(Error::TargetIsDirectory(target.to_path_buf()));
233 }
234 }
235 Ok(())
236}
237
// CLI / mode / signal / strict internals are gated behind `cli` because
// they pull clap, signal-hook, and other binary-only deps. Library callers
// configure compat mode via the builder.
241#[cfg(feature = "cli")]
242pub mod cli;
243#[cfg(feature = "cli")]
244pub mod mode;
245#[cfg(feature = "cli")]
246pub mod signal;
247#[cfg(feature = "cli")]
248pub mod strict;
249
/// Shared binary entry point for `src/main.rs` and `src/bin/sponge.rs`.
///
/// It exists only so the default `rusty-sponge` binary and the optional
/// `sponge` alias share one entry shape; library consumers should drive
/// [`SpongeBuilder`] themselves.
///
/// Exit codes produced here: success for a completed run and for `--help`,
/// `--version` and `completions`; `2` for argument parse errors; `130` when
/// the run fails with an `Interrupted` I/O error (cancellation); `1` for any
/// other failure. In Strict mode the exit code is whatever `strict::run`
/// returns.
#[cfg(feature = "cli")]
pub fn run() -> std::process::ExitCode {
    use clap::error::ErrorKind;
    use clap::Parser;
    use std::process::ExitCode;

    // FR-011: hook signals before any stdin is read so that Ctrl-C / SIGTERM
    // goes through the cancel-flag path and the in-progress tempfile is
    // dropped before exit. Failing to install them is only worth a warning.
    if let Err(err) = signal::install_handlers() {
        eprintln!("warning: could not install signal handlers: {err}");
    }

    // Decide on Strict mode *before* clap runs: when --strict,
    // RUSTY_SPONGE_STRICT=1 or argv[0]=sponge selects it, the
    // byte-equal-moreutils path must own help/version output.
    let raw_argv: Vec<std::ffi::OsString> = std::env::args_os().collect();
    let early_flag = strict::pre_scan_strict_flag(&raw_argv);
    let early_env = std::env::var_os("RUSTY_SPONGE_STRICT");
    let early_argv0 = raw_argv.first().cloned();
    if mode::resolve(early_flag, early_env.as_deref(), early_argv0.as_deref())
        == CompatibilityMode::Strict
    {
        return strict::run(&raw_argv);
    }

    let parsed = match cli::Cli::try_parse() {
        Ok(parsed) => parsed,
        Err(err) => {
            // clap renders --help / --version / parse errors itself.
            err.print().ok();
            // Help and version requests are successes; everything else is a
            // usage error.
            let informational = matches!(
                err.kind(),
                ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
            );
            return if informational {
                ExitCode::SUCCESS
            } else {
                ExitCode::from(2)
            };
        }
    };

    // Subcommands (only `completions` for now).
    if let Some(cli::Subcommand::Completions { shell }) = parsed.command {
        use clap::CommandFactory;
        let mut command = cli::Cli::command();
        let bin_name = command.get_name().to_string();
        clap_complete::generate(shell, &mut command, bin_name, &mut std::io::stdout());
        return ExitCode::SUCCESS;
    }

    // Compatibility mode precedence: --strict > env > argv[0] > Default.
    let program = std::env::args_os().next();
    let strict_env = std::env::var_os("RUSTY_SPONGE_STRICT");
    let compat = mode::resolve(
        cli::strict_flag(&parsed),
        strict_env.as_deref(),
        program.as_deref(),
    );

    // The env-var threshold is honored in Default mode only.
    let spill_threshold = resolve_spill_threshold(&parsed, compat);

    // A file argument selects a file target; otherwise write to stdout.
    let target = parsed.target.map_or(Target::Stdout, Target::File);

    // Go through the builder so the CLI gets exactly the validation that
    // library consumers get.
    let built = SpongeBuilder::new()
        .target(target)
        .append(parsed.append)
        .spill_threshold(spill_threshold)
        .compat(compat)
        .build();
    let mut sponge = match built {
        Ok(sponge) => sponge,
        Err(err) => {
            eprintln!("rusty-sponge: {err}");
            return ExitCode::from(1);
        }
    };

    let stdin = std::io::stdin();
    let Err(failure) = sponge.run(stdin.lock()) else {
        return ExitCode::SUCCESS;
    };

    // FR-011: an Interrupted I/O error is the signal-driven cancellation
    // path. 130 is the conventional Unix exit code for SIGINT (128 + 2);
    // tempfile cleanup has already happened via Drop on the error path.
    let cancelled = matches!(
        &failure,
        Error::Io(io_err) if io_err.kind() == std::io::ErrorKind::Interrupted
    );
    if cancelled {
        eprintln!("rusty-sponge: cancelled");
        ExitCode::from(130)
    } else {
        eprintln!("rusty-sponge: {failure}");
        ExitCode::from(1)
    }
}
356
/// Compute the spill threshold, in bytes, for a CLI invocation.
///
/// Strict mode always yields [`DEFAULT_SPILL_THRESHOLD`], ignoring explicit
/// overrides (FR-016 / FR-017). Otherwise `spill_mb` is read as a whole
/// number of MiB: a missing value falls back to the default silently, while a
/// zero or unparsable value falls back with a warning on stderr. The
/// MiB-to-bytes multiplication saturates instead of overflowing.
#[cfg(feature = "cli")]
fn resolve_spill_threshold(cli_args: &cli::Cli, compat: CompatibilityMode) -> usize {
    let raw = match (compat, cli_args.spill_mb.as_deref()) {
        (CompatibilityMode::Strict, _) | (_, None) => return DEFAULT_SPILL_THRESHOLD,
        (_, Some(raw)) => raw,
    };

    match raw.trim().parse::<usize>() {
        Ok(mb) if mb != 0 => mb.saturating_mul(1024 * 1024),
        _ => {
            eprintln!(
                "warning: invalid RUSTY_SPONGE_SPILL_MB value '{raw}'; using default 128 MiB"
            );
            DEFAULT_SPILL_THRESHOLD
        }
    }
}
377}