tmpfile 0.1.0

A temporary file writer with an option to persist.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
//! Temporary file object that can be persisted.
//!
//! # Usage overview
//! The main type of this crate is [`TmpFile`], which represents a (temporary)
//! file being written to, that may optionally be persisted (or it will be
//! removed).
//!
//! To use `TmpFile` an application must first implement the [`TmpProc`] trait
//! on a type.  [`TmpProc::update()`] will be called each time a block of data
//! is written to the `TmpFile`.  [`TmpProc::finalize()`] will be called if the
//! application chooses to persist the `TmpFile`.
//!
//! When a `TmpFile` is created, the application must pass along the file's
//! _temporary_ location and an object whose type implements `TmpProc`.  The
//! instantiated `TmpFile` object implements [`std::io::Write`], which is used
//! to write data to it.
//!
//! If the entire file can not be completed, the `TmpFile` object is dropped,
//! which will automatically remove the temporary file.
//!
//! If the entire file has been written, and the application wants to persist
//! it, it calls [`TmpFile::persist()`].  This will call the
//! `TmpProc::finalize()` trait method, whose responsibility it is to return
//! the file's persistent location (and application-defined data).  Information
//! about the persisted file is finally returned to the application via an
//! instance of [`Persisted`].
//!
//! # "Small file" special case
//! An application may not want to store small files in its filesystem.
//! For this purpose, the `TmpFile` can be set up to have a minimum file size.
//! If a `TmpFile` does not reach this size before being persisted, a memory
//! buffer of the file's contents will be returned instead of a file name of
//! the persisted file.
//!
//! The [`TmpFile::with_minsize()`] factory method can be used to use this
//! feature.
//!
//! # Deferred persist
//! There may be cases where it's impractical to call [`TmpFile::persist()`] on
//! a `TmpFile`, but where the originator of the `TmpFile` wants to manage the
//! results from the `TmpFile` when it is persisted.  This crate has means to
//! handle such situations, but it drastically changes the semantics of
//! `TmpFile`:  The `defer_persist()` method returns a wait context that can be
//! used to wait for the `TmpFile` to finalize and send its results.  In this
//! scenario, the finalization occurs implicitly when the `TmpFile` is
//! dropped.
//!
//! This means that deferred persist shifts the default assumption of
//! "drop-before-persist implies failure" to "drop means persist".  The only
//! ways to avoid persisting are to call `TmpFile::cancel()` before the
//! `TmpFile` is dropped, or to have the `TmpProc`'s finalization determine
//! that the file is incomplete and return an error.
//!
//! # Features
//! | Feature         | Function
//! |-----------------|----------
//! | `defer-persist` | Allow `Drop` to finalize `TmpFile`.

#![cfg_attr(docsrs, feature(doc_cfg))]

use std::{
  fs,
  io::Write,
  path::{Path, PathBuf},
  time::{Duration, Instant}
};


/// Used to inspect contents as it is being fed to the temporary file and to
/// finalize the temporary file when it is being persisted.
///
/// An implementation is handed to a `TmpFile` as a boxed trait object when
/// the `TmpFile` is created.  To be usable with `TmpFile`, the associated
/// `Error` type must implement `From<std::io::Error>`.
pub trait TmpProc {
  /// Application-defined data to be returned on successful finalization.
  type Output;

  /// Application-defined error type.
  type Error;

  /// Called when a buffer has been written to the `TmpFile` storage.
  ///
  /// `buf` holds exactly the bytes that were accepted by the write: the
  /// entire input while the contents are held in the memory buffer, or the
  /// portion that the underlying file accepted otherwise.
  fn update(&mut self, buf: &[u8]);

  /// Called when the application has chosen to persist the file.
  ///
  /// The role of this method is to:
  /// - Return its application-specific data of the associated type `Output`.
  /// - If `src` is `Some()` it means that the `TmpFile` is backed by a file,
  ///   and the implementation of this method should return, as the second
  ///   tuple member, `Some(PathBuf)`, pointing out the target file that the
  ///   temporary file should be persisted to.  If `src` is `None` the
  ///   temporary buffer is not stored in the file system and thus `None`
  ///   should be returned instead.
  ///
  /// When `src` is `Some()` it is the path of the temporary file.  After this
  /// method returns, `TmpFile` hard links the temporary file to the returned
  /// target path, unless the target already exists.  When `src` is `None`,
  /// the returned path is ignored by `TmpFile`.
  ///
  /// # Errors
  /// Returns application-specific errors.
  fn finalize(
    &mut self,
    src: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), Self::Error>;
}


/// A [`TmpProc`] implementation which performs no content processing.
///
/// All data passed to [`TmpProc::update()`] is ignored.  On finalization the
/// persistent location that the `NullProc` was constructed with is reported
/// as the target file.
#[derive(Debug, Clone, Copy)]
pub struct NullProc<'a>(&'a Path);

impl<'a> NullProc<'a> {
  /// Create a `NullProc` that will persist the temporary file to `target`.
  // Without this constructor the type could not be instantiated outside of
  // this crate, because the tuple field is private.
  #[must_use]
  pub fn new(target: &'a Path) -> Self {
    Self(target)
  }
}

impl TmpProc for NullProc<'_> {
  type Output = ();

  // `TmpFile` requires its error type to implement `From<std::io::Error>`,
  // which `()` does not; using `std::io::Error` makes `NullProc` usable
  // with `TmpFile`.
  type Error = std::io::Error;

  /// Ignores the written data.
  fn update(&mut self, _buf: &[u8]) {}

  /// Report the configured target path.
  ///
  /// Returns `Some(target)` only if the contents are file-backed (`src` is
  /// `Some()`); returns `None` for the path otherwise, as required by the
  /// [`TmpProc::finalize()`] contract.
  ///
  /// # Errors
  /// This implementation never fails.
  fn finalize(
    &mut self,
    src: Option<&Path>
  ) -> Result<(Self::Output, Option<PathBuf>), Self::Error> {
    Ok(((), src.map(|_| self.0.to_path_buf())))
  }
}

/// Container for the contents of a successfully persisted temporary file.
#[derive(Debug)]
pub enum Output {
  /// The contents were persisted to the file at this path.
  File(PathBuf),

  /// The contents never grew large enough to be written to disk and are
  /// handed back in this buffer instead.
  ///
  /// Only produced when a minimum size threshold has been configured.
  Buf(Vec<u8>)
}

impl Output {
  /// Attempt to turn this `Output` into the persisted file's path.
  ///
  /// # Errors
  /// Hands the `Output` back unchanged if it does not hold a file name.
  pub fn try_into_fname(self) -> Result<PathBuf, Self> {
    match self {
      Self::File(path) => Ok(path),
      not_a_file => Err(not_a_file)
    }
  }

  /// Extract the persisted file's path.
  ///
  /// # Panics
  /// Panics unless the `Output` holds a file name.
  #[must_use]
  pub fn unwrap_fname(self) -> PathBuf {
    match self {
      Self::File(path) => path,
      Self::Buf(_) => panic!("Not a file name")
    }
  }

  /// Attempt to turn this `Output` into the in-memory contents buffer.
  ///
  /// # Errors
  /// Hands the `Output` back unchanged if it does not hold a buffer.
  pub fn try_into_buf(self) -> Result<Vec<u8>, Self> {
    match self {
      Self::Buf(bytes) => Ok(bytes),
      not_a_buf => Err(not_a_buf)
    }
  }

  /// Extract the in-memory contents buffer.
  ///
  /// # Panics
  /// Panics unless the `Output` holds a buffer.
  #[must_use]
  pub fn unwrap_buf(self) -> Vec<u8> {
    match self {
      Self::Buf(bytes) => bytes,
      Self::File(_) => panic!("Not a buffer")
    }
  }
}

/// The final results of successfully persisting a [`TmpFile`].
///
/// The type parameter `T` is the application-defined output of the content
/// processor.
#[non_exhaustive]
pub struct Persisted<T> {
  /// `TmpFile` output.
  ///
  /// If a minimum size was set, this will be `Output::Buf()` if the size
  /// is less than or equal to the minimum size.  Otherwise it will be
  /// `Output::File()` containing the file name of the persisted file.
  ///
  /// If the persisted `TmpFile` did not have a minimum file size set, the
  /// output can safely be unwrapped using [`Output::unwrap_fname()`].
  pub output: Output,

  /// The size of the content written to the [`TmpFile`].
  pub size: u64,

  /// The application-defined content processor output.
  pub procres: T,

  /// The amount of time that passed between initially requesting the
  /// [`TmpFile`] writer and when it was finalized.
  pub duration: Duration
}


/// In-memory staging buffer used by a `TmpFile` that was created with a
/// minimum size, for as long as the written contents fit within that size.
struct MemBuf {
  /// Backing storage; allocated up front to the minimum size.
  buf: Vec<u8>,
  /// Write position: the number of bytes of `buf` that hold written data.
  idx: usize
}

/// File writer used to write to a temporary file that can be persisted.
///
/// Data is written through the [`std::io::Write`] implementation.  When the
/// `TmpFile` is dropped it attempts to remove the temporary file.
///
/// `T` is the content processor's output type and `E` its error type, which
/// must be constructible from `std::io::Error`.
pub struct TmpFile<T, E>
where
  E: From<std::io::Error>
{
  /// Location of the temporary file.
  tmpfile: PathBuf,
  /// Open handle to the temporary file; `None` once it has been closed.
  f: Option<Box<dyn Write + Send>>,
  /// Application-supplied content processor.
  tp: Box<dyn TmpProc<Output = T, Error = E> + Send>,
  /// Total number of bytes written so far.
  size: u64,
  /// Point in time at which this `TmpFile` was created.
  start_time: Instant,
  /// Memory buffer holding the contents while they are within the minimum
  /// size; `None` if no minimum size was set or once it has been exceeded.
  membuf: Option<MemBuf>,
  /// Context used to report the results of a deferred persist; `Some` once
  /// `defer_persist()` has been called.
  #[cfg(feature = "defer-persist")]
  sctx: Option<swctx::SetCtx<Persisted<T>, (), E>>
}

impl<T, E> TmpFile<T, E>
where
  E: From<std::io::Error>
{
  /// Finalize the content processor and, if the contents are file-backed,
  /// link the temporary file to its persistent location.
  ///
  /// This is the shared implementation behind [`TmpFile::persist()`] and the
  /// deferred persist performed when the `TmpFile` is dropped.
  ///
  /// # Errors
  /// - Any error returned by [`TmpProc::finalize()`].
  /// - An `std::io::Error` (converted to `E`) of kind `InvalidInput` if the
  ///   contents are file-backed but `finalize()` did not return a target
  ///   path.
  /// - An `std::io::Error` (converted to `E`) if hard linking the temporary
  ///   file to the target path fails.
  fn inner_persist(&mut self) -> Result<Persisted<T>, E> {
    // Force close file, if open
    drop(self.f.take());

    let (output, t) = if let Some(ref mut membuf) = self.membuf {
      // Hand over only the part of the buffer that was actually written.
      let mut buf = std::mem::take(&mut membuf.buf);
      buf.truncate(membuf.idx);

      // Contents are stored in a memory buffer, so don't pass a path and do
      // not expect a path in return.
      let (t, _) = self.tp.finalize(None)?;

      (Output::Buf(buf), t)
    } else {
      // Tell the content processor to finalize and pass in the source
      // temporary file, which should instruct finalize() to return the
      // persistent location of the file.
      let (t, outfile) = self.tp.finalize(Some(&self.tmpfile))?;

      // A file-backed TmpFile can not be persisted without a target path.
      // Report this as an error rather than panicking.
      let Some(outfile) = outfile else {
        let err = std::io::Error::new(
          std::io::ErrorKind::InvalidInput,
          "An output file was not specified."
        );
        return Err(err.into());
      };

      // Hard link temporary file to persistent file, unless the file exists
      // already.
      if !outfile.exists() {
        fs::hard_link(&self.tmpfile, &outfile)?;
      }

      (Output::File(outfile), t)
    };

    Ok(Persisted {
      output,
      size: self.size,
      procres: t,
      duration: self.start_time.elapsed()
    })
  }
}

impl<T, E> TmpFile<T, E>
where
  E: From<std::io::Error>
{
  /// Create a new [`TmpFile`].
  ///
  /// # Errors
  /// If the temporary file could not be opened for writing `std::io::Error` is
  /// returned.
  pub fn new<P>(
    fname: P,
    tp: Box<dyn TmpProc<Output = T, Error = E> + Send>
  ) -> Result<Self, std::io::Error>
  where
    P: AsRef<Path>
  {
    let tmpfile = fname.as_ref().to_path_buf();
    let f = fs::File::create(&tmpfile)?;
    let f = Box::new(f);
    Ok(Self {
      tmpfile,
      f: Some(f),
      tp,
      size: 0,
      start_time: Instant::now(),
      membuf: None,
      #[cfg(feature = "defer-persist")]
      sctx: None
    })
  }

  /// Create a new [`TmpFile`] that will not write to file unless the size
  /// exceeds a specified size.
  ///
  /// # Errors
  /// If the temporary file could not be opened for writing `std::io::Error` is
  /// returned.
  pub fn with_minsize<P>(
    fname: P,
    tp: Box<dyn TmpProc<Output = T, Error = E> + Send>,
    minsize: usize
  ) -> Result<Self, std::io::Error>
  where
    P: AsRef<Path>
  {
    let tmpfile = fname.as_ref().to_path_buf();
    let f = fs::File::create(&tmpfile)?;
    let f = Box::new(f);
    let membuf = MemBuf {
      buf: vec![0u8; minsize],
      idx: 0
    };
    let membuf = Some(membuf);
    Ok(Self {
      tmpfile,
      f: Some(f),
      tp,
      size: 0,
      start_time: Instant::now(),
      membuf,
      #[cfg(feature = "defer-persist")]
      sctx: None
    })
  }


  /// Persist the hitherto temporary file.
  ///
  /// The location of the persisted file will be determined by the [`TmpProc`]
  /// object that was passed into [`TmpFile::new()`] or
  /// [`TmpFile::with_minsize()`].
  ///
  /// Consumes the `TmpFile`; the temporary file itself is removed when the
  /// consumed object is dropped at the end of this call.
  ///
  /// # Errors
  /// If it was not possible to persist, the application-defined error `E` will
  /// be returned.
  #[cfg_attr(
    feature = "defer-persist",
    doc = r"
# Panics
If the `defer-persist` feature is used: If the `TmpFile` has previously
registered to receive the finalization results via a channel using
[`TmpFile::defer_persist()`] this method will cause a panic.
"
  )]
  pub fn persist(mut self) -> Result<Persisted<T>, E> {
    // Explicit and deferred persist are mutually exclusive.
    #[cfg(feature = "defer-persist")]
    assert!(
      self.sctx.is_none(),
      "Can not persist TmpFile that has been configured for deferred persist"
    );

    self.inner_persist()
  }

  /// Arrange for this temporary file to be persisted when it is dropped, and
  /// have the finalized results delivered through a one-shot channel.
  ///
  /// This is useful when the `TmpFile` is type-erased into a
  /// `dyn std::io::Write`, which loses access to [`TmpFile::persist()`].
  /// The returned wait context is used to receive the results.
  ///
  /// # Caveat
  /// Deferring the persist changes the semantics of the `TmpFile` from
  /// "assume failure" to "assume success".  If an error occurs which should
  /// cause the temporary file to no longer be persisted, the owner of the
  /// `TmpFile` must call [`TmpFile::cancel()`] on it.
  ///
  /// # Panics
  /// This method must only be called once per `TmpFile` object.  Calling it
  /// a second time will cause a panic.
  #[cfg(feature = "defer-persist")]
  #[cfg_attr(docsrs, doc(cfg(feature = "defer-persist")))]
  pub fn defer_persist(&mut self) -> swctx::WaitCtx<Persisted<T>, (), E> {
    assert!(
      self.sctx.is_none(),
      "TmpFile already configured for deferred persist"
    );

    // Keep the setting half for the Drop handler; hand out the waiting half.
    let (setter, waiter) = swctx::mkpair();
    self.sctx = Some(setter);
    waiter
  }

  /// Abort a deferred persist request.
  ///
  /// Consumes the `TmpFile` without finalizing it.
  #[cfg(feature = "defer-persist")]
  #[cfg_attr(docsrs, doc(cfg(feature = "defer-persist")))]
  pub fn cancel(mut self) {
    // Discard the SetCtx so that the Drop handler, which runs when `self`
    // goes out of scope below, does not attempt to finalize/persist.
    drop(self.sctx.take());
  }
}


impl<T, E> Write for TmpFile<T, E>
where
  E: From<std::io::Error>
{
  fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
    // If there's a memory buffer, then append to it.  Unless this write would
    // overflow the membuf, in which case switch to using a file.
    if let Some(ref mut membuf) = self.membuf {
      if membuf.idx + buf.len() > membuf.buf.len() {
        // Current write index + size of input buffer would exceed maximum
        // buffer size.

        // Open temporary file and transfer the _existing_ memory buffer to it
        let f = fs::File::create(&self.tmpfile)?;
        let mut f = Box::new(f);
        if membuf.idx > 0 {
          f.write_all(&membuf.buf[..membuf.idx])?;
        }

        // Store file handle in context
        self.f = Some(f);

        // Clear memory buffer
        self.membuf = None;
      } else {
        // There's still room.  Append to memory buffer.
        membuf.buf[membuf.idx..(membuf.idx + buf.len())].copy_from_slice(buf);

        // Move ahead write-pointer
        membuf.idx += buf.len();

        // Update total written size
        self.size += buf.len() as u64;

        // Update TmpProc
        self.tp.update(buf);

        return Ok(buf.len());
      }
    }

    // At this point the Tmpfile is writing to a file in the file system.  The
    // memory buffer case should have returned early.
    let Some(ref mut f) = self.f else {
      panic!("No file?");
    };

    let n = f.write(buf)?;
    self.tp.update(&buf[..n]);
    self.size += n as u64;

    Ok(n)
  }

  fn flush(&mut self) -> Result<(), std::io::Error> {
    if let Some(ref mut f) = self.f {
      f.flush()?;
    }
    Ok(())
  }
}

impl<T, E> Drop for TmpFile<T, E>
where
  E: From<std::io::Error>
{
  /// Close the temporary file, perform a deferred persist if one has been
  /// requested, and finally remove the temporary file.
  fn drop(&mut self) {
    // Make sure the file handle is closed.
    drop(self.f.take());

    // A pending SetCtx means a deferred persist was requested: finalize now
    // and report the outcome to the waiting side.
    #[cfg(feature = "defer-persist")]
    if let Some(sctx) = self.sctx.take() {
      match self.inner_persist() {
        Ok(persisted) => {
          // ToDo: Log error?
          let _ = sctx.set(persisted);
        }
        Err(err) => {
          // ToDo: Log error?
          let _ = sctx.fail(err);
        }
      }
    }

    // Best-effort removal of the temporary file; a failure is ignored.
    // ToDo: Log error?
    let _ = fs::remove_file(&self.tmpfile);
  }
}

// vim: set ft=rust et sw=2 ts=2 sts=2 cinoptions=2 tw=79 :