// atom_file/lib.rs
1//! [`AtomicFile`] provides buffered concurrent access to files with async atomic commit.
2//!
3//! [`BasicAtomicFile`] is a non-async alternative.
4
5#![deny(missing_docs)]
6
7use rustc_hash::FxHashMap as HashMap;
8use std::cmp::min;
9use std::sync::{Arc, Mutex, RwLock};
10use std::cell::RefCell;
11
12#[cfg(feature = "pstd")]
13use pstd::collections::BTreeMap;
14#[cfg(not(feature = "pstd"))]
15use std::collections::BTreeMap;
16
/// Shared, immutable byte buffer (```Arc<Vec<u8>>```) — cloning is a cheap refcount bump,
/// so the same write payload can sit in several maps at once without copying.
pub type Data = Arc<Vec<u8>>;
19
/// Based on [BasicAtomicFile] which makes sure that updates are all-or-nothing.
/// Performs commit asynchronously.
///
/// # Example
///
/// ```
/// use atom_file::{AtomicFile,DummyFile,MemFile,BasicStorage};
/// let mut af = AtomicFile::new(MemFile::new(), DummyFile::new());
/// af.write( 0, &[1,2,3,4] );
/// af.commit(4);
/// af.wait_complete();
/// ```
///
/// Atomic file has two maps of writes. On commit, the latest batch of writes are sent to be written to underlying
/// storage, and are also applied to the second map in the "CommitFile". The CommitFile map is reset when all
/// the updates to underlying storage have been applied.
pub struct AtomicFile {
    /// New updates are written here.
    map: WMap,
    /// Underlying file, with previous updates mapped.
    cf: Arc<RwLock<CommitFile>>,
    /// File size.
    size: u64,
    /// For sending update maps to be saved.
    tx: std::sync::mpsc::Sender<(u64, WMap)>,
    /// Held by update process while it is active.
    busy: Arc<Mutex<()>>,
    /// Limit on size of CommitFile map.
    map_lim: usize,
}
50
impl AtomicFile {
    /// Construct AtomicFile with default limits. stg is the main underlying storage, upd is temporary storage for updates during commit.
    pub fn new(stg: Box<dyn Storage>, upd: Box<dyn BasicStorage>) -> Box<Self> {
        Self::new_with_limits(stg, upd, &Limits::default())
    }

    /// Construct AtomicFile with specified limits.
    ///
    /// Spawns a background thread owning a [BasicAtomicFile]; maps of writes are
    /// passed to it over an mpsc channel, one message per commit.
    pub fn new_with_limits(
        stg: Box<dyn Storage>,
        upd: Box<dyn BasicStorage>,
        lim: &Limits,
    ) -> Box<Self> {
        let size = stg.size();
        // The BasicAtomicFile gets a clone of the storage handle; both handles
        // must refer to the same underlying file (see Storage::clone impls).
        let mut baf = BasicAtomicFile::new(stg.clone(), upd, lim);

        let (tx, rx) = std::sync::mpsc::channel::<(u64, WMap)>();
        let cf = Arc::new(RwLock::new(CommitFile::new(stg, lim.rbuf_mem)));
        let busy = Arc::new(Mutex::new(())); // Lock held while async save thread is active.

        // Start the thread which does save asynchronously.
        let (cf1, busy1) = (cf.clone(), busy.clone());

        std::thread::spawn(move || {
            // Loop that receives a map of updates and applies it to BasicAtomicFile.
            // Exits when the sender (the AtomicFile) is dropped.
            while let Ok((size, map)) = rx.recv() {
                let _lock = busy1.lock(); // Guard kept alive for the whole iteration.
                baf.map = map;
                baf.commit(size);
                // Tell the CommitFile one batch is durable; it resets its map when todo hits 0.
                cf1.write().unwrap().done_one();
            }
        });
        Box::new(Self {
            map: WMap::default(),
            cf,
            size,
            tx,
            busy,
            map_lim: lim.map_lim,
        })
    }
}
92
// Marker impl: AtomicFile satisfies [Storage] (BasicStorage + Sync), so it can be shared between threads.
impl Storage for AtomicFile {}
94
impl BasicStorage for AtomicFile {
    /// Queue the current batch of writes for asynchronous saving.
    ///
    /// The batch is applied synchronously to the CommitFile map (so subsequent
    /// reads see it immediately), then sent to the save thread for durable storage.
    fn commit(&mut self, size: u64) {
        self.size = size;
        if self.map.is_empty() {
            return;
        }
        // Back-pressure: if the accumulated CommitFile map is too big,
        // block until the save thread has caught up.
        if self.cf.read().unwrap().map.len() > self.map_lim {
            self.wait_complete();
        }
        let map = std::mem::take(&mut self.map);
        let cf = &mut *self.cf.write().unwrap();
        cf.todo += 1;
        // Apply map of updates to CommitFile.
        map.to_storage(cf);
        // Send map of updates to thread to be written to underlying storage.
        self.tx.send((size, map)).unwrap();
    }

    fn size(&self) -> u64 {
        self.size
    }

    /// Reads consult the uncommitted map first, falling back to the CommitFile
    /// (which itself layers committed-but-unsaved writes over underlying storage).
    fn read(&self, start: u64, data: &mut [u8]) {
        self.map.read(start, data, &*self.cf.read().unwrap());
    }

    fn write_data(&mut self, start: u64, data: Data, off: usize, len: usize) {
        self.map.write(start, data, off, len);
    }

    fn write(&mut self, start: u64, data: &[u8]) {
        let len = data.len();
        let d = Arc::new(data.to_vec());
        self.write_data(start, d, 0, len);
    }

    /// Block until the save thread has applied every outstanding commit.
    /// Acquiring `busy` blocks while the save thread is mid-batch, so this
    /// loop sleeps rather than spinning hot.
    fn wait_complete(&self) {
        while self.cf.read().unwrap().todo != 0 {
            let _x = self.busy.lock();
        }
    }
}
137
/// Committed-but-not-yet-saved view of the underlying storage: a map of
/// committed writes layered over a read-buffered handle to the real file.
struct CommitFile {
    /// Buffered underlying storage.
    stg: ReadBufStg<256>,
    /// Map of committed updates.
    map: WMap,
    /// Number of outstanding unsaved commits.
    todo: usize,
}
146
impl CommitFile {
    fn new(stg: Box<dyn Storage>, buf_mem: usize) -> Self {
        Self {
            // 256-byte sectors; reads of <= 50 bytes go through the cache.
            stg: ReadBufStg::<256>::new(stg, 50, buf_mem / 256),
            map: WMap::default(),
            todo: 0,
        }
    }

    /// Called by the save thread after each batch is durable. When no commits
    /// remain outstanding, the underlying file reflects every write, so the
    /// overlay map and the (now stale) read cache can be dropped.
    fn done_one(&mut self) {
        self.todo -= 1;
        if self.todo == 0 {
            self.map = WMap::default();
            self.stg.reset();
        }
    }
}
164
// CommitFile is only ever used as a read target (via WMap::read) and a
// write_data sink (via WMap::to_storage); the other trait methods are
// deliberately unreachable and panic if called.
impl BasicStorage for CommitFile {
    /// Unsupported: panics.
    fn commit(&mut self, _size: u64) {
        panic!()
    }

    /// Unsupported: panics.
    fn size(&self) -> u64 {
        panic!()
    }

    fn read(&self, start: u64, data: &mut [u8]) {
        self.map.read(start, data, &self.stg);
    }

    fn write_data(&mut self, start: u64, data: Data, off: usize, len: usize) {
        self.map.write(start, data, off, len);
    }

    /// Unsupported: panics ( all writes arrive via write_data ).
    fn write(&mut self, _start: u64, _data: &[u8]) {
        panic!()
    }
}
186
/// Storage interface - BasicStorage is some kind of "file" storage.
///
/// read and write methods take a start which is a byte offset in the underlying file.
pub trait BasicStorage: Send {
    /// Get the size of the underlying storage.
    /// Note : this is valid initially and after a commit but is not defined after write is called.
    fn size(&self) -> u64;

    /// Read data.
    fn read(&self, start: u64, data: &mut [u8]);

    /// Write byte slice to storage.
    fn write(&mut self, start: u64, data: &[u8]);

    /// Write byte Vec. Default wraps the Vec in an [`Arc`] and delegates to [`Self::write_data`].
    fn write_vec(&mut self, start: u64, data: Vec<u8>) {
        let len = data.len();
        let d = Arc::new(data);
        self.write_data(start, d, 0, len);
    }

    /// Write Data slice. Default delegates to [`Self::write`]; implementations
    /// that keep the `Data` (e.g. in a map) override this to avoid copying.
    fn write_data(&mut self, start: u64, data: Data, off: usize, len: usize) {
        self.write(start, &data[off..off + len]);
    }

    /// Finish write transaction, size is new size of underlying storage.
    fn commit(&mut self, size: u64);

    /// Write u64 ( little-endian ).
    fn write_u64(&mut self, start: u64, value: u64) {
        self.write(start, &value.to_le_bytes());
    }

    /// Read u64 ( little-endian ).
    fn read_u64(&self, start: u64) -> u64 {
        let mut bytes = [0; 8];
        self.read(start, &mut bytes);
        u64::from_le_bytes(bytes)
    }

    /// Clone. note: provided method panics; only implementations that can
    /// cheaply share the underlying storage override it.
    fn clone(&self) -> Box<dyn Storage> {
        panic!()
    }

    /// Wait until current writes are complete. Default is a no-op, which is
    /// correct for synchronous implementations.
    fn wait_complete(&self) {}
}
236
/// BasicStorage that can be shared between threads ( adds the `Sync` requirement ).
pub trait Storage: BasicStorage + Sync {}
239
/// Simple implementation of [Storage] using ```Arc<Mutex<Vec<u8>>>```.
#[derive(Default)]
pub struct MemFile {
    /// The whole "file" as one shared, locked byte vector.
    v: Arc<Mutex<Vec<u8>>>,
}
245
246impl MemFile {
247    /// Get a new (boxed) MemFile.
248    pub fn new() -> Box<Self> {
249        Box::default()
250    }
251}
252
// Marker impl: MemFile's state is behind Arc<Mutex<..>>, so it is Sync and usable as [Storage].
impl Storage for MemFile {}
254
255impl BasicStorage for MemFile {
256    fn size(&self) -> u64 {
257        let v = self.v.lock().unwrap();
258        v.len() as u64
259    }
260
261    fn read(&self, off: u64, bytes: &mut [u8]) {
262        let off = off as usize;
263        let len = bytes.len();
264        let mut v = self.v.lock().unwrap();
265        if off + len > v.len() {
266            v.resize(off + len, 0);
267        }
268        bytes.copy_from_slice(&v[off..off + len]);
269    }
270
271    fn write(&mut self, off: u64, bytes: &[u8]) {
272        let off = off as usize;
273        let len = bytes.len();
274        let mut v = self.v.lock().unwrap();
275        if off + len > v.len() {
276            v.resize(off + len, 0);
277        }
278        v[off..off + len].copy_from_slice(bytes);
279    }
280
281    fn commit(&mut self, size: u64) {
282        let mut v = self.v.lock().unwrap();
283        v.resize(size as usize, 0);
284    }
285
286    fn clone(&self) -> Box<dyn Storage> {
287        Box::new(Self { v: self.v.clone() })
288    }
289}
290
291use std::{fs, fs::OpenOptions, io::Read, io::Seek, io::SeekFrom, io::Write};
292
/// Thin wrapper around [`fs::File`] providing offset-based read/write/commit.
struct FileInner {
    /// The underlying operating-system file handle.
    f: fs::File,
}

impl FileInner {
    /// Construct from filename, creating the file if necessary ( existing contents kept ).
    pub fn new(filename: &str) -> Self {
        Self {
            f: OpenOptions::new()
                .read(true)
                .write(true)
                .create(true)
                .truncate(false)
                .open(filename)
                .unwrap(),
        }
    }

    /// Current file length in bytes ( leaves the cursor at EOF; every read/write re-seeks first ).
    fn size(&mut self) -> u64 {
        self.f.seek(SeekFrom::End(0)).unwrap()
    }

    /// Read from offset. `Read::read` may return fewer bytes than requested,
    /// so loop until the buffer is full or EOF; on EOF the tail of `bytes` is
    /// left untouched ( callers that read past EOF must pre-zero the buffer ).
    fn read(&mut self, off: u64, bytes: &mut [u8]) {
        self.f.seek(SeekFrom::Start(off)).unwrap();
        let mut done = 0;
        while done < bytes.len() {
            let n = self.f.read(&mut bytes[done..]).unwrap();
            if n == 0 {
                break; // EOF.
            }
            done += n;
        }
    }

    /// Write all of `bytes` at offset `off`.
    fn write(&mut self, off: u64, bytes: &[u8]) {
        // The list of operating systems which auto-zero is likely more than this...research is todo.
        #[cfg(not(any(target_os = "windows", target_os = "linux")))]
        {
            let size = self.f.seek(SeekFrom::End(0)).unwrap();
            if off > size {
                self.f.set_len(off).unwrap();
            }
        }
        self.f.seek(SeekFrom::Start(off)).unwrap();
        // write_all retries on short writes; the original single `write` could drop bytes.
        self.f.write_all(bytes).unwrap();
    }

    /// Set the file length and flush data + metadata to disk.
    fn commit(&mut self, size: u64) {
        self.f.set_len(size).unwrap();
        self.f.sync_all().unwrap();
    }
}
338
/// Can be used for atomic upd file ( does not implement Sync ).
/// `RefCell` gives interior mutability for `&self` methods without a lock,
/// which is why this type is single-threaded only.
pub struct FastFileStorage {
    /// The wrapped file handle.
    file: RefCell<FileInner>,
}
343
344impl FastFileStorage {
345    /// Construct from filename.
346    pub fn new(filename: &str) -> Box<Self> {
347        Box::new(Self {
348            file: RefCell::new(FileInner::new(filename)),
349        })
350    }
351}
352
353impl BasicStorage for FastFileStorage {
354    fn size(&self) -> u64 {
355        self.file.borrow_mut().size()
356    }
357    fn read(&self, off: u64, bytes: &mut [u8]) {
358        self.file.borrow_mut().read(off, bytes);
359    }
360
361    fn write(&mut self, off: u64, bytes: &[u8]) {
362        self.file.borrow_mut().write(off, bytes);
363    }
364
365    fn commit(&mut self, size: u64) {
366        self.file.borrow_mut().commit(size);
367    }
368}
369
/// Simple implementation of [Storage] using [`std::fs::File`].
/// The `Arc<Mutex<..>>` makes it Sync and lets `clone` share one file handle.
pub struct SimpleFileStorage {
    /// Shared, locked file handle.
    file: Arc<Mutex<FileInner>>,
}
374
375impl SimpleFileStorage {
376    /// Construct from filename.
377    pub fn new(filename: &str) -> Box<Self> {
378        Box::new(Self {
379            file: Arc::new(Mutex::new(FileInner::new(filename))),
380        })
381    }
382}
383
// Marker impl: the Mutex-guarded file handle makes SimpleFileStorage Sync, so it qualifies as [Storage].
impl Storage for SimpleFileStorage {}
385
386impl BasicStorage for SimpleFileStorage {
387    fn size(&self) -> u64 {
388        self.file.lock().unwrap().size()
389    }
390
391    fn read(&self, off: u64, bytes: &mut [u8]) {
392        self.file.lock().unwrap().read(off, bytes);
393    }
394
395    fn write(&mut self, off: u64, bytes: &[u8]) {
396        self.file.lock().unwrap().write(off, bytes);
397    }
398
399    fn commit(&mut self, size: u64) {
400        self.file.lock().unwrap().commit(size);
401    }
402
403    fn clone(&self) -> Box<dyn Storage> {
404        Box::new(Self {
405            file: self.file.clone(),
406        })
407    }
408}
409
410/// Alternative to SimpleFileStorage that uses multiple [SimpleFileStorage]s to allow parallel reads by different threads.
411#[allow(clippy::vec_box)]
412pub struct MultiFileStorage {
413    filename: String,
414    files: Arc<Mutex<Vec<FileInner>>>,
415}
416
417impl MultiFileStorage {
418    /// Create new MultiFileStorage.
419    pub fn new(filename: &str) -> Box<Self> {
420        Box::new(Self {
421            filename: filename.to_owned(),
422            files: Arc::new(Mutex::new(Vec::new())),
423        })
424    }
425
426    fn get_file(&self) -> FileInner {
427        match self.files.lock().unwrap().pop() {
428            Some(f) => f,
429            _ => FileInner::new(&self.filename),
430        }
431    }
432
433    fn put_file(&self, f: FileInner) {
434        self.files.lock().unwrap().push(f);
435    }
436}
437
// Marker impl: pool state is behind Arc<Mutex<..>>, so MultiFileStorage is Sync and usable as [Storage].
impl Storage for MultiFileStorage {}
439
440impl BasicStorage for MultiFileStorage {
441    fn size(&self) -> u64 {
442        let mut f = self.get_file();
443        let result = f.size();
444        self.put_file(f);
445        result
446    }
447
448    fn read(&self, off: u64, bytes: &mut [u8]) {
449        let mut f = self.get_file();
450        f.read(off, bytes);
451        self.put_file(f);
452    }
453
454    fn write(&mut self, off: u64, bytes: &[u8]) {
455        let mut f = self.get_file();
456        f.write(off, bytes);
457        self.put_file(f);
458    }
459
460    fn commit(&mut self, size: u64) {
461        let mut f = self.get_file();
462        f.commit(size);
463        self.put_file(f);
464    }
465
466    fn clone(&self) -> Box<dyn Storage> {
467        Box::new(Self {
468            filename: self.filename.clone(),
469            files: self.files.clone(),
470        })
471    }
472}
473
/// Dummy Stg that can be used for Atomic upd file if "reliable" atomic commits are not required.
pub struct DummyFile {}

impl DummyFile {
    /// Construct.
    pub fn new() -> Box<Self> {
        Box::new(DummyFile {})
    }
}
482
// Marker impl: DummyFile holds no state, so it is trivially Sync and usable as [Storage].
impl Storage for DummyFile {}
484
485impl BasicStorage for DummyFile {
486    fn size(&self) -> u64 {
487        0
488    }
489
490    fn read(&self, _off: u64, _bytes: &mut [u8]) {}
491
492    fn write(&mut self, _off: u64, _bytes: &[u8]) {}
493
494    fn commit(&mut self, _size: u64) {}
495
496    fn clone(&self) -> Box<dyn Storage> {
497        Self::new()
498    }
499}
500
/// Memory configuration limits.
#[non_exhaustive]
pub struct Limits {
    /// Limit on size of commit write map, default is 5000.
    pub map_lim: usize,
    /// Memory for buffering small reads, default is 0x200000 ( 2MB ).
    pub rbuf_mem: usize,
    /// Memory for buffering writes to main storage, default is 0x100000 (1MB).
    pub swbuf: usize,
    /// Memory for buffering writes to temporary storage, default is 0x100000 (1MB).
    pub uwbuf: usize,
}

impl Default for Limits {
    fn default() -> Self {
        Limits {
            map_lim: 5_000,
            rbuf_mem: 2 * 1024 * 1024, // 0x200000
            swbuf: 1024 * 1024,        // 0x100000
            uwbuf: 1024 * 1024,        // 0x100000
        }
    }
}
524
/// Write Buffer: coalesces sequential writes into one large write to the
/// underlying storage. `pos == u64::MAX` is the "no buffered position" sentinel.
struct WriteBuffer {
    /// Current write index into buf.
    ix: usize,
    /// Current file position ( u64::MAX when the buffer is not anchored anywhere ).
    pos: u64,
    /// Underlying storage.
    pub stg: Box<dyn BasicStorage>,
    /// Buffer.
    buf: Vec<u8>,
}
536
impl WriteBuffer {
    /// Construct with a fixed buffer of `buf_size` bytes.
    pub fn new(stg: Box<dyn BasicStorage>, buf_size: usize) -> Self {
        Self {
            ix: 0,
            // Sentinel: no buffered data yet. Note ix is always 0 whenever
            // pos is the sentinel, so pos + ix below cannot overflow.
            pos: u64::MAX,
            stg,
            buf: vec![0; buf_size],
        }
    }

    /// Write data to specified offset.
    /// Non-contiguous writes flush the buffer first; contiguous ones append.
    pub fn write(&mut self, off: u64, data: &[u8]) {
        if self.pos + self.ix as u64 != off {
            // Not contiguous with buffered data: flush and re-anchor at off.
            self.flush(off);
        }
        let mut done: usize = 0;
        let mut todo: usize = data.len();
        while todo > 0 {
            let mut n: usize = self.buf.len() - self.ix;
            if n == 0 {
                // Buffer full: flush and continue at the current file offset.
                self.flush(off + done as u64);
                n = self.buf.len();
            }
            if n > todo {
                n = todo;
            }
            self.buf[self.ix..self.ix + n].copy_from_slice(&data[done..done + n]);
            todo -= n;
            done += n;
            self.ix += n;
        }
    }

    /// Write any buffered bytes to storage at the old position, then
    /// re-anchor the (now empty) buffer at `new_pos`.
    fn flush(&mut self, new_pos: u64) {
        if self.ix > 0 {
            self.stg.write(self.pos, &self.buf[0..self.ix]);
        }
        self.ix = 0;
        self.pos = new_pos;
    }

    /// Commit: flush remaining buffered bytes ( resetting pos to the sentinel ),
    /// then commit the underlying storage.
    pub fn commit(&mut self, size: u64) {
        self.flush(u64::MAX);
        self.stg.commit(size);
    }

    /// Write u64 ( little-endian ).
    pub fn write_u64(&mut self, start: u64, value: u64) {
        self.write(start, &value.to_le_bytes());
    }
}
590
/// ReadBufStg buffers small (up to limit) reads to the underlying storage using multiple buffers. Only supported functions are read and reset.
///
/// See implementation of AtomicFile for how this is used in conjunction with WMap.
///
/// N is buffer size ( in bytes, one buffer per N-byte sector ).
struct ReadBufStg<const N: usize> {
    /// Underlying storage.
    stg: Box<dyn Storage>,
    /// Buffers ( sector cache ), behind a Mutex so read can take &self.
    buf: Mutex<ReadBuffer<N>>,
    /// Read size that is considered small.
    limit: usize,
}
604
impl<const N: usize> Drop for ReadBufStg<N> {
    /// Release the cached sector buffers when the storage wrapper is dropped.
    fn drop(&mut self) {
        self.reset();
    }
}
610
611impl<const N: usize> ReadBufStg<N> {
612    /// limit is the size of a read that is considered "small", max_buf is the maximum number of buffers used.
613    pub fn new(stg: Box<dyn Storage>, limit: usize, max_buf: usize) -> Self {
614        Self {
615            stg,
616            buf: Mutex::new(ReadBuffer::<N>::new(max_buf)),
617            limit,
618        }
619    }
620
621    /// Clears the buffers.
622    fn reset(&mut self) {
623        self.buf.lock().unwrap().reset();
624    }
625}
626
627impl<const N: usize> BasicStorage for ReadBufStg<N> {
628    /// Read data from storage.
629    fn read(&self, start: u64, data: &mut [u8]) {
630        if data.len() <= self.limit {
631            self.buf.lock().unwrap().read(&*self.stg, start, data);
632        } else {
633            self.stg.read(start, data);
634        }
635    }
636
637    /// Panics.
638    fn size(&self) -> u64 {
639        panic!()
640    }
641
642    /// Panics.
643    fn write(&mut self, _start: u64, _data: &[u8]) {
644        panic!();
645    }
646
647    /// Panics.
648    fn commit(&mut self, _size: u64) {
649        panic!();
650    }
651}
652
// Marker impl: the Mutex-guarded cache makes ReadBufStg Sync, so it qualifies as [Storage].
impl<const N: usize> Storage for ReadBufStg<N> {}
654
/// Cache of N-byte sectors read from underlying storage.
struct ReadBuffer<const N: usize> {
    /// Maps sector numbers to cached buffers.
    map: HashMap<u64, Box<[u8; N]>>,
    /// Maximum number of buffers.
    max_buf: usize,
}
661
impl<const N: usize> ReadBuffer<N> {
    fn new(max_buf: usize) -> Self {
        Self {
            map: HashMap::default(),
            max_buf,
        }
    }

    /// Discard all cached sectors.
    fn reset(&mut self) {
        self.map.clear();
    }

    /// Read `data` from `off`, sector by sector, fetching each missing sector
    /// from `stg` and caching it.
    fn read(&mut self, stg: &dyn BasicStorage, off: u64, data: &mut [u8]) {
        let mut done = 0;
        while done < data.len() {
            // Shadowed: absolute offset of the next byte still to read.
            let off = off + done as u64;
            let sector = off / N as u64;
            let disp = (off % N as u64) as usize;
            // Bytes available in this sector from disp, capped by what remains.
            let amount = min(data.len() - done, N - disp);

            let p = self.map.entry(sector).or_insert_with(|| {
                // Cache miss: fetch the whole sector from underlying storage.
                let mut p: Box<[u8; N]> = vec![0; N].try_into().unwrap();
                stg.read(sector * N as u64, &mut *p);
                p
            });
            data[done..done + amount].copy_from_slice(&p[disp..disp + amount]);
            done += amount;
        }
        // Crude eviction: once full, drop the whole cache rather than tracking LRU.
        if self.map.len() >= self.max_buf {
            self.reset();
        }
    }
}
695
#[derive(Default)]
/// Slice of Data to be written to storage ( a view `data[off..off+len]` ).
struct DataSlice {
    /// Slice data ( shared; several DataSlices may view one buffer ).
    pub data: Data,
    /// Start of slice.
    pub off: usize,
    /// Length of slice.
    pub len: usize,
}
706
707impl DataSlice {
708    /// Get reference to the whole slice.
709    pub fn all(&self) -> &[u8] {
710        &self.data[self.off..self.off + self.len]
711    }
712    /// Get reference to part of slice.
713    pub fn part(&self, off: usize, len: usize) -> &[u8] {
714        &self.data[self.off + off..self.off + off + len]
715    }
716    /// Trim specified amount from start of slice.
717    pub fn trim(&mut self, trim: usize) {
718        self.off += trim;
719        self.len -= trim;
720    }
721    /// Take the data.
722    #[allow(dead_code)]
723    pub fn take(&mut self) -> Data {
724        std::mem::take(&mut self.data)
725    }
726}
727
#[derive(Default)]
/// Updateable store based on some underlying storage.
struct WMap {
    /// Map of writes. Key is the END of the slice ( start = end - len ),
    /// which makes range queries from a read offset straightforward.
    map: BTreeMap<u64, DataSlice>,
}
734
impl WMap {
    /// Is the map empty?
    pub fn is_empty(&self) -> bool {
        self.map.is_empty()
    }

    /// Number of key-value pairs in the map.
    pub fn len(&self) -> usize {
        self.map.len()
    }

    /// Take the map and convert it to a Vec of (start, slice) pairs, in offset order.
    pub fn convert_to_vec(&mut self) -> Vec<(u64, DataSlice)> {
        let map = std::mem::take(&mut self.map);
        let mut result = Vec::with_capacity(map.len());
        for (end, v) in map {
            let start = end - v.len as u64;
            result.push((start, v));
        }
        result
    }

    /// Write the map into storage ( slices are shared via Data clone, not copied ).
    pub fn to_storage(&self, stg: &mut dyn BasicStorage) {
        for (end, v) in self.map.iter() {
            let start = end - v.len as u64;
            stg.write_data(start, v.data.clone(), v.off, v.len);
        }
    }

    #[cfg(not(feature = "pstd"))]
    /// Write to storage, existing writes which overlap with new write need to be trimmed or removed.
    ///
    /// std BTreeMap variant: overlap fixes are gathered into insert/remove lists
    /// during iteration and applied afterwards ( no cursor API available ).
    pub fn write(&mut self, start: u64, data: Data, off: usize, len: usize) {
        if len != 0 {
            let (mut insert, mut remove) = (Vec::new(), Vec::new());
            let end = start + len as u64;
            // Only entries whose END is after `start` can overlap the new write.
            for (ee, v) in self.map.range_mut(start + 1..) {
                let ee = *ee;
                let es = ee - v.len as u64; // Existing write Start.
                if es >= end {
                    // Existing write starts after end of new write, nothing to do.
                    break;
                } else if start <= es {
                    if end < ee {
                        // New write starts before existing write, but doesn't subsume it. Trim existing write.
                        v.trim((end - es) as usize);
                        break;
                    }
                    // New write subsumes existing write entirely, remove existing write.
                    remove.push(ee);
                } else if end < ee {
                    // New write starts in middle of existing write, ends before end of existing write,
                    // put start of existing write in insert list, trim existing write.
                    insert.push((es, v.data.clone(), v.off, (start - es) as usize));
                    v.trim((end - es) as usize);
                    break;
                } else {
                    // New write starts in middle of existing write, ends after existing write,
                    // put start of existing write in insert list, remove existing write.
                    insert.push((es, v.take(), v.off, (start - es) as usize));
                    remove.push(ee);
                }
            }
            for end in remove {
                self.map.remove(&end);
            }
            for (start, data, off, len) in insert {
                self.map
                    .insert(start + len as u64, DataSlice { data, off, len });
            }
            // Finally insert the new write itself ( keyed by its end offset ).
            self.map
                .insert(start + len as u64, DataSlice { data, off, len });
        }
    }

    #[cfg(feature = "pstd")]
    /// Write to storage, existing writes which overlap with new write need to be trimmed or removed.
    ///
    /// pstd variant: uses the cursor API to splice in place, avoiding the
    /// temporary insert/remove lists of the std variant.
    pub fn write(&mut self, start: u64, data: Data, off: usize, len: usize) {
        if len != 0 {
            let end = start + len as u64;
            let mut c = self
                .map
                .lower_bound_mut(std::ops::Bound::Excluded(&start))
                .with_mutable_key();
            while let Some((eend, v)) = c.next() {
                let ee = *eend;
                let es = ee - v.len as u64; // Existing write Start.
                if es >= end {
                    // Existing write starts after end of new write, nothing to do.
                    c.prev();
                    break;
                } else if start <= es {
                    if end < ee {
                        // New write starts before existing write, but doesn't subsume it. Trim existing write.
                        v.trim((end - es) as usize);
                        c.prev();
                        break;
                    }
                    // New write subsumes existing write entirely, remove existing write.
                    c.remove_prev();
                } else if end < ee {
                    // New write starts in middle of existing write, ends before end of existing write,
                    // trim existing write, insert start of existing write.
                    let (data, off, len) = (v.data.clone(), v.off, (start - es) as usize);
                    v.trim((end - es) as usize);
                    c.prev();
                    c.insert_before_unchecked(es + len as u64, DataSlice { data, off, len });
                    break;
                } else {
                    // New write starts in middle of existing write, ends after existing write,
                    // Trim existing write ( modifies key, but this is ok as ordering is not affected ).
                    v.len = (start - es) as usize;
                    *eend = es + v.len as u64;
                }
            }
            // Insert the new write.
            c.insert_after_unchecked(start + len as u64, DataSlice { data, off, len });
        }
    }

    /// Read from storage, taking map of existing writes into account. Unwritten ranges are read from underlying storage.
    pub fn read(&self, start: u64, data: &mut [u8], u: &dyn BasicStorage) {
        let len = data.len();
        if len != 0 {
            let mut done = 0;
            // Walk entries whose END is after `start`, in ascending order.
            for (&end, v) in self.map.range(start + 1..) {
                let es = end - v.len as u64; // Existing write Start.
                let doff = start + done as u64;
                if es > doff {
                    // Gap before this write: read it from underlying storage.
                    let a = min(len - done, (es - doff) as usize);
                    u.read(doff, &mut data[done..done + a]);
                    done += a;
                    if done == len {
                        return;
                    }
                }
                // Use existing write.
                let skip = (start + done as u64 - es) as usize;
                let a = min(len - done, v.len - skip);
                data[done..done + a].copy_from_slice(v.part(skip, a));
                done += a;
                if done == len {
                    return;
                }
            }
            // Tail beyond the last mapped write comes from underlying storage.
            u.read(start + done as u64, &mut data[done..]);
        }
    }
}
885
/// Basis for [crate::AtomicFile] ( non-async alternative ). Provides two-phase commit and buffering of writes.
pub struct BasicAtomicFile {
    /// The main underlying storage.
    stg: WriteBuffer,
    /// Temporary storage for updates during commit ( the journal ).
    upd: WriteBuffer,
    /// Map of writes.
    map: WMap,
    /// List of writes ( populated from `map` during commit phase 1, drained in phase 2 ).
    list: Vec<(u64, DataSlice)>,
    /// Size of the committed file.
    size: u64,
}
898
impl BasicAtomicFile {
    /// stg is the main underlying storage, upd is temporary storage for updates during commit.
    pub fn new(stg: Box<dyn BasicStorage>, upd: Box<dyn BasicStorage>, lim: &Limits) -> Box<Self> {
        let size = stg.size();
        let mut result = Box::new(Self {
            stg: WriteBuffer::new(stg, lim.swbuf),
            upd: WriteBuffer::new(upd, lim.uwbuf),
            map: WMap::default(),
            list: Vec::new(),
            size,
        });
        result.init();
        result
    }

    /// Apply outstanding updates ( crash recovery ).
    ///
    /// upd layout: [0]=end position (0 means "no journal"), [8]=committed size,
    /// then records of (start:u64, len:u64, bytes) up to `end`.
    /// If a non-empty journal is found, replay it into stg and then clear it.
    fn init(&mut self) {
        let end = self.upd.stg.read_u64(0);
        let size = self.upd.stg.read_u64(8);
        if end == 0 {
            return;
        }
        // Journal integrity check: the recorded end must match the file size.
        assert!(end == self.upd.stg.size());
        let mut pos = 16;
        while pos < end {
            let start = self.upd.stg.read_u64(pos);
            pos += 8;
            let len = self.upd.stg.read_u64(pos);
            pos += 8;
            let mut buf = vec![0; len as usize];
            self.upd.stg.read(pos, &mut buf);
            pos += len;
            self.stg.write(start, &buf);
        }
        self.stg.commit(size);
        // Mark the journal as empty ( end = 0 ).
        self.upd.commit(0);
    }

    /// Perform the specified phase ( 1 or 2 ) of a two-phase commit.
    ///
    /// Phase 1 journals every write ( that touches existing data ) to upd and
    /// only then sets the journal end marker — so a crash either leaves an
    /// unset marker ( commit never happened ) or a complete journal ( init
    /// replays it ). Phase 2 applies the writes to stg and clears the journal.
    pub fn commit_phase(&mut self, size: u64, phase: u8) {
        if self.map.is_empty() && self.list.is_empty() {
            return;
        }
        if phase == 1 {
            self.list = self.map.convert_to_vec();

            // Write the updates to upd.
            // First set the end position to zero.
            self.upd.write_u64(0, 0);
            self.upd.write_u64(8, size);
            self.upd.commit(16); // Not clear if this is necessary.

            // Write the update records.
            let mut stg_written = false;
            let mut pos: u64 = 16;
            for (start, v) in self.list.iter() {
                let (start, len, data) = (*start, v.len as u64, v.all());
                if start >= self.size {
                    // Writes beyond current stg size can be written directly:
                    // a crash cannot corrupt data that did not exist before.
                    stg_written = true;
                    self.stg.write(start, data);
                } else {
                    self.upd.write_u64(pos, start);
                    pos += 8;
                    self.upd.write_u64(pos, len);
                    pos += 8;
                    self.upd.write(pos, data);
                    pos += len;
                }
            }
            if stg_written {
                self.stg.commit(size);
            }
            self.upd.commit(pos); // Not clear if this is necessary.

            // Set the end position — this is the atomic "commit point".
            self.upd.write_u64(0, pos);
            self.upd.write_u64(8, size);
            self.upd.commit(pos);
        } else {
            for (start, v) in self.list.iter() {
                if *start < self.size {
                    // Writes beyond current stg size have already been written.
                    self.stg.write(*start, v.all());
                }
            }
            self.list.clear();
            self.stg.commit(size);
            // Clear the journal ( end = 0 ): commit fully applied.
            self.upd.commit(0);
        }
    }
}
991
992impl BasicStorage for BasicAtomicFile {
993    fn commit(&mut self, size: u64) {
994        self.commit_phase(size, 1);
995        self.commit_phase(size, 2);
996        self.size = size;
997    }
998
999    fn size(&self) -> u64 {
1000        self.size
1001    }
1002
1003    fn read(&self, start: u64, data: &mut [u8]) {
1004        self.map.read(start, data, &*self.stg.stg);
1005    }
1006
1007    fn write_data(&mut self, start: u64, data: Data, off: usize, len: usize) {
1008        self.map.write(start, data, off, len);
1009    }
1010
1011    fn write(&mut self, start: u64, data: &[u8]) {
1012        let len = data.len();
1013        let d = Arc::new(data.to_vec());
1014        self.write_data(start, d, 0, len);
1015    }
1016}
1017    
/// Optimized implementation of [Storage] ( unix only ).
///
/// Uses positional I/O (`read_at` / `write_at`), so reads need only `&self`.
#[cfg(target_family = "unix")]
pub struct UnixFileStorage
{
    /// Cached logical file size; set on open, updated by `commit`.
    size: u64,
    /// Underlying OS file handle.
    f: fs::File,
}
1025#[cfg(target_family = "unix")]
1026impl UnixFileStorage
1027{
1028    /// Construct from filename.
1029    pub fn new(filename: &str) -> Self
1030    {
1031        let mut f = OpenOptions::new()
1032                .read(true)
1033                .write(true)
1034                .create(true)
1035                .truncate(false)
1036                .open(filename)
1037                .unwrap();
1038        let size = f.seek(SeekFrom::End(0)).unwrap();
1039        Self{ size, f }
1040    }
1041}
1042
// NOTE(review): empty marker impl — `Storage` evidently requires no methods
// beyond what `BasicStorage` already provides for this type; confirm against
// the trait definition.
#[cfg(target_family = "unix")]
impl Storage for UnixFileStorage {}
1045
1046#[cfg(target_family = "unix")]
1047use std::os::unix::fs::FileExt;
1048
1049#[cfg(target_family = "unix")]
1050impl BasicStorage for UnixFileStorage
1051{
1052    fn read(&self, start: u64, data: &mut [u8])
1053    {
1054        
1055        let _ = self.f.read_at(data, start );
1056    }
1057
1058    fn write(&mut self, start: u64, data: &[u8])
1059    {
1060        
1061        let _ = self.f.write_at(data, start );
1062    }
1063
1064    fn size(&self) -> u64 {
1065        self.size
1066    }
1067
1068    fn commit(&mut self, size: u64)
1069    {
1070        self.size = size;
1071        self.f.set_len(size).unwrap();
1072        self.f.sync_all().unwrap();
1073    }
1074}
1075
/// Optimised [Storage] for the current platform (unix: [UnixFileStorage]).
#[cfg(target_family = "unix")]
pub type OptFileStorage = UnixFileStorage;
1079
/// Optimized implementation of [Storage] ( windows only ).
///
/// Uses positional I/O (`seek_read` / `seek_write`) on the file handle.
#[cfg(target_family = "windows")]
pub struct WindowsFileStorage
{
    /// Cached logical file size; set on open, updated by `commit`.
    size: u64,
    /// Underlying OS file handle.
    f: fs::File,
}
#[cfg(target_family = "windows")]
impl WindowsFileStorage {
    /// Open (or create) `filename` read/write and record its current size.
    ///
    /// # Panics
    /// Panics if the file cannot be opened or its length determined.
    pub fn new(filename: &str) -> Self {
        let mut file = OpenOptions::new()
            .read(true)
            .write(true)
            .create(true)
            .truncate(false)
            .open(filename)
            .unwrap();
        // Seeking to the end yields the file's current length.
        let size = file.seek(SeekFrom::End(0)).unwrap();
        Self { size, f: file }
    }
}
1104
// NOTE(review): empty marker impl — `Storage` evidently requires no methods
// beyond what `BasicStorage` already provides for this type; confirm against
// the trait definition.
#[cfg(target_family = "windows")]
impl Storage for WindowsFileStorage {}
1107
1108#[cfg(target_family = "windows")]
1109use std::os::windows::fs::FileExt;
1110
1111#[cfg(target_family = "windows")]
1112impl BasicStorage for WindowsFileStorage
1113{
1114    fn read(&self, start: u64, data: &mut [u8])
1115    {
1116        
1117        let _ = self.f.seek_read(data, start );
1118    }
1119
1120    fn write(&mut self, start: u64, data: &[u8])
1121    {
1122        
1123        let _ = self.f.seek_write(data, start );
1124    }
1125
1126    fn size(&self) -> u64 {
1127        self.size
1128    }
1129
1130    fn commit(&mut self, size: u64)
1131    {
1132        self.size = size;
1133        self.f.set_len(size).unwrap();
1134        self.f.sync_all().unwrap();
1135    }
1136}
1137
/// Optimised [Storage] for the current platform (windows: [WindowsFileStorage]).
#[cfg(target_family = "windows")]
pub type OptFileStorage = WindowsFileStorage;
1141
1142
/// Optimised [Storage] for other platforms: falls back to [MultiFileStorage].
#[cfg(not(any(target_family = "unix",target_family = "windows")))]
pub type OptFileStorage = MultiFileStorage;
1146
#[cfg(test)]
/// Get amount of testing from environment variable TA (defaults to 1).
///
/// Panics if TA is set but is not a valid `usize`.
fn test_amount() -> usize {
    std::env::var("TA")
        .unwrap_or_else(|_| "1".to_string()) // lazy: no allocation when TA is set
        .parse()
        .expect("TA must be a non-negative integer")
}
1152
#[test]
fn test_atomic_file() {
    use rand::Rng;
    // Idea of test is to check AtomicFile and MemFile behave the same:
    // drive both with an identical random sequence of writes, reads and
    // commits, and assert that every read returns identical bytes.

    // TA environment variable scales how much work the test performs.
    let ta = test_amount();
    println!(" Test amount={}", ta);

    let mut rng = rand::thread_rng();

    for _ in 0..100 {
        let mut s1 = AtomicFile::new(MemFile::new(), MemFile::new());
        // let mut s1 = BasicAtomicFile::new(MemFile::new(), MemFile::new(), &Limits::default() );
        let mut s2 = MemFile::new();

        for _ in 0..1000 * ta {
            // Random offset in [0, 100) and length in [1, 20].
            let off: usize = rng.r#gen::<usize>() % 100;
            let mut len = 1 + rng.r#gen::<usize>() % 20;
            let w: bool = rng.r#gen();
            if w {
                // Write the same random bytes to both files.
                let mut bytes = Vec::new();
                while len > 0 {
                    len -= 1;
                    let b: u8 = rng.r#gen::<u8>();
                    bytes.push(b);
                }
                s1.write(off as u64, &bytes);
                s2.write(off as u64, &bytes);
            } else {
                // Read the same range from both files; results must agree.
                let mut b2 = vec![0; len];
                let mut b3 = vec![0; len];
                s1.read(off as u64, &mut b2);
                s2.read(off as u64, &mut b3);
                assert!(b2 == b3);
            }
            // Commit both files occasionally (~1 in 50 iterations).
            if rng.r#gen::<usize>() % 50 == 0 {
                s1.commit(200);
                s2.commit(200);
            }
        }
    }
}