// atom_file/lib.rs
1//! [`AtomicFile`] provides buffered concurrent access to files with async atomic commit.
2//!
3//! [`BasicAtomicFile`] is a non-async alternative.
4
5#![deny(missing_docs)]
6
7use rustc_hash::FxHashMap as HashMap;
8use std::cmp::min;
9use std::sync::{Arc, Mutex, RwLock};
10use std::cell::RefCell;
11
12#[cfg(feature = "pstd")]
13use pstd::collections::BTreeMap;
14#[cfg(not(feature = "pstd"))]
15use std::collections::BTreeMap;
16
/// Shared immutable byte buffer ( ```Arc<Vec<u8>>``` ); clones are cheap reference-count bumps.
pub type Data = Arc<Vec<u8>>;
19
/// Based on [BasicAtomicFile] which makes sure that updates are all-or-nothing.
/// Performs commit asynchronously.
///
/// # Example
///
/// ```
/// use atom_file::{AtomicFile,DummyFile,MemFile,BasicStorage};
/// let mut af = AtomicFile::new(MemFile::new(), DummyFile::new());
/// af.write( 0, &[1,2,3,4] );
/// af.commit(4);
/// af.wait_complete();
/// ```
///
/// Atomic file has two maps of writes. On commit, the latest batch of writes are sent to be written to underlying
/// storage, and are also applied to the second map in the "CommitFile". The CommitFile map is reset when all
/// the updates to underlying storage have been applied.
pub struct AtomicFile {
    /// New updates are written here.
    map: WMap,
    /// Underlying file, with previous updates mapped.
    cf: Arc<RwLock<CommitFile>>,
    /// File size.
    size: u64,
    /// For sending update maps to be saved.
    tx: std::sync::mpsc::Sender<(u64, WMap)>,
    /// Held by update process while it is active.
    busy: Arc<Mutex<()>>,
    /// Limit on size of CommitFile map.
    map_lim: usize,
}
50
impl AtomicFile {
    /// Construct AtomicFile with default limits. stg is the main underlying storage, upd is temporary storage for updates during commit.
    pub fn new(stg: Box<dyn Storage>, upd: Box<dyn BasicStorage>) -> Box<Self> {
        Self::new_with_limits(stg, upd, &Limits::default())
    }

    /// Construct Atomic file with specified limits.
    pub fn new_with_limits(
        stg: Box<dyn Storage>,
        upd: Box<dyn BasicStorage>,
        lim: &Limits,
    ) -> Box<Self> {
        let size = stg.size();
        // BasicAtomicFile owns a clone of stg and performs the actual durable commits.
        let mut baf = BasicAtomicFile::new(stg.clone(), upd, lim);

        let (tx, rx) = std::sync::mpsc::channel::<(u64, WMap)>();
        let cf = Arc::new(RwLock::new(CommitFile::new(stg, lim.rbuf_mem)));
        let busy = Arc::new(Mutex::new(())); // Lock held while async save thread is active.

        // Start the thread which does save asynchronously.
        let (cf1, busy1) = (cf.clone(), busy.clone());

        std::thread::spawn(move || {
            // Loop that receives a map of updates and applies it to BasicAtomicFile.
            // Exits when the owning AtomicFile ( the Sender ) is dropped.
            while let Ok((size, map)) = rx.recv() {
                // NOTE(review): the lock() Result is deliberately not unwrapped;
                // the guard is kept alive either way, so the lock is still held.
                let _lock = busy1.lock();
                baf.map = map;
                baf.commit(size);
                // Decrement the outstanding-commit count ( may clear the CommitFile map ).
                cf1.write().unwrap().done_one();
            }
        });
        Box::new(Self {
            map: WMap::default(),
            cf,
            size,
            tx,
            busy,
            map_lim: lim.map_lim,
        })
    }
}
92
impl Storage for AtomicFile {}

impl BasicStorage for AtomicFile {
    fn commit(&mut self, size: u64) {
        self.size = size;
        if self.map.is_empty() {
            return;
        }
        // Bound memory use: if too many committed-but-unsaved updates have
        // accumulated, wait for the save thread to drain the queue first.
        if self.cf.read().unwrap().map.len() > self.map_lim {
            self.wait_complete();
        }
        let map = std::mem::take(&mut self.map);
        let cf = &mut *self.cf.write().unwrap();
        cf.todo += 1;
        // Apply map of updates to CommitFile.
        map.to_storage(cf);
        // Send map of updates to thread to be written to underlying storage.
        // ( Done while still holding the CommitFile write lock, so readers
        // never observe the CommitFile without this batch applied. )
        self.tx.send((size, map)).unwrap();
    }

    fn size(&self) -> u64 {
        self.size
    }

    fn read(&self, start: u64, data: &mut [u8]) {
        // Latest ( uncommitted ) writes take precedence; gaps fall through to the
        // CommitFile ( committed but possibly not yet saved ) and then to underlying storage.
        self.map.read(start, data, &*self.cf.read().unwrap());
    }

    fn write_data(&mut self, start: u64, data: Data, off: usize, len: usize) {
        self.map.write(start, data, off, len);
    }

    fn write(&mut self, start: u64, data: &[u8]) {
        let len = data.len();
        let d = Arc::new(data.to_vec());
        self.write_data(start, d, 0, len);
    }

    fn wait_complete(&self) {
        // Spin until the save thread has applied all queued commits; taking the
        // busy lock blocks while the thread is in the middle of a commit.
        while self.cf.read().unwrap().todo != 0 {
            let _x = self.busy.lock();
        }
    }
}
137
/// State shared between [AtomicFile] and its save thread: buffered underlying
/// storage plus the map of updates that are committed but not yet saved.
struct CommitFile {
    /// Buffered underlying storage.
    stg: ReadBufStg<256>,
    /// Map of committed updates.
    map: WMap,
    /// Number of outstanding unsaved commits.
    todo: usize,
}
146
147impl CommitFile {
148    fn new(stg: Box<dyn Storage>, buf_mem: usize) -> Self {
149        Self {
150            stg: ReadBufStg::<256>::new(stg, 50, buf_mem / 256),
151            map: WMap::default(),
152            todo: 0,
153        }
154    }
155
156    fn done_one(&mut self) {
157        self.todo -= 1;
158        if self.todo == 0 {
159            self.map = WMap::default();
160            self.stg.reset();
161        }
162    }
163}
164
impl BasicStorage for CommitFile {
    /// Not supported ( CommitFile is updated only via write_data / done_one ).
    fn commit(&mut self, _size: u64) {
        panic!()
    }

    /// Not supported.
    fn size(&self) -> u64 {
        panic!()
    }

    fn read(&self, start: u64, data: &mut [u8]) {
        // Committed-but-unsaved updates take precedence over the buffered storage.
        self.map.read(start, data, &self.stg);
    }

    fn write_data(&mut self, start: u64, data: Data, off: usize, len: usize) {
        self.map.write(start, data, off, len);
    }

    /// Not supported ( use write_data ).
    fn write(&mut self, _start: u64, _data: &[u8]) {
        panic!()
    }
}
186
/// Storage interface - BasicStorage is some kind of "file" storage.
///
/// read and write methods take a start which is a byte offset in the underlying file.
pub trait BasicStorage: Send {
    /// Get the size of the underlying storage.
    /// Note : this is valid initially and after a commit but is not defined after write is called.
    fn size(&self) -> u64;

    /// Read data.
    fn read(&self, start: u64, data: &mut [u8]);

    /// Write byte slice to storage.
    fn write(&mut self, start: u64, data: &[u8]);

    /// Write byte Vec ( wrapped in an Arc and forwarded to write_data ).
    fn write_vec(&mut self, start: u64, data: Vec<u8>) {
        let len = data.len();
        let d = Arc::new(data);
        self.write_data(start, d, 0, len);
    }

    /// Write Data slice ( default forwards to write ).
    fn write_data(&mut self, start: u64, data: Data, off: usize, len: usize) {
        self.write(start, &data[off..off + len]);
    }

    /// Finish write transaction, size is new size of underlying storage.
    fn commit(&mut self, size: u64);

    /// Write u64 ( little-endian ).
    fn write_u64(&mut self, start: u64, value: u64) {
        self.write(start, &value.to_le_bytes());
    }

    /// Read u64 ( little-endian ).
    fn read_u64(&self, start: u64) -> u64 {
        let mut bytes = [0; 8];
        self.read(start, &mut bytes);
        u64::from_le_bytes(bytes)
    }

    /// Clone. note: provided method panics.
    fn clone(&self) -> Box<dyn Storage> {
        panic!()
    }

    /// Wait until current writes are complete ( default is a no-op ).
    fn wait_complete(&self) {}
}
236
/// BasicStorage that can be shared between threads ( adds the Sync bound ).
pub trait Storage: BasicStorage + Sync {}
239
/// Simple implementation of [Storage] using `Arc<Mutex<Vec<u8>>>`.
#[derive(Default)]
pub struct MemFile {
    /// Shared byte vector holding the file contents.
    v: Arc<Mutex<Vec<u8>>>,
}
245
246impl MemFile {
247    /// Get a new (boxed) MemFile.
248    pub fn new() -> Box<Self> {
249        Box::default()
250    }
251}
252
253impl Storage for MemFile {}
254
255impl BasicStorage for MemFile {
256    fn size(&self) -> u64 {
257        let v = self.v.lock().unwrap();
258        v.len() as u64
259    }
260
261    fn read(&self, off: u64, bytes: &mut [u8]) {
262        let off = off as usize;
263        let len = bytes.len();
264        let mut v = self.v.lock().unwrap();
265        if off + len > v.len() {
266            v.resize(off + len, 0);
267        }
268        bytes.copy_from_slice(&v[off..off + len]);
269    }
270
271    fn write(&mut self, off: u64, bytes: &[u8]) {
272        let off = off as usize;
273        let len = bytes.len();
274        let mut v = self.v.lock().unwrap();
275        if off + len > v.len() {
276            v.resize(off + len, 0);
277        }
278        v[off..off + len].copy_from_slice(bytes);
279    }
280
281    fn commit(&mut self, size: u64) {
282        let mut v = self.v.lock().unwrap();
283        v.resize(size as usize, 0);
284    }
285
286    fn clone(&self) -> Box<dyn Storage> {
287        Box::new(Self { v: self.v.clone() })
288    }
289}
290
291use std::{fs, fs::OpenOptions, io::Read, io::Seek, io::SeekFrom, io::Write};
292
/// Thin wrapper over [`std::fs::File`] providing offset-based read/write.
struct FileInner {
    /// Underlying operating system file.
    f: fs::File,
}

impl FileInner {
    /// Construct from filename, creating the file if it does not exist.
    pub fn new(filename: &str) -> Self {
        Self {
            f: OpenOptions::new()
                .read(true)
                .write(true)
                .create(true)
                .truncate(false)
                .open(filename)
                .unwrap(),
        }
    }

    /// Current file size in bytes.
    fn size(&mut self) -> u64 {
        self.f.seek(SeekFrom::End(0)).unwrap()
    }

    /// Read bytes starting at off, filling the whole buffer.
    /// Fix: a single `read` call may legally return fewer bytes than requested;
    /// loop until the buffer is full, and zero-fill past end-of-file
    /// ( previously a short read left stale bytes in the buffer ).
    fn read(&mut self, off: u64, bytes: &mut [u8]) {
        self.f.seek(SeekFrom::Start(off)).unwrap();
        let mut done = 0;
        while done < bytes.len() {
            let n = self.f.read(&mut bytes[done..]).unwrap();
            if n == 0 {
                // End of file: treat missing bytes as zero.
                bytes[done..].fill(0);
                break;
            }
            done += n;
        }
    }

    /// Write bytes starting at off.
    fn write(&mut self, off: u64, bytes: &[u8]) {
        // The list of operating systems which auto-zero is likely more than this...research is todo.
        #[cfg(not(any(target_os = "windows", target_os = "linux")))]
        {
            let size = self.f.seek(SeekFrom::End(0)).unwrap();
            if off > size {
                self.f.set_len(off).unwrap();
            }
        }
        self.f.seek(SeekFrom::Start(off)).unwrap();
        // Fix: use write_all — a plain `write` may perform a short write
        // and silently drop the remaining bytes.
        self.f.write_all(bytes).unwrap();
    }

    /// Set the file length and sync contents to disk.
    fn commit(&mut self, size: u64) {
        self.f.set_len(size).unwrap();
        self.f.sync_all().unwrap();
    }
}
338
/// Can be used for atomic upd file ( does not implement Sync ).
/// Uses RefCell rather than a Mutex for interior mutability, so it is single-threaded.
pub struct FastFileStorage {
    /// The wrapped file handle.
    file: RefCell<FileInner>,
}
343
344impl FastFileStorage {
345    /// Construct from filename.
346    pub fn new(filename: &str) -> Box<Self> {
347        Box::new(Self {
348            file: RefCell::new(FileInner::new(filename)),
349        })
350    }
351}
352
impl BasicStorage for FastFileStorage {
    // All methods delegate to the RefCell-wrapped FileInner.
    fn size(&self) -> u64 {
        self.file.borrow_mut().size()
    }
    fn read(&self, off: u64, bytes: &mut [u8]) {
        self.file.borrow_mut().read(off, bytes);
    }

    fn write(&mut self, off: u64, bytes: &[u8]) {
        self.file.borrow_mut().write(off, bytes);
    }

    fn commit(&mut self, size: u64) {
        self.file.borrow_mut().commit(size);
    }
}
369
/// Simple implementation of [Storage] using [`std::fs::File`].
pub struct SimpleFileStorage {
    /// Shared file handle; operations serialize on the Mutex.
    file: Arc<Mutex<FileInner>>,
}
374
375impl SimpleFileStorage {
376    /// Construct from filename.
377    pub fn new(filename: &str) -> Box<Self> {
378        Box::new(Self {
379            file: Arc::new(Mutex::new(FileInner::new(filename))),
380        })
381    }
382}
383
impl Storage for SimpleFileStorage {}

impl BasicStorage for SimpleFileStorage {
    fn size(&self) -> u64 {
        self.file.lock().unwrap().size()
    }

    fn read(&self, off: u64, bytes: &mut [u8]) {
        self.file.lock().unwrap().read(off, bytes);
    }

    fn write(&mut self, off: u64, bytes: &[u8]) {
        self.file.lock().unwrap().write(off, bytes);
    }

    fn commit(&mut self, size: u64) {
        self.file.lock().unwrap().commit(size);
    }

    /// Clone shares the same underlying file handle ( and its Mutex ).
    fn clone(&self) -> Box<dyn Storage> {
        Box::new(Self {
            file: self.file.clone(),
        })
    }
}
409
/// Alternative to SimpleFileStorage that uses a pool of file handles to allow parallel reads by different threads.
#[allow(clippy::vec_box)]
pub struct MultiFileStorage {
    /// Name used to open additional handles on demand.
    filename: String,
    /// Pool of idle file handles.
    files: Arc<Mutex<Vec<FileInner>>>,
}
416
417impl MultiFileStorage {
418    /// Create new MultiFileStorage.
419    pub fn new(filename: &str) -> Box<Self> {
420        Box::new(Self {
421            filename: filename.to_owned(),
422            files: Arc::new(Mutex::new(Vec::new())),
423        })
424    }
425
426    fn get_file(&self) -> FileInner {
427        match self.files.lock().unwrap().pop() {
428            Some(f) => f,
429            _ => FileInner::new(&self.filename),
430        }
431    }
432
433    fn put_file(&self, f: FileInner) {
434        self.files.lock().unwrap().push(f);
435    }
436}
437
impl Storage for MultiFileStorage {}

impl BasicStorage for MultiFileStorage {
    // Each operation borrows a handle from the pool, so concurrent calls
    // do not contend on a single seek position.
    fn size(&self) -> u64 {
        let mut f = self.get_file();
        let result = f.size();
        self.put_file(f);
        result
    }

    fn read(&self, off: u64, bytes: &mut [u8]) {
        let mut f = self.get_file();
        f.read(off, bytes);
        self.put_file(f);
    }

    fn write(&mut self, off: u64, bytes: &[u8]) {
        let mut f = self.get_file();
        f.write(off, bytes);
        self.put_file(f);
    }

    fn commit(&mut self, size: u64) {
        let mut f = self.get_file();
        f.commit(size);
        self.put_file(f);
    }

    /// Clone shares the filename and the handle pool.
    fn clone(&self) -> Box<dyn Storage> {
        Box::new(Self {
            filename: self.filename.clone(),
            files: self.files.clone(),
        })
    }
}
473
474/// Dummy Stg that can be used for Atomic upd file if "reliable" atomic commits are not required.
475pub struct DummyFile {}
476impl DummyFile {
477    /// Construct.
478    pub fn new() -> Box<Self> {
479        Box::new(Self {})
480    }
481}
482
483impl Storage for DummyFile {}
484
485impl BasicStorage for DummyFile {
486    fn size(&self) -> u64 {
487        0
488    }
489
490    fn read(&self, _off: u64, _bytes: &mut [u8]) {}
491
492    fn write(&mut self, _off: u64, _bytes: &[u8]) {}
493
494    fn commit(&mut self, _size: u64) {}
495
496    fn clone(&self) -> Box<dyn Storage> {
497        Self::new()
498    }
499}
500
/// Memory configuration limits.
///
/// Marked non_exhaustive: construct via `Limits::default()` and adjust fields.
#[non_exhaustive]
pub struct Limits {
    /// Limit on size of commit write map, default is 5000.
    pub map_lim: usize,
    /// Memory for buffering small reads, default is 0x200000 ( 2MB ).
    pub rbuf_mem: usize,
    /// Memory for buffering writes to main storage, default is 0x100000 (1MB).
    pub swbuf: usize,
    /// Memory for buffering writes to temporary storage, default is 0x100000 (1MB).
    pub uwbuf: usize,
}
513
514impl Default for Limits {
515    fn default() -> Self {
516        Self {
517            map_lim: 5000,
518            rbuf_mem: 0x200000,
519            swbuf: 0x100000,
520            uwbuf: 0x100000,
521        }
522    }
523}
524
/// Write Buffer.
/// Coalesces sequential writes before passing them to the underlying storage.
struct WriteBuffer {
    /// Current write index into buf.
    ix: usize,
    /// Current file position ( u64::MAX is a sentinel meaning "no buffered position" ).
    pos: u64,
    /// Underlying storage.
    pub stg: Box<dyn BasicStorage>,
    /// Buffer.
    buf: Vec<u8>,
}
536
impl WriteBuffer {
    /// Construct with the given buffer size.
    pub fn new(stg: Box<dyn BasicStorage>, buf_size: usize) -> Self {
        Self {
            ix: 0,
            // Sentinel: no position buffered yet.
            pos: u64::MAX,
            stg,
            buf: vec![0; buf_size],
        }
    }

    /// Write data to specified offset,
    /// buffering so sequential writes reach the storage as fewer, larger writes.
    pub fn write(&mut self, off: u64, data: &[u8]) {
        // Non-sequential write: flush any buffered data first.
        if self.pos + self.ix as u64 != off {
            self.flush(off);
        }
        let mut done: usize = 0;
        let mut todo: usize = data.len();
        while todo > 0 {
            let mut n: usize = self.buf.len() - self.ix;
            if n == 0 {
                // Buffer full: flush, then continue at the current write offset.
                self.flush(off + done as u64);
                n = self.buf.len();
            }
            if n > todo {
                n = todo;
            }
            self.buf[self.ix..self.ix + n].copy_from_slice(&data[done..done + n]);
            todo -= n;
            done += n;
            self.ix += n;
        }
    }

    /// Flush buffered bytes ( if any ) to storage, then restart buffering at new_pos.
    fn flush(&mut self, new_pos: u64) {
        if self.ix > 0 {
            self.stg.write(self.pos, &self.buf[0..self.ix]);
        }
        self.ix = 0;
        self.pos = new_pos;
    }

    /// Flush pending bytes and commit the underlying storage.
    pub fn commit(&mut self, size: u64) {
        self.flush(u64::MAX);
        self.stg.commit(size);
    }

    /// Write u64 ( little-endian ).
    pub fn write_u64(&mut self, start: u64, value: u64) {
        self.write(start, &value.to_le_bytes());
    }
}
590
/// ReadBufStg buffers small (up to limit) reads to the underlying storage using multiple buffers. Only supported functions are read and reset.
///
/// See implementation of AtomicFile for how this is used in conjunction with WMap.
///
/// N is buffer size.
struct ReadBufStg<const N: usize> {
    /// Underlying storage.
    stg: Box<dyn Storage>,
    /// Buffers ( sector cache ), behind a Mutex so read can take &self.
    buf: Mutex<ReadBuffer<N>>,
    /// Read size that is considered small.
    limit: usize,
}
604
impl<const N: usize> Drop for ReadBufStg<N> {
    fn drop(&mut self) {
        // Release all cached sector buffers.
        self.reset();
    }
}
610
impl<const N: usize> ReadBufStg<N> {
    /// limit is the size of a read that is considered "small", max_buf is the maximum number of buffers used.
    pub fn new(stg: Box<dyn Storage>, limit: usize, max_buf: usize) -> Self {
        Self {
            stg,
            buf: Mutex::new(ReadBuffer::<N>::new(max_buf)),
            limit,
        }
    }

    /// Clears the buffers ( called when all pending commits are saved, and on drop ).
    fn reset(&mut self) {
        self.buf.lock().unwrap().reset();
    }
}
626
impl<const N: usize> BasicStorage for ReadBufStg<N> {
    /// Read data from storage; small reads go through the sector cache.
    fn read(&self, start: u64, data: &mut [u8]) {
        if data.len() <= self.limit {
            self.buf.lock().unwrap().read(&*self.stg, start, data);
        } else {
            // Large reads bypass the cache.
            self.stg.read(start, data);
        }
    }

    /// Panics.
    fn size(&self) -> u64 {
        panic!()
    }

    /// Panics.
    fn write(&mut self, _start: u64, _data: &[u8]) {
        panic!();
    }

    /// Panics.
    fn commit(&mut self, _size: u64) {
        panic!();
    }
}
652
struct ReadBuffer<const N: usize> {
    /// Maps sector numbers to cached sector buffers.
    map: HashMap<u64, Box<[u8; N]>>,
    /// Maximum number of buffers.
    max_buf: usize,
}
659
impl<const N: usize> ReadBuffer<N> {
    fn new(max_buf: usize) -> Self {
        Self {
            map: HashMap::default(),
            max_buf,
        }
    }

    /// Discard all cached sectors.
    fn reset(&mut self) {
        self.map.clear();
    }

    /// Read via the sector cache, fetching whole N-byte sectors from stg on a miss.
    fn read(&mut self, stg: &dyn BasicStorage, off: u64, data: &mut [u8]) {
        let mut done = 0;
        while done < data.len() {
            // Shadowed: absolute offset of the next byte to read.
            let off = off + done as u64;
            let sector = off / N as u64;
            let disp = (off % N as u64) as usize;
            let amount = min(data.len() - done, N - disp);

            let p = self.map.entry(sector).or_insert_with(|| {
                let mut p: Box<[u8; N]> = vec![0; N].try_into().unwrap();
                stg.read(sector * N as u64, &mut *p);
                p
            });
            data[done..done + amount].copy_from_slice(&p[disp..disp + amount]);
            done += amount;
        }
        // Crude eviction policy: once the cache reaches max_buf sectors, drop everything.
        if self.map.len() >= self.max_buf {
            self.reset();
        }
    }
}
693
#[derive(Default)]
/// Slice of Data to be written to storage.
struct DataSlice {
    /// Slice data.
    pub data: Data,
    /// Start of slice ( offset into data ).
    pub off: usize,
    /// Length of slice.
    pub len: usize,
}
704
705impl DataSlice {
706    /// Get reference to the whole slice.
707    pub fn all(&self) -> &[u8] {
708        &self.data[self.off..self.off + self.len]
709    }
710    /// Get reference to part of slice.
711    pub fn part(&self, off: usize, len: usize) -> &[u8] {
712        &self.data[self.off + off..self.off + off + len]
713    }
714    /// Trim specified amount from start of slice.
715    pub fn trim(&mut self, trim: usize) {
716        self.off += trim;
717        self.len -= trim;
718    }
719    /// Take the data.
720    #[allow(dead_code)]
721    pub fn take(&mut self) -> Data {
722        std::mem::take(&mut self.data)
723    }
724}
725
#[derive(Default)]
/// Updateable store based on some underlying storage.
struct WMap {
    /// Map of writes. Key is the end of the slice
    /// ( keying by end makes range queries from a read start simple ).
    map: BTreeMap<u64, DataSlice>,
}
732
impl WMap {
    /// Is the map empty?
    pub fn is_empty(&self) -> bool {
        self.map.is_empty()
    }

    /// Number of key-value pairs in the map.
    pub fn len(&self) -> usize {
        self.map.len()
    }

    /// Take the map and convert it to a Vec of ( start, slice ) pairs, ordered by start.
    pub fn convert_to_vec(&mut self) -> Vec<(u64, DataSlice)> {
        let map = std::mem::take(&mut self.map);
        let mut result = Vec::with_capacity(map.len());
        for (end, v) in map {
            // Map key is the end offset; recover the start.
            let start = end - v.len as u64;
            result.push((start, v));
        }
        result
    }

    /// Write the map into storage.
    pub fn to_storage(&self, stg: &mut dyn BasicStorage) {
        for (end, v) in self.map.iter() {
            let start = end - v.len as u64;
            stg.write_data(start, v.data.clone(), v.off, v.len);
        }
    }

    #[cfg(not(feature = "pstd"))]
    /// Write to storage, existing writes which overlap with new write need to be trimmed or removed.
    pub fn write(&mut self, start: u64, data: Data, off: usize, len: usize) {
        if len != 0 {
            let (mut insert, mut remove) = (Vec::new(), Vec::new());
            let end = start + len as u64;
            // Only existing writes ending after `start` can overlap the new write.
            for (ee, v) in self.map.range_mut(start + 1..) {
                let ee = *ee;
                let es = ee - v.len as u64; // Existing write Start.
                if es >= end {
                    // Existing write starts after end of new write, nothing to do.
                    break;
                } else if start <= es {
                    if end < ee {
                        // New write starts before existing write, but doesn't subsume it. Trim existing write.
                        v.trim((end - es) as usize);
                        break;
                    }
                    // New write subsumes existing write entirely, remove existing write.
                    remove.push(ee);
                } else if end < ee {
                    // New write starts in middle of existing write, ends before end of existing write,
                    // put start of existing write in insert list, trim existing write.
                    insert.push((es, v.data.clone(), v.off, (start - es) as usize));
                    v.trim((end - es) as usize);
                    break;
                } else {
                    // New write starts in middle of existing write, ends after existing write,
                    // put start of existing write in insert list, remove existing write.
                    insert.push((es, v.take(), v.off, (start - es) as usize));
                    remove.push(ee);
                }
            }
            // Mutations deferred until iteration is over ( cannot modify the map while borrowed ).
            for end in remove {
                self.map.remove(&end);
            }
            for (start, data, off, len) in insert {
                self.map
                    .insert(start + len as u64, DataSlice { data, off, len });
            }
            self.map
                .insert(start + len as u64, DataSlice { data, off, len });
        }
    }

    #[cfg(feature = "pstd")]
    /// Write to storage, existing writes which overlap with new write need to be trimmed or removed.
    pub fn write(&mut self, start: u64, data: Data, off: usize, len: usize) {
        if len != 0 {
            let end = start + len as u64;
            let mut c = self
                .map
                .lower_bound_mut(std::ops::Bound::Excluded(&start))
                .with_mutable_key();
            while let Some((eend, v)) = c.next() {
                let ee = *eend;
                let es = ee - v.len as u64; // Existing write Start.
                if es >= end {
                    // Existing write starts after end of new write, nothing to do.
                    c.prev();
                    break;
                } else if start <= es {
                    if end < ee {
                        // New write starts before existing write, but doesn't subsume it. Trim existing write.
                        v.trim((end - es) as usize);
                        c.prev();
                        break;
                    }
                    // New write subsumes existing write entirely, remove existing write.
                    c.remove_prev();
                } else if end < ee {
                    // New write starts in middle of existing write, ends before end of existing write,
                    // trim existing write, insert start of existing write.
                    let (data, off, len) = (v.data.clone(), v.off, (start - es) as usize);
                    v.trim((end - es) as usize);
                    c.prev();
                    c.insert_before_unchecked(es + len as u64, DataSlice { data, off, len });
                    break;
                } else {
                    // New write starts in middle of existing write, ends after existing write,
                    // Trim existing write ( modifies key, but this is ok as ordering is not affected ).
                    v.len = (start - es) as usize;
                    *eend = es + v.len as u64;
                }
            }
            // Insert the new write.
            c.insert_after_unchecked(start + len as u64, DataSlice { data, off, len });
        }
    }

    /// Read from storage, taking map of existing writes into account. Unwritten ranges are read from underlying storage.
    pub fn read(&self, start: u64, data: &mut [u8], u: &dyn BasicStorage) {
        let len = data.len();
        if len != 0 {
            let mut done = 0;
            // Writes are keyed by end offset, so iterate writes ending after `start`.
            for (&end, v) in self.map.range(start + 1..) {
                let es = end - v.len as u64; // Existing write Start.
                let doff = start + done as u64;
                if es > doff {
                    // Gap before this write: read from underlying storage.
                    let a = min(len - done, (es - doff) as usize);
                    u.read(doff, &mut data[done..done + a]);
                    done += a;
                    if done == len {
                        return;
                    }
                }
                // Use existing write.
                let skip = (start + done as u64 - es) as usize;
                let a = min(len - done, v.len - skip);
                data[done..done + a].copy_from_slice(v.part(skip, a));
                done += a;
                if done == len {
                    return;
                }
            }
            // Tail after the last overlapping write comes from underlying storage.
            u.read(start + done as u64, &mut data[done..]);
        }
    }
}
883
/// Basis for [crate::AtomicFile] ( non-async alternative ). Provides two-phase commit and buffering of writes.
pub struct BasicAtomicFile {
    /// The main underlying storage.
    stg: WriteBuffer,
    /// Temporary storage for updates during commit.
    upd: WriteBuffer,
    /// Map of writes.
    map: WMap,
    /// List of writes.
    list: Vec<(u64, DataSlice)>,
    /// Size of main storage as of the last commit.
    size: u64,
}
896
impl BasicAtomicFile {
    /// stg is the main underlying storage, upd is temporary storage for updates during commit.
    pub fn new(stg: Box<dyn BasicStorage>, upd: Box<dyn BasicStorage>, lim: &Limits) -> Box<Self> {
        let size = stg.size();
        let mut result = Box::new(Self {
            stg: WriteBuffer::new(stg, lim.swbuf),
            upd: WriteBuffer::new(upd, lim.uwbuf),
            map: WMap::default(),
            list: Vec::new(),
            size,
        });
        result.init();
        result
    }

    /// Apply outstanding updates.
    /// Recovery step: if a previous run stopped between phase 1 and phase 2,
    /// the update records saved in upd are replayed into the main storage.
    fn init(&mut self) {
        // upd layout: [0] = end position ( 0 means nothing outstanding ), [8] = file size,
        // then ( start, len, bytes ) records from offset 16.
        let end = self.upd.stg.read_u64(0);
        let size = self.upd.stg.read_u64(8);
        if end == 0 {
            return;
        }
        assert!(end == self.upd.stg.size());
        let mut pos = 16;
        while pos < end {
            let start = self.upd.stg.read_u64(pos);
            pos += 8;
            let len = self.upd.stg.read_u64(pos);
            pos += 8;
            let mut buf = vec![0; len as usize];
            self.upd.stg.read(pos, &mut buf);
            pos += len;
            self.stg.write(start, &buf);
        }
        self.stg.commit(size);
        // Mark upd empty: updates fully applied.
        self.upd.commit(0);
    }

    /// Perform the specified phase ( 1 or 2 ) of a two-phase commit.
    pub fn commit_phase(&mut self, size: u64, phase: u8) {
        if self.map.is_empty() && self.list.is_empty() {
            return;
        }
        if phase == 1 {
            self.list = self.map.convert_to_vec();

            // Write the updates to upd.
            // First set the end position to zero.
            self.upd.write_u64(0, 0);
            self.upd.write_u64(8, size);
            self.upd.commit(16); // Not clear if this is necessary.

            // Write the update records.
            let mut stg_written = false;
            let mut pos: u64 = 16;
            for (start, v) in self.list.iter() {
                let (start, len, data) = (*start, v.len as u64, v.all());
                if start >= self.size {
                    // Writes beyond current stg size can be written directly
                    // ( a crash loses only data that was never visible ).
                    stg_written = true;
                    self.stg.write(start, data);
                } else {
                    self.upd.write_u64(pos, start);
                    pos += 8;
                    self.upd.write_u64(pos, len);
                    pos += 8;
                    self.upd.write(pos, data);
                    pos += len;
                }
            }
            if stg_written {
                self.stg.commit(size);
            }
            self.upd.commit(pos); // Not clear if this is necessary.

            // Set the end position.
            // Writing a non-zero end position is the atomic "commit point":
            // from here on, init() will replay these records after a crash.
            self.upd.write_u64(0, pos);
            self.upd.write_u64(8, size);
            self.upd.commit(pos);
        } else {
            for (start, v) in self.list.iter() {
                if *start < self.size {
                    // Writes beyond current stg size have already been written.
                    self.stg.write(*start, v.all());
                }
            }
            self.list.clear();
            self.stg.commit(size);
            // Mark upd empty: updates fully applied.
            self.upd.commit(0);
        }
    }
}
989
impl BasicStorage for BasicAtomicFile {
    fn commit(&mut self, size: u64) {
        // Two-phase commit: phase 1 records updates durably in upd,
        // phase 2 applies them to the main storage.
        self.commit_phase(size, 1);
        self.commit_phase(size, 2);
        self.size = size;
    }

    fn size(&self) -> u64 {
        self.size
    }

    fn read(&self, start: u64, data: &mut [u8]) {
        // Pending writes take precedence; gaps come from the buffered main storage.
        self.map.read(start, data, &*self.stg.stg);
    }

    fn write_data(&mut self, start: u64, data: Data, off: usize, len: usize) {
        self.map.write(start, data, off, len);
    }

    fn write(&mut self, start: u64, data: &[u8]) {
        let len = data.len();
        let d = Arc::new(data.to_vec());
        self.write_data(start, d, 0, len);
    }
}
1015
#[cfg(test)]
/// Get amount of testing from environment variable TA ( defaults to 1 ).
fn test_amount() -> usize {
    // unwrap_or_else: avoid allocating the default String when TA is set ( clippy or_fun_call ).
    std::env::var("TA")
        .unwrap_or_else(|_| "1".to_string())
        .parse()
        .unwrap()
}
1021
#[test]
fn test_atomic_file() {
    use rand::Rng;
    /* Idea of test is to check AtomicFile and MemFile behave the same */

    let ta = test_amount();
    println!(" Test amount={}", ta);

    let mut rng = rand::thread_rng();

    for _ in 0..100 {
        let mut s1 = AtomicFile::new(MemFile::new(), MemFile::new());
        // let mut s1 = BasicAtomicFile::new(MemFile::new(), MemFile::new(), &Limits::default() );
        let mut s2 = MemFile::new();

        for _ in 0..1000 * ta {
            // Small random offsets / lengths so operations frequently overlap.
            let off: usize = rng.r#gen::<usize>() % 100;
            let mut len = 1 + rng.r#gen::<usize>() % 20;
            let w: bool = rng.r#gen();
            if w {
                let mut bytes = Vec::new();
                while len > 0 {
                    len -= 1;
                    let b: u8 = rng.r#gen::<u8>();
                    bytes.push(b);
                }
                s1.write(off as u64, &bytes);
                s2.write(off as u64, &bytes);
            } else {
                let mut b2 = vec![0; len];
                let mut b3 = vec![0; len];
                s1.read(off as u64, &mut b2);
                s2.read(off as u64, &mut b3);
                // Reads must agree regardless of any pending async commits.
                assert!(b2 == b3);
            }
            // Occasionally commit both to the same size.
            if rng.r#gen::<usize>() % 50 == 0 {
                s1.commit(200);
                s2.commit(200);
            }
        }
    }
}