atomic_file/lib.rs

#![forbid(unsafe_code)]
#![deny(missing_docs)]
#![deny(unused_must_use)]
#![deny(unused_mut)]

//! The AtomicFile crate provides a wrapper around async_std::File to enable more convenient and
//! safe interactions with on-disk data. All operations on an AtomicFile are ACID, and the
//! AtomicFile type includes an invisible 4096 byte header which manages details like the version
//! number and file identifier.
//!
//! The main use of the version number and file identifier is to provide easy upgrade capabilities
//! for AtomicFiles, and also to ensure that the wrong file is never opened in the event that the
//! user incorrectly moves a file from one place to another.
//!
//! The main advantage of using an AtomicFile is its ACID guarantees, which ensure that data will
//! never be corrupted in the event of a sudden loss of power. Typical file usage patterns leave
//! users vulnerable to corruption, especially when updating a file. AtomicFile protects against
//! corruption by using a double-write scheme to guarantee that correct data exists on disk, and
//! uses a checksum to verify at startup that the correct instance of the doubly-written file is
//! loaded. This does mean that two files will exist on disk for each AtomicFile - a .atomic_file
//! and a .atomic_file_backup.
//!
//! The checksum used by an AtomicFile is 6 bytes. We use a 6 byte checksum because our threat
//! model is arbitrary disk failure, not a human adversary. A human adversary could write any
//! checksum they want to defeat our corruption detection. The checksum is written as hex in the
//! first 12 bytes of the file.
//!
//! If a file needs to be manually modified, the checksum can be overwritten. Change the checksum
//! to 'ffffffffffff' (12 chars) and the checksum will be accepted independent of the file
//! contents. The checks for the identifier will still trigger.
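//!
//! For example, here is a minimal sketch of overriding the checksum using only the standard
//! library; the file name is hypothetical, and the real on-disk file carries the .atomic_file
//! extension:
//! ```no_run
//! use std::io::{Seek, SeekFrom, Write};
//!
//! let mut raw = std::fs::OpenOptions::new()
//!     .write(true)
//!     .open("my_file.atomic_file")
//!     .unwrap();
//! // Overwrite the first 12 bytes (the hex checksum) with the override value.
//! raw.seek(SeekFrom::Start(0)).unwrap();
//! raw.write_all(b"ffffffffffff").unwrap();
//! ```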
//!
//! Data corruption can still occur in the event of something extreme like physical damage to the
//! hard drive, but the chances of recovery are better and the user is protected against all
//! common forms of corruption (which stem from power being lost unexpectedly).
//!
//! The 'Atomic' property of the AtomicFile is that read and write operations always fully read
//! or fully write the file.
//! ```
//! // Basic file operations
//!
//! use std::path::PathBuf;
//! use atomic_file::{
//!     open, open_file,
//!     OpenSettings::CreateIfNotExists,
//! };
//!
//! #[async_std::main]
//! async fn main() {
//!     // Create a version 1 file with open_file. We pass in an empty vector for the upgrade path,
//!     // and 'CreateIfNotExists' to indicate that we want to create the file if it does not exist.
//!     let mut path = PathBuf::new();
//!     path.push("target");
//!     path.push("docs-example-1");
//!     let identifier = "AtomicFileDocs::docs-example-1";
//!     let mut file = open_file(&path, identifier, 1, &Vec::new(), CreateIfNotExists).await.unwrap();
//!
//!     // Use 'contents' and 'write_file' to read and write the logical data of the file. Each
//!     // one will always read or write the full contents of the file.
//!     file.write_file(b"hello, world!").await.unwrap();
//!     let file_data = file.contents();
//!     if file_data != b"hello, world!" {
//!         panic!("example did not read correctly");
//!     }
//!     drop(file);
//!
//!     // Now that we have created a file, we can use 'open(path, identifier)' as an alias for:
//!     // 'open_file(path, identifier, 1, &Vec::new(), ErrorIfNotExists)'
//!     let file = open(&path, identifier).await.unwrap();
//!     # drop(file);
//!     # atomic_file::delete_file(&path).await.unwrap();
//! }
//! ```
//! AtomicFile uses a versioning and upgrading scheme to simplify the process of releasing new
//! versions of a file. When opening a file, you pass in a version number and an upgrade path
//! which will allow the file opening process to automatically upgrade your files from their
//! current version to the latest version.
//! ```
//! // Simple upgrade example
//! use std::path::PathBuf;
//!
//! use anyhow::{bail, Result, Error};
//! use atomic_file::{open, open_file, AtomicFile, Upgrade};
//! use atomic_file::OpenSettings::ErrorIfNotExists;
//! # use atomic_file::OpenSettings::CreateIfNotExists;
//!
//! // An example of a function that upgrades a file from version 1 to version 2, while making
//! // changes to the body of the file.
//! fn example_upgrade(
//!     data: Vec<u8>,
//!     initial_version: u8,
//!     updated_version: u8,
//! ) -> Result<Vec<u8>, Error> {
//!     // Check that the version is okay.
//!     if initial_version != 1 || updated_version != 2 {
//!         bail!("wrong version");
//!     }
//!
//!     // Return updated contents for the file.
//!     Ok(b"hello, update!".to_vec())
//! }
//!
//! #[async_std::main]
//! async fn main() {
//!     # let mut p = PathBuf::new();
//!     # p.push("target");
//!     # p.push("docs-example-2");
//!     # let i = "AtomicFileDocs::docs-example-2";
//!     # let mut f = atomic_file::open_file(&p, i, 1, &Vec::new(), CreateIfNotExists).await.unwrap();
//!     # f.write_file(b"hello, world!").await.unwrap();
//!     # drop(f);
//!     let mut path = PathBuf::new();
//!     path.push("target");
//!     path.push("docs-example-2");
//!     let identifier = "AtomicFileDocs::docs-example-2";
//!     let upgrade = Upgrade {
//!         initial_version: 1,
//!         updated_version: 2,
//!         process: example_upgrade,
//!     };
//!     let mut file = open_file(&path, identifier, 2, &vec![upgrade], ErrorIfNotExists).await.unwrap();
//!     // Note that the upgrades are passed in as a vector, allowing the caller to
//!     // define entire upgrade chains, e.g. 1->2 and 2->3. The final file that gets returned
//!     // will have been upgraded through the chain to the latest version.
//!     let file_data = file.contents();
//!     if file_data != b"hello, update!" {
//!         panic!("upgrade appears to have failed: \n{:?}\n{:?}", file_data, b"hello, update!");
//!     }
//!
//!     // Perform cleanup.
//!     drop(file);
//!     atomic_file::delete_file(&path).await.unwrap();
//! }
//! ```
//!
//! If you would like to contribute to this crate, we are looking for a way to make the upgrade
//! functions async + Send, as prior attempts were unsuccessful.

use async_std::{
    fs::{File, OpenOptions},
    io::prelude::SeekExt,
    io::{ReadExt, SeekFrom, WriteExt},
};
use std::{
    collections::HashMap,
    path::PathBuf,
    str::from_utf8,
};

use anyhow::{bail, Context, Error, Result};
use sha2::{Digest, Sha256};

/// OpenSettings provides the two options for opening a file in the event that the file does not
/// exist: create the file, or return an error.
pub enum OpenSettings {
    /// A new file will be created if the file does not exist.
    CreateIfNotExists,

    /// An error will be returned if the file does not exist.
    ErrorIfNotExists,
}

/// UpgradeFunc defines the signature for a function that can be used to upgrade an
/// AtomicFile. The UpgradeFunc function will receive the file data that needs to be upgraded along
/// with the intended initial version and final version that is expected from the upgrade.
///
/// We pass in the initial version and final version as arguments to provide an extra level of
/// redundancy and to prevent mistakes when copy-pasting upgrades, as an incorrect upgrade could
/// corrupt user data.
pub type UpgradeFunc =
    fn(data: Vec<u8>, initial_version: u8, upgraded_version: u8) -> Result<Vec<u8>, Error>;

/// Upgrade defines an upgrade process for upgrading the data in a file from one version to
/// another.
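///
/// # Example
///
/// A minimal sketch of declaring a single upgrade step; the function here is a placeholder that
/// leaves the file data unchanged:
/// ```
/// use anyhow::Error;
/// use atomic_file::Upgrade;
///
/// // A trivial UpgradeFunc that returns the data untouched.
/// fn noop_upgrade(data: Vec<u8>, _from: u8, _to: u8) -> Result<Vec<u8>, Error> {
///     Ok(data)
/// }
///
/// let upgrade = Upgrade {
///     initial_version: 1,
///     updated_version: 2,
///     process: noop_upgrade,
/// };
/// # let _ = upgrade;
/// ```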
pub struct Upgrade {
    /// initial_version designates the version of the file that this upgrade should be applied to.
    pub initial_version: u8,
    /// updated_version designates the version of the file after the upgrade is complete.
    pub updated_version: u8,
    /// process defines the function that is used to upgrade the file.
    pub process: UpgradeFunc,
}

/// AtomicFile defines the main type for the crate, and implements an API for safely
/// handling atomic files. The API is based on the async_std::File interface, but with some
/// adjustments that are designed to make it both safer and more ergonomic.
#[derive(Debug)]
pub struct AtomicFile {
    backup_file: File,
    file: File,
    identifier: String,
    logical_data: Vec<u8>,
    version: u8,
}

/// add_extension will add a new extension to the provided pathbuf, rather than overwriting the
/// existing one. For example, 'add_extension("stuff.tar", "gz")' returns 'stuff.tar.gz', while
/// calling set_extension directly would return 'stuff.gz', which is the wrong result.
fn add_extension(path: &mut PathBuf, extension: &str) {
    match path.extension() {
        Some(ext) => {
            let mut ext = ext.to_os_string();
            ext.push(".");
            ext.push(extension);
            path.set_extension(ext)
        }
        None => path.set_extension(extension),
    };
}

/// version_to_bytes will write out the version in ascii, adding leading zeroes if needed and
/// placing a newline at the end.
fn version_to_bytes(version: u8) -> [u8; 4] {
    // 0 is not an allowed version, every other possible u8 is okay.
    if version == 0 {
        panic!("version is not allowed to be 0");
    }

    // Compute the 4 version bytes based on the latest version.
    let mut version_string = format!("{}\n", version);
    if version_string.len() == 2 {
        version_string = format!("00{}", version_string);
    } else if version_string.len() == 3 {
        version_string = format!("0{}", version_string);
    }
    let version_bytes = version_string.as_bytes();
    let mut version_arr = [0u8; 4];
    version_arr.copy_from_slice(version_bytes);
    version_arr
}

/// identifier_and_version_from_metadata will pull the identifier and version out of the metadata.
fn identifier_and_version_from_metadata(metadata: &[u8]) -> Result<(String, u8), Error> {
    if metadata.len() < 4096 {
        bail!("provided metadata is not the right size");
    }

    let version_str =
        from_utf8(&metadata[13..16]).context("the on-disk version could not be parsed")?;
    let version: u8 = version_str
        .parse()
        .context("unable to parse version of metadata")?;

    let mut clean_identifier = false;
    let mut identifier = "".to_string();
    let mut atomic_identifier_offset = 0;
    for i in 0..201 {
        if metadata[i + 17] == '\n' as u8 {
            clean_identifier = true;
            atomic_identifier_offset = i + 18;
            break;
        }
        if metadata[i + 17] > 127 {
            bail!("identifier contains non-ascii characters before termination sequence");
        }
        identifier.push(metadata[i + 17] as char);
    }
    if !clean_identifier {
        bail!("provided metadata does not have a legally terminating identifier");
    }
    let atomic_identifier = "DavidVorick/atomic_file-v1\n".as_bytes();
    if metadata[atomic_identifier_offset..atomic_identifier_offset + 27] != atomic_identifier[..] {
        bail!("file does not appear to be an atomic file");
    }

    Ok((identifier, version))
}

impl AtomicFile {
    /// fill_metadata will write the first 4096 bytes to contain the proper metadata for the file,
    /// including the first 12 bytes which serve as the hex-encoded checksum.
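    ///
    /// For reference, the header layout produced here (and parsed by
    /// identifier_and_version_from_metadata) is:
    ///
    /// * bytes 0..12: the first 12 hex characters of the SHA-256 of bytes 12..end
    /// * byte 12: a newline
    /// * bytes 13..17: the version as three ascii digits plus a newline, e.g. "001\n"
    /// * bytes 17..: the user-provided identifier, terminated by a newline
    /// * immediately after: the crate identifier "DavidVorick/atomic_file-v1\n"
    /// * byte 4095: a newline; the logical file data begins at byte 4096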
    fn fill_metadata(&self, buf: &mut [u8]) {
        if buf.len() < 4096 {
            panic!("misuse of fill_metadata, check stack trace");
        }
        if self.identifier.len() > 200 {
            panic!(
                "file has too-large identifier, ensure bounds checking is in place in open_file"
            );
        }
        let version_bytes = version_to_bytes(self.version);

        // Fill out the header data. The identifier is terminated by a newline and is followed
        // immediately by the crate identifier.
        buf[12] = '\n' as u8;
        buf[13..17].copy_from_slice(&version_bytes);
        let iden_bytes = self.identifier.as_bytes();
        buf[17..17 + iden_bytes.len()].copy_from_slice(iden_bytes);
        buf[17 + iden_bytes.len()] = '\n' as u8;
        let atomic_identifier = "DavidVorick/atomic_file-v1\n".as_bytes();
        buf[18 + iden_bytes.len()..18 + iden_bytes.len() + 27].copy_from_slice(atomic_identifier);
        buf[4095] = '\n' as u8;

        // Grab the checksum of the data and fill it in as the first 12 bytes.
        let mut hasher = Sha256::new();
        hasher.update(&buf[12..]);
        let result = hasher.finalize();
        let result_hex = hex::encode(result);
        buf[..12].copy_from_slice(result_hex[..12].as_bytes());
    }

    /// contents will return a copy of the contents of the file.
    pub fn contents(&self) -> Vec<u8> {
        self.logical_data.clone()
    }

    /// write_file will perform a safe, ACID operation to write the provided data to the file. This
    /// will incur a performance cost of 2 fsyncs and of writing the full contents of the file to
    /// disk twice. For SSDs, the cost of two fsyncs is typically under 25ms and often under 3ms.
    /// For HDDs, the cost of two fsyncs is typically over 200ms and often over 800ms.
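    ///
    /// # Example
    ///
    /// A minimal usage sketch (the path and identifier here are hypothetical, and the example is
    /// not run as a test):
    /// ```no_run
    /// # async_std::task::block_on(async {
    /// use atomic_file::OpenSettings::CreateIfNotExists;
    ///
    /// let path = std::path::PathBuf::from("target/write-file-example");
    /// let mut file = atomic_file::open_file(&path, "MyApp::example", 1, &Vec::new(), CreateIfNotExists)
    ///     .await
    ///     .unwrap();
    /// file.write_file(b"some data").await.unwrap();
    /// assert_eq!(file.contents(), b"some data");
    /// # });
    /// ```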
    pub async fn write_file(&mut self, buf: &[u8]) -> Result<(), Error> {
        // Build the full physical data of the file, including the leading checksum in the
        // metadata.
        let mut full_data = vec![0u8; 4096 + buf.len()];
        full_data[4096..].copy_from_slice(buf);
        self.fill_metadata(&mut full_data);

        // Write out the full data to the backup file.
        self.backup_file
            .set_len(full_data.len() as u64)
            .await
            .context("unable to set the length of the backup file")?;
        self.backup_file
            .seek(SeekFrom::Start(0))
            .await
            .context("unable to seek to start of backup file")?;
        self.backup_file
            .write_all(&full_data)
            .await
            .context("unable to write to backup file")?;
        self.backup_file
            .flush()
            .await
            .context("unable to flush backup file")?;
        self.backup_file
            .sync_all()
            .await
            .context("fsync of backup file failed")?;

        // Backup file is safe, repeat the operation on the main file.
        self.file
            .set_len(full_data.len() as u64)
            .await
            .context("unable to set the length of the main file")?;
        self.file
            .seek(SeekFrom::Start(0))
            .await
            .context("unable to seek to start of main file")?;
        self.file
            .write_all(&full_data)
            .await
            .context("unable to write to main file")?;
        self.file
            .flush()
            .await
            .context("unable to flush main file")?;
        self.file
            .sync_all()
            .await
            .context("fsync of main file failed")?;

        // Set the data of the file to the newly provided data.
        self.logical_data = buf.to_vec();
        Ok(())
    }
}

/// verify_upgrade_paths verifies that the set of paths provided for performing upgrades all lead
/// to the latest version, and will return an error if some path doesn't lead to the latest
/// version. It will also return an error if two possible paths exist for a given version.
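///
/// For example, with latest_version = 3, the set {1 -> 2, 2 -> 3} is valid, while {1 -> 2} alone
/// is not (version 2 has no route to 3) and {1 -> 2, 1 -> 3} is not (two upgrades share the same
/// initial version).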
fn verify_upgrade_paths(
    upgrade_paths: &Vec<Upgrade>,
    current_version: u8,
    latest_version: u8,
) -> Result<(), Error> {
    // Ensure 0 was not used as the latest_version.
    if latest_version == 0 {
        bail!("version 0 is not allowed for an AtomicFile");
    }

    // Verify that an upgrade path exists for the file which carries it to the latest version.
    let mut version_routes = HashMap::new();
    // Verify basic properties of the graph (no cycles, no repeat sources).
    for path in upgrade_paths {
        if path.initial_version >= path.updated_version {
            bail!("upgrade paths must always lead to a higher version number");
        }
        if version_routes.contains_key(&path.initial_version) {
            bail!("upgrade paths can only have one upgrade for each version");
        }
        if path.updated_version > latest_version {
            bail!("upgrade paths lead beyond the latest version");
        }
        if path.initial_version == 0 {
            bail!("version 0 is not allowed for an AtomicFile");
        }
        version_routes.insert(path.initial_version, path.updated_version);
    }
    // Verify that all upgrades lead to the latest version. We iterate over the version_routes and
    // mark every node that connects to a finished node.
    let mut complete_paths = HashMap::new();
    complete_paths.insert(latest_version, ());
    loop {
        let mut progress = false;
        let mut finished = true;

        for (key, value) in &version_routes {
            if complete_paths.contains_key(key) {
                continue;
            }
            if complete_paths.contains_key(value) {
                progress = true;
                complete_paths.insert(*key, ());
            } else {
                finished = false;
            }
        }

        if finished {
            break;
        }
        if !progress {
            bail!("update graph is incomplete, not all nodes lead to the latest version");
        }
    }
    // Verify that the current version of the file is found in the upgrade paths.
    if !complete_paths.contains_key(&current_version) {
        bail!("no upgrade found for current version of file");
    }
    Ok(())
}

/// perform_file_upgrade takes a file and an upgrade, and then executes the upgrade against the
/// file.
async fn perform_file_upgrade(file: &mut AtomicFile, u: &Upgrade) -> Result<(), Error> {
    // Check that we've got the right upgrade.
    if file.version != u.initial_version {
        bail!("wrong update has been selected for this file");
    }

    // Perform the update on the data.
    let new_data = (u.process)(file.logical_data.clone(), u.initial_version, u.updated_version)
        .context(format!(
            "unable to complete file upgrade from version {} to {}",
            u.initial_version, u.updated_version
        ))?;
    file.logical_data = new_data;

    // Update the version of the file. We don't actually write the changes to disk yet because
    // these updates are recoverable / repeatable at next boot.
    file.version = u.updated_version;
    Ok(())
}

/// perform_file_upgrades will take a file through the upgrade chain.
async fn perform_file_upgrades(
    file: &mut AtomicFile,
    latest_version: u8,
    upgrades: &Vec<Upgrade>,
) -> Result<(), Error> {
    // Execute the upgrades.
    while file.version != latest_version {
        let mut found = false;
        for upgrade in upgrades {
            if upgrade.initial_version == file.version {
                perform_file_upgrade(file, upgrade)
                    .await
                    .context("unable to complete file upgrade")?;
                file.version = upgrade.updated_version;
                found = true;
                break;
            }
        }

        // The upgrades verifier ensures that if an upgrade exists in the set of upgrades, then
        // there also exists a path to the latest_version from that upgrade. Therefore, if this
        // file doesn't have a path to the latest version, no other upgrades will be executed
        // either - at least as long as the upgrade verifier is working.
        if !found {
            panic!("attempting to perform file upgrades without a viable upgrade path");
        }
    }

    Ok(())
}

/// delete_file will delete the atomic file at the given filepath. This will delete both the
/// .atomic_file and the .atomic_file_backup.
pub async fn delete_file(filepath: &PathBuf) -> Result<(), Error> {
    let mut main_path = filepath.clone();
    let mut backup_path = filepath.clone();
    add_extension(&mut main_path, "atomic_file");
    async_std::fs::remove_file(main_path.clone())
        .await
        .context("unable to remove main file")?;
    add_extension(&mut backup_path, "atomic_file_backup");
    async_std::fs::remove_file(backup_path.clone())
        .await
        .context("unable to remove backup file")?;
    Ok(())
}

/// exists will return whether or not an atomic file is considered to exist on disk.
pub fn exists(filepath: &PathBuf) -> bool {
    let mut path = filepath.clone();
    add_extension(&mut path, "atomic_file");
    path.exists()
}

/// open is a convenience wrapper for open_file which uses '1' as the version, an empty vector as
/// the upgrade path, and ErrorIfNotExists as the open setting.
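///
/// # Example
///
/// A minimal sketch (the path and identifier here are hypothetical, and the example is not run
/// as a test):
/// ```no_run
/// # async_std::task::block_on(async {
/// let path = std::path::PathBuf::from("target/open-example");
/// let file = atomic_file::open(&path, "MyApp::open-example").await.unwrap();
/// println!("file contains {} bytes", file.contents().len());
/// # });
/// ```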
pub async fn open(filepath: &PathBuf, expected_identifier: &str) -> Result<AtomicFile, Error> {
    open_file(filepath, expected_identifier, 1, &Vec::new(), OpenSettings::ErrorIfNotExists).await
}

/// open_file will open an atomic file, using the backup of the file if the checksum fails. If
/// 'CreateIfNotExists' is passed, a new empty file will be created when no file already exists.
///
/// If the version of the file on-disk is outdated, the upgrades will be used in a chain to upgrade
/// the file to the latest version. If no valid path exists from the file's current version to the
/// latest version, an error will be returned.
///
/// An error will also be returned if the file has the wrong identifier, or if it is determined
/// that both the file and its backup are corrupt. Both files should only be corrupt in the event
/// of significant physical damage to the storage device.
pub async fn open_file(
    filepath: &PathBuf,
    expected_identifier: &str,
    latest_version: u8,
    upgrades: &Vec<Upgrade>,
    open_settings: OpenSettings,
) -> Result<AtomicFile, Error> {
    // Verify that the inputs match all requirements.
    let path_str = filepath.to_str().context("could not stringify path")?;
    if !path_str.is_ascii() {
        bail!("path should be valid ascii");
    }
    if expected_identifier.len() > 200 {
        bail!("the identifier of an atomic file cannot exceed 200 bytes");
    }
    if !expected_identifier.is_ascii() {
        bail!("the identifier must be ascii");
    }
    if latest_version == 0 {
        bail!("version is not allowed to be zero");
    }
    // Check that the identifier doesn't contain a newline.
    for c in expected_identifier.chars() {
        if c == '\n' {
            bail!("identifier is not allowed to contain newlines");
        }
    }

    // Parse the enum.
    let create_if_not_exists = match open_settings {
        OpenSettings::CreateIfNotExists => true,
        OpenSettings::ErrorIfNotExists => false,
    };

    // Build the paths for the main file and the backup file.
    let mut filepath = filepath.clone();
    let mut backup_filepath = filepath.clone();
    add_extension(&mut filepath, "atomic_file");
    add_extension(&mut backup_filepath, "atomic_file_backup");
    let filepath_exists = filepath.exists();

    // If the 'create' flag is not set and the main file does not exist, exit with a does not exist
    // error. We don't care about the backup file in this case, because if the main file wasn't
    // even created we can assume the file is missing.
    if !create_if_not_exists && !filepath_exists {
        bail!("file does not exist");
    }

    // If the main file does exist, then we should be able to rely on the assumption that either
    // the main file is not corrupt, or the backup file is not corrupt. If the
    // 'create_if_not_exists' flag has been set and no file exists yet, a blank main file and a
    // blank backup file will be created, both of which fail the checksum, and the user will
    // receive a freshly created blank file that is ready for them to modify.
    let mut file = OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .open(filepath)
        .await
        .context("unable to open main file")?;
    let mut backup_file = OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .open(backup_filepath)
        .await
        .context("unable to open backup file")?;

    // Read the contents of the main file and verify the checksum.
    let file_md = file
        .metadata()
        .await
        .context("unable to get file metadata")?;
    let file_len = file_md.len();
    if file_len >= 4096 {
        let mut buf = vec![0u8; file_len as usize];
        file.read_exact(&mut buf)
            .await
            .context("unable to read file")?;

        let mut hasher = Sha256::new();
        hasher.update(&buf[12..]);
        let result = hasher.finalize();
        let result_hex = hex::encode(result);
        let result_hex_bytes = result_hex.as_bytes();

        // If the file needs to be manually modified for some reason, the hash will no longer work.
        // By changing the checksum to all 'fffff...', the user/developer is capable of overriding
        // the checksum.
        let override_value = [255u8; 6];
        let override_hex = hex::encode(override_value);
        let override_hex_bytes = override_hex.as_bytes();

        // If the checksum passes, perform any required updates on the file and pass the file along
        // to the user.
        if result_hex_bytes[..12] == buf[..12] || buf[..12] == override_hex_bytes[..] {
            let (identifier, version) = identifier_and_version_from_metadata(&buf[..4096])
                .context("unable to parse version and identifier from file metadata")?;
            if identifier != expected_identifier {
                bail!("file has the wrong identifier");
            }
            verify_upgrade_paths(&upgrades, version, latest_version)
                .context("upgrade paths are invalid")?;
            let mut atomic_file = AtomicFile {
                backup_file,
                file,
                identifier,
                logical_data: buf[4096..].to_vec(),
                version,
            };
            perform_file_upgrades(&mut atomic_file, latest_version, upgrades)
                .await
                .context("unable to upgrade file")?;
            return Ok(atomic_file);
        }
    }

    // If we got this far, the main file exists but was either corrupt or had no data in it. We
    // check the backup file now.
    let backup_file_md = backup_file
        .metadata()
        .await
        .context("unable to get backup_file metadata")?;
    let backup_file_len = backup_file_md.len();
    if backup_file_len >= 4096 {
        let mut buf = vec![0u8; backup_file_len as usize];
        backup_file
            .read_exact(&mut buf)
            .await
            .context("unable to read backup_file")?;

        let mut hasher = Sha256::new();
        hasher.update(&buf[12..]);
        let result = hasher.finalize();
        let result_hex = hex::encode(result);
        let result_hex_bytes = result_hex.as_bytes();

        // If the checksum passes, perform any required updates on the file and pass the file along
        // to the user.
        if result_hex_bytes[..12] == buf[..12] {
            let (identifier, version) = identifier_and_version_from_metadata(&buf[..4096])
                .context("unable to parse version and identifier from file metadata")?;
            if identifier != expected_identifier {
                bail!("file has the wrong identifier");
            }
            verify_upgrade_paths(&upgrades, version, latest_version)
                .context("upgrade paths are invalid")?;

            let mut atomic_file = AtomicFile {
                backup_file,
                file,
                identifier,
                logical_data: buf[4096..].to_vec(),
                version,
            };
            perform_file_upgrades(&mut atomic_file, latest_version, upgrades)
                .await
                .context("unable to upgrade file")?;

            // Backup is fine but file is corrupt; we need to write the full data to the main file
            // so that the next change is safe, as the next change will start by overwriting the
            // backup data.
            atomic_file
                .file
                .set_len(buf.len() as u64)
                .await
                .context("unable to set length of atomic file")?;
            atomic_file
                .file
                .seek(SeekFrom::Start(0))
                .await
                .context("unable to seek in atomic file")?;
            atomic_file
                .file
                .write_all(&buf)
                .await
                .context("unable to write backup data to atomic file")?;
            atomic_file
                .file
                .sync_all()
                .await
                .context("unable to sync backup data to atomic file")?;

            return Ok(atomic_file);
        }
    }

    // If we got this far, we either have a new file or a corrupt file. If the length of the main
    // file is '0', we assume it's a new file. If the main file has a size of 0 and the backup file
    // is corrupt, this is still equivalent to having no file at all as it means the file creation
    // process got interrupted before it completed.
    if file_len == 0 {
        let mut af = AtomicFile {
            backup_file,
            file,
            identifier: expected_identifier.to_string(),
            logical_data: Vec::new(),
            version: latest_version,
        };
        af.write_file(&Vec::new()).await.context("unable to create new file")?;
        return Ok(af);
    }

    // We should only reach this code if both files have data and are failing the checksum, which
    // indicates unrecoverable corruption. Fail rather than try to make a new file in this case.
    bail!("there appears to have been unrecoverable file corruption");
}

#[cfg(test)]
mod tests {
    use super::*;

    use std::io::{Seek, Write};
    use testdir::testdir;
    use OpenSettings::{CreateIfNotExists, ErrorIfNotExists};

    // Create a helper function which does a null upgrade so that we can do testing of the upgrade
    // path verifier.
    fn stub_upgrade(v: Vec<u8>, _: u8, _: u8) -> Result<Vec<u8>, Error> {
        Ok(v)
    }

    // This is a basic upgrade function that expects the current contents of the file to be
    // "test_data". It will alter the contents so that they say "test".
    fn smoke_upgrade_1_2(
        data: Vec<u8>,
        initial_version: u8,
        updated_version: u8,
    ) -> Result<Vec<u8>, Error> {
        // Verify that the correct version is being used.
        if initial_version != 1 || updated_version != 2 {
            bail!("this upgrade is intended to take the file from version 1 to version 2");
        }
        if data.len() != 9 {
            bail!("file is wrong len");
        }
        if data != b"test_data" {
            bail!("file appears corrupt: {:?}", data);
        }

        // Replace the data with new data.
        Ok(b"test".to_vec())
    }

    fn smoke_upgrade_2_3(
        data: Vec<u8>,
        initial_version: u8,
        updated_version: u8,
    ) -> Result<Vec<u8>, Error> {
        // Verify that the correct version is being used.
        if initial_version != 2 || updated_version != 3 {
            bail!("this upgrade is intended to take the file from version 2 to version 3");
        }
        if data.len() != 4 {
            bail!("file is wrong len");
        }
        if data != b"test" {
            bail!("file appears corrupt");
        }

        // Replace the data with new data.
        Ok(b"testtest".to_vec())
    }

    fn smoke_upgrade_3_4(
        data: Vec<u8>,
        initial_version: u8,
        updated_version: u8,
    ) -> Result<Vec<u8>, Error> {
        // Verify that the correct version is being used.
        if initial_version != 3 || updated_version != 4 {
            bail!("this upgrade is intended to take the file from version 3 to version 4");
        }
        if data.len() != 8 {
            bail!("file is wrong len");
        }
        // Read the file and verify that we are upgrading the correct data.
        if data != b"testtest" {
            bail!("file appears corrupt");
        }

        // Replace the data with new data.
        Ok(b"testtesttest".to_vec())
    }

    // Do basic testing of all the major functions for AtomicFiles.
    async fn smoke_test() {
        // Create a basic atomic file.
        let dir = testdir!();
        let test_dat = dir.join("test.dat");
        open_file(&test_dat, "versioned_file::test.dat", 0, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap_err();
        open_file(&test_dat, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap();
        // Try to open it again.
        open_file(&test_dat, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap();
        // Try to open it with the wrong specifier.
        open_file(&test_dat, "bad_versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap_err();

        // Try to make some invalid new files.
        let invalid_name = dir.join("❄️"); // snowflake emoji in filename
        open_file(&invalid_name, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap_err();
        let invalid_id = dir.join("invalid_identifier.dat");
        open_file(&invalid_id, "versioned_file::test.dat::❄️", 1, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap_err();

        // Perform a test where we open test.dat and write a small amount of data to it. Then we
        // will open the file again and read back that data.
        let mut file = open_file(&test_dat, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .unwrap();
        file.write_file(b"test_data").await.unwrap();
        let file = open_file(&test_dat, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .unwrap();
        if file.contents().len() != 9 {
            panic!("file has unexpected len");
        }
        if &file.contents() != b"test_data" {
            panic!("data read does not match data written");
        }
        // Try to open the file again and ensure the write happened in the correct spot.
        open_file(&test_dat, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .unwrap();

        // Open the file again, this time with an upgrade for smoke_upgrade_1_2.
        let mut upgrade_chain = vec![Upgrade {
            initial_version: 1,
            updated_version: 2,
            process: smoke_upgrade_1_2,
        }];
        let file = open_file(&test_dat, "versioned_file::test.dat", 2, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();
        if file.contents().len() != 4 {
            panic!("file has wrong len");
        }
        if &file.contents() != b"test" {
            panic!("data read does not match data written");
        }
        // Try to open the file again to make sure everything still completes.
        open_file(&test_dat, "versioned_file::test.dat", 2, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();

        // Attempt to do two upgrades at once, from 2 to 3 and 3 to 4.
        upgrade_chain.push(Upgrade {
            initial_version: 2,
            updated_version: 3,
            process: smoke_upgrade_2_3,
        });
        upgrade_chain.push(Upgrade {
            initial_version: 3,
            updated_version: 4,
            process: smoke_upgrade_3_4,
        });
        let file = open_file(&test_dat, "versioned_file::test.dat", 4, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();
        if file.contents().len() != 12 {
            panic!("file has wrong len");
        }
        if &file.contents() != b"testtesttest" {
            panic!("data read does not match data written");
        }
        drop(file);
        // Try to open the file again to make sure everything still completes.
        open_file(&test_dat, "versioned_file::test.dat", 4, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();

        // Corrupt the data of the file. It should open correctly, and then after opening the
        // corruption should be repaired.
        let mut test_main = test_dat.clone();
        add_extension(&mut test_main, "atomic_file");
        let original_data = std::fs::read(&test_main).unwrap();
        std::fs::write(&test_main, b"file corruption!").unwrap();
        open_file(&test_dat, "versioned_file::test.dat", 4, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();
        let repaired_data = std::fs::read(&test_main).unwrap();
        assert!(repaired_data == original_data);

        // Try modifying the checksum of the file to see if it still completes. We modify the raw
        // data as well to see if the backup loads.
        let mut raw_file_name = test_dat.clone();
        add_extension(&mut raw_file_name, "atomic_file");
        println!("{:?}", raw_file_name);
        let mut raw_file = std::fs::OpenOptions::new()
            .read(true)
            .write(true)
            .open(raw_file_name)
            .unwrap();
        raw_file.set_len(4096).unwrap();
        raw_file.write("ffff".as_bytes()).unwrap();

        // Open the file with a bad checksum and make sure the backup data is loaded instead.
        let shorter_file = open_file(&test_dat, "versioned_file::test.dat", 4, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();
        assert!(shorter_file.contents().len() != 0); // should be the original file

        // Write out the full checksum override then see that the file still opens.
        raw_file.set_len(4096).unwrap();
        raw_file.seek(std::io::SeekFrom::Start(0)).unwrap();
        raw_file.write("ffffffffffff".as_bytes()).unwrap();
        let shorter_file = open(&test_dat, "versioned_file::test.dat")
            .await
            .unwrap();
        assert!(shorter_file.contents().len() == 0); // should accept the manually modified file

        // Try deleting the file. When we open the file again with a new identifier, the new
        // identifier should succeed.
        delete_file(&test_dat).await.unwrap();
        open_file(&test_dat, "versioned_file::test.dat::after_delete", 1, &Vec::new(), CreateIfNotExists)
            .await
            .unwrap();

        // Delete the file again, then try to open it with 'ErrorIfNotExists'. The file should not
        // be created, which means it'll fail on subsequent opens as well. We also sneak in a few
        // checks of 'open()'.
        assert!(exists(&test_dat));
        open(&test_dat, "versioned_file::test.dat::after_delete").await.unwrap();
        delete_file(&test_dat).await.unwrap();
        open(&test_dat, "versioned_file::test.dat::after_delete").await.unwrap_err();
        assert!(!exists(&test_dat));
        open_file(&test_dat, "versioned_file::test.dat::after_delete", 1, &Vec::new(), ErrorIfNotExists)
            .await
            .unwrap_err();
        open_file(&test_dat, "versioned_file::test.dat::after_delete", 1, &Vec::new(), ErrorIfNotExists)
            .await
            .unwrap_err();

        // Leave one file in the testdir so it can be viewed later.
        let mut f = open_file(&test_dat, "versioned_file::test.dat::after_delete", 1, &Vec::new(), CreateIfNotExists)
            .await
            .unwrap();
        f.write_file("this is where the real file data is stored!".as_bytes()).await.unwrap();
    }

    #[async_std::test]
    async fn smoke_test_async_std() {
        smoke_test().await;
    }

    #[tokio::test]
    async fn smoke_test_tokio() {
        smoke_test().await;
    }

    #[test]
    // Attempt to provide comprehensive test coverage of the upgrade path verifier.
    fn test_verify_upgrade_paths() {
        // Passing in no upgrades should be fine.
        verify_upgrade_paths(&Vec::new(), 0, 0).unwrap_err(); // 0 is not a legal version
        verify_upgrade_paths(&Vec::new(), 0, 1).unwrap_err(); // 0 is not a legal version
        verify_upgrade_paths(&Vec::new(), 1, 1).unwrap();
        verify_upgrade_paths(&Vec::new(), 2, 2).unwrap();
        verify_upgrade_paths(&Vec::new(), 255, 255).unwrap();

        // Passing in a single upgrade should be okay.
        verify_upgrade_paths(
            &vec![Upgrade {
                initial_version: 1,
                updated_version: 2,
                process: stub_upgrade,
            }],
            1,
            2,
        )
        .unwrap();

        // A non-increasing upgrade is not okay.
        verify_upgrade_paths(
            &vec![Upgrade {
                initial_version: 2,
                updated_version: 2,
                process: stub_upgrade,
            }],
            2,
            2,
        )
        .unwrap_err();

        // No route to final version is not okay.
        verify_upgrade_paths(
            &vec![Upgrade {
                initial_version: 1,
                updated_version: 2,
                process: stub_upgrade,
            }],
            1,
            3,
        )
        .unwrap_err();

        // Simple path is okay.
        verify_upgrade_paths(
            &vec![
                Upgrade {
                    initial_version: 1,
                    updated_version: 2,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 2,
                    updated_version: 3,
                    process: stub_upgrade,
                },
            ],
            1,
            3,
        )
        .unwrap();

        // Two starting options for the same version is not okay.
        verify_upgrade_paths(
            &vec![
                Upgrade {
                    initial_version: 1,
                    updated_version: 2,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 2,
                    updated_version: 3,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 1,
                    updated_version: 3,
                    process: stub_upgrade,
                },
            ],
            1,
            3,
        )
        .unwrap_err();

        // Two ending options for the same version is okay.
        verify_upgrade_paths(
            &vec![
                Upgrade {
                    initial_version: 1,
                    updated_version: 3,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 2,
                    updated_version: 3,
                    process: stub_upgrade,
                },
            ],
            1,
            3,
        )
        .unwrap();

        // Two ending options for the same version, but the upgrades reach beyond the latest
        // version, which is not okay.
        verify_upgrade_paths(
            &vec![
                Upgrade {
                    initial_version: 1,
                    updated_version: 3,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 2,
                    updated_version: 3,
                    process: stub_upgrade,
                },
            ],
            1,
            2,
        )
        .unwrap_err();

        // Complex valid structure.
        verify_upgrade_paths(
            &vec![
                Upgrade {
                    initial_version: 1,
                    updated_version: 3,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 2,
                    updated_version: 3,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 3,
                    updated_version: 6,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 4,
                    updated_version: 6,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 5,
                    updated_version: 6,
                    process: stub_upgrade,
                },
            ],
            1,
            6,
        )
        .unwrap();

        // Complex valid structure.
        verify_upgrade_paths(
            &vec![
                Upgrade {
                    initial_version: 1,
                    updated_version: 3,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 2,
                    updated_version: 3,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 3,
                    updated_version: 6,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 4,
                    updated_version: 6,
                    process: stub_upgrade,
                },
            ],
            1,
            6,
        )
        .unwrap();

        // Complex structure with no upgrade path from the current version, which is not okay.
        verify_upgrade_paths(
            &vec![
                Upgrade {
                    initial_version: 1,
                    updated_version: 3,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 2,
                    updated_version: 3,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 3,
                    updated_version: 6,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 4,
                    updated_version: 6,
                    process: stub_upgrade,
                },
            ],
            5,
            6,
        )
        .unwrap_err();

        // Complex valid structure, randomly ordered.
        verify_upgrade_paths(
            &vec![
                Upgrade {
                    initial_version: 5,
                    updated_version: 6,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 2,
                    updated_version: 3,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 3,
                    updated_version: 6,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 1,
                    updated_version: 3,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 4,
                    updated_version: 6,
                    process: stub_upgrade,
                },
            ],
            1,
            6,
        )
        .unwrap();

        // Complex structure, randomly ordered, one orphan.
        verify_upgrade_paths(
            &vec![
                Upgrade {
                    initial_version: 2,
                    updated_version: 5,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 6,
                    updated_version: 7,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 3,
                    updated_version: 6,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 1,
                    updated_version: 4,
                    process: stub_upgrade,
                },
                Upgrade {
                    initial_version: 4,
                    updated_version: 6,
                    process: stub_upgrade,
                },
            ],
            1,
            6,
        )
        .unwrap_err();
    }

    #[test]
    fn test_version_to_bytes() {
        assert!(&version_to_bytes(1) == b"001\n");
        assert!(&version_to_bytes(2) == b"002\n");
        assert!(&version_to_bytes(9) == b"009\n");
        assert!(&version_to_bytes(10) == b"010\n");
        assert!(&version_to_bytes(39) == b"039\n");
        assert!(&version_to_bytes(139) == b"139\n");
    }
}