// atomic_file/lib.rs
1#![forbid(unsafe_code)]
2#![deny(missing_docs)]
3#![deny(unused_must_use)]
4#![deny(unused_mut)]
5
6//! The AtomicFile crate provides a wrapper to async_std::File to enable more convenient and safe
7//! interactions with on-disk data. All operations on AtomicFile are ACID, and the AtomicFile type
8//! includes an invisible 4096 byte header which manages details like version number and file
9//! identifier.
10//!
11//! The main use of a version number and file identifier are to provide easy upgrade capabilities
12//! for AtomicFiles, and also to ensure that the wrong file is never being opened in the event that
13//! the user incorrectly moved a file from one place to another.
14//!
15//! The main advantage of using an AtomicFile is its ACID guarantees, which ensures that data will
//! never be corrupted in the event of a sudden loss of power. Typical file usage patterns leave
17//! users vulnerable to corruption, especially when updating a file. AtomicFile protects against
18//! corruption by using a double-write scheme to guarantee that correct data exists on disk, and
19//! uses a checksum to verify at startup that the correct instance of the doubly-written file is
20//! loaded. This does mean that two files will exist on disk for each AtomicFile - a .atomic_file
21//! and a .atomic_file_backup.
22//!
23//! The checksum used by an AtomicFile is 6 bytes. We use a 6 byte checksum because our threat
24//! model is arbitrary disk failure, not a human adversary. A human adversary could write any
25//! checksum they want to defeat our corruption detection. The checksum is written as hex in the
26//! first 12 bytes of the file.
27//!
28//! If a file needs to be manually modified, the checksum can be overwritten. Change the checksum
29//! to 'ffffffffffff' (12 chars) and the checksum will be accepted independent of the file
30//! contents. The checks for the identifier will still trigger.
31//!
32//! Data corruption can still occur in the event of something extreme like physical damage to the
//! hard drive, but chances of recovery are better and the user is protected against all common
34//! forms of corruption (which stem from power being lost unexpectedly).
35//!
36//! The 'Atomic' property of the AtomicFile is that the only read and write operations fully read
37//! or fully write the file.
38//! ```
39//! // Basic file operations
40//!
41//! use std::path::PathBuf;
42//! use atomic_file::{
43//! open, open_file,
44//! OpenSettings::CreateIfNotExists,
45//! };
46//!
47//! #[async_std::main]
48//! async fn main() {
49//! // Create a version 1 file with open_file. We pass in an empty vector for the upgrade path,
50//! // and 'CreateIfNotExists' to indicate that we want to create the non-existing file.
51//! let mut path = PathBuf::new();
52//! path.push("target");
53//! path.push("docs-example-1");
54//! let identifier = "AtomicFileDocs::docs-example-1";
55//! let mut file = open_file(&path, identifier, 1, &Vec::new(), CreateIfNotExists).await.unwrap();
56//!
57//! // Use 'contents' and 'write_file' to read and write the logical data of the file. Each
58//! // one will always read or write the full contents of the file.
59//! file.write_file(b"hello, world!").await.unwrap();
60//! let file_data = file.contents();
61//! if file_data != b"hello, world!" {
62//! panic!("example did not read correctly");
63//! }
64//! drop(file);
65//!
66//! // Now that we have created a file, we can use 'open(path, identifier)' as an alias for:
67//! // 'open_file(path, identifier, 1, Vec::new(), ErrorIfNotExists)'
68//! let file = open(&path, identifier);
69//! # drop(file);
70//! # atomic_file::delete_file(&path).await.unwrap();
71//! }
72//! ```
73//! AtomicFile uses a versioning and upgrading scheme to simplify the process of releasing new
74//! versions of a file. When opening a file, you pass in a version number and an upgrade path which
75//! will allow the file opening process to automatically upgrade your files from their current
76//! version to the latest version.
77//! ```
78//! // Simple upgrade example
79//! use std::path::PathBuf;
80//!
81//! use anyhow::{bail, Result, Error};
82//! use atomic_file::{open, open_file, AtomicFile, Upgrade};
83//! use atomic_file::OpenSettings::ErrorIfNotExists;
84//! # use atomic_file::OpenSettings::CreateIfNotExists;
85//!
86//! // An example of a function that upgrades a file from version 1 to version 2, while making
87//! // changes to the body of the file.
88//! fn example_upgrade(
89//! data: Vec<u8>,
90//! initial_version: u8,
91//! updated_version: u8,
92//! ) -> Result<Vec<u8>, Error> {
93//! // Check that the version is okay.
94//! if initial_version != 1 || updated_version != 2 {
95//! bail!("wrong version");
96//! }
97//!
98//! // Return updated contents for the file.
//! Ok(b"hello, update!".to_vec())
100//! }
101//!
102//! #[async_std::main]
103//! async fn main() {
104//! # let mut p = PathBuf::new();
105//! # p.push("target");
106//! # p.push("docs-example-2");
107//! # let i = "AtomicFileDocs::docs-example-2";
108//! # let mut f = atomic_file::open_file(&p, i, 1, &Vec::new(), CreateIfNotExists).await.unwrap();
109//! # f.write_file(b"hello, world!").await.unwrap();
110//! # drop(f);
111//! let mut path = PathBuf::new();
112//! path.push("target");
113//! path.push("docs-example-2");
114//! let identifier = "AtomicFileDocs::docs-example-2";
115//! let upgrade = Upgrade {
116//! initial_version: 1,
117//! updated_version: 2,
118//! process: example_upgrade,
119//! };
120//! let mut file = open_file(&path, identifier, 2, &vec![upgrade], ErrorIfNotExists).await.unwrap();
121//! // Note that the upgrades are passed in as a vector, allowing the caller to
122//! // define entire upgrade chains, e.g. 1->2 and 2->3. The final file that gets returned
123//! // will have been upgraded through the chain to the latest version.
124//! let file_data = file.contents();
125//! if file_data != b"hello, update!" {
126//! panic!("upgrade appears to have failed: \n{:?}\n{:?}", file_data, b"hello, update!");
127//! }
128//!
129//! // Perform cleanup.
130//! drop(file);
131//! atomic_file::delete_file(&path).await.unwrap();
132//! }
133//! ```
134//!
135//! If you would like to contribute to this crate, we are looking for a way to make the upgrade
136//! functions async+Send as prior attempts were unsuccessful.
137
138use async_std::{
139 fs::{File, OpenOptions},
140 io::prelude::SeekExt,
141 io::{ReadExt, SeekFrom, WriteExt},
142};
143use std::{
144 collections::HashMap,
145 path::PathBuf,
146 str::from_utf8,
147};
148
149use anyhow::{bail, Context, Error, Result};
150use sha2::{Digest, Sha256};
151
/// OpenSettings provides the two options for opening a file in the event that the file does not
/// exist: create the file, or return an error.
pub enum OpenSettings {
    /// A new file will be created if the file does not exist.
    CreateIfNotExists,

    /// An error will be returned if the file does not exist.
    ErrorIfNotExists,
}
161
/// UpgradeFunc defines the signature for a function that can be used to upgrade an
/// AtomicFile. The UpgradeFunc function will receive the file data that needs to be upgraded along
/// with the intended initial version and final version that is expected from the upgrade.
///
/// We pass in the initial version and final version as arguments to provide an extra level of
/// redundancy and to prevent mistakes when copy-pasting upgrades, as an incorrect upgrade could
/// corrupt user data.
///
/// On success the function returns the upgraded logical contents of the file; the caller is
/// responsible for recording the new version.
pub type UpgradeFunc =
    fn(data: Vec<u8>, initial_version: u8, upgraded_version: u8) -> Result<Vec<u8>, Error>;
171
/// Upgrade defines an upgrade process for upgrading the data in a file from one version to
/// another. Upgrades are passed to open_file as a vector, allowing whole upgrade chains
/// (e.g. 1->2, 2->3) to be applied in sequence.
pub struct Upgrade {
    /// initial_version designates the version of the file that this upgrade should be applied to.
    pub initial_version: u8,
    /// updated_version designates the version of the file after the upgrade is complete.
    pub updated_version: u8,
    /// process defines the function that is used to upgrade the file.
    pub process: UpgradeFunc,
}
182
/// AtomicFile defines the main type for the crate, and implements an API for safely
/// handling atomic files. The API is based on the async_std::File interface, but with some
/// adjustments that are designed to make it both safer and more ergonomic.
#[derive(Debug)]
pub struct AtomicFile {
    // Handle to the .atomic_file_backup copy on disk; always written before the main file.
    backup_file: File,
    // Handle to the main .atomic_file on disk.
    file: File,
    // The identifier stored in the 4096 byte metadata header.
    identifier: String,
    // The logical (header-free) contents of the file.
    logical_data: Vec<u8>,
    // The current version of the file contents.
    version: u8,
}
194
/// add_extension will add a new extension to the provided pathbuf, rather than overwriting the
/// existing one. For example, 'add_extension("stuff.tar", "gz")' returns 'stuff.tar.gz', while
/// calling set_extension directly would return 'stuff.gz', which is the wrong result.
fn add_extension(path: &mut PathBuf, extension: &str) {
    if let Some(existing) = path.extension() {
        // Combine the existing extension with the new one before setting it, so the original
        // extension is preserved.
        let mut combined = existing.to_os_string();
        combined.push(".");
        combined.push(extension);
        path.set_extension(combined);
    } else {
        // No existing extension; a plain set_extension does the right thing.
        path.set_extension(extension);
    }
}
209
/// version_to_bytes will write out the version in ascii, adding leading zeroes if needed and
/// placing a newline at the end, e.g. version 7 becomes "007\n".
///
/// Panics if version is 0, as 0 is not a legal version.
fn version_to_bytes(version: u8) -> [u8; 4] {
    // 0 is not an allowed version, every other possible u8 is okay.
    if version == 0 {
        panic!("version is not allowed to be 0");
    }

    // A u8 is at most three decimal digits; zero-pad to exactly three digits and append the
    // newline, which always yields exactly 4 bytes.
    let version_string = format!("{:03}\n", version);
    let mut version_arr = [0u8; 4];
    version_arr.copy_from_slice(version_string.as_bytes());
    version_arr
}
230
231/// identifier_and_version_from_metadata will pull the identifier and version out of the metadata.
232fn identifier_and_version_from_metadata(metadata: &[u8]) -> Result<(String, u8), Error> {
233 if metadata.len() < 4096 {
234 bail!("provided metadata is not the right size");
235 }
236
237 let version_str =
238 from_utf8(&metadata[13..16]).context("the on-disk version could not be parsed")?;
239 let version: u8 = version_str
240 .parse()
241 .context("unable to parse version of metadata")?;
242
243 let mut clean_identifier = false;
244 let mut identifier = "".to_string();
245 let mut atomic_identifier_offset = 0;
246 for i in 0..201 {
247 if metadata[i + 17] == '\n' as u8 {
248 clean_identifier = true;
249 atomic_identifier_offset = i + 18;
250 break;
251 }
252 if metadata[i + 17] > 127 {
253 bail!("identifier contains non-ascii characters before termination sequence");
254 }
255 identifier.push(metadata[i + 17] as char);
256 }
257 if !clean_identifier {
258 bail!("provided metadata does not have a legally terminating identifier");
259 }
260 let atomic_identifier = "DavidVorick/atomic_file-v1\n".as_bytes();
261 if metadata[atomic_identifier_offset..atomic_identifier_offset+27] != atomic_identifier[..] {
262 bail!("file does not appear to be an atomic file");
263 }
264
265 Ok((identifier, version))
266}
267
268impl AtomicFile {
269 /// fill_metadata will write the first 4096 bytes to contain the proper metadata for the file,
270 /// including the first 64 bytes which serve as the checksum.
271 fn fill_metadata(&self, buf: &mut [u8]) {
272 if buf.len() < 4096 {
273 panic!("misuse of fill_metadata, check stack trace");
274 }
275 if self.identifier.len() > 200 {
276 panic!(
277 "file has too-large identifier, ensure bounds checking is in place in open_file"
278 );
279 }
280 let version_bytes = version_to_bytes(self.version);
281
282 // Fill out the header data.
283 buf[12] = '\n' as u8;
284 buf[13..17].copy_from_slice(&version_bytes);
285 let iden_bytes = self.identifier.as_bytes();
286 buf[17..17 + iden_bytes.len()].copy_from_slice(iden_bytes);
287 buf[17 + iden_bytes.len()] = '\n' as u8;
288 buf[18 + iden_bytes.len()] = 255; // newline+255 is the termination sequence for identifier
289 let atomic_identifier = "DavidVorick/atomic_file-v1\n".as_bytes();
290 buf[18 + iden_bytes.len()..18 + iden_bytes.len() + 27].copy_from_slice(atomic_identifier);
291 buf[4095] = '\n' as u8;
292
293 // Grab the checksum of the data and fill it in as the first 64 bytes.
294 let mut hasher = Sha256::new();
295 hasher.update(&buf[12..]);
296 let result = hasher.finalize();
297 let result_hex = hex::encode(result);
298 buf[..12].copy_from_slice(result_hex[..12].as_bytes());
299 }
300
301 /// contents will return a copy of the contents of the file.
302 pub fn contents(&self) -> Vec<u8> {
303 self.logical_data.clone()
304 }
305
306 /// write_file will perform a safe, ACID operation to write the provided data to the file. This
307 /// will incur a performance cost of 2 fsyncs and of writing the full contents of the file to
308 /// disk twice. For SSDs, the cost of two fsyncs is typically under 25ms and often under 3ms.
309 /// For HDDs, the cost of two fsyncs is typically over 200ms and often over 800ms.
310 pub async fn write_file(&mut self, buf: &[u8]) -> Result<(), Error> {
311 // Build the full physical data of the file, including the leading checksum in the
312 // metadata.
313 let mut full_data = vec![0u8; 4096 + buf.len()];
314 full_data[4096..].copy_from_slice(buf);
315 self.fill_metadata(&mut full_data);
316
317 // Write out the full data to the backup file.
318 self.backup_file
319 .set_len(full_data.len() as u64)
320 .await
321 .context("unable to set the length of the backup file")?;
322 self.backup_file
323 .seek(SeekFrom::Start(0))
324 .await
325 .context("unable to seek to start of backup file")?;
326 self.backup_file
327 .write_all(&full_data)
328 .await
329 .context("unable to write to backup file")?;
330 self.backup_file
331 .flush()
332 .await
333 .context("unable to flush backup file")?;
334 self.backup_file
335 .sync_all()
336 .await
337 .context("fsync of backup file failed")?;
338
339 // Backup file is safe, repeat the operation on the main file.
340 self.file
341 .set_len(full_data.len() as u64)
342 .await
343 .context("unable to set the length of the backup file")?;
344 self.file
345 .seek(SeekFrom::Start(0))
346 .await
347 .context("unable to seek to start of backup file")?;
348 self.file
349 .write_all(&full_data)
350 .await
351 .context("unable to write to backup file")?;
352 self.file
353 .flush()
354 .await
355 .context("unable to flush backup file")?;
356 self.file
357 .sync_all()
358 .await
359 .context("fsync of backup file failed")?;
360
361 // Set the data of the file to the newly provided data.
362 self.logical_data = buf.to_vec();
363 Ok(())
364 }
365}
366
367/// verify_upgrade_paths verify that the set of paths provided for performing upgrades all lead to
368/// the latest version, and will return an error if some path doesn't lead to the latest version.
369/// It will also return an error if two possible paths exist for a given version.
370fn verify_upgrade_paths(
371 upgrade_paths: &Vec<Upgrade>,
372 current_version: u8,
373 latest_version: u8,
374) -> Result<(), Error> {
375 // Enusre 0 was not used as the latest_version.
376 if latest_version == 0 {
377 bail!("version 0 is not allowed for a VersionedFile");
378 }
379
380 // Verify that an upgrade path exists for the file which carries it to the latest version.
381 let mut version_routes = HashMap::new();
382 // Verify basic properties of the graph (no cycles, no repeat sources).
383 for path in upgrade_paths {
384 if path.initial_version >= path.updated_version {
385 bail!("upgrade paths must always lead to a higher version number");
386 }
387 if version_routes.contains_key(&path.initial_version) {
388 bail!("upgrade paths can only have one upgrade for each version");
389 }
390 if path.updated_version > latest_version {
391 bail!("upgrade paths lead beyond the latest version");
392 }
393 if path.initial_version == 0 {
394 bail!("version 0 is not allowed for a VersionedFile");
395 }
396 version_routes.insert(path.initial_version, path.updated_version);
397 }
398 // Verify that all upgrades lead to the latest version. We iterate over the version_routes and mark every
399 // node that connects to a finished node.
400 let mut complete_paths = HashMap::new();
401 complete_paths.insert(latest_version, {});
402 loop {
403 let mut progress = false;
404 let mut finished = true;
405
406 for (key, value) in &version_routes {
407 if complete_paths.contains_key(key) {
408 continue;
409 }
410 if complete_paths.contains_key(value) {
411 progress = true;
412 complete_paths.insert(*key, {});
413 } else {
414 finished = false;
415 }
416 }
417
418 if finished {
419 break;
420 }
421 if progress == false {
422 bail!("update graph is incomplete, not all nodes lead to the latest version");
423 }
424 }
425 // Verify that the current version of the file is found in the upgrade paths.
426 if !complete_paths.contains_key(¤t_version) {
427 bail!("no upgrade found for current version of file");
428 }
429 Ok(())
430}
431
432/// perform_file_upgrade takes a file and an upgrade, and then executes the upgrade against the
433/// file.
434async fn perform_file_upgrade(file: &mut AtomicFile, u: &Upgrade) -> Result<(), Error> {
435 // Check that we've got the right upgrade.
436 if file.version != u.initial_version {
437 bail!("wrong update has been selected for this file");
438 }
439
440 // Perform the update on the data.
441 let new_data = (u.process)(file.logical_data.clone(), u.initial_version, u.updated_version)
442 .context(format!(
443 "unable to complete file upgrade from version {} to {}",
444 u.initial_version, u.updated_version
445 ))?;
446 file.logical_data = new_data;
447
448 // Update the version of the file. We don't actually write the changes to disk yet because
449 // these updates are recoverable / repeatable at next boot.
450 file.version = u.updated_version;
451 Ok(())
452}
453
454/// perform_file_upgrades will take a file through the upgrade chain.
455async fn perform_file_upgrades(
456 file: &mut AtomicFile,
457 latest_version: u8,
458 upgrades: &Vec<Upgrade>,
459) -> Result<(), Error> {
460 // Execute the upgrades.
461 while file.version != latest_version {
462 let mut found = false;
463 for upgrade in upgrades {
464 if upgrade.initial_version == file.version {
465 perform_file_upgrade(file, upgrade)
466 .await
467 .context("unable to complete file upgrade")?;
468 file.version = upgrade.updated_version;
469 found = true;
470 break;
471 }
472 }
473
474 // The upgrades verifier ensures that if an upgrade exists in the set of upgrades, then
475 // there also exists a path to the latest_version from that upgrade. Therefore, if this
476 // file doesn't have a path to the latest version, no other upgrades will be executed
477 // either - at least as long as the upgrade verifier is working.
478 if !found {
479 panic!("attempting to perform file upgrades without a viable upgrade path");
480 }
481 }
482
483 Ok(())
484}
485
486/// delete_file will delete the atomic file at the given filepath. This will delete both the
487/// .atomic_file and the .atomic_file_backup
488pub async fn delete_file(filepath: &PathBuf) -> Result<(), Error> {
489 let mut main_path = filepath.clone();
490 let mut backup_path = filepath.clone();
491 add_extension(&mut main_path, "atomic_file");
492 async_std::fs::remove_file(main_path.clone()).await.context("unable to backup file")?;
493 add_extension(&mut backup_path, "atomic_file_backup");
494 async_std::fs::remove_file(backup_path.clone()).await.context("unable to remove main file")?;
495 Ok(())
496}
497
498/// exists will return whether or not an atomic file is considered to exist on disk.
499pub fn exists(filepath: &PathBuf) -> bool {
500 let mut path = filepath.clone();
501 add_extension(&mut path, "atomic_file");
502 path.exists()
503}
504
505/// open is a convenience wrapper for open_file which uses '1' as the version, an empty vector as
506/// the upgrade path, and ErrorIfNotExists as the open setting.
507pub async fn open(filepath: &PathBuf, expected_identifier: &str) -> Result<AtomicFile, Error> {
508 open_file(filepath, expected_identifier, 1, &Vec::new(), OpenSettings::ErrorIfNotExists).await
509}
510
/// open_file will open an atomic file, using the backup of the file if the checksum fails. If
/// 'OpenSettings::CreateIfNotExists' is provided, a new empty file will be created if a file does
/// not already exist.
///
/// If the version of the file on-disk is outdated, the upgrades will be used in a chain to upgrade
/// the file to the latest version. If no valid path exists from the file's current version to the
/// latest version, an error will be returned.
///
/// An error will also be returned if the file has the wrong identifier, or if it is determined
/// that both the file and its backup are corrupt. Both files should only be corrupt in the event
/// of significant physical damage to the storage device.
pub async fn open_file(
    filepath: &PathBuf,
    expected_identifier: &str,
    latest_version: u8,
    upgrades: &Vec<Upgrade>,
    open_settings: OpenSettings,
) -> Result<AtomicFile, Error> {
    // Verify that the inputs match all requirements.
    let path_str = filepath.to_str().context("could not stringify path")?;
    if !path_str.is_ascii() {
        bail!("path should be valid ascii");
    }
    // The identifier must fit in the 4096 byte metadata header; 200 bytes is the cap that
    // fill_metadata panics on and the parser scans for.
    if expected_identifier.len() > 200 {
        bail!("the identifier of an atomic file cannot exceed 200 bytes");
    }
    if !expected_identifier.is_ascii() {
        bail!("the identifier must be ascii");
    }
    if latest_version == 0 {
        bail!("version is not allowed to be zero");
    }
    // Check that the identifier doesn't contain a newline, as the newline is what terminates
    // the identifier in the metadata header.
    for c in expected_identifier.chars() {
        if c == '\n' {
            bail!("identifier is not allowed to contain newlines");
        }
    }

    // Parse the enum.
    let create_if_not_exists = match open_settings {
        OpenSettings::CreateIfNotExists => true,
        OpenSettings::ErrorIfNotExists => false,
    };

    // Build the paths for the main file and the backup file.
    let mut filepath = filepath.clone();
    let mut backup_filepath = filepath.clone();
    add_extension(&mut filepath, "atomic_file");
    add_extension(&mut backup_filepath, "atomic_file_backup");
    let filepath_exists = filepath.exists();

    // If the 'create' flag is not set and the main file does not exist, exit with a does not exist
    // error. We don't care about the backup file in this case, because if the main file wasn't
    // even created we can assume the file is missing.
    if !create_if_not_exists && !filepath_exists {
        bail!("file does not exist");
    }

    // If the main file does exist, then we should be able to rely on the assumption that either
    // the main file is not corrupt, or the backup file is not corrupt. If the
    // 'CreateIfNotExists' setting has been provided, we'll create a blank file which will fail the
    // checksum, and then a blank backup will be created with a failed checksum, and then the user
    // will receive a blank file that's created and ready for them to modify.
    let mut file = OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .open(filepath)
        .await
        .context("unable to open versioned file")?;
    let mut backup_file = OpenOptions::new()
        .read(true)
        .write(true)
        .create(true)
        .open(backup_filepath)
        .await
        .context("unable to open versioned file")?;

    // Read the contents of the main file and verify the checksum. Anything shorter than 4096
    // bytes cannot hold a complete metadata header and is treated as new/corrupt.
    let file_md = file
        .metadata()
        .await
        .context("unable to get file metadata")?;
    let file_len = file_md.len();
    if file_len >= 4096 {
        let mut buf = vec![0u8; file_len as usize];
        file.read_exact(&mut buf)
            .await
            .context("unable to read file")?;

        // The checksum is the first 12 hex characters of the sha256 hash of everything after
        // the checksum itself.
        let mut hasher = Sha256::new();
        hasher.update(&buf[12..]);
        let result = hasher.finalize();
        let result_hex = hex::encode(result);
        let result_hex_bytes = result_hex.as_bytes();

        // If the file needs to be manually modified for some reason, the hash will no longer work.
        // By changing the checksum to all 'fffff...', the user/developer is capable of overriding
        // the checksum.
        let override_value = [255u8; 6];
        let override_hex = hex::encode(override_value);
        let override_hex_bytes = override_hex.as_bytes();

        // If the checksum passes, perform any required updates on the file and pass the file along to
        // the user.
        if result_hex_bytes[..12] == buf[..12] || buf[..12] == override_hex_bytes[..] {
            let (identifier, version) = identifier_and_version_from_metadata(&buf[..4096])
                .context("unable to parse version and identifier from file metadata")?;
            if identifier != expected_identifier {
                bail!("file has the wrong identifier");
            }
            // Validate the upgrade graph before applying any of it, so a bad chain fails loudly
            // instead of leaving the file partially upgraded.
            verify_upgrade_paths(&upgrades, version, latest_version)
                .context("upgrade paths are invalid")?;
            let mut atomic_file = AtomicFile {
                backup_file,
                file,
                identifier,
                logical_data: buf[4096..].to_vec(),
                version,
            };
            perform_file_upgrades(&mut atomic_file, latest_version, upgrades)
                .await
                .context("unable to upgrade file")?;
            return Ok(atomic_file);
        }
    }

    // If we got this far, the main file exists but was either corrupt or had no data in it. We
    // check the backup file now.
    let backup_file_md = backup_file
        .metadata()
        .await
        .context("unable to get backup_file metadata")?;
    let backup_file_len = backup_file_md.len();
    if backup_file_len >= 4096 {
        let mut buf = vec![0u8; backup_file_len as usize];
        backup_file
            .read_exact(&mut buf)
            .await
            .context("unable to read backup_file")?;

        let mut hasher = Sha256::new();
        hasher.update(&buf[12..]);
        let result = hasher.finalize();
        let result_hex = hex::encode(result);
        let result_hex_bytes = result_hex.as_bytes();

        // If the checksum passes, perform any required updates on the file and pass the file along to
        // the user.
        //
        // NOTE(review): the manual 'ffffffffffff' checksum override is honored for the main file
        // above but not for the backup here - confirm whether this asymmetry is intended.
        if result_hex_bytes[..12] == buf[..12] {
            let (identifier, version) = identifier_and_version_from_metadata(&buf[..4096])
                .context("unable to parse version and identifier from file metadata")?;
            if identifier != expected_identifier {
                bail!("file has the wrong identifier");
            }
            verify_upgrade_paths(&upgrades, version, latest_version)
                .context("upgrade paths are invalid")?;

            let mut atomic_file = AtomicFile {
                backup_file,
                file,
                identifier,
                logical_data: buf[4096..].to_vec(),
                version,
            };
            perform_file_upgrades(&mut atomic_file, latest_version, upgrades)
                .await
                .context("unable to upgrade file")?;

            // Backup is fine but file is corrupt; we need to write the full data to the main file
            // so that the next change is safe, as the next change will start by overwriting the
            // backup data.
            atomic_file
                .file
                .set_len(buf.len() as u64)
                .await
                .context("unable to set length of atomic file")?;
            atomic_file
                .file
                .seek(SeekFrom::Start(0))
                .await
                .context("unable to seek in atomic file")?;
            atomic_file
                .file
                .write_all(&buf)
                .await
                .context("unable to write backup data to atomic file")?;
            atomic_file
                .file
                .sync_all()
                .await
                .context("unable to sync backup data to atomic file")?;

            return Ok(atomic_file);
        }
    }

    // If we got this far, we either have a new file or a corrupt file. If the length of the main
    // file is '0', we assume it's a new file. If the main file has a size of 0 and the backup file
    // is corrupt, this is still equivalent to having no file at all as it means the file creation
    // process got interrupted before it completed.
    if file_len == 0 {
        let mut af = AtomicFile {
            backup_file,
            file,
            identifier: expected_identifier.to_string(),
            logical_data: Vec::new(),
            version: latest_version,
        };
        af.write_file(&Vec::new()).await.context("unable to create new file")?;
        return Ok(af);
    }

    // We should only reach this code if both files have data and are failing the checksum, which
    // indicates unrecoverable corruption. Fail rather than try to make a new file in this case.
    bail!("there appears to have been unrecoverable file corruption");
}
729
730#[cfg(test)]
731mod tests {
732 use super::*;
733
734 use std::io::{Seek, Write};
735 use testdir::testdir;
736 use OpenSettings::{
737 CreateIfNotExists,
738 ErrorIfNotExists,
739 };
740
741 // Create a helper function which does a null upgrade so that we can do testing of the upgrade
742 // path verifier.
743 fn stub_upgrade(v: Vec<u8>, _: u8, _: u8) -> Result<Vec<u8>, Error> {
744 Ok(v)
745 }
746
747 // This is a basic upgrade function that expects the current contents of the file to be
748 // "test_data". It will alter the contents so that they say "test".
749 fn smoke_upgrade_1_2(
750 data: Vec<u8>,
751 initial_version: u8,
752 updated_version: u8,
753 ) -> Result<Vec<u8>, Error> {
754 // Verify that the correct version is being used.
755 if initial_version != 1 || updated_version != 2 {
756 bail!("this upgrade is intended to take the file from version 1 to version 2");
757 }
758 if data.len() != 9 {
759 bail!("file is wrong len");
760 }
761 if data != b"test_data" {
762 bail!(format!("file appears corrupt: {:?}", data));
763 }
764
765 // Replace the data with new data.
766 Ok(b"test".to_vec())
767 }
768
769 fn smoke_upgrade_2_3(
770 data: Vec<u8>,
771 initial_version: u8,
772 updated_version: u8,
773 ) -> Result<Vec<u8>, Error> {
774 // Verify that the correct version is being used.
775 if initial_version != 2 || updated_version != 3 {
776 bail!("this upgrade is intended to take the file from version 2 to version 3");
777 }
778 if data.len() != 4 {
779 bail!("file is wrong len");
780 }
781 if data != b"test" {
782 bail!("file appears corrupt");
783 }
784
785 // Replace the data with new data.
786 Ok(b"testtest".to_vec())
787 }
788
789 fn smoke_upgrade_3_4(
790 data: Vec<u8>,
791 initial_version: u8,
792 updated_version: u8,
793 ) -> Result<Vec<u8>, Error> {
794 // Verify that the correct version is being used.
795 if initial_version != 3 || updated_version != 4 {
796 bail!("this upgrade is intended to take the file from version 1 to version 2");
797 }
798 if data.len() != 8 {
799 bail!("file is wrong len");
800 }
801 // Read the file and verify that we are upgrading the correct data.
802 if data != b"testtest" {
803 bail!("file appears corrupt");
804 }
805
806 // Truncate the file and write the new data into it.
807 Ok(b"testtesttest".to_vec())
808 }
809
    // Do basic testing of all the major functions for VersionedFiles: creation,
    // identifier checks, read/write round-trips, chained upgrades, corruption
    // repair, checksum override, and deletion.
    async fn smoke_test() {
        // Create a basic versioned file. Version 0 is not a legal version number,
        // so the first open must fail; opening with version 1 should succeed.
        let dir = testdir!();
        let test_dat = dir.join("test.dat");
        open_file(&test_dat, "versioned_file::test.dat", 0, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap_err();
        open_file(&test_dat, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap();
        // Try to open it again; re-opening an existing file must succeed.
        open_file(&test_dat, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap();
        // Try to open it with the wrong specifier; the identifier check must reject it.
        open_file(&test_dat, "bad_versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap_err();

        // Try to make some invalid new files; both a non-ASCII filename and a
        // non-ASCII identifier are expected to be rejected.
        let invalid_name = dir.join("❄️"); // snowflake emoji in filename
        open_file(&invalid_name, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap_err();
        let invalid_id = dir.join("invalid_identifier.dat");
        open_file(&invalid_id, "versioned_file::test.dat::❄️", 1, &Vec::new(), CreateIfNotExists)
            .await
            .context("unable to create versioned file")
            .unwrap_err();

        // Perform a test where we open test.dat and write a small amount of data to it. Then we
        // will open the file again and read back that data.
        let mut file = open_file(&test_dat, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .unwrap();
        file.write_file(b"test_data").await.unwrap();
        let file = open_file(&test_dat, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .unwrap();
        if file.contents().len() != 9 {
            panic!("file has unexpected len");
        }
        if &file.contents() != b"test_data" {
            panic!("data read does not match data written");
        }
        // Try to open the file again and ensure the write happened in the correct spot.
        open_file(&test_dat, "versioned_file::test.dat", 1, &Vec::new(), CreateIfNotExists)
            .await
            .unwrap();

        // Open the file again, this time with an upgrade for smoke_upgrade_1_2,
        // which replaces the 9-byte payload with b"test".
        let mut upgrade_chain = vec![Upgrade {
            initial_version: 1,
            updated_version: 2,
            process: smoke_upgrade_1_2,
        }];
        let file = open_file(&test_dat, "versioned_file::test.dat", 2, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();
        if file.contents().len() != 4 {
            panic!("file has wrong len");
        }
        if &file.contents() != b"test" {
            panic!("data read does not match data written");
        }
        // Try to open the file again to make sure everything still completes.
        open_file(&test_dat, "versioned_file::test.dat", 2, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();

        // Attempt to do two upgrades at once, from 2 to 3 and 3 to 4.
        upgrade_chain.push(Upgrade {
            initial_version: 2,
            updated_version: 3,
            process: smoke_upgrade_2_3,
        });
        upgrade_chain.push(Upgrade {
            initial_version: 3,
            updated_version: 4,
            process: smoke_upgrade_3_4,
        });
        let file = open_file(&test_dat, "versioned_file::test.dat", 4, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();
        if file.contents().len() != 12 {
            panic!("file has wrong len");
        }
        if &file.contents() != b"testtesttest" {
            panic!("data read does not match data written");
        }
        drop(file);
        // Try to open the file again to make sure everything still completes.
        open_file(&test_dat, "versioned_file::test.dat", 4, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();

        // Corrupt the data of the file. It should open correctly (falling back to
        // the intact backup copy), and then after opening the corruption should be
        // repaired on disk.
        let mut test_main = test_dat.clone();
        add_extension(&mut test_main, "atomic_file");
        let original_data = std::fs::read(&test_main).unwrap();
        std::fs::write(&test_main, b"file corruption!").unwrap();
        open_file(&test_dat, "versioned_file::test.dat", 4, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();
        let repaired_data = std::fs::read(&test_main).unwrap();
        assert!(repaired_data == original_data);

        // Try modifying the checksum of the file to see if it still completes. We modify the raw
        // data as well to see if the backup loads.
        let mut raw_file_name = test_dat.clone();
        add_extension(&mut raw_file_name, "atomic_file");
        println!("{:?}", raw_file_name);
        let mut raw_file = std::fs::OpenOptions::new()
            .read(true)
            .write(true)
            .open(raw_file_name)
            .unwrap();
        // Truncate to the bare 4096-byte header, then clobber the start of the
        // checksum with a partial (invalid) value.
        // NOTE(review): Write::write is not guaranteed to write all 4 bytes;
        // write_all would make the intent explicit — confirm before relying on it.
        raw_file.set_len(4096).unwrap();
        raw_file.write("ffff".as_bytes()).unwrap();

        // The checksum no longer matches the (now empty) contents, so the open is
        // expected to succeed by falling back to the backup copy, recovering the
        // original non-empty contents.
        let shorter_file = open_file(&test_dat, "versioned_file::test.dat", 4, &upgrade_chain, CreateIfNotExists)
            .await
            .unwrap();
        assert!(shorter_file.contents().len() != 0); // should be the original file

        // Write out the full checksum override ('ffffffffffff', see module docs)
        // then see that the manually truncated file is accepted as-is.
        raw_file.set_len(4096).unwrap();
        raw_file.seek(std::io::SeekFrom::Start(0)).unwrap();
        raw_file.write("ffffffffffff".as_bytes()).unwrap();
        let shorter_file = open(&test_dat, "versioned_file::test.dat")
            .await
            .unwrap();
        assert!(shorter_file.contents().len() == 0); // should accept the manually modified file

        // Try deleting the file. When we open the file again with a new identifier, the new
        // identifier should succeed.
        delete_file(&test_dat).await.unwrap();
        open_file(&test_dat, "versioned_file::test.dat::after_delete", 1, &Vec::new(), CreateIfNotExists)
            .await
            .unwrap();

        // Delete the file again, then try to open it with 'create_if_not_exists' set to false. The
        // file should not be created, which means it'll fail on subsequent opens as well. We also
        // sneak in a few checks of 'open()'
        assert!(exists(&test_dat));
        open(&test_dat, "versioned_file::test.dat::after_delete").await.unwrap();
        delete_file(&test_dat).await.unwrap();
        open(&test_dat, "versioned_file::test.dat::after_delete").await.unwrap_err();
        assert!(!exists(&test_dat));
        open_file(&test_dat, "versioned_file::test.dat::after_delete", 1, &Vec::new(), ErrorIfNotExists)
            .await
            .unwrap_err();
        open_file(&test_dat, "versioned_file::test.dat::after_delete", 1, &Vec::new(), ErrorIfNotExists)
            .await
            .unwrap_err();

        // Leave one file in the testdir so it can be viewed later.
        let mut f = open_file(&test_dat, "versioned_file::test.dat::after_delete", 1, &Vec::new(), CreateIfNotExists)
            .await
            .unwrap();
        f.write_file("this is where the real file data is stored!".as_bytes()).await.unwrap();
    }
980
    #[async_std::test]
    // Run the full smoke test under the async-std runtime.
    async fn smoke_test_async_std() {
        smoke_test().await;
    }
985
    #[tokio::test]
    // Run the full smoke test under the tokio runtime, to verify the crate is
    // runtime-agnostic.
    async fn smoke_test_tokio() {
        smoke_test().await;
    }
990
991 #[test]
992 // Attempt to provide comprehensive test coverage of the upgrade path verifier.
993 fn test_verify_upgrade_paths() {
994 // Passing in no upgrades should be fine.
995 verify_upgrade_paths(&Vec::new(), 0, 0).unwrap_err(); // 0 is not a legal version
996 verify_upgrade_paths(&Vec::new(), 0, 1).unwrap_err(); // 0 is not a legal version
997 verify_upgrade_paths(&Vec::new(), 1, 1).unwrap();
998 verify_upgrade_paths(&Vec::new(), 2, 2).unwrap();
999 verify_upgrade_paths(&Vec::new(), 255, 255).unwrap();
1000
1001 // Passing in a single upgrade should be okay.
1002 verify_upgrade_paths(
1003 &vec![Upgrade {
1004 initial_version: 1,
1005 updated_version: 2,
1006 process: stub_upgrade,
1007 }],
1008 1,
1009 2,
1010 )
1011 .unwrap();
1012
1013 // A non-increasing upgrade is not okay.
1014 verify_upgrade_paths(
1015 &vec![Upgrade {
1016 initial_version: 2,
1017 updated_version: 2,
1018 process: stub_upgrade,
1019 }],
1020 2,
1021 2,
1022 )
1023 .unwrap_err();
1024
1025 // No route to final version is not okay.
1026 verify_upgrade_paths(
1027 &vec![Upgrade {
1028 initial_version: 1,
1029 updated_version: 2,
1030 process: stub_upgrade,
1031 }],
1032 1,
1033 3,
1034 )
1035 .unwrap_err();
1036
1037 // Simple path is okay.
1038 verify_upgrade_paths(
1039 &vec![
1040 Upgrade {
1041 initial_version: 1,
1042 updated_version: 2,
1043 process: stub_upgrade,
1044 },
1045 Upgrade {
1046 initial_version: 2,
1047 updated_version: 3,
1048 process: stub_upgrade,
1049 },
1050 ],
1051 1,
1052 3,
1053 )
1054 .unwrap();
1055
1056 // Two starting options for the same version is not okay.
1057 verify_upgrade_paths(
1058 &vec![
1059 Upgrade {
1060 initial_version: 1,
1061 updated_version: 2,
1062 process: stub_upgrade,
1063 },
1064 Upgrade {
1065 initial_version: 2,
1066 updated_version: 3,
1067 process: stub_upgrade,
1068 },
1069 Upgrade {
1070 initial_version: 1,
1071 updated_version: 3,
1072 process: stub_upgrade,
1073 },
1074 ],
1075 1,
1076 3,
1077 )
1078 .unwrap_err();
1079
1080 // Two ending options for the same version is okay.
1081 verify_upgrade_paths(
1082 &vec![
1083 Upgrade {
1084 initial_version: 1,
1085 updated_version: 3,
1086 process: stub_upgrade,
1087 },
1088 Upgrade {
1089 initial_version: 2,
1090 updated_version: 3,
1091 process: stub_upgrade,
1092 },
1093 ],
1094 1,
1095 3,
1096 )
1097 .unwrap();
1098
1099 // Two ending options for the same version, version too high.
1100 verify_upgrade_paths(
1101 &vec![
1102 Upgrade {
1103 initial_version: 1,
1104 updated_version: 3,
1105 process: stub_upgrade,
1106 },
1107 Upgrade {
1108 initial_version: 2,
1109 updated_version: 3,
1110 process: stub_upgrade,
1111 },
1112 ],
1113 1,
1114 2,
1115 )
1116 .unwrap_err();
1117
1118 // Complex valid structure.
1119 verify_upgrade_paths(
1120 &vec![
1121 Upgrade {
1122 initial_version: 1,
1123 updated_version: 3,
1124 process: stub_upgrade,
1125 },
1126 Upgrade {
1127 initial_version: 2,
1128 updated_version: 3,
1129 process: stub_upgrade,
1130 },
1131 Upgrade {
1132 initial_version: 3,
1133 updated_version: 6,
1134 process: stub_upgrade,
1135 },
1136 Upgrade {
1137 initial_version: 4,
1138 updated_version: 6,
1139 process: stub_upgrade,
1140 },
1141 Upgrade {
1142 initial_version: 5,
1143 updated_version: 6,
1144 process: stub_upgrade,
1145 },
1146 ],
1147 1,
1148 6,
1149 )
1150 .unwrap();
1151
1152 // Complex valid structure.
1153 verify_upgrade_paths(
1154 &vec![
1155 Upgrade {
1156 initial_version: 1,
1157 updated_version: 3,
1158 process: stub_upgrade,
1159 },
1160 Upgrade {
1161 initial_version: 2,
1162 updated_version: 3,
1163 process: stub_upgrade,
1164 },
1165 Upgrade {
1166 initial_version: 3,
1167 updated_version: 6,
1168 process: stub_upgrade,
1169 },
1170 Upgrade {
1171 initial_version: 4,
1172 updated_version: 6,
1173 process: stub_upgrade,
1174 },
1175 ],
1176 1,
1177 6,
1178 )
1179 .unwrap();
1180
1181 // Complex valid structure.
1182 verify_upgrade_paths(
1183 &vec![
1184 Upgrade {
1185 initial_version: 1,
1186 updated_version: 3,
1187 process: stub_upgrade,
1188 },
1189 Upgrade {
1190 initial_version: 2,
1191 updated_version: 3,
1192 process: stub_upgrade,
1193 },
1194 Upgrade {
1195 initial_version: 3,
1196 updated_version: 6,
1197 process: stub_upgrade,
1198 },
1199 Upgrade {
1200 initial_version: 4,
1201 updated_version: 6,
1202 process: stub_upgrade,
1203 },
1204 ],
1205 5,
1206 6,
1207 )
1208 .unwrap_err();
1209
1210 // Complex valid structure, randomly ordered.
1211 verify_upgrade_paths(
1212 &vec![
1213 Upgrade {
1214 initial_version: 5,
1215 updated_version: 6,
1216 process: stub_upgrade,
1217 },
1218 Upgrade {
1219 initial_version: 2,
1220 updated_version: 3,
1221 process: stub_upgrade,
1222 },
1223 Upgrade {
1224 initial_version: 3,
1225 updated_version: 6,
1226 process: stub_upgrade,
1227 },
1228 Upgrade {
1229 initial_version: 1,
1230 updated_version: 3,
1231 process: stub_upgrade,
1232 },
1233 Upgrade {
1234 initial_version: 4,
1235 updated_version: 6,
1236 process: stub_upgrade,
1237 },
1238 ],
1239 1,
1240 6,
1241 )
1242 .unwrap();
1243
1244 // Complex structure, randomly ordered, one orphan.
1245 verify_upgrade_paths(
1246 &vec![
1247 Upgrade {
1248 initial_version: 2,
1249 updated_version: 5,
1250 process: stub_upgrade,
1251 },
1252 Upgrade {
1253 initial_version: 6,
1254 updated_version: 7,
1255 process: stub_upgrade,
1256 },
1257 Upgrade {
1258 initial_version: 3,
1259 updated_version: 6,
1260 process: stub_upgrade,
1261 },
1262 Upgrade {
1263 initial_version: 1,
1264 updated_version: 4,
1265 process: stub_upgrade,
1266 },
1267 Upgrade {
1268 initial_version: 4,
1269 updated_version: 6,
1270 process: stub_upgrade,
1271 },
1272 ],
1273 1,
1274 6,
1275 )
1276 .unwrap_err();
1277 }
1278
1279 #[test]
1280 fn test_version_to_bytes() {
1281 assert!(&version_to_bytes(1) == b"001\n");
1282 assert!(&version_to_bytes(2) == b"002\n");
1283 assert!(&version_to_bytes(9) == b"009\n");
1284 assert!(&version_to_bytes(10) == b"010\n");
1285 assert!(&version_to_bytes(39) == b"039\n");
1286 assert!(&version_to_bytes(139) == b"139\n");
1287 }
1288}