#![cfg_attr(feature = "nightly", deny(missing_docs))]
#![cfg_attr(feature = "nightly", feature(external_doc))]
#![cfg_attr(feature = "nightly", doc(include = "../README.md"))]
#![cfg_attr(test, deny(warnings))]

use anyhow::{anyhow, Error};
use async_std::fs::{self, OpenOptions};
use async_std::io::prelude::{SeekExt, WriteExt};
use async_std::io::{ReadExt, SeekFrom};
use random_access_storage::RandomAccess;
use std::ops::Drop;
use std::path;

/// Random-access disk storage, backed by a single file.
#[derive(Debug)]
pub struct RandomAccessDisk {
  filename: path::PathBuf,
  file: Option<fs::File>,
  length: u64,
  auto_sync: bool,
}

impl RandomAccessDisk {
  /// Create a new instance.
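  ///
  /// # Example
  ///
  /// A minimal usage sketch; the file name is illustrative, and the example
  /// assumes the crate is imported as `random_access_disk` with the
  /// `RandomAccess` trait from `random_access_storage` in scope.
  ///
  /// ```no_run
  /// use random_access_disk::RandomAccessDisk;
  /// use random_access_storage::RandomAccess;
  /// use std::path::PathBuf;
  ///
  /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
  /// # async_std::task::block_on(async {
  /// let mut storage = RandomAccessDisk::open(PathBuf::from("example.db")).await?;
  /// storage.write(0, b"hello").await?;
  /// assert_eq!(storage.read(0, 5).await?, b"hello".to_vec());
  /// # Ok(())
  /// # })
  /// # }
  /// ```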
  #[allow(clippy::new_ret_no_self)]
  pub async fn open(
    filename: path::PathBuf,
  ) -> Result<RandomAccessDisk, Error> {
    Self::builder(filename).build().await
  }

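  /// Create a [`Builder`] that can configure options (currently `auto_sync`)
  /// before the file is opened.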
  pub fn builder(filename: path::PathBuf) -> Builder {
    Builder::new(filename)
  }
}

#[async_trait::async_trait]
impl RandomAccess for RandomAccessDisk {
  type Error = Box<dyn std::error::Error + Sync + Send>;

  async fn write(
    &mut self,
    offset: u64,
    data: &[u8],
  ) -> Result<(), Self::Error> {
    let mut file = self.file.as_ref().expect("self.file was None.");
    file.seek(SeekFrom::Start(offset)).await?;
    file.write_all(data).await?;
    if self.auto_sync {
      file.sync_all().await?;
    }

    // We've changed the length of our file.
    let new_len = offset + (data.len() as u64);
    if new_len > self.length {
      self.length = new_len;
    }

    Ok(())
  }

  // NOTE(yw): disabling clippy here because files on disk might be sparse,
  // and sometimes you might want to read a bit of memory to check if it's
  // formatted or not. Returning zeroed-out memory seems like an OK thing to
  // do. We should probably come back to this at a future point and determine
  // whether it's okay to return a fully zeroed-out slice. It's a bit weird,
  // because we're replacing empty data with actual zeroes - which does not
  // reflect the state of the world.
  // #[cfg_attr(test, allow(unused_io_amount))]
  async fn read(
    &mut self,
    offset: u64,
    length: u64,
  ) -> Result<Vec<u8>, Self::Error> {
    if offset + length > self.length {
      return Err(
        anyhow!(
          "Read bounds exceeded. {} < {}..{}",
          self.length,
          offset,
          offset + length
        )
        .into(),
      );
    }

    let mut file = self.file.as_ref().expect("self.file was None.");
    let mut buffer = vec![0; length as usize];
    file.seek(SeekFrom::Start(offset)).await?;
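    // A short read leaves the remainder of the buffer zero-filled, which
    // matches the note above about returning zeroed memory for sparse
    // regions.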
    let _bytes_read = file.read(&mut buffer[..]).await?;
    Ok(buffer)
  }

  async fn read_to_writer(
    &mut self,
    _offset: u64,
    _length: u64,
    _buf: &mut (impl async_std::io::Write + Send),
  ) -> Result<(), Self::Error> {
    unimplemented!()
  }

  async fn del(
    &mut self,
    _offset: u64,
    _length: u64,
  ) -> Result<(), Self::Error> {
    unimplemented!()
  }

  async fn truncate(&mut self, length: u64) -> Result<(), Self::Error> {
    let file = self.file.as_ref().expect("self.file was None.");
    self.length = length;
    file.set_len(self.length).await?;
    if self.auto_sync {
      file.sync_all().await?;
    }
    Ok(())
  }

  async fn len(&self) -> Result<u64, Self::Error> {
    Ok(self.length)
  }

  async fn is_empty(&mut self) -> Result<bool, Self::Error> {
    Ok(self.length == 0)
  }

  async fn sync_all(&mut self) -> Result<(), Self::Error> {
    if !self.auto_sync {
      let file = self.file.as_ref().expect("self.file was None.");
      file.sync_all().await?;
    }
    Ok(())
  }
}

impl Drop for RandomAccessDisk {
  fn drop(&mut self) {
    if let Some(file) = &self.file {
      // We need to flush the file on drop. Unfortunately, that is not possible to do in a
      // non-blocking fashion, but our only other option here is losing data remaining in the
      // write cache. Good task schedulers should be resilient to occasional blocking hiccups in
      // file destructors so we don't expect this to be a common problem in practice.
      // (from async_std::fs::File::drop)
      let _ = async_std::task::block_on(file.sync_all());
    }
  }
}

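/// Builder for [`RandomAccessDisk`], used to configure options (currently
/// only `auto_sync`) before opening the file.
///
/// # Example
///
/// A minimal sketch; the file name is illustrative, and the example assumes
/// the crate is imported as `random_access_disk`.
///
/// ```no_run
/// use random_access_disk::{Builder, RandomAccessDisk};
/// use std::path::PathBuf;
///
/// # fn main() -> Result<(), anyhow::Error> {
/// # async_std::task::block_on(async {
/// // Skip the fsync after every write; data is still flushed on drop or by
/// // an explicit `sync_all` call.
/// let _storage: RandomAccessDisk = Builder::new(PathBuf::from("example.db"))
///   .auto_sync(false)
///   .build()
///   .await?;
/// # Ok(())
/// # })
/// # }
/// ```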
pub struct Builder {
  filename: path::PathBuf,
  auto_sync: bool,
}

impl Builder {
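  /// Create a new builder for the given file path; `auto_sync` defaults to
  /// `true`.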
  pub fn new(filename: path::PathBuf) -> Self {
    Self {
      filename,
      auto_sync: true,
    }
  }
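
  /// Set whether every `write` and `truncate` is followed by `sync_all`
  /// (fsync). Defaults to `true`; disabling it trades durability for write
  /// throughput. Data is still flushed on drop and by an explicit call to
  /// `sync_all`.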
  pub fn auto_sync(mut self, auto_sync: bool) -> Self {
    self.auto_sync = auto_sync;
    self
  }

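  /// Create any missing parent directory, open (or create) the file in
  /// read/write mode, and return a [`RandomAccessDisk`] whose length matches
  /// the file's current size on disk.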
  pub async fn build(self) -> Result<RandomAccessDisk, Error> {
    if let Some(dirname) = self.filename.parent() {
      mkdirp::mkdirp(dirname)?;
    }
    let file = OpenOptions::new()
      .create(true)
      .read(true)
      .write(true)
      .open(&self.filename)
      .await?;
    file.sync_all().await?;

    let metadata = self.filename.metadata()?;
    Ok(RandomAccessDisk {
      filename: self.filename,
      file: Some(file),
      length: metadata.len(),
      auto_sync: self.auto_sync,
    })
  }
}