solana_accounts_db/
accounts_file.rs

1use {
2    crate::{
3        account_info::{AccountInfo, Offset},
4        account_storage::stored_account_info::{StoredAccountInfo, StoredAccountInfoWithoutData},
5        accounts_db::AccountsFileId,
6        append_vec::{AppendVec, AppendVecError},
7        storable_accounts::StorableAccounts,
8        tiered_storage::{
9            error::TieredStorageError, hot::HOT_FORMAT, index::IndexOffset, TieredStorage,
10        },
11    },
12    agave_fs::buffered_reader::RequiredLenBufFileRead,
13    solana_account::AccountSharedData,
14    solana_clock::Slot,
15    solana_pubkey::Pubkey,
16    std::{
17        mem,
18        path::{Path, PathBuf},
19    },
20    thiserror::Error,
21};
22
/// Alignment boundary, in bytes, for data placed in an accounts file.
///
/// Data should be placed at the next multiple of this value. Accessing
/// unaligned memory may crash on some architectures.
pub const ALIGN_BOUNDARY_OFFSET: usize = mem::size_of::<u64>();

/// Rounds `$addr` up to the next multiple of `ALIGN_BOUNDARY_OFFSET`.
///
/// A value that is already a multiple of the boundary is returned unchanged.
#[macro_export]
macro_rules! u64_align {
    ($addr: expr) => {{
        // The boundary is a power of two, so `boundary - 1` is a mask of the low bits.
        let mask = $crate::accounts_file::ALIGN_BOUNDARY_OFFSET - 1;
        ($addr + mask) & !mask
    }};
}
33
34pub type Result<T> = std::result::Result<T, AccountsFileError>;
35
36/// An enum for AccountsFile related errors.
37#[derive(Error, Debug)]
38pub enum AccountsFileError {
39    #[error("I/O error: {0}")]
40    Io(#[from] std::io::Error),
41
42    #[error("AppendVecError: {0}")]
43    AppendVecError(#[from] AppendVecError),
44
45    #[error("TieredStorageError: {0}")]
46    TieredStorageError(#[from] TieredStorageError),
47}
48
/// How the contents of a storage should be accessed.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum StorageAccess {
    /// Storages should be accessed by Mmap.
    Mmap,
    /// Storages should be accessed by File I/O. This is the default.
    ///
    /// Ancient storages are created by a 1-shot write to pack multiple accounts
    /// together more efficiently with new formats.
    #[default]
    File,
}
58
59#[derive(Debug)]
60/// An enum for accessing an accounts file which can be implemented
61/// under different formats.
62pub enum AccountsFile {
63    AppendVec(AppendVec),
64    TieredStorage(TieredStorage),
65}
66
67impl AccountsFile {
68    /// Create an AccountsFile instance from the specified path.
69    ///
70    /// The second element of the returned tuple is the number of accounts in the
71    /// accounts file.
72    #[cfg(feature = "dev-context-only-utils")]
73    pub fn new_from_file(
74        path: impl Into<PathBuf>,
75        current_len: usize,
76        storage_access: StorageAccess,
77    ) -> Result<(Self, usize)> {
78        let (av, num_accounts) = AppendVec::new_from_file(path, current_len, storage_access)?;
79        Ok((Self::AppendVec(av), num_accounts))
80    }
81
82    /// Creates a new AccountsFile for the underlying storage at `path`
83    ///
84    /// This version of `new()` may only be called when reconstructing storages as part of startup.
85    /// It trusts the snapshot's value for `current_len`, and relies on later index generation or
86    /// accounts verification to ensure it is valid.
87    pub fn new_for_startup(
88        path: impl Into<PathBuf>,
89        current_len: usize,
90        storage_access: StorageAccess,
91    ) -> Result<Self> {
92        let av = AppendVec::new_for_startup(path, current_len, storage_access)?;
93        Ok(Self::AppendVec(av))
94    }
95
96    /// if storage is not readonly, reopen another instance that is read only
97    pub(crate) fn reopen_as_readonly(&self) -> Option<Self> {
98        match self {
99            Self::AppendVec(av) => av.reopen_as_readonly_file_io().map(Self::AppendVec),
100            Self::TieredStorage(_) => None,
101        }
102    }
103
104    /// Return the total number of bytes of the zero lamport single ref accounts in the storage.
105    /// Those bytes are "dead" and can be shrunk away.
106    pub(crate) fn dead_bytes_due_to_zero_lamport_single_ref(&self, count: usize) -> usize {
107        match self {
108            Self::AppendVec(av) => av.dead_bytes_due_to_zero_lamport_single_ref(count),
109            Self::TieredStorage(ts) => ts.dead_bytes_due_to_zero_lamport_single_ref(count),
110        }
111    }
112
113    /// Flushes contents to disk
114    pub fn flush(&self) -> Result<()> {
115        match self {
116            Self::AppendVec(av) => av.flush()?,
117            Self::TieredStorage(_) => {}
118        }
119        Ok(())
120    }
121
122    pub fn remaining_bytes(&self) -> u64 {
123        match self {
124            Self::AppendVec(av) => av.remaining_bytes(),
125            Self::TieredStorage(ts) => ts.capacity().saturating_sub(ts.len() as u64),
126        }
127    }
128
129    /// Returns the number of bytes, *not accounts*, used in the AccountsFile
130    pub fn len(&self) -> usize {
131        match self {
132            Self::AppendVec(av) => av.len(),
133            Self::TieredStorage(ts) => ts.len(),
134        }
135    }
136
137    pub fn is_empty(&self) -> bool {
138        match self {
139            Self::AppendVec(av) => av.is_empty(),
140            Self::TieredStorage(ts) => ts.is_empty(),
141        }
142    }
143
144    /// Returns the total number of bytes, *not accounts*, the AccountsFile can hold
145    pub fn capacity(&self) -> u64 {
146        match self {
147            Self::AppendVec(av) => av.capacity(),
148            Self::TieredStorage(ts) => ts.capacity(),
149        }
150    }
151
152    pub fn file_name(slot: Slot, id: AccountsFileId) -> String {
153        format!("{slot}.{id}")
154    }
155
156    /// Calls `callback` with the stored account at `offset`.
157    ///
158    /// Returns `None` if there is no account at `offset`, otherwise returns the result of
159    /// `callback` in `Some`.
160    ///
161    /// This fn does *not* load the account's data, just the data length.  If the data is needed,
162    /// use `get_stored_account_callback()` instead.  However, prefer this fn when possible.
163    pub fn get_stored_account_without_data_callback<Ret>(
164        &self,
165        offset: usize,
166        callback: impl for<'local> FnMut(StoredAccountInfoWithoutData<'local>) -> Ret,
167    ) -> Option<Ret> {
168        match self {
169            Self::AppendVec(av) => av.get_stored_account_without_data_callback(offset, callback),
170            Self::TieredStorage(ts) => {
171                // Note: The conversion here is needed as the AccountsDB currently
172                // assumes all offsets are multiple of 8 while TieredStorage uses
173                // IndexOffset that is equivalent to AccountInfo::reduced_offset.
174                let index_offset = IndexOffset(AccountInfo::get_reduced_offset(offset));
175                ts.reader()?
176                    .get_stored_account_without_data_callback(index_offset, callback)
177                    .ok()?
178            }
179        }
180    }
181
182    /// Calls `callback` with the stored account at `offset`.
183    ///
184    /// Returns `None` if there is no account at `offset`, otherwise returns the result of
185    /// `callback` in `Some`.
186    ///
187    /// This fn *does* load the account's data.  If the data is not needed,
188    /// use `get_stored_account_without_data_callback()` instead.
189    pub fn get_stored_account_callback<Ret>(
190        &self,
191        offset: usize,
192        callback: impl for<'local> FnMut(StoredAccountInfo<'local>) -> Ret,
193    ) -> Option<Ret> {
194        match self {
195            Self::AppendVec(av) => av.get_stored_account_callback(offset, callback),
196            Self::TieredStorage(ts) => {
197                // Note: The conversion here is needed as the AccountsDB currently
198                // assumes all offsets are multiple of 8 while TieredStorage uses
199                // IndexOffset that is equivalent to AccountInfo::reduced_offset.
200                let index_offset = IndexOffset(AccountInfo::get_reduced_offset(offset));
201                ts.reader()?
202                    .get_stored_account_callback(index_offset, callback)
203                    .ok()?
204            }
205        }
206    }
207
208    /// return an `AccountSharedData` for an account at `offset`, if any.  Otherwise return None.
209    pub(crate) fn get_account_shared_data(&self, offset: usize) -> Option<AccountSharedData> {
210        match self {
211            Self::AppendVec(av) => av.get_account_shared_data(offset),
212            Self::TieredStorage(ts) => {
213                // Note: The conversion here is needed as the AccountsDB currently
214                // assumes all offsets are multiple of 8 while TieredStorage uses
215                // IndexOffset that is equivalent to AccountInfo::reduced_offset.
216                let index_offset = IndexOffset(AccountInfo::get_reduced_offset(offset));
217                ts.reader()?.get_account_shared_data(index_offset).ok()?
218            }
219        }
220    }
221
222    /// Return the path of the underlying account file.
223    pub fn path(&self) -> &Path {
224        match self {
225            Self::AppendVec(av) => av.path(),
226            Self::TieredStorage(ts) => ts.path(),
227        }
228    }
229
230    /// Iterate over all accounts and call `callback` with each account.
231    ///
232    /// `callback` parameters:
233    /// * Offset: the offset within the file of this account
234    /// * StoredAccountInfoWithoutData: the account itself, without account data
235    ///
236    /// Note that account data is not read/passed to the callback.
237    pub fn scan_accounts_without_data(
238        &self,
239        callback: impl for<'local> FnMut(Offset, StoredAccountInfoWithoutData<'local>),
240    ) -> Result<()> {
241        match self {
242            Self::AppendVec(av) => av.scan_accounts_without_data(callback)?,
243            Self::TieredStorage(ts) => {
244                if let Some(reader) = ts.reader() {
245                    reader.scan_accounts_without_data(callback)?;
246                }
247            }
248        }
249        Ok(())
250    }
251
252    /// Iterate over all accounts and call `callback` with each account.
253    ///
254    /// `callback` parameters:
255    /// * Offset: the offset within the file of this account
256    /// * StoredAccountInfo: the account itself, with account data
257    ///
258    /// Prefer scan_accounts_without_data() when account data is not needed,
259    /// as it can potentially read less and be faster.
260    pub(crate) fn scan_accounts<'a>(
261        &'a self,
262        reader: &mut impl RequiredLenBufFileRead<'a>,
263        callback: impl for<'local> FnMut(Offset, StoredAccountInfo<'local>),
264    ) -> Result<()> {
265        match self {
266            Self::AppendVec(av) => av.scan_accounts(reader, callback)?,
267            Self::TieredStorage(ts) => {
268                if let Some(reader) = ts.reader() {
269                    reader.scan_accounts(callback)?;
270                }
271            }
272        }
273        Ok(())
274    }
275
276    /// Calculate the amount of storage required for an account with the passed
277    /// in data_len
278    pub(crate) fn calculate_stored_size(&self, data_len: usize) -> usize {
279        match self {
280            Self::AppendVec(_) => AppendVec::calculate_stored_size(data_len),
281            Self::TieredStorage(ts) => ts
282                .reader()
283                .expect("Reader must be initialized as stored size is specific to format")
284                .calculate_stored_size(data_len),
285        }
286    }
287
288    /// for each offset in `sorted_offsets`, get the data size
289    pub(crate) fn get_account_data_lens(&self, sorted_offsets: &[usize]) -> Vec<usize> {
290        match self {
291            Self::AppendVec(av) => av.get_account_data_lens(sorted_offsets),
292            Self::TieredStorage(ts) => ts
293                .reader()
294                .and_then(|reader| reader.get_account_data_lens(sorted_offsets).ok())
295                .unwrap_or_default(),
296        }
297    }
298
299    /// iterate over all pubkeys
300    pub fn scan_pubkeys(&self, callback: impl FnMut(&Pubkey)) -> Result<()> {
301        match self {
302            Self::AppendVec(av) => av.scan_pubkeys(callback)?,
303            Self::TieredStorage(ts) => {
304                if let Some(reader) = ts.reader() {
305                    reader.scan_pubkeys(callback)?;
306                }
307            }
308        }
309        Ok(())
310    }
311
312    /// Copy each account metadata, account and hash to the internal buffer.
313    /// If there is no room to write the first entry, None is returned.
314    /// Otherwise, returns the starting offset of each account metadata.
315    /// Plus, the final return value is the offset where the next entry would be appended.
316    /// So, return.len() is 1 + (number of accounts written)
317    /// After each account is appended, the internal `current_len` is updated
318    /// and will be available to other threads.
319    pub fn write_accounts<'a>(
320        &self,
321        accounts: &impl StorableAccounts<'a>,
322        skip: usize,
323    ) -> Option<StoredAccountsInfo> {
324        match self {
325            Self::AppendVec(av) => av.append_accounts(accounts, skip),
326            // Note: The conversion here is needed as the AccountsDB currently
327            // assumes all offsets are multiple of 8 while TieredStorage uses
328            // IndexOffset that is equivalent to AccountInfo::reduced_offset.
329            Self::TieredStorage(ts) => ts
330                .write_accounts(accounts, skip, &HOT_FORMAT)
331                .map(|mut stored_accounts_info| {
332                    stored_accounts_info.offsets.iter_mut().for_each(|offset| {
333                        *offset = AccountInfo::reduced_offset_to_offset(*offset as u32);
334                    });
335                    stored_accounts_info
336                })
337                .ok(),
338        }
339    }
340
341    /// Returns the way to access this accounts file when archiving
342    pub fn internals_for_archive(&self) -> InternalsForArchive<'_> {
343        match self {
344            Self::AppendVec(av) => av.internals_for_archive(),
345            Self::TieredStorage(ts) => InternalsForArchive::Mmap(
346                ts.reader()
347                    .expect("must be a reader when archiving")
348                    .data_for_archive(),
349            ),
350        }
351    }
352}
353
/// Selects the format used when creating a new `AccountsFile` instance.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum AccountsFileProvider {
    /// Create `AppendVec`-backed accounts files. This is the default.
    #[default]
    AppendVec,
    /// Create accounts files backed by tiered storage.
    HotStorage,
}
361
362impl AccountsFileProvider {
363    pub fn new_writable(
364        &self,
365        path: impl Into<PathBuf>,
366        file_size: u64,
367        storage_access: StorageAccess,
368    ) -> AccountsFile {
369        match self {
370            Self::AppendVec => AccountsFile::AppendVec(AppendVec::new(
371                path,
372                true,
373                file_size as usize,
374                storage_access,
375            )),
376            Self::HotStorage => AccountsFile::TieredStorage(TieredStorage::new_writable(path)),
377        }
378    }
379}
380
/// The access method to use when archiving an AccountsFile.
#[derive(Debug)]
pub enum InternalsForArchive<'a> {
    /// The internals are accessed via Mmap, as a borrowed byte slice.
    Mmap(&'a [u8]),
    /// The internals are accessed via File I/O, using the borrowed path.
    FileIo(&'a Path),
}
389
/// Information produced after storing accounts.
#[derive(Debug)]
pub struct StoredAccountsInfo {
    /// Offset in the storage where each account was stored.
    pub offsets: Vec<usize>,
    /// Total size of all the stored accounts.
    pub size: usize,
}