solana_accounts_db/
accounts_file.rs

1#[cfg(feature = "dev-context-only-utils")]
2use crate::append_vec::{self, StoredAccountMeta};
3use {
4    crate::{
5        account_info::{AccountInfo, Offset},
6        account_storage::stored_account_info::{StoredAccountInfo, StoredAccountInfoWithoutData},
7        accounts_db::AccountsFileId,
8        accounts_update_notifier_interface::AccountForGeyser,
9        append_vec::{AppendVec, AppendVecError},
10        buffered_reader::RequiredLenBufFileRead,
11        storable_accounts::StorableAccounts,
12        tiered_storage::{
13            error::TieredStorageError, hot::HOT_FORMAT, index::IndexOffset, TieredStorage,
14        },
15    },
16    solana_account::{AccountSharedData, ReadableAccount as _},
17    solana_clock::Slot,
18    solana_pubkey::Pubkey,
19    std::{
20        mem,
21        path::{Path, PathBuf},
22    },
23    thiserror::Error,
24};
25
/// Alignment boundary, in bytes, used by `u64_align!`: the size of a `u64`.
///
/// Data placement should be aligned at the next boundary. Without alignment, accessing the
/// memory may crash on some architectures.
pub const ALIGN_BOUNDARY_OFFSET: usize = mem::size_of::<u64>();
/// Rounds `$addr` up to the next multiple of `ALIGN_BOUNDARY_OFFSET`.
///
/// A value that is already a multiple of the boundary is returned unchanged.
#[macro_export]
macro_rules! u64_align {
    ($addr: expr) => {
        // Add `boundary - 1`, then clear the low bits with the inverted mask. This is only a
        // correct round-up because the boundary (size of a `u64`) is a power of two.
        ($addr + ($crate::accounts_file::ALIGN_BOUNDARY_OFFSET - 1))
            & !($crate::accounts_file::ALIGN_BOUNDARY_OFFSET - 1)
    };
}
36
37#[derive(Error, Debug)]
38/// An enum for AccountsFile related errors.
39pub enum AccountsFileError {
40    #[error("I/O error: {0}")]
41    Io(#[from] std::io::Error),
42
43    #[error("AppendVecError: {0}")]
44    AppendVecError(#[from] AppendVecError),
45
46    #[error("TieredStorageError: {0}")]
47    TieredStorageError(#[from] TieredStorageError),
48}
49
/// Selects how storages are accessed.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum StorageAccess {
    /// Storages should be accessed by Mmap.
    Mmap,
    /// Storages should be accessed by File I/O. This is the default.
    ///
    /// Ancient storages are created by 1-shot write to pack multiple accounts together more
    /// efficiently with new formats.
    #[default]
    File,
}
59
/// Result alias whose error type is [`AccountsFileError`].
pub type Result<T> = std::result::Result<T, AccountsFileError>;
61
/// An enum for accessing an accounts file which can be implemented
/// under different formats.
#[derive(Debug)]
pub enum AccountsFile {
    /// An accounts file stored in the `AppendVec` format.
    AppendVec(AppendVec),
    /// An accounts file stored in the `TieredStorage` format.
    TieredStorage(TieredStorage),
}
69
70impl AccountsFile {
71    /// Create an AccountsFile instance from the specified path.
72    ///
73    /// The second element of the returned tuple is the number of accounts in the
74    /// accounts file.
75    #[cfg(feature = "dev-context-only-utils")]
76    pub fn new_from_file(
77        path: impl Into<PathBuf>,
78        current_len: usize,
79        storage_access: StorageAccess,
80    ) -> Result<(Self, usize)> {
81        let (av, num_accounts) = AppendVec::new_from_file(path, current_len, storage_access)?;
82        Ok((Self::AppendVec(av), num_accounts))
83    }
84
85    /// Creates a new AccountsFile for the underlying storage at `path`
86    ///
87    /// This version of `new()` may only be called when reconstructing storages as part of startup.
88    /// It trusts the snapshot's value for `current_len`, and relies on later index generation or
89    /// accounts verification to ensure it is valid.
90    pub fn new_for_startup(
91        path: impl Into<PathBuf>,
92        current_len: usize,
93        storage_access: StorageAccess,
94    ) -> Result<Self> {
95        let av = AppendVec::new_for_startup(path, current_len, storage_access)?;
96        Ok(Self::AppendVec(av))
97    }
98
99    /// true if this storage can possibly be appended to (independent of capacity check)
100    //
101    // NOTE: Only used by ancient append vecs "append" method, which is test-only now.
102    #[cfg(test)]
103    pub(crate) fn can_append(&self) -> bool {
104        match self {
105            Self::AppendVec(av) => av.can_append(),
106            // once created, tiered storages cannot be appended to
107            Self::TieredStorage(_) => false,
108        }
109    }
110
111    /// if storage is not readonly, reopen another instance that is read only
112    pub(crate) fn reopen_as_readonly(&self) -> Option<Self> {
113        match self {
114            Self::AppendVec(av) => av.reopen_as_readonly().map(Self::AppendVec),
115            Self::TieredStorage(_) => None,
116        }
117    }
118
119    /// Return the total number of bytes of the zero lamport single ref accounts in the storage.
120    /// Those bytes are "dead" and can be shrunk away.
121    pub(crate) fn dead_bytes_due_to_zero_lamport_single_ref(&self, count: usize) -> usize {
122        match self {
123            Self::AppendVec(av) => av.dead_bytes_due_to_zero_lamport_single_ref(count),
124            Self::TieredStorage(ts) => ts.dead_bytes_due_to_zero_lamport_single_ref(count),
125        }
126    }
127
128    pub fn flush(&self) -> Result<()> {
129        match self {
130            Self::AppendVec(av) => av.flush(),
131            Self::TieredStorage(_) => Ok(()),
132        }
133    }
134
135    pub fn reset(&self) {
136        match self {
137            Self::AppendVec(av) => av.reset(),
138            Self::TieredStorage(_) => {}
139        }
140    }
141
142    pub fn remaining_bytes(&self) -> u64 {
143        match self {
144            Self::AppendVec(av) => av.remaining_bytes(),
145            Self::TieredStorage(ts) => ts.capacity().saturating_sub(ts.len() as u64),
146        }
147    }
148
149    pub fn len(&self) -> usize {
150        match self {
151            Self::AppendVec(av) => av.len(),
152            Self::TieredStorage(ts) => ts.len(),
153        }
154    }
155
156    pub fn is_empty(&self) -> bool {
157        match self {
158            Self::AppendVec(av) => av.is_empty(),
159            Self::TieredStorage(ts) => ts.is_empty(),
160        }
161    }
162
163    pub fn capacity(&self) -> u64 {
164        match self {
165            Self::AppendVec(av) => av.capacity(),
166            Self::TieredStorage(ts) => ts.capacity(),
167        }
168    }
169
    /// Builds the file name for an accounts file: `slot` and `id` joined by a `.`.
    pub fn file_name(slot: Slot, id: AccountsFileId) -> String {
        format!("{slot}.{id}")
    }
173
174    /// Calls `callback` with the stored account at `offset`.
175    ///
176    /// Returns `None` if there is no account at `offset`, otherwise returns the result of
177    /// `callback` in `Some`.
178    ///
179    /// This fn does *not* load the account's data, just the data length.  If the data is needed,
180    /// use `get_stored_account_callback()` instead.  However, prefer this fn when possible.
181    pub fn get_stored_account_without_data_callback<Ret>(
182        &self,
183        offset: usize,
184        callback: impl for<'local> FnMut(StoredAccountInfoWithoutData<'local>) -> Ret,
185    ) -> Option<Ret> {
186        match self {
187            Self::AppendVec(av) => av.get_stored_account_without_data_callback(offset, callback),
188            Self::TieredStorage(ts) => {
189                // Note: The conversion here is needed as the AccountsDB currently
190                // assumes all offsets are multiple of 8 while TieredStorage uses
191                // IndexOffset that is equivalent to AccountInfo::reduced_offset.
192                let index_offset = IndexOffset(AccountInfo::get_reduced_offset(offset));
193                ts.reader()?
194                    .get_stored_account_without_data_callback(index_offset, callback)
195                    .ok()?
196            }
197        }
198    }
199
200    /// Calls `callback` with the stored account at `offset`.
201    ///
202    /// Returns `None` if there is no account at `offset`, otherwise returns the result of
203    /// `callback` in `Some`.
204    ///
205    /// This fn *does* load the account's data.  If the data is not needed,
206    /// use `get_stored_account_without_data_callback()` instead.
207    pub fn get_stored_account_callback<Ret>(
208        &self,
209        offset: usize,
210        callback: impl for<'local> FnMut(StoredAccountInfo<'local>) -> Ret,
211    ) -> Option<Ret> {
212        match self {
213            Self::AppendVec(av) => av.get_stored_account_callback(offset, callback),
214            Self::TieredStorage(ts) => {
215                // Note: The conversion here is needed as the AccountsDB currently
216                // assumes all offsets are multiple of 8 while TieredStorage uses
217                // IndexOffset that is equivalent to AccountInfo::reduced_offset.
218                let index_offset = IndexOffset(AccountInfo::get_reduced_offset(offset));
219                ts.reader()?
220                    .get_stored_account_callback(index_offset, callback)
221                    .ok()?
222            }
223        }
224    }
225
226    /// calls `callback` with the account located at the specified index offset.
227    ///
228    /// Prefer get_stored_account_callback() when possible, as it does not contain file format
229    /// implementation details, and thus potentially can read less and be faster.
230    #[cfg(feature = "dev-context-only-utils")]
231    pub fn get_stored_account_meta_callback<Ret>(
232        &self,
233        offset: usize,
234        callback: impl for<'local> FnMut(StoredAccountMeta<'local>) -> Ret,
235    ) -> Option<Ret> {
236        match self {
237            Self::AppendVec(av) => av.get_stored_account_meta_callback(offset, callback),
238            Self::TieredStorage(_) => {
239                unimplemented!("StoredAccountMeta is only implemented for AppendVec")
240            }
241        }
242    }
243
244    /// return an `AccountSharedData` for an account at `offset`, if any.  Otherwise return None.
245    pub(crate) fn get_account_shared_data(&self, offset: usize) -> Option<AccountSharedData> {
246        match self {
247            Self::AppendVec(av) => av.get_account_shared_data(offset),
248            Self::TieredStorage(ts) => {
249                // Note: The conversion here is needed as the AccountsDB currently
250                // assumes all offsets are multiple of 8 while TieredStorage uses
251                // IndexOffset that is equivalent to AccountInfo::reduced_offset.
252                let index_offset = IndexOffset(AccountInfo::get_reduced_offset(offset));
253                ts.reader()?.get_account_shared_data(index_offset).ok()?
254            }
255        }
256    }
257
258    /// Return the path of the underlying account file.
259    pub fn path(&self) -> &Path {
260        match self {
261            Self::AppendVec(av) => av.path(),
262            Self::TieredStorage(ts) => ts.path(),
263        }
264    }
265
266    /// Iterate over all accounts and call `callback` with each account.
267    ///
268    /// `callback` parameters:
269    /// * Offset: the offset within the file of this account
270    /// * StoredAccountInfoWithoutData: the account itself, without account data
271    ///
272    /// Note that account data is not read/passed to the callback.
273    pub fn scan_accounts_without_data(
274        &self,
275        callback: impl for<'local> FnMut(Offset, StoredAccountInfoWithoutData<'local>),
276    ) -> Result<()> {
277        match self {
278            Self::AppendVec(av) => av.scan_accounts_without_data(callback),
279            Self::TieredStorage(ts) => {
280                if let Some(reader) = ts.reader() {
281                    reader.scan_accounts_without_data(callback)?;
282                }
283                Ok(())
284            }
285        }
286    }
287
288    /// Iterate over all accounts and call `callback` with each account.
289    ///
290    /// `callback` parameters:
291    /// * Offset: the offset within the file of this account
292    /// * StoredAccountInfo: the account itself, with account data
293    ///
294    /// Prefer scan_accounts_without_data() when account data is not needed,
295    /// as it can potentially read less and be faster.
296    pub(crate) fn scan_accounts<'a>(
297        &'a self,
298        reader: &mut impl RequiredLenBufFileRead<'a>,
299        callback: impl for<'local> FnMut(Offset, StoredAccountInfo<'local>),
300    ) -> Result<()> {
301        match self {
302            Self::AppendVec(av) => av.scan_accounts(reader, callback),
303            Self::TieredStorage(ts) => {
304                if let Some(reader) = ts.reader() {
305                    reader.scan_accounts(callback)?;
306                }
307                Ok(())
308            }
309        }
310    }
311
312    /// Iterate over all accounts and call `callback` with each account.
313    ///
314    /// Prefer scan_accounts() when possible, as it does not contain file format
315    /// implementation details, and thus potentially can read less and be faster.
316    #[cfg(feature = "dev-context-only-utils")]
317    pub fn scan_accounts_stored_meta(
318        &self,
319        callback: impl for<'local> FnMut(StoredAccountMeta<'local>),
320    ) -> Result<()> {
321        let mut reader = append_vec::new_scan_accounts_reader();
322        match self {
323            Self::AppendVec(av) => av.scan_accounts_stored_meta(&mut reader, callback),
324            Self::TieredStorage(_) => {
325                unimplemented!("StoredAccountMeta is only implemented for AppendVec")
326            }
327        }
328    }
329
330    /// Iterate over all accounts and call `callback` with each account.
331    /// Only intended to be used by Geyser.
332    pub(crate) fn scan_accounts_for_geyser<'a>(
333        &'a self,
334        reader: &mut impl RequiredLenBufFileRead<'a>,
335        mut callback: impl for<'local> FnMut(AccountForGeyser<'local>),
336    ) -> Result<()> {
337        self.scan_accounts(reader, |_offset, account| {
338            let account_for_geyser = AccountForGeyser {
339                pubkey: account.pubkey(),
340                lamports: account.lamports(),
341                owner: account.owner(),
342                executable: account.executable(),
343                rent_epoch: account.rent_epoch(),
344                data: account.data(),
345            };
346            callback(account_for_geyser)
347        })
348    }
349
350    /// Calculate the amount of storage required for an account with the passed
351    /// in data_len
352    pub(crate) fn calculate_stored_size(&self, data_len: usize) -> usize {
353        match self {
354            Self::AppendVec(av) => av.calculate_stored_size(data_len),
355            Self::TieredStorage(ts) => ts
356                .reader()
357                .expect("Reader must be initialized as stored size is specific to format")
358                .calculate_stored_size(data_len),
359        }
360    }
361
362    /// for each offset in `sorted_offsets`, get the data size
363    pub(crate) fn get_account_data_lens(&self, sorted_offsets: &[usize]) -> Vec<usize> {
364        match self {
365            Self::AppendVec(av) => av.get_account_data_lens(sorted_offsets),
366            Self::TieredStorage(ts) => ts
367                .reader()
368                .and_then(|reader| reader.get_account_data_lens(sorted_offsets).ok())
369                .unwrap_or_default(),
370        }
371    }
372
373    /// iterate over all pubkeys
374    pub fn scan_pubkeys(&self, callback: impl FnMut(&Pubkey)) -> Result<()> {
375        match self {
376            Self::AppendVec(av) => av.scan_pubkeys(callback),
377            Self::TieredStorage(ts) => {
378                if let Some(reader) = ts.reader() {
379                    reader.scan_pubkeys(callback)?;
380                }
381                Ok(())
382            }
383        }
384    }
385
386    /// Copy each account metadata, account and hash to the internal buffer.
387    /// If there is no room to write the first entry, None is returned.
388    /// Otherwise, returns the starting offset of each account metadata.
389    /// Plus, the final return value is the offset where the next entry would be appended.
390    /// So, return.len() is 1 + (number of accounts written)
391    /// After each account is appended, the internal `current_len` is updated
392    /// and will be available to other threads.
393    pub fn write_accounts<'a>(
394        &self,
395        accounts: &impl StorableAccounts<'a>,
396        skip: usize,
397    ) -> Option<StoredAccountsInfo> {
398        match self {
399            Self::AppendVec(av) => av.append_accounts(accounts, skip),
400            // Note: The conversion here is needed as the AccountsDB currently
401            // assumes all offsets are multiple of 8 while TieredStorage uses
402            // IndexOffset that is equivalent to AccountInfo::reduced_offset.
403            Self::TieredStorage(ts) => ts
404                .write_accounts(accounts, skip, &HOT_FORMAT)
405                .map(|mut stored_accounts_info| {
406                    stored_accounts_info.offsets.iter_mut().for_each(|offset| {
407                        *offset = AccountInfo::reduced_offset_to_offset(*offset as u32);
408                    });
409                    stored_accounts_info
410                })
411                .ok(),
412        }
413    }
414
415    /// Returns the way to access this accounts file when archiving
416    pub fn internals_for_archive(&self) -> InternalsForArchive {
417        match self {
418            Self::AppendVec(av) => av.internals_for_archive(),
419            Self::TieredStorage(ts) => InternalsForArchive::Mmap(
420                ts.reader()
421                    .expect("must be a reader when archiving")
422                    .data_for_archive(),
423            ),
424        }
425    }
426}
427
/// Selects the format used when creating a new `AccountsFile` instance.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum AccountsFileProvider {
    /// Create accounts files in the append vec format. This is the default.
    #[default]
    AppendVec,
    /// Create accounts files in the hot storage format.
    HotStorage,
}
435
436impl AccountsFileProvider {
437    pub fn new_writable(&self, path: impl Into<PathBuf>, file_size: u64) -> AccountsFile {
438        match self {
439            Self::AppendVec => {
440                AccountsFile::AppendVec(AppendVec::new(path, true, file_size as usize))
441            }
442            Self::HotStorage => AccountsFile::TieredStorage(TieredStorage::new_writable(path)),
443        }
444    }
445}
446
/// The access method to use when archiving an AccountsFile
///
/// The borrowed contents live for `'a`.
#[derive(Debug)]
pub enum InternalsForArchive<'a> {
    /// Accessing the internals is done via Mmap; holds a borrowed byte slice
    Mmap(&'a [u8]),
    /// Accessing the internals is done via File I/O; holds a borrowed path
    FileIo(&'a Path),
}
455
/// Information after storing accounts
///
/// This is the value returned by `AccountsFile::write_accounts()` on success.
#[derive(Debug)]
pub struct StoredAccountsInfo {
    /// offset in the storage where each account was stored
    pub offsets: Vec<usize>,
    /// total size of all the stored accounts
    pub size: usize,
}
464
#[cfg(test)]
pub mod tests {
    use crate::accounts_file::AccountsFile;

    impl AccountsFile {
        /// Test-only helper that forwards `len` to `AppendVec::set_current_len_for_tests`.
        ///
        /// This is a no-op for tiered storage.
        pub(crate) fn set_current_len_for_tests(&self, len: usize) {
            if let Self::AppendVec(append_vec) = self {
                append_vec.set_current_len_for_tests(len);
            }
        }
    }
}