forest/db/car/
any.rs

// Copyright 2019-2025 ChainSafe Systems
// SPDX-License-Identifier: Apache-2.0, MIT
3
//! There are three different CAR formats: `.car`, `.car.zst` and
//! `.forest.car.zst`. [`AnyCar`] identifies the format by inspecting the CAR
//! header and the first key-value block, and picks the appropriate block store
//! (either [`super::ForestCar`] or [`super::PlainCar`]).
//!
//! CARv2 is not supported yet.
10
11use super::{CacheKey, RandomAccessFileReader, ZstdFrameCache};
12use crate::blocks::{Tipset, TipsetKey};
13use crate::chain::FilecoinSnapshotMetadata;
14use crate::utils::io::EitherMmapOrRandomAccessFile;
15use cid::Cid;
16use fvm_ipld_blockstore::Blockstore;
17use itertools::Either;
18use positioned_io::ReadAt;
19use std::borrow::Cow;
20use std::io::{Error, ErrorKind, Read, Result};
21use std::path::{Path, PathBuf};
22use std::sync::Arc;
23
24pub enum AnyCar<ReaderT> {
25    Plain(super::PlainCar<ReaderT>),
26    Forest(super::ForestCar<ReaderT>),
27    Memory(super::PlainCar<Vec<u8>>),
28}
29
30impl<ReaderT: RandomAccessFileReader> AnyCar<ReaderT> {
31    /// Open an archive. May be formatted as `.car`, `.car.zst` or
32    /// `.forest.car.zst`. This call may block for an indeterminate amount of
33    /// time while data is decoded and indexed.
34    pub fn new(reader: ReaderT) -> Result<Self> {
35        if super::ForestCar::is_valid(&reader) {
36            return Ok(AnyCar::Forest(super::ForestCar::new(reader)?));
37        }
38
39        // Maybe use a tempfile for this in the future.
40        if let Ok(decompressed) = zstd::stream::decode_all(positioned_io::Cursor::new(&reader))
41            && let Ok(mem_car) = super::PlainCar::new(decompressed)
42        {
43            return Ok(AnyCar::Memory(mem_car));
44        }
45
46        if let Ok(plain_car) = super::PlainCar::new(reader) {
47            return Ok(AnyCar::Plain(plain_car));
48        }
49        Err(Error::new(
50            ErrorKind::InvalidData,
51            "input not recognized as any kind of CAR data (.car, .car.zst, .forest.car)",
52        ))
53    }
54
55    pub fn metadata(&self) -> &Option<FilecoinSnapshotMetadata> {
56        match self {
57            AnyCar::Forest(forest) => forest.metadata(),
58            AnyCar::Plain(plain) => plain.metadata(),
59            AnyCar::Memory(mem) => mem.metadata(),
60        }
61    }
62
63    pub fn heaviest_tipset_key(&self) -> TipsetKey {
64        match self {
65            AnyCar::Forest(forest) => forest.heaviest_tipset_key(),
66            AnyCar::Plain(plain) => plain.heaviest_tipset_key(),
67            AnyCar::Memory(mem) => mem.heaviest_tipset_key(),
68        }
69    }
70
71    /// Filecoin archives are tagged with the heaviest tipset. This call may
72    /// fail if the archive is corrupt or if it is not a Filecoin archive.
73    pub fn heaviest_tipset(&self) -> anyhow::Result<Tipset> {
74        match self {
75            AnyCar::Forest(forest) => forest.heaviest_tipset(),
76            AnyCar::Plain(plain) => plain.heaviest_tipset(),
77            AnyCar::Memory(mem) => mem.heaviest_tipset(),
78        }
79    }
80
81    /// Return the identified CAR format variant. There are three variants:
82    /// `CARv1`, `CARv2`, `CARv1.zst`, `CARv2.zst` and `ForestCARv1.zst`.
83    pub fn variant(&self) -> Cow<'static, str> {
84        match self {
85            AnyCar::Forest(_) => "ForestCARv1.zst".into(),
86            AnyCar::Plain(car) => format!("CARv{}", car.version()).into(),
87            AnyCar::Memory(car) => format!("CARv{}.zst", car.version()).into(),
88        }
89    }
90
91    /// Discard reader type and replace with dynamic trait object.
92    pub fn into_dyn(self) -> AnyCar<Box<dyn super::RandomAccessFileReader>> {
93        match self {
94            AnyCar::Forest(f) => AnyCar::Forest(f.into_dyn()),
95            AnyCar::Plain(p) => AnyCar::Plain(p.into_dyn()),
96            AnyCar::Memory(m) => AnyCar::Memory(m),
97        }
98    }
99
100    /// Set the z-frame cache of the inner CAR reader.
101    pub fn with_cache(self, cache: Arc<ZstdFrameCache>, key: CacheKey) -> Self {
102        match self {
103            AnyCar::Forest(f) => AnyCar::Forest(f.with_cache(cache, key)),
104            AnyCar::Plain(p) => AnyCar::Plain(p),
105            AnyCar::Memory(m) => AnyCar::Memory(m),
106        }
107    }
108
109    /// Get the index size in bytes
110    pub fn index_size_bytes(&self) -> Option<u32> {
111        match self {
112            Self::Forest(car) => Some(car.index_size_bytes()),
113            _ => None,
114        }
115    }
116
117    /// Gets a reader of the block data by its `Cid`
118    pub fn get_reader(&self, k: Cid) -> anyhow::Result<Option<impl Read>> {
119        match self {
120            Self::Forest(car) => Ok(car.get_reader(k)?.map(Either::Left)),
121            Self::Plain(car) => Ok(car.get_reader(k).map(|r| Either::Right(Either::Left(r)))),
122            Self::Memory(car) => Ok(car.get_reader(k).map(|r| Either::Right(Either::Right(r)))),
123        }
124    }
125}
126
127impl TryFrom<&'static [u8]> for AnyCar<&'static [u8]> {
128    type Error = std::io::Error;
129    fn try_from(bytes: &'static [u8]) -> std::io::Result<Self> {
130        Ok(AnyCar::Plain(super::PlainCar::new(bytes)?))
131    }
132}
133
134impl TryFrom<&Path> for AnyCar<EitherMmapOrRandomAccessFile> {
135    type Error = std::io::Error;
136    fn try_from(path: &Path) -> std::io::Result<Self> {
137        AnyCar::new(EitherMmapOrRandomAccessFile::open(path)?)
138    }
139}
140
141impl TryFrom<&PathBuf> for AnyCar<EitherMmapOrRandomAccessFile> {
142    type Error = std::io::Error;
143    fn try_from(path: &PathBuf) -> std::io::Result<Self> {
144        Self::try_from(path.as_path())
145    }
146}
147
148impl<ReaderT> Blockstore for AnyCar<ReaderT>
149where
150    ReaderT: ReadAt,
151{
152    fn get(&self, k: &Cid) -> anyhow::Result<Option<Vec<u8>>> {
153        match self {
154            AnyCar::Forest(forest) => forest.get(k),
155            AnyCar::Plain(plain) => plain.get(k),
156            AnyCar::Memory(mem) => mem.get(k),
157        }
158    }
159
160    fn put_keyed(&self, k: &Cid, block: &[u8]) -> anyhow::Result<()> {
161        match self {
162            AnyCar::Forest(forest) => forest.put_keyed(k, block),
163            AnyCar::Plain(plain) => plain.put_keyed(k, block),
164            AnyCar::Memory(mem) => mem.put_keyed(k, block),
165        }
166    }
167}
168
169impl<ReaderT> From<super::ForestCar<ReaderT>> for AnyCar<ReaderT> {
170    fn from(car: super::ForestCar<ReaderT>) -> Self {
171        Self::Forest(car)
172    }
173}
174
175impl<ReaderT> From<super::PlainCar<ReaderT>> for AnyCar<ReaderT> {
176    fn from(car: super::PlainCar<ReaderT>) -> Self {
177        Self::Plain(car)
178    }
179}
180
#[cfg(test)]
mod tests {
    use super::*;
    use crate::networks::{calibnet, mainnet};

    // Each test opens a bundled genesis archive through `AnyCar::new` and
    // checks that the network's genesis CID can be found in it. The `_zstd`
    // tests compress the same bytes first (zstd level 3).

    #[test]
    fn forest_any_load_calibnet() {
        let car = AnyCar::new(calibnet::DEFAULT_GENESIS).unwrap();
        let found = car.has(&calibnet::GENESIS_CID).unwrap();
        assert!(found);
    }

    #[test]
    fn forest_any_load_calibnet_zstd() {
        let compressed = zstd::encode_all(calibnet::DEFAULT_GENESIS, 3).unwrap();
        let car = AnyCar::new(compressed).unwrap();
        let found = car.has(&calibnet::GENESIS_CID).unwrap();
        assert!(found);
    }

    #[test]
    fn forest_any_load_mainnet() {
        let car = AnyCar::new(mainnet::DEFAULT_GENESIS).unwrap();
        let found = car.has(&mainnet::GENESIS_CID).unwrap();
        assert!(found);
    }

    #[test]
    fn forest_any_load_mainnet_zstd() {
        let compressed = zstd::encode_all(mainnet::DEFAULT_GENESIS, 3).unwrap();
        let car = AnyCar::new(compressed).unwrap();
        let found = car.has(&mainnet::GENESIS_CID).unwrap();
        assert!(found);
    }
}
211}