Skip to main content

forest/cli_shared/
snapshot.rs

1// Copyright 2019-2026 ChainSafe Systems
2// SPDX-License-Identifier: Apache-2.0, MIT
3
4use std::{
5    fmt::Display,
6    path::{Path, PathBuf},
7    str::FromStr,
8};
9
10use crate::{cli_shared::snapshot::parse::ParsedFilename, utils::net::download_file_with_retry};
11use crate::{networks::NetworkChain, utils::net::DownloadFileOption};
12use anyhow::{Context as _, bail};
13use chrono::NaiveDate;
14use url::Url;
15
/// Who hosts the snapshot on the web?
/// See [`stable_url`].
///
/// The `strum` derives provide `FromStr` and `Display` implementations, with
/// variant names serialized in kebab-case, and `clap::ValueEnum` lets the enum
/// be used as a command-line argument value.
#[derive(
    Debug,
    Clone,
    Copy,
    Hash,
    PartialEq,
    Eq,
    Default,
    strum::EnumString, // impl std::str::FromStr
    strum::Display,    // impl Display
    clap::ValueEnum,   // allow values to be enumerated and parsed by clap
)]
#[strum(serialize_all = "kebab-case")]
pub enum TrustedVendor {
    /// The Forest snapshot archive; this is the default vendor.
    #[default]
    Forest,
}
35
36/// Create a filename in the "full" format. See [`parse`].
37// Common between export, and [`fetch`].
38// Keep in sync with the CLI documentation for the `snapshot` sub-command.
39pub fn filename(
40    vendor: impl Display,
41    chain: impl Display,
42    date: NaiveDate,
43    height: i64,
44    forest_format: bool,
45) -> String {
46    let vendor = vendor.to_string();
47    let chain = chain.to_string();
48    ParsedFilename::Full {
49        vendor: &vendor,
50        chain: &chain,
51        date,
52        height,
53        forest_format,
54    }
55    .to_string()
56}
57
58/// Returns the path to the downloaded file.
59pub async fn fetch(
60    directory: &Path,
61    chain: &NetworkChain,
62    vendor: TrustedVendor,
63) -> anyhow::Result<PathBuf> {
64    let (url, _len, path) = peek(vendor, chain).await?;
65    let (date, height, forest_format) = ParsedFilename::parse_str(&path)
66        .context("unexpected path format")?
67        .date_and_height_and_forest();
68    let filename = filename(vendor, chain, date, height, forest_format);
69
70    download_file_with_retry(
71        &url,
72        directory,
73        &filename,
74        DownloadFileOption::Resumable,
75        None,
76    )
77    .await
78}
79
80/// Returns
81/// - The final URL after redirection(s)
82/// - The size of the snapshot from this vendor on this chain
83/// - The filename of the snapshot
84pub async fn peek(
85    vendor: TrustedVendor,
86    chain: &NetworkChain,
87) -> anyhow::Result<(Url, u64, String)> {
88    let stable_url = stable_url(vendor, chain)?;
89    // issue an actual GET, so the content length will be of the body
90    // (we never actually fetch the body)
91    // if we issue a HEAD, the content-length will be zero for our stable URLs
92    // (this is a bug, maybe in reqwest - HEAD _should_ give us the length)
93    // (probably because the stable URLs are all double-redirects 301 -> 302 -> 200)
94    let response = reqwest::get(stable_url)
95        .await?
96        .error_for_status()
97        .context("server returned an error response")?;
98    let final_url = response.url().clone();
99    let cd_path = response
100        .headers()
101        .get(reqwest::header::CONTENT_DISPOSITION)
102        .and_then(parse_content_disposition);
103    Ok((
104        final_url,
105        response
106            .content_length()
107            .context("no content-length header")?,
108        cd_path.context("no content-disposition filepath")?,
109    ))
110}
111
112// Extract file paths from content-disposition values:
113//   "attachment; filename=\"911520_2023_09_14T06_13_00Z.car.zst\""
114// => "911520_2023_09_14T06_13_00Z.car.zst"
115fn parse_content_disposition(value: &reqwest::header::HeaderValue) -> Option<String> {
116    use regex::Regex;
117    let re = Regex::new("filename=\"([^\"]+)\"").ok()?;
118    let cap = re.captures(value.to_str().ok()?)?;
119    Some(cap.get(1)?.as_str().to_owned())
120}
121
/// Declares every listed `&str` constant exactly as written and, in test
/// builds only, also defines an `ALL_URLS` slice containing all of them.
macro_rules! define_urls {
    ($($vis:vis const $name:ident: &str = $value:literal;)* $(,)?) => {
        $(
            $vis const $name: &str = $value;
        )*

        #[cfg(test)]
        const ALL_URLS: &[&str] = &[$($name),*];
    };
}
133
// Snapshot entry-point URLs on the Forest archive, one per chain that
// `stable_url` supports. Declared through `define_urls!` so that test builds
// also get them collected in `ALL_URLS`.
define_urls!(
    const FOREST_MAINNET_COMPRESSED: &str = "https://forest-archive.chainsafe.dev/latest/mainnet/";
    const FOREST_CALIBNET_COMPRESSED: &str =
        "https://forest-archive.chainsafe.dev/latest/calibnet/";
);
139
140pub fn stable_url(vendor: TrustedVendor, chain: &NetworkChain) -> anyhow::Result<Url> {
141    let s = match (vendor, chain) {
142        (TrustedVendor::Forest, NetworkChain::Mainnet) => FOREST_MAINNET_COMPRESSED,
143        (TrustedVendor::Forest, NetworkChain::Calibnet) => FOREST_CALIBNET_COMPRESSED,
144        (TrustedVendor::Forest, NetworkChain::Butterflynet | NetworkChain::Devnet(_)) => {
145            bail!("unsupported chain {chain}")
146        }
147    };
148    Ok(Url::from_str(s).unwrap())
149}
150
/// Every URL declared through `define_urls!` must parse as a [`Url`].
#[test]
fn parse_stable_urls() {
    ALL_URLS.iter().for_each(|raw| {
        let _did_not_panic = Url::from_str(raw).unwrap();
    });
}
157
158mod parse {
159    //! Vendors publish filenames with two formats:
160    //! `filecoin_snapshot_calibnet_2023-06-13_height_643680.car.zst` "full" and
161    //! `632400_2023_06_09T08_13_00Z.car.zst` "short".
162    //!
163    //! This module contains utilities for parsing and printing these formats.
164
165    use std::{fmt::Display, str::FromStr};
166
167    use anyhow::{anyhow, bail};
168    use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
169    use nom::{
170        Err, Parser,
171        branch::alt,
172        bytes::complete::{tag, take_until},
173        character::complete::digit1,
174        combinator::{map_res, recognize},
175        error::ErrorKind,
176        error_position,
177        multi::many1,
178    };
179
180    use crate::db::car::forest::FOREST_CAR_FILE_EXTENSION;
181
    /// A snapshot filename broken into its components.
    ///
    /// The string parts of the `Full` variant borrow from the text they were
    /// parsed from (or from whatever the caller supplies when building one).
    #[derive(PartialEq, Debug, Clone, Hash)]
    pub(super) enum ParsedFilename<'a> {
        /// "Short" format, e.g. `632400_2023_06_09T08_13_00Z.car.zst`.
        Short {
            /// Date part of the timestamp.
            date: NaiveDate,
            /// Time-of-day part of the timestamp.
            time: NaiveTime,
            /// The leading number of the filename.
            height: i64,
        },
        /// "Full" format, e.g.
        /// `filecoin_snapshot_calibnet_2023-06-13_height_643680.car.zst`.
        Full {
            /// Everything before `_snapshot_`.
            vendor: &'a str,
            /// The component between `_snapshot_` and the date.
            chain: &'a str,
            /// The `YYYY-MM-DD` date component.
            date: NaiveDate,
            /// The number following `_height_`.
            height: i64,
            /// `true` when the filename carries the forest CAR extension
            /// instead of plain `.car.zst`.
            forest_format: bool,
        },
    }
197
198    impl Display for ParsedFilename<'_> {
199        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
200            match self {
201                ParsedFilename::Short { date, time, height } => f.write_fmt(format_args!(
202                    "{height}_{}.car.zst",
203                    NaiveDateTime::new(*date, *time).format("%Y_%m_%dT%H_%M_%SZ")
204                )),
205                ParsedFilename::Full {
206                    vendor,
207                    chain,
208                    date,
209                    height,
210                    forest_format,
211                } => f.write_fmt(format_args!(
212                    "{vendor}_snapshot_{chain}_{}_height_{height}{}.car.zst",
213                    date.format("%Y-%m-%d"),
214                    if *forest_format { ".forest" } else { "" }
215                )),
216            }
217        }
218    }
219
220    impl<'a> ParsedFilename<'a> {
221        pub fn date_and_height_and_forest(&self) -> (NaiveDate, i64, bool) {
222            match self {
223                ParsedFilename::Short { date, height, .. } => (*date, *height, false),
224                ParsedFilename::Full {
225                    date,
226                    height,
227                    forest_format,
228                    ..
229                } => (*date, *height, *forest_format),
230            }
231        }
232
233        pub fn parse_str(input: &'a str) -> anyhow::Result<Self> {
234            enter_nom(alt((short, full)), input)
235        }
236    }
237
238    /// Parse a number using its [`FromStr`] implementation.
239    fn number<T>(input: &str) -> nom::IResult<&str, T>
240    where
241        T: FromStr,
242    {
243        map_res(recognize(many1(digit1)), T::from_str).parse(input)
244    }
245
246    /// Create a parser for `YYYY-MM-DD` etc
247    fn ymd(separator: &str) -> impl Fn(&str) -> nom::IResult<&str, NaiveDate> + '_ {
248        move |input| {
249            let (rest, (year, _, month, _, day)) =
250                (number, tag(separator), number, tag(separator), number).parse(input)?;
251            match NaiveDate::from_ymd_opt(year, month, day) {
252                Some(date) => Ok((rest, date)),
253                None => Err(Err::Error(error_position!(input, ErrorKind::Verify))),
254            }
255        }
256    }
257
258    /// Create a parser for `HH_MM_SS` etc
259    fn hms(separator: &str) -> impl Fn(&str) -> nom::IResult<&str, NaiveTime> + '_ {
260        move |input| {
261            let (rest, (hour, _, minute, _, second)) =
262                (number, tag(separator), number, tag(separator), number).parse(input)?;
263            match NaiveTime::from_hms_opt(hour, minute, second) {
264                Some(date) => Ok((rest, date)),
265                None => Err(Err::Error(error_position!(input, ErrorKind::Verify))),
266            }
267        }
268    }
269
270    fn full(input: &str) -> nom::IResult<&str, ParsedFilename<'_>> {
271        let (rest, (vendor, _snapshot_, chain, _, date, _height_, height, car_zst)) = (
272            take_until("_snapshot_"),
273            tag("_snapshot_"),
274            take_until("_"),
275            tag("_"),
276            ymd("-"),
277            tag("_height_"),
278            number,
279            alt((tag(".car.zst"), tag(FOREST_CAR_FILE_EXTENSION))),
280        )
281            .parse(input)?;
282        Ok((
283            rest,
284            ParsedFilename::Full {
285                vendor,
286                chain,
287                date,
288                height,
289                forest_format: car_zst == FOREST_CAR_FILE_EXTENSION,
290            },
291        ))
292    }
293
294    fn short(input: &str) -> nom::IResult<&str, ParsedFilename<'_>> {
295        let (rest, (height, _, date, _, time, _)) = (
296            number,
297            tag("_"),
298            ymd("_"),
299            tag("T"),
300            hms("_"),
301            tag("Z.car.zst"),
302        )
303            .parse(input)?;
304        Ok((rest, ParsedFilename::Short { date, time, height }))
305    }
306
307    fn enter_nom<'a, T>(
308        mut parser: impl nom::Parser<&'a str, Output = T, Error = nom::error::Error<&'a str>>,
309        input: &'a str,
310    ) -> anyhow::Result<T> {
311        let (rest, t) = parser
312            .parse(input)
313            .map_err(|e| anyhow!("Parser error: {e}"))?;
314        if !rest.is_empty() {
315            bail!("Unexpected trailing input: {rest}")
316        }
317        Ok(t)
318    }
319
320    #[cfg(test)]
321    mod tests {
322        use super::*;
323
324        #[test]
325        fn test_serialization() {
326            for (text, value) in [
327                (
328                    "forest_snapshot_mainnet_2023-05-30_height_2905376.car.zst",
329                    ParsedFilename::full("forest", "mainnet", 2023, 5, 30, 2905376, false),
330                ),
331                (
332                    "forest_snapshot_calibnet_2023-05-30_height_604419.car.zst",
333                    ParsedFilename::full("forest", "calibnet", 2023, 5, 30, 604419, false),
334                ),
335                (
336                    "forest_snapshot_mainnet_2023-05-30_height_2905376.forest.car.zst",
337                    ParsedFilename::full("forest", "mainnet", 2023, 5, 30, 2905376, true),
338                ),
339                (
340                    "forest_snapshot_calibnet_2023-05-30_height_604419.forest.car.zst",
341                    ParsedFilename::full("forest", "calibnet", 2023, 5, 30, 604419, true),
342                ),
343                (
344                    "2905920_2023_05_30T22_00_00Z.car.zst",
345                    ParsedFilename::short(2905920, 2023, 5, 30, 22, 0, 0),
346                ),
347                (
348                    "605520_2023_05_31T00_13_00Z.car.zst",
349                    ParsedFilename::short(605520, 2023, 5, 31, 0, 13, 0),
350                ),
351                (
352                    "filecoin_snapshot_calibnet_2023-06-13_height_643680.car.zst",
353                    ParsedFilename::full("filecoin", "calibnet", 2023, 6, 13, 643680, false),
354                ),
355                (
356                    "venus_snapshot_pineconenet_2045-01-01_height_2.car.zst",
357                    ParsedFilename::full("venus", "pineconenet", 2045, 1, 1, 2, false),
358                ),
359                (
360                    "filecoin_snapshot_calibnet_2023-06-13_height_643680.forest.car.zst",
361                    ParsedFilename::full("filecoin", "calibnet", 2023, 6, 13, 643680, true),
362                ),
363                (
364                    "venus_snapshot_pineconenet_2045-01-01_height_2.forest.car.zst",
365                    ParsedFilename::full("venus", "pineconenet", 2045, 1, 1, 2, true),
366                ),
367            ] {
368                assert_eq!(
369                    value,
370                    ParsedFilename::parse_str(text).unwrap(),
371                    "mismatch in deserialize"
372                );
373                assert_eq!(value.to_string(), text, "mismatch in serialize");
374            }
375        }
376
377        #[test]
378        fn test_wrong_ext() {
379            ParsedFilename::parse_str("forest_snapshot_mainnet_2023-05-30_height_2905376.car.zstt")
380                .unwrap_err();
381            ParsedFilename::parse_str(
382                "forest_snapshot_mainnet_2023-05-30_height_2905376.car.zst.tmp",
383            )
384            .unwrap_err();
385        }
386
387        impl ParsedFilename<'static> {
388            /// # Panics
389            /// - If `ymd`/`hms` aren't valid
390            fn short(
391                height: i64,
392                year: i32,
393                month: u32,
394                day: u32,
395                hour: u32,
396                min: u32,
397                sec: u32,
398            ) -> Self {
399                Self::Short {
400                    date: NaiveDate::from_ymd_opt(year, month, day).unwrap(),
401                    time: NaiveTime::from_hms_opt(hour, min, sec).unwrap(),
402                    height,
403                }
404            }
405        }
406
407        impl<'a> ParsedFilename<'a> {
408            /// # Panics
409            /// - If `ymd` isn't valid
410            fn full(
411                vendor: &'a str,
412                chain: &'a str,
413                year: i32,
414                month: u32,
415                day: u32,
416                height: i64,
417                forest_format: bool,
418            ) -> Self {
419                Self::Full {
420                    vendor,
421                    chain,
422                    date: NaiveDate::from_ymd_opt(year, month, day).unwrap(),
423                    height,
424                    forest_format,
425                }
426            }
427        }
428    }
429}
430
#[cfg(test)]
mod tests {
    use super::parse_content_disposition;
    use reqwest::header::HeaderValue;

    /// A header carrying both `filename*=` and a quoted `filename=` must
    /// yield the quoted value.
    #[test]
    fn content_disposition_forest() {
        let header = HeaderValue::from_static(
            "attachment; filename*=UTF-8''forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst; \
             filename=\"forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst\"",
        );
        let filename = parse_content_disposition(&header);
        assert_eq!(
            filename.as_deref(),
            Some("forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst")
        );
    }
}
446}