Skip to main content

forest/cli_shared/
snapshot.rs

1// Copyright 2019-2026 ChainSafe Systems
2// SPDX-License-Identifier: Apache-2.0, MIT
3
4use std::{
5    fmt::Display,
6    path::{Path, PathBuf},
7    str::FromStr,
8};
9
10use crate::{cli_shared::snapshot::parse::ParsedFilename, utils::net::download_file_with_retry};
11use crate::{networks::NetworkChain, utils::net::DownloadFileOption};
12use anyhow::{Context as _, bail};
13use chrono::NaiveDate;
14use url::Url;
15
16/// Who hosts the snapshot on the web?
17/// See [`stable_url`].
18#[derive(
19    Debug,
20    Clone,
21    Copy,
22    Hash,
23    PartialEq,
24    Eq,
25    Default,
26    strum::EnumString, // impl std::str::FromStr
27    strum::Display,    // impl Display
28    clap::ValueEnum,   // allow values to be enumerated and parsed by clap
29)]
30#[strum(serialize_all = "kebab-case")]
31pub enum TrustedVendor {
32    #[default]
33    Forest,
34}
35
36/// Create a filename in the "full" format. See [`parse`].
37// Common between export, and [`fetch`].
38// Keep in sync with the CLI documentation for the `snapshot` sub-command.
39pub fn filename(
40    vendor: impl Display,
41    chain: impl Display,
42    date: NaiveDate,
43    height: i64,
44    forest_format: bool,
45) -> String {
46    let vendor = vendor.to_string();
47    let chain = chain.to_string();
48    ParsedFilename::Full {
49        vendor: &vendor,
50        chain: &chain,
51        date,
52        height,
53        forest_format,
54    }
55    .to_string()
56}
57
58/// Returns the path to the downloaded file.
59pub async fn fetch(
60    directory: &Path,
61    chain: &NetworkChain,
62    vendor: TrustedVendor,
63) -> anyhow::Result<PathBuf> {
64    let (url, _len, path) = peek(vendor, chain).await?;
65    let (date, height, forest_format) = ParsedFilename::parse_str(&path)
66        .context("unexpected path format")?
67        .date_and_height_and_forest();
68    let filename = filename(vendor, chain, date, height, forest_format);
69
70    tracing::info!("Downloading snapshot: {filename}");
71
72    download_file_with_retry(
73        &url,
74        directory,
75        &filename,
76        DownloadFileOption::Resumable,
77        None,
78    )
79    .await
80}
81
82/// Returns
83/// - The final URL after redirection(s)
84/// - The size of the snapshot from this vendor on this chain
85/// - The filename of the snapshot
86pub async fn peek(
87    vendor: TrustedVendor,
88    chain: &NetworkChain,
89) -> anyhow::Result<(Url, u64, String)> {
90    let stable_url = stable_url(vendor, chain)?;
91    // issue an actual GET, so the content length will be of the body
92    // (we never actually fetch the body)
93    // if we issue a HEAD, the content-length will be zero for our stable URLs
94    // (this is a bug, maybe in reqwest - HEAD _should_ give us the length)
95    // (probably because the stable URLs are all double-redirects 301 -> 302 -> 200)
96    let response = reqwest::get(stable_url)
97        .await?
98        .error_for_status()
99        .context("server returned an error response")?;
100    let final_url = response.url().clone();
101    let cd_path = response
102        .headers()
103        .get(reqwest::header::CONTENT_DISPOSITION)
104        .and_then(parse_content_disposition);
105    Ok((
106        final_url,
107        response
108            .content_length()
109            .context("no content-length header")?,
110        cd_path.context("no content-disposition filepath")?,
111    ))
112}
113
114// Extract file paths from content-disposition values:
115//   "attachment; filename=\"911520_2023_09_14T06_13_00Z.car.zst\""
116// => "911520_2023_09_14T06_13_00Z.car.zst"
117fn parse_content_disposition(value: &reqwest::header::HeaderValue) -> Option<String> {
118    use regex::Regex;
119    let re = Regex::new("filename=\"([^\"]+)\"").ok()?;
120    let cap = re.captures(value.to_str().ok()?)?;
121    Some(cap.get(1)?.as_str().to_owned())
122}
123
/// Declares every listed `&str` constant exactly as written.
///
/// Also defines an `ALL_URLS` constant (test builds only) that collects all of
/// them, in declaration order, for test purposes.
macro_rules! define_urls {
    ($($visibility:vis const $ident:ident: &str = $url:literal;)* $(,)?) => {
        $($visibility const $ident: &str = $url;)*

        #[cfg(test)]
        const ALL_URLS: &[&str] = &[$($ident),*];
    };
}

define_urls!(
    const FOREST_MAINNET_COMPRESSED: &str = "https://forest-archive.chainsafe.dev/latest/mainnet/";
    const FOREST_CALIBNET_COMPRESSED: &str =
        "https://forest-archive.chainsafe.dev/latest/calibnet/";
);
141
142pub fn stable_url(vendor: TrustedVendor, chain: &NetworkChain) -> anyhow::Result<Url> {
143    let s = match (vendor, chain) {
144        (TrustedVendor::Forest, NetworkChain::Mainnet) => FOREST_MAINNET_COMPRESSED,
145        (TrustedVendor::Forest, NetworkChain::Calibnet) => FOREST_CALIBNET_COMPRESSED,
146        (TrustedVendor::Forest, NetworkChain::Butterflynet | NetworkChain::Devnet(_)) => {
147            bail!("unsupported chain {chain}")
148        }
149    };
150    Ok(Url::from_str(s).unwrap())
151}
152
/// Every URL declared through `define_urls!` must be parseable.
#[test]
fn parse_stable_urls() {
    ALL_URLS.iter().for_each(|candidate| {
        // Panics (and thereby fails the test) on the first malformed URL.
        let _parsed = Url::from_str(candidate).unwrap();
    });
}
159
160mod parse {
161    //! Vendors publish filenames with two formats:
162    //! `filecoin_snapshot_calibnet_2023-06-13_height_643680.car.zst` "full" and
163    //! `632400_2023_06_09T08_13_00Z.car.zst` "short".
164    //!
165    //! This module contains utilities for parsing and printing these formats.
166
167    use std::{fmt::Display, str::FromStr};
168
169    use anyhow::{anyhow, bail};
170    use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
171    use nom::{
172        Err, Parser,
173        branch::alt,
174        bytes::complete::{tag, take_until},
175        character::complete::digit1,
176        combinator::{map_res, recognize},
177        error::ErrorKind,
178        error_position,
179        multi::many1,
180    };
181
182    use crate::db::car::forest::FOREST_CAR_FILE_EXTENSION;
183
184    #[derive(PartialEq, Debug, Clone, Hash)]
185    pub(super) enum ParsedFilename<'a> {
186        Short {
187            date: NaiveDate,
188            time: NaiveTime,
189            height: i64,
190        },
191        Full {
192            vendor: &'a str,
193            chain: &'a str,
194            date: NaiveDate,
195            height: i64,
196            forest_format: bool,
197        },
198    }
199
200    impl Display for ParsedFilename<'_> {
201        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
202            match self {
203                ParsedFilename::Short { date, time, height } => f.write_fmt(format_args!(
204                    "{height}_{}.car.zst",
205                    NaiveDateTime::new(*date, *time).format("%Y_%m_%dT%H_%M_%SZ")
206                )),
207                ParsedFilename::Full {
208                    vendor,
209                    chain,
210                    date,
211                    height,
212                    forest_format,
213                } => f.write_fmt(format_args!(
214                    "{vendor}_snapshot_{chain}_{}_height_{height}{}.car.zst",
215                    date.format("%Y-%m-%d"),
216                    if *forest_format { ".forest" } else { "" }
217                )),
218            }
219        }
220    }
221
222    impl<'a> ParsedFilename<'a> {
223        pub fn date_and_height_and_forest(&self) -> (NaiveDate, i64, bool) {
224            match self {
225                ParsedFilename::Short { date, height, .. } => (*date, *height, false),
226                ParsedFilename::Full {
227                    date,
228                    height,
229                    forest_format,
230                    ..
231                } => (*date, *height, *forest_format),
232            }
233        }
234
235        pub fn parse_str(input: &'a str) -> anyhow::Result<Self> {
236            enter_nom(alt((short, full)), input)
237        }
238    }
239
240    /// Parse a number using its [`FromStr`] implementation.
241    fn number<T>(input: &str) -> nom::IResult<&str, T>
242    where
243        T: FromStr,
244    {
245        map_res(recognize(many1(digit1)), T::from_str).parse(input)
246    }
247
248    /// Create a parser for `YYYY-MM-DD` etc
249    fn ymd(separator: &str) -> impl Fn(&str) -> nom::IResult<&str, NaiveDate> + '_ {
250        move |input| {
251            let (rest, (year, _, month, _, day)) =
252                (number, tag(separator), number, tag(separator), number).parse(input)?;
253            match NaiveDate::from_ymd_opt(year, month, day) {
254                Some(date) => Ok((rest, date)),
255                None => Err(Err::Error(error_position!(input, ErrorKind::Verify))),
256            }
257        }
258    }
259
260    /// Create a parser for `HH_MM_SS` etc
261    fn hms(separator: &str) -> impl Fn(&str) -> nom::IResult<&str, NaiveTime> + '_ {
262        move |input| {
263            let (rest, (hour, _, minute, _, second)) =
264                (number, tag(separator), number, tag(separator), number).parse(input)?;
265            match NaiveTime::from_hms_opt(hour, minute, second) {
266                Some(date) => Ok((rest, date)),
267                None => Err(Err::Error(error_position!(input, ErrorKind::Verify))),
268            }
269        }
270    }
271
272    fn full(input: &str) -> nom::IResult<&str, ParsedFilename<'_>> {
273        let (rest, (vendor, _snapshot_, chain, _, date, _height_, height, car_zst)) = (
274            take_until("_snapshot_"),
275            tag("_snapshot_"),
276            take_until("_"),
277            tag("_"),
278            ymd("-"),
279            tag("_height_"),
280            number,
281            alt((tag(".car.zst"), tag(FOREST_CAR_FILE_EXTENSION))),
282        )
283            .parse(input)?;
284        Ok((
285            rest,
286            ParsedFilename::Full {
287                vendor,
288                chain,
289                date,
290                height,
291                forest_format: car_zst == FOREST_CAR_FILE_EXTENSION,
292            },
293        ))
294    }
295
296    fn short(input: &str) -> nom::IResult<&str, ParsedFilename<'_>> {
297        let (rest, (height, _, date, _, time, _)) = (
298            number,
299            tag("_"),
300            ymd("_"),
301            tag("T"),
302            hms("_"),
303            tag("Z.car.zst"),
304        )
305            .parse(input)?;
306        Ok((rest, ParsedFilename::Short { date, time, height }))
307    }
308
309    fn enter_nom<'a, T>(
310        mut parser: impl nom::Parser<&'a str, Output = T, Error = nom::error::Error<&'a str>>,
311        input: &'a str,
312    ) -> anyhow::Result<T> {
313        let (rest, t) = parser
314            .parse(input)
315            .map_err(|e| anyhow!("Parser error: {e}"))?;
316        if !rest.is_empty() {
317            bail!("Unexpected trailing input: {rest}")
318        }
319        Ok(t)
320    }
321
322    #[cfg(test)]
323    mod tests {
324        use super::*;
325
326        #[test]
327        fn test_serialization() {
328            for (text, value) in [
329                (
330                    "forest_snapshot_mainnet_2023-05-30_height_2905376.car.zst",
331                    ParsedFilename::full("forest", "mainnet", 2023, 5, 30, 2905376, false),
332                ),
333                (
334                    "forest_snapshot_calibnet_2023-05-30_height_604419.car.zst",
335                    ParsedFilename::full("forest", "calibnet", 2023, 5, 30, 604419, false),
336                ),
337                (
338                    "forest_snapshot_mainnet_2023-05-30_height_2905376.forest.car.zst",
339                    ParsedFilename::full("forest", "mainnet", 2023, 5, 30, 2905376, true),
340                ),
341                (
342                    "forest_snapshot_calibnet_2023-05-30_height_604419.forest.car.zst",
343                    ParsedFilename::full("forest", "calibnet", 2023, 5, 30, 604419, true),
344                ),
345                (
346                    "2905920_2023_05_30T22_00_00Z.car.zst",
347                    ParsedFilename::short(2905920, 2023, 5, 30, 22, 0, 0),
348                ),
349                (
350                    "605520_2023_05_31T00_13_00Z.car.zst",
351                    ParsedFilename::short(605520, 2023, 5, 31, 0, 13, 0),
352                ),
353                (
354                    "filecoin_snapshot_calibnet_2023-06-13_height_643680.car.zst",
355                    ParsedFilename::full("filecoin", "calibnet", 2023, 6, 13, 643680, false),
356                ),
357                (
358                    "venus_snapshot_pineconenet_2045-01-01_height_2.car.zst",
359                    ParsedFilename::full("venus", "pineconenet", 2045, 1, 1, 2, false),
360                ),
361                (
362                    "filecoin_snapshot_calibnet_2023-06-13_height_643680.forest.car.zst",
363                    ParsedFilename::full("filecoin", "calibnet", 2023, 6, 13, 643680, true),
364                ),
365                (
366                    "venus_snapshot_pineconenet_2045-01-01_height_2.forest.car.zst",
367                    ParsedFilename::full("venus", "pineconenet", 2045, 1, 1, 2, true),
368                ),
369            ] {
370                assert_eq!(
371                    value,
372                    ParsedFilename::parse_str(text).unwrap(),
373                    "mismatch in deserialize"
374                );
375                assert_eq!(value.to_string(), text, "mismatch in serialize");
376            }
377        }
378
379        #[test]
380        fn test_wrong_ext() {
381            ParsedFilename::parse_str("forest_snapshot_mainnet_2023-05-30_height_2905376.car.zstt")
382                .unwrap_err();
383            ParsedFilename::parse_str(
384                "forest_snapshot_mainnet_2023-05-30_height_2905376.car.zst.tmp",
385            )
386            .unwrap_err();
387        }
388
389        impl ParsedFilename<'static> {
390            /// # Panics
391            /// - If `ymd`/`hms` aren't valid
392            fn short(
393                height: i64,
394                year: i32,
395                month: u32,
396                day: u32,
397                hour: u32,
398                min: u32,
399                sec: u32,
400            ) -> Self {
401                Self::Short {
402                    date: NaiveDate::from_ymd_opt(year, month, day).unwrap(),
403                    time: NaiveTime::from_hms_opt(hour, min, sec).unwrap(),
404                    height,
405                }
406            }
407        }
408
409        impl<'a> ParsedFilename<'a> {
410            /// # Panics
411            /// - If `ymd` isn't valid
412            fn full(
413                vendor: &'a str,
414                chain: &'a str,
415                year: i32,
416                month: u32,
417                day: u32,
418                height: i64,
419                forest_format: bool,
420            ) -> Self {
421                Self::Full {
422                    vendor,
423                    chain,
424                    date: NaiveDate::from_ymd_opt(year, month, day).unwrap(),
425                    height,
426                    forest_format,
427                }
428            }
429        }
430    }
431}
432
#[cfg(test)]
mod tests {
    use super::parse_content_disposition;
    use reqwest::header::HeaderValue;

    /// A header carrying both the extended `filename*=` parameter and the
    /// quoted `filename=` parameter yields the quoted file name.
    #[test]
    fn content_disposition_forest() {
        const EXPECTED: &str = "forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst";

        let header = HeaderValue::from_static(
            "attachment; filename*=UTF-8''forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst; \
             filename=\"forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst\"",
        );
        let extracted = parse_content_disposition(&header);

        assert_eq!(extracted.as_deref(), Some(EXPECTED));
    }
}