1use std::{
5 fmt::Display,
6 path::{Path, PathBuf},
7 str::FromStr,
8};
9
10use crate::{cli_shared::snapshot::parse::ParsedFilename, utils::net::download_file_with_retry};
11use crate::{networks::NetworkChain, utils::net::DownloadFileOption};
12use anyhow::{Context as _, bail};
13use chrono::NaiveDate;
14use url::Url;
15
16#[derive(
19 Debug,
20 Clone,
21 Copy,
22 Hash,
23 PartialEq,
24 Eq,
25 Default,
26 strum::EnumString, strum::Display, clap::ValueEnum, )]
30#[strum(serialize_all = "kebab-case")]
31pub enum TrustedVendor {
32 #[default]
33 Forest,
34}
35
36pub fn filename(
40 vendor: impl Display,
41 chain: impl Display,
42 date: NaiveDate,
43 height: i64,
44 forest_format: bool,
45) -> String {
46 let vendor = vendor.to_string();
47 let chain = chain.to_string();
48 ParsedFilename::Full {
49 vendor: &vendor,
50 chain: &chain,
51 date,
52 height,
53 forest_format,
54 }
55 .to_string()
56}
57
58pub async fn fetch(
60 directory: &Path,
61 chain: &NetworkChain,
62 vendor: TrustedVendor,
63) -> anyhow::Result<PathBuf> {
64 let (url, _len, path) = peek(vendor, chain).await?;
65 let (date, height, forest_format) = ParsedFilename::parse_str(&path)
66 .context("unexpected path format")?
67 .date_and_height_and_forest();
68 let filename = filename(vendor, chain, date, height, forest_format);
69
70 tracing::info!("Downloading snapshot: {filename}");
71
72 download_file_with_retry(
73 &url,
74 directory,
75 &filename,
76 DownloadFileOption::Resumable,
77 None,
78 )
79 .await
80}
81
82pub async fn peek(
87 vendor: TrustedVendor,
88 chain: &NetworkChain,
89) -> anyhow::Result<(Url, u64, String)> {
90 let stable_url = stable_url(vendor, chain)?;
91 let response = reqwest::get(stable_url)
97 .await?
98 .error_for_status()
99 .context("server returned an error response")?;
100 let final_url = response.url().clone();
101 let cd_path = response
102 .headers()
103 .get(reqwest::header::CONTENT_DISPOSITION)
104 .and_then(parse_content_disposition);
105 Ok((
106 final_url,
107 response
108 .content_length()
109 .context("no content-length header")?,
110 cd_path.context("no content-disposition filepath")?,
111 ))
112}
113
114fn parse_content_disposition(value: &reqwest::header::HeaderValue) -> Option<String> {
118 use regex::Regex;
119 let re = Regex::new("filename=\"([^\"]+)\"").ok()?;
120 let cap = re.captures(value.to_str().ok()?)?;
121 Some(cap.get(1)?.as_str().to_owned())
122}
123
/// Declares a list of `const NAME: &str = "…";` items.
///
/// Each constant is emitted exactly as written. In test builds the macro
/// additionally defines `ALL_URLS`, a slice containing every declared
/// constant, so that a test can iterate over all of them.
macro_rules! define_urls {
    ($($vis:vis const $name:ident: &str = $value:literal;)* $(,)?) => {
        // Re-emit every constant unchanged.
        $($vis const $name: &str = $value;)*

        // Test-only collection of every constant declared above.
        #[cfg(test)]
        const ALL_URLS: &[&str] = [
            $($name,)*
        ].as_slice();
    };
}
135
// Stable entry-point URLs, one per supported network, used by `stable_url`.
// NOTE(review): the `/latest/` path presumably redirects to the most recent
// snapshot file — confirm against the archive service.
// Plain `//` comments are used here because the `define_urls!` matcher does
// not accept attributes (which is what `///` doc comments expand to).
define_urls!(
    const FOREST_MAINNET_COMPRESSED: &str = "https://forest-archive.chainsafe.dev/latest/mainnet/";
    const FOREST_CALIBNET_COMPRESSED: &str =
        "https://forest-archive.chainsafe.dev/latest/calibnet/";
);
141
142pub fn stable_url(vendor: TrustedVendor, chain: &NetworkChain) -> anyhow::Result<Url> {
143 let s = match (vendor, chain) {
144 (TrustedVendor::Forest, NetworkChain::Mainnet) => FOREST_MAINNET_COMPRESSED,
145 (TrustedVendor::Forest, NetworkChain::Calibnet) => FOREST_CALIBNET_COMPRESSED,
146 (TrustedVendor::Forest, NetworkChain::Butterflynet | NetworkChain::Devnet(_)) => {
147 bail!("unsupported chain {chain}")
148 }
149 };
150 Ok(Url::from_str(s).unwrap())
151}
152
/// Every URL declared through `define_urls!` must parse as a valid [`Url`].
#[test]
fn parse_stable_urls() {
    ALL_URLS.iter().for_each(|raw| {
        let _did_not_panic = Url::from_str(raw).unwrap();
    });
}
159
160mod parse {
161 use std::{fmt::Display, str::FromStr};
168
169 use anyhow::{anyhow, bail};
170 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
171 use nom::{
172 Err, Parser,
173 branch::alt,
174 bytes::complete::{tag, take_until},
175 character::complete::digit1,
176 combinator::{map_res, recognize},
177 error::ErrorKind,
178 error_position,
179 multi::many1,
180 };
181
182 use crate::db::car::forest::FOREST_CAR_FILE_EXTENSION;
183
184 #[derive(PartialEq, Debug, Clone, Hash)]
185 pub(super) enum ParsedFilename<'a> {
186 Short {
187 date: NaiveDate,
188 time: NaiveTime,
189 height: i64,
190 },
191 Full {
192 vendor: &'a str,
193 chain: &'a str,
194 date: NaiveDate,
195 height: i64,
196 forest_format: bool,
197 },
198 }
199
200 impl Display for ParsedFilename<'_> {
201 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
202 match self {
203 ParsedFilename::Short { date, time, height } => f.write_fmt(format_args!(
204 "{height}_{}.car.zst",
205 NaiveDateTime::new(*date, *time).format("%Y_%m_%dT%H_%M_%SZ")
206 )),
207 ParsedFilename::Full {
208 vendor,
209 chain,
210 date,
211 height,
212 forest_format,
213 } => f.write_fmt(format_args!(
214 "{vendor}_snapshot_{chain}_{}_height_{height}{}.car.zst",
215 date.format("%Y-%m-%d"),
216 if *forest_format { ".forest" } else { "" }
217 )),
218 }
219 }
220 }
221
222 impl<'a> ParsedFilename<'a> {
223 pub fn date_and_height_and_forest(&self) -> (NaiveDate, i64, bool) {
224 match self {
225 ParsedFilename::Short { date, height, .. } => (*date, *height, false),
226 ParsedFilename::Full {
227 date,
228 height,
229 forest_format,
230 ..
231 } => (*date, *height, *forest_format),
232 }
233 }
234
235 pub fn parse_str(input: &'a str) -> anyhow::Result<Self> {
236 enter_nom(alt((short, full)), input)
237 }
238 }
239
240 fn number<T>(input: &str) -> nom::IResult<&str, T>
242 where
243 T: FromStr,
244 {
245 map_res(recognize(many1(digit1)), T::from_str).parse(input)
246 }
247
248 fn ymd(separator: &str) -> impl Fn(&str) -> nom::IResult<&str, NaiveDate> + '_ {
250 move |input| {
251 let (rest, (year, _, month, _, day)) =
252 (number, tag(separator), number, tag(separator), number).parse(input)?;
253 match NaiveDate::from_ymd_opt(year, month, day) {
254 Some(date) => Ok((rest, date)),
255 None => Err(Err::Error(error_position!(input, ErrorKind::Verify))),
256 }
257 }
258 }
259
260 fn hms(separator: &str) -> impl Fn(&str) -> nom::IResult<&str, NaiveTime> + '_ {
262 move |input| {
263 let (rest, (hour, _, minute, _, second)) =
264 (number, tag(separator), number, tag(separator), number).parse(input)?;
265 match NaiveTime::from_hms_opt(hour, minute, second) {
266 Some(date) => Ok((rest, date)),
267 None => Err(Err::Error(error_position!(input, ErrorKind::Verify))),
268 }
269 }
270 }
271
272 fn full(input: &str) -> nom::IResult<&str, ParsedFilename<'_>> {
273 let (rest, (vendor, _snapshot_, chain, _, date, _height_, height, car_zst)) = (
274 take_until("_snapshot_"),
275 tag("_snapshot_"),
276 take_until("_"),
277 tag("_"),
278 ymd("-"),
279 tag("_height_"),
280 number,
281 alt((tag(".car.zst"), tag(FOREST_CAR_FILE_EXTENSION))),
282 )
283 .parse(input)?;
284 Ok((
285 rest,
286 ParsedFilename::Full {
287 vendor,
288 chain,
289 date,
290 height,
291 forest_format: car_zst == FOREST_CAR_FILE_EXTENSION,
292 },
293 ))
294 }
295
296 fn short(input: &str) -> nom::IResult<&str, ParsedFilename<'_>> {
297 let (rest, (height, _, date, _, time, _)) = (
298 number,
299 tag("_"),
300 ymd("_"),
301 tag("T"),
302 hms("_"),
303 tag("Z.car.zst"),
304 )
305 .parse(input)?;
306 Ok((rest, ParsedFilename::Short { date, time, height }))
307 }
308
309 fn enter_nom<'a, T>(
310 mut parser: impl nom::Parser<&'a str, Output = T, Error = nom::error::Error<&'a str>>,
311 input: &'a str,
312 ) -> anyhow::Result<T> {
313 let (rest, t) = parser
314 .parse(input)
315 .map_err(|e| anyhow!("Parser error: {e}"))?;
316 if !rest.is_empty() {
317 bail!("Unexpected trailing input: {rest}")
318 }
319 Ok(t)
320 }
321
322 #[cfg(test)]
323 mod tests {
324 use super::*;
325
/// Round-trip check: each sample name must parse to the expected value, and
/// the expected value must render back to exactly the same name.
#[test]
fn test_serialization() {
    let cases = [
        (
            "forest_snapshot_mainnet_2023-05-30_height_2905376.car.zst",
            ParsedFilename::full("forest", "mainnet", 2023, 5, 30, 2905376, false),
        ),
        (
            "forest_snapshot_calibnet_2023-05-30_height_604419.car.zst",
            ParsedFilename::full("forest", "calibnet", 2023, 5, 30, 604419, false),
        ),
        (
            "forest_snapshot_mainnet_2023-05-30_height_2905376.forest.car.zst",
            ParsedFilename::full("forest", "mainnet", 2023, 5, 30, 2905376, true),
        ),
        (
            "forest_snapshot_calibnet_2023-05-30_height_604419.forest.car.zst",
            ParsedFilename::full("forest", "calibnet", 2023, 5, 30, 604419, true),
        ),
        (
            "2905920_2023_05_30T22_00_00Z.car.zst",
            ParsedFilename::short(2905920, 2023, 5, 30, 22, 0, 0),
        ),
        (
            "605520_2023_05_31T00_13_00Z.car.zst",
            ParsedFilename::short(605520, 2023, 5, 31, 0, 13, 0),
        ),
        (
            "filecoin_snapshot_calibnet_2023-06-13_height_643680.car.zst",
            ParsedFilename::full("filecoin", "calibnet", 2023, 6, 13, 643680, false),
        ),
        (
            "venus_snapshot_pineconenet_2045-01-01_height_2.car.zst",
            ParsedFilename::full("venus", "pineconenet", 2045, 1, 1, 2, false),
        ),
        (
            "filecoin_snapshot_calibnet_2023-06-13_height_643680.forest.car.zst",
            ParsedFilename::full("filecoin", "calibnet", 2023, 6, 13, 643680, true),
        ),
        (
            "venus_snapshot_pineconenet_2045-01-01_height_2.forest.car.zst",
            ParsedFilename::full("venus", "pineconenet", 2045, 1, 1, 2, true),
        ),
    ];

    for (text, value) in cases {
        let parsed = ParsedFilename::parse_str(text).unwrap();
        assert_eq!(value, parsed, "mismatch in deserialize");
        assert_eq!(value.to_string(), text, "mismatch in serialize");
    }
}
378
/// Names with anything after the expected `.car.zst` extension must be
/// rejected.
#[test]
fn test_wrong_ext() {
    let rejected = [
        "forest_snapshot_mainnet_2023-05-30_height_2905376.car.zstt",
        "forest_snapshot_mainnet_2023-05-30_height_2905376.car.zst.tmp",
    ];
    for name in rejected {
        ParsedFilename::parse_str(name).unwrap_err();
    }
}
388
389 impl ParsedFilename<'static> {
390 fn short(
393 height: i64,
394 year: i32,
395 month: u32,
396 day: u32,
397 hour: u32,
398 min: u32,
399 sec: u32,
400 ) -> Self {
401 Self::Short {
402 date: NaiveDate::from_ymd_opt(year, month, day).unwrap(),
403 time: NaiveTime::from_hms_opt(hour, min, sec).unwrap(),
404 height,
405 }
406 }
407 }
408
409 impl<'a> ParsedFilename<'a> {
410 fn full(
413 vendor: &'a str,
414 chain: &'a str,
415 year: i32,
416 month: u32,
417 day: u32,
418 height: i64,
419 forest_format: bool,
420 ) -> Self {
421 Self::Full {
422 vendor,
423 chain,
424 date: NaiveDate::from_ymd_opt(year, month, day).unwrap(),
425 height,
426 forest_format,
427 }
428 }
429 }
430 }
431}
432
#[cfg(test)]
mod tests {
    use super::parse_content_disposition;
    use reqwest::header::HeaderValue;

    /// A header carrying both an extended `filename*=` parameter and a quoted
    /// `filename="…"` parameter must yield the quoted file name.
    #[test]
    fn content_disposition_forest() {
        let header = HeaderValue::from_static(
            "attachment; filename*=UTF-8''forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst; \
             filename=\"forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst\"",
        );
        let extracted = parse_content_disposition(&header);
        assert_eq!(
            extracted.unwrap(),
            "forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst"
        );
    }
}