1use std::{
5 fmt::Display,
6 path::{Path, PathBuf},
7 str::FromStr,
8};
9
10use crate::{cli_shared::snapshot::parse::ParsedFilename, utils::net::download_file_with_retry};
11use crate::{networks::NetworkChain, utils::net::DownloadFileOption};
12use anyhow::{Context as _, bail};
13use chrono::NaiveDate;
14use url::Url;
15
16#[derive(
19 Debug,
20 Clone,
21 Copy,
22 Hash,
23 PartialEq,
24 Eq,
25 Default,
26 strum::EnumString, strum::Display, clap::ValueEnum, )]
30#[strum(serialize_all = "kebab-case")]
31pub enum TrustedVendor {
32 #[default]
33 Forest,
34}
35
36pub fn filename(
40 vendor: impl Display,
41 chain: impl Display,
42 date: NaiveDate,
43 height: i64,
44 forest_format: bool,
45) -> String {
46 let vendor = vendor.to_string();
47 let chain = chain.to_string();
48 ParsedFilename::Full {
49 vendor: &vendor,
50 chain: &chain,
51 date,
52 height,
53 forest_format,
54 }
55 .to_string()
56}
57
58pub async fn fetch(
60 directory: &Path,
61 chain: &NetworkChain,
62 vendor: TrustedVendor,
63) -> anyhow::Result<PathBuf> {
64 let (url, _len, path) = peek(vendor, chain).await?;
65 let (date, height, forest_format) = ParsedFilename::parse_str(&path)
66 .context("unexpected path format")?
67 .date_and_height_and_forest();
68 let filename = filename(vendor, chain, date, height, forest_format);
69
70 download_file_with_retry(
71 &url,
72 directory,
73 &filename,
74 DownloadFileOption::Resumable,
75 None,
76 )
77 .await
78}
79
80pub async fn peek(
85 vendor: TrustedVendor,
86 chain: &NetworkChain,
87) -> anyhow::Result<(Url, u64, String)> {
88 let stable_url = stable_url(vendor, chain)?;
89 let response = reqwest::get(stable_url)
95 .await?
96 .error_for_status()
97 .context("server returned an error response")?;
98 let final_url = response.url().clone();
99 let cd_path = response
100 .headers()
101 .get(reqwest::header::CONTENT_DISPOSITION)
102 .and_then(parse_content_disposition);
103 Ok((
104 final_url,
105 response
106 .content_length()
107 .context("no content-length header")?,
108 cd_path.context("no content-disposition filepath")?,
109 ))
110}
111
112fn parse_content_disposition(value: &reqwest::header::HeaderValue) -> Option<String> {
116 use regex::Regex;
117 let re = Regex::new("filename=\"([^\"]+)\"").ok()?;
118 let cap = re.captures(value.to_str().ok()?)?;
119 Some(cap.get(1)?.as_str().to_owned())
120}
121
/// Declares a list of `&str` URL constants and, in test builds only, an
/// `ALL_URLS` slice containing every declared constant in declaration order.
///
/// Collecting the constants lets a test verify that each one parses as a URL.
macro_rules! define_urls {
    ($($vis:vis const $name:ident: &str = $value:literal;)* $(,)?) => {
        $($vis const $name: &str = $value;)*

        #[cfg(test)]
        const ALL_URLS: &[&str] = &[$($name),*];
    };
}

define_urls! {
    // Mainnet snapshot endpoint.
    const FOREST_MAINNET_COMPRESSED: &str = "https://forest-archive.chainsafe.dev/latest/mainnet/";
    // Calibnet snapshot endpoint.
    const FOREST_CALIBNET_COMPRESSED: &str =
        "https://forest-archive.chainsafe.dev/latest/calibnet/";
}
139
140pub fn stable_url(vendor: TrustedVendor, chain: &NetworkChain) -> anyhow::Result<Url> {
141 let s = match (vendor, chain) {
142 (TrustedVendor::Forest, NetworkChain::Mainnet) => FOREST_MAINNET_COMPRESSED,
143 (TrustedVendor::Forest, NetworkChain::Calibnet) => FOREST_CALIBNET_COMPRESSED,
144 (TrustedVendor::Forest, NetworkChain::Butterflynet | NetworkChain::Devnet(_)) => {
145 bail!("unsupported chain {chain}")
146 }
147 };
148 Ok(Url::from_str(s).unwrap())
149}
150
/// Every constant declared through `define_urls!` must be a valid URL.
#[test]
fn parse_stable_urls() {
    for candidate in ALL_URLS.iter().copied() {
        // Only the absence of a panic matters; the parsed value is discarded.
        let _parsed = Url::from_str(candidate).unwrap();
    }
}
157
158mod parse {
159 use std::{fmt::Display, str::FromStr};
166
167 use anyhow::{anyhow, bail};
168 use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
169 use nom::{
170 Err, Parser,
171 branch::alt,
172 bytes::complete::{tag, take_until},
173 character::complete::digit1,
174 combinator::{map_res, recognize},
175 error::ErrorKind,
176 error_position,
177 multi::many1,
178 };
179
180 use crate::db::car::forest::FOREST_CAR_FILE_EXTENSION;
181
182 #[derive(PartialEq, Debug, Clone, Hash)]
183 pub(super) enum ParsedFilename<'a> {
184 Short {
185 date: NaiveDate,
186 time: NaiveTime,
187 height: i64,
188 },
189 Full {
190 vendor: &'a str,
191 chain: &'a str,
192 date: NaiveDate,
193 height: i64,
194 forest_format: bool,
195 },
196 }
197
198 impl Display for ParsedFilename<'_> {
199 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
200 match self {
201 ParsedFilename::Short { date, time, height } => f.write_fmt(format_args!(
202 "{height}_{}.car.zst",
203 NaiveDateTime::new(*date, *time).format("%Y_%m_%dT%H_%M_%SZ")
204 )),
205 ParsedFilename::Full {
206 vendor,
207 chain,
208 date,
209 height,
210 forest_format,
211 } => f.write_fmt(format_args!(
212 "{vendor}_snapshot_{chain}_{}_height_{height}{}.car.zst",
213 date.format("%Y-%m-%d"),
214 if *forest_format { ".forest" } else { "" }
215 )),
216 }
217 }
218 }
219
220 impl<'a> ParsedFilename<'a> {
221 pub fn date_and_height_and_forest(&self) -> (NaiveDate, i64, bool) {
222 match self {
223 ParsedFilename::Short { date, height, .. } => (*date, *height, false),
224 ParsedFilename::Full {
225 date,
226 height,
227 forest_format,
228 ..
229 } => (*date, *height, *forest_format),
230 }
231 }
232
233 pub fn parse_str(input: &'a str) -> anyhow::Result<Self> {
234 enter_nom(alt((short, full)), input)
235 }
236 }
237
238 fn number<T>(input: &str) -> nom::IResult<&str, T>
240 where
241 T: FromStr,
242 {
243 map_res(recognize(many1(digit1)), T::from_str).parse(input)
244 }
245
246 fn ymd(separator: &str) -> impl Fn(&str) -> nom::IResult<&str, NaiveDate> + '_ {
248 move |input| {
249 let (rest, (year, _, month, _, day)) =
250 (number, tag(separator), number, tag(separator), number).parse(input)?;
251 match NaiveDate::from_ymd_opt(year, month, day) {
252 Some(date) => Ok((rest, date)),
253 None => Err(Err::Error(error_position!(input, ErrorKind::Verify))),
254 }
255 }
256 }
257
258 fn hms(separator: &str) -> impl Fn(&str) -> nom::IResult<&str, NaiveTime> + '_ {
260 move |input| {
261 let (rest, (hour, _, minute, _, second)) =
262 (number, tag(separator), number, tag(separator), number).parse(input)?;
263 match NaiveTime::from_hms_opt(hour, minute, second) {
264 Some(date) => Ok((rest, date)),
265 None => Err(Err::Error(error_position!(input, ErrorKind::Verify))),
266 }
267 }
268 }
269
270 fn full(input: &str) -> nom::IResult<&str, ParsedFilename<'_>> {
271 let (rest, (vendor, _snapshot_, chain, _, date, _height_, height, car_zst)) = (
272 take_until("_snapshot_"),
273 tag("_snapshot_"),
274 take_until("_"),
275 tag("_"),
276 ymd("-"),
277 tag("_height_"),
278 number,
279 alt((tag(".car.zst"), tag(FOREST_CAR_FILE_EXTENSION))),
280 )
281 .parse(input)?;
282 Ok((
283 rest,
284 ParsedFilename::Full {
285 vendor,
286 chain,
287 date,
288 height,
289 forest_format: car_zst == FOREST_CAR_FILE_EXTENSION,
290 },
291 ))
292 }
293
294 fn short(input: &str) -> nom::IResult<&str, ParsedFilename<'_>> {
295 let (rest, (height, _, date, _, time, _)) = (
296 number,
297 tag("_"),
298 ymd("_"),
299 tag("T"),
300 hms("_"),
301 tag("Z.car.zst"),
302 )
303 .parse(input)?;
304 Ok((rest, ParsedFilename::Short { date, time, height }))
305 }
306
307 fn enter_nom<'a, T>(
308 mut parser: impl nom::Parser<&'a str, Output = T, Error = nom::error::Error<&'a str>>,
309 input: &'a str,
310 ) -> anyhow::Result<T> {
311 let (rest, t) = parser
312 .parse(input)
313 .map_err(|e| anyhow!("Parser error: {e}"))?;
314 if !rest.is_empty() {
315 bail!("Unexpected trailing input: {rest}")
316 }
317 Ok(t)
318 }
319
320 #[cfg(test)]
321 mod tests {
322 use super::*;
323
324 #[test]
325 fn test_serialization() {
326 for (text, value) in [
327 (
328 "forest_snapshot_mainnet_2023-05-30_height_2905376.car.zst",
329 ParsedFilename::full("forest", "mainnet", 2023, 5, 30, 2905376, false),
330 ),
331 (
332 "forest_snapshot_calibnet_2023-05-30_height_604419.car.zst",
333 ParsedFilename::full("forest", "calibnet", 2023, 5, 30, 604419, false),
334 ),
335 (
336 "forest_snapshot_mainnet_2023-05-30_height_2905376.forest.car.zst",
337 ParsedFilename::full("forest", "mainnet", 2023, 5, 30, 2905376, true),
338 ),
339 (
340 "forest_snapshot_calibnet_2023-05-30_height_604419.forest.car.zst",
341 ParsedFilename::full("forest", "calibnet", 2023, 5, 30, 604419, true),
342 ),
343 (
344 "2905920_2023_05_30T22_00_00Z.car.zst",
345 ParsedFilename::short(2905920, 2023, 5, 30, 22, 0, 0),
346 ),
347 (
348 "605520_2023_05_31T00_13_00Z.car.zst",
349 ParsedFilename::short(605520, 2023, 5, 31, 0, 13, 0),
350 ),
351 (
352 "filecoin_snapshot_calibnet_2023-06-13_height_643680.car.zst",
353 ParsedFilename::full("filecoin", "calibnet", 2023, 6, 13, 643680, false),
354 ),
355 (
356 "venus_snapshot_pineconenet_2045-01-01_height_2.car.zst",
357 ParsedFilename::full("venus", "pineconenet", 2045, 1, 1, 2, false),
358 ),
359 (
360 "filecoin_snapshot_calibnet_2023-06-13_height_643680.forest.car.zst",
361 ParsedFilename::full("filecoin", "calibnet", 2023, 6, 13, 643680, true),
362 ),
363 (
364 "venus_snapshot_pineconenet_2045-01-01_height_2.forest.car.zst",
365 ParsedFilename::full("venus", "pineconenet", 2045, 1, 1, 2, true),
366 ),
367 ] {
368 assert_eq!(
369 value,
370 ParsedFilename::parse_str(text).unwrap(),
371 "mismatch in deserialize"
372 );
373 assert_eq!(value.to_string(), text, "mismatch in serialize");
374 }
375 }
376
377 #[test]
378 fn test_wrong_ext() {
379 ParsedFilename::parse_str("forest_snapshot_mainnet_2023-05-30_height_2905376.car.zstt")
380 .unwrap_err();
381 ParsedFilename::parse_str(
382 "forest_snapshot_mainnet_2023-05-30_height_2905376.car.zst.tmp",
383 )
384 .unwrap_err();
385 }
386
387 impl ParsedFilename<'static> {
388 fn short(
391 height: i64,
392 year: i32,
393 month: u32,
394 day: u32,
395 hour: u32,
396 min: u32,
397 sec: u32,
398 ) -> Self {
399 Self::Short {
400 date: NaiveDate::from_ymd_opt(year, month, day).unwrap(),
401 time: NaiveTime::from_hms_opt(hour, min, sec).unwrap(),
402 height,
403 }
404 }
405 }
406
407 impl<'a> ParsedFilename<'a> {
408 fn full(
411 vendor: &'a str,
412 chain: &'a str,
413 year: i32,
414 month: u32,
415 day: u32,
416 height: i64,
417 forest_format: bool,
418 ) -> Self {
419 Self::Full {
420 vendor,
421 chain,
422 date: NaiveDate::from_ymd_opt(year, month, day).unwrap(),
423 height,
424 forest_format,
425 }
426 }
427 }
428 }
429}
430
#[cfg(test)]
mod tests {
    use super::parse_content_disposition;
    use reqwest::header::HeaderValue;

    /// A header carrying both the extended `filename*=` form and the quoted
    /// `filename="..."` form must yield the quoted file name.
    #[test]
    fn content_disposition_forest() {
        const HEADER: &str = "attachment; filename*=UTF-8''forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst; \
                              filename=\"forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst\"";

        let header = HeaderValue::from_static(HEADER);
        let extracted = parse_content_disposition(&header).unwrap();
        assert_eq!(
            extracted,
            "forest_snapshot_calibnet_2023-09-14_height_911888.forest.car.zst"
        );
    }
}