1use ahash::HashSet;
5use anyhow::{Context as _, bail};
6use clap::Args;
7use regex::Regex;
8use serde::Deserialize;
9use std::sync::LazyLock;
10use std::time::SystemTime;
11use url::Url;
12
13use crate::networks::{NetworkChain, calculate_expected_epoch};
14use crate::shim::clock::{ChainEpoch, EPOCH_DURATION_SECONDS};
15use crate::utils::net::global_http_client;
16
/// Base URL of the archive listing endpoint. Requests append
/// `/{network}/{snapshot_type}?format=json` to it.
const LIST_BASE: &str = "https://forest-archive.chainsafe.dev/list";

/// Epoch spacing between consecutive required lite snapshots.
const LITE_INTERVAL: ChainEpoch = 30_000;
/// Epoch spacing between consecutive required diff snapshots.
const DIFF_INTERVAL: ChainEpoch = 3_000;
21
/// Mainnet genesis timestamp handed to `calculate_expected_epoch` together
/// with the current Unix time in seconds.
const MAINNET_GENESIS_TIMESTAMP: u64 = 1598306400;
/// Calibnet genesis timestamp handed to `calculate_expected_epoch` together
/// with the current Unix time in seconds.
const CALIBNET_GENESIS_TIMESTAMP: u64 = 1667326380;

/// Report which required lite and diff snapshots are absent from the archive listing.
#[derive(Debug, Args)]
pub struct ArchiveMissingCommand {
    /// Network to check; only mainnet and calibnet are accepted by `run`.
    #[arg(long, required = true)]
    chain: NetworkChain,
    /// First epoch of the range to check; 0 when omitted.
    #[arg(long)]
    from: Option<ChainEpoch>,
    /// Last epoch of the range to check; when omitted, the expected current
    /// epoch minus one diff interval is used.
    #[arg(long)]
    to: Option<ChainEpoch>,
}
40
/// One entry of the archive listing. Only the snapshot URL is captured; the
/// snapshot height is later parsed out of the URL path.
#[derive(Debug, Deserialize)]
struct ListingItem {
    /// URL of the snapshot file.
    url: Url,
}
45
/// JSON body returned by the archive listing endpoint. Only the `items`
/// array is captured.
#[derive(Debug, Deserialize)]
struct ListingResponse {
    /// Snapshot entries contained in the listing.
    items: Vec<ListingItem>,
}
50
51fn extract_height(url: &Url) -> Option<ChainEpoch> {
55 static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"_height_(\d+)").unwrap());
56 let path = url.path();
57 let caps = RE.captures(path)?;
58 caps[1].parse().ok()
59}
60
61fn parse_listing_heights(data: &ListingResponse) -> HashSet<ChainEpoch> {
63 data.items
64 .iter()
65 .filter_map(|item| extract_height(&item.url))
66 .collect()
67}
68
69fn compute_required_lite(from: ChainEpoch, to: ChainEpoch) -> Vec<ChainEpoch> {
71 let base_from = (from / LITE_INTERVAL) * LITE_INTERVAL;
72 let base_to = (to / LITE_INTERVAL) * LITE_INTERVAL;
73 (base_from..=base_to)
74 .step_by(LITE_INTERVAL as usize)
75 .collect()
76}
77
78fn compute_required_diff(from: ChainEpoch, to: ChainEpoch) -> Vec<ChainEpoch> {
80 let base_from = (from / LITE_INTERVAL) * LITE_INTERVAL;
81 let base_to = (to / LITE_INTERVAL) * LITE_INTERVAL;
82 let diff_to = if to > base_to {
83 ((to - 1) / DIFF_INTERVAL) * DIFF_INTERVAL
84 } else if base_to >= DIFF_INTERVAL {
85 base_to - DIFF_INTERVAL
86 } else {
87 return Vec::new();
90 };
91 (base_from..=diff_to)
92 .step_by(DIFF_INTERVAL as usize)
93 .collect()
94}
95
96fn find_missing(required: &[ChainEpoch], available: &HashSet<ChainEpoch>) -> Vec<ChainEpoch> {
98 required
99 .iter()
100 .filter(|h| !available.contains(h))
101 .copied()
102 .collect()
103}
104
105async fn fetch_available_heights(
107 client: &reqwest::Client,
108 network: &str,
109 snapshot_type: &str,
110) -> anyhow::Result<HashSet<ChainEpoch>> {
111 let url = format!("{LIST_BASE}/{network}/{snapshot_type}?format=json");
112 let resp = client
113 .get(&url)
114 .send()
115 .await
116 .context("failed to fetch archive listing")?;
117 if !resp.status().is_success() {
118 bail!("{url}: HTTP {}", resp.status());
119 }
120 let data: ListingResponse = resp
121 .json()
122 .await
123 .context("failed to parse archive listing")?;
124 Ok(parse_listing_heights(&data))
125}
126
127impl ArchiveMissingCommand {
128 pub async fn run(self) -> anyhow::Result<()> {
129 let (network, genesis_ts) = match &self.chain {
130 NetworkChain::Mainnet => ("mainnet", MAINNET_GENESIS_TIMESTAMP),
131 NetworkChain::Calibnet => ("calibnet", CALIBNET_GENESIS_TIMESTAMP),
132 other => bail!("network {other} is not supported"),
133 };
134
135 let now = SystemTime::now()
136 .duration_since(SystemTime::UNIX_EPOCH)?
137 .as_secs();
138 let current_epoch =
139 calculate_expected_epoch(now, genesis_ts, EPOCH_DURATION_SECONDS as u32);
140
141 let from = self.from.unwrap_or(0);
142 let to = self.to.unwrap_or_else(|| current_epoch - DIFF_INTERVAL);
143
144 if from > to {
145 bail!("--from ({from}) must be <= --to ({to})");
146 }
147
148 println!(
149 "Checking {network} epochs {from}..={to} (current network epoch: {current_epoch})"
150 );
151
152 let client = global_http_client();
153
154 println!("Fetching archive listings...");
155 let (available_lite, available_diff) = tokio::try_join!(
156 fetch_available_heights(&client, network, "lite"),
157 fetch_available_heights(&client, network, "diff"),
158 )?;
159
160 println!(
161 "Archive has {} lite and {} diff snapshots.",
162 available_lite.len(),
163 available_diff.len()
164 );
165
166 let required_lite = compute_required_lite(from, to);
167 let required_diff = compute_required_diff(from, to);
168
169 let missing_lite = find_missing(&required_lite, &available_lite);
170 let missing_diff = find_missing(&required_diff, &available_diff);
171
172 let total_required = required_lite.len() + required_diff.len();
173 let total_missing = missing_lite.len() + missing_diff.len();
174
175 if total_missing == 0 {
176 let base_from = (from / LITE_INTERVAL) * LITE_INTERVAL;
177 println!(
178 "All {total_required} required snapshots are available (epochs {base_from}..={to}).",
179 );
180 } else {
181 println!("\n{total_missing} of {total_required} required snapshots are MISSING:\n");
182 if !missing_lite.is_empty() {
183 println!(" Missing lite snapshots:");
184 for h in &missing_lite {
185 println!(" lite at height {h}");
186 }
187 }
188 if !missing_diff.is_empty() {
189 println!(" Missing diff snapshots:");
190 for h in &missing_diff {
191 println!(" diff at height {h}");
192 }
193 }
194 bail!("{total_missing} of {total_required} required snapshots are missing");
195 }
196
197 Ok(())
198 }
199}
200
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a URL literal, panicking when it is malformed.
    fn parse_url(raw: &str) -> Url {
        Url::parse(raw).unwrap()
    }

    /// Builds a height set of the same type the production code uses.
    fn height_set(values: impl IntoIterator<Item = ChainEpoch>) -> HashSet<ChainEpoch> {
        values.into_iter().collect()
    }

    #[test]
    fn test_extract_height_lite() {
        let lite = parse_url(
            "https://example.com/forest_snapshot_calibnet_2026-03-04_height_3510000.forest.car.zst",
        );
        assert_eq!(extract_height(&lite), Some(3510000));
    }

    #[test]
    fn test_extract_height_diff() {
        // Diff file names carry `<height>+<length>`; only the height is wanted.
        let cases = [
            (
                "https://example.com/forest_diff_calibnet_2022-11-02_height_0+3000.forest.car.zst",
                0,
            ),
            (
                "https://example.com/forest_diff_mainnet_2025-12-24_height_3480000+3000.forest.car.zst",
                3480000,
            ),
        ];
        for (raw, expected) in cases {
            assert_eq!(extract_height(&parse_url(raw)), Some(expected));
        }
    }

    #[test]
    fn test_extract_height_invalid() {
        let unrelated = parse_url("https://example.com/not-a-snapshot");
        assert_eq!(extract_height(&unrelated), None);
    }

    #[test]
    fn test_compute_required_lite_single_segment() {
        // Both bounds fall inside the segment that starts at 30_000.
        let lites = compute_required_lite(30_000, 59_999);
        assert_eq!(lites, vec![30_000]);
    }

    #[test]
    fn test_compute_required_lite_multiple_segments() {
        let lites = compute_required_lite(30_000, 90_000);
        assert_eq!(lites, vec![30_000, 60_000, 90_000]);
    }

    #[test]
    fn test_compute_required_lite_from_genesis() {
        let lites = compute_required_lite(0, 60_000);
        assert_eq!(lites, vec![0, 30_000, 60_000]);
    }

    #[test]
    fn test_compute_required_lite_rounds_down() {
        // Unaligned bounds are rounded down to their lite boundaries.
        let lites = compute_required_lite(5_000, 35_000);
        assert_eq!(lites, vec![0, 30_000]);
    }

    #[test]
    fn test_compute_required_diff_within_segment() {
        // The diffs starting at 30_000 and 33_000 reach 36_000.
        let expected: Vec<ChainEpoch> = vec![30_000, 33_000];
        assert_eq!(compute_required_diff(30_000, 36_000), expected);
    }

    #[test]
    fn test_compute_required_diff_exact_lite_boundary() {
        // Ending exactly on a lite boundary: the last diff starts one
        // interval before it.
        let expected: Vec<ChainEpoch> = vec![
            30_000, 33_000, 36_000, 39_000, 42_000, 45_000, 48_000, 51_000, 54_000, 57_000,
        ];
        assert_eq!(compute_required_diff(30_000, 60_000), expected);
    }

    #[test]
    fn test_compute_required_diff_cross_segment() {
        // The start is rounded down to the lite boundary at 30_000, and the
        // range continues past the next boundary at 60_000.
        let expected: Vec<ChainEpoch> = vec![
            30_000, 33_000, 36_000, 39_000, 42_000, 45_000, 48_000, 51_000, 54_000, 57_000,
            60_000,
        ];
        assert_eq!(compute_required_diff(57_000, 63_000), expected);
    }

    #[test]
    fn test_find_missing_none() {
        let present = height_set([0, 30_000, 60_000, 90_000]);
        let gaps = find_missing(&[0, 30_000, 60_000], &present);
        assert!(gaps.is_empty());
    }

    #[test]
    fn test_find_missing_some() {
        let present = height_set([0, 60_000]);
        let gaps = find_missing(&[0, 30_000, 60_000], &present);
        assert_eq!(gaps, vec![30_000]);
    }

    #[test]
    fn test_find_missing_all() {
        let present: HashSet<ChainEpoch> = HashSet::default();
        let gaps = find_missing(&[0, 30_000], &present);
        assert_eq!(gaps, vec![0, 30_000]);
    }

    #[test]
    fn test_parse_listing_heights_from_json() {
        let body = r#"{
            "total": 3,
            "offset": 0,
            "limit": 0,
            "items": [
                {
                    "url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-03-04_height_3510000.forest.car.zst",
                    "sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-03-04_height_3510000.forest.car.zst.sha256sum",
                    "size": 7528742793,
                    "uploaded": "2026-03-05T00:52:34.198Z"
                },
                {
                    "url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-02-22_height_3480000.forest.car.zst",
                    "sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-02-22_height_3480000.forest.car.zst.sha256sum",
                    "size": 7440018317,
                    "uploaded": "2026-02-22T23:40:48.106Z"
                },
                {
                    "url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2022-11-01_height_0.forest.car.zst",
                    "sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2022-11-01_height_0.forest.car.zst.sha256sum",
                    "size": 491234,
                    "uploaded": "2023-08-30T08:54:56.805Z"
                }
            ]
        }"#;
        let listing: ListingResponse = serde_json::from_str(body).unwrap();
        let parsed = parse_listing_heights(&listing);
        assert_eq!(parsed, height_set([0, 3_480_000, 3_510_000]));
    }

    #[test]
    fn test_parse_listing_heights_with_diffs() {
        let body = r#"{
            "total": 2,
            "offset": 0,
            "limit": 0,
            "items": [
                {
                    "url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2026-03-04_height_3510000+3000.forest.car.zst",
                    "sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2026-03-04_height_3510000+3000.forest.car.zst.sha256sum",
                    "size": 123456,
                    "uploaded": "2026-03-05T01:00:00.000Z"
                },
                {
                    "url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2022-11-02_height_0+3000.forest.car.zst",
                    "sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2022-11-02_height_0+3000.forest.car.zst.sha256sum",
                    "size": 789012,
                    "uploaded": "2023-08-30T09:00:00.000Z"
                }
            ]
        }"#;
        let listing: ListingResponse = serde_json::from_str(body).unwrap();
        let parsed = parse_listing_heights(&listing);
        assert_eq!(parsed, height_set([0, 3_510_000]));
    }

    #[test]
    fn test_end_to_end_missing_detection() {
        // The lite snapshot at 30_000 is absent; every diff below 60_000 exists.
        let lite_on_archive = height_set([0, 60_000]);
        let diff_on_archive: HashSet<_> = (0..60_000).step_by(DIFF_INTERVAL as usize).collect();

        let lite_needed = compute_required_lite(0, 60_000);
        let diff_needed = compute_required_diff(0, 60_000);

        let lite_gaps = find_missing(&lite_needed, &lite_on_archive);
        let diff_gaps = find_missing(&diff_needed, &diff_on_archive);

        assert_eq!(lite_gaps, vec![30_000]);
        assert!(diff_gaps.is_empty());
    }

    #[test]
    fn test_end_to_end_all_present() {
        let lite_on_archive = height_set([0, 30_000, 60_000]);
        let diff_on_archive: HashSet<_> = (0..60_000).step_by(DIFF_INTERVAL as usize).collect();

        let lite_needed = compute_required_lite(0, 60_000);
        let diff_needed = compute_required_diff(0, 60_000);

        assert!(find_missing(&lite_needed, &lite_on_archive).is_empty());
        assert!(find_missing(&diff_needed, &diff_on_archive).is_empty());
    }
}