Skip to main content

forest/dev/subcommands/
archive_missing_cmd.rs

1// Copyright 2019-2026 ChainSafe Systems
2// SPDX-License-Identifier: Apache-2.0, MIT
3
4use ahash::HashSet;
5use anyhow::{Context as _, bail};
6use clap::Args;
7use regex::Regex;
8use serde::Deserialize;
9use std::sync::LazyLock;
10use std::time::SystemTime;
11use url::Url;
12
13use crate::networks::{NetworkChain, calculate_expected_epoch};
14use crate::shim::clock::{ChainEpoch, EPOCH_DURATION_SECONDS};
15use crate::utils::net::global_http_client;
16
17const LIST_BASE: &str = "https://forest-archive.chainsafe.dev/list";
18
19const LITE_INTERVAL: ChainEpoch = 30_000;
20const DIFF_INTERVAL: ChainEpoch = 3_000;
21
22/// Well-known genesis timestamps (Unix seconds).
23const MAINNET_GENESIS_TIMESTAMP: u64 = 1598306400; // 2020-08-24T22:00:00Z
24const CALIBNET_GENESIS_TIMESTAMP: u64 = 1667326380; // 2022-11-01T14:13:00Z
25
26#[derive(Debug, Args)]
27pub struct ArchiveMissingCommand {
28    /// Filecoin network chain (e.g., calibnet, mainnet)
29    #[arg(long, required = true)]
30    chain: NetworkChain,
31    /// Start epoch (inclusive). Defaults to genesis (epoch 0).
32    /// Rounded down to the nearest lite boundary.
33    #[arg(long)]
34    from: Option<ChainEpoch>,
35    /// End epoch (inclusive). Defaults to the current expected epoch minus 3000.
36    /// Rounded up to the next diff boundary.
37    #[arg(long)]
38    to: Option<ChainEpoch>,
39}
40
41#[derive(Debug, Deserialize)]
42struct ListingItem {
43    url: Url,
44}
45
46#[derive(Debug, Deserialize)]
47struct ListingResponse {
48    items: Vec<ListingItem>,
49}
50
51/// Extract height from an archive URL.
52/// Lite: `..._height_30000.forest.car.zst` → 30000
53/// Diff: `..._height_0+3000.forest.car.zst` → 0
54fn extract_height(url: &Url) -> Option<ChainEpoch> {
55    static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"_height_(\d+)").unwrap());
56    let path = url.path();
57    let caps = RE.captures(path)?;
58    caps[1].parse().ok()
59}
60
61/// Parse a JSON listing response into a set of available heights.
62fn parse_listing_heights(data: &ListingResponse) -> HashSet<ChainEpoch> {
63    data.items
64        .iter()
65        .filter_map(|item| extract_height(&item.url))
66        .collect()
67}
68
69/// Compute the required lite snapshot epochs for a given range.
70fn compute_required_lite(from: ChainEpoch, to: ChainEpoch) -> Vec<ChainEpoch> {
71    let base_from = (from / LITE_INTERVAL) * LITE_INTERVAL;
72    let base_to = (to / LITE_INTERVAL) * LITE_INTERVAL;
73    (base_from..=base_to)
74        .step_by(LITE_INTERVAL as usize)
75        .collect()
76}
77
78/// Compute the required diff snapshot epochs for a given range.
79fn compute_required_diff(from: ChainEpoch, to: ChainEpoch) -> Vec<ChainEpoch> {
80    let base_from = (from / LITE_INTERVAL) * LITE_INTERVAL;
81    let base_to = (to / LITE_INTERVAL) * LITE_INTERVAL;
82    let diff_to = if to > base_to {
83        ((to - 1) / DIFF_INTERVAL) * DIFF_INTERVAL
84    } else if base_to >= DIFF_INTERVAL {
85        base_to - DIFF_INTERVAL
86    } else {
87        // Range falls within the first lite segment with to on the boundary;
88        // no diffs are needed (the lite snapshot at epoch 0 covers it).
89        return Vec::new();
90    };
91    (base_from..=diff_to)
92        .step_by(DIFF_INTERVAL as usize)
93        .collect()
94}
95
96/// Return the subset of `required` epochs not present in `available`.
97fn find_missing(required: &[ChainEpoch], available: &HashSet<ChainEpoch>) -> Vec<ChainEpoch> {
98    required
99        .iter()
100        .filter(|h| !available.contains(h))
101        .copied()
102        .collect()
103}
104
105/// Fetch the set of available heights for a given network and snapshot type.
106async fn fetch_available_heights(
107    client: &reqwest::Client,
108    network: &str,
109    snapshot_type: &str,
110) -> anyhow::Result<HashSet<ChainEpoch>> {
111    let url = format!("{LIST_BASE}/{network}/{snapshot_type}?format=json");
112    let resp = client
113        .get(&url)
114        .send()
115        .await
116        .context("failed to fetch archive listing")?;
117    if !resp.status().is_success() {
118        bail!("{url}: HTTP {}", resp.status());
119    }
120    let data: ListingResponse = resp
121        .json()
122        .await
123        .context("failed to parse archive listing")?;
124    Ok(parse_listing_heights(&data))
125}
126
127impl ArchiveMissingCommand {
128    pub async fn run(self) -> anyhow::Result<()> {
129        let (network, genesis_ts) = match &self.chain {
130            NetworkChain::Mainnet => ("mainnet", MAINNET_GENESIS_TIMESTAMP),
131            NetworkChain::Calibnet => ("calibnet", CALIBNET_GENESIS_TIMESTAMP),
132            other => bail!("network {other} is not supported"),
133        };
134
135        let now = SystemTime::now()
136            .duration_since(SystemTime::UNIX_EPOCH)?
137            .as_secs();
138        let current_epoch =
139            calculate_expected_epoch(now, genesis_ts, EPOCH_DURATION_SECONDS as u32);
140
141        let from = self.from.unwrap_or(0);
142        let to = self.to.unwrap_or_else(|| current_epoch - DIFF_INTERVAL);
143
144        if from > to {
145            bail!("--from ({from}) must be <= --to ({to})");
146        }
147
148        println!(
149            "Checking {network} epochs {from}..={to} (current network epoch: {current_epoch})"
150        );
151
152        let client = global_http_client();
153
154        println!("Fetching archive listings...");
155        let (available_lite, available_diff) = tokio::try_join!(
156            fetch_available_heights(&client, network, "lite"),
157            fetch_available_heights(&client, network, "diff"),
158        )?;
159
160        println!(
161            "Archive has {} lite and {} diff snapshots.",
162            available_lite.len(),
163            available_diff.len()
164        );
165
166        let required_lite = compute_required_lite(from, to);
167        let required_diff = compute_required_diff(from, to);
168
169        let missing_lite = find_missing(&required_lite, &available_lite);
170        let missing_diff = find_missing(&required_diff, &available_diff);
171
172        let total_required = required_lite.len() + required_diff.len();
173        let total_missing = missing_lite.len() + missing_diff.len();
174
175        if total_missing == 0 {
176            let base_from = (from / LITE_INTERVAL) * LITE_INTERVAL;
177            println!(
178                "All {total_required} required snapshots are available (epochs {base_from}..={to}).",
179            );
180        } else {
181            println!("\n{total_missing} of {total_required} required snapshots are MISSING:\n");
182            if !missing_lite.is_empty() {
183                println!("  Missing lite snapshots:");
184                for h in &missing_lite {
185                    println!("    lite at height {h}");
186                }
187            }
188            if !missing_diff.is_empty() {
189                println!("  Missing diff snapshots:");
190                for h in &missing_diff {
191                    println!("    diff at height {h}");
192                }
193            }
194            bail!("{total_missing} of {total_required} required snapshots are missing");
195        }
196
197        Ok(())
198    }
199}
200
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `s` into a [`Url`], panicking on malformed test input.
    fn parse_url(s: &str) -> Url {
        Url::parse(s).unwrap()
    }

    /// Deserialize `json` as a listing and return the heights found in it.
    fn heights_in(json: &str) -> HashSet<ChainEpoch> {
        let listing: ListingResponse = serde_json::from_str(json).unwrap();
        parse_listing_heights(&listing)
    }

    /// A diff listing without gaps: every diff epoch in `0..end`.
    fn all_diffs_below(end: ChainEpoch) -> HashSet<ChainEpoch> {
        (0..end).step_by(DIFF_INTERVAL as usize).collect()
    }

    #[test]
    fn test_extract_height_lite() {
        let lite = parse_url(
            "https://example.com/forest_snapshot_calibnet_2026-03-04_height_3510000.forest.car.zst",
        );
        assert_eq!(extract_height(&lite), Some(3510000));
    }

    #[test]
    fn test_extract_height_diff() {
        // For diffs the height is the start epoch; the `+3000` suffix is not part of it.
        let from_genesis = parse_url(
            "https://example.com/forest_diff_calibnet_2022-11-02_height_0+3000.forest.car.zst",
        );
        assert_eq!(extract_height(&from_genesis), Some(0));

        let later = parse_url(
            "https://example.com/forest_diff_mainnet_2025-12-24_height_3480000+3000.forest.car.zst",
        );
        assert_eq!(extract_height(&later), Some(3480000));
    }

    #[test]
    fn test_extract_height_invalid() {
        let unrelated = parse_url("https://example.com/not-a-snapshot");
        assert_eq!(extract_height(&unrelated), None);
    }

    #[test]
    fn test_compute_required_lite_single_segment() {
        // The whole range sits inside one lite segment, so one lite snapshot suffices.
        let lites = compute_required_lite(30_000, 59_999);
        assert_eq!(lites, vec![30_000]);
    }

    #[test]
    fn test_compute_required_lite_multiple_segments() {
        let lites = compute_required_lite(30_000, 90_000);
        assert_eq!(lites, vec![30_000, 60_000, 90_000]);
    }

    #[test]
    fn test_compute_required_lite_from_genesis() {
        let lites = compute_required_lite(0, 60_000);
        assert_eq!(lites, vec![0, 30_000, 60_000]);
    }

    #[test]
    fn test_compute_required_lite_rounds_down() {
        // 5000 floors to 0 and 35000 floors to 30000.
        let lites = compute_required_lite(5_000, 35_000);
        assert_eq!(lites, vec![0, 30_000]);
    }

    #[test]
    fn test_compute_required_diff_within_segment() {
        // Reaching 36000 from the lite at 30000 takes the diffs at 30000 and 33000.
        assert_eq!(compute_required_diff(30_000, 36_000), vec![30_000, 33_000]);
    }

    #[test]
    fn test_compute_required_diff_exact_lite_boundary() {
        // `to` sits exactly on the lite boundary at 60000: every diff of the segment
        // 30000..60000 is required, but none starting at 60000.
        assert_eq!(
            compute_required_diff(30_000, 60_000),
            vec![
                30_000, 33_000, 36_000, 39_000, 42_000, 45_000, 48_000, 51_000, 54_000, 57_000
            ]
        );
    }

    #[test]
    fn test_compute_required_diff_cross_segment() {
        // The range crosses the lite boundary at 60000:
        // first diff = 30000 (57000 floored to a lite boundary), last diff = 60000.
        assert_eq!(
            compute_required_diff(57_000, 63_000),
            vec![
                30_000, 33_000, 36_000, 39_000, 42_000, 45_000, 48_000, 51_000, 54_000, 57_000,
                60_000
            ]
        );
    }

    #[test]
    fn test_find_missing_none() {
        let available: HashSet<ChainEpoch> =
            [0, 30_000, 60_000, 90_000].into_iter().collect();
        let missing = find_missing(&[0, 30_000, 60_000], &available);
        assert!(missing.is_empty());
    }

    #[test]
    fn test_find_missing_some() {
        let available: HashSet<ChainEpoch> = [0, 60_000].into_iter().collect();
        let missing = find_missing(&[0, 30_000, 60_000], &available);
        assert_eq!(missing, vec![30_000]);
    }

    #[test]
    fn test_find_missing_all() {
        let available: HashSet<ChainEpoch> = HashSet::default();
        let missing = find_missing(&[0, 30_000], &available);
        assert_eq!(missing, vec![0, 30_000]);
    }

    #[test]
    fn test_parse_listing_heights_from_json() {
        let json = r#"{
            "total": 3,
            "offset": 0,
            "limit": 0,
            "items": [
                {
                    "url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-03-04_height_3510000.forest.car.zst",
                    "sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-03-04_height_3510000.forest.car.zst.sha256sum",
                    "size": 7528742793,
                    "uploaded": "2026-03-05T00:52:34.198Z"
                },
                {
                    "url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-02-22_height_3480000.forest.car.zst",
                    "sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-02-22_height_3480000.forest.car.zst.sha256sum",
                    "size": 7440018317,
                    "uploaded": "2026-02-22T23:40:48.106Z"
                },
                {
                    "url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2022-11-01_height_0.forest.car.zst",
                    "sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2022-11-01_height_0.forest.car.zst.sha256sum",
                    "size": 491234,
                    "uploaded": "2023-08-30T08:54:56.805Z"
                }
            ]
        }"#;
        let expected: HashSet<ChainEpoch> = [0, 3_480_000, 3_510_000].into_iter().collect();
        assert_eq!(heights_in(json), expected);
    }

    #[test]
    fn test_parse_listing_heights_with_diffs() {
        let json = r#"{
            "total": 2,
            "offset": 0,
            "limit": 0,
            "items": [
                {
                    "url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2026-03-04_height_3510000+3000.forest.car.zst",
                    "sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2026-03-04_height_3510000+3000.forest.car.zst.sha256sum",
                    "size": 123456,
                    "uploaded": "2026-03-05T01:00:00.000Z"
                },
                {
                    "url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2022-11-02_height_0+3000.forest.car.zst",
                    "sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2022-11-02_height_0+3000.forest.car.zst.sha256sum",
                    "size": 789012,
                    "uploaded": "2023-08-30T09:00:00.000Z"
                }
            ]
        }"#;
        let expected: HashSet<ChainEpoch> = [0, 3_510_000].into_iter().collect();
        assert_eq!(heights_in(json), expected);
    }

    #[test]
    fn test_end_to_end_missing_detection() {
        // Epochs 0..=60000 with the lite snapshot at 30000 absent and no diff gaps.
        let available_lite: HashSet<ChainEpoch> = [0, 60_000].into_iter().collect();
        let available_diff = all_diffs_below(60_000);

        let missing_lite = find_missing(&compute_required_lite(0, 60_000), &available_lite);
        let missing_diff = find_missing(&compute_required_diff(0, 60_000), &available_diff);

        assert_eq!(missing_lite, vec![30_000]);
        assert!(missing_diff.is_empty());
    }

    #[test]
    fn test_end_to_end_all_present() {
        let available_lite: HashSet<ChainEpoch> = [0, 30_000, 60_000].into_iter().collect();
        let available_diff = all_diffs_below(60_000);

        let missing_lite = find_missing(&compute_required_lite(0, 60_000), &available_lite);
        let missing_diff = find_missing(&compute_required_diff(0, 60_000), &available_diff);

        assert!(missing_lite.is_empty());
        assert!(missing_diff.is_empty());
    }
}