use ahash::HashSet;
use anyhow::{Context as _, bail};
use clap::Args;
use regex::Regex;
use serde::Deserialize;
use std::sync::LazyLock;
use std::time::SystemTime;
use url::Url;
use crate::networks::{NetworkChain, calculate_expected_epoch};
use crate::shim::clock::{ChainEpoch, EPOCH_DURATION_SECONDS};
use crate::utils::net::global_http_client;
/// Base URL of the archive listing endpoint; `/{network}/{snapshot_type}?format=json` is appended
/// to it when a listing is fetched.
const LIST_BASE: &str = "https://forest-archive.chainsafe.dev/list";
/// Epoch spacing of lite snapshots: required lite heights are the multiples of this value.
const LITE_INTERVAL: ChainEpoch = 30_000;
/// Epoch spacing of diff snapshots: required diff heights advance in steps of this value.
const DIFF_INTERVAL: ChainEpoch = 3_000;
// Genesis timestamps of the two supported networks. They are passed to `calculate_expected_epoch`
// alongside the current time expressed as seconds since the Unix epoch.
const MAINNET_GENESIS_TIMESTAMP: u64 = 1598306400; const CALIBNET_GENESIS_TIMESTAMP: u64 = 1667326380;
// Command-line arguments of the check that reports lite and diff snapshots absent from the
// archive listing for an epoch range.
//
// This description is a plain comment on purpose: clap's derive surfaces doc comments as help
// text, and only the per-option help below is meant to change.
#[derive(Debug, Args)]
pub struct ArchiveMissingCommand {
    /// Network whose archive is checked (only mainnet and calibnet are supported)
    #[arg(long, required = true)]
    chain: NetworkChain,
    /// First epoch of the range to check; rounded down to a multiple of 30000 [default: 0]
    #[arg(long)]
    from: Option<ChainEpoch>,
    /// Last epoch of the range to check [default: expected current network epoch minus 3000]
    #[arg(long)]
    to: Option<ChainEpoch>,
}
/// One entry of the archive listing response. Only `url` is declared here; the unit-test
/// fixtures show entries carrying further fields, which are not deserialized.
#[derive(Debug, Deserialize)]
struct ListingItem {
/// Location of the snapshot file; the snapshot height is parsed out of its path.
url: Url,
}
/// Body of the JSON archive listing, reduced to the list of entries.
#[derive(Debug, Deserialize)]
struct ListingResponse {
/// Every snapshot entry returned by the listing endpoint.
items: Vec<ListingItem>,
}
/// Reads the epoch that follows `_height_` in the path of a snapshot URL.
///
/// Returns `None` when the path contains no `_height_<digits>` sequence or when the digits do
/// not parse as a [`ChainEpoch`]. For diff file names such as `..._height_0+3000...` only the
/// digits before the `+` are read.
fn extract_height(url: &Url) -> Option<ChainEpoch> {
    // Compiled once on first use; the pattern is a constant, so compilation cannot fail.
    static HEIGHT_PATTERN: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"_height_(\d+)").unwrap());
    HEIGHT_PATTERN
        .captures(url.path())
        .and_then(|found| found.get(1))
        .and_then(|digits| digits.as_str().parse().ok())
}
/// Collects the heights of all listing entries whose URL carries a parsable height.
///
/// Entries for which [`extract_height`] yields `None` are skipped.
fn parse_listing_heights(data: &ListingResponse) -> HashSet<ChainEpoch> {
    let mut heights = HashSet::default();
    for entry in &data.items {
        if let Some(epoch) = extract_height(&entry.url) {
            heights.insert(epoch);
        }
    }
    heights
}
/// Lists the lite snapshot heights needed for the epoch range `from..=to`.
///
/// Both bounds are rounded toward zero to a multiple of `LITE_INTERVAL`; the result contains
/// every multiple between the two rounded bounds, inclusive. It is empty when the rounded
/// `from` lies above the rounded `to`.
fn compute_required_lite(from: ChainEpoch, to: ChainEpoch) -> Vec<ChainEpoch> {
    // `x - x % n` is the same value as `(x / n) * n` under truncating integer division.
    let first = from - from % LITE_INTERVAL;
    let last = to - to % LITE_INTERVAL;
    std::iter::successors(Some(first), |height| height.checked_add(LITE_INTERVAL))
        .take_while(|height| *height <= last)
        .collect()
}
/// Lists the diff snapshot heights needed for the epoch range `from..=to`.
///
/// The list starts at `from` rounded toward zero to a multiple of `LITE_INTERVAL` and advances
/// in steps of `DIFF_INTERVAL`. The last height is:
/// - the largest multiple of `DIFF_INTERVAL` not above `to - 1`, when `to` lies past its
///   rounded-down lite boundary;
/// - one `DIFF_INTERVAL` below that lite boundary, when `to` sits exactly on it.
///
/// Nothing is required when `to` does not exceed a lite boundary that is itself below
/// `DIFF_INTERVAL`.
fn compute_required_diff(from: ChainEpoch, to: ChainEpoch) -> Vec<ChainEpoch> {
    let first = from - from % LITE_INTERVAL;
    let lite_boundary = to - to % LITE_INTERVAL;
    let past_boundary = to > lite_boundary;
    let last = match (past_boundary, lite_boundary >= DIFF_INTERVAL) {
        (true, _) => {
            let prev = to - 1;
            prev - prev % DIFF_INTERVAL
        }
        (false, true) => lite_boundary - DIFF_INTERVAL,
        (false, false) => return Vec::new(),
    };
    std::iter::successors(Some(first), |height| height.checked_add(DIFF_INTERVAL))
        .take_while(|height| *height <= last)
        .collect()
}
/// Returns the heights from `required` that are not contained in `available`.
///
/// The order of `required` is kept in the result.
fn find_missing(required: &[ChainEpoch], available: &HashSet<ChainEpoch>) -> Vec<ChainEpoch> {
    let mut absent = Vec::new();
    for &height in required {
        if !available.contains(&height) {
            absent.push(height);
        }
    }
    absent
}
/// Downloads the JSON archive listing for `network` / `snapshot_type` and returns the snapshot
/// heights found in it.
///
/// # Errors
///
/// Fails when the request cannot be sent, when the server answers with a non-success status,
/// or when the body cannot be decoded as a [`ListingResponse`].
async fn fetch_available_heights(
    client: &reqwest::Client,
    network: &str,
    snapshot_type: &str,
) -> anyhow::Result<HashSet<ChainEpoch>> {
    let url = format!("{LIST_BASE}/{network}/{snapshot_type}?format=json");
    let response = client
        .get(&url)
        .send()
        .await
        .context("failed to fetch archive listing")?;

    let status = response.status();
    if !status.is_success() {
        bail!("{url}: HTTP {}", status);
    }

    let listing = response
        .json::<ListingResponse>()
        .await
        .context("failed to parse archive listing")?;
    Ok(parse_listing_heights(&listing))
}
impl ArchiveMissingCommand {
    /// Compares the snapshots required for the requested epoch range against the archive
    /// listings and prints the result to standard output.
    ///
    /// `--from` defaults to `0`; `--to` defaults to the expected current epoch minus
    /// `DIFF_INTERVAL`.
    ///
    /// # Errors
    ///
    /// Fails when the chain is neither mainnet nor calibnet, when the system clock is before
    /// the Unix epoch, when `from > to`, when either listing cannot be fetched, or when at
    /// least one required snapshot is missing.
    pub async fn run(self) -> anyhow::Result<()> {
        let (network, genesis_ts) = match &self.chain {
            NetworkChain::Mainnet => ("mainnet", MAINNET_GENESIS_TIMESTAMP),
            NetworkChain::Calibnet => ("calibnet", CALIBNET_GENESIS_TIMESTAMP),
            other => bail!("network {other} is not supported"),
        };

        let unix_now = SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)?
            .as_secs();
        let current_epoch =
            calculate_expected_epoch(unix_now, genesis_ts, EPOCH_DURATION_SECONDS as u32);

        let from = self.from.unwrap_or(0);
        let to = match self.to {
            Some(epoch) => epoch,
            None => current_epoch - DIFF_INTERVAL,
        };
        if to < from {
            bail!("--from ({from}) must be <= --to ({to})");
        }
        println!(
            "Checking {network} epochs {from}..={to} (current network epoch: {current_epoch})"
        );

        let client = global_http_client();
        println!("Fetching archive listings...");
        // Both listings are requested concurrently; the first failure aborts the command.
        let (available_lite, available_diff) = tokio::try_join!(
            fetch_available_heights(&client, network, "lite"),
            fetch_available_heights(&client, network, "diff"),
        )?;
        println!(
            "Archive has {} lite and {} diff snapshots.",
            available_lite.len(),
            available_diff.len()
        );

        let required_lite = compute_required_lite(from, to);
        let missing_lite = find_missing(&required_lite, &available_lite);
        let required_diff = compute_required_diff(from, to);
        let missing_diff = find_missing(&required_diff, &available_diff);

        let total_required = required_lite.len() + required_diff.len();
        let total_missing = missing_lite.len() + missing_diff.len();

        if total_missing == 0 {
            // Report the range from the lite boundary at or below `from`.
            let base_from = from - from % LITE_INTERVAL;
            println!(
                "All {total_required} required snapshots are available (epochs {base_from}..={to}).",
            );
            return Ok(());
        }

        println!("\n{total_missing} of {total_required} required snapshots are MISSING:\n");
        if !missing_lite.is_empty() {
            println!(" Missing lite snapshots:");
            for h in &missing_lite {
                println!(" lite at height {h}");
            }
        }
        if !missing_diff.is_empty() {
            println!(" Missing diff snapshots:");
            for h in &missing_diff {
                println!(" diff at height {h}");
            }
        }
        bail!("{total_missing} of {total_required} required snapshots are missing");
    }
}
// Unit tests for the pure helpers of this file: height extraction from URLs, listing parsing,
// required-height computation and missing-height detection. No test performs network access.
#[cfg(test)]
mod tests {
use super::*;
// A lite snapshot file name yields the number that follows `_height_`.
#[test]
fn test_extract_height_lite() {
let url = Url::parse(
"https://example.com/forest_snapshot_calibnet_2026-03-04_height_3510000.forest.car.zst",
)
.unwrap();
assert_eq!(extract_height(&url), Some(3510000));
}
// For diff file names (`<height>+<length>`) only the part before `+` is returned.
#[test]
fn test_extract_height_diff() {
let url = Url::parse(
"https://example.com/forest_diff_calibnet_2022-11-02_height_0+3000.forest.car.zst",
)
.unwrap();
assert_eq!(extract_height(&url), Some(0));
let url = Url::parse(
"https://example.com/forest_diff_mainnet_2025-12-24_height_3480000+3000.forest.car.zst",
)
.unwrap();
assert_eq!(extract_height(&url), Some(3480000));
}
// A path without a `_height_<digits>` part yields `None`.
#[test]
fn test_extract_height_invalid() {
let url = Url::parse("https://example.com/not-a-snapshot").unwrap();
assert_eq!(extract_height(&url), None);
}
// A range inside one lite segment needs only that segment's base snapshot.
#[test]
fn test_compute_required_lite_single_segment() {
assert_eq!(compute_required_lite(30_000, 59_999), vec![30_000]);
}
// Both ends of the range are inclusive when they fall on lite boundaries.
#[test]
fn test_compute_required_lite_multiple_segments() {
assert_eq!(
compute_required_lite(30_000, 90_000),
vec![30_000, 60_000, 90_000]
);
}
// Height 0 is part of the result when the range starts at genesis.
#[test]
fn test_compute_required_lite_from_genesis() {
assert_eq!(compute_required_lite(0, 60_000), vec![0, 30_000, 60_000]);
}
// Bounds that are not multiples of the lite interval are rounded down.
#[test]
fn test_compute_required_lite_rounds_down() {
assert_eq!(compute_required_lite(5_000, 35_000), vec![0, 30_000]);
}
// With `to` on a diff boundary inside a segment, the diff starting at `to` is not required.
#[test]
fn test_compute_required_diff_within_segment() {
let diffs = compute_required_diff(30_000, 36_000);
assert_eq!(diffs, vec![30_000, 33_000]);
}
// With `to` exactly on a lite boundary, diffs stop one diff interval below it.
#[test]
fn test_compute_required_diff_exact_lite_boundary() {
let diffs = compute_required_diff(30_000, 60_000);
assert_eq!(
diffs,
vec![
30_000, 33_000, 36_000, 39_000, 42_000, 45_000, 48_000, 51_000, 54_000, 57_000
]
);
}
// The start is rounded down to its lite boundary, so diffs begin at 30_000 rather than 57_000.
#[test]
fn test_compute_required_diff_cross_segment() {
let diffs = compute_required_diff(57_000, 63_000);
assert_eq!(
diffs,
vec![
30_000, 33_000, 36_000, 39_000, 42_000, 45_000, 48_000, 51_000, 54_000, 57_000,
60_000
]
);
}
// Extra available heights do not matter; nothing is missing when all required ones exist.
#[test]
fn test_find_missing_none() {
let required = vec![0, 30_000, 60_000];
let available: HashSet<_> = HashSet::from_iter([0, 30_000, 60_000, 90_000]);
assert!(find_missing(&required, &available).is_empty());
}
// Only the required height absent from the available set is reported.
#[test]
fn test_find_missing_some() {
let required = vec![0, 30_000, 60_000];
let available: HashSet<_> = HashSet::from_iter([0, 60_000]);
assert_eq!(find_missing(&required, &available), vec![30_000]);
}
// With an empty available set every required height is reported, in input order.
#[test]
fn test_find_missing_all() {
let required = vec![0, 30_000];
let available: HashSet<ChainEpoch> = HashSet::default();
assert_eq!(find_missing(&required, &available), vec![0, 30_000]);
}
// A lite listing deserializes despite fields not declared on the structs, and yields one
// height per entry.
#[test]
fn test_parse_listing_heights_from_json() {
let json = r#"{
"total": 3,
"offset": 0,
"limit": 0,
"items": [
{
"url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-03-04_height_3510000.forest.car.zst",
"sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-03-04_height_3510000.forest.car.zst.sha256sum",
"size": 7528742793,
"uploaded": "2026-03-05T00:52:34.198Z"
},
{
"url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-02-22_height_3480000.forest.car.zst",
"sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2026-02-22_height_3480000.forest.car.zst.sha256sum",
"size": 7440018317,
"uploaded": "2026-02-22T23:40:48.106Z"
},
{
"url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2022-11-01_height_0.forest.car.zst",
"sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/lite/forest_snapshot_calibnet_2022-11-01_height_0.forest.car.zst.sha256sum",
"size": 491234,
"uploaded": "2023-08-30T08:54:56.805Z"
}
]
}"#;
let data: ListingResponse = serde_json::from_str(json).unwrap();
let heights = parse_listing_heights(&data);
assert_eq!(heights, HashSet::from_iter([0, 3_480_000, 3_510_000]));
}
// A diff listing yields the start height of each diff (the part before `+`).
#[test]
fn test_parse_listing_heights_with_diffs() {
let json = r#"{
"total": 2,
"offset": 0,
"limit": 0,
"items": [
{
"url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2026-03-04_height_3510000+3000.forest.car.zst",
"sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2026-03-04_height_3510000+3000.forest.car.zst.sha256sum",
"size": 123456,
"uploaded": "2026-03-05T01:00:00.000Z"
},
{
"url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2022-11-02_height_0+3000.forest.car.zst",
"sha256url": "https://forest-archive.chainsafe.dev/archive/forest/calibnet/diff/forest_diff_calibnet_2022-11-02_height_0+3000.forest.car.zst.sha256sum",
"size": 789012,
"uploaded": "2023-08-30T09:00:00.000Z"
}
]
}"#;
let data: ListingResponse = serde_json::from_str(json).unwrap();
let heights = parse_listing_heights(&data);
assert_eq!(heights, HashSet::from_iter([0, 3_510_000]));
}
// Combined check: one lite snapshot (30_000) is absent while all required diffs are present.
#[test]
fn test_end_to_end_missing_detection() {
let available_lite: HashSet<_> = HashSet::from_iter([0, 60_000]);
let available_diff: HashSet<_> = (0..60_000).step_by(DIFF_INTERVAL as usize).collect();
let required_lite = compute_required_lite(0, 60_000);
let required_diff = compute_required_diff(0, 60_000);
let missing_lite = find_missing(&required_lite, &available_lite);
let missing_diff = find_missing(&required_diff, &available_diff);
assert_eq!(missing_lite, vec![30_000]);
assert!(missing_diff.is_empty());
}
// Combined check: nothing is reported when every required lite and diff snapshot exists.
#[test]
fn test_end_to_end_all_present() {
let available_lite: HashSet<_> = HashSet::from_iter([0, 30_000, 60_000]);
let available_diff: HashSet<_> = (0..60_000).step_by(DIFF_INTERVAL as usize).collect();
let required_lite = compute_required_lite(0, 60_000);
let required_diff = compute_required_diff(0, 60_000);
assert!(find_missing(&required_lite, &available_lite).is_empty());
assert!(find_missing(&required_diff, &available_diff).is_empty());
}
}