use std::path::PathBuf;
use std::time::Duration;
use chrono::Utc;
use clap::{Parser, Subcommand};
use ebg::index::{SiteIndex, WaybackFilter, WaybackLink, WaybackLinks};
use ebg::wayback::Wayback;
use miette::{Context, IntoDiagnostic};
use url::Url;
use super::Command;
// Command-line options for the `wayback` command group.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn
// `///` doc comments into `about`/`help` text, so adding them here would
// change the program's help output.
#[derive(Parser)]
pub struct WaybackOptions {
// The wayback subcommand selected on the command line.
#[clap(subcommand)]
command: WaybackCommands,
}
// Subcommands available under the wayback command group.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn
// `///` doc comments into help text, which would change runtime output.
#[derive(Subcommand)]
enum WaybackCommands {
// Scans the site's posts and archives external links that are not yet
// recorded in a post's wayback file (see `UpdateLinksOptions::run_async`).
// clap's derive exposes variants in kebab-case by default, i.e. `update-links`.
UpdateLinks(UpdateLinksOptions),
}
// Options for the `UpdateLinks` subcommand.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn
// `///` doc comments into help text, which would change runtime output.
#[derive(Parser)]
struct UpdateLinksOptions {
// Directory the site is loaded from; also the base that each post's wayback
// file path is joined onto. Positional argument, defaults to ".".
#[clap(default_value = ".")]
root: PathBuf,
// When set, no Wayback client is created (so no credentials are required)
// and the links that would be archived are only listed.
#[clap(long)]
dry_run: bool,
// Number of seconds to sleep between archive requests. Defaults to 1.
#[clap(long, default_value = "1")]
delay: u64,
}
impl Command for WaybackOptions {
    /// Runs whichever wayback subcommand was selected on the command line and
    /// returns its result unchanged.
    fn run(self) -> miette::Result<()> {
        let Self { command } = self;
        // Kept as an exhaustive match so that adding a new subcommand variant
        // forces this dispatch to be updated.
        match command {
            WaybackCommands::UpdateLinks(update_links) => update_links.run(),
        }
    }
}
impl Command for UpdateLinksOptions {
    /// Synchronous entry point: builds a multi-threaded Tokio runtime and
    /// drives [`UpdateLinksOptions::run_async`] to completion on it.
    ///
    /// # Errors
    ///
    /// Returns an error if the runtime cannot be created, or whatever error
    /// the asynchronous implementation reports.
    fn run(self) -> miette::Result<()> {
        // Building the runtime can fail (e.g. the OS refuses to create the
        // worker threads or I/O driver); report that as a diagnostic instead
        // of panicking.
        let runtime = tokio::runtime::Builder::new_multi_thread()
            .enable_all()
            .build()
            .into_diagnostic()
            .wrap_err("Failed to start the Tokio runtime")?;

        runtime.block_on(self.run_async())
    }
}
impl UpdateLinksOptions {
    /// Pause before each status poll of an in-flight archive job.
    const POLL_INTERVAL: Duration = Duration::from_secs(2);

    /// Maximum number of status polls per archive job (about five minutes at
    /// [`Self::POLL_INTERVAL`]). Bounds the wait so that a job which never
    /// reaches a complete state cannot hang the whole run.
    const MAX_POLLS: u32 = 150;

    /// Reads a required environment variable, reporting its name when it is
    /// missing or cannot be read.
    fn required_env(name: &str) -> miette::Result<String> {
        std::env::var(name)
            .into_diagnostic()
            .wrap_err_with(|| format!("{} environment variable not set", name))
    }

    /// Scans every post of the site under `self.root` and archives the
    /// external links that are not yet recorded in the post's wayback file.
    ///
    /// For each post that is not excluded by the site's wayback config, the
    /// post's external links are compared against its wayback file
    /// (`wayback.toml` next to an `index.md`, otherwise the source path with
    /// its extension replaced by `wayback.toml`). Missing links are archived
    /// one at a time and the wayback file is rewritten after every success,
    /// so progress survives an interrupted run. In dry-run mode the missing
    /// links are only listed. A summary is printed at the end.
    ///
    /// # Errors
    ///
    /// Fails if the site cannot be loaded, if the `WAYBACK_ACCESS_KEY` /
    /// `WAYBACK_SECRET_KEY` environment variables are missing (non-dry-run
    /// only), or if a wayback file cannot be read or written. A failure to
    /// archive an individual link is reported and counted, not propagated.
    async fn run_async(self) -> miette::Result<()> {
        println!("Loading site from {}...", self.root.display());
        let site = SiteIndex::from_directory(&self.root, true).await?;

        // Credentials are only needed when links will really be archived.
        let wayback_client = if !self.dry_run {
            let access_key = Self::required_env("WAYBACK_ACCESS_KEY")?;
            let secret_key = Self::required_env("WAYBACK_SECRET_KEY")?;
            Some(Wayback::with_credentials(access_key, secret_key))
        } else {
            println!("🔍 DRY RUN MODE - No links will be archived\n");
            None
        };

        let wayback_config = site.config().wayback.as_ref();

        if let Some(config) = wayback_config {
            if !config.exclude.is_empty() {
                println!("📋 Active filters:");
                for filter in &config.exclude {
                    match filter {
                        WaybackFilter::Before(date) => {
                            println!(" • Excluding posts before {}", date.format("%Y-%m-%d"));
                        }
                    }
                }
                println!();
            }
        }

        let mut total_posts = 0;
        let mut total_links = 0;
        let mut already_archived = 0;
        let mut newly_archived = 0;
        let mut failed_archives = 0;
        let mut filtered_posts = 0;

        // Set once the first archive request has been issued. Used to apply
        // `--delay` between *all* consecutive requests, including the last
        // link of one post and the first link of the next, without sleeping
        // before the first request or after the final one.
        let mut archive_attempted = false;

        for post in site.posts() {
            total_posts += 1;

            if let Some(config) = wayback_config {
                if config.should_exclude_post(post) {
                    filtered_posts += 1;
                    continue;
                }
            }

            let external_links: Vec<_> = post.external_links().collect();
            if external_links.is_empty() {
                continue;
            }
            total_links += external_links.len();

            let source_path = post.source_path();
            let wayback_path = if source_path.ends_with("index.md") {
                // Sibling file of `index.md`; `with_file_name` cannot panic,
                // unlike `parent().unwrap()`.
                source_path.with_file_name("wayback.toml")
            } else {
                source_path.with_extension("wayback.toml")
            };
            let wayback_file_path = self.root.join(&wayback_path);

            let mut wayback_links = if wayback_file_path.exists() {
                WaybackLinks::from_file(&wayback_file_path)?
            } else {
                WaybackLinks::new()
            };

            // Split the post's links into those already recorded and those
            // still missing from the wayback file.
            let (post_already_archived, post_needs_archiving): (Vec<_>, Vec<_>) = external_links
                .into_iter()
                .partition(|url| wayback_links.contains(url));
            already_archived += post_already_archived.len();

            if post_needs_archiving.is_empty() {
                continue;
            }

            println!("\n📄 {}", source_path.display());
            println!(" Wayback config: {}", wayback_path.display());
            println!(" ✅ Already archived: {}", post_already_archived.len());
            println!(" 📦 Needs archiving: {}", post_needs_archiving.len());

            let Some(client) = &wayback_client else {
                // Dry run: just list what would be archived.
                for url in &post_needs_archiving {
                    println!(" - {}", url);
                }
                continue;
            };

            for (idx, url) in post_needs_archiving.iter().enumerate() {
                if archive_attempted {
                    tokio::time::sleep(Duration::from_secs(self.delay)).await;
                }
                archive_attempted = true;

                println!(
                    " [{}/{}] Archiving {}...",
                    idx + 1,
                    post_needs_archiving.len(),
                    url
                );

                match self.archive_link(client, url).await {
                    Ok(wayback_link) => {
                        println!(" ✅ Archived: {}", wayback_link.wayback_url);
                        wayback_links.add(wayback_link);
                        newly_archived += 1;
                        // Persist after every success so an interrupted run
                        // does not lose already-archived links.
                        wayback_links
                            .to_file(&wayback_file_path)
                            .wrap_err_with(|| {
                                format!(
                                    "Failed to save wayback config to {}",
                                    wayback_path.display()
                                )
                            })?;
                    }
                    Err(e) => {
                        println!(" ❌ Failed: {}", e);
                        failed_archives += 1;
                    }
                }
            }
        }

        println!("\n{}", "=".repeat(60));
        println!("Summary:");
        println!(" Posts scanned: {}", total_posts);
        if filtered_posts > 0 {
            println!(" 🚫 Filtered by config: {}", filtered_posts);
        }
        println!(" Total external links: {}", total_links);
        println!(" ✅ Already archived: {}", already_archived);
        if self.dry_run {
            println!(" 📦 Would archive: {}", total_links - already_archived);
            println!("\n⚠️ This was a dry run. Use without --dry-run to actually archive links.");
        } else {
            println!(" ✨ Newly archived: {}", newly_archived);
            if failed_archives > 0 {
                println!(" ❌ Failed: {}", failed_archives);
            }
        }

        Ok(())
    }

    /// Submits `url` for archiving and polls the resulting job until it
    /// completes, returning the recorded link on success.
    ///
    /// The job status is checked every [`Self::POLL_INTERVAL`], at most
    /// [`Self::MAX_POLLS`] times.
    ///
    /// # Errors
    ///
    /// Fails if the save request or a status check fails, if the job
    /// completes unsuccessfully, if a successful job reports no wayback URL,
    /// or if the job does not complete within the polling budget.
    async fn archive_link(&self, client: &Wayback, url: &Url) -> miette::Result<WaybackLink> {
        let job = client
            .begin_save_page(url)
            .await
            .into_diagnostic()
            .wrap_err_with(|| format!("Failed to start archiving {}", url))?;

        for _ in 0..Self::MAX_POLLS {
            tokio::time::sleep(Self::POLL_INTERVAL).await;

            let status = client
                .job_status(&job)
                .await
                .into_diagnostic()
                .wrap_err("Failed to check job status")?;

            if !status.is_complete() {
                continue;
            }
            if !status.is_success() {
                return Err(miette::miette!(
                    "Archive job failed with status: {}",
                    status.status()
                ));
            }

            let wayback_url = status
                .wayback_url()
                .ok_or_else(|| miette::miette!("No wayback URL in successful response"))?;

            return Ok(WaybackLink {
                url: url.clone(),
                wayback_url,
                archived_at: Utc::now(),
            });
        }

        // The job never reached a complete state within the polling budget.
        Err(miette::miette!(
            "Timed out after {}s waiting for archive job for {}",
            Self::POLL_INTERVAL.as_secs() * u64::from(Self::MAX_POLLS),
            url
        ))
    }
}