use std::{
io::{BufWriter, Write},
path::{Path, PathBuf},
};
use progscrape_scrapers::StoryDate;
use serde::{Deserialize, Serialize};
use crate::{
PersistError, Shard,
persist::scrapestore::{SCRAPE_STORE_VERSION, ScrapeStoreStats},
timer_end, timer_start,
};
use super::{
scrapestore::ScrapeStore,
shard::{ShardOrder, ShardRange},
};
/// Outcome of a single [`BackerUpper::backup`] call.
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub enum BackupResult {
/// The store reported a count of zero for the shard, so nothing was written.
Empty,
/// The metadata file already on disk equals the store's current stats, so
/// the existing backup was left untouched.
NoChange,
/// A new backup was written; the payload is the number of scrapes that were
/// serialized into it.
Success(usize),
}
/// Writes per-shard backups of a [`ScrapeStore`] as files under one directory.
pub struct BackerUpper {
/// Directory onto which the backup, metadata and temporary file names are joined.
path: PathBuf,
}
impl BackerUpper {
    /// Creates a backer-upper that places its files in the directory `path`.
    ///
    /// Only the path is copied; the directory is neither created nor checked
    /// here, and the argument itself is dropped when this function returns.
    pub fn new(path: impl AsRef<Path>) -> Self {
        Self {
            path: path.as_ref().to_owned(),
        }
    }

    /// Logs `error` at error level and returns it unchanged.
    ///
    /// Intended for use with `map_err` on the best-effort metadata read in
    /// [`Self::backup`], where a failure is reported but otherwise ignored.
    fn trace_error<E: core::fmt::Debug>(error: E) -> E {
        tracing::error!("Ignoring error in metadata read: {:?}", error);
        error
    }

    /// Backs up every scrape of `shard` into `<name>.json` (one JSON document
    /// per line) and records the matching stats in `<name>.meta.json`.
    ///
    /// Both files are first written under dot-prefixed temporary names in the
    /// same directory and then renamed into place, data file first.
    ///
    /// # Returns
    ///
    /// * [`BackupResult::Empty`] if the store reports a count of zero for the shard.
    /// * [`BackupResult::NoChange`] if an existing, readable metadata file
    ///   equals the store's current stats.
    /// * [`BackupResult::Success`] with the number of scrapes written otherwise.
    ///
    /// # Errors
    ///
    /// Returns an error if the stats lookup, the fetch, serialization, or any
    /// file creation, write, flush or rename fails. Failures while reading the
    /// *existing* metadata file are only logged and lead to a fresh backup.
    pub fn backup(
        &self,
        name: &str,
        shard: Shard,
        scrapes: &ScrapeStore,
    ) -> Result<BackupResult, PersistError> {
        let stats = scrapes.stats(shard)?;
        if stats.count == 0 {
            return Ok(BackupResult::Empty);
        }

        let meta = self.path.join(format!("{name}.meta.json"));
        let meta_temp = self.path.join(format!(".{name}.meta.json"));

        // Skip the work when the previous backup's metadata matches the
        // current stats. The `exists()` guard keeps a missing file (the normal
        // first-run case) from being logged as an error by `trace_error`.
        if meta.exists()
            && let Ok(file) = std::fs::File::open(&meta).map_err(Self::trace_error)
            && let Ok(current_stats) = serde_json::from_reader(file).map_err(Self::trace_error)
            && stats == current_stats
        {
            return Ok(BackupResult::NoChange);
        }

        let output = self.path.join(format!("{name}.json"));
        let temp = self.path.join(format!(".{name}.temp"));
        let file = std::fs::File::create(&temp)?;

        let time = timer_start!();
        let mut w = BufWriter::new(file);

        // Recompute the stats from what is actually written, since the store
        // may have changed after `stats` was read.
        let mut earliest = StoryDate::MAX;
        let mut latest = StoryDate::MIN;
        let mut count = 0;

        scrapes.fetch_all(
            shard,
            |scrape| {
                count += 1;
                earliest = earliest.min(scrape.date);
                latest = latest.max(scrape.date);
                w.write_all(serde_json::to_string(&scrape)?.as_bytes())?;
                w.write_all(b"\n")?;
                Ok(())
            },
            |error| {
                tracing::error!("Error fetching scrape: {:?}", error);
            },
        )?;

        // Flush explicitly and close the handle before anything is renamed.
        // Relying on the implicit flush in `BufWriter`'s drop would discard
        // write errors and would only run after the metadata had already been
        // published, so a truncated backup could be reported as a success.
        w.flush()?;
        drop(w);

        let computed_stats = ScrapeStoreStats {
            version: SCRAPE_STORE_VERSION,
            count,
            earliest,
            latest,
        };
        if computed_stats != stats {
            tracing::info!(
                "Scrape store stats changed during backup: was {:?}, computed {:?}",
                stats,
                computed_stats
            );
        }

        // The temporary `File` is dropped (closed) at the end of this statement.
        serde_json::to_writer(std::fs::File::create(&meta_temp)?, &computed_stats)?;

        // Publish the data file before its metadata so that a metadata file
        // never describes a backup that has not been moved into place yet.
        std::fs::rename(temp, &output)?;
        std::fs::rename(meta_temp, meta)?;

        timer_end!(
            time,
            "Successfully backed up {} stories to {}",
            count,
            output.to_string_lossy()
        );
        Ok(BackupResult::Success(count))
    }

    /// Backs up every shard in `shard_range`, oldest first, using each
    /// shard's string form as the backup name.
    ///
    /// A failure for one shard does not stop the others: each shard is paired
    /// with its own result in the returned vector, in iteration order.
    pub fn backup_range(
        &self,
        scrapes: &ScrapeStore,
        shard_range: ShardRange,
    ) -> Vec<(Shard, Result<BackupResult, PersistError>)> {
        let mut v = vec![];
        for shard in shard_range.iterate(ShardOrder::OldestFirst) {
            v.push((shard, self.backup(&shard.to_string(), shard, scrapes)))
        }
        v
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::PersistLocation;
    use crate::test::enable_tracing;
    use progscrape_scrapers::ScrapeConfig;
    use rstest::*;

    /// Inserts the first 100 sample scrapes into an in-memory store and checks
    /// that backing up the 2015-01 shard into a temporary directory succeeds.
    #[rstest]
    fn test_insert(_enable_tracing: &bool) -> Result<(), Box<dyn std::error::Error>> {
        let store = ScrapeStore::new(PersistLocation::Memory)?;
        let samples = progscrape_scrapers::load_sample_scrapes(&ScrapeConfig::default());
        let first = &samples[0..100];
        for scrape in first {
            store.insert_scrape(scrape)?;
        }

        let tempdir = tempfile::tempdir()?;
        // Pass the path by reference: `new` takes its argument by value, so
        // handing over the `TempDir` guard itself would drop it (removing the
        // directory) before `backup` gets a chance to write into it.
        let backup = BackerUpper::new(tempdir.path());
        backup.backup("2015-01", Shard::from_year_month(2015, 1), &store)?;
        Ok(())
    }
}