use std::collections::HashMap;
use std::path::PathBuf;
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use tracing::{debug, warn};
use scrapling_fetch::Response;
use crate::error::{Result, SpiderError};
/// File-backed cache of fetched responses.
///
/// Each entry lives in its own JSON file inside `cache_dir`, named after the
/// hex-encoded request fingerprint.
#[derive(Debug, Clone)]
pub struct ResponseCacheManager {
    /// Directory that holds the cache entry files.
    cache_dir: PathBuf,
}
/// On-disk representation of a cached response.
///
/// Serialized to and from JSON by `ResponseCacheManager`; the field names are
/// therefore part of the cache file format.
#[derive(Serialize, Deserialize)]
struct CachedResponse {
    /// URL of the response.
    url: String,
    /// HTTP status code.
    status: u16,
    /// Reason string copied from the response.
    reason: String,
    /// Encoding string copied from the response.
    encoding: String,
    /// Cookies copied from the response.
    cookies: HashMap<String, String>,
    /// Response headers.
    headers: HashMap<String, String>,
    /// Headers of the request that produced the response.
    request_headers: HashMap<String, String>,
    /// Request method, as supplied by the caller when the entry was stored.
    method: String,
    /// Response body, base64-encoded (standard alphabet) so it can live in JSON.
    content_base64: String,
}
impl ResponseCacheManager {
pub fn new(cache_dir: impl Into<PathBuf>) -> Self {
Self {
cache_dir: cache_dir.into(),
}
}
fn cache_path(&self, fingerprint: &[u8]) -> PathBuf {
self.cache_dir
.join(format!("{}.json", hex::encode(fingerprint)))
}
pub fn get(&self, fingerprint: &[u8]) -> Option<Response> {
use base64::Engine;
let path = self.cache_path(fingerprint);
let data = std::fs::read(&path)
.inspect_err(|e| warn!(error = %e, "failed to read cache file"))
.ok()?;
let cached: CachedResponse = serde_json::from_slice(&data)
.inspect_err(|e| warn!(error = %e, "failed to deserialize cache entry"))
.ok()?;
let body = base64::engine::general_purpose::STANDARD
.decode(&cached.content_base64)
.inspect_err(|e| warn!(error = %e, "failed to decode cached body"))
.ok()
.map(Bytes::from)?;
Some(Response::new(
&cached.url,
body,
cached.status,
Some(cached.reason),
cached.cookies,
cached.headers,
cached.request_headers,
cached.encoding,
cached.method,
Vec::new(),
HashMap::new(),
))
}
pub fn put(&self, fingerprint: &[u8], response: &Response, method: &str) -> Result<()> {
std::fs::create_dir_all(&self.cache_dir)
.map_err(|e| SpiderError::Other(format!("failed to create cache dir: {e}")))?;
use base64::Engine;
let content_base64 = base64::engine::general_purpose::STANDARD.encode(&response.body);
let cached = CachedResponse {
url: response.url().to_owned(),
status: response.status,
reason: response.reason.clone(),
encoding: response.encoding.clone(),
cookies: response.cookies.clone(),
headers: response.headers.clone(),
request_headers: response.request_headers.clone(),
method: method.to_owned(),
content_base64,
};
let temp_path = self.cache_dir.join(".cache.tmp");
let json = serde_json::to_vec(&cached)
.map_err(|e| SpiderError::Other(format!("cache serialization failed: {e}")))?;
std::fs::write(&temp_path, &json)
.map_err(|e| SpiderError::Other(format!("failed to write cache: {e}")))?;
let target = self.cache_path(fingerprint);
std::fs::rename(&temp_path, &target).map_err(|e| {
let _ = std::fs::remove_file(&temp_path);
SpiderError::Other(format!("failed to rename cache file: {e}"))
})?;
debug!("response cached");
Ok(())
}
pub fn clear(&self) -> Result<()> {
if self.cache_dir.exists() {
for entry in std::fs::read_dir(&self.cache_dir)
.map_err(|e| SpiderError::Other(format!("failed to read cache dir: {e}")))?
.flatten()
{
if entry.path().extension().is_some_and(|e| e == "json") {
let _ = std::fs::remove_file(entry.path());
}
}
}
Ok(())
}
}