use fs4::FileExt;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::io::Write;
use std::path::PathBuf;
/// Cumulative success statistics for one technique, as stored inside a `WafGenome`.
///
/// Every field carries `#[serde(default)]`, so a field that is absent from the
/// serialized form deserializes to its type's default (empty string or zero).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TechniqueRecord {
/// Technique identifier; `WafGenome::merge_session` matches incoming stats on it.
#[serde(default)]
pub name: String,
/// Saturating sum of the successes reported by every merged session.
#[serde(default)]
pub total_successes: u32,
/// Saturating sum of the attempts reported by every merged session.
#[serde(default)]
pub total_attempts: u32,
/// Number of merged stat entries for this technique that reported at least one success.
#[serde(default)]
pub target_count: u32,
/// Unix time (seconds) of the latest merge that reported a success; `0` if none has.
#[serde(default)]
pub last_success_epoch: u64,
}
impl TechniqueRecord {
    /// Fraction of recorded attempts that succeeded.
    ///
    /// Returns `0.0` when no attempts have been recorded, which avoids dividing by zero.
    /// The result is not clamped: it exceeds `1.0` if `total_successes` is larger than
    /// `total_attempts`.
    #[must_use]
    pub fn success_rate(&self) -> f64 {
        match self.total_attempts {
            0 => 0.0,
            attempts => f64::from(self.total_successes) / f64::from(attempts),
        }
    }
}
/// Accumulated technique statistics for a single WAF.
///
/// Every field carries `#[serde(default)]`, so a field that is absent from the
/// serialized form deserializes to its type's default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WafGenome {
/// WAF name, stored exactly as it was passed to `WafGenome::new`.
#[serde(default)]
pub waf_name: String,
/// One record per technique name seen by `merge_session`.
#[serde(default)]
pub techniques: Vec<TechniqueRecord>,
/// Incremented (saturating) once per `merge_session` call.
#[serde(default)]
pub targets_scanned: u32,
/// Unix time (seconds) set by `new` and refreshed by every `merge_session` call.
#[serde(default)]
pub updated_at: u64,
}
impl WafGenome {
    /// Creates an empty genome for `waf_name`, stamped with the current epoch time.
    #[must_use]
    pub fn new(waf_name: &str) -> Self {
        let created_at = current_epoch();
        Self {
            waf_name: waf_name.to_owned(),
            techniques: Vec::new(),
            targets_scanned: 0,
            updated_at: created_at,
        }
    }

    /// Returns at most `n` techniques, ordered by descending success rate.
    ///
    /// Only techniques with at least `min_attempts` recorded attempts are considered.
    /// The sort is stable, so techniques with equal rates keep their stored order.
    #[must_use]
    pub fn top_techniques(&self, n: usize, min_attempts: u32) -> Vec<&TechniqueRecord> {
        let mut ranked: Vec<&TechniqueRecord> = Vec::new();
        for record in &self.techniques {
            if record.total_attempts >= min_attempts {
                ranked.push(record);
            }
        }
        ranked.sort_by(|lhs, rhs| {
            let (lhs_rate, rhs_rate) = (lhs.success_rate(), rhs.success_rate());
            // Reversed operands give descending order; incomparable rates count as ties.
            rhs_rate
                .partial_cmp(&lhs_rate)
                .unwrap_or(std::cmp::Ordering::Equal)
        });
        ranked.truncate(n);
        ranked
    }

    /// Upper bound on distinct techniques kept per genome. Once it is reached, stats
    /// for technique names not already present are dropped.
    const MAX_TECHNIQUES: usize = 1024;

    /// Folds one session's results into the genome.
    ///
    /// Each `stats` entry is `(technique name, successes, attempts)`. Counters are
    /// combined with saturating addition. An entry with at least one success also
    /// bumps the record's `target_count` and sets `last_success_epoch` to now.
    /// `targets_scanned` and `updated_at` are updated on every call, even when
    /// `stats` is empty.
    pub fn merge_session(&mut self, stats: &[(String, u32, u32)]) {
        let now = current_epoch();
        self.updated_at = now;
        self.targets_scanned = self.targets_scanned.saturating_add(1);

        for (name, successes, attempts) in stats {
            let (successes, attempts) = (*successes, *attempts);
            let succeeded = successes > 0;

            match self.techniques.iter().position(|t| t.name == *name) {
                Some(index) => {
                    let record = &mut self.techniques[index];
                    record.total_attempts = record.total_attempts.saturating_add(attempts);
                    record.total_successes = record.total_successes.saturating_add(successes);
                    if succeeded {
                        record.last_success_epoch = now;
                        record.target_count = record.target_count.saturating_add(1);
                    }
                }
                None => {
                    // Unknown technique: only record it while there is room left.
                    if self.techniques.len() < Self::MAX_TECHNIQUES {
                        self.techniques.push(TechniqueRecord {
                            name: name.clone(),
                            total_successes: successes,
                            total_attempts: attempts,
                            target_count: u32::from(succeeded),
                            last_success_epoch: if succeeded { now } else { 0 },
                        });
                    }
                }
            }
        }
    }

    /// Names of the techniques worth seeding with.
    ///
    /// Takes the top 20 techniques that have at least 5 attempts and keeps those whose
    /// success rate is at least `0.60`, best first.
    #[must_use]
    pub fn seed_winners(&self) -> Vec<String> {
        let mut winners = Vec::new();
        for record in self.top_techniques(20, 5) {
            if record.success_rate() >= 0.60 {
                winners.push(record.name.clone());
            }
        }
        winners
    }
}
/// File-backed store of `WafGenome` values with an in-memory cache.
pub struct GeneBank {
/// Directory that holds one `<normalized name>.json` file per genome.
root: PathBuf,
/// Genomes already loaded or saved through this instance, keyed by normalized name.
cache: HashMap<String, WafGenome>,
}
impl GeneBank {
/// Opens the gene bank rooted at the directory returned by `default_genome_dir`.
///
/// # Errors
/// Returns `GeneBankError::NoHomeDir` when the home directory cannot be determined,
/// or whatever `GeneBank::open` returns for that directory.
pub fn open_default() -> Result<Self, GeneBankError> {
    default_genome_dir().and_then(Self::open)
}
pub fn open(root: impl AsRef<std::path::Path>) -> Result<Self, GeneBankError> {
let root = root.as_ref().to_path_buf();
fs::create_dir_all(&root).map_err(|e| GeneBankError::Io {
path: root.clone(),
source: e,
})?;
Ok(Self {
root,
cache: HashMap::new(),
})
}
/// Returns the genome for `waf_name`, reading it from disk on first access.
///
/// The name is normalized with `normalize_name` to form the cache key and file name.
/// A cached genome is returned without touching the disk. Otherwise the JSON file
/// is read, parsed, cached and returned.
///
/// Returns `None` when the file does not exist, cannot be read (a warning is
/// logged), or does not parse (the file is handed to `quarantine_corrupt`).
pub fn load(&mut self, waf_name: &str) -> Option<&WafGenome> {
    let key = normalize_name(waf_name);

    // `contains_key` followed by a final `get` keeps the borrow checker happy while
    // still allowing the cache to be filled in between.
    if !self.cache.contains_key(&key) {
        let path = self.genome_path(&key);
        if !path.exists() {
            return None;
        }

        let contents = match fs::read_to_string(&path) {
            Ok(text) => text,
            Err(e) => {
                tracing::warn!(
                    path = %path.display(),
                    error = %e,
                    "failed to read genome file"
                );
                return None;
            }
        };

        let genome = match serde_json::from_str::<WafGenome>(&contents) {
            Ok(parsed) => parsed,
            Err(e) => {
                Self::quarantine_corrupt(&path, &e);
                return None;
            }
        };

        self.cache.insert(key.clone(), genome);
    }

    self.cache.get(&key)
}
/// Writes `genome` to its JSON file under an exclusive lock and caches a copy.
///
/// The file name and cache key are derived from `genome.waf_name` via
/// `normalize_name`.
///
/// The sibling `.lock` file is intentionally left on disk afterwards. Unlinking it
/// once the lock is released would let a process that already opened the old file
/// lock the unlinked inode, while a later process creates and locks a fresh file.
/// Both would then believe they hold the lock.
///
/// # Errors
/// Returns `GeneBankError::Io` if the lock cannot be taken or the file cannot be
/// written, and `GeneBankError::Serialize` if the genome cannot be serialized. The
/// cache is left untouched on error.
pub fn save(&mut self, genome: &WafGenome) -> Result<(), GeneBankError> {
    let key = normalize_name(&genome.waf_name);
    let path = self.genome_path(&key);
    let (lock_file, _lock_path) = Self::acquire_lock(&path)?;
    Self::write_genome(&path, genome)?;
    // Closing the handle gives up the lock; on the error path above the same
    // happens implicitly when `lock_file` goes out of scope.
    drop(lock_file);
    self.cache.insert(key, genome.clone());
    Ok(())
}
/// Merges one session's `stats` into the genome for `waf_name` and persists it.
///
/// The whole read-merge-write cycle runs under the genome's exclusive lock. The
/// starting point is chosen in this order:
///
/// 1. the file on disk, so that merges written by other processes since this
///    instance last cached the genome are not overwritten;
/// 2. the cached copy, if the file is missing, unreadable or corrupt;
/// 3. a fresh `WafGenome::new(waf_name)`.
///
/// The cache is only updated after the write succeeds, so a failed write leaves the
/// previously cached genome in place.
///
/// The sibling `.lock` file is intentionally left on disk. Unlinking it after
/// releasing the lock would allow two processes to lock different inodes for the
/// same genome at the same time.
///
/// # Errors
/// Returns `GeneBankError::Io` if the lock cannot be taken or the file cannot be
/// written, and `GeneBankError::Serialize` if the genome cannot be serialized.
pub fn merge_and_save(
    &mut self,
    waf_name: &str,
    stats: &[(String, u32, u32)],
) -> Result<(), GeneBankError> {
    let key = normalize_name(waf_name);
    let path = self.genome_path(&key);
    let (lock_file, _lock_path) = Self::acquire_lock(&path)?;

    let mut genome = Self::read_genome_from_disk(&path)
        .or_else(|| self.cache.get(&key).cloned())
        .unwrap_or_else(|| WafGenome::new(waf_name));
    genome.merge_session(stats);

    Self::write_genome(&path, &genome)?;
    drop(lock_file);
    self.cache.insert(key, genome);
    Ok(())
}
/// Lists the normalized names of the genomes stored in the bank's directory.
///
/// A directory entry counts as a genome when its file name ends in `.json` and does
/// not contain `.corrupt.`. The returned name is the file name with exactly one
/// trailing `.json` removed. Lock, temp and quarantined files never end in `.json`
/// and are skipped. Entries are returned in directory-iteration order.
///
/// Returns an empty vector when the directory cannot be read.
#[must_use]
pub fn list_wafs(&self) -> Vec<String> {
    let Ok(entries) = fs::read_dir(&self.root) else {
        return Vec::new();
    };
    entries
        .filter_map(|entry| {
            let file_name = entry.ok()?.file_name();
            let file_name = file_name.to_string_lossy();
            if file_name.contains(".corrupt.") {
                return None;
            }
            // `strip_suffix` removes the extension once; `trim_end_matches` would
            // also eat repeated suffixes and turn `a.json.json` into `a`.
            file_name.strip_suffix(".json").map(str::to_owned)
        })
        .collect()
}
/// Path of the JSON file for an already-normalized genome name:
/// `<root>/<normalized_name>.json`.
fn genome_path(&self, normalized_name: &str) -> PathBuf {
    let mut file_name = String::with_capacity(normalized_name.len() + ".json".len());
    file_name.push_str(normalized_name);
    file_name.push_str(".json");
    self.root.join(file_name)
}
fn acquire_lock(path: &std::path::Path) -> Result<(fs::File, PathBuf), GeneBankError> {
let lock_path = path.with_extension("lock");
let lock_file = fs::OpenOptions::new()
.create(true)
.truncate(true)
.write(true)
.open(&lock_path)
.map_err(|e| GeneBankError::Io {
path: lock_path.clone(),
source: e,
})?;
FileExt::lock(&lock_file).map_err(|e| GeneBankError::Io {
path: lock_path.clone(),
source: e,
})?;
Ok((lock_file, lock_path))
}
fn write_genome(path: &std::path::Path, genome: &WafGenome) -> Result<(), GeneBankError> {
let tmp_path = path.with_extension("json.tmp");
let json = serde_json::to_string_pretty(genome).map_err(|e| GeneBankError::Serialize {
waf: genome.waf_name.clone(),
source: e,
})?;
let mut file = fs::File::create(&tmp_path).map_err(|e| GeneBankError::Io {
path: tmp_path.clone(),
source: e,
})?;
file.write_all(json.as_bytes())
.map_err(|e| GeneBankError::Io {
path: tmp_path.clone(),
source: e,
})?;
file.sync_all().map_err(|e| GeneBankError::Io {
path: tmp_path.clone(),
source: e,
})?;
drop(file);
fs::rename(&tmp_path, path).map_err(|e| {
let _ = fs::remove_file(&tmp_path);
GeneBankError::Io {
path: path.to_path_buf(),
source: e,
}
})?;
if let Some(parent) = path.parent()
&& let Ok(dir) = fs::OpenOptions::new().read(true).open(parent)
{
let _ = dir.sync_all();
}
Ok(())
}
/// Reads and parses the genome file at `path` without touching the cache.
///
/// Returns `None` when the file does not exist, cannot be read (a warning is
/// logged), or does not parse (the file is handed to `quarantine_corrupt`).
fn read_genome_from_disk(path: &std::path::Path) -> Option<WafGenome> {
    if !path.exists() {
        return None;
    }

    let contents = match fs::read_to_string(path) {
        Ok(text) => text,
        Err(e) => {
            tracing::warn!(
                path = %path.display(),
                error = %e,
                "failed to read genome for merge"
            );
            return None;
        }
    };

    serde_json::from_str::<WafGenome>(&contents)
        .inspect_err(|e| Self::quarantine_corrupt(path, e))
        .ok()
}
/// Moves an unparseable genome file out of the way so that it can be inspected.
///
/// The file is renamed to `<stem>.json.corrupt.<epoch seconds>` next to the original
/// and a warning is logged. If the rename fails, an error is logged and the file is
/// deleted instead; a failure of that deletion is ignored.
fn quarantine_corrupt(path: &std::path::Path, error: &serde_json::Error) {
    let stamp = current_epoch();
    let quarantine = path.with_extension(format!("json.corrupt.{stamp}"));
    tracing::warn!(
        path = %path.display(),
        quarantine = %quarantine.display(),
        error = %error,
        "corrupt genome file — quarantining for inspection"
    );

    let Err(e) = fs::rename(path, &quarantine) else {
        return;
    };
    tracing::error!(
        error = %e,
        "failed to quarantine corrupt genome, removing instead"
    );
    let _ = fs::remove_file(path);
}
}
/// Errors returned by `GeneBank` operations.
#[derive(Debug, thiserror::Error)]
pub enum GeneBankError {
/// A filesystem operation failed.
#[error("gene bank I/O error at {}: {source}", path.display())]
Io {
/// File or directory the failing operation was acting on.
path: PathBuf,
/// Underlying I/O error.
source: std::io::Error,
},
/// A genome could not be serialized to JSON.
#[error("failed to serialize genome for {waf}: {source}")]
Serialize {
/// `waf_name` of the genome that failed to serialize.
waf: String,
/// Underlying serialization error.
source: serde_json::Error,
},
/// The home directory needed for the default storage location is unknown.
#[error("cannot determine home directory for gene bank storage")]
NoHomeDir,
}
/// Turns a WAF name into a string that is safe to use as a file stem.
///
/// The name is lowercased, then every character that is not alphanumeric, `-` or
/// `_` is replaced by `_`. Path separators and dots therefore never survive.
pub(crate) fn normalize_name(name: &str) -> String {
    let lowered = name.to_lowercase();
    let mut normalized = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        let keep = ch.is_alphanumeric() || matches!(ch, '-' | '_');
        normalized.push(if keep { ch } else { '_' });
    }
    normalized
}
/// Default storage directory: `.wafrift/genomes` under the user's home directory.
///
/// # Errors
/// Returns `GeneBankError::NoHomeDir` when `dirs::home_dir` yields no directory.
fn default_genome_dir() -> Result<PathBuf, GeneBankError> {
    match dirs::home_dir() {
        Some(mut dir) => {
            dir.push(".wafrift");
            dir.push("genomes");
            Ok(dir)
        }
        None => Err(GeneBankError::NoHomeDir),
    }
}
/// Current Unix time in whole seconds, or `0` if the system clock reads earlier
/// than the Unix epoch.
fn current_epoch() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
#[cfg(test)]
mod tests;