use crate::lineage::Lineage;
use rand::Rng;
use serde::{Deserialize, Serialize};
/// A candidate configuration: an ordered list of named genes together with
/// the score accumulated from its evaluations.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Chromosome {
/// Ordered `(gene name, gene value)` pairs. Name lookups (`gene`, `has_gene`)
/// use the first pair whose name matches.
pub genes: Vec<(String, String)>,
/// Running score. Starts at `0.0` and is re-weighted by every call to
/// `record_verdict`.
pub fitness: f64,
/// Number of verdicts recorded so far.
pub evaluations: u32,
/// Lineage record for this chromosome. When the field is absent from
/// deserialized input, serde fills it in by calling `default_lineage`.
#[serde(default = "default_lineage")]
pub lineage: Lineage,
}
/// Serde default for the `lineage` field of `Chromosome`.
///
/// Named by the `#[serde(default = "default_lineage")]` attribute on that
/// field; returns `Lineage::genesis(0)`, the same lineage `Chromosome::new`
/// assigns.
fn default_lineage() -> Lineage {
Lineage::genesis(0)
}
impl Chromosome {
    /// Creates an unevaluated chromosome (`fitness == 0.0`, `evaluations == 0`)
    /// whose lineage is `Lineage::genesis(0)`.
    #[must_use]
    pub fn new(genes: Vec<(String, String)>) -> Self {
        Self::with_lineage(genes, Lineage::genesis(0))
    }

    /// Creates an unevaluated chromosome (`fitness == 0.0`, `evaluations == 0`)
    /// carrying the supplied `lineage`.
    #[must_use]
    pub fn with_lineage(genes: Vec<(String, String)>, lineage: Lineage) -> Self {
        Self {
            genes,
            fitness: 0.0,
            evaluations: 0,
            lineage,
        }
    }

    /// Folds one oracle verdict into the running fitness.
    ///
    /// The verdict's `to_fitness()` value is blended into `fitness` with weight
    /// `alpha = 2 / (evaluations + 1)`, where `evaluations` is the count *after*
    /// this call. On the first verdict `alpha` is `1.0`, so the fitness becomes
    /// exactly the verdict's value; later verdicts carry progressively less
    /// weight.
    pub fn record_verdict(&mut self, verdict: &crate::types::OracleVerdict) {
        // Saturate instead of `+= 1`: on overflow a plain add panics in debug
        // builds and wraps to 0 in release builds, which would yield
        // `alpha == 2.0` and push the fitness outside the blended range.
        self.evaluations = self.evaluations.saturating_add(1);
        let value = verdict.to_fitness();
        let alpha = 2.0 / (f64::from(self.evaluations) + 1.0);
        self.fitness = alpha * value + (1.0 - alpha) * self.fitness;
    }

    /// Records a pass/fail outcome by converting it with
    /// `OracleVerdict::from_bool` and delegating to [`Self::record_verdict`].
    pub fn record(&mut self, passed: bool) {
        self.record_verdict(&crate::types::OracleVerdict::from_bool(passed));
    }

    /// Returns the value of the first gene called `name`, or `None` when no
    /// gene has that name.
    #[must_use]
    pub fn gene(&self, name: &str) -> Option<&str> {
        self.genes
            .iter()
            .find(|(gene_name, _)| gene_name == name)
            .map(|(_, value)| value.as_str())
    }

    /// Returns `true` when at least one gene is called `name`.
    #[must_use]
    pub fn has_gene(&self, name: &str) -> bool {
        self.gene(name).is_some()
    }

    /// Counts the genes whose value is anything other than the literal
    /// string `"None"`.
    #[must_use]
    pub fn active_gene_count(&self) -> usize {
        self.genes
            .iter()
            .filter(|(_, value)| value != "None")
            .count()
    }

    /// Hashes the gene list, feeding each name and then its value in order.
    ///
    /// Only `genes` contributes: `fitness`, `evaluations` and `lineage` are
    /// ignored. The result depends on gene order.
    ///
    /// The value comes from `DefaultHasher::new()`, whose algorithm the
    /// standard library leaves unspecified and free to change between Rust
    /// releases, so it should not be persisted or compared across builds.
    #[must_use]
    pub fn hash(&self) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        for (name, value) in &self.genes {
            name.hash(&mut hasher);
            value.hash(&mut hasher);
        }
        hasher.finish()
    }
}
/// Catalogue of the values each gene is allowed to take.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenePool {
/// `(gene name, allowed values)` entries, kept in insertion order.
/// Name lookups such as `values_for` use the first entry whose name matches.
pub pools: Vec<(String, Vec<String>)>,
}
impl GenePool {
    /// Builds the built-in pool with four genes, in this order: `encoding`,
    /// `content_type`, `header_obfuscation` and `grammar_rule`.
    ///
    /// Every gene lists `"None"` as its first value.
    #[must_use]
    pub fn default_wafrift() -> Self {
        // Converts one borrowed literal table into the owned pool entry.
        fn pool(name: &str, values: &[&str]) -> (String, Vec<String>) {
            (
                name.to_owned(),
                values.iter().map(|&value| value.to_owned()).collect(),
            )
        }

        Self {
            pools: vec![
                pool(
                    "encoding",
                    &[
                        "None",
                        "CaseAlternation",
                        "UrlEncode",
                        "DoubleUrlEncode",
                        "TripleUrlEncode",
                        "UnicodeEncode",
                        "HtmlEntityEncode",
                        "OverlongUtf8",
                        "WhitespaceInsertion",
                        "SqlCommentInsertion",
                        "NullByteInsertion",
                        "ChunkedSplit",
                        "ParameterPollution",
                    ],
                ),
                pool(
                    "content_type",
                    &[
                        "None",
                        "Multipart",
                        "MultipartQuotedBoundary",
                        "JsonNested",
                        "JsonUnicodeKeys",
                        "JsonWithComments",
                        "XmlCdata",
                        "XmlNamespace",
                        "MixedContentType",
                    ],
                ),
                pool(
                    "header_obfuscation",
                    &[
                        "None",
                        "CaseMixing",
                        "TabSeparator",
                        "WhitespacePadding",
                        "LineFolding",
                        "UnderscoreSubstitution",
                    ],
                ),
                pool(
                    "grammar_rule",
                    &[
                        "None",
                        "tautology_swap",
                        "comment_swap",
                        "whitespace_swap",
                        "equality_swap",
                        "union_swap",
                        "string_split",
                        "mysql_conditional",
                        "tag_event_swap",
                        "exec_fn_swap",
                        "uri_scheme",
                        "separator_swap",
                        "command_obfuscate",
                        "ifs_swap",
                        "path_obfuscate",
                        "variable_indirection",
                    ],
                ),
            ],
        }
    }

    /// Returns the allowed values of the first pool called `gene_name`, or
    /// `None` when no pool has that name.
    #[must_use]
    pub fn values_for(&self, gene_name: &str) -> Option<&[String]> {
        self.pools
            .iter()
            .find(|(name, _)| name == gene_name)
            .map(|(_, values)| values.as_slice())
    }

    /// Returns every pool name in storage order (duplicates are kept).
    #[must_use]
    pub fn gene_names(&self) -> Vec<&str> {
        self.pools.iter().map(|(name, _)| name.as_str()).collect()
    }

    /// Picks one allowed value for `gene_name` uniformly at random.
    ///
    /// Returns `None` when the gene is unknown or its value list is empty; in
    /// both cases `rng` is not advanced.
    #[must_use]
    pub fn random_value(&self, gene_name: &str, rng: &mut impl Rng) -> Option<String> {
        let values = self.values_for(gene_name)?;
        // `gen_range` panics on an empty range, so bail out before sampling.
        if values.is_empty() {
            return None;
        }
        let index = rng.gen_range(0..values.len());
        values.get(index).cloned()
    }

    /// Returns every distinct value across all pools, ordered by first
    /// appearance.
    #[must_use]
    pub fn all_values(&self) -> Vec<String> {
        use std::collections::HashSet;

        // A borrowed seen-set keeps deduplication linear; the output vector
        // alone decides the order, so the result matches a `contains` scan.
        let mut seen: HashSet<&str> = HashSet::new();
        let mut unique = Vec::new();
        for (_, pool_values) in &self.pools {
            for value in pool_values {
                if seen.insert(value.as_str()) {
                    unique.push(value.clone());
                }
            }
        }
        unique
    }
}
/// Builds a chromosome with one gene per pool name, each value drawn at
/// random from that gene's pool.
///
/// Genes appear in the order reported by `GenePool::gene_names`. When
/// `GenePool::random_value` yields nothing for a gene, the value falls back
/// to the literal `"None"`.
#[must_use]
pub fn random_chromosome(gene_pool: &GenePool, rng: &mut impl Rng) -> Chromosome {
    let names = gene_pool.gene_names();
    let mut genes = Vec::with_capacity(names.len());
    for name in names {
        let value = match gene_pool.random_value(name, &mut *rng) {
            Some(picked) => picked,
            None => String::from("None"),
        };
        genes.push((name.to_string(), value));
    }
    Chromosome::new(genes)
}
/// Builds the baseline chromosome: one gene per pool entry, in pool order,
/// with every value set to the literal `"None"`.
#[must_use]
pub fn baseline_chromosome(gene_pool: &GenePool) -> Chromosome {
    let mut genes = Vec::with_capacity(gene_pool.pools.len());
    for (name, _) in &gene_pool.pools {
        genes.push((name.clone(), String::from("None")));
    }
    Chromosome::new(genes)
}
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand::rngs::StdRng;

    // Fixed seed so every RNG-driven test is reproducible.
    const SEED: u64 = 42;

    /// Turns borrowed `(name, value)` literals into the owned pairs a
    /// `Chromosome` stores.
    fn genes(pairs: &[(&str, &str)]) -> Vec<(String, String)> {
        pairs
            .iter()
            .map(|&(name, value)| (name.to_owned(), value.to_owned()))
            .collect()
    }

    /// Chromosome with the single gene `a = 1`.
    fn single_gene() -> Chromosome {
        Chromosome::new(genes(&[("a", "1")]))
    }

    /// Deterministically seeded RNG.
    fn seeded_rng() -> StdRng {
        StdRng::seed_from_u64(SEED)
    }

    #[test]
    fn chromosome_new_zero_fitness() {
        let fresh = single_gene();
        assert_eq!(fresh.fitness, 0.0);
        assert_eq!(fresh.evaluations, 0);
    }

    #[test]
    fn chromosome_record_updates_fitness() {
        let mut subject = single_gene();
        subject.record(true);
        assert_eq!(subject.evaluations, 1);
        assert!(subject.fitness > 0.0);
    }

    #[test]
    fn chromosome_record_verdict_smoothing() {
        let mut subject = single_gene();
        subject.record_verdict(&crate::types::OracleVerdict::from_bool(true));
        let after_pass = subject.fitness;
        subject.record_verdict(&crate::types::OracleVerdict::from_bool(false));
        assert!(subject.fitness < after_pass);
    }

    #[test]
    fn chromosome_gene_lookup() {
        let subject = Chromosome::new(genes(&[
            ("encoding", "UrlEncode"),
            ("content_type", "None"),
        ]));
        assert_eq!(subject.gene("encoding"), Some("UrlEncode"));
        assert_eq!(subject.gene("missing"), None);
    }

    #[test]
    fn chromosome_has_gene() {
        let subject = Chromosome::new(genes(&[("encoding", "UrlEncode")]));
        assert!(subject.has_gene("encoding"));
        assert!(!subject.has_gene("missing"));
    }

    #[test]
    fn chromosome_active_gene_count_skips_none() {
        let subject = Chromosome::new(genes(&[
            ("a", "None"),
            ("b", "1"),
            ("c", "None"),
            ("d", "2"),
        ]));
        assert_eq!(subject.active_gene_count(), 2);
    }

    #[test]
    fn chromosome_hash_equal_for_equal_genes() {
        let left = Chromosome::new(genes(&[("a", "1"), ("b", "2")]));
        let right = Chromosome::new(genes(&[("a", "1"), ("b", "2")]));
        assert_eq!(left.hash(), right.hash());
    }

    #[test]
    fn chromosome_hash_different_for_different_genes() {
        let left = Chromosome::new(genes(&[("a", "1")]));
        let right = Chromosome::new(genes(&[("a", "2")]));
        assert_ne!(left.hash(), right.hash());
    }

    #[test]
    fn gene_pool_default_has_encoding() {
        let pool = GenePool::default_wafrift();
        let expected = [
            "encoding",
            "content_type",
            "header_obfuscation",
            "grammar_rule",
        ];
        for name in expected.iter() {
            assert!(pool.values_for(name).is_some());
        }
    }

    #[test]
    fn gene_pool_gene_names() {
        let pool = GenePool::default_wafrift();
        assert_eq!(pool.gene_names().len(), 4);
    }

    #[test]
    fn gene_pool_random_value_returns_some() {
        let pool = GenePool::default_wafrift();
        let mut rng = seeded_rng();
        assert!(pool.random_value("encoding", &mut rng).is_some());
    }

    #[test]
    fn gene_pool_random_value_missing_returns_none() {
        let pool = GenePool::default_wafrift();
        let mut rng = seeded_rng();
        assert!(pool.random_value("missing", &mut rng).is_none());
    }

    #[test]
    fn gene_pool_all_values_unique() {
        let pool = GenePool::default_wafrift();
        let values = pool.all_values();
        let distinct: std::collections::HashSet<_> = values.iter().collect();
        assert_eq!(values.len(), distinct.len());
    }

    #[test]
    fn baseline_chromosome_all_none() {
        let pool = GenePool::default_wafrift();
        let baseline = baseline_chromosome(&pool);
        assert!(baseline.genes.iter().all(|(_, value)| value == "None"));
        assert_eq!(baseline.genes.len(), pool.gene_names().len());
    }

    #[test]
    fn random_chromosome_has_all_genes() {
        let pool = GenePool::default_wafrift();
        let mut rng = seeded_rng();
        let sampled = random_chromosome(&pool, &mut rng);
        assert_eq!(sampled.genes.len(), pool.gene_names().len());
    }
}