use super::ProbeContext;
use std::collections::HashMap;
pub fn extract_behavior_features(ctx: &ProbeContext, features: &mut HashMap<String, f64>) {
let baseline_len = ctx.baseline.body_bytes.max(1) as f64;
let probe_len = ctx.response.body_bytes as f64;
let len_ratio = (probe_len - baseline_len).abs() / baseline_len;
if len_ratio > 0.20 {
features.insert("behavior:response_length_delta".into(), len_ratio.min(1.0));
}
let baseline_words = ctx.baseline.body.split_whitespace().count().max(1) as f64;
let probe_words = ctx.response.body.split_whitespace().count() as f64;
let word_ratio = (probe_words - baseline_words).abs() / baseline_words;
if word_ratio > 0.15 {
features.insert("behavior:response_word_delta".into(), word_ratio.min(1.0));
}
let baseline_tags = count_html_tags(&ctx.baseline.body).max(1) as f64;
let probe_tags = count_html_tags(&ctx.response.body) as f64;
let tag_ratio = (probe_tags - baseline_tags).abs() / baseline_tags;
if tag_ratio > 0.10 {
features.insert("behavior:response_tag_delta".into(), tag_ratio.min(1.0));
}
if ctx.response.status != ctx.baseline.status {
features.insert("behavior:status_code_delta".into(), 1.0);
}
let header_diff =
(ctx.response.headers.len() as i64 - ctx.baseline.headers.len() as i64).unsigned_abs();
if header_diff >= 2 {
features.insert("behavior:header_count_delta".into(), 1.0);
}
let baseline_time = ctx.baseline.response_time_ms.max(1) as f64;
let probe_time = ctx.response.response_time_ms as f64;
if probe_time > baseline_time * 2.0 {
features.insert("behavior:response_time_delta".into(), 1.0);
}
let baseline_loc = ctx.baseline.headers.get("location");
let probe_loc = ctx.response.headers.get("location");
if baseline_loc != probe_loc {
if baseline_loc.is_some() || probe_loc.is_some() {
features.insert("behavior:redirect_behavior_delta".into(), 1.0);
}
}
if ctx.response.status >= 400 && ctx.baseline.status >= 400 {
let probe_errors = extract_error_text(&ctx.response.body);
let baseline_errors = extract_error_text(&ctx.baseline.body);
if !probe_errors.is_empty() && probe_errors != baseline_errors {
features.insert("behavior:error_message_delta".into(), 1.0);
}
}
if let Some(ref sequence) = ctx.probe_sequence {
if sequence.len() >= 2 {
let payload_lower = ctx.probe_payload.to_lowercase();
for resp in sequence.iter().skip(1) {
if resp.body.to_lowercase().contains(&payload_lower) {
features.insert("behavior:state_persisted_across_requests".into(), 1.0);
break;
}
}
if !ctx.response.body.to_lowercase().contains(&payload_lower) {
for resp in sequence.iter().skip(1) {
if resp.status >= 500
|| resp.body.to_lowercase().contains(&payload_lower)
{
features.insert("behavior:second_order_trigger".into(), 1.0);
break;
}
}
}
let initial_cookies = ctx.response.headers.get("set-cookie");
for resp in sequence.iter().skip(1) {
let later_cookies = resp.headers.get("set-cookie");
if initial_cookies != later_cookies && later_cookies.is_some() {
features.insert("behavior:session_state_mutated".into(), 1.0);
break;
}
}
if sequence.len() >= 3 {
let first_body = &sequence[0].body;
let last_body = &sequence[sequence.len() - 1].body;
if first_body != last_body
&& last_body.to_lowercase().contains(&payload_lower)
{
features.insert("behavior:database_state_changed".into(), 1.0);
}
}
}
}
if let Some(ref sequence) = ctx.probe_sequence {
if sequence.len() >= 2 {
let lengths: Vec<usize> = sequence.iter().map(|r| r.body_bytes).collect();
let is_increasing = lengths.windows(2).all(|w| w[1] > w[0]);
if is_increasing && lengths.len() >= 3 {
features.insert("behavior:progressive_disclosure".into(), 1.0);
}
let status_set: std::collections::HashSet<u16> =
sequence.iter().map(|r| r.status).collect();
if status_set.len() >= 2 {
features.insert("behavior:boundary_probe_diff".into(), 1.0);
}
let body_set: std::collections::HashSet<usize> =
sequence.iter().map(|r| r.body_bytes).collect();
if body_set.len() >= 3 {
features.insert("behavior:type_juggling_diff".into(), 1.0);
}
if ctx.encoding_used.is_some() {
let sizes: Vec<usize> = sequence.iter().map(|r| r.body_bytes).collect();
let size_set: std::collections::HashSet<usize> = sizes.iter().copied().collect();
if size_set.len() >= 2 {
features.insert("behavior:encoding_handling_diff".into(), 1.0);
}
}
let statuses: Vec<u16> = sequence.iter().map(|r| r.status).collect();
if statuses.len() >= 2 && statuses[0] != statuses[1] {
features.insert("behavior:method_swap_diff".into(), 1.0);
}
if sequence.len() >= 2 {
let ct1 = sequence[0].headers.get("content-type");
let ct2 = sequence[1].headers.get("content-type");
if ct1 != ct2 {
features.insert("behavior:content_type_swap_diff".into(), 1.0);
}
}
}
}
if let Some(ref sequence) = ctx.probe_sequence {
if sequence.len() >= 2 {
let all_same = sequence
.iter()
.all(|r| r.body == ctx.response.body && r.status == ctx.response.status);
if all_same {
features.insert("behavior:all_probes_same_response".into(), 1.0);
}
}
}
if len_ratio < 0.05 {
let time_delta = (probe_time - baseline_time).abs() / baseline_time;
if time_delta < 0.05 {
features.insert("behavior:delta_within_noise".into(), 1.0);
}
}
let probe_stripped = strip_dynamic_content(&ctx.response.body);
let baseline_stripped = strip_dynamic_content(&ctx.baseline.body);
if ctx.response.body != ctx.baseline.body && probe_stripped == baseline_stripped {
features.insert("behavior:only_cosmetic_diff".into(), 1.0);
}
if ctx.response.status == 429 {
features.insert("behavior:rate_limited_responses".into(), 1.0);
}
if let Some(ref sequence) = ctx.probe_sequence {
if sequence.iter().any(|r| r.status == 429) {
features.insert("behavior:rate_limited_responses".into(), 1.0);
}
}
}
/// Approximates the number of HTML tags in `body` by counting `<` characters.
///
/// Opening tags, closing tags and any stray `<` all count once each.
fn count_html_tags(body: &str) -> usize {
    // `<` is ASCII, and UTF-8 never reuses ASCII byte values inside
    // multi-byte sequences, so a byte scan counts exactly the `<` characters.
    body.bytes().filter(|&byte| byte == b'<').count()
}
/// Returns a comma-separated list of the error keywords found in `body`.
///
/// Matching is case-insensitive. Keywords appear in the fixed order
/// `error`, `exception`, `warning`, `fatal`, `failed`, each at most once; the
/// result is empty when none occurs.
fn extract_error_text(body: &str) -> String {
    const KEYWORDS: [&str; 5] = ["error", "exception", "warning", "fatal", "failed"];

    let haystack = body.to_lowercase();
    KEYWORDS
        .iter()
        .copied()
        .filter(|keyword| haystack.contains(*keyword))
        .collect::<Vec<_>>()
        .join(",")
}
/// Normalises values that commonly differ between otherwise identical
/// responses, so two bodies can be compared for meaningful changes.
///
/// * Every maximal run of 32 or more ASCII hex digits becomes `__TOKEN__`.
/// * Every maximal run of 10 to 13 ASCII decimal digits becomes
///   `__TIMESTAMP__` (presumably epoch seconds through epoch milliseconds).
///
/// All other text is copied through unchanged.
fn strip_dynamic_content(body: &str) -> String {
    // Tokens are replaced first. Decimal digits are also hex digits, so doing
    // the timestamp pass first would cut a long hex token in two whenever it
    // happened to contain a 10-13 digit run, leaving hex fragments behind.
    let without_tokens = replace_ascii_runs(
        body,
        |c| c.is_ascii_hexdigit(),
        |len| len >= 32,
        "__TOKEN__",
    );
    replace_ascii_runs(
        &without_tokens,
        |c| c.is_ascii_digit(),
        |len| (10..=13).contains(&len),
        "__TIMESTAMP__",
    )
}

/// Copies `input`, swapping each maximal run of characters accepted by
/// `in_run` for `placeholder` when `is_dynamic(run_length)` holds.
///
/// `in_run` must accept ASCII characters only, so that a run's byte length
/// equals its character count.
fn replace_ascii_runs(
    input: &str,
    in_run: impl Fn(char) -> bool,
    is_dynamic: impl Fn(usize) -> bool,
    placeholder: &str,
) -> String {
    let mut output = String::with_capacity(input.len());
    let mut rest = input;

    while let Some(start) = rest.find(|c: char| in_run(c)) {
        let (prefix, tail) = rest.split_at(start);
        output.push_str(prefix);

        // The run extends to the first rejected character, or to the end.
        let run_len = tail.find(|c: char| !in_run(c)).unwrap_or(tail.len());
        let (run, remainder) = tail.split_at(run_len);
        output.push_str(if is_dynamic(run.len()) { placeholder } else { run });

        rest = remainder;
    }

    output.push_str(rest);
    output
}
#[cfg(test)]
mod tests {
    use super::super::tests::*;
    use super::*;

    // Each test builds a context with the shared `make_ctx` / `make_response`
    // fixtures, runs the extractor once, and checks that one feature key is
    // present. The baseline used by `make_ctx` lives in the parent test module.

    /// A 200-character probe body should trip the length-delta feature.
    #[test]
    fn test_response_length_delta() {
        let long_body = "x".repeat(200);
        let ctx = make_ctx("sqli", "'", make_response(&long_body, 200));

        let mut found = HashMap::new();
        extract_behavior_features(&ctx, &mut found);

        assert!(found.contains_key("behavior:response_length_delta"));
    }

    /// A 500 probe response should register as a status change.
    #[test]
    fn test_status_code_delta() {
        let ctx = make_ctx("sqli", "'", make_response("Error", 500));

        let mut found = HashMap::new();
        extract_behavior_features(&ctx, &mut found);

        assert!(found.contains_key("behavior:status_code_delta"));
    }

    /// A plain 200 page should be classified as within noise.
    #[test]
    fn test_delta_within_noise() {
        let ctx = make_ctx(
            "sqli",
            "'",
            make_response("<html><body>Normal page</body></html>", 200),
        );

        let mut found = HashMap::new();
        extract_behavior_features(&ctx, &mut found);

        assert!(found.contains_key("behavior:delta_within_noise"));
    }

    /// Bodies that differ only in a 13-digit number count as cosmetic.
    #[test]
    fn test_only_cosmetic_diff() {
        let mut ctx = make_ctx(
            "sqli",
            "'",
            make_response(
                "<html><body>Normal page</body><span>1234567890123</span></html>",
                200,
            ),
        );
        ctx.baseline.body =
            String::from("<html><body>Normal page</body><span>9876543210987</span></html>");

        let mut found = HashMap::new();
        extract_behavior_features(&ctx, &mut found);

        assert!(found.contains_key("behavior:only_cosmetic_diff"));
    }

    /// A 429 probe response sets the rate-limit feature.
    #[test]
    fn test_rate_limited() {
        let ctx = make_ctx("sqli", "'", make_response("Too Many Requests", 429));

        let mut found = HashMap::new();
        extract_behavior_features(&ctx, &mut found);

        assert!(found.contains_key("behavior:rate_limited_responses"));
    }

    /// The payload showing up in a later sequence response means state persisted.
    #[test]
    fn test_probe_sequence_state_persisted() {
        let payload = "<script>alert(1)</script>";
        let mut ctx = make_ctx("xss", payload, make_response("OK", 200));
        let follow_up = format!("found: {}", payload);
        ctx.probe_sequence = Some(vec![
            make_response("first", 200),
            make_response(&follow_up, 200),
        ]);

        let mut found = HashMap::new();
        extract_behavior_features(&ctx, &mut found);

        assert!(found.contains_key("behavior:state_persisted_across_requests"));
    }

    /// A sequence whose responses all equal the probe response is flagged.
    #[test]
    fn test_all_probes_same_response() {
        let mut ctx = make_ctx("sqli", "'", make_response("same", 200));
        ctx.probe_sequence = Some(vec![
            make_response("same", 200),
            make_response("same", 200),
        ]);

        let mut found = HashMap::new();
        extract_behavior_features(&ctx, &mut found);

        assert!(found.contains_key("behavior:all_probes_same_response"));
    }
}