use crate::error::Result;
use crate::transduce::Stage;
use wafrift_grammar::grammar::{bestfit, nfkc_preimage};
/// Maps probe bytes to the response bytes the detector inspects.
///
/// The detection routines in this file call [`ReflectionOracle::reflect`] once per
/// probe and then search the returned bytes for either the probe as sent or its
/// normalized ("folded") form.
pub trait ReflectionOracle {
/// Submits `input` and returns the bytes to be searched for a reflection.
///
/// Takes `&mut self`, so implementations may keep state between calls.
/// An `Err` is propagated unchanged by the callers in this file and aborts the scan.
fn reflect(&mut self, input: &[u8]) -> Result<Vec<u8>>;
}
/// Adapter that lets any `FnMut(&[u8]) -> Result<Vec<u8>>` closure act as a
/// [`ReflectionOracle`].
pub struct FnReflector<F>(pub F);

impl<F> ReflectionOracle for FnReflector<F>
where
    F: FnMut(&[u8]) -> Result<Vec<u8>>,
{
    /// Forwards `input` to the wrapped closure and returns its result untouched.
    fn reflect(&mut self, input: &[u8]) -> Result<Vec<u8>> {
        let callback = &mut self.0;
        callback(input)
    }
}
// Token embedded (encoded in various ways) in every probe built by `probes()`;
// the detector looks for it in the reflected bytes. The test module asserts it
// is lowercase ASCII alphanumeric only.
const MARKER: &str = "wz7qx4k9mfp2r8td";
// Token sent by `scan_origin` as the baseline request. If MARKER shows up in the
// response to this unrelated token, the scan is flagged as a marker collision.
const CONTROL: &str = "ctl8b3n6haje5wq1";
/// One detection probe: what to send, and what it looks like if a given stage ran.
struct Probe {
// Stage reported when this probe fires.
stage: Stage,
// Exact bytes handed to the oracle.
sent: Vec<u8>,
// Bytes expected in the reflection if `stage` was applied to `sent`.
// The probe fires only when these are present and `sent` is not.
folded: Vec<u8>,
}
/// Re-encodes every ASCII byte of `s` as a two-byte overlong UTF-8 sequence.
///
/// A byte `b` in `0x00..=0x7F` becomes `[0xC0 | (b >> 6), 0x80 | (b & 0x3F)]`
/// (for example `/` becomes `C0 AF`); bytes `>= 0x80` are copied through unchanged.
fn overlong_bytes(s: &[u8]) -> Vec<u8> {
    s.iter()
        .fold(Vec::with_capacity(s.len() * 2), |mut encoded, &byte| {
            match byte {
                0x00..=0x7F => {
                    let lead = 0xC0 | (byte >> 6);
                    let continuation = 0x80 | (byte & 0x3F);
                    encoded.extend_from_slice(&[lead, continuation]);
                }
                _ => encoded.push(byte),
            }
            encoded
        })
}
/// Builds the fixed probe set, one probe per normalization stage the detector knows.
///
/// Every probe pairs an encoded spelling of [`MARKER`] (`sent`) with the bytes an
/// origin applying `stage` would turn it into (`folded`). A probe can only fire
/// when `sent` and `folded` differ, because the caller requires the folded form to
/// be present *and* the sent form to be absent from the reflection.
///
/// The NFKC and best-fit probes are included only when the grammar crate yields at
/// least one variant for the input; otherwise they are silently omitted.
fn probes() -> Vec<Probe> {
    // Decimal numeric character reference for U+002D (hyphen-minus).
    // The `#` is written as a Rust escape on purpose: the entity then never appears
    // verbatim in this file, so tooling that decodes HTML entities in source text
    // cannot collapse the probe into its own folded form (which would make the
    // HTML stage undetectable).
    // NOTE(review): assumes Stage::HtmlEntityDecode handles decimal numeric
    // references — confirm against the stage implementation.
    const HYPHEN_ENTITY: &str = "&\u{23}45;";

    let marker = MARKER.as_bytes().to_vec();
    let mut out = Vec::new();

    // Percent-encoded hyphen: one URL decode yields `MARKER-`.
    out.push(Probe {
        stage: Stage::UrlDecode {
            plus_is_space: false,
        },
        sent: format!("{MARKER}%2D").into_bytes(),
        folded: format!("{MARKER}-").into_bytes(),
    });
    // Doubly percent-encoded hyphen: needs two URL decodes to reach `MARKER-`.
    out.push(Probe {
        stage: Stage::DoubleUrlDecode,
        sent: format!("{MARKER}%252D").into_bytes(),
        folded: format!("{MARKER}-").into_bytes(),
    });
    {
        use base64::Engine;
        out.push(Probe {
            stage: Stage::Base64Decode,
            sent: base64::engine::general_purpose::STANDARD
                .encode(&marker)
                .into_bytes(),
            folded: marker.clone(),
        });
    }
    out.push(Probe {
        stage: Stage::HexDecode,
        sent: hex::encode(&marker).into_bytes(),
        folded: marker.clone(),
    });
    out.push(Probe {
        stage: Stage::OverlongUtf8Decode,
        sent: overlong_bytes(&marker),
        folded: marker.clone(),
    });

    // A NUL byte spliced into the marker; stripping NULs restores the marker.
    let mut nul_sent = marker.clone();
    nul_sent.insert(2, 0);
    out.push(Probe {
        stage: Stage::StripNulls,
        sent: nul_sent,
        folded: marker.clone(),
    });

    // The hyphen must be sent *encoded*; sending it raw would make `sent` equal
    // to `folded` and the probe could never fire.
    out.push(Probe {
        stage: Stage::HtmlEntityDecode,
        sent: format!("{MARKER}{HYPHEN_ENTITY}").into_bytes(),
        folded: format!("{MARKER}-").into_bytes(),
    });
    out.push(Probe {
        stage: Stage::JsonUnescape,
        sent: format!("{MARKER}\\u002d").into_bytes(),
        folded: format!("{MARKER}-").into_bytes(),
    });

    // First NFKC pre-image of the marker, if the grammar crate offers one.
    if let Some(h) = nfkc_preimage::variants(MARKER, 1).into_iter().next() {
        debug_assert_eq!(nfkc_preimage::normalize(&h), MARKER);
        out.push(Probe {
            stage: Stage::NfkcNormalize,
            sent: h.into_bytes(),
            folded: marker.clone(),
        });
    }

    // First best-fit pre-image of the marker followed by an apostrophe.
    let bf_ascii = format!("{MARKER}'");
    if let Some(h) = bestfit::variants(&bf_ascii, 1).into_iter().next() {
        debug_assert_eq!(bestfit::normalize(&h), bf_ascii);
        out.push(Probe {
            stage: Stage::BestFitDownconvert,
            sent: h.into_bytes(),
            folded: bf_ascii.into_bytes(),
        });
    }
    out
}
fn run_probes(oracle: &mut dyn ReflectionOracle, suppress: bool) -> Result<(bool, Vec<Stage>)> {
let mut stages = Vec::new();
let mut reflection_observed = false;
for p in probes() {
let reflected = oracle.reflect(&p.sent)?;
let folded_seen = contains(&reflected, &p.folded);
let sent_survived = contains(&reflected, &p.sent);
if folded_seen || sent_survived {
reflection_observed = true;
}
if !suppress && folded_seen && !sent_survived {
stages.push(p.stage);
}
}
if stages.iter().any(|s| matches!(s, Stage::DoubleUrlDecode)) {
stages.retain(|s| !matches!(s, Stage::UrlDecode { .. }));
}
Ok((reflection_observed, stages))
}
/// Runs all probes against `oracle` and returns the normalization stages detected.
///
/// No baseline request is made and detections are never suppressed; whether any
/// reflection was observed at all is discarded.
///
/// # Errors
/// Returns the first error produced by `oracle`.
pub fn detect_origin_normalization(oracle: &mut dyn ReflectionOracle) -> Result<Vec<Stage>> {
    let (_reflection_observed, stages) = run_probes(oracle, false)?;
    Ok(stages)
}
/// Reports whether `needle` occurs as a contiguous byte run inside `haystack`.
///
/// An empty `needle` never matches (this also avoids calling `windows(0)`,
/// which panics).
fn contains(haystack: &[u8], needle: &[u8]) -> bool {
    match needle.len() {
        0 => false,
        width => haystack
            .windows(width)
            .any(|candidate| candidate == needle),
    }
}
/// Result of [`scan_origin`]: the detected stages plus two sanity signals about
/// the reflection channel itself.
#[derive(Debug, Clone, PartialEq)]
pub struct OriginScan {
/// True when at least one probe was seen in a response, either as sent or in
/// its folded form.
pub reflection_observed: bool,
/// True when `MARKER` already appeared in the response to the baseline
/// `CONTROL` request, i.e. before any marker-bearing probe was sent.
pub marker_collision: bool,
/// Stages detected by the probes; always empty when `marker_collision` is true.
pub stages: Vec<Stage>,
}
/// Fingerprints `oracle` with a baseline check followed by the full probe set.
///
/// First sends `CONTROL` and checks whether `MARKER` is already present in the
/// response. If it is, the probes are still sent (so reflection can be observed)
/// but no stage is reported.
///
/// # Errors
/// Returns the first error produced by `oracle`, from the baseline or any probe.
pub fn scan_origin(oracle: &mut dyn ReflectionOracle) -> Result<OriginScan> {
    let marker_collision = {
        let control_response = oracle.reflect(CONTROL.as_bytes())?;
        contains(&control_response, MARKER.as_bytes())
    };

    run_probes(oracle, marker_collision).map(|(reflection_observed, stages)| OriginScan {
        reflection_observed,
        marker_collision,
        stages,
    })
}
// Unit tests for the origin fingerprinter. Most tests drive the detector with a
// fake origin that applies a known `Pipeline` and assert that exactly the
// configured stage(s) are reported.
#[cfg(test)]
mod tests {
use super::*;
use crate::transduce::Pipeline;
// Fake origin: reflects whatever `Pipeline::apply` produces for the input.
struct FakeOrigin(Pipeline);
impl ReflectionOracle for FakeOrigin {
fn reflect(&mut self, input: &[u8]) -> Result<Vec<u8>> {
Ok(self.0.apply(input))
}
}
// Helper: runs `detect_origin_normalization` against a FakeOrigin built from `stages`.
fn detect(stages: Vec<Stage>) -> Vec<Stage> {
let mut o = FakeOrigin(Pipeline(stages));
detect_origin_normalization(&mut o).unwrap()
}
// --- Single-stage origins: each must be reported as exactly itself. ---
#[test]
fn identity_origin_detects_nothing() {
assert!(detect(vec![Stage::Identity]).is_empty());
}
#[test]
fn nfkc_origin_is_detected() {
assert_eq!(
detect(vec![Stage::NfkcNormalize]),
vec![Stage::NfkcNormalize]
);
}
#[test]
fn bestfit_origin_is_detected() {
assert_eq!(
detect(vec![Stage::BestFitDownconvert]),
vec![Stage::BestFitDownconvert]
);
}
#[test]
fn url_decoding_origin_is_detected() {
assert_eq!(
detect(vec![Stage::UrlDecode {
plus_is_space: false
}]),
vec![Stage::UrlDecode {
plus_is_space: false
}]
);
}
#[test]
fn null_stripping_origin_is_detected() {
assert_eq!(detect(vec![Stage::StripNulls]), vec![Stage::StripNulls]);
}
#[test]
fn overlong_utf8_decoding_origin_is_detected() {
assert_eq!(
detect(vec![Stage::OverlongUtf8Decode]),
vec![Stage::OverlongUtf8Decode]
);
}
#[test]
fn base64_decoding_origin_is_detected() {
assert_eq!(detect(vec![Stage::Base64Decode]), vec![Stage::Base64Decode]);
}
#[test]
fn hex_decoding_origin_is_detected() {
assert_eq!(detect(vec![Stage::HexDecode]), vec![Stage::HexDecode]);
}
// --- Cross-report checks: the exact-equality asserts double as proof that no
// other stage is reported alongside the configured one. ---
#[test]
fn base64_and_hex_do_not_cross_report() {
assert_eq!(detect(vec![Stage::Base64Decode]), vec![Stage::Base64Decode]);
assert_eq!(detect(vec![Stage::HexDecode]), vec![Stage::HexDecode]);
}
#[test]
fn base64_origin_does_not_falsely_report_other_decodes() {
assert_eq!(detect(vec![Stage::Base64Decode]), vec![Stage::Base64Decode]);
}
#[test]
fn byte_decodes_are_independent_no_cross_detection() {
assert_eq!(detect(vec![Stage::StripNulls]), vec![Stage::StripNulls]);
assert_eq!(
detect(vec![Stage::OverlongUtf8Decode]),
vec![Stage::OverlongUtf8Decode]
);
}
#[test]
fn json_unescaping_origin_is_detected() {
assert_eq!(detect(vec![Stage::JsonUnescape]), vec![Stage::JsonUnescape]);
}
#[test]
fn html_entity_decoding_origin_is_detected() {
assert_eq!(
detect(vec![Stage::HtmlEntityDecode]),
vec![Stage::HtmlEntityDecode]
);
}
// A double-decoding origin must be reported only as DoubleUrlDecode, with the
// single UrlDecode entry removed.
#[test]
fn double_url_decoding_origin_is_detected_and_subsumes_single() {
let d = detect(vec![Stage::DoubleUrlDecode]);
assert_eq!(d, vec![Stage::DoubleUrlDecode], "got {d:?}");
assert!(!d.contains(&Stage::UrlDecode {
plus_is_space: false
}));
}
// The converse: a single-decoding origin must not trip the double-decode probe.
#[test]
fn single_url_decode_is_not_reported_as_double() {
let d = detect(vec![Stage::UrlDecode {
plus_is_space: false,
}]);
assert_eq!(
d,
vec![Stage::UrlDecode {
plus_is_space: false
}]
);
assert!(!d.contains(&Stage::DoubleUrlDecode));
}
#[test]
fn json_and_html_decodes_do_not_cross_report() {
let j = detect(vec![Stage::JsonUnescape]);
assert_eq!(j, vec![Stage::JsonUnescape]);
assert!(!j.contains(&Stage::HtmlEntityDecode));
let h = detect(vec![Stage::HtmlEntityDecode]);
assert_eq!(h, vec![Stage::HtmlEntityDecode]);
assert!(!h.contains(&Stage::JsonUnescape));
}
#[test]
fn framework_decodes_do_not_falsely_report_url_or_base64() {
for st in [Stage::HtmlEntityDecode, Stage::JsonUnescape] {
let d = detect(vec![st.clone()]);
assert_eq!(d, vec![st.clone()], "stage {st:?} reported {d:?}");
}
}
// Coverage guard: every stage listed in `invertible` below must have a probe in
// `probes()`. Comparison is by enum discriminant, so variant fields are ignored.
#[test]
fn every_invertible_solver_stage_has_a_detection_probe() {
use std::collections::HashSet;
use std::mem::discriminant;
let probed: HashSet<_> = probes().iter().map(|p| discriminant(&p.stage)).collect();
let invertible = [
Stage::UrlDecode {
plus_is_space: false,
},
Stage::DoubleUrlDecode,
Stage::JsonUnescape,
Stage::HtmlEntityDecode,
Stage::NfkcNormalize,
Stage::BestFitDownconvert,
Stage::StripNulls,
Stage::OverlongUtf8Decode,
Stage::Base64Decode,
Stage::HexDecode,
];
for st in &invertible {
assert!(
probed.contains(&discriminant(st)),
"invertible solver stage {st:?} has no detection probe in probes() \
— the live fingerprinter is blind to an origin the solver can bypass"
);
}
}
// Two-stage origin: both stages are reported, in probe order (URL decode first).
#[test]
fn composite_url_then_nfkc_origin_detects_both_in_order() {
let detected = detect(vec![
Stage::UrlDecode {
plus_is_space: false,
},
Stage::NfkcNormalize,
]);
assert_eq!(
detected,
vec![
Stage::UrlDecode {
plus_is_space: false
},
Stage::NfkcNormalize
]
);
}
#[test]
fn nfkc_normalizing_origin_does_not_falsely_report_bestfit() {
let detected = detect(vec![Stage::NfkcNormalize]);
assert!(!detected.contains(&Stage::BestFitDownconvert));
assert!(detected.contains(&Stage::NfkcNormalize));
}
// End-to-end: the detected NFKC pipeline is fed to `solve_bypass` as the sink.
// The solved input must contain no raw '<' or '>' and must be classified as
// `Outcome::Pass` by a freshly built WAF with the same rule.
#[test]
fn detected_pipeline_drives_the_solver_to_a_targeted_bypass() {
use crate::canon::Channel;
use crate::normalize::Transform;
use crate::oracle::{ChannelSet, Rule, SimRegexWaf};
use crate::{Outcome, WafOracle, solve_bypass};
use wafrift_types::Request;
let detected = detect(vec![Stage::NfkcNormalize]);
assert_eq!(detected, vec![Stage::NfkcNormalize]);
let sink = Pipeline(detected);
let attack = b"<script>";
let mut waf = SimRegexWaf::new(
vec![Rule {
id: "941".into(),
channels: ChannelSet::none().with(Channel::Body),
transforms: vec![Transform::UrlDecodeUni, Transform::Lowercase],
pattern: regex::bytes::Regex::new("<script").unwrap(),
score: 5,
}],
5,
);
let build =
|b: &[u8]| Request::post("https://h/p", b.to_vec()).header("Content-Type", "text/html");
let sol = solve_bypass(attack, &sink, &mut waf, &build)
.unwrap()
.expect("a fingerprinted NFKC origin must yield a targeted homoglyph bypass");
assert!(!sol.input.contains(&b'<') && !sol.input.contains(&b'>'));
// Replay against a second WAF instance built from the same rule.
let mut replay = SimRegexWaf::new(
vec![Rule {
id: "941".into(),
channels: ChannelSet::none().with(Channel::Body),
transforms: vec![Transform::UrlDecodeUni, Transform::Lowercase],
pattern: regex::bytes::Regex::new("<script").unwrap(),
score: 5,
}],
5,
);
assert_eq!(replay.classify(&build(&sol.input)).unwrap(), Outcome::Pass);
}
// End-to-end for double URL decoding: the solved input has no raw '<', the sink
// pipeline turns it back into bytes containing the attack, and a fresh WAF with
// the same rule classifies it as `Outcome::Pass`.
#[test]
fn detected_double_decode_origin_drives_the_classic_double_encode_bypass() {
use crate::canon::Channel;
use crate::normalize::Transform;
use crate::oracle::{ChannelSet, Rule, SimRegexWaf};
use crate::{Outcome, WafOracle, solve_bypass};
use wafrift_types::Request;
let detected = detect(vec![Stage::DoubleUrlDecode]);
assert_eq!(detected, vec![Stage::DoubleUrlDecode], "got {detected:?}");
let sink = Pipeline(detected);
let attack = b"<script";
// Rule factory so the solving WAF and the replay WAF get identical rules.
let rule = || Rule {
id: "941".into(),
channels: ChannelSet::none().with(Channel::Body),
transforms: vec![Transform::UrlDecodeUni, Transform::Lowercase],
pattern: regex::bytes::Regex::new("<script").unwrap(),
score: 5,
};
let mut waf = SimRegexWaf::new(vec![rule()], 5);
let build =
|b: &[u8]| Request::post("https://h/p", b.to_vec()).header("Content-Type", "text/html");
let sol = solve_bypass(attack, &sink, &mut waf, &build)
.unwrap()
.expect("a fingerprinted double-decoding origin must yield a double-encoded bypass");
assert!(
!sol.input.contains(&b'<'),
"solved input must not contain raw '<': {:?}",
String::from_utf8_lossy(&sol.input)
);
assert!(
sink.apply(&sol.input)
.windows(attack.len())
.any(|w| w == attack)
);
let mut replay = SimRegexWaf::new(vec![rule()], 5);
assert_eq!(replay.classify(&build(&sol.input)).unwrap(), Outcome::Pass);
}
// --- Oracles for the `scan_origin` tests. ---
// Echoes the input unchanged.
struct EchoOrigin;
impl ReflectionOracle for EchoOrigin {
fn reflect(&mut self, input: &[u8]) -> Result<Vec<u8>> {
Ok(input.to_vec())
}
}
// Ignores the input and always returns the same bytes.
struct ConstOrigin(Vec<u8>);
impl ReflectionOracle for ConstOrigin {
fn reflect(&mut self, _input: &[u8]) -> Result<Vec<u8>> {
Ok(self.0.clone())
}
}
// Echoes the input and appends MARKER, so the marker is present in every
// response, including the baseline.
struct MarkerInjectOrigin;
impl ReflectionOracle for MarkerInjectOrigin {
fn reflect(&mut self, input: &[u8]) -> Result<Vec<u8>> {
let mut out = input.to_vec();
out.extend_from_slice(MARKER.as_bytes());
Ok(out)
}
}
#[test]
fn scan_confirms_reflection_on_an_echoing_identity_origin() {
let scan = scan_origin(&mut EchoOrigin).unwrap();
assert!(scan.reflection_observed, "echoing origin must be observed");
assert!(!scan.marker_collision);
assert!(scan.stages.is_empty(), "identity origin has no stages");
}
#[test]
fn scan_reports_no_reflection_when_the_channel_does_not_echo() {
let scan = scan_origin(&mut ConstOrigin(b"static page, no echo".to_vec())).unwrap();
assert!(
!scan.reflection_observed,
"a non-echoing channel must not be reported as observed"
);
assert!(scan.stages.is_empty());
}
// With the marker present at baseline, the scan must flag the collision and
// report no stages, while still noting that reflection works.
#[test]
fn scan_fails_closed_on_ambient_marker_collision() {
let scan = scan_origin(&mut MarkerInjectOrigin).unwrap();
assert!(scan.reflection_observed, "the echo channel still works");
assert!(
scan.marker_collision,
"ambient marker must be detected at baseline"
);
assert!(
scan.stages.is_empty(),
"marker collision must yield NO detections (fail-closed), got {:?}",
scan.stages
);
}
// The baseline request must not interfere with detecting a real stage.
#[test]
fn scan_still_detects_a_real_stage_through_the_baseline() {
let mut o = FakeOrigin(Pipeline(vec![Stage::Base64Decode]));
let scan = scan_origin(&mut o).unwrap();
assert!(scan.reflection_observed);
assert!(!scan.marker_collision);
assert_eq!(scan.stages, vec![Stage::Base64Decode]);
}
// Invariants on the two tokens: distinct, neither a substring of the other,
// and made only of lowercase ASCII letters and digits.
#[test]
fn marker_and_control_are_distinct_and_non_overlapping() {
assert_ne!(MARKER, CONTROL);
assert!(!CONTROL.contains(MARKER));
assert!(!MARKER.contains(CONTROL));
for tok in [MARKER, CONTROL] {
assert!(
tok.bytes()
.all(|b| b.is_ascii_lowercase() || b.is_ascii_digit()),
"{tok} must be lowercase-alnum (normalization-neutral)"
);
}
}
}