use wafrift_types::Request;
/// Identifies which part of a request a [`Segment`]'s bytes were taken from.
///
/// The per-variant notes describe how `canonicalize` in this file assigns them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub enum Channel {
/// The path portion of the request URL.
Path,
/// A parameter name from the URL query string or from a form-urlencoded body.
ArgName,
/// A parameter value from the URL query string or from a form-urlencoded body.
ArgValue,
/// The name of a header other than `Cookie`.
HeaderName,
/// The value of a header other than `Cookie`.
HeaderValue,
/// The name of one cookie parsed out of a `Cookie` header.
CookieName,
/// The value of one cookie parsed out of a `Cookie` header.
CookieValue,
/// The raw request body when it is not form-urlencoded.
Body,
}
/// One run of raw request bytes tagged with the [`Channel`] it came from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Segment {
/// The part of the request these bytes belong to.
pub channel: Channel,
/// The bytes exactly as copied from the request (no decoding is applied in this file).
pub bytes: Vec<u8>,
}
/// A flattened view of a request: its method plus a list of channel-tagged byte segments.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CanonView {
/// The request method; `canonicalize` stores it ASCII-uppercased.
pub method: String,
/// The segments in the order they were extracted from the request.
pub segments: Vec<Segment>,
}
impl CanonView {
    /// Returns the byte slices of every segment tagged with `channel`, in segment order.
    ///
    /// The result is empty when no segment carries that channel.
    #[must_use]
    pub fn channel(&self, channel: Channel) -> Vec<&[u8]> {
        self.segments
            .iter()
            .filter_map(|seg| (seg.channel == channel).then(|| seg.bytes.as_slice()))
            .collect()
    }

    /// Returns the combined length, in bytes, of all segments across every channel.
    #[must_use]
    pub fn total_bytes(&self) -> usize {
        self.segments
            .iter()
            .fold(0, |running, seg| running + seg.bytes.len())
    }
}
/// Splits a form-urlencoded byte string into `(name, value)` pairs.
///
/// Pairs are separated by `&`; within a pair the first `=` separates name from
/// value, and a pair without `=` gets an empty value. Bytes are copied verbatim
/// (no percent-decoding). Empty input yields no pairs, but an empty pair inside
/// non-empty input (e.g. from `&&`) is kept as `("", "")`.
fn split_form(raw: &[u8]) -> Vec<(Vec<u8>, Vec<u8>)> {
    let mut pairs = Vec::new();
    if raw.is_empty() {
        return pairs;
    }
    for chunk in raw.split(|byte| *byte == b'&') {
        // `splitn(2, ..)` always yields the name first; the value is present
        // only when the chunk contained an `=`.
        let mut halves = chunk.splitn(2, |byte| *byte == b'=');
        let name = halves.next().unwrap_or_default().to_vec();
        let value = halves.next().unwrap_or_default().to_vec();
        pairs.push((name, value));
    }
    pairs
}
/// Splits a `Cookie` header value into `(name, value)` byte pairs.
///
/// Entries are separated by `;`. At most one leading space is removed from each
/// entry, and entries that are empty after that are skipped. The first `=`
/// separates name from value; an entry without `=` gets an empty value.
fn split_cookies(raw: &str) -> Vec<(Vec<u8>, Vec<u8>)> {
    let mut cookies = Vec::new();
    for entry in raw.split(';') {
        let entry = match entry.strip_prefix(' ') {
            Some(stripped) => stripped,
            None => entry,
        };
        if entry.is_empty() {
            continue;
        }
        let (name, value) = entry.split_once('=').unwrap_or((entry, ""));
        cookies.push((name.as_bytes().to_vec(), value.as_bytes().to_vec()));
    }
    cookies
}
#[must_use]
pub fn canonicalize(req: &Request) -> CanonView {
let mut segments = Vec::new();
let url = req.url();
let after_authority = match url.find("://") {
Some(s) => {
let rest = &url[s + 3..];
rest.find('/').map_or("/", |p| &rest[p..])
}
None => url,
};
let (path, query) = match after_authority.find('?') {
Some(q) => (&after_authority[..q], Some(&after_authority[q + 1..])),
None => (after_authority, None),
};
let path = path.split('#').next().unwrap_or(path);
segments.push(Segment {
channel: Channel::Path,
bytes: path.as_bytes().to_vec(),
});
if let Some(q) = query {
let q = q.split('#').next().unwrap_or(q);
for (n, v) in split_form(q.as_bytes()) {
segments.push(Segment {
channel: Channel::ArgName,
bytes: n,
});
segments.push(Segment {
channel: Channel::ArgValue,
bytes: v,
});
}
}
for (name, value) in req.headers() {
if name.eq_ignore_ascii_case("cookie") {
for (cn, cv) in split_cookies(value) {
segments.push(Segment {
channel: Channel::CookieName,
bytes: cn,
});
segments.push(Segment {
channel: Channel::CookieValue,
bytes: cv,
});
}
} else {
segments.push(Segment {
channel: Channel::HeaderName,
bytes: name.as_bytes().to_vec(),
});
segments.push(Segment {
channel: Channel::HeaderValue,
bytes: value.as_bytes().to_vec(),
});
}
}
if let Some(body) = req.body_bytes() {
let is_form = req.content_type().is_some_and(|ct| {
let type_subtype = ct.split(';').next().unwrap_or(ct).trim();
type_subtype.eq_ignore_ascii_case("application/x-www-form-urlencoded")
});
if is_form {
for (n, v) in split_form(body) {
segments.push(Segment {
channel: Channel::ArgName,
bytes: n,
});
segments.push(Segment {
channel: Channel::ArgValue,
bytes: v,
});
}
} else {
segments.push(Segment {
channel: Channel::Body,
bytes: body.to_vec(),
});
}
}
CanonView {
method: req.method().as_str().to_ascii_uppercase(),
segments,
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use wafrift_types::Request;

    /// Builds a POST to `http://t/` carrying `body` and a `Content-Type` header set to `ct`.
    fn req_with_content_type(ct: &str, body: &[u8]) -> Request {
        let mut request = Request::post("http://t/", body.to_vec());
        request
            .headers_mut()
            .push(("Content-Type".into(), ct.into()));
        request
    }

    /// Collects every `ArgName` segment of `view` as an owned UTF-8 string.
    fn arg_names(view: &CanonView) -> Vec<String> {
        view.channel(Channel::ArgName)
            .into_iter()
            .map(|raw| std::str::from_utf8(raw).unwrap().to_string())
            .collect()
    }

    #[test]
    fn form_body_with_lowercase_content_type_splits_into_args() {
        let request = req_with_content_type("application/x-www-form-urlencoded", b"a=1&b=2");
        let view = canonicalize(&request);
        assert_eq!(arg_names(&view), vec!["a", "b"]);
    }

    #[test]
    fn form_body_with_mixed_case_content_type_still_splits_into_args() {
        let request = req_with_content_type("Application/X-WWW-Form-URLencoded", b"a=1&b=2");
        let view = canonicalize(&request);
        assert_eq!(
            arg_names(&view),
            vec!["a", "b"],
            "mixed-case content type must still parse as form per RFC 7231 §3.1.1.1"
        );
        // A form body must be fully consumed as args, leaving no raw body segment.
        assert!(view.channel(Channel::Body).is_empty());
    }

    #[test]
    fn form_body_with_uppercase_content_type_still_splits_into_args() {
        let request = req_with_content_type("APPLICATION/X-WWW-FORM-URLENCODED", b"x=y");
        let view = canonicalize(&request);
        let names = view.channel(Channel::ArgName);
        let values = view.channel(Channel::ArgValue);
        assert_eq!(names[0], b"x");
        assert_eq!(values[0], b"y");
    }

    #[test]
    fn form_body_with_charset_parameter_splits_into_args() {
        let request =
            req_with_content_type("application/x-www-form-urlencoded; charset=UTF-8", b"k=v");
        let view = canonicalize(&request);
        let names = view.channel(Channel::ArgName);
        assert_eq!(names[0], b"k");
    }

    #[test]
    fn json_body_does_not_split_into_args() {
        let request = req_with_content_type("application/json", br#"{"a":1}"#);
        let view = canonicalize(&request);
        assert!(view.channel(Channel::ArgName).is_empty());
        let bodies = view.channel(Channel::Body);
        assert_eq!(bodies[0], br#"{"a":1}"#);
    }

    #[test]
    fn form_body_with_unrelated_subtype_does_not_split() {
        // A subtype that merely starts with the form media type must not match.
        let request = req_with_content_type("application/x-www-form-urlencoded-extra", b"a=1");
        let view = canonicalize(&request);
        assert!(view.channel(Channel::ArgName).is_empty());
        let bodies = view.channel(Channel::Body);
        assert_eq!(bodies[0], b"a=1");
    }
}