Skip to main content

wafrift_encoding/
range_header_smuggle.rs

//! `Range` request-header parser-differential smuggling (RFC 7233).
//!
//! The `Range` request header (RFC 7233 §3.1) is one of the most
//! casually-parsed header surfaces in HTTP. Most WAFs treat it as
//! opaque metadata; most origin servers parse it loosely because
//! real-world clients have always done weird things with it. That gap
//! is the bypass surface.
//!
//! ## Wire format (RFC 7233 §3.1)
//!
//! ```text
//! Range = byte-ranges-specifier / other-ranges-specifier
//! byte-ranges-specifier = bytes-unit "=" byte-range-set
//! bytes-unit = "bytes"
//! byte-range-set = 1#( byte-range-spec / suffix-byte-range-spec )
//! byte-range-spec = first-byte-pos "-" [ last-byte-pos ]
//! suffix-byte-range-spec = "-" suffix-length
//! ```
//!
//! Per the RFC the byte-range-set is a comma-separated list of range
//! specs. Real parser divergence emerges around:
//!
//! - **Multiple `Range:` headers** — RFC 7230 §3.2.2 prohibits them,
//!   but clients send them; nginx keeps the first, Apache the last.
//! - **Empty range** — `Range: bytes=` is accepted as "the whole
//!   resource" by some, rejected with 416 by others.
//! - **Reversed range** — `Range: bytes=100-0` (first > last); MUST
//!   be 416 per RFC but some servers swap the boundaries silently.
//! - **Overlapping ranges** — `Range: bytes=0-99,50-149`; some
//!   servers coalesce, some emit separate multipart parts.
//! - **Gigabyte ranges** — `Range: bytes=0-999999999`; servers that
//!   pre-allocate based on the declared length OOM.
//! - **Whitespace inside range** — `Range: bytes= 0-99` or
//!   `bytes=0 -99` (space around `-`); the RFC says no whitespace,
//!   parsers vary.
//! - **Suffix length** — `Range: bytes=-1000` (last 1000 bytes);
//!   some interpret it as "byte at -1000" (negative-position
//!   interpretation error → off-by-one or wraparound).
//! - **Non-bytes units** — `Range: pages=0-9`; the RFC allows them
//!   but only `bytes` is universally implemented; lax origins accept,
//!   strict ones reject.
41
42use rand::Rng;
43use wafrift_types::canary::Canary;
44use wafrift_types::pick::pick_from;
45use wafrift_types::probe::{SmuggleArtifact, SmuggleProbe};
46
/// Upper bound, in bytes, on any single `Range` header value produced
/// by this module (2 KiB). Values longer than this are truncated by the
/// probe constructor, so adversarial callers cannot use the builder to
/// synthesize multi-megabyte header lines.
pub const MAX_RANGE_HEADER_BYTES: usize = 2048;
51
/// The `Range`-header malformations this module knows how to emit.
///
/// Each variant targets one seam where RFC 7233 parsers are known to
/// disagree, so an inspecting WAF and the origin behind it may read the
/// same header differently.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RangeSmuggleVariant {
    /// Empty range-set: `Range: bytes=`. The RFC calls for 400/416;
    /// lax servers behave as if no `Range` was sent and return the
    /// whole resource.
    EmptyRangeSet,
    /// First position greater than last, e.g. `Range: bytes=100-0`.
    /// Strict servers answer 416; lax ones silently swap the bounds.
    ReversedFirstLast,
    /// Overlapping spans: `Range: bytes=0-99,50-149`. Servers differ
    /// between coalescing, emitting multipart parts, and rejecting.
    OverlappingRanges,
    /// Very large last-byte position: `Range: bytes=0-<gigabyte>`.
    /// Servers that pre-allocate from the declared length can OOM. The
    /// emitted value is capped at a sane ceiling
    /// (`SAFE_LARGE_LAST_POS`) so wafrift itself doesn't enable
    /// denial-of-service attacks on authorized targets.
    OverLargeLastPosition,
    /// Whitespace inside the spec: `Range: bytes= 0-99` (after `=`) or
    /// `bytes=0 -99` (around `-`). The RFC permits no whitespace here;
    /// lenient parsers strip it.
    WhitespaceInsideRange,
    /// Suffix range: `Range: bytes=-1000`. Some implementations
    /// misread it as "byte position -1000", leading to underflow.
    SuffixLengthAsNegativePosition,
    /// Non-`bytes` unit: `Range: pages=0-9`. Allowed by the RFC, but
    /// only `bytes` is universal; probes which side rejects it.
    NonBytesUnit,
    /// Two `Range:` headers, the first benign and the second the
    /// smuggled one. nginx keeps the first, Apache the last, which
    /// yields the differential.
    DuplicateHeaderFirstWinsBenign,
}
84
85impl SmuggleProbe for RangeSmuggleProbe {
86    fn canary(&self) -> &Canary {
87        &self.canary
88    }
89
90    fn technique(&self) -> String {
91        let suffix = match self.variant {
92            RangeSmuggleVariant::EmptyRangeSet => "empty-range-set",
93            RangeSmuggleVariant::ReversedFirstLast => "reversed-first-last",
94            RangeSmuggleVariant::OverlappingRanges => "overlapping-ranges",
95            RangeSmuggleVariant::OverLargeLastPosition => "over-large-last-position",
96            RangeSmuggleVariant::WhitespaceInsideRange => "whitespace-inside-range",
97            RangeSmuggleVariant::SuffixLengthAsNegativePosition => {
98                "suffix-length-as-negative-position"
99            }
100            RangeSmuggleVariant::NonBytesUnit => "non-bytes-unit",
101            RangeSmuggleVariant::DuplicateHeaderFirstWinsBenign => {
102                "duplicate-header-first-wins-benign"
103            }
104        };
105        format!("range.{suffix}")
106    }
107
108    fn description(&self) -> &str {
109        &self.description
110    }
111
112    fn artifact(&self) -> SmuggleArtifact {
113        SmuggleArtifact::Headers(self.header_lines.clone())
114    }
115}
116
/// Pool of last-byte positions used by the
/// [`OverLargeLastPosition`](RangeSmuggleVariant::OverLargeLastPosition)
/// probe. Every entry is big enough to OOM a sloppy pre-allocating
/// server, yet stays far below the QUIC-varint / signed-i64 ceiling so
/// wafrift never emits a value that could be misread as a sentinel.
pub(crate) const SAFE_LARGE_LAST_POS: &[u64] = &[
    10_u64.pow(6),  // 1 MB
    10_u64.pow(8),  // 100 MB
    10_u64.pow(10), // 10 GB
    10_u64.pow(12), // 1 TB
];
128
/// Pool of range units other than `bytes`, drawn from by the
/// [`NonBytesUnit`](RangeSmuggleVariant::NonBytesUnit) probe.
pub(crate) const NON_BYTES_UNITS: &[&str] = &[
    // PDF page ranges, accepted by some PDF servers.
    "pages",
    // Generic collection unit.
    "items",
    // SQL-style range, accepted by some REST APIs.
    "rows",
    // S3-style.
    "objects",
    // Log-streaming.
    "lines",
];
138
139/// A Range-header smuggle probe.
140#[derive(Debug, Clone)]
141pub struct RangeSmuggleProbe {
142    pub variant: RangeSmuggleVariant,
143    /// Header lines to attach. Most variants emit one `(name,
144    /// value)` pair; the duplicate-header variant emits two.
145    pub header_lines: Vec<(String, String)>,
146    pub description: String,
147    pub canary: Canary,
148}
149
150impl RangeSmuggleProbe {
151    fn finalise(
152        variant: RangeSmuggleVariant,
153        mut header_lines: Vec<(String, String)>,
154        description: String,
155    ) -> Self {
156        for (_, v) in header_lines.iter_mut() {
157            if v.len() > MAX_RANGE_HEADER_BYTES {
158                // §15 panic fix: cap at a UTF-8 char boundary (shared helper) so
159                // a multibyte value can't panic String::truncate mid-codepoint.
160                let cut = crate::floor_char_boundary(v, MAX_RANGE_HEADER_BYTES);
161                v.truncate(cut);
162            }
163        }
164        Self {
165            variant,
166            header_lines,
167            description,
168            canary: Canary::generate(),
169        }
170    }
171
172    /// `Range: bytes=` — empty range set.
173    #[must_use]
174    pub fn empty_range_set() -> Self {
175        Self::finalise(
176            RangeSmuggleVariant::EmptyRangeSet,
177            vec![("Range".into(), "bytes=".into())],
178            "Empty Range value — `bytes=` with no spec; RFC 7233 vs lax differential".into(),
179        )
180    }
181
182    /// `Range: bytes={first}-{last}` with `first > last`. Strict =
183    /// 416. Lax = swapped.
184    #[must_use]
185    pub fn reversed_first_last(first: u64, last: u64) -> Self {
186        // Caller may pass them either way; ensure first > last so
187        // the probe semantic holds. If they were already in correct
188        // order, swap them.
189        let (hi, lo) = if first > last {
190            (first, last)
191        } else if first == last {
192            (first.saturating_add(1), last)
193        } else {
194            (last, first)
195        };
196        let value = format!("bytes={hi}-{lo}");
197        Self::finalise(
198            RangeSmuggleVariant::ReversedFirstLast,
199            vec![("Range".into(), value)],
200            format!("Reversed Range `bytes={hi}-{lo}` — first > last violation, swap-vs-416 diff"),
201        )
202    }
203
204    /// `Range: bytes=0-99,50-149` — overlapping spans.
205    #[must_use]
206    pub fn overlapping_ranges() -> Self {
207        let value = "bytes=0-99,50-149".to_string();
208        Self::finalise(
209            RangeSmuggleVariant::OverlappingRanges,
210            vec![("Range".into(), value)],
211            "Overlapping Range spans — coalesce vs multipart vs reject differential".into(),
212        )
213    }
214
215    /// `Range: bytes=0-{LARGE}` — over-large last position. The
216    /// position is drawn from `SAFE_LARGE_LAST_POS` per-call.
217    #[must_use]
218    pub fn over_large_last_position() -> Self {
219        let last = pick_from(SAFE_LARGE_LAST_POS, 1_000_000_000_u64);
220        let value = format!("bytes=0-{last}");
221        Self::finalise(
222            RangeSmuggleVariant::OverLargeLastPosition,
223            vec![("Range".into(), value)],
224            format!(
225                "Over-large last-byte position {last} — naive pre-allocators OOM, capped vs error"
226            ),
227        )
228    }
229
230    /// `Range: bytes= 0 - 99` — whitespace sprinkled in the spec.
231    /// Specific whitespace insertion locations are randomised per
232    /// call so signature WAFs that pin "exactly one space after `=`"
233    /// don't catch every probe.
234    #[must_use]
235    pub fn whitespace_inside_range() -> Self {
236        let mut rng = rand::thread_rng();
237        let after_eq = if rng.gen_bool(0.5) { " " } else { "" };
238        let around_dash_left = if rng.gen_bool(0.5) { " " } else { "" };
239        let around_dash_right = if rng.gen_bool(0.5) { " " } else { "" };
240        let value = format!("bytes={after_eq}0{around_dash_left}-{around_dash_right}99");
241        Self::finalise(
242            RangeSmuggleVariant::WhitespaceInsideRange,
243            vec![("Range".into(), value)],
244            "Whitespace inside Range spec — strict-reject vs trim differential".into(),
245        )
246    }
247
248    /// `Range: bytes=-1000` — suffix range. Some implementations
249    /// misread the leading `-` as a sign.
250    #[must_use]
251    pub fn suffix_length_as_negative_position(suffix_len: u64) -> Self {
252        let value = format!("bytes=-{suffix_len}");
253        Self::finalise(
254            RangeSmuggleVariant::SuffixLengthAsNegativePosition,
255            vec![("Range".into(), value)],
256            format!("Suffix range `bytes=-{suffix_len}` — last-N vs negative-position misparse"),
257        )
258    }
259
260    /// `Range: <unit>=0-9` — non-`bytes` unit. Unit drawn from
261    /// `NON_BYTES_UNITS` per-call.
262    #[must_use]
263    pub fn non_bytes_unit() -> Self {
264        let unit = pick_from(NON_BYTES_UNITS, "pages");
265        let value = format!("{unit}=0-9");
266        Self::finalise(
267            RangeSmuggleVariant::NonBytesUnit,
268            vec![("Range".into(), value)],
269            format!("Non-bytes range unit `{unit}` — RFC allows; only `bytes` universal"),
270        )
271    }
272
273    /// Two `Range:` header lines — first benign full-resource,
274    /// second the smuggled range.
275    #[must_use]
276    pub fn duplicate_header_first_wins_benign(smuggle_range: &str) -> Self {
277        let benign = "bytes=0-".to_string(); // whole resource
278        let smuggle = if smuggle_range.starts_with("bytes=") {
279            smuggle_range.to_string()
280        } else {
281            format!("bytes={smuggle_range}")
282        };
283        Self::finalise(
284            RangeSmuggleVariant::DuplicateHeaderFirstWinsBenign,
285            vec![("Range".into(), benign), ("Range".into(), smuggle)],
286            "Duplicate Range headers — nginx-vs-Apache first/last-wins differential".into(),
287        )
288    }
289}
290
291/// Enumerate one probe per variant. Useful for sweep-style probes.
292#[must_use]
293pub fn all_variants() -> Vec<RangeSmuggleProbe> {
294    vec![
295        RangeSmuggleProbe::empty_range_set(),
296        RangeSmuggleProbe::reversed_first_last(100, 0),
297        RangeSmuggleProbe::overlapping_ranges(),
298        RangeSmuggleProbe::over_large_last_position(),
299        RangeSmuggleProbe::whitespace_inside_range(),
300        RangeSmuggleProbe::suffix_length_as_negative_position(1000),
301        RangeSmuggleProbe::non_bytes_unit(),
302        RangeSmuggleProbe::duplicate_header_first_wins_benign("bytes=100-199"),
303    ]
304}
305
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Value of the first header line carried by `probe`.
    fn first_value(probe: &RangeSmuggleProbe) -> &str {
        &probe.header_lines[0].1
    }

    #[test]
    fn sweep_emits_eight_distinct_variants() {
        let probes = all_variants();
        assert_eq!(probes.len(), 8);
        let kinds = probes.iter().map(|p| p.variant).collect::<HashSet<_>>();
        assert_eq!(kinds.len(), 8);
    }

    #[test]
    fn empty_range_value_is_just_bytes_equals() {
        let probe = RangeSmuggleProbe::empty_range_set();
        assert_eq!(first_value(&probe), "bytes=");
    }

    #[test]
    fn reversed_first_last_orders_high_then_low_on_wire() {
        // Arguments arrive as (0, 100); the builder must put 100 first.
        let probe = RangeSmuggleProbe::reversed_first_last(0, 100);
        assert_eq!(first_value(&probe), "bytes=100-0");
    }

    #[test]
    fn reversed_first_last_handles_equal_inputs_by_offset() {
        // Equal inputs would break the "first > last" invariant unless
        // the builder nudges one of them.
        let probe = RangeSmuggleProbe::reversed_first_last(50, 50);
        let v = first_value(&probe);
        assert!(
            v.contains("51-50") || v.contains("50-49"),
            "expected offset to break equality, got {v:?}"
        );
    }

    #[test]
    fn overlapping_ranges_contains_two_comma_separated_spans() {
        let probe = RangeSmuggleProbe::overlapping_ranges();
        let v = first_value(&probe);
        assert!(v.starts_with("bytes="));
        // Exactly one comma means exactly two spans.
        assert_eq!(v.matches(',').count(), 1);
    }

    #[test]
    fn over_large_last_position_picks_from_safe_pool() {
        let probe = RangeSmuggleProbe::over_large_last_position();
        // Anti-rig: the emitted position has to be a member of
        // SAFE_LARGE_LAST_POS, otherwise a hardcoded value could sneak
        // in and defeat the per-call signature randomisation.
        let last: u64 = first_value(&probe)
            .trim_start_matches("bytes=0-")
            .parse()
            .expect("parseable u64");
        assert!(
            SAFE_LARGE_LAST_POS.contains(&last),
            "last position {last} not in SAFE_LARGE_LAST_POS"
        );
    }

    #[test]
    fn whitespace_probe_contains_at_least_one_space_or_tab() {
        // Insertion points are random, so sample a handful of probes
        // and require that at least one of them carries a space
        // (anti-rig for the per-call randomness).
        let saw_ws = (0..20).any(|_| {
            let probe = RangeSmuggleProbe::whitespace_inside_range();
            first_value(&probe).contains(' ')
        });
        assert!(
            saw_ws,
            "20 calls to whitespace_inside_range produced ZERO spaces — RNG broken or coin biased"
        );
    }

    #[test]
    fn suffix_length_uses_dash_prefix() {
        let probe = RangeSmuggleProbe::suffix_length_as_negative_position(2048);
        assert_eq!(first_value(&probe), "bytes=-2048");
    }

    #[test]
    fn non_bytes_unit_picks_from_pool() {
        let probe = RangeSmuggleProbe::non_bytes_unit();
        let v = first_value(&probe);
        let unit_end = v.find('=').expect("=");
        let unit = &v[..unit_end];
        assert!(
            NON_BYTES_UNITS.contains(&unit),
            "unit {unit:?} not in NON_BYTES_UNITS pool"
        );
    }

    #[test]
    fn duplicate_header_probe_emits_two_range_lines() {
        let probe = RangeSmuggleProbe::duplicate_header_first_wins_benign("bytes=500-999");
        let lines = &probe.header_lines;
        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0].0, "Range");
        assert_eq!(lines[1].0, "Range");
        assert_eq!(lines[0].1, "bytes=0-"); // benign whole-resource
        assert_eq!(lines[1].1, "bytes=500-999");
    }

    #[test]
    fn duplicate_header_probe_accepts_unprefixed_smuggle_input() {
        // Convenience form: the caller omits the `bytes=` prefix.
        let probe = RangeSmuggleProbe::duplicate_header_first_wins_benign("100-199");
        assert_eq!(probe.header_lines[1].1, "bytes=100-199");
    }

    #[test]
    fn every_probe_carries_a_distinct_canary() {
        let first = RangeSmuggleProbe::empty_range_set();
        let second = RangeSmuggleProbe::empty_range_set();
        assert_ne!(first.canary.token, second.canary.token);
        assert_eq!(first.canary.token.len(), 16);
    }

    #[test]
    fn safe_large_last_pos_pool_within_signed_i64_band() {
        // Anti-rig: each pool entry has to fit in an i64 so downstream
        // code doing signed arithmetic on Range values cannot wrap.
        // i64::MAX is ~9.2e18.
        assert!(SAFE_LARGE_LAST_POS.iter().all(|&p| p < i64::MAX as u64));
    }

    #[test]
    fn non_bytes_units_pool_is_non_empty_and_unique() {
        assert!(!NON_BYTES_UNITS.is_empty());
        let unique = NON_BYTES_UNITS.iter().collect::<HashSet<&&str>>();
        assert_eq!(unique.len(), NON_BYTES_UNITS.len());
    }
}