Skip to main content

wafrift_encoding/
contextual.rs

1use crate::encoding::Strategy;
2use crate::encoding::strategy::MAX_PAYLOAD_SIZE;
3use wafrift_types::injection_context::{ContextualEncodeError, InjectionContext};
4
5pub fn encode_in_context(
6    payload: &[u8],
7    strategy: Strategy,
8    context: InjectionContext,
9) -> Result<String, ContextualEncodeError> {
10    // §7 DEDUP: context-specific limits reference the canonical
11    // `MAX_PAYLOAD_SIZE` (8 MiB) instead of repeating the bare literal.
12    // The pre-fix had two independent `8 * 1024 * 1024` entries; both
13    // now track the shared constant so a single edit adjusts them all.
14    let max_size = match context {
15        InjectionContext::JsonString => MAX_PAYLOAD_SIZE / 2, // 4 MiB
16        InjectionContext::JsonNumber => 1024,
17        InjectionContext::XmlAttribute => MAX_PAYLOAD_SIZE / 8, // 1 MiB
18        InjectionContext::XmlCdata => MAX_PAYLOAD_SIZE,
19        InjectionContext::HeaderValue => 8 * 1024,
20        InjectionContext::CookieValue => 4 * 1024,
21        InjectionContext::MultipartFileName => 256,
22        _ => MAX_PAYLOAD_SIZE,
23    };
24
25    if payload.len() > max_size {
26        return Err(ContextualEncodeError::PayloadTooLarge {
27            context,
28            size: payload.len(),
29            max: max_size,
30        });
31    }
32
33    let base = match crate::encoding::encode(payload, strategy) {
34        Ok(s) => s,
35        Err(e) => {
36            return Err(match e {
37                crate::error::EncodeError::InvalidUtf8 => {
38                    ContextualEncodeError::InvalidUtf8 { offset: 0 }
39                }
40                crate::error::EncodeError::PayloadTooLarge { max, actual } => {
41                    ContextualEncodeError::PayloadTooLarge {
42                        context,
43                        size: actual,
44                        max,
45                    }
46                }
47                crate::error::EncodeError::LayeredOutputTooLarge { max, actual } => {
48                    ContextualEncodeError::PayloadTooLarge {
49                        context,
50                        size: actual,
51                        max,
52                    }
53                }
54                crate::error::EncodeError::InvalidContext {
55                    strategy: s,
56                    context: _,
57                } => ContextualEncodeError::ContextIncompatible {
58                    strategy: s.into(),
59                    context,
60                    reason: "strategy invalid for context".into(),
61                },
62                crate::error::EncodeError::InvalidConfig(msg) => {
63                    ContextualEncodeError::ContextIncompatible {
64                        strategy: "config".into(),
65                        context,
66                        reason: msg,
67                    }
68                }
69            });
70        }
71    };
72
73    escape_for_context(&base, context)
74}
75
76pub fn escape_for_context(
77    input: &str,
78    context: InjectionContext,
79) -> Result<String, ContextualEncodeError> {
80    let escaped = match context {
81        InjectionContext::JsonString => {
82            let mut s = String::with_capacity(input.len() + 10);
83            for c in input.chars() {
84                match c {
85                    '\\' => s.push_str("\\\\"),
86                    '"' => s.push_str("\\\""),
87                    '\n' => s.push_str("\\n"),
88                    '\r' => s.push_str("\\r"),
89                    '\t' => s.push_str("\\t"),
90                    '\x00'..='\x1f' => s.push_str(&format!("\\u{:04x}", c as u32)),
91                    // U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR
92                    // are valid in JSON strings per RFC 8259 but are line
93                    // terminators in legacy ECMAScript / JSONP / eval
94                    // contexts. Pre-fix a payload-controlled value with
95                    // U+2028 inlined into <script>JSON</script> would
96                    // close the string literal and inject script. Escape
97                    // both for defence-in-depth even when shipping pure
98                    // JSON over the wire.
99                    '\u{2028}' => s.push_str("\\u2028"),
100                    '\u{2029}' => s.push_str("\\u2029"),
101                    _ => s.push(c),
102                }
103            }
104            s
105        }
106        InjectionContext::JsonNumber => {
107            if input.chars().any(|c| {
108                !c.is_ascii_digit() && c != '.' && c != '-' && c != 'e' && c != 'E' && c != '+'
109            }) {
110                return Err(ContextualEncodeError::ContextIncompatible {
111                    strategy: "escape".into(),
112                    context,
113                    reason: "not a valid JSON number".into(),
114                });
115            }
116            input.to_string()
117        }
118        InjectionContext::XmlAttribute => {
119            if input.contains('\x00') {
120                return Err(ContextualEncodeError::ContextIncompatible {
121                    strategy: "escape".into(),
122                    context,
123                    reason: "null byte in xml attribute".into(),
124                });
125            }
126            // XML allows single-quoted attributes; pre-fix only escaped
127            // `&"<>` and a payload with `'` would break out of an
128            // `<elem attr='...'>` form. Add &apos; escape.
129            input
130                .replace('&', "&amp;")
131                .replace('"', "&quot;")
132                .replace('\'', "&apos;")
133                .replace('<', "&lt;")
134                .replace('>', "&gt;")
135        }
136        InjectionContext::XmlCdata => {
137            if input.contains("]]>") {
138                return Err(ContextualEncodeError::ContextIncompatible {
139                    strategy: "escape".into(),
140                    context,
141                    reason: "CDATA cannot contain ]]>".into(),
142                });
143            }
144            input.to_string()
145        }
146        InjectionContext::XmlText => input
147            .replace('&', "&amp;")
148            .replace('<', "&lt;")
149            .replace('>', "&gt;"),
150        InjectionContext::HtmlAttribute => input
151            .replace('&', "&amp;")
152            .replace('"', "&quot;")
153            .replace('\'', "&#x27;")
154            .replace('<', "&lt;"),
155        InjectionContext::HtmlText => input.replace('&', "&amp;").replace('<', "&lt;"),
156        InjectionContext::UrlQuery => urlencoding::encode(input).to_string(),
157        InjectionContext::UrlPath => urlencoding::encode(input).to_string().replace("%2F", "/"),
158        InjectionContext::UrlFragment => urlencoding::encode(input).to_string(),
159        InjectionContext::HeaderValue => {
160            if input.contains('\r') || input.contains('\n') {
161                return Err(ContextualEncodeError::ContextIncompatible {
162                    strategy: "escape".into(),
163                    context,
164                    reason: "CR/LF in header value".into(),
165                });
166            }
167            if input.contains('\x00') {
168                return Err(ContextualEncodeError::ContextIncompatible {
169                    strategy: "escape".into(),
170                    context,
171                    reason: "null byte in header value".into(),
172                });
173            }
174            input.to_string()
175        }
176        InjectionContext::CookieValue => input
177            // RFC 6265 §4.1.1 cookie-octet excludes space, ",", '"', `\\`
178            // in addition to ; = CTLs. Pre-fix the missing chars caused
179            // Chrome / Firefox / curl to truncate the cookie at the
180            // offending byte — making bypass probes silently lie about
181            // the value that actually reached the server.
182            .replace(';', "%3B")
183            .replace('=', "%3D")
184            .replace(' ', "%20")
185            .replace(',', "%2C")
186            .replace('"', "%22")
187            .replace('\\', "%5C")
188            .replace('\x00', "%00")
189            .replace('\r', "%0D")
190            .replace('\n', "%0A"),
191        InjectionContext::MultipartField => {
192            if input.contains('\r') || input.contains('\n') {
193                return Err(ContextualEncodeError::ContextIncompatible {
194                    strategy: "escape".into(),
195                    context,
196                    reason: "CR/LF would break multipart structure".into(),
197                });
198            }
199            input.to_string()
200        }
201        InjectionContext::MultipartFileName => {
202            if input.contains('"') {
203                return Err(ContextualEncodeError::ContextIncompatible {
204                    strategy: "escape".into(),
205                    context,
206                    reason: "quote in filename".into(),
207                });
208            }
209            if input.contains('\r') || input.contains('\n') {
210                return Err(ContextualEncodeError::ContextIncompatible {
211                    strategy: "escape".into(),
212                    context,
213                    reason: "CR/LF in filename".into(),
214                });
215            }
216            input.to_string()
217        }
218        InjectionContext::PlainBody => input.to_string(),
219        _ => input.to_string(),
220    };
221    validate_in_context(&escaped, context)?;
222    Ok(escaped)
223}
224
225pub fn validate_in_context(
226    payload: &str,
227    context: InjectionContext,
228) -> Result<(), ContextualEncodeError> {
229    match context {
230        InjectionContext::JsonString => {
231            let mut chars = payload.chars().peekable();
232            while let Some(c) = chars.next() {
233                if c == '"' {
234                    return Err(ContextualEncodeError::ContextIncompatible {
235                        strategy: "validate".into(),
236                        context,
237                        reason: "unescaped double quote in JSON string".into(),
238                    });
239                }
240                if c == '\\' {
241                    let escaped = chars.next();
242                    match escaped {
243                        Some('\\' | '"' | 'n' | 'r' | 't' | 'b' | 'f' | '/') => {}
244                        Some('u') => {
245                            // Validate exactly 4 hex digits after \u
246                            for _ in 0..4 {
247                                match chars.next() {
248                                    Some(c) if c.is_ascii_hexdigit() => {}
249                                    _ => {
250                                        return Err(ContextualEncodeError::ContextIncompatible {
251                                            strategy: "validate".into(),
252                                            context,
253                                            reason: "invalid Unicode escape in JSON string".into(),
254                                        });
255                                    }
256                                }
257                            }
258                        }
259                        Some(other) => {
260                            return Err(ContextualEncodeError::ContextIncompatible {
261                                strategy: "validate".into(),
262                                context,
263                                reason: format!("invalid JSON escape sequence: \\{other}"),
264                            });
265                        }
266                        None => {
267                            return Err(ContextualEncodeError::ContextIncompatible {
268                                strategy: "validate".into(),
269                                context,
270                                reason: "trailing backslash in JSON string".into(),
271                            });
272                        }
273                    }
274                }
275            }
276        }
277        InjectionContext::XmlAttribute => {
278            // F137: pre-fix the `&` branch did `chars.by_ref().take(6).collect()`
279            // which UNCONDITIONALLY consumed the next 6 chars regardless of
280            // whether an entity matched. Those 6 chars were never validated,
281            // so a payload like `&lt;<script>` slipped past — the validator
282            // saw `&`, ate `lt;<sc` to "check" for a known entity, recognised
283            // `lt;`, and then never inspected the `<` it had already swallowed.
284            // Switch to a lookahead via `chars.clone()` (cheap — `Chars` is a
285            // slice cursor) and advance only as far as a matched entity.
286            let mut chars = payload.chars();
287            const ENTITIES: &[&str] = &["quot;", "apos;", "amp;", "lt;", "gt;"];
288            while let Some(c) = chars.next() {
289                if c == '"' {
290                    return Err(ContextualEncodeError::ContextIncompatible {
291                        strategy: "validate".into(),
292                        context,
293                        reason: "unescaped double quote in XML attribute".into(),
294                    });
295                }
296                // Single-quoted XML attributes (attr='...') are equally valid in
297                // XML 1.0 §3.1. An unescaped `'` inside such an attribute breaks
298                // out of the value just as `"` does in a double-quoted attribute.
299                if c == '\'' {
300                    return Err(ContextualEncodeError::ContextIncompatible {
301                        strategy: "validate".into(),
302                        context,
303                        reason: "unescaped single quote in XML attribute".into(),
304                    });
305                }
306                if c == '<' {
307                    return Err(ContextualEncodeError::ContextIncompatible {
308                        strategy: "validate".into(),
309                        context,
310                        reason: "unescaped `<` in XML attribute".into(),
311                    });
312                }
313                if c == '&' {
314                    let lookahead: String = chars.clone().take(6).collect();
315                    if let Some(matched) = ENTITIES.iter().find(|e| lookahead.starts_with(*e)) {
316                        // Consume exactly the entity body (name + `;`). The
317                        // rest of the payload stays in `chars` for the next
318                        // iteration so every other byte is still validated.
319                        for _ in 0..matched.len() {
320                            chars.next();
321                        }
322                    }
323                    // Lenient on unknown `&`: leave `chars` untouched and
324                    // keep scanning. An `&` alone is technically valid XML
325                    // text per XML 1.0 §2.4 only when not followed by an
326                    // entity-like shape; we don't reject it here so the
327                    // existing permissive contract holds.
328                }
329            }
330        }
331        // Contexts below have no validation rules yet. Adding an explicit
332        // arm for each ensures the compiler warns us when a new variant is
333        // added so we can decide whether it needs validation.
334        InjectionContext::PlainBody => {
335            // Plain body accepts any byte sequence; nothing to validate.
336        }
337        InjectionContext::XmlCdata if payload.contains("]]>") => {
338            return Err(ContextualEncodeError::ContextIncompatible {
339                strategy: "validate".into(),
340                context,
341                reason: "CDATA payload contains `]]>` (unterminated section)".into(),
342            });
343        }
344        InjectionContext::XmlText => {
345            if payload.contains('<') {
346                return Err(ContextualEncodeError::ContextIncompatible {
347                    strategy: "validate".into(),
348                    context,
349                    reason: "XML text payload contains unescaped `<`".into(),
350                });
351            }
352            reject_unescaped_ampersand(payload, context)?;
353        }
354        InjectionContext::HtmlAttribute => {
355            if payload.contains('<') {
356                return Err(ContextualEncodeError::ContextIncompatible {
357                    strategy: "validate".into(),
358                    context,
359                    reason: "HTML attribute contains unescaped `<` — would close the attribute"
360                        .into(),
361                });
362            }
363            if payload.contains('"') {
364                return Err(ContextualEncodeError::ContextIncompatible {
365                    strategy: "validate".into(),
366                    context,
367                    reason: "HTML attribute contains unescaped `\"` — attribute breakout".into(),
368                });
369            }
370            if payload.contains('\'') {
371                return Err(ContextualEncodeError::ContextIncompatible {
372                    strategy: "validate".into(),
373                    context,
374                    reason: "HTML attribute contains unescaped `'` — single-quoted attr breakout"
375                        .into(),
376                });
377            }
378            reject_unescaped_ampersand(payload, context)?;
379        }
380        InjectionContext::HtmlText => {
381            if payload.contains('<') {
382                return Err(ContextualEncodeError::ContextIncompatible {
383                    strategy: "validate".into(),
384                    context,
385                    reason: "HTML text contains unescaped `<` — would start a tag".into(),
386                });
387            }
388            reject_unescaped_ampersand(payload, context)?;
389        }
390        InjectionContext::UrlQuery | InjectionContext::UrlPath | InjectionContext::UrlFragment => {
391            // URL components are validated by percent-encoding step later;
392            // raw payload can contain any bytes here.
393        }
394        InjectionContext::HeaderValue => {
395            // Header values are validated by the header obfuscation layer;
396            // CRLF injection is guarded at the transport level.
397        }
398        InjectionContext::CookieValue => {
399            // Cookie values accept most printable ASCII; validation is
400            // handled by the cookie encoding layer.
401        }
402        InjectionContext::MultipartField | InjectionContext::MultipartFileName => {
403            // Multipart boundaries are managed by the form encoder;
404            // individual field values have no additional constraints.
405        }
406        // InjectionContext is #[non_exhaustive]; future variants default to
407        // no validation until explicit rules are added.
408        _ => {}
409    }
410    Ok(())
411}
412
413/// Returns Err if `payload` contains an `&` that is NOT the start of a
414/// well-formed entity reference (`&name;`, `&#nnn;`, or `&#xHHH;`).
415///
416/// This is the cheap cousin of an HTML5 entity validator — it doesn't
417/// know which named entities are real (`&copy;` vs `&xyz;`), but it
418/// does enforce the lexical shape so a stray `&` cannot ride through
419/// `validate_in_context` for HTML/XML contexts.
420fn reject_unescaped_ampersand(
421    payload: &str,
422    context: InjectionContext,
423) -> Result<(), ContextualEncodeError> {
424    let bytes = payload.as_bytes();
425    let mut i = 0;
426    while i < bytes.len() {
427        if bytes[i] != b'&' {
428            i += 1;
429            continue;
430        }
431        // Walk forward to find the terminating `;` within a bounded
432        // window — real entities are short (max ~12 chars including
433        // the `;`). If we don't find one, the `&` is unescaped.
434        let mut j = i + 1;
435        let max = (i + 12).min(bytes.len());
436        let mut saw_semicolon = false;
437        let mut valid_shape = true;
438        let first = bytes.get(j).copied();
439        if first == Some(b'#') {
440            j += 1;
441            let hex = bytes.get(j).copied() == Some(b'x') || bytes.get(j).copied() == Some(b'X');
442            if hex {
443                j += 1;
444            }
445            let mut digit_count = 0;
446            while j < max {
447                let b = bytes[j];
448                if b == b';' {
449                    saw_semicolon = true;
450                    j += 1;
451                    break;
452                }
453                let ok = if hex {
454                    b.is_ascii_hexdigit()
455                } else {
456                    b.is_ascii_digit()
457                };
458                if !ok {
459                    valid_shape = false;
460                    break;
461                }
462                digit_count += 1;
463                j += 1;
464            }
465            if digit_count == 0 {
466                valid_shape = false;
467            }
468        } else if let Some(b) = first {
469            if b.is_ascii_alphabetic() {
470                while j < max {
471                    let b = bytes[j];
472                    if b == b';' {
473                        saw_semicolon = true;
474                        j += 1;
475                        break;
476                    }
477                    if !b.is_ascii_alphanumeric() {
478                        valid_shape = false;
479                        break;
480                    }
481                    j += 1;
482                }
483            } else {
484                valid_shape = false;
485            }
486        } else {
487            valid_shape = false;
488        }
489        if !valid_shape || !saw_semicolon {
490            return Err(ContextualEncodeError::ContextIncompatible {
491                strategy: "validate".into(),
492                context,
493                reason: format!("unescaped `&` at byte {i} (no entity reference follows)"),
494            });
495        }
496        i = j;
497    }
498    Ok(())
499}
500
501#[cfg(test)]
502mod tests {
503    use super::*;
504    use crate::encoding::Strategy;
505
506    #[test]
507    fn encode_error_mapping_payload_too_large() {
508        // PayloadTooLarge from encode maps to PayloadTooLarge contextual error
509        // We can't easily trigger this from encode(), but we verify the error path
510        // by checking that InvalidUtf8 is only returned for actual UTF-8 errors
511        let result = encode_in_context(
512            b"\x80",
513            Strategy::CaseAlternation,
514            InjectionContext::PlainBody,
515        );
516        // \x80 alone is invalid UTF-8, so encode should return InvalidUtf8
517        assert!(result.is_err());
518        let err = result.unwrap_err();
519        assert!(
520            err.to_string().contains("invalid") || err.to_string().contains("UTF-8"),
521            "error should mention invalid UTF-8, got: {err}"
522        );
523    }
524
525    #[test]
526    fn json_string_validates_unescaped_quote() {
527        let err = validate_in_context("hello\"world", InjectionContext::JsonString).unwrap_err();
528        assert!(err.to_string().contains("unescaped double quote"));
529    }
530
531    #[test]
532    fn json_string_validates_valid_escapes() {
533        assert!(validate_in_context("hello\\nworld", InjectionContext::JsonString).is_ok());
534        assert!(validate_in_context("hello\\tworld", InjectionContext::JsonString).is_ok());
535        assert!(validate_in_context("hello\\\\world", InjectionContext::JsonString).is_ok());
536        assert!(validate_in_context("hello\\\"world", InjectionContext::JsonString).is_ok());
537    }
538
539    #[test]
540    fn json_string_validates_unicode_escape() {
541        // Valid \u00e4
542        assert!(validate_in_context("\\u00e4", InjectionContext::JsonString).is_ok());
543        // Invalid \u00g4 (non-hex)
544        let err = validate_in_context("\\u00g4", InjectionContext::JsonString).unwrap_err();
545        assert!(err.to_string().contains("invalid Unicode escape"));
546        // Too short \u00
547        let err = validate_in_context("\\u00", InjectionContext::JsonString).unwrap_err();
548        assert!(err.to_string().contains("invalid Unicode escape"));
549    }
550
551    #[test]
552    fn json_string_validates_invalid_escape() {
553        let err = validate_in_context("\\x", InjectionContext::JsonString).unwrap_err();
554        assert!(err.to_string().contains("invalid JSON escape"));
555    }
556
557    #[test]
558    fn json_string_validates_trailing_backslash() {
559        let err = validate_in_context("hello\\", InjectionContext::JsonString).unwrap_err();
560        assert!(err.to_string().contains("trailing backslash"));
561    }
562
563    #[test]
564    fn xml_attribute_validates_unescaped_quote() {
565        let err = validate_in_context("hello\"world", InjectionContext::XmlAttribute).unwrap_err();
566        assert!(err.to_string().contains("unescaped double quote"));
567    }
568
569    #[test]
570    fn xml_attribute_allows_escaped_quote() {
571        // &quot; should be allowed (the validator doesn't fully validate entities,
572        // but it shouldn't error on well-formed entity references)
573        assert!(validate_in_context("hello&quot;world", InjectionContext::XmlAttribute).is_ok());
574    }
575
576    #[test]
577    fn xml_attribute_validates_unescaped_single_quote() {
578        // A single-quoted XML attribute (attr='...') breaks out on an unescaped `'`.
579        // Previously the validator only checked `"`, so `' onclick='alert(1)` passed
580        // as "valid" despite being an injection vector.
581        let err = validate_in_context("foo' onclick='alert(1)", InjectionContext::XmlAttribute)
582            .unwrap_err();
583        assert!(
584            err.to_string().contains("single quote"),
585            "error must mention single quote, got: {err}"
586        );
587    }
588
589    #[test]
590    fn xml_attribute_validator_does_not_swallow_chars_after_entity() {
591        // F137 regression: pre-fix `&lt;<script>` passed validation
592        // because the validator consumed 6 chars after every `&` to
593        // peek at the entity name. After matching `lt;` it had already
594        // eaten the next 2 chars (`<s`) and never validated them — so
595        // the unescaped `<` rode straight through. Post-fix the
596        // validator clones the cursor for lookahead and advances only
597        // by the matched entity length, so the trailing `<` is caught.
598        let err = validate_in_context("&lt;<script>", InjectionContext::XmlAttribute)
599            .expect_err("unescaped `<` after &lt; MUST reject");
600        assert!(
601            err.to_string().contains('<') || err.to_string().contains("unescaped"),
602            "error should mention the unescaped `<`, got: {err}"
603        );
604    }
605
606    #[test]
607    fn xml_attribute_validator_catches_quote_after_short_entity() {
608        // Same F137 hazard, different exploit: `&amp;"` — after `&amp;`
609        // (4 chars), the pre-fix code consumed 2 chars beyond (the `"`
610        // and one more), bypassing the unescaped-quote check.
611        let err = validate_in_context("&amp;\"breakout", InjectionContext::XmlAttribute)
612            .expect_err("unescaped `\"` after &amp; MUST reject");
613        assert!(
614            err.to_string().contains("double quote"),
615            "error should mention double quote, got: {err}"
616        );
617    }
618
619    #[test]
620    fn xml_attribute_validator_allows_multiple_entities_in_a_row() {
621        // The fix must not over-correct: a payload of nothing-but-
622        // entities still passes.
623        assert!(
624            validate_in_context("&amp;&lt;&gt;&quot;&apos;", InjectionContext::XmlAttribute,)
625                .is_ok(),
626            "chain of well-formed entities must pass validation"
627        );
628    }
629
630    #[test]
631    fn xml_attribute_escape_encodes_single_quote() {
632        // escape_for_context must produce &apos; for `'` so that the escaped
633        // output then passes validate_in_context.
634        let escaped =
635            escape_for_context("don't break my attribute", InjectionContext::XmlAttribute).unwrap();
636        assert!(
637            escaped.contains("&apos;"),
638            "expected &apos; in escaped output, got: {escaped}"
639        );
640        // The round-trip must also pass validation.
641        validate_in_context(&escaped, InjectionContext::XmlAttribute)
642            .expect("escaped output must pass validation");
643    }
644
645    #[test]
646    fn xml_attribute_allows_escaped_apos() {
647        // &apos; is a well-formed entity reference and must not trigger the
648        // single-quote validator.
649        assert!(
650            validate_in_context("don&apos;t", InjectionContext::XmlAttribute).is_ok(),
651            "&apos; must be accepted by the XmlAttribute validator"
652        );
653    }
654
655    #[test]
656    fn header_value_validates_crlf() {
657        let err = encode_in_context(
658            b"hello\r\nworld",
659            Strategy::CaseAlternation,
660            InjectionContext::HeaderValue,
661        )
662        .unwrap_err();
663        assert!(err.to_string().contains("CR/LF"));
664    }
665
666    #[test]
667    fn cookie_value_escapes_crlf() {
668        let out = encode_in_context(
669            b"hello\r\nworld",
670            Strategy::CaseAlternation,
671            InjectionContext::CookieValue,
672        )
673        .unwrap();
674        assert!(out.contains("%0D") && out.contains("%0A"));
675    }
676
677    #[test]
678    fn multipart_field_validates_crlf() {
679        let err = encode_in_context(
680            b"hello\r\nworld",
681            Strategy::CaseAlternation,
682            InjectionContext::MultipartField,
683        )
684        .unwrap_err();
685        assert!(err.to_string().contains("CR/LF"));
686    }
687
688    #[test]
689    fn html_attribute_escapes_ampersand() {
690        let out = encode_in_context(
691            b"a&b",
692            Strategy::CaseAlternation,
693            InjectionContext::HtmlAttribute,
694        )
695        .unwrap();
696        assert!(out.contains("&amp;"));
697    }
698
699    #[test]
700    fn url_query_escapes_space() {
701        let out = encode_in_context(
702            b"hello world",
703            Strategy::CaseAlternation,
704            InjectionContext::UrlQuery,
705        )
706        .unwrap();
707        assert!(!out.contains(' '));
708    }
709
710    #[test]
711    fn url_path_preserves_slash() {
712        let out = encode_in_context(
713            b"/api/v1",
714            Strategy::CaseAlternation,
715            InjectionContext::UrlPath,
716        )
717        .unwrap();
718        assert!(out.contains('/'));
719    }
720
721    #[test]
722    fn plain_body_no_structural_escaping() {
723        // PlainBody doesn't add structural escaping, but the strategy still mutates
724        let out = encode_in_context(
725            b"<script>",
726            Strategy::CaseAlternation,
727            InjectionContext::PlainBody,
728        )
729        .unwrap();
730        assert_eq!(out, "<ScRiPt>");
731    }
732
#[test]
fn max_size_enforced() {
    // Build the oversized input from the canonical limit instead of a bare
    // `8 * 1024 * 1024` literal, so the payload keeps exceeding the cap by
    // exactly one byte if `MAX_PAYLOAD_SIZE` is ever changed.
    let limit = crate::encoding::strategy::MAX_PAYLOAD_SIZE;
    let big = vec![b'a'; limit + 1];
    let err = encode_in_context(&big, Strategy::CaseAlternation, InjectionContext::PlainBody)
        .unwrap_err();
    assert!(err.to_string().contains("too large"));
    // PlainBody has no context-specific cap, so the rejection must come from
    // the up-front size check and report the shared limit.
    assert!(
        matches!(
            &err,
            wafrift_types::injection_context::ContextualEncodeError::PayloadTooLarge {
                size,
                max,
                ..
            } if *size == limit + 1 && *max == limit
        ),
        "expected PayloadTooLarge for {} bytes against a {limit}-byte limit, got: {err}",
        limit + 1
    );
}
740
#[test]
fn xml_cdata_rejects_termination_sequence() {
    // A payload carrying the `]]>` terminator is expected to fail, with an
    // error message that mentions CDATA.
    let payload: &[u8] = b"hello]]>world";
    let outcome = encode_in_context(
        payload,
        Strategy::CaseAlternation,
        InjectionContext::XmlCdata,
    );
    let rendered = outcome.unwrap_err().to_string();
    assert!(rendered.contains("CDATA"));
}
751
#[test]
fn multipart_filename_rejects_quote() {
    // A double quote inside a multipart filename is expected to be refused,
    // with the error text mentioning the quote.
    let payload: &[u8] = b"file\"name.txt";
    let outcome = encode_in_context(
        payload,
        Strategy::CaseAlternation,
        InjectionContext::MultipartFileName,
    );
    let rendered = outcome.unwrap_err().to_string();
    assert!(rendered.contains("quote"));
}
762
#[test]
fn json_number_rejects_non_numeric() {
    // Alphabetic input in the JsonNumber context is expected to fail with
    // the "not a valid JSON number" diagnostic.
    let payload: &[u8] = b"abc";
    let outcome = encode_in_context(
        payload,
        Strategy::CaseAlternation,
        InjectionContext::JsonNumber,
    );
    let rendered = outcome.unwrap_err().to_string();
    assert!(rendered.contains("not a valid JSON number"));
}
773
#[test]
fn empty_payload_valid_in_all_contexts() {
    // Despite the name, only the five contexts listed here are exercised;
    // each must accept a zero-length payload under the UrlEncode strategy.
    let contexts = [
        InjectionContext::PlainBody,
        InjectionContext::JsonString,
        InjectionContext::XmlAttribute,
        InjectionContext::HeaderValue,
        InjectionContext::CookieValue,
    ];
    for ctx in contexts {
        let outcome = encode_in_context(b"", Strategy::UrlEncode, ctx);
        assert!(outcome.is_ok(), "empty payload should be valid in {ctx:?}");
    }
}
789
790    // ── New tests added 2026-05-24 ─────────────────────────────────────────
791
#[test]
fn xml_attribute_single_quote_payloads_all_rejected() {
    // Ten inputs that each carry at least one single quote. For every input,
    // escaping may either fail outright or succeed; when it succeeds the
    // result has to pass validate_in_context and contain no bare `'`.
    let payloads = [
        "don't",
        "a' onclick='alert(1)",
        "' OR 1=1",
        "test' attribute='injected",
        "hello'world",
        "foo' onmouseover='evil",
        "x' style='color:red",
        "value'extra",
        "a'b'c",
        "' union select",
    ];
    for payload in payloads.iter() {
        if let Ok(s) = escape_for_context(payload, InjectionContext::XmlAttribute) {
            // Successful escaping implies the output is valid for the context.
            if let Err(e) = validate_in_context(&s, InjectionContext::XmlAttribute) {
                panic!(
                    "escape_for_context produced invalid output for {payload:?}: {e}\n  escaped: {s}"
                );
            }
            // No raw single quote may remain in the escaped text.
            assert!(
                !s.contains('\''),
                "bare single quote survived in escaped output for {payload:?}: {s}"
            );
        } else {
            // Rejection is acceptable too. Validation of the raw input is
            // still run, but only to confirm it does not panic; its result
            // is intentionally ignored.
            let _ = validate_in_context(payload, InjectionContext::XmlAttribute);
        }
    }
}
833
#[test]
fn escape_for_context_xml_attribute_round_trip() {
    // Inputs that are representable inside an XML attribute: escaping must
    // succeed, and the escaped text must then pass validation.
    let payloads = [
        "hello world",
        "test & value",
        "\"quoted\"",
        "less < than",
        "greater > than",
    ];
    for payload in payloads.iter() {
        let escaped = match escape_for_context(payload, InjectionContext::XmlAttribute) {
            Ok(text) => text,
            Err(e) => panic!("escape_for_context failed for {payload:?}: {e}"),
        };
        if let Err(e) = validate_in_context(&escaped, InjectionContext::XmlAttribute) {
            panic!("round-trip validation failed for {payload:?}: {e}\n  escaped: {escaped}");
        }
    }
}
853
#[test]
fn url_encode_twice_is_deterministic() {
    // Encoding an already-encoded string changes it again (the `%` signs
    // get re-encoded), so the operation is not idempotent. It must still be
    // deterministic: two independent encode-then-encode runs have to agree.
    let url_encode = |input: &str| -> String {
        encode_in_context(
            input.as_bytes(),
            Strategy::UrlEncode,
            InjectionContext::UrlQuery,
        )
        .unwrap()
    };

    let payload = "' OR 1=1--";

    // First independent run: single pass, then a second pass over its output.
    let first_single = url_encode(payload);
    let first_double = url_encode(&first_single);

    // Second independent run over the same starting payload.
    let second_single = url_encode(payload);
    let second_double = url_encode(&second_single);

    assert_eq!(
        first_double, second_double,
        "URL-encode applied twice must be deterministic across calls"
    );
    // The second pass must actually alter the single-encoded text.
    assert_ne!(
        first_single, first_double,
        "URL-encode applied twice must produce a different (double-encoded) result"
    );
}
894
#[test]
fn url_encode_decode_round_trip() {
    // Percent-decoding the UrlEncode output has to give back the exact input.
    let original = "' OR 1=1--";
    let input_bytes = original.as_bytes();
    let encoded = crate::encoding::encode(input_bytes, Strategy::UrlEncode).unwrap();
    let decoded = urlencoding::decode(&encoded).unwrap();
    assert_eq!(
        &*decoded, original,
        "URL encode → decode round-trip must equal original"
    );
}
906
#[test]
fn unicode_boundary_4byte_utf8_no_panic() {
    // Each character below lies outside the Basic Multilingual Plane and so
    // takes four bytes in UTF-8. Every strategy is run over them; the result
    // is discarded because only the absence of a panic is being checked.
    let bytes = "😀𝄞🚀".as_bytes();
    for strategy in crate::encoding::all_strategies() {
        let _ = crate::encoding::encode(bytes, *strategy);
    }
}
915
#[test]
fn unicode_boundary_bom_no_panic() {
    // A payload that starts with a byte-order mark (U+FEFF) is fed through
    // every strategy. Results are ignored: the test only guards against a
    // panic.
    let bytes = "\u{FEFF}SELECT * FROM users".as_bytes();
    for strategy in crate::encoding::all_strategies() {
        let _ = crate::encoding::encode(bytes, *strategy);
    }
}
924
#[test]
fn json_string_escape_u2028_and_u2029() {
    // U+2028 and U+2029 must be escaped to `\u2028` / `\u2029` to prevent
    // line-terminator injection in JSONP/eval contexts.
    let escaped = escape_for_context(
        "\u{2028}hello\u{2029}world",
        InjectionContext::JsonString,
    )
    .unwrap();

    let has_line_separator = escaped.contains("\\u2028");
    let has_paragraph_separator = escaped.contains("\\u2029");
    assert!(
        has_line_separator,
        "U+2028 must be escaped to \\u2028, got: {escaped}"
    );
    assert!(
        has_paragraph_separator,
        "U+2029 must be escaped to \\u2029, got: {escaped}"
    );

    // The escaped text itself has to be acceptable to the validator.
    validate_in_context(&escaped, InjectionContext::JsonString).unwrap();
}
942
#[test]
fn cookie_value_all_special_chars_encoded() {
    // One payload holding every character checked below; none of them may
    // appear in raw form in the escaped cookie value.
    let payload = "val;ue=sp ace,\"q\"\\back\x00nul\r\n";
    let out = escape_for_context(payload, InjectionContext::CookieValue).unwrap();

    // (forbidden raw character, failure message), checked in this order.
    let forbidden = [
        (';', "semicolon must be encoded"),
        ('=', "equals must be encoded"),
        (' ', "space must be encoded"),
        (',', "comma must be encoded"),
        ('"', "double-quote must be encoded"),
        ('\\', "backslash must be encoded"),
        ('\x00', "null must be encoded"),
        ('\r', "CR must be encoded"),
        ('\n', "LF must be encoded"),
    ];
    for (raw_char, why) in forbidden {
        assert!(!out.contains(raw_char), "{why}");
    }
}
958
#[test]
fn header_value_null_byte_rejected() {
    // A NUL byte in a header value has to be refused, and the error text
    // has to say so.
    let outcome = escape_for_context("hello\x00world", InjectionContext::HeaderValue);
    let err = outcome.unwrap_err();
    let mentions_null = err.to_string().contains("null");
    assert!(mentions_null, "error must mention null byte, got: {err}");
}
968
#[test]
fn xml_attribute_null_byte_rejected() {
    // A NUL byte in an XML attribute has to be refused. The wording of the
    // error is not pinned; it only has to be non-empty.
    let outcome = escape_for_context("hello\x00world", InjectionContext::XmlAttribute);
    let rendered = outcome.unwrap_err().to_string();
    assert!(!rendered.is_empty());
}
975}