Skip to main content

contextual_encoder/
ruby.rs

1//! ruby literal encoder.
2//!
3//! encodes untrusted strings for safe embedding in ruby source literals.
4//!
5//! - [`for_ruby_string`] — safe for ruby double-quoted string literals (`"..."`)
6//!
7//! # encoding rules
8//!
9//! the encoder uses ruby's native escape syntax:
10//!
11//! - named escapes: `\a`, `\b`, `\t`, `\n`, `\v`, `\f`, `\r`, `\e`, `\\`
12//! - double quote → `\"`
13//! - hash sign → `\#` (prevents `#{}`, `#$`, `#@` interpolation)
14//! - other C0 controls and DEL → `\xHH`
15//! - unicode non-characters → space
16//! - non-ASCII unicode passes through (ruby 2.0+ source files are UTF-8
17//!   by default)
18//!
19//! the output is safe for double-quoted string literals only. ruby
20//! single-quoted strings (`'...'`) use different escape rules and are
21//! not covered by this encoder.
22
23use std::fmt;
24
25use crate::engine::{encode_loop, is_unicode_noncharacter, write_c0_named_escape};
26
27// ---------------------------------------------------------------------------
28// for_ruby_string — safe for Ruby double-quoted string literals ("...")
29// ---------------------------------------------------------------------------
30
31/// encodes `input` for safe embedding in a ruby double-quoted string literal
32/// (`"..."`).
33///
34/// escapes backslashes, double quotes, hash signs (to prevent interpolation),
35/// and control characters using ruby's escape syntax. non-ASCII unicode passes
36/// through unchanged (ruby 2.0+ source files are UTF-8 by default). unicode
37/// non-characters are replaced with space.
38///
39/// # examples
40///
41/// ```
42/// use contextual_encoder::for_ruby_string;
43///
44/// assert_eq!(for_ruby_string(r#"say "hi""#), r#"say \"hi\""#);
45/// assert_eq!(for_ruby_string("line\nbreak"), r"line\nbreak");
46/// assert_eq!(for_ruby_string("café"), "café");
47/// assert_eq!(for_ruby_string("hello #{name}"), r"hello \#{name}");
48/// ```
49pub fn for_ruby_string(input: &str) -> String {
50    let mut out = String::with_capacity(input.len());
51    write_ruby_string(&mut out, input).expect("writing to string cannot fail");
52    out
53}
54
55/// writes the ruby-string-encoded form of `input` to `out`.
56///
57/// see [`for_ruby_string`] for encoding rules.
58pub fn write_ruby_string<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
59    encode_loop(out, input, needs_ruby_string_encoding, |out, c, _next| {
60        write_ruby_text_encoded(out, c)
61    })
62}
63
64fn needs_ruby_string_encoding(c: char) -> bool {
65    matches!(c, '\x00'..='\x1F' | '\x7F' | '"' | '#' | '\\') || is_unicode_noncharacter(c as u32)
66}
67
68/// writes the encoded form of a character for ruby string context.
69fn write_ruby_text_encoded<W: fmt::Write>(out: &mut W, c: char) -> fmt::Result {
70    if let Some(r) = write_c0_named_escape(out, c) {
71        return r;
72    }
73    match c {
74        '\x1B' => out.write_str("\\e"),
75        '"' => out.write_str("\\\""),
76        '#' => out.write_str("\\#"),
77        c if is_unicode_noncharacter(c as u32) => out.write_char(' '),
78        // other C0 controls and DEL
79        c => write!(out, "\\x{:02x}", c as u32),
80    }
81}
82
#[cfg(test)]
mod tests {
    use super::*;

    /// asserts that each `(input, expected)` pair encodes to `expected`.
    fn check_all(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(for_ruby_string(input), expected);
        }
    }

    // plain ASCII, the empty string and non-ASCII unicode are left untouched.
    #[test]
    fn string_passthrough() {
        check_all(&[
            ("hello world", "hello world"),
            ("", ""),
            (
                "cafe\u{0301} \u{65E5}\u{672C}\u{8A9E}",
                "cafe\u{0301} \u{65E5}\u{672C}\u{8A9E}",
            ),
            ("\u{1F600}", "\u{1F600}"),
        ]);
    }

    // a double quote would terminate the literal, so it is escaped.
    #[test]
    fn string_escapes_double_quote() {
        check_all(&[(r#"a"b"#, r#"a\"b"#)]);
    }

    // single quotes carry no meaning inside a double-quoted literal.
    #[test]
    fn string_passes_single_quote() {
        check_all(&[("a'b", "a'b")]);
    }

    // a backslash is doubled.
    #[test]
    fn string_escapes_backslash() {
        check_all(&[(r"a\b", r"a\\b")]);
    }

    // every hash sign is escaped, whether or not it starts an interpolation.
    #[test]
    fn string_escapes_hash() {
        check_all(&[
            ("hello #{name}", r"hello \#{name}"),
            ("#$global", r"\#$global"),
            ("#@ivar", r"\#@ivar"),
            ("color #ff0000", r"color \#ff0000"),
        ]);
    }

    // controls that have a dedicated ruby escape use it.
    #[test]
    fn string_named_escapes() {
        check_all(&[
            ("\x07", "\\a"),
            ("\x08", "\\b"),
            ("\t", "\\t"),
            ("\n", "\\n"),
            ("\x0B", "\\v"),
            ("\x0C", "\\f"),
            ("\r", "\\r"),
            ("\x1B", "\\e"),
        ]);
    }

    // remaining C0 controls and DEL fall back to two-digit lowercase hex.
    #[test]
    fn string_hex_escapes_for_controls() {
        check_all(&[
            ("\x00", "\\x00"),
            ("\x01", "\\x01"),
            ("\x06", "\\x06"),
            ("\x0E", "\\x0e"),
            ("\x1F", "\\x1f"),
            ("\x7F", "\\x7f"),
        ]);
    }

    // unicode non-characters are replaced by a single space.
    #[test]
    fn string_nonchars_replaced() {
        check_all(&[("\u{FDD0}", " "), ("\u{FFFE}", " ")]);
    }

    // the allocating wrapper and the streaming writer must agree.
    #[test]
    fn string_writer_matches() {
        let input = "test\x00\"\\\n#{}café\x1B";
        let mut streamed = String::new();
        write_ruby_string(&mut streamed, input).unwrap();
        assert_eq!(for_ruby_string(input), streamed);
    }
}