Skip to main content

contextual_encoder/
go.rs

1//! go literal encoders.
2//!
3//! encodes untrusted strings for safe embedding in go source literals.
4//!
5//! - [`for_go_string`] — safe for go interpreted string literals (`"..."`)
6//! - [`for_go_char`] — safe for go rune literals (`'...'`)
7//! - [`for_go_byte_string`] — safe for go byte-explicit string literals
8//!   (`[]byte("...")`)
9//!
10//! # encoding rules
11//!
12//! all three encoders use go's native escape syntax:
13//!
14//! - named escapes: `\a`, `\b`, `\t`, `\n`, `\v`, `\f`, `\r`, `\\`
15//! - other C0 controls and DEL → `\xHH`
16//! - unicode non-characters → space (string/char) or `\xHH` per byte (byte string)
17//!
18//! the encoders differ in which quote is escaped and how non-ASCII is handled:
19//!
20//! | encoder | quote escape | non-ASCII |
21//! |---------|-------------|-----------|
22//! | `for_go_string` | `"` → `\"` | passes through |
23//! | `for_go_char` | `'` → `\'` | passes through |
24//! | `for_go_byte_string` | `"` → `\"` | each UTF-8 byte → `\xHH` |
25
26use std::fmt;
27
28use crate::engine::{
29    encode_loop, is_unicode_noncharacter, needs_byte_string_encoding, write_byte_string_encoded,
30    write_c0_named_escape,
31};
32
33// ---------------------------------------------------------------------------
34// for_go_string — safe for Go interpreted string literals ("...")
35// ---------------------------------------------------------------------------
36
37/// encodes `input` for safe embedding in a go interpreted string literal
38/// (`"..."`).
39///
40/// escapes backslashes, double quotes, and control characters using go's
41/// escape syntax. non-ASCII unicode passes through unchanged (go source files
42/// are UTF-8). unicode non-characters are replaced with space.
43///
44/// # examples
45///
46/// ```
47/// use contextual_encoder::for_go_string;
48///
49/// assert_eq!(for_go_string(r#"say "hi""#), r#"say \"hi\""#);
50/// assert_eq!(for_go_string("line\nbreak"), r"line\nbreak");
51/// assert_eq!(for_go_string("cafe\u{0301}"), "cafe\u{0301}");
52/// ```
53pub fn for_go_string(input: &str) -> String {
54    let mut out = String::with_capacity(input.len());
55    write_go_string(&mut out, input).expect("writing to string cannot fail");
56    out
57}
58
59/// writes the go-string-encoded form of `input` to `out`.
60///
61/// see [`for_go_string`] for encoding rules.
62pub fn write_go_string<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
63    encode_loop(out, input, needs_go_string_encoding, |out, c, _next| {
64        write_go_text_encoded(out, c, '"')
65    })
66}
67
68fn needs_go_string_encoding(c: char) -> bool {
69    matches!(c, '\x00'..='\x1F' | '\x7F' | '"' | '\\') || is_unicode_noncharacter(c as u32)
70}
71
72// ---------------------------------------------------------------------------
73// for_go_char — safe for Go rune literals ('...')
74// ---------------------------------------------------------------------------
75
76/// encodes `input` for safe embedding in a go rune literal (`'...'`).
77///
78/// escapes backslashes, single quotes, and control characters using go's
79/// escape syntax. non-ASCII unicode passes through unchanged. unicode
80/// non-characters are replaced with space.
81///
82/// # examples
83///
84/// ```
85/// use contextual_encoder::for_go_char;
86///
87/// assert_eq!(for_go_char("it's"), r"it\'s");
88/// assert_eq!(for_go_char(r#"a"b"#), r#"a"b"#);
89/// assert_eq!(for_go_char("tab\there"), r"tab\there");
90/// ```
91pub fn for_go_char(input: &str) -> String {
92    let mut out = String::with_capacity(input.len());
93    write_go_char(&mut out, input).expect("writing to string cannot fail");
94    out
95}
96
97/// writes the go-char-encoded form of `input` to `out`.
98///
99/// see [`for_go_char`] for encoding rules.
100pub fn write_go_char<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
101    encode_loop(out, input, needs_go_char_encoding, |out, c, _next| {
102        write_go_text_encoded(out, c, '\'')
103    })
104}
105
106fn needs_go_char_encoding(c: char) -> bool {
107    matches!(c, '\x00'..='\x1F' | '\x7F' | '\'' | '\\') || is_unicode_noncharacter(c as u32)
108}
109
110// ---------------------------------------------------------------------------
111// shared helper for string and char encoders
112// ---------------------------------------------------------------------------
113
114/// writes the encoded form of a character for go string or rune context.
115/// `quote` is the delimiter being escaped (`"` or `'`).
116fn write_go_text_encoded<W: fmt::Write>(out: &mut W, c: char, quote: char) -> fmt::Result {
117    if let Some(r) = write_c0_named_escape(out, c) {
118        return r;
119    }
120    match c {
121        '"' if quote == '"' => out.write_str("\\\""),
122        '\'' if quote == '\'' => out.write_str("\\'"),
123        c if is_unicode_noncharacter(c as u32) => out.write_char(' '),
124        // other C0 controls and DEL
125        c => write!(out, "\\x{:02x}", c as u32),
126    }
127}
128
129// ---------------------------------------------------------------------------
130// for_go_byte_string — safe for Go byte-explicit string contexts
131// ---------------------------------------------------------------------------
132
133/// encodes `input` for safe embedding in a go string literal used in a
134/// byte-explicit context (`[]byte("...")`).
135///
136/// escapes backslashes, double quotes, and control characters. non-ASCII
137/// characters are encoded as their individual UTF-8 bytes using `\xHH`
138/// notation, making every byte visible.
139///
140/// # examples
141///
142/// ```
143/// use contextual_encoder::for_go_byte_string;
144///
145/// assert_eq!(for_go_byte_string("hello"), "hello");
146/// assert_eq!(for_go_byte_string(r#"say "hi""#), r#"say \"hi\""#);
147/// assert_eq!(for_go_byte_string("cafe\u{0301}"), r"cafe\xcc\x81");
148/// assert_eq!(for_go_byte_string("null\x00byte"), r"null\x00byte");
149/// ```
150pub fn for_go_byte_string(input: &str) -> String {
151    let mut out = String::with_capacity(input.len());
152    write_go_byte_string(&mut out, input).expect("writing to string cannot fail");
153    out
154}
155
156/// writes the go-byte-string-encoded form of `input` to `out`.
157///
158/// see [`for_go_byte_string`] for encoding rules.
159pub fn write_go_byte_string<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
160    encode_loop(out, input, needs_byte_string_encoding, |out, c, _next| {
161        write_byte_string_encoded(out, c, write_c0_named_escape)
162    })
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// runs `encode` over every `(input, expected)` pair and checks the output.
    fn assert_all(encode: fn(&str) -> String, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(encode(input), expected, "input: {:?}", input);
        }
    }

    /// control characters that have a dedicated named escape, shared by all
    /// three encoders.
    const NAMED_ESCAPES: [(&str, &str); 7] = [
        ("\x07", "\\a"),
        ("\x08", "\\b"),
        ("\t", "\\t"),
        ("\n", "\\n"),
        ("\x0B", "\\v"),
        ("\x0C", "\\f"),
        ("\r", "\\r"),
    ];

    // -- for_go_string --

    #[test]
    fn string_passthrough() {
        assert_all(
            for_go_string,
            &[
                ("hello world", "hello world"),
                ("", ""),
                (
                    "cafe\u{0301} \u{65E5}\u{672C}\u{8A9E}",
                    "cafe\u{0301} \u{65E5}\u{672C}\u{8A9E}",
                ),
                ("\u{1F600}", "\u{1F600}"),
            ],
        );
    }

    #[test]
    fn string_escapes_double_quote() {
        assert_all(for_go_string, &[(r#"a"b"#, r#"a\"b"#)]);
    }

    #[test]
    fn string_passes_single_quote() {
        assert_all(for_go_string, &[("a'b", "a'b")]);
    }

    #[test]
    fn string_escapes_backslash() {
        assert_all(for_go_string, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn string_named_escapes() {
        assert_all(for_go_string, &NAMED_ESCAPES);
    }

    #[test]
    fn string_hex_escapes_for_controls() {
        assert_all(
            for_go_string,
            &[
                ("\x00", "\\x00"),
                ("\x01", "\\x01"),
                ("\x06", "\\x06"),
                ("\x0E", "\\x0e"),
                ("\x1F", "\\x1f"),
                ("\x7F", "\\x7f"),
            ],
        );
    }

    #[test]
    fn string_nonchars_replaced() {
        assert_all(for_go_string, &[("\u{FDD0}", " "), ("\u{FFFE}", " ")]);
    }

    #[test]
    fn string_writer_matches() {
        let input = "test\x00\"\\\n cafe\u{0301}";
        let mut written = String::new();
        write_go_string(&mut written, input).unwrap();
        assert_eq!(for_go_string(input), written);
    }

    // -- for_go_char --

    #[test]
    fn char_passthrough() {
        assert_all(
            for_go_char,
            &[
                ("hello world", "hello world"),
                ("", ""),
                ("cafe\u{0301}", "cafe\u{0301}"),
            ],
        );
    }

    #[test]
    fn char_escapes_single_quote() {
        assert_all(for_go_char, &[("a'b", r"a\'b")]);
    }

    #[test]
    fn char_passes_double_quote() {
        assert_all(for_go_char, &[(r#"a"b"#, r#"a"b"#)]);
    }

    #[test]
    fn char_escapes_backslash() {
        assert_all(for_go_char, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn char_named_escapes() {
        assert_all(for_go_char, &NAMED_ESCAPES);
    }

    #[test]
    fn char_hex_escapes_for_controls() {
        assert_all(for_go_char, &[("\x01", "\\x01"), ("\x7F", "\\x7f")]);
    }

    #[test]
    fn char_nonchars_replaced() {
        assert_all(for_go_char, &[("\u{FDD0}", " ")]);
    }

    #[test]
    fn char_writer_matches() {
        let input = "test\x00'\\\n cafe\u{0301}";
        let mut written = String::new();
        write_go_char(&mut written, input).unwrap();
        assert_eq!(for_go_char(input), written);
    }

    // -- for_go_byte_string --

    #[test]
    fn byte_string_passthrough() {
        assert_all(
            for_go_byte_string,
            &[("hello world", "hello world"), ("", "")],
        );
    }

    #[test]
    fn byte_string_escapes_double_quote() {
        assert_all(for_go_byte_string, &[(r#"a"b"#, r#"a\"b"#)]);
    }

    #[test]
    fn byte_string_escapes_backslash() {
        assert_all(for_go_byte_string, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn byte_string_named_escapes() {
        assert_all(for_go_byte_string, &NAMED_ESCAPES);
    }

    #[test]
    fn byte_string_hex_for_controls() {
        assert_all(
            for_go_byte_string,
            &[("\x00", "\\x00"), ("\x01", "\\x01"), ("\x7F", "\\x7f")],
        );
    }

    #[test]
    fn byte_string_non_ascii_as_utf8_bytes() {
        assert_all(
            for_go_byte_string,
            &[
                // combining accent U+0301 → UTF-8: CC 81
                ("\u{0301}", r"\xcc\x81"),
                // cafe + combining accent
                ("cafe\u{0301}", r"cafe\xcc\x81"),
                // 日 = U+65E5 → UTF-8: E6 97 A5
                ("\u{65E5}", r"\xe6\x97\xa5"),
                // 😀 = U+1F600 → UTF-8: F0 9F 98 80
                ("\u{1F600}", r"\xf0\x9f\x98\x80"),
            ],
        );
    }

    #[test]
    fn byte_string_nonchars_as_bytes() {
        // U+FDD0 → UTF-8: EF B7 90
        assert_all(for_go_byte_string, &[("\u{FDD0}", r"\xef\xb7\x90")]);
    }

    #[test]
    fn byte_string_single_quote_passes() {
        assert_all(for_go_byte_string, &[("a'b", "a'b")]);
    }

    #[test]
    fn byte_string_writer_matches() {
        let input = "test\x00\"\\cafe\u{0301}";
        let mut written = String::new();
        write_go_byte_string(&mut written, input).unwrap();
        assert_eq!(for_go_byte_string(input), written);
    }
}
352}