Skip to main content

contextual_encoder/
go.rs

1//! go literal encoders.
2//!
3//! encodes untrusted strings for safe embedding in go source literals.
4//!
5//! - [`for_go_string`] — safe for go interpreted string literals (`"..."`)
6//! - [`for_go_char`] — safe for go rune literals (`'...'`)
7//! - [`for_go_byte_string`] — safe for go byte-explicit string literals
8//!   (`[]byte("...")`)
9//!
10//! # encoding rules
11//!
12//! all three encoders use go's native escape syntax:
13//!
14//! - named escapes: `\a`, `\b`, `\t`, `\n`, `\v`, `\f`, `\r`, `\\`
15//! - other C0 controls and DEL → `\xHH`
16//! - unicode non-characters → space (string/char) or `\xHH` per byte (byte string)
17//!
18//! the encoders differ in which quote is escaped and how non-ASCII is handled:
19//!
20//! | encoder | quote escape | non-ASCII |
21//! |---------|-------------|-----------|
22//! | `for_go_string` | `"` → `\"` | passes through |
23//! | `for_go_char` | `'` → `\'` | passes through |
24//! | `for_go_byte_string` | `"` → `\"` | each UTF-8 byte → `\xHH` |
25
26use std::fmt;
27
28use crate::engine::{
29    encode_loop, is_unicode_noncharacter, write_c0_named_escape, write_utf8_hex_bytes,
30};
31
32// ---------------------------------------------------------------------------
33// for_go_string — safe for Go interpreted string literals ("...")
34// ---------------------------------------------------------------------------
35
36/// encodes `input` for safe embedding in a go interpreted string literal
37/// (`"..."`).
38///
39/// escapes backslashes, double quotes, and control characters using go's
40/// escape syntax. non-ASCII unicode passes through unchanged (go source files
41/// are UTF-8). unicode non-characters are replaced with space.
42///
43/// # examples
44///
45/// ```
46/// use contextual_encoder::for_go_string;
47///
48/// assert_eq!(for_go_string(r#"say "hi""#), r#"say \"hi\""#);
49/// assert_eq!(for_go_string("line\nbreak"), r"line\nbreak");
50/// assert_eq!(for_go_string("cafe\u{0301}"), "cafe\u{0301}");
51/// ```
52pub fn for_go_string(input: &str) -> String {
53    let mut out = String::with_capacity(input.len());
54    write_go_string(&mut out, input).expect("writing to string cannot fail");
55    out
56}
57
58/// writes the go-string-encoded form of `input` to `out`.
59///
60/// see [`for_go_string`] for encoding rules.
61pub fn write_go_string<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
62    encode_loop(out, input, needs_go_string_encoding, |out, c, _next| {
63        write_go_text_encoded(out, c, '"')
64    })
65}
66
67fn needs_go_string_encoding(c: char) -> bool {
68    matches!(c, '\x00'..='\x1F' | '\x7F' | '"' | '\\') || is_unicode_noncharacter(c as u32)
69}
70
71// ---------------------------------------------------------------------------
72// for_go_char — safe for Go rune literals ('...')
73// ---------------------------------------------------------------------------
74
75/// encodes `input` for safe embedding in a go rune literal (`'...'`).
76///
77/// escapes backslashes, single quotes, and control characters using go's
78/// escape syntax. non-ASCII unicode passes through unchanged. unicode
79/// non-characters are replaced with space.
80///
81/// # examples
82///
83/// ```
84/// use contextual_encoder::for_go_char;
85///
86/// assert_eq!(for_go_char("it's"), r"it\'s");
87/// assert_eq!(for_go_char(r#"a"b"#), r#"a"b"#);
88/// assert_eq!(for_go_char("tab\there"), r"tab\there");
89/// ```
90pub fn for_go_char(input: &str) -> String {
91    let mut out = String::with_capacity(input.len());
92    write_go_char(&mut out, input).expect("writing to string cannot fail");
93    out
94}
95
96/// writes the go-char-encoded form of `input` to `out`.
97///
98/// see [`for_go_char`] for encoding rules.
99pub fn write_go_char<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
100    encode_loop(out, input, needs_go_char_encoding, |out, c, _next| {
101        write_go_text_encoded(out, c, '\'')
102    })
103}
104
105fn needs_go_char_encoding(c: char) -> bool {
106    matches!(c, '\x00'..='\x1F' | '\x7F' | '\'' | '\\') || is_unicode_noncharacter(c as u32)
107}
108
109// ---------------------------------------------------------------------------
110// shared helper for string and char encoders
111// ---------------------------------------------------------------------------
112
113/// writes the encoded form of a character for go string or rune context.
114/// `quote` is the delimiter being escaped (`"` or `'`).
115fn write_go_text_encoded<W: fmt::Write>(out: &mut W, c: char, quote: char) -> fmt::Result {
116    if let Some(r) = write_c0_named_escape(out, c) {
117        return r;
118    }
119    match c {
120        '"' if quote == '"' => out.write_str("\\\""),
121        '\'' if quote == '\'' => out.write_str("\\'"),
122        c if is_unicode_noncharacter(c as u32) => out.write_char(' '),
123        // other C0 controls and DEL
124        c => write!(out, "\\x{:02x}", c as u32),
125    }
126}
127
128// ---------------------------------------------------------------------------
129// for_go_byte_string — safe for Go byte-explicit string contexts
130// ---------------------------------------------------------------------------
131
132/// encodes `input` for safe embedding in a go string literal used in a
133/// byte-explicit context (`[]byte("...")`).
134///
135/// escapes backslashes, double quotes, and control characters. non-ASCII
136/// characters are encoded as their individual UTF-8 bytes using `\xHH`
137/// notation, making every byte visible.
138///
139/// # examples
140///
141/// ```
142/// use contextual_encoder::for_go_byte_string;
143///
144/// assert_eq!(for_go_byte_string("hello"), "hello");
145/// assert_eq!(for_go_byte_string(r#"say "hi""#), r#"say \"hi\""#);
146/// assert_eq!(for_go_byte_string("cafe\u{0301}"), r"cafe\xcc\x81");
147/// assert_eq!(for_go_byte_string("null\x00byte"), r"null\x00byte");
148/// ```
149pub fn for_go_byte_string(input: &str) -> String {
150    let mut out = String::with_capacity(input.len());
151    write_go_byte_string(&mut out, input).expect("writing to string cannot fail");
152    out
153}
154
155/// writes the go-byte-string-encoded form of `input` to `out`.
156///
157/// see [`for_go_byte_string`] for encoding rules.
158pub fn write_go_byte_string<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
159    encode_loop(
160        out,
161        input,
162        needs_go_byte_string_encoding,
163        write_go_byte_string_encoded,
164    )
165}
166
/// returns `true` when `c` must be escaped in a byte-explicit go string:
/// anything outside ASCII, any ASCII control character (C0 or DEL), the
/// double quote, and the backslash.
fn needs_go_byte_string_encoding(c: char) -> bool {
    if !c.is_ascii() {
        return true;
    }
    c.is_ascii_control() || c == '"' || c == '\\'
}
170
171fn write_go_byte_string_encoded<W: fmt::Write>(
172    out: &mut W,
173    c: char,
174    _next: Option<char>,
175) -> fmt::Result {
176    if let Some(r) = write_c0_named_escape(out, c) {
177        return r;
178    }
179    match c {
180        '"' => out.write_str("\\\""),
181        // non-ASCII → encode each UTF-8 byte
182        c if !c.is_ascii() => write_utf8_hex_bytes(out, c),
183        // other C0 controls and DEL
184        c => write!(out, "\\x{:02x}", c as u32),
185    }
186}
187
#[cfg(test)]
mod tests {
    use super::*;

    /// signature shared by the three `for_go_*` encoders.
    type Encoder = fn(&str) -> String;

    /// the named escapes that every encoder is expected to emit.
    const NAMED_ESCAPES: &[(&str, &str)] = &[
        ("\x07", "\\a"),
        ("\x08", "\\b"),
        ("\t", "\\t"),
        ("\n", "\\n"),
        ("\x0B", "\\v"),
        ("\x0C", "\\f"),
        ("\r", "\\r"),
    ];

    /// asserts that `encode` maps each input to its paired expected output.
    fn assert_encodes(encode: Encoder, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(encode(input), expected, "input: {:?}", input);
        }
    }

    /// asserts that `encode` returns each input untouched.
    fn assert_unchanged(encode: Encoder, inputs: &[&str]) {
        for &input in inputs {
            assert_eq!(encode(input), input, "input: {:?}", input);
        }
    }

    // -- for_go_string --

    #[test]
    fn string_passthrough() {
        assert_unchanged(
            for_go_string,
            &[
                "hello world",
                "",
                "cafe\u{0301} \u{65E5}\u{672C}\u{8A9E}",
                "\u{1F600}",
            ],
        );
    }

    #[test]
    fn string_escapes_double_quote() {
        assert_encodes(for_go_string, &[(r#"a"b"#, r#"a\"b"#)]);
    }

    #[test]
    fn string_passes_single_quote() {
        assert_unchanged(for_go_string, &["a'b"]);
    }

    #[test]
    fn string_escapes_backslash() {
        assert_encodes(for_go_string, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn string_named_escapes() {
        assert_encodes(for_go_string, NAMED_ESCAPES);
    }

    #[test]
    fn string_hex_escapes_for_controls() {
        assert_encodes(
            for_go_string,
            &[
                ("\x00", "\\x00"),
                ("\x01", "\\x01"),
                ("\x06", "\\x06"),
                ("\x0E", "\\x0e"),
                ("\x1F", "\\x1f"),
                ("\x7F", "\\x7f"),
            ],
        );
    }

    #[test]
    fn string_nonchars_replaced() {
        assert_encodes(for_go_string, &[("\u{FDD0}", " "), ("\u{FFFE}", " ")]);
    }

    #[test]
    fn string_writer_matches() {
        let input = "test\x00\"\\\n cafe\u{0301}";
        let mut written = String::new();
        write_go_string(&mut written, input).unwrap();
        assert_eq!(for_go_string(input), written);
    }

    // -- for_go_char --

    #[test]
    fn char_passthrough() {
        assert_unchanged(for_go_char, &["hello world", "", "cafe\u{0301}"]);
    }

    #[test]
    fn char_escapes_single_quote() {
        assert_encodes(for_go_char, &[("a'b", r"a\'b")]);
    }

    #[test]
    fn char_passes_double_quote() {
        assert_unchanged(for_go_char, &[r#"a"b"#]);
    }

    #[test]
    fn char_escapes_backslash() {
        assert_encodes(for_go_char, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn char_named_escapes() {
        assert_encodes(for_go_char, NAMED_ESCAPES);
    }

    #[test]
    fn char_hex_escapes_for_controls() {
        assert_encodes(for_go_char, &[("\x01", "\\x01"), ("\x7F", "\\x7f")]);
    }

    #[test]
    fn char_nonchars_replaced() {
        assert_encodes(for_go_char, &[("\u{FDD0}", " ")]);
    }

    #[test]
    fn char_writer_matches() {
        let input = "test\x00'\\\n cafe\u{0301}";
        let mut written = String::new();
        write_go_char(&mut written, input).unwrap();
        assert_eq!(for_go_char(input), written);
    }

    // -- for_go_byte_string --

    #[test]
    fn byte_string_passthrough() {
        assert_unchanged(for_go_byte_string, &["hello world", ""]);
    }

    #[test]
    fn byte_string_escapes_double_quote() {
        assert_encodes(for_go_byte_string, &[(r#"a"b"#, r#"a\"b"#)]);
    }

    #[test]
    fn byte_string_escapes_backslash() {
        assert_encodes(for_go_byte_string, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn byte_string_named_escapes() {
        assert_encodes(for_go_byte_string, NAMED_ESCAPES);
    }

    #[test]
    fn byte_string_hex_for_controls() {
        assert_encodes(
            for_go_byte_string,
            &[("\x00", "\\x00"), ("\x01", "\\x01"), ("\x7F", "\\x7f")],
        );
    }

    #[test]
    fn byte_string_non_ascii_as_utf8_bytes() {
        assert_encodes(
            for_go_byte_string,
            &[
                // combining accent U+0301 → UTF-8: CC 81
                ("\u{0301}", r"\xcc\x81"),
                // cafe + combining accent
                ("cafe\u{0301}", r"cafe\xcc\x81"),
                // 日 = U+65E5 → UTF-8: E6 97 A5
                ("\u{65E5}", r"\xe6\x97\xa5"),
                // 😀 = U+1F600 → UTF-8: F0 9F 98 80
                ("\u{1F600}", r"\xf0\x9f\x98\x80"),
            ],
        );
    }

    #[test]
    fn byte_string_nonchars_as_bytes() {
        // U+FDD0 → UTF-8: EF B7 90
        assert_encodes(for_go_byte_string, &[("\u{FDD0}", r"\xef\xb7\x90")]);
    }

    #[test]
    fn byte_string_single_quote_passes() {
        assert_unchanged(for_go_byte_string, &["a'b"]);
    }

    #[test]
    fn byte_string_writer_matches() {
        let input = "test\x00\"\\cafe\u{0301}";
        let mut written = String::new();
        write_go_byte_string(&mut written, input).unwrap();
        assert_eq!(for_go_byte_string(input), written);
    }
}