Skip to main content

alef_e2e/
escape.rs

1//! Language-specific string escaping for e2e test code generation.
2
/// Escape `s` so it can be placed inside a double-quoted Python string literal.
///
/// Backslash, double quote, newline, carriage return and tab get their usual
/// two-character escapes. Any other character below U+0020 is written as a
/// `\xHH` escape so the generated Python source stays valid. Everything else
/// is copied through unchanged.
pub fn escape_python(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        // Characters with a dedicated short escape sequence.
        let named = match c {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        if let Some(seq) = named {
            escaped.push_str(seq);
        } else if u32::from(c) < 0x20 {
            // Remaining control characters: two-digit lowercase hex escape.
            escaped.push_str(&format!("\\x{:02x}", u32::from(c)));
        } else {
            escaped.push(c);
        }
    }
    escaped
}
22
/// Escape `s` for use between the quotes of a regular Rust string literal.
///
/// Escapes backslash, double quote, newline, carriage return and tab; all
/// other characters are copied verbatim.
pub fn escape_rust(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            other => out.push(other),
        }
    }
    out
}
31
/// Compute how many `#` characters a Rust raw string literal needs to hold `s`.
///
/// The answer is one more than the longest run of `#` that directly follows a
/// `"` inside `s`, so no `"###…` sequence in the content can close the literal
/// early. A string without any such run yields `1`.
pub fn raw_string_hashes(s: &str) -> usize {
    // Every piece after the first one starts right after a `"`; count the
    // `#` characters at the start of each of those pieces.
    let longest_run = s
        .split('"')
        .skip(1)
        .map(|after_quote| after_quote.chars().take_while(|&c| c == '#').count())
        .max()
        .unwrap_or(0);
    longest_run + 1
}
51
52/// Format a string as a Rust raw string literal (r#"..."#).
53pub fn rust_raw_string(s: &str) -> String {
54    let hashes = raw_string_hashes(s);
55    let h: String = "#".repeat(hashes);
56    format!("r{h}\"{s}\"{h}")
57}
58
/// Escape `s` for a double-quoted JavaScript/TypeScript string literal.
///
/// Only backslash, double quote, newline, carriage return and tab are escaped.
/// `$` is intentionally left alone: it is special only inside template
/// literals, and emitting `\$` here would trip Biome's
/// `noUselessEscapeInString` lint.
pub fn escape_js(s: &str) -> String {
    s.chars().fold(String::with_capacity(s.len()), |mut acc, c| {
        match c {
            '\\' => acc.push_str("\\\\"),
            '"' => acc.push_str("\\\""),
            '\n' => acc.push_str("\\n"),
            '\r' => acc.push_str("\\r"),
            '\t' => acc.push_str("\\t"),
            other => acc.push(other),
        }
        acc
    })
}
70
/// Escape a string for embedding in a JavaScript/TypeScript template literal (backtick string).
///
/// Template literals interpolate `${...}` and use backtick delimiters, so both
/// `` ` `` and `$` must be escaped to prevent unintended interpolation, and the
/// backslash itself must be doubled.
///
/// Carriage returns are emitted as the `\r` escape: a raw CR (or CRLF) inside a
/// template literal is normalized to LF by the JavaScript engine, which would
/// silently change the string's value. Line feeds are left as-is because they
/// survive unchanged.
pub fn escape_js_template(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '`' => out.push_str("\\`"),
            '$' => out.push_str("\\$"),
            // Raw CR would be cooked into LF; keep it as an explicit escape.
            '\r' => out.push_str("\\r"),
            c => out.push(c),
        }
    }
    out
}
78
/// Reports whether `s` has to be written as a Go interpreted (double-quoted)
/// literal instead of a raw (backtick) literal.
///
/// That is the case when `s` contains a backtick, a NUL, or a carriage return:
/// raw literals cannot hold a backtick or NUL, and a literal CR inside a raw
/// string is rejected by gofmt.
fn go_needs_quoted(s: &str) -> bool {
    s.chars().any(|c| matches!(c, '`' | '\0' | '\r'))
}
87
88/// Format a string as a Go string literal (backtick or quoted).
89///
90/// Prefers backtick raw literals for readability, but falls back to double-quoted
91/// interpreted literals when the string contains characters that raw literals
92/// cannot represent: backtick `` ` ``, NUL (`\x00`), or carriage return (`\r`).
93pub fn go_string_literal(s: &str) -> String {
94    if go_needs_quoted(s) {
95        format!("\"{}\"", escape_go(s))
96    } else {
97        format!("`{s}`")
98    }
99}
100
/// Escape a string for embedding in a Go double-quoted (interpreted) string.
///
/// `\\`, `"`, `\n`, `\r` and `\t` get their short escapes. Every other ASCII
/// control character (U+0000–U+001F and U+007F, so NUL becomes `\x00`) is
/// emitted as a `\xNN` hex escape. All remaining characters, including
/// non-ASCII ones, are copied through unchanged.
///
/// The input is walked character by character rather than byte by byte, so
/// multi-byte UTF-8 sequences are never split and re-encoded.
pub fn escape_go(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            // Remaining ASCII control characters (including NUL and DEL): hex escape.
            c if u32::from(c) < 0x20 || c == '\x7f' => {
                out.push_str(&format!("\\x{:02x}", u32::from(c)));
            }
            // Printable ASCII and all non-ASCII characters are emitted verbatim.
            c => out.push(c),
        }
    }
    out
}
125
/// Escape `s` for use inside a Java string literal.
///
/// Backslash, double quote, newline, carriage return and tab are replaced by
/// their two-character escapes; every other character is kept as-is.
pub fn escape_java(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        let replacement = match c {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match replacement {
            Some(seq) => out.push_str(seq),
            None => out.push(c),
        }
    }
    out
}
134
/// Escape `s` for use inside a double-quoted Kotlin string literal.
///
/// Same escapes as the Java variant, plus `$`, which would otherwise start a
/// Kotlin string template.
pub fn escape_kotlin(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '$' => out.push_str("\\$"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            other => out.push(other),
        }
    }
    out
}
145
/// Escape `s` for use inside a regular (non-verbatim) C# string literal.
///
/// Escapes backslash, double quote, newline, carriage return and tab; other
/// characters pass through untouched.
pub fn escape_csharp(s: &str) -> String {
    s.chars().fold(String::with_capacity(s.len()), |mut acc, c| {
        match c {
            '\\' => acc.push_str("\\\\"),
            '"' => acc.push_str("\\\""),
            '\n' => acc.push_str("\\n"),
            '\r' => acc.push_str("\\r"),
            '\t' => acc.push_str("\\t"),
            other => acc.push(other),
        }
        acc
    })
}
154
/// Escape `s` for use inside a PHP string literal.
///
/// Escapes backslash, double quote, `$`, newline, carriage return and tab;
/// other characters are copied unchanged.
pub fn escape_php(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        let replacement = match c {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '$' => Some("\\$"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match replacement {
            Some(seq) => out.push_str(seq),
            None => out.push(c),
        }
    }
    out
}
164
/// Escape `s` for use inside a double-quoted Ruby string literal.
///
/// In addition to backslash, double quote, newline, carriage return and tab,
/// every `#` is escaped so the text cannot start a `#{...}` interpolation.
pub fn escape_ruby(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '#' => out.push_str("\\#"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            other => out.push(other),
        }
    }
    out
}
174
/// Escape `s` for use inside a single-quoted Ruby string literal.
///
/// Single-quoted Ruby strings only interpret `\\` and `\'`, so only the
/// backslash and the single quote are escaped.
pub fn escape_ruby_single(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        // Both special characters are escaped by prefixing a backslash.
        if c == '\\' || c == '\'' {
            out.push('\\');
        }
        out.push(c);
    }
    out
}
180
/// Turn a `{param}` template into the body of a Ruby double-quoted string that
/// uses `#{param}` interpolation.
///
/// A placeholder is `{`, an identifier (ASCII letter or `_`, followed by ASCII
/// letters, digits or `_`), then `}`; it is rewritten to `#{identifier}`. A `{`
/// that does not form such a placeholder is emitted literally. All other text
/// is escaped as for a Ruby double-quoted string (`\\`, `"`, `#`, `\n`, `\r`,
/// `\t`). The surrounding `"` quotes are NOT part of the returned value.
pub fn ruby_template_to_interpolation(template: &str) -> String {
    let mut out = String::with_capacity(template.len() + 8);
    let mut rest = template;
    while let Some(ch) = rest.chars().next() {
        rest = &rest[ch.len_utf8()..];
        if ch == '{' {
            let starts_ident = rest
                .chars()
                .next()
                .is_some_and(|c| c.is_ascii_alphabetic() || c == '_');
            if !starts_ident {
                out.push('{');
                continue;
            }
            // Identifier characters are ASCII, so the byte offset of the first
            // non-identifier character is a valid split point.
            let ident_len = rest
                .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
                .unwrap_or(rest.len());
            let (ident, tail) = rest.split_at(ident_len);
            match tail.strip_prefix('}') {
                Some(after_brace) => {
                    out.push_str("#{");
                    out.push_str(ident);
                    out.push('}');
                    rest = after_brace;
                }
                None => {
                    // No closing brace right after the identifier: keep the text literal.
                    out.push('{');
                    out.push_str(ident);
                    rest = tail;
                }
            }
            continue;
        }
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '#' => out.push_str("\\#"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            other => out.push(other),
        }
    }
    out
}
229
230/// Convert a `{param}` template string to an R expression using `paste0()`.
231///
232/// `{key}` placeholders are converted to variable references in a `paste0(...)` call.
233/// Literal text segments are R string literals. If the template has no placeholders,
234/// a plain R string literal is returned. If the template is a single bare placeholder
235/// like `{text}`, just the variable name is returned.
236///
237/// Examples:
238/// - `"[BTN:{text}]"` → `paste0("[BTN:", text, "]")`
239/// - `"--- {text} ---"` → `paste0("--- ", text, " ---")`
240/// - `"{text}"` → `text`
241/// - `"static"` → `"static"`
242pub fn r_template_to_paste0(template: &str) -> String {
243    enum Seg {
244        Lit(String),
245        Param(String),
246    }
247    let mut segments: Vec<Seg> = Vec::new();
248    let mut lit = String::new();
249    let mut chars = template.chars().peekable();
250    while let Some(ch) = chars.next() {
251        if ch == '{' {
252            let is_ident_start = chars.peek().is_some_and(|&c| c.is_ascii_alphabetic() || c == '_');
253            if is_ident_start {
254                let mut ident = String::new();
255                while let Some(&c) = chars.peek() {
256                    if c.is_ascii_alphanumeric() || c == '_' {
257                        ident.push(chars.next().unwrap());
258                    } else {
259                        break;
260                    }
261                }
262                if chars.peek() == Some(&'}') {
263                    chars.next();
264                    if !lit.is_empty() {
265                        segments.push(Seg::Lit(lit.clone()));
266                        lit.clear();
267                    }
268                    segments.push(Seg::Param(ident));
269                    continue;
270                }
271                lit.push('{');
272                lit.push_str(&ident);
273            } else {
274                lit.push('{');
275            }
276        } else {
277            lit.push(ch);
278        }
279    }
280    if !lit.is_empty() {
281        segments.push(Seg::Lit(lit));
282    }
283    match segments.as_slice() {
284        [] => r#""""#.to_string(),
285        [Seg::Param(p)] => p.clone(),
286        segs => {
287            let args: Vec<String> = segs
288                .iter()
289                .map(|s| match s {
290                    Seg::Lit(l) => format!("\"{}\"", escape_r(l)),
291                    Seg::Param(p) => p.clone(),
292                })
293                .collect();
294            format!("paste0({})", args.join(", "))
295        }
296    }
297}
298
/// Reports whether `s` must be written as a double-quoted Ruby string.
///
/// This is true when `s` contains a newline, carriage return, tab or NUL —
/// characters that are written with escape sequences, which are only
/// available in double-quoted strings.
pub fn ruby_needs_double_quotes(s: &str) -> bool {
    s.chars().any(|c| matches!(c, '\n' | '\r' | '\t' | '\0'))
}
304
305/// Format a string as a Ruby literal, preferring single quotes.
306pub fn ruby_string_literal(s: &str) -> String {
307    if ruby_needs_double_quotes(s) {
308        format!("\"{}\"", escape_ruby(s))
309    } else {
310        format!("'{}'", escape_ruby_single(s))
311    }
312}
313
/// Escape `s` for use inside an Elixir string literal.
///
/// Escapes backslash, double quote, `#`, newline, carriage return and tab;
/// other characters are left unchanged.
pub fn escape_elixir(s: &str) -> String {
    s.chars().fold(String::with_capacity(s.len()), |mut acc, c| {
        match c {
            '\\' => acc.push_str("\\\\"),
            '"' => acc.push_str("\\\""),
            '#' => acc.push_str("\\#"),
            '\n' => acc.push_str("\\n"),
            '\r' => acc.push_str("\\r"),
            '\t' => acc.push_str("\\t"),
            other => acc.push(other),
        }
        acc
    })
}
323
/// Escape `s` for use inside an R string literal.
///
/// Escapes backslash, double quote, newline, carriage return and tab; every
/// other character is copied as-is.
pub fn escape_r(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            other => out.push(other),
        }
    }
    out
}
332
/// Escape `s` for use inside a C string literal.
///
/// Escapes backslash, double quote, newline, carriage return and tab; other
/// characters are passed through unchanged.
pub fn escape_c(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        let replacement = match c {
            '\\' => Some("\\\\"),
            '"' => Some("\\\""),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match replacement {
            Some(seq) => out.push_str(seq),
            None => out.push(c),
        }
    }
    out
}
341
/// Turn an arbitrary string into something usable as a test function name.
///
/// Steps, in order:
/// 1. every character that is not an ASCII letter, ASCII digit or `_` becomes `_`;
/// 2. leading ASCII digits are removed;
/// 3. if step 2 removed anything, the underscores that now lead the string are
///    removed as well, so a caller that prefixes the result (e.g.
///    `test_<ident>`) gets `test_foo` rather than `test__foo` for a fixture id
///    like `24_foo`;
/// 4. an empty result is replaced by the placeholder `_`.
///
/// Leading underscores that were not preceded by digits are preserved.
pub fn sanitize_ident(s: &str) -> String {
    let cleaned: String = s
        .chars()
        .map(|c| if c.is_ascii_alphanumeric() || c == '_' { c } else { '_' })
        .collect();

    let without_digits = cleaned.trim_start_matches(|c: char| c.is_ascii_digit());
    let had_digit_prefix = without_digits.len() != cleaned.len();
    let ident = if had_digit_prefix {
        // Only drop underscores that were exposed by removing the digit prefix.
        without_digits.trim_start_matches('_')
    } else {
        without_digits
    };

    match ident {
        "" => String::from("_"),
        name => name.to_owned(),
    }
}
371
/// Convert a category name into a lowercase filename component.
///
/// ASCII letters are lowercased, ASCII digits and `_` are kept, and every
/// other character is replaced by `_`.
pub fn sanitize_filename(s: &str) -> String {
    let mut name = String::with_capacity(s.len());
    for c in s.chars() {
        if c.is_ascii_alphanumeric() || c == '_' {
            // Only ASCII reaches this branch, so ASCII lowercasing is sufficient.
            name.push(c.to_ascii_lowercase());
        } else {
            name.push('_');
        }
    }
    name
}
379
/// Expand fixture template expressions embedded in a string value.
///
/// Supported templates:
/// - `{{ repeat 'X' N times }}` — replaced by the text `X` repeated `N` times
///
/// Text outside templates is copied verbatim. When something that starts like
/// a template does not parse (no `' ` separator, no ` times }}` terminator, or
/// a count that is not an unsigned integer), the opening `{{ repeat '` is
/// emitted literally and scanning resumes right after it. A string with no
/// templates comes back unchanged.
pub fn expand_fixture_templates(s: &str) -> String {
    const PREFIX: &str = "{{ repeat '";
    const SUFFIX: &str = " times }}";

    let mut out = String::with_capacity(s.len());
    let mut rest = s;

    while let Some((before, body)) = rest.split_once(PREFIX) {
        out.push_str(before);

        // `body` should look like `X' N times }}…`; split it into the repeated
        // unit, the count, and whatever follows the template.
        let parsed = body.split_once("' ").and_then(|(unit, tail)| {
            let (count_text, after) = tail.split_once(SUFFIX)?;
            let count = count_text.trim().parse::<usize>().ok()?;
            Some((unit, count, after))
        });

        match parsed {
            Some((unit, count, after)) => {
                out.push_str(&unit.repeat(count));
                rest = after;
            }
            None => {
                // Not a valid template: keep the prefix text and move on.
                out.push_str(PREFIX);
                rest = body;
            }
        }
    }

    out.push_str(rest);
    out
}
419
/// Escape `s` for embedding in a POSIX single-quoted shell string.
///
/// Each embedded single quote is replaced by `'\''` (close the quoted string,
/// emit an escaped quote, reopen it). Single-quoted shell strings treat every
/// other character literally, so nothing else is changed.
///
/// The returned text does not include the surrounding single quotes.
pub fn escape_shell(s: &str) -> String {
    // Splitting on the quote and re-joining with the escape sequence replaces
    // every occurrence, including leading, trailing and adjacent ones.
    s.split('\'').collect::<Vec<_>>().join(r"'\''")
}
428
/// Escape `s` for use inside a Gleam string literal.
///
/// Escapes backslash, double quote, newline, carriage return and tab; all
/// other characters are kept unchanged.
pub fn escape_gleam(s: &str) -> String {
    s.chars().fold(String::with_capacity(s.len()), |mut acc, c| {
        match c {
            '\\' => acc.push_str("\\\\"),
            '"' => acc.push_str("\\\""),
            '\n' => acc.push_str("\\n"),
            '\r' => acc.push_str("\\r"),
            '\t' => acc.push_str("\\t"),
            other => acc.push(other),
        }
        acc
    })
}
437
/// Escape `s` for use inside a Zig string literal.
///
/// Escapes backslash, double quote, newline, carriage return and tab; all
/// other characters are kept unchanged.
pub fn escape_zig(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            other => out.push(other),
        }
    }
    out
}
446
#[cfg(test)]
mod tests {
    use super::*;

    /// A NUL byte cannot appear in a Go raw (backtick) literal — gofmt rejects
    /// it — so such input must come out double-quoted with a `\x00` escape.
    #[test]
    fn go_string_literal_nul_bytes_use_quoted_form() {
        let lit = go_string_literal("Hello\x00World");
        // No raw NUL may survive in the output.
        assert!(
            lit.bytes().all(|b| b != 0),
            "go_string_literal emitted a NUL byte — gofmt would reject this: {lit:?}"
        );
        // The literal has to be the double-quoted form.
        assert!(
            lit.starts_with('"'),
            "expected double-quoted string for NUL input, got: {lit:?}"
        );
        // The NUL itself is spelled as the four characters `\x00`.
        assert!(
            lit.contains("\\x00"),
            "expected \\x00 escape sequence for NUL byte, got: {lit:?}"
        );
    }

    /// Carriage returns also force the double-quoted form, since a Go raw
    /// string cannot represent `\r`.
    #[test]
    fn go_string_literal_carriage_return_uses_quoted_form() {
        let lit = go_string_literal("line1\r\nline2");
        assert!(
            lit.bytes().all(|b| b != b'\r'),
            "go_string_literal emitted a literal CR — gofmt would reject this: {lit:?}"
        );
        assert!(
            lit.starts_with('"'),
            "expected double-quoted string for CR input, got: {lit:?}"
        );
    }

    /// Input without backtick, NUL or CR keeps the readable backtick form,
    /// even when it contains a newline.
    #[test]
    fn go_string_literal_plain_string_uses_backtick() {
        let lit = go_string_literal("Hello World\nwith newline");
        assert!(
            lit.starts_with('`'),
            "expected backtick form for plain string, got: {lit:?}"
        );
    }

    /// A backtick in the input forces the double-quoted form.
    #[test]
    fn go_string_literal_backtick_in_string_uses_quoted_form() {
        let lit = go_string_literal("has `backtick`");
        assert!(
            lit.starts_with('"'),
            "expected double-quoted form when string contains backtick, got: {lit:?}"
        );
    }

    /// Fixture ids with a numeric prefix (`24_cookie_samesite_strict`) must not
    /// turn into `_cookie_samesite_strict`, which would give
    /// `test__cookie_samesite_strict` once prefixed with `test_`.
    #[test]
    fn sanitize_ident_strips_leading_underscore_after_digit_prefix() {
        for (input, expected) in [
            ("24_cookie_samesite_strict", "cookie_samesite_strict"),
            ("01_foo", "foo"),
            ("9bar", "bar"),
        ] {
            assert_eq!(sanitize_ident(input), expected);
        }
    }

    /// Leading underscores that are not preceded by digits are kept, so ids
    /// that deliberately start with `_` round-trip unchanged.
    #[test]
    fn sanitize_ident_preserves_leading_underscore_without_digits() {
        for input in ["_foo", "__bar"] {
            assert_eq!(sanitize_ident(input), input);
        }
    }

    /// Input made only of digits (plus the underscores left behind) collapses
    /// to the `_` placeholder because the result would otherwise be empty.
    #[test]
    fn sanitize_ident_all_digits_returns_underscore_placeholder() {
        for input in ["123", "12_"] {
            assert_eq!(sanitize_ident(input), "_");
        }
    }

    /// Digits and underscores that are not at the start are left alone.
    #[test]
    fn sanitize_ident_preserves_interior_chars() {
        for (input, expected) in [("foo_42_bar", "foo_42_bar"), ("foo.bar-baz", "foo_bar_baz")] {
            assert_eq!(sanitize_ident(input), expected);
        }
    }
}