1use std::fmt;
42
43use crate::engine::{
44 encode_loop, is_unicode_noncharacter, write_c0_named_escape, write_utf8_hex_bytes,
45};
46
47pub fn for_python_string(input: &str) -> String {
70 let mut out = String::with_capacity(input.len());
71 write_python_string(&mut out, input).expect("writing to string cannot fail");
72 out
73}
74
75pub fn write_python_string<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
79 encode_loop(out, input, needs_python_string_encoding, |out, c, _next| {
80 write_python_text_encoded(out, c)
81 })
82}
83
84fn needs_python_string_encoding(c: char) -> bool {
85 matches!(c, '\x00'..='\x1F' | '\x7F' | '"' | '\'' | '\\') || is_unicode_noncharacter(c as u32)
86}
87
88pub fn for_python_bytes(input: &str) -> String {
110 let mut out = String::with_capacity(input.len());
111 write_python_bytes(&mut out, input).expect("writing to string cannot fail");
112 out
113}
114
115pub fn write_python_bytes<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
119 encode_loop(
120 out,
121 input,
122 needs_python_bytes_encoding,
123 write_python_bytes_encoded,
124 )
125}
126
/// Returns `true` when `c` may not appear verbatim in an encoded Python
/// bytes literal.
///
/// Every non-ASCII character qualifies, as do C0 controls
/// (U+0000..=U+001F), DEL (U+007F), both quote characters and the backslash.
fn needs_python_bytes_encoding(c: char) -> bool {
    // Anything outside ASCII is always escaped.
    if !c.is_ascii() {
        return true;
    }
    matches!(c, '\x00'..='\x1F' | '\x7F' | '"' | '\'' | '\\')
}
130
131fn write_python_bytes_encoded<W: fmt::Write>(
132 out: &mut W,
133 c: char,
134 _next: Option<char>,
135) -> fmt::Result {
136 if let Some(r) = write_c0_named_escape(out, c) {
137 return r;
138 }
139 match c {
140 '"' => out.write_str("\\\""),
141 '\'' => out.write_str("\\'"),
142 c if !c.is_ascii() => write_utf8_hex_bytes(out, c),
144 c => write!(out, "\\x{:02x}", c as u32),
146 }
147}
148
149fn write_python_text_encoded<W: fmt::Write>(out: &mut W, c: char) -> fmt::Result {
155 if let Some(r) = write_c0_named_escape(out, c) {
156 return r;
157 }
158 match c {
159 '"' => out.write_str("\\\""),
160 '\'' => out.write_str("\\'"),
161 c if is_unicode_noncharacter(c as u32) => out.write_char(' '),
162 c => write!(out, "\\x{:02x}", c as u32),
164 }
165}
166
167pub fn for_python_raw_string(input: &str) -> String {
191 let mut out = String::with_capacity(input.len());
192 write_python_raw_string(&mut out, input).expect("writing to string cannot fail");
193 out
194}
195
196pub fn write_python_raw_string<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
200 let trailing_bs = input.bytes().rev().take_while(|&b| b == b'\\').count();
201 let cutoff = if trailing_bs % 2 == 1 {
202 input.len() - 1
203 } else {
204 input.len()
205 };
206
207 for (i, c) in input.char_indices() {
208 if i >= cutoff {
209 out.write_char(' ')?;
211 } else if needs_python_raw_string_encoding(c) {
212 out.write_char(' ')?;
213 } else {
214 out.write_char(c)?;
215 }
216 }
217 Ok(())
218}
219
220fn needs_python_raw_string_encoding(c: char) -> bool {
221 matches!(c, '\x00'..='\x1F' | '\x7F' | '"' | '\'') || is_unicode_noncharacter(c as u32)
222}
223
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds every `(input, expected)` pair through `encode` and asserts the
    /// produced string equals `expected`.
    fn check_all(encode: fn(&str) -> String, cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(encode(input), expected, "input: {:?}", input);
        }
    }

    // ---- for_python_string / write_python_string ----

    #[test]
    fn string_passthrough() {
        check_all(
            for_python_string,
            &[
                ("hello world", "hello world"),
                ("", ""),
                (
                    "cafe\u{0301} \u{65E5}\u{672C}\u{8A9E}",
                    "cafe\u{0301} \u{65E5}\u{672C}\u{8A9E}",
                ),
                ("\u{1F600}", "\u{1F600}"),
            ],
        );
    }

    #[test]
    fn string_escapes_double_quote() {
        check_all(for_python_string, &[(r#"a"b"#, r#"a\"b"#)]);
    }

    #[test]
    fn string_escapes_single_quote() {
        check_all(for_python_string, &[("a'b", r"a\'b")]);
    }

    #[test]
    fn string_escapes_backslash() {
        check_all(for_python_string, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn string_named_escapes() {
        check_all(
            for_python_string,
            &[
                ("\x07", "\\a"),
                ("\x08", "\\b"),
                ("\t", "\\t"),
                ("\n", "\\n"),
                ("\x0B", "\\v"),
                ("\x0C", "\\f"),
                ("\r", "\\r"),
            ],
        );
    }

    #[test]
    fn string_hex_escapes_for_controls() {
        check_all(
            for_python_string,
            &[
                ("\x00", "\\x00"),
                ("\x01", "\\x01"),
                ("\x06", "\\x06"),
                ("\x0E", "\\x0e"),
                ("\x1F", "\\x1f"),
                ("\x7F", "\\x7f"),
            ],
        );
    }

    #[test]
    fn string_nonchars_replaced() {
        check_all(for_python_string, &[("\u{FDD0}", " "), ("\u{FFFE}", " ")]);
    }

    #[test]
    fn string_writer_matches() {
        let sample = "test\x00\"'\\\n cafe\u{0301}";
        let mut written = String::new();
        write_python_string(&mut written, sample).unwrap();
        assert_eq!(for_python_string(sample), written);
    }

    // ---- for_python_bytes / write_python_bytes ----

    #[test]
    fn bytes_passthrough() {
        check_all(
            for_python_bytes,
            &[("hello world", "hello world"), ("", "")],
        );
    }

    #[test]
    fn bytes_escapes_double_quote() {
        check_all(for_python_bytes, &[(r#"a"b"#, r#"a\"b"#)]);
    }

    #[test]
    fn bytes_escapes_single_quote() {
        check_all(for_python_bytes, &[("a'b", r"a\'b")]);
    }

    #[test]
    fn bytes_escapes_backslash() {
        check_all(for_python_bytes, &[(r"a\b", r"a\\b")]);
    }

    #[test]
    fn bytes_named_escapes() {
        check_all(
            for_python_bytes,
            &[
                ("\x07", "\\a"),
                ("\x08", "\\b"),
                ("\t", "\\t"),
                ("\n", "\\n"),
                ("\x0B", "\\v"),
                ("\x0C", "\\f"),
                ("\r", "\\r"),
            ],
        );
    }

    #[test]
    fn bytes_hex_for_controls() {
        check_all(
            for_python_bytes,
            &[("\x00", "\\x00"), ("\x01", "\\x01"), ("\x7F", "\\x7f")],
        );
    }

    #[test]
    fn bytes_non_ascii_as_utf8_bytes() {
        check_all(
            for_python_bytes,
            &[
                ("\u{0301}", r"\xcc\x81"),
                ("cafe\u{0301}", r"cafe\xcc\x81"),
                ("\u{65E5}", r"\xe6\x97\xa5"),
                ("\u{1F600}", r"\xf0\x9f\x98\x80"),
            ],
        );
    }

    #[test]
    fn bytes_nonchars_as_bytes() {
        check_all(for_python_bytes, &[("\u{FDD0}", r"\xef\xb7\x90")]);
    }

    #[test]
    fn bytes_writer_matches() {
        let sample = "test\x00\"'\\cafe\u{0301}";
        let mut written = String::new();
        write_python_bytes(&mut written, sample).unwrap();
        assert_eq!(for_python_bytes(sample), written);
    }

    // ---- for_python_raw_string / write_python_raw_string ----

    #[test]
    fn raw_passthrough() {
        check_all(
            for_python_raw_string,
            &[("hello world", "hello world"), ("", "")],
        );
    }

    #[test]
    fn raw_quotes_replaced() {
        check_all(
            for_python_raw_string,
            &[(r#"a"b"#, "a b"), ("a'b", "a b"), (r#"a"b'c"#, "a b c")],
        );
    }

    #[test]
    fn raw_controls_replaced() {
        check_all(
            for_python_raw_string,
            &[
                ("\x00", " "),
                ("\x01", " "),
                ("\t", " "),
                ("\n", " "),
                ("\x7F", " "),
            ],
        );
    }

    #[test]
    fn raw_backslash_in_middle() {
        check_all(
            for_python_raw_string,
            &[(r"a\b", r"a\b"), (r"path\to\file", r"path\to\file")],
        );
    }

    #[test]
    fn raw_trailing_even_backslashes() {
        check_all(
            for_python_raw_string,
            &[(r"ab\\", r"ab\\"), (r"ab\\\\", r"ab\\\\")],
        );
    }

    #[test]
    fn raw_trailing_odd_backslash_replaced() {
        check_all(
            for_python_raw_string,
            &[
                (r"trailing\", "trailing "),
                (r"ab\\\", "ab\\\\ "),
                (r"\", " "),
            ],
        );
    }

    #[test]
    fn raw_nonchars_replaced() {
        check_all(
            for_python_raw_string,
            &[("\u{FDD0}", " "), ("\u{FFFE}", " ")],
        );
    }

    #[test]
    fn raw_non_ascii_passes_through() {
        check_all(
            for_python_raw_string,
            &[("café", "café"), ("日本語", "日本語"), ("😀", "😀")],
        );
    }

    #[test]
    fn raw_writer_matches() {
        let sample = "test\x00\"'\\path\\to";
        let mut written = String::new();
        write_python_raw_string(&mut written, sample).unwrap();
        assert_eq!(for_python_raw_string(sample), written);
    }
}