Skip to main content

toon/shared/
string_utils.rs

1use crate::shared::constants::{BACKSLASH, CARRIAGE_RETURN, DOUBLE_QUOTE, NEWLINE, TAB};
2
3#[must_use]
4pub fn escape_string(value: &str) -> String {
5    let mut out = String::with_capacity(value.len());
6    for ch in value.chars() {
7        match ch {
8            '\\' => {
9                out.push(BACKSLASH);
10                out.push(BACKSLASH);
11            }
12            '"' => {
13                out.push(BACKSLASH);
14                out.push(DOUBLE_QUOTE);
15            }
16            '\n' => {
17                out.push(BACKSLASH);
18                out.push('n');
19            }
20            '\r' => {
21                out.push(BACKSLASH);
22                out.push('r');
23            }
24            '\t' => {
25                out.push(BACKSLASH);
26                out.push('t');
27            }
28            _ => out.push(ch),
29        }
30    }
31    out
32}
33
34/// Unescape a string literal body.
35///
36/// # Errors
37///
38/// Returns an error when the input contains invalid escape sequences or ends
39/// with a trailing backslash.
40pub fn unescape_string(value: &str) -> Result<String, String> {
41    let mut out = String::with_capacity(value.len());
42    let mut chars = value.chars();
43
44    while let Some(ch) = chars.next() {
45        if ch == BACKSLASH {
46            let next = chars
47                .next()
48                .ok_or_else(|| "Invalid escape sequence: backslash at end of string".to_string())?;
49            match next {
50                'n' => out.push(NEWLINE),
51                't' => out.push(TAB),
52                'r' => out.push(CARRIAGE_RETURN),
53                '\\' => out.push(BACKSLASH),
54                '"' => out.push(DOUBLE_QUOTE),
55                other => {
56                    return Err(format!("Invalid escape sequence: \\{other}"));
57                }
58            }
59        } else {
60            out.push(ch);
61        }
62    }
63
64    Ok(out)
65}
66
67#[must_use]
68pub fn find_closing_quote(content: &str, start: usize) -> Option<usize> {
69    let bytes = content.as_bytes();
70    // Guard against out-of-bounds start positions (prevents `start + 1` overflow
71    // on pathological inputs and short-circuits when there is no content to scan).
72    if start >= bytes.len() {
73        return None;
74    }
75    let mut i = start + 1;
76    while i < bytes.len() {
77        if bytes[i] == BACKSLASH as u8 && i + 1 < bytes.len() {
78            i += 2;
79            continue;
80        }
81        if bytes[i] == DOUBLE_QUOTE as u8 {
82            return Some(i);
83        }
84        i += 1;
85    }
86    None
87}
88
89#[must_use]
90pub fn find_unquoted_char(content: &str, target: char, start: usize) -> Option<usize> {
91    // Byte-level scanning is valid only for ASCII targets. All current callers pass
92    // ASCII (colon, pipe, tab, comma) so this is a safe fast-path; non-ASCII targets
93    // short-circuit to None rather than risk a false positive on a UTF-8 byte.
94    if !target.is_ascii() {
95        return None;
96    }
97    let target_byte = target as u8;
98    let bs_byte = BACKSLASH as u8;
99    let dq_byte = DOUBLE_QUOTE as u8;
100    let bytes = content.as_bytes();
101    let mut i = start;
102    let mut in_quotes = false;
103    while i < bytes.len() {
104        let b = bytes[i];
105        if in_quotes && b == bs_byte && i + 1 < bytes.len() {
106            i += 2;
107            continue;
108        }
109        if b == dq_byte {
110            in_quotes = !in_quotes;
111            i += 1;
112            continue;
113        }
114        if b == target_byte && !in_quotes {
115            return Some(i);
116        }
117        i += 1;
118    }
119    None
120}