ftml/parsing/
string.rs

1/*
2 * parsing/string.rs
3 *
4 * ftml - Library to parse Wikidot text
5 * Copyright (C) 2019-2025 Wikijump Team
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
16 *
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21use std::borrow::Cow;
22
23/// Parses a double-quoted string.
24///
25/// Takes inputs starting and ending with `"`
26/// and containing characters, or any of these
27/// escapes:
28/// * `\\`
29/// * `\"`
30/// * `\'`
31/// * `\r`
32/// * `\n`
33/// * `\t`
34///
35/// If in invalid escape is found, the input
36/// is returned. So for `\$`, it will emit a
37/// `\` followed by a `$`.
38pub fn parse_string(input: &str) -> Cow<str> {
39    // We could do an iteration thing, but tracking
40    // the index across replacements is complicated.
41    //
42    // So we check if there are any possible escapes,
43    // and if so, build a new string.
44    //
45    // This removes the double quotes on either end
46    // and lets us only deal with the center.
47    // If it's not a string (i.e. doesn't start/end with ")
48    // then it just quits.
49
50    let input = match slice_middle(input) {
51        Some(input) => input,
52        None => {
53            warn!("Not a 'string', returning as-is: {:?}", input);
54            return Cow::Borrowed(input);
55        }
56    };
57
58    if !input.contains('\\') {
59        trace!("No escapes, returning as-is: {:?}", input);
60        return Cow::Borrowed(input);
61    }
62
63    let mut output = String::new();
64    let mut wants_escape = false;
65
66    for ch in input.chars() {
67        if wants_escape {
68            match escape_char(ch) {
69                Some(replacement) => {
70                    trace!("Replacing backslash escape: \\{ch}");
71                    output.push(replacement);
72                }
73                None => {
74                    warn!("Invalid backslash escape found, ignoring: \\{ch}");
75                    output.push('\\');
76                    output.push(ch);
77                }
78            }
79
80            wants_escape = false;
81        } else if ch == '\\' {
82            wants_escape = true;
83        } else {
84            output.push(ch);
85        }
86    }
87
88    Cow::Owned(output)
89}
90
/// Extracts the contents of a double-quoted string, if it is one.
///
/// Returns the text between the leading and trailing ASCII `"`,
/// or `None` if the input is not wrapped in a pair of them.
/// Only the two quote characters are inspected, so the contents
/// may consist of any codepoints.
fn slice_middle(input: &str) -> Option<&str> {
    // The opening quote is stripped first, so a lone `"` leaves
    // nothing behind for the closing quote to match. This rejects
    // every input shorter than two bytes without a length check.
    input.strip_prefix('"')?.strip_suffix('"')
}
112
/// Maps the character following a backslash to the character it stands for.
///
/// Returns `None` if `ch` does not form one of the recognized escapes.
fn escape_char(ch: char) -> Option<char> {
    match ch {
        // Backslash and both quote kinds stand for themselves
        '\\' | '"' | '\'' => Some(ch),
        'r' => Some('\r'),
        'n' => Some('\n'),
        't' => Some('\t'),
        _ => None,
    }
}
127
/// Checks both the text produced by `parse_string` and whether
/// it was returned as a borrow or as a newly built string.
#[test]
fn test_parse_string() {
    // Each case is (input, expected output, expect a borrowed result).
    // A result is only owned when the contents held a backslash.
    const CASES: &[(&str, &str, bool)] = &[
        (r#""""#, "", true),
        (r#""!""#, "!", true),
        (r#""\"""#, "\"", false),
        (r#""\'""#, "\'", false),
        (r#""apple banana""#, "apple banana", true),
        (r#""abc \\""#, "abc \\", false),
        (r#""\n def""#, "\n def", false),
        (
            r#""abc \t (\\\t) \r (\\\r) def""#,
            "abc \t (\\\t) \r (\\\r) def",
            false,
        ),
        (r#""abc \t \x \y \z \n""#, "abc \t \\x \\y \\z \n", false),
        // Not wrapped in double quotes, returned untouched
        ("'abc'", "'abc'", true),
        ("\"abc", "\"abc", true),
        ("foo", "foo", true),
    ];

    for &(input, expected, borrowed) in CASES {
        let actual = parse_string(input);

        assert_eq!(
            &actual, expected,
            "Actual string (left) doesn't match expected (right)"
        );

        let variant_matches = match actual {
            Cow::Borrowed(_) => borrowed,
            Cow::Owned(_) => !borrowed,
        };

        assert!(variant_matches, "Outputted string of the incorrect variant");
    }
}
163
/// Checks that `slice_middle` unwraps double-quoted inputs
/// and refuses everything else.
#[test]
fn test_slice_middle() {
    // Inputs wrapped in double quotes, paired with their contents
    const ACCEPTED: &[(&str, &str)] = &[
        (r#""""#, ""),
        (r#""!""#, "!"),
        (r#""abc""#, "abc"),
        (r#""apple banana cherry""#, "apple banana cherry"),
    ];

    // Inputs which are too short or use the wrong delimiters
    const REJECTED: &[&str] = &["", "\"", "\"'", "''", "[]"];

    for &(input, expected) in ACCEPTED {
        let actual = slice_middle(input).expect("Invalid string input");

        assert_eq!(
            actual, expected,
            "Actual (left) doesn't match expected (right)",
        );
    }

    for &input in REJECTED {
        assert!(
            slice_middle(input).is_none(),
            "Invalid string was accepted",
        );
    }
}