ftml/parsing/string.rs
1/*
2 * parsing/string.rs
3 *
4 * ftml - Library to parse Wikidot text
5 * Copyright (C) 2019-2025 Wikijump Team
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Affero General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Affero General Public License for more details.
16 *
17 * You should have received a copy of the GNU Affero General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21use std::borrow::Cow;
22
23/// Parses a double-quoted string.
24///
25/// Takes inputs starting and ending with `"`
26/// and containing characters, or any of these
27/// escapes:
28/// * `\\`
29/// * `\"`
30/// * `\'`
31/// * `\r`
32/// * `\n`
33/// * `\t`
34///
35/// If in invalid escape is found, the input
36/// is returned. So for `\$`, it will emit a
37/// `\` followed by a `$`.
38pub fn parse_string(input: &str) -> Cow<str> {
39 // We could do an iteration thing, but tracking
40 // the index across replacements is complicated.
41 //
42 // So we check if there are any possible escapes,
43 // and if so, build a new string.
44 //
45 // This removes the double quotes on either end
46 // and lets us only deal with the center.
47 // If it's not a string (i.e. doesn't start/end with ")
48 // then it just quits.
49
50 let input = match slice_middle(input) {
51 Some(input) => input,
52 None => {
53 warn!("Not a 'string', returning as-is: {:?}", input);
54 return Cow::Borrowed(input);
55 }
56 };
57
58 if !input.contains('\\') {
59 trace!("No escapes, returning as-is: {:?}", input);
60 return Cow::Borrowed(input);
61 }
62
63 let mut output = String::new();
64 let mut wants_escape = false;
65
66 for ch in input.chars() {
67 if wants_escape {
68 match escape_char(ch) {
69 Some(replacement) => {
70 trace!("Replacing backslash escape: \\{ch}");
71 output.push(replacement);
72 }
73 None => {
74 warn!("Invalid backslash escape found, ignoring: \\{ch}");
75 output.push('\\');
76 output.push(ch);
77 }
78 }
79
80 wants_escape = false;
81 } else if ch == '\\' {
82 wants_escape = true;
83 } else {
84 output.push(ch);
85 }
86 }
87
88 Cow::Owned(output)
89}
90
/// Extracts the contents of a double-quoted string, if it is one.
///
/// When the input both begins and ends with an ASCII `"`, returns
/// the text between those two quotes. Otherwise returns `None`.
/// No other assumptions are made about the codepoints involved.
fn slice_middle(input: &str) -> Option<&str> {
    // Peel the quotes off one at a time.
    //
    // Doing this in sequence means a lone `"` cannot count as both
    // the opening and the closing quote: once the prefix is gone
    // there is nothing left for the suffix to match, so inputs that
    // are too short to be valid are rejected without a length check.
    let without_opening = input.strip_prefix('"')?;
    without_opening.strip_suffix('"')
}
112
/// Maps the character following a backslash to the character it stands for.
///
/// Returns `None` when the character does not form a recognized escape.
fn escape_char(ch: char) -> Option<char> {
    match ch {
        // These escape to themselves
        '\\' | '"' | '\'' => Some(ch),

        // Whitespace control characters
        'r' => Some('\r'),
        'n' => Some('\n'),
        't' => Some('\t'),

        // Anything else is not an escape
        _ => None,
    }
}
127
#[test]
fn test_parse_string() {
    // Names for the expected `Cow` variant, so the table below reads clearly.
    const BORROWED: bool = true;
    const OWNED: bool = false;

    // Each case: (input, expected output, whether the result should be borrowed)
    let cases: &[(&str, &str, bool)] = &[
        (r#""""#, "", BORROWED),
        (r#""!""#, "!", BORROWED),
        (r#""\"""#, "\"", OWNED),
        (r#""\'""#, "\'", OWNED),
        (r#""apple banana""#, "apple banana", BORROWED),
        (r#""abc \\""#, "abc \\", OWNED),
        (r#""\n def""#, "\n def", OWNED),
        (
            r#""abc \t (\\\t) \r (\\\r) def""#,
            "abc \t (\\\t) \r (\\\r) def",
            OWNED,
        ),
        (r#""abc \t \x \y \z \n""#, "abc \t \\x \\y \\z \n", OWNED),
        // Inputs that are not double-quoted strings come back untouched
        ("'abc'", "'abc'", BORROWED),
        ("\"abc", "\"abc", BORROWED),
        ("foo", "foo", BORROWED),
    ];

    for &(input, expected, expect_borrowed) in cases {
        let actual = parse_string(input);

        assert_eq!(
            &actual, expected,
            "Actual string (left) doesn't match expected (right)"
        );

        // Besides the contents, verify an allocation happened only when needed
        let variant_matches = match actual {
            Cow::Borrowed(_) => expect_borrowed,
            Cow::Owned(_) => !expect_borrowed,
        };

        assert!(variant_matches, "Outputted string of the incorrect variant");
    }
}
163
#[test]
fn test_slice_middle() {
    // Properly quoted inputs, paired with the contents they should yield
    let accepted = [
        (r#""""#, ""),
        (r#""!""#, "!"),
        (r#""abc""#, "abc"),
        (r#""apple banana cherry""#, "apple banana cherry"),
    ];

    for (input, expected) in accepted {
        let actual = slice_middle(input).expect("Invalid string input");

        assert_eq!(
            actual, expected,
            "Actual (left) doesn't match expected (right)",
        );
    }

    // Inputs not wrapped in a pair of double quotes must be rejected
    let rejected = ["", "\"", "\"'", "''", "[]"];

    for input in rejected {
        assert!(
            slice_middle(input).is_none(),
            "Invalid string was accepted",
        );
    }
}