// react_compiler_diagnostics/js_string.rs
use std::fmt;

use serde::Serialize;

18#[derive(Debug, Clone, PartialEq, Eq, Hash)]
25pub struct JsString(Repr);
26
/// Private storage behind `JsString`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum Repr {
    /// Text held as an ordinary Rust `String`.
    Utf8(String),
    /// Raw UTF-16 code units, used when the units cannot be converted to a
    /// `String` (see `JsString::from_code_units`).
    Wtf16(Vec<u16>),
}
34
/// Borrowed, copyable view of a `JsString`'s contents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JsStringRef<'a> {
    /// The string is stored as UTF-8 text.
    Utf8(&'a str),
    /// The string is stored as raw UTF-16 code units.
    Wtf16(&'a [u16]),
}
42
43impl JsString {
44 pub fn from_code_units(units: Vec<u16>) -> Self {
46 match String::from_utf16(&units) {
47 Ok(s) => JsString(Repr::Utf8(s)),
48 Err(_) => JsString(Repr::Wtf16(units)),
49 }
50 }
51
52 pub fn as_ref(&self) -> JsStringRef<'_> {
53 match &self.0 {
54 Repr::Utf8(s) => JsStringRef::Utf8(s),
55 Repr::Wtf16(units) => JsStringRef::Wtf16(units),
56 }
57 }
58
59 pub fn as_str(&self) -> Option<&str> {
61 match &self.0 {
62 Repr::Utf8(s) => Some(s),
63 Repr::Wtf16(_) => None,
64 }
65 }
66
67 pub fn code_units(&self) -> Vec<u16> {
68 match &self.0 {
69 Repr::Utf8(s) => s.encode_utf16().collect(),
70 Repr::Wtf16(units) => units.clone(),
71 }
72 }
73
74 pub fn len_utf16(&self) -> usize {
76 match &self.0 {
77 Repr::Utf8(s) => s.encode_utf16().count(),
78 Repr::Wtf16(units) => units.len(),
79 }
80 }
81
82 pub fn to_string_lossy(&self) -> String {
85 match &self.0 {
86 Repr::Utf8(s) => s.clone(),
87 Repr::Wtf16(units) => String::from_utf16_lossy(units),
88 }
89 }
90
91 pub fn from_marker_string(s: &str) -> Self {
99 const PREFIX: &[u8] = b"__SURROGATE_";
100 const MARKER_LEN: usize = 18;
101 if !s.contains("__SURROGATE_") {
102 return JsString(Repr::Utf8(s.to_string()));
103 }
104 let bytes = s.as_bytes();
105 let mut units: Vec<u16> = Vec::with_capacity(s.len());
106 let mut pos = 0;
107 let mut segment_start = 0;
108 while let Some(found) = s[pos..].find("__SURROGATE_") {
109 let idx = pos + found;
110 let tail = &bytes[idx..];
111 let well_formed = tail.len() >= MARKER_LEN
112 && &tail[MARKER_LEN - 2..MARKER_LEN] == b"__"
113 && tail[PREFIX.len()..PREFIX.len() + 4]
114 .iter()
115 .all(|b| b.is_ascii_hexdigit() && !b.is_ascii_lowercase());
116 if well_formed {
117 let hex = std::str::from_utf8(&tail[PREFIX.len()..PREFIX.len() + 4])
118 .expect("ascii hex is valid utf8");
119 let unit = u16::from_str_radix(hex, 16).expect("validated hex digits");
120 units.extend(s[segment_start..idx].encode_utf16());
121 units.push(unit);
122 pos = idx + MARKER_LEN;
123 segment_start = pos;
124 } else {
125 pos = idx + PREFIX.len();
128 }
129 }
130 units.extend(s[segment_start..].encode_utf16());
131 JsString::from_code_units(units)
132 }
133
134 pub fn to_marker_string(&self) -> String {
136 match &self.0 {
137 Repr::Utf8(s) => s.clone(),
138 Repr::Wtf16(units) => {
139 let mut out = String::with_capacity(units.len() * 2);
140 let mut iter = units.iter().copied().peekable();
141 while let Some(unit) = iter.next() {
142 match unit {
143 0xD800..=0xDBFF => {
144 if let Some(&next) = iter.peek() {
145 if (0xDC00..=0xDFFF).contains(&next) {
146 iter.next();
147 let cp = 0x10000
148 + ((unit as u32 - 0xD800) << 10)
149 + (next as u32 - 0xDC00);
150 out.push(char::from_u32(cp).expect("valid supplementary"));
151 continue;
152 }
153 }
154 out.push_str(&format!("__SURROGATE_{unit:04X}__"));
155 }
156 0xDC00..=0xDFFF => {
157 out.push_str(&format!("__SURROGATE_{unit:04X}__"));
158 }
159 _ => {
160 out.push(
161 char::from_u32(unit as u32).expect("BMP non-surrogate is a char"),
162 );
163 }
164 }
165 }
166 out
167 }
168 }
169 }
170
171 pub fn to_escaped_string(&self) -> String {
175 match &self.0 {
176 Repr::Utf8(s) => s.clone(),
177 Repr::Wtf16(units) => {
178 let mut out = String::with_capacity(units.len() * 2);
179 let mut iter = units.iter().copied().peekable();
180 while let Some(unit) = iter.next() {
181 match unit {
182 0xD800..=0xDBFF => {
183 if let Some(&next) = iter.peek() {
184 if (0xDC00..=0xDFFF).contains(&next) {
185 iter.next();
186 let cp = 0x10000
187 + ((unit as u32 - 0xD800) << 10)
188 + (next as u32 - 0xDC00);
189 out.push(char::from_u32(cp).expect("valid supplementary"));
190 continue;
191 }
192 }
193 out.push_str(&format!("\\u{unit:04x}"));
194 }
195 0xDC00..=0xDFFF => {
196 out.push_str(&format!("\\u{unit:04x}"));
197 }
198 _ => {
199 out.push(
200 char::from_u32(unit as u32).expect("BMP non-surrogate is a char"),
201 );
202 }
203 }
204 }
205 out
206 }
207 }
208 }
209}
210
211impl From<String> for JsString {
212 fn from(s: String) -> Self {
213 JsString(Repr::Utf8(s))
216 }
217}
218
219impl From<&str> for JsString {
220 fn from(s: &str) -> Self {
221 JsString(Repr::Utf8(s.to_string()))
222 }
223}
224
225impl PartialEq<str> for JsString {
226 fn eq(&self, other: &str) -> bool {
227 self.as_str() == Some(other)
228 }
229}
230
231impl PartialEq<&str> for JsString {
232 fn eq(&self, other: &&str) -> bool {
233 self.as_str() == Some(*other)
234 }
235}
236
237impl fmt::Display for JsString {
238 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
239 f.write_str(&self.to_escaped_string())
240 }
241}
242
243impl Serialize for JsString {
244 fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
245 serializer.serialize_str(&self.to_marker_string())
246 }
247}
248
#[cfg(test)]
mod tests {
    use super::JsString;
    use super::JsStringRef;

    // `as_ref` exposes text for convertible input and raw units otherwise.
    #[test]
    fn as_ref_views_match_well_formedness() {
        let plain = JsString::from("plain");
        assert!(matches!(plain.as_ref(), JsStringRef::Utf8("plain")));

        let lone = JsString::from_code_units(vec![0xD83E]);
        assert!(matches!(lone.as_ref(), JsStringRef::Wtf16(&[0xD83E])));

        // Code units that form valid text compare equal to the `From<&str>` value.
        assert_eq!(
            JsString::from_code_units("plain".encode_utf16().collect()),
            plain
        );
    }

    // A marker decodes to its code unit and encodes back to the same marker.
    #[test]
    fn marker_round_trip_preserves_lone_surrogates() {
        let js = JsString::from_marker_string("__SURROGATE_D83E__");
        assert_eq!(js.code_units(), vec![0xD83E]);
        assert_eq!(js.to_marker_string(), "__SURROGATE_D83E__");
        assert_eq!(js.to_escaped_string(), "\\ud83e");
    }

    // A high/low surrogate pair is ordinary text.
    #[test]
    fn paired_halves_render_as_the_supplementary_character() {
        let js = JsString::from_code_units(vec![0xD83E, 0xDD21]);
        assert_eq!(js.as_str(), Some("\u{1F921}"));
    }

    #[test]
    fn plain_strings_stay_utf8_and_compare_with_str() {
        let js = JsString::from("use memo");
        assert!(js == "use memo");
        assert_eq!(js.to_marker_string(), "use memo");
    }

    // Non-hex payloads are not markers; the text is left untouched.
    #[test]
    fn malformed_marker_text_is_kept_literally() {
        let js = JsString::from_marker_string("__SURROGATE_XYZ__");
        assert_eq!(js.as_str(), Some("__SURROGATE_XYZ__"));
    }

    // Byte-level marker inspection must not slice through multibyte characters.
    #[test]
    fn multibyte_text_after_marker_prefix_does_not_panic() {
        let input = "__SURROGATE_\u{20AC}\u{20AC}";
        let js = JsString::from_marker_string(input);
        assert_eq!(js.as_str(), Some(input));

        // The prefix followed by too few bytes to be a complete marker.
        let truncated = "__SURROGATE_D8";
        assert_eq!(
            JsString::from_marker_string(truncated).as_str(),
            Some(truncated)
        );

        // Multibyte text on both sides of a real marker is preserved.
        let mixed = "a\u{20AC}__SURROGATE_D83E__b\u{20AC}";
        let js = JsString::from_marker_string(mixed);
        let mut expected: Vec<u16> = "a\u{20AC}".encode_utf16().collect();
        expected.push(0xD83E);
        expected.extend("b\u{20AC}".encode_utf16());
        assert_eq!(js.code_units(), expected);
    }

    // Only uppercase hex digits form a marker.
    #[test]
    fn lowercase_hex_markers_are_not_decoded() {
        let input = "__SURROGATE_d83e__";
        assert_eq!(JsString::from_marker_string(input).as_str(), Some(input));
    }
}