parse_js/parse/
literal.rs1use super::ParseCtx;
2use super::Parser;
3use crate::error::SyntaxErrorType;
4use crate::error::SyntaxResult;
5use crate::num::JsNumber;
6use crate::token::TokenType;
7use core::str::FromStr;
8use memchr::memchr;
9use std::str::from_utf8_unchecked;
10
/// Parses the digits of a radix-prefixed integer literal (prefix already removed).
///
/// `raw` must consist solely of digits valid for `radix`. The value is parsed as a `u64` and
/// then converted to `f64`, so literals that do not fit in 64 bits are rejected.
///
/// # Errors
///
/// Returns `Err(())` if `raw` is empty, carries a sign, contains a character that is not a
/// digit in `radix`, or overflows `u64`.
fn parse_radix(raw: &str, radix: u32) -> Result<f64, ()> {
    // `from_str_radix` tolerates a leading `+`, which is never part of a numeric literal's
    // digits (e.g. `0x+1` must not be read as 1).
    if raw.starts_with('+') {
        return Err(());
    }
    u64::from_str_radix(raw, radix)
        .map(|v| v as f64)
        .map_err(|_| ())
}
17
18pub fn normalise_literal_number(raw: &str) -> Option<JsNumber> {
19 match raw {
22 s if s.starts_with("0b") || s.starts_with("0B") => parse_radix(&s[2..], 2),
23 s if s.starts_with("0o") || s.starts_with("0o") => parse_radix(&s[2..], 8),
24 s if s.starts_with("0x") || s.starts_with("0X") => parse_radix(&s[2..], 16),
25 s => f64::from_str(s).map_err(|_| ()),
26 }
27 .map(JsNumber)
28 .ok()
29}
30
/// Produces the normalised form of a bigint literal.
///
/// The raw source text is currently copied verbatim into an owned `String`, so the result
/// is always `Some`.
pub fn normalise_literal_bigint(raw: &str) -> Option<String> {
    let owned = String::from(raw);
    Some(owned)
}
36
37pub fn normalise_literal_string_or_template_inner(mut raw: &[u8]) -> Option<String> {
38 let mut norm = Vec::new();
39 while !raw.is_empty() {
40 let Some(escape_pos) = memchr(b'\\', raw) else {
41 norm.extend_from_slice(raw);
42 break;
43 };
44 norm.extend_from_slice(&raw[..escape_pos]);
45 raw = &raw[escape_pos + 1..];
46 let mut tmp = [0u8; 4];
50 let (skip, add): (usize, &[u8]) = match raw[0] {
51 b'\n' => (1, b""),
52 b'b' => (1, b"\x08"),
53 b'f' => (1, b"\x0c"),
54 b'n' => (1, b"\n"),
55 b'r' => (1, b"\r"),
56 b't' => (1, b"\t"),
57 b'v' => (1, b"\x0b"),
58 b'0'..=b'7' => {
59 let mut len = 1;
61 if raw
62 .get(len)
63 .filter(|&c| (b'0'..=b'7').contains(c))
64 .is_some()
65 {
66 len += 1;
67 if raw
68 .get(len)
69 .filter(|&c| (b'0'..=b'7').contains(c))
70 .is_some()
71 {
72 len += 1;
73 };
74 };
75 char::from_u32(
76 u32::from_str_radix(unsafe { from_utf8_unchecked(&raw[..len]) }, 8).unwrap(),
77 )
78 .unwrap()
79 .encode_utf8(&mut tmp);
80 (len, tmp.as_slice())
81 }
82 b'x' => {
83 if raw.len() < 3 || !raw[1].is_ascii_hexdigit() || !raw[2].is_ascii_hexdigit() {
85 return None;
86 };
87 char::from_u32(
88 u32::from_str_radix(unsafe { from_utf8_unchecked(&raw[1..3]) }, 16).unwrap(),
89 )
90 .unwrap()
91 .encode_utf8(&mut tmp);
92 (3, tmp.as_slice())
93 }
94 b'u' => match raw.get(1) {
95 Some(b'{') => {
96 let Some(end_pos) = memchr(b'}', raw) else {
98 return None;
99 };
100 if !(3..=8).contains(&end_pos) {
101 return None;
102 };
103 let cp =
104 u32::from_str_radix(unsafe { from_utf8_unchecked(&raw[2..end_pos]) }, 16).ok()?;
105 let c = char::from_u32(cp)?;
106 c.encode_utf8(&mut tmp);
107 (end_pos + 1, tmp.as_slice())
108 }
109 Some(_) => {
110 if raw.len() < 5 {
112 return None;
113 };
114 let cp = u32::from_str_radix(unsafe { from_utf8_unchecked(&raw[1..5]) }, 16).ok()?;
115 let c = char::from_u32(cp)?;
116 c.encode_utf8(&mut tmp);
117 (5, tmp.as_slice())
118 }
119 None => {
120 return None;
121 }
122 },
123 c => (1, {
124 tmp[0] = c;
125 &tmp[..1]
126 }),
127 };
128 norm.extend_from_slice(add);
129 raw = &raw[skip..];
130 }
131 Some(String::from_utf8(norm).unwrap())
133}
134
135pub fn normalise_literal_string(raw: &str) -> Option<String> {
136 normalise_literal_string_or_template_inner(&raw.as_bytes()[1..raw.len() - 1])
137}
138
139impl<'a> Parser<'a> {
140 pub fn parse_and_normalise_literal_string(&mut self, ctx: ParseCtx) -> SyntaxResult<String> {
141 let t = self.require(TokenType::LiteralString)?;
142 normalise_literal_string(self.str(t.loc))
143 .ok_or_else(|| t.loc.error(SyntaxErrorType::InvalidCharacterEscape, None))
144 }
145}