arrow_parser/
parse_literal.rs

1use memchr::memchr;
2use ordered_float::OrderedFloat;
3use std::str::from_utf8_unchecked;
4use std::str::FromStr;
5
/// Parses the source text of an integer literal into an `i64`.
///
/// A leading `0b`/`0B`, `0o`/`0O` or `0x`/`0X` selects binary, octal or hexadecimal
/// respectively; any other input is parsed as decimal. The text after the prefix is handed to
/// [`i64::from_str_radix`] unchanged.
///
/// Returns `None` when the text is not valid UTF-8, is not a valid number in the selected
/// radix (including a bare prefix such as `0x`), or does not fit in an `i64`.
pub fn normalise_literal_int(raw: &[u8]) -> Option<i64> {
  // Match the prefix on bytes so that no string slicing happens before the input has been
  // validated as UTF-8.
  let (digits, radix) = match raw {
    [b'0', b'b' | b'B', rest @ ..] => (rest, 2),
    [b'0', b'o' | b'O', rest @ ..] => (rest, 8),
    [b'0', b'x' | b'X', rest @ ..] => (rest, 16),
    _ => (raw, 10),
  };
  // Checked conversion: malformed bytes yield `None` instead of undefined behaviour.
  let digits = std::str::from_utf8(digits).ok()?;
  i64::from_str_radix(digits, radix).ok()
}
15
16pub fn normalise_literal_float(raw: &[u8]) -> Option<OrderedFloat<f64>> {
17  let raw = raw.strip_suffix(b"f")?;
18  fn parse_radix(raw: &str, radix: u32) -> Option<f64> {
19    u64::from_str_radix(raw, radix)
20      // TODO This is lossy, but there is no TryFrom for converting from u64 to f64, and u32 cannot represent all possible float integer values.
21      .map(|v| v as f64)
22      .ok()
23  }
24
25  match unsafe { from_utf8_unchecked(raw) } {
26    s if s.starts_with("0b") || s.starts_with("0B") => parse_radix(&s[2..], 2),
27    s if s.starts_with("0o") || s.starts_with("0o") => parse_radix(&s[2..], 8),
28    s if s.starts_with("0x") || s.starts_with("0X") => parse_radix(&s[2..], 16),
29    s => f64::from_str(s).ok(),
30  }
31  .map(|n| OrderedFloat(n))
32}
33
34pub fn normalise_literal_template_string_part(mut raw: &[u8]) -> Option<String> {
35  let mut norm = vec![];
36  while !raw.is_empty() {
37    let Some(escape_pos) = memchr(b'\\', raw) else {
38      norm.extend_from_slice(raw);
39      break;
40    };
41    norm.extend_from_slice(&raw[..escape_pos]);
42    raw = &raw[escape_pos + 1..];
43    let mut tmp = [0u8; 4];
44    let (skip, add): (usize, &[u8]) = match raw[0] {
45      b'\n' => (1, b""),
46      b'n' => (1, b"\n"),
47      b'r' => (1, b"\r"),
48      b't' => (1, b"\t"),
49      b'0' => (1, b"\0"),
50      b'x' => match raw.get(1) {
51        Some(b'{') => {
52          // Unicode code point escape.
53          let Some(end_pos) = memchr(b'}', raw) else {
54            return None;
55          };
56          if end_pos < 3 || end_pos > 8 {
57            return None;
58          };
59          let cp =
60            u32::from_str_radix(unsafe { from_utf8_unchecked(&raw[2..end_pos]) }, 16).ok()?;
61          let c = char::from_u32(cp)?;
62          c.encode_utf8(&mut tmp);
63          (end_pos + 1, tmp.as_slice())
64        }
65        _ => {
66          return None;
67        }
68      },
69      c => (1, {
70        tmp[0] = c;
71        &tmp[..1]
72      }),
73    };
74    norm.extend_from_slice(add);
75    raw = &raw[skip..];
76  }
77  Some(unsafe { String::from_utf8_unchecked(norm) })
78}