arrow-parser 0.0.2

Parser for the Arrow programming language
Documentation
use memchr::memchr;
use ordered_float::OrderedFloat;
use std::str::from_utf8_unchecked;
use std::str::FromStr;

/// Parses the source text of an integer literal into its `i64` value.
///
/// A leading `0b`/`0B`, `0o`/`0O` or `0x`/`0X` selects base 2, 8 or 16 respectively; without
/// such a prefix the text is parsed as base 10.
///
/// Returns `None` if `raw` is not valid UTF-8, or if `i64::from_str_radix` rejects the digits
/// (empty, a digit outside the base, or a value that does not fit in an `i64`).
pub fn normalise_literal_int(raw: &[u8]) -> Option<i64> {
  // Validate rather than assume UTF-8: this is a safe public function, so malformed bytes must
  // produce `None` instead of undefined behaviour.
  let s = std::str::from_utf8(raw).ok()?;
  // Both prefix bytes are ASCII, so slicing at index 2 always lands on a char boundary.
  let (digits, radix) = match s.as_bytes() {
    [b'0', b'b' | b'B', ..] => (&s[2..], 2),
    [b'0', b'o' | b'O', ..] => (&s[2..], 8),
    [b'0', b'x' | b'X', ..] => (&s[2..], 16),
    _ => (s, 10),
  };
  i64::from_str_radix(digits, radix).ok()
}

pub fn normalise_literal_float(raw: &[u8]) -> Option<OrderedFloat<f64>> {
  let raw = raw.strip_suffix(b"f")?;
  fn parse_radix(raw: &str, radix: u32) -> Option<f64> {
    u64::from_str_radix(raw, radix)
      // TODO This is lossy, but there is no TryFrom for converting from u64 to f64, and u32 cannot represent all possible float integer values.
      .map(|v| v as f64)
      .ok()
  }

  match unsafe { from_utf8_unchecked(raw) } {
    s if s.starts_with("0b") || s.starts_with("0B") => parse_radix(&s[2..], 2),
    s if s.starts_with("0o") || s.starts_with("0o") => parse_radix(&s[2..], 8),
    s if s.starts_with("0x") || s.starts_with("0X") => parse_radix(&s[2..], 16),
    s => f64::from_str(s).ok(),
  }
  .map(|n| OrderedFloat(n))
}

/// Decodes the escape sequences in one raw part of a template string literal.
///
/// Bytes outside an escape are copied through unchanged. A backslash introduces an escape:
/// - backslash followed by a line feed: line continuation; nothing is emitted.
/// - `\n`, `\r`, `\t`, `\0`: line feed, carriage return, tab and NUL.
/// - `\x{H...}`: the Unicode scalar value written as 1 to 6 hexadecimal digits.
/// - backslash followed by any other byte: that byte on its own (so `\\` yields one backslash).
///
/// Returns `None` when the part ends in a lone backslash, when `\x` is not followed by a
/// well-formed `{H...}` naming a valid Unicode scalar value, or when the decoded bytes are not
/// valid UTF-8.
pub fn normalise_literal_template_string_part(mut raw: &[u8]) -> Option<String> {
  // Every escape is at least as long as what it decodes to, so the output never outgrows the input.
  let mut norm = Vec::with_capacity(raw.len());
  while !raw.is_empty() {
    let Some(escape_pos) = memchr(b'\\', raw) else {
      // No more escapes: the remainder is literal text.
      norm.extend_from_slice(raw);
      break;
    };
    norm.extend_from_slice(&raw[..escape_pos]);
    // Step past the backslash; `raw` now starts at the byte that selects the escape.
    raw = &raw[escape_pos + 1..];
    // Scratch space for escapes whose replacement is computed rather than a constant.
    let mut tmp = [0u8; 4];
    // `first()?` rejects a trailing backslash instead of panicking on an out-of-bounds index.
    let (skip, add): (usize, &[u8]) = match *raw.first()? {
      b'\n' => (1, b""),
      b'n' => (1, b"\n"),
      b'r' => (1, b"\r"),
      b't' => (1, b"\t"),
      b'0' => (1, b"\0"),
      b'x' => {
        // Unicode code point escape: `x{` + hex digits + `}`.
        if raw.get(1) != Some(&b'{') {
          return None;
        }
        let end_pos = memchr(b'}', raw)?;
        // `raw[0..2]` is `x{`, so the closing brace is at index 2 or later.
        let digits = &raw[2..end_pos];
        // Require 1 to 6 plain hex digits; this also rules out the leading `+` that
        // `from_str_radix` would otherwise accept.
        if !(1..=6).contains(&digits.len()) || !digits.iter().all(u8::is_ascii_hexdigit) {
          return None;
        }
        // SAFETY: `digits` was just checked to consist solely of ASCII hex digits, which is
        // valid UTF-8.
        let cp = u32::from_str_radix(unsafe { from_utf8_unchecked(digits) }, 16).ok()?;
        // Only emit the bytes actually written by the encoder, not the whole scratch buffer.
        let encoded_len = char::from_u32(cp)?.encode_utf8(&mut tmp).len();
        (end_pos + 1, &tmp[..encoded_len])
      }
      c => {
        // Unknown escape: drop the backslash and keep the byte itself.
        tmp[0] = c;
        (1, &tmp[..1])
      }
    };
    norm.extend_from_slice(add);
    raw = &raw[skip..];
  }
  // The input is caller-supplied bytes, so validate instead of assuming UTF-8.
  String::from_utf8(norm).ok()
}