deno_kv 0.2.0

Implementation of the Deno database API
Documentation
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// Ported from https://github.com/foundationdb-rs/foundationdb-rs/blob/main/foundationdb/src/tuple/pack.rs

use crate::Key;
use crate::KeyPart;

//const NIL: u8 = 0x00;
const BYTES: u8 = 0x01;
const STRING: u8 = 0x02;
//const NESTED: u8 = 0x05;
const NEGINTSTART: u8 = 0x0b;
const INTZERO: u8 = 0x14;
const POSINTEND: u8 = 0x1d;
//const FLOAT: u8 = 0x20;
const DOUBLE: u8 = 0x21;
const FALSE: u8 = 0x26;
const TRUE: u8 = 0x27;

const ESCAPE: u8 = 0xff;

const CANONICAL_NAN_POS: u64 = 0x7ff8000000000000u64;
const CANONICAL_NAN_NEG: u64 = 0xfff8000000000000u64;

pub fn canonicalize_f64(n: f64) -> f64 {
  if n.is_nan() {
    if n.is_sign_negative() {
      f64::from_bits(CANONICAL_NAN_NEG)
    } else {
      f64::from_bits(CANONICAL_NAN_POS)
    }
  } else {
    n
  }
}

pub fn encode_key(key: &Key) -> std::io::Result<Vec<u8>> {
  let mut output: Vec<u8> = vec![];
  for part in &key.0 {
    match part {
      KeyPart::String(key) => {
        output.push(STRING);
        escape_raw_bytes_into(&mut output, key.as_bytes());
        output.push(0);
      }
      KeyPart::Int(key) => {
        bigint::encode_into(&mut output, key)?;
      }
      KeyPart::Float(key) => {
        double::encode_into(&mut output, *key);
      }
      KeyPart::Bytes(key) => {
        output.push(BYTES);
        escape_raw_bytes_into(&mut output, key);
        output.push(0);
      }
      KeyPart::False => {
        output.push(FALSE);
      }
      KeyPart::True => {
        output.push(TRUE);
      }
    }
  }
  Ok(output)
}

pub fn decode_key(mut bytes: &[u8]) -> std::io::Result<Key> {
  let mut key = Key(vec![]);
  while !bytes.is_empty() {
    let tag = bytes[0];
    bytes = &bytes[1..];

    let next_bytes = match tag {
      self::STRING => {
        let (next_bytes, data) = parse_slice(bytes)?;
        let data = String::from_utf8(data).map_err(|_| {
          std::io::Error::new(std::io::ErrorKind::InvalidData, "invalid utf8")
        })?;
        key.0.push(KeyPart::String(data));
        next_bytes
      }
      self::NEGINTSTART..=self::POSINTEND => {
        let (next_bytes, data) = bigint::decode_from(bytes, tag)?;
        key.0.push(KeyPart::Int(data));
        next_bytes
      }
      self::DOUBLE => {
        let (next_bytes, data) = double::decode_from(bytes)?;
        key.0.push(KeyPart::Float(data));
        next_bytes
      }
      self::BYTES => {
        let (next_bytes, data) = parse_slice(bytes)?;
        key.0.push(KeyPart::Bytes(data));
        next_bytes
      }
      self::FALSE => {
        key.0.push(KeyPart::False);
        bytes
      }
      self::TRUE => {
        key.0.push(KeyPart::True);
        bytes
      }
      _ => {
        return Err(std::io::Error::new(
          std::io::ErrorKind::InvalidData,
          "invalid tag",
        ))
      }
    };

    bytes = next_bytes;
  }
  Ok(key)
}

fn escape_raw_bytes_into(out: &mut Vec<u8>, x: &[u8]) {
  for &b in x {
    out.push(b);
    if b == 0 {
      out.push(ESCAPE);
    }
  }
}

mod bigint {
  use num_bigint::BigInt;
  use num_bigint::Sign;

  use super::parse_byte;
  use super::parse_bytes;
  const MAX_SZ: usize = 8;

  // Ported from https://github.com/foundationdb-rs/foundationdb-rs/blob/7415e116d5d96c2630976058de28e439eed7e809/foundationdb/src/tuple/pack.rs#L575
  pub fn encode_into(out: &mut Vec<u8>, key: &BigInt) -> std::io::Result<()> {
    if key.sign() == Sign::NoSign {
      out.push(super::INTZERO);
      return Ok(());
    }
    let (sign, mut bytes) = key.to_bytes_be();
    let n = bytes.len();
    match sign {
      Sign::Minus => {
        if n <= MAX_SZ {
          out.push(super::INTZERO - n as u8);
        } else {
          out.extend_from_slice(&[super::NEGINTSTART, bigint_n(n)? ^ 0xff]);
        }
        invert(&mut bytes);
        out.extend_from_slice(&bytes);
      }
      Sign::NoSign => unreachable!(),
      Sign::Plus => {
        if n <= MAX_SZ {
          out.push(super::INTZERO + n as u8);
        } else {
          out.extend_from_slice(&[super::POSINTEND, bigint_n(n)?]);
        }
        out.extend_from_slice(&bytes);
      }
    }
    Ok(())
  }

  pub fn decode_from(
    input: &[u8],
    tag: u8,
  ) -> std::io::Result<(&[u8], BigInt)> {
    if super::INTZERO <= tag && tag <= super::INTZERO + MAX_SZ as u8 {
      let n = (tag - super::INTZERO) as usize;
      let (input, bytes) = parse_bytes(input, n)?;
      Ok((input, BigInt::from_bytes_be(Sign::Plus, bytes)))
    } else if super::INTZERO - MAX_SZ as u8 <= tag && tag < super::INTZERO {
      let n = (super::INTZERO - tag) as usize;
      let (input, bytes) = parse_bytes(input, n)?;
      Ok((input, BigInt::from_bytes_be(Sign::Minus, &inverted(bytes))))
    } else if tag == super::NEGINTSTART {
      let (input, raw_length) = parse_byte(input)?;
      let n = usize::from(raw_length ^ 0xff);
      let (input, bytes) = parse_bytes(input, n)?;
      Ok((input, BigInt::from_bytes_be(Sign::Minus, &inverted(bytes))))
    } else if tag == super::POSINTEND {
      let (input, raw_length) = parse_byte(input)?;
      let n: usize = usize::from(raw_length);
      let (input, bytes) = parse_bytes(input, n)?;
      Ok((input, BigInt::from_bytes_be(Sign::Plus, bytes)))
    } else {
      Err(std::io::Error::new(
        std::io::ErrorKind::InvalidData,
        format!("unknown bigint tag: {}", tag),
      ))
    }
  }

  fn invert(bytes: &mut [u8]) {
    // The ones' complement of a binary number is defined as the value
    // obtained by inverting all the bits in the binary representation
    // of the number (swapping 0s for 1s and vice versa).
    for byte in bytes.iter_mut() {
      *byte = !*byte;
    }
  }

  fn inverted(bytes: &[u8]) -> Vec<u8> {
    // The ones' complement of a binary number is defined as the value
    // obtained by inverting all the bits in the binary representation
    // of the number (swapping 0s for 1s and vice versa).
    bytes.iter().map(|byte| !*byte).collect()
  }

  fn bigint_n(n: usize) -> std::io::Result<u8> {
    u8::try_from(n).map_err(|_| {
      std::io::Error::new(
        std::io::ErrorKind::InvalidInput,
        "BigUint requires more than 255 bytes to be represented",
      )
    })
  }
}

mod double {
  macro_rules! sign_bit {
    ($type:ident) => {
      (1 << (std::mem::size_of::<$type>() * 8 - 1))
    };
  }

  fn f64_to_ux_be_bytes(f: f64) -> [u8; 8] {
    let u = if f.is_sign_negative() {
      f.to_bits() ^ ::std::u64::MAX
    } else {
      f.to_bits() ^ sign_bit!(u64)
    };
    u.to_be_bytes()
  }

  pub fn encode_into(out: &mut Vec<u8>, x: f64) {
    out.push(super::DOUBLE);
    out.extend_from_slice(&f64_to_ux_be_bytes(super::canonicalize_f64(x)));
  }

  pub fn decode_from(input: &[u8]) -> std::io::Result<(&[u8], f64)> {
    let (input, bytes) = super::parse_bytes(input, 8)?;
    let mut arr = [0u8; 8];
    arr.copy_from_slice(bytes);
    let u = u64::from_be_bytes(arr);
    Ok((
      input,
      f64::from_bits(if (u & sign_bit!(u64)) == 0 {
        u ^ ::std::u64::MAX
      } else {
        u ^ sign_bit!(u64)
      }),
    ))
  }
}

#[inline]
fn parse_bytes(input: &[u8], num: usize) -> std::io::Result<(&[u8], &[u8])> {
  if input.len() < num {
    Err(std::io::ErrorKind::UnexpectedEof.into())
  } else {
    Ok((&input[num..], &input[..num]))
  }
}

#[inline]
fn parse_byte(input: &[u8]) -> std::io::Result<(&[u8], u8)> {
  if input.is_empty() {
    Err(std::io::ErrorKind::UnexpectedEof.into())
  } else {
    Ok((&input[1..], input[0]))
  }
}

fn parse_slice(input: &[u8]) -> std::io::Result<(&[u8], Vec<u8>)> {
  let mut output: Vec<u8> = Vec::new();
  let mut i = 0usize;

  while i < input.len() {
    let byte = input[i];
    i += 1;

    if byte == 0 {
      if input.get(i).copied() == Some(ESCAPE) {
        output.push(0);
        i += 1;
        continue;
      } else {
        return Ok((&input[i..], output));
      }
    }

    output.push(byte);
  }

  Err(std::io::ErrorKind::UnexpectedEof.into())
}

#[cfg(test)]
mod tests {
  use num_bigint::BigInt;
  use std::cmp::Ordering;

  use crate::Key;
  use crate::KeyPart;

  use super::decode_key;
  use super::encode_key;

  fn roundtrip(key: Key) {
    let bytes = encode_key(&key).unwrap();
    let decoded = decode_key(&bytes).unwrap();
    assert_eq!(&key, &decoded);
    assert_eq!(format!("{:?}", key), format!("{:?}", decoded));
  }

  fn check_order(a: Key, b: Key, expected: Ordering) {
    let a_bytes = encode_key(&a).unwrap();
    let b_bytes = encode_key(&b).unwrap();

    assert_eq!(a.cmp(&b), expected);
    assert_eq!(a_bytes.cmp(&b_bytes), expected);
  }

  fn check_bijection(key: Key, serialized: &[u8]) {
    let bytes = encode_key(&key).unwrap();
    assert_eq!(&bytes[..], serialized);
    let decoded = decode_key(serialized).unwrap();
    assert_eq!(&key, &decoded);
  }

  #[test]
  fn simple_roundtrip() {
    roundtrip(Key(vec![
      KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00]),
      KeyPart::String("foo".to_string()),
      KeyPart::Float(-f64::NAN),
      KeyPart::Float(-f64::INFINITY),
      KeyPart::Float(-42.1),
      KeyPart::Float(-0.0),
      KeyPart::Float(0.0),
      KeyPart::Float(42.1),
      KeyPart::Float(f64::INFINITY),
      KeyPart::Float(f64::NAN),
      KeyPart::Int(BigInt::from(-10000)),
      KeyPart::Int(BigInt::from(-1)),
      KeyPart::Int(BigInt::from(0)),
      KeyPart::Int(BigInt::from(1)),
      KeyPart::Int(BigInt::from(10000)),
      KeyPart::False,
      KeyPart::True,
    ]));
  }

  #[test]
  #[rustfmt::skip]
  fn order_bytes() {
    check_order(
      Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
      Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
      Ordering::Equal,
    );

    check_order(
      Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
      Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x01])]),
      Ordering::Less,
    );

    check_order(
      Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x01])]),
      Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
      Ordering::Greater,
    );

    check_order(
      Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
      Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00, 0x00])]),
      Ordering::Less,
    );

    check_order(
      Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00, 0x00])]),
      Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
      Ordering::Greater,
    );
  }

  #[test]
  #[rustfmt::skip]
  fn order_tags() {
    check_order(
      Key(vec![KeyPart::Bytes(vec![])]),
      Key(vec![KeyPart::String("".into())]),
      Ordering::Less,
    );

    check_order(
      Key(vec![KeyPart::String("".into())]),
      Key(vec![KeyPart::Int(BigInt::from(0))]),
      Ordering::Less,
    );

    check_order(
      Key(vec![KeyPart::Int(BigInt::from(0))]),
      Key(vec![KeyPart::Float(0.0)]),
      Ordering::Less,
    );

    check_order(
      Key(vec![KeyPart::Float(0.0)]),
      Key(vec![KeyPart::False]),
      Ordering::Less,
    );

    check_order(
      Key(vec![KeyPart::False]),
      Key(vec![KeyPart::True]),
      Ordering::Less,
    );

    check_order(
      Key(vec![KeyPart::True]),
      Key(vec![KeyPart::Bytes(vec![])]),
      Ordering::Greater,
    );
  }

  #[test]
  #[rustfmt::skip]
  fn order_floats() {
    check_order(
      Key(vec![KeyPart::Float(-f64::NAN)]),
      Key(vec![KeyPart::Float(-f64::INFINITY)]),
      Ordering::Less,
    );
    check_order(
      Key(vec![KeyPart::Float(-f64::INFINITY)]),
      Key(vec![KeyPart::Float(-10.0)]),
      Ordering::Less,
    );
    check_order(
      Key(vec![KeyPart::Float(-10.0)]),
      Key(vec![KeyPart::Float(-0.0)]),
      Ordering::Less,
    );
    check_order(
      Key(vec![KeyPart::Float(-0.0)]),
      Key(vec![KeyPart::Float(0.0)]),
      Ordering::Less,
    );
    check_order(
      Key(vec![KeyPart::Float(0.0)]),
      Key(vec![KeyPart::Float(10.0)]),
      Ordering::Less,
    );
    check_order(
      Key(vec![KeyPart::Float(10.0)]),
      Key(vec![KeyPart::Float(f64::INFINITY)]),
      Ordering::Less,
    );
    check_order(
      Key(vec![KeyPart::Float(f64::INFINITY)]),
      Key(vec![KeyPart::Float(f64::NAN)]),
      Ordering::Less,
    );
  }

  #[test]
  #[rustfmt::skip]
  fn order_ints() {
    check_order(
      Key(vec![KeyPart::Int(BigInt::from(-10000))]),
      Key(vec![KeyPart::Int(BigInt::from(-100))]),
      Ordering::Less,
    );
    check_order(
      Key(vec![KeyPart::Int(BigInt::from(-100))]),
      Key(vec![KeyPart::Int(BigInt::from(-1))]),
      Ordering::Less,
    );
    check_order(
      Key(vec![KeyPart::Int(BigInt::from(-1))]),
      Key(vec![KeyPart::Int(BigInt::from(0))]),
      Ordering::Less,
    );
    check_order(
      Key(vec![KeyPart::Int(BigInt::from(0))]),
      Key(vec![KeyPart::Int(BigInt::from(1))]),
      Ordering::Less,
    );
    check_order(
      Key(vec![KeyPart::Int(BigInt::from(1))]),
      Key(vec![KeyPart::Int(BigInt::from(100))]),
      Ordering::Less,
    );
    check_order(
      Key(vec![KeyPart::Int(BigInt::from(100))]),
      Key(vec![KeyPart::Int(BigInt::from(10000))]),
      Ordering::Less,
    );
  }

  #[test]
  #[rustfmt::skip]
  fn float_canonicalization() {
    let key1 = Key(vec![KeyPart::Float(f64::from_bits(0x7ff8000000000001))]);
    let key2 = Key(vec![KeyPart::Float(f64::from_bits(0x7ff8000000000002))]);

    assert_eq!(key1, key2);
    assert_eq!(encode_key(&key1).unwrap(), encode_key(&key2).unwrap());
  }

  #[test]
  #[rustfmt::skip]
  fn explicit_bijection() {
    // string
    check_bijection(
      Key(vec![KeyPart::String("hello".into())]),
      &[0x02, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00],
    );

    // zero byte escape
    check_bijection(
      Key(vec![KeyPart::Bytes(vec![0x01, 0x02, 0x00, 0x07, 0x08])]),
      &[0x01, 0x01, 0x02, 0x00, 0xff, 0x07, 0x08, 0x00],
    );

    // array
    check_bijection(
      Key(vec![
        KeyPart::String("hello".into()),
        KeyPart::Bytes(vec![0x01, 0x02, 0x00, 0x07, 0x08]),
      ]),
      &[
        0x02, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00, /* string */
        0x01, 0x01, 0x02, 0x00, 0xff, 0x07, 0x08, 0x00, /* bytes */
      ],
    );
  }
}