lean_string 0.5.1

Compact, clone-on-write string.
Documentation
/*
UTF-8 encoding rules:
  - 1 byte character is 0b0xxxxxxx
  - 2 byte character is 0b110xxxxx 0b10xxxxxx
  - 3 byte character is 0b1110xxxx 0b10xxxxxx 0b10xxxxxx
  ...
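A byte starting with `10` is always a trailing (continuation) byte, so the last byte of a
valid UTF-8 string is either `0b0xxxxxxx` or `0b10xxxxxx` and never starts with `11`.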
*/
/// Every value the final byte of the string representation is allowed to hold.
///
/// The variants fall into three groups, laid out below as a table in discriminant order:
///
/// * `_0x00..=_0x7F` — a 1-byte (ASCII) character, `0b0xxxxxxx`.
/// * `_0x80..=_0xBF` — the trailing byte of a multi-byte character, `0b10xxxxxx`.
/// * `Length00..=Length15`, `HeapMarker`, `StaticMarker` — values of the form `0b11xxxxxx`.
///   Such a byte can only *start* a multi-byte character, so it never ends valid UTF-8 text
///   and is free to be used as a tag. `LengthNN` has the value `0xC0 | NN`.
///   NOTE(review): the marker names suggest they tag heap-backed and `'static`-backed
///   storage — confirm against the code that reads them.
///
/// Values `0xD2..=0xFF` are deliberately left unused, which leaves a niche so that
/// `Option<LastByte>` still fits in one byte.
// `rustfmt::skip` keeps the hand-aligned table below intact.
#[rustfmt::skip]
// NOTE(review): presumably most variants are never referred to by name — confirm.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum LastByte {
    // ---- 0b0xxxxxxx: 1-byte character ----
    _0x00 = 0x00, _0x01 = 0x01, _0x02 = 0x02, _0x03 = 0x03,
    _0x04 = 0x04, _0x05 = 0x05, _0x06 = 0x06, _0x07 = 0x07,
    _0x08 = 0x08, _0x09 = 0x09, _0x0A = 0x0A, _0x0B = 0x0B,
    _0x0C = 0x0C, _0x0D = 0x0D, _0x0E = 0x0E, _0x0F = 0x0F,
    _0x10 = 0x10, _0x11 = 0x11, _0x12 = 0x12, _0x13 = 0x13,
    _0x14 = 0x14, _0x15 = 0x15, _0x16 = 0x16, _0x17 = 0x17,
    _0x18 = 0x18, _0x19 = 0x19, _0x1A = 0x1A, _0x1B = 0x1B,
    _0x1C = 0x1C, _0x1D = 0x1D, _0x1E = 0x1E, _0x1F = 0x1F,
    _0x20 = 0x20, _0x21 = 0x21, _0x22 = 0x22, _0x23 = 0x23,
    _0x24 = 0x24, _0x25 = 0x25, _0x26 = 0x26, _0x27 = 0x27,
    _0x28 = 0x28, _0x29 = 0x29, _0x2A = 0x2A, _0x2B = 0x2B,
    _0x2C = 0x2C, _0x2D = 0x2D, _0x2E = 0x2E, _0x2F = 0x2F,
    _0x30 = 0x30, _0x31 = 0x31, _0x32 = 0x32, _0x33 = 0x33,
    _0x34 = 0x34, _0x35 = 0x35, _0x36 = 0x36, _0x37 = 0x37,
    _0x38 = 0x38, _0x39 = 0x39, _0x3A = 0x3A, _0x3B = 0x3B,
    _0x3C = 0x3C, _0x3D = 0x3D, _0x3E = 0x3E, _0x3F = 0x3F,
    _0x40 = 0x40, _0x41 = 0x41, _0x42 = 0x42, _0x43 = 0x43,
    _0x44 = 0x44, _0x45 = 0x45, _0x46 = 0x46, _0x47 = 0x47,
    _0x48 = 0x48, _0x49 = 0x49, _0x4A = 0x4A, _0x4B = 0x4B,
    _0x4C = 0x4C, _0x4D = 0x4D, _0x4E = 0x4E, _0x4F = 0x4F,
    _0x50 = 0x50, _0x51 = 0x51, _0x52 = 0x52, _0x53 = 0x53,
    _0x54 = 0x54, _0x55 = 0x55, _0x56 = 0x56, _0x57 = 0x57,
    _0x58 = 0x58, _0x59 = 0x59, _0x5A = 0x5A, _0x5B = 0x5B,
    _0x5C = 0x5C, _0x5D = 0x5D, _0x5E = 0x5E, _0x5F = 0x5F,
    _0x60 = 0x60, _0x61 = 0x61, _0x62 = 0x62, _0x63 = 0x63,
    _0x64 = 0x64, _0x65 = 0x65, _0x66 = 0x66, _0x67 = 0x67,
    _0x68 = 0x68, _0x69 = 0x69, _0x6A = 0x6A, _0x6B = 0x6B,
    _0x6C = 0x6C, _0x6D = 0x6D, _0x6E = 0x6E, _0x6F = 0x6F,
    _0x70 = 0x70, _0x71 = 0x71, _0x72 = 0x72, _0x73 = 0x73,
    _0x74 = 0x74, _0x75 = 0x75, _0x76 = 0x76, _0x77 = 0x77,
    _0x78 = 0x78, _0x79 = 0x79, _0x7A = 0x7A, _0x7B = 0x7B,
    _0x7C = 0x7C, _0x7D = 0x7D, _0x7E = 0x7E, _0x7F = 0x7F,

    // ---- 0b10xxxxxx: trailing byte of a multi-byte character ----
    _0x80 = 0x80, _0x81 = 0x81, _0x82 = 0x82, _0x83 = 0x83,
    _0x84 = 0x84, _0x85 = 0x85, _0x86 = 0x86, _0x87 = 0x87,
    _0x88 = 0x88, _0x89 = 0x89, _0x8A = 0x8A, _0x8B = 0x8B,
    _0x8C = 0x8C, _0x8D = 0x8D, _0x8E = 0x8E, _0x8F = 0x8F,
    _0x90 = 0x90, _0x91 = 0x91, _0x92 = 0x92, _0x93 = 0x93,
    _0x94 = 0x94, _0x95 = 0x95, _0x96 = 0x96, _0x97 = 0x97,
    _0x98 = 0x98, _0x99 = 0x99, _0x9A = 0x9A, _0x9B = 0x9B,
    _0x9C = 0x9C, _0x9D = 0x9D, _0x9E = 0x9E, _0x9F = 0x9F,
    _0xA0 = 0xA0, _0xA1 = 0xA1, _0xA2 = 0xA2, _0xA3 = 0xA3,
    _0xA4 = 0xA4, _0xA5 = 0xA5, _0xA6 = 0xA6, _0xA7 = 0xA7,
    _0xA8 = 0xA8, _0xA9 = 0xA9, _0xAA = 0xAA, _0xAB = 0xAB,
    _0xAC = 0xAC, _0xAD = 0xAD, _0xAE = 0xAE, _0xAF = 0xAF,
    _0xB0 = 0xB0, _0xB1 = 0xB1, _0xB2 = 0xB2, _0xB3 = 0xB3,
    _0xB4 = 0xB4, _0xB5 = 0xB5, _0xB6 = 0xB6, _0xB7 = 0xB7,
    _0xB8 = 0xB8, _0xB9 = 0xB9, _0xBA = 0xBA, _0xBB = 0xBB,
    _0xBC = 0xBC, _0xBD = 0xBD, _0xBE = 0xBE, _0xBF = 0xBF,

    // ---- 0b11xxxxxx: start of a multi-byte character, never a valid last byte ----
    // `LengthNN` is `0xC0 | NN`.
    Length00 = 0xC0, Length01 = 0xC1, Length02 = 0xC2, Length03 = 0xC3,
    Length04 = 0xC4, Length05 = 0xC5, Length06 = 0xC6, Length07 = 0xC7,
    Length08 = 0xC8, Length09 = 0xC9, Length10 = 0xCA, Length11 = 0xCB,
    Length12 = 0xCC, Length13 = 0xCD, Length14 = 0xCE, Length15 = 0xCF,

    // Markers placed directly after the length range.
    HeapMarker   = 0xD0,
    StaticMarker = 0xD1,
}

// Compile-time layout checks:
//   * `LastByte` occupies exactly one byte, and
//   * its unused discriminant values leave a niche, so `Option<LastByte>` is one byte too.
const _: () = assert!(size_of::<LastByte>() == 1);
const _: () = assert!(size_of::<Option<LastByte>>() == 1);

/// Bit masks named after their own bit pattern.
// NOTE(review): presumably not every mask is used in every build configuration — confirm.
#[allow(dead_code)]
impl LastByte {
    /// The two most significant bits (`0b1100_0000`); every `Length*` discriminant has
    /// both of them set.
    pub const MASK_1100_0000: u8 = 0xC0;
    /// The six least significant bits (`0b0011_1111`), the complement of
    /// [`Self::MASK_1100_0000`].
    pub const MASK_0011_1111: u8 = 0x3F;
}

#[cfg(test)]
mod tests {
    use super::*;
    use paste::paste;

    /// Checks that the number in each `LengthNN` variant name matches its discriminant:
    /// `LastByte::LengthNN as u8` must equal `NN | LastByte::MASK_1100_0000`.
    #[test]
    // The literals below are zero-padded on purpose so `paste!` can splice them into the
    // variant names (`Length00`, `Length01`, ...), and `00 | mask` is an intentional identity.
    #[allow(clippy::identity_op, clippy::zero_prefixed_literal)]
    fn length_name_check() {
        // Expands to one name-vs-value assertion per listed length literal.
        macro_rules! gen_case {
            ($( $len:literal ),* $(,)?) => {$(
                paste! {
                    let raw_value = LastByte::[<Length $len>] as u8;
                    assert_eq!(raw_value, $len | LastByte::MASK_1100_0000);
                }
            )*};
        }
        gen_case! {
            00, 01, 02, 03, 04, 05, 06, 07,
            08, 09, 10, 11, 12, 13, 14, 15,
        }
    }
}