lisette-semantics 0.2.13

Little language inspired by Rust that compiles to Go
Documentation
use syntax::ast::Literal;

/// Returns the bytes a string literal denotes at runtime.
///
/// String literals are decoded (honouring their `raw` flag); every other kind of literal
/// yields `None`.
pub(crate) fn runtime_bytes(literal: &Literal) -> Option<Vec<u8>> {
    match literal {
        Literal::String { value, raw } => Some(decode(value, *raw)),
        _ => None,
    }
}

/// Reports whether `candidate` should be considered equal to `target`.
///
/// When both are string literals:
/// - an identical spelling (same `raw` flag and same source text) is equal outright;
/// - otherwise, if `target_bytes` is supplied, `candidate` is decoded and its bytes are
///   compared against `target_bytes`.
///
/// In every remaining case the result is plain `Literal` equality.
///
/// NOTE(review): `target_bytes` is presumably the pre-decoded form of `target` (the tests in
/// this file obtain it from `runtime_bytes(target)`); this function does not verify that.
pub(crate) fn equals_target(
    candidate: &Literal,
    target: &Literal,
    target_bytes: Option<&[u8]>,
) -> bool {
    match (candidate, target) {
        (
            Literal::String {
                value: cand_text,
                raw: cand_raw,
            },
            Literal::String {
                value: target_text,
                raw: target_raw,
            },
        ) => {
            let same_spelling = cand_raw == target_raw && cand_text == target_text;
            if same_spelling {
                true
            } else if let Some(expected) = target_bytes {
                // Different spellings can still denote the same runtime bytes.
                decode(cand_text, *cand_raw) == expected
            } else {
                candidate == target
            }
        }
        _ => candidate == target,
    }
}

/// Decodes the source text of a string literal into the bytes it denotes at runtime.
///
/// Raw literals (`raw == true`) are returned byte-for-byte. Otherwise backslash escapes are
/// interpreted as follows:
///
/// - `\n`, `\t`, `\r`, `\\`, `\'`, `\"`, `\a`, `\b`, `\f`, `\v`: the corresponding single byte.
/// - `\x` followed by up to two hex digits: that byte (`0x00` when no hex digit follows).
/// - `\u{…}`: the UTF-8 encoding of the code point. Non-hex characters inside the braces count
///   as the digit `0`, a missing `}` consumes the rest of the input, and a value that is not a
///   valid `char` produces no output.
/// - `\` followed by one to three octal digits: the value truncated to one byte.
///
/// Decoding is lenient rather than validating. Anything that is not a recognised escape,
/// including a trailing lone backslash and a `\u` that is not followed by `{`, is kept verbatim
/// rather than dropped.
fn decode(value: &str, raw: bool) -> Vec<u8> {
    if raw {
        return value.as_bytes().to_vec();
    }

    let mut out = Vec::with_capacity(value.len());
    let mut chars = value.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\\' {
            push_char_utf8(&mut out, c);
            continue;
        }

        match chars.next() {
            // A backslash at the very end of the text has nothing to escape.
            None => out.push(b'\\'),
            Some('n') => out.push(b'\n'),
            Some('t') => out.push(b'\t'),
            Some('r') => out.push(b'\r'),
            Some('\\') => out.push(b'\\'),
            Some('\'') => out.push(b'\''),
            Some('"') => out.push(b'"'),
            Some('a') => out.push(0x07),
            Some('b') => out.push(0x08),
            Some('f') => out.push(0x0C),
            Some('v') => out.push(0x0B),
            Some('x') => {
                // At most two hex digits, so the accumulator cannot exceed 0xFF.
                let mut byte: u8 = 0;
                for _ in 0..2 {
                    match chars.peek().and_then(|d| d.to_digit(16)) {
                        Some(d) => {
                            byte = byte.wrapping_mul(16) + d as u8;
                            chars.next();
                        }
                        None => break,
                    }
                }
                out.push(byte);
            }
            Some('u') => {
                if chars.next_if_eq(&'{').is_none() {
                    // Not a `\u{…}` escape: keep it verbatim, like any other unrecognised
                    // escape, instead of letting `\u` silently vanish from the output.
                    out.extend_from_slice(b"\\u");
                    continue;
                }

                let mut codepoint: u32 = 0;
                for d in chars.by_ref() {
                    if d == '}' {
                        break;
                    }
                    // After the wrapping multiply the low four bits are zero, so adding a
                    // single hex digit can never overflow.
                    codepoint = codepoint.wrapping_mul(16) + d.to_digit(16).unwrap_or(0);
                }
                if let Some(c) = char::from_u32(codepoint) {
                    push_char_utf8(&mut out, c);
                }
            }
            Some(d @ '0'..='7') => {
                // `d` is an ASCII octal digit, so the subtraction cannot underflow. Up to two
                // more digits follow; three octal digits fit in a u16 (max 0o777).
                let mut value = u16::from(d as u8 - b'0');
                for _ in 0..2 {
                    match chars.peek().and_then(|n| n.to_digit(8)) {
                        Some(n) => {
                            value = value * 8 + n as u16;
                            chars.next();
                        }
                        None => break,
                    }
                }
                out.push((value & 0xFF) as u8);
            }
            Some(other) => {
                // Unrecognised escape: keep both the backslash and the character.
                out.push(b'\\');
                push_char_utf8(&mut out, other);
            }
        }
    }

    out
}

/// Appends the UTF-8 encoding of `c` to `out`.
fn push_char_utf8(out: &mut Vec<u8>, c: char) {
    let mut buf = [0u8; 4];
    out.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a string literal from its source text and `raw` flag.
    fn lit(text: &str, raw: bool) -> Literal {
        let value = String::from(text);
        Literal::String { value, raw }
    }

    /// Compares `candidate` against `target` using the target's decoded bytes.
    fn matches_target(candidate: &Literal, target: &Literal) -> bool {
        let target_bytes = runtime_bytes(target);
        equals_target(candidate, target, target_bytes.as_deref())
    }

    #[test]
    fn raw_and_escaped_with_same_runtime_value_are_equal() {
        let raw = lit(r"a\nb", true);
        let escaped = lit(r"a\\nb", false);
        assert!(matches_target(&raw, &escaped));
    }

    #[test]
    fn unicode_escape_equals_literal_char() {
        let plain = lit("A", false);
        assert!(matches_target(&plain, &lit(r"\u{0041}", false)));
    }

    #[test]
    fn hex_escape_equals_literal_char() {
        let plain = lit("A", false);
        assert!(matches_target(&plain, &lit(r"\x41", false)));
    }

    #[test]
    fn octal_escape_equals_literal_char() {
        let plain = lit("A", false);
        assert!(matches_target(&plain, &lit(r"\101", false)));
    }

    #[test]
    fn newline_spellings_are_equal() {
        let newline = lit(r"\n", false);
        assert!(matches_target(&newline, &lit(r"\u{000A}", false)));
        assert!(matches_target(&newline, &lit(r"\x0A", false)));
        assert!(matches_target(&newline, &lit(r"\012", false)));
    }

    #[test]
    fn distinct_strings_are_not_equal() {
        assert!(!matches_target(&lit("a", false), &lit("b", false)));
        assert!(!matches_target(&lit(r"\n", false), &lit("n", false)));
    }

    #[test]
    fn unicode_escape_for_multibyte_codepoint() {
        let literal_char = lit("\u{1F600}", false);
        let escape = lit(r"\u{1F600}", false);
        assert!(matches_target(&literal_char, &escape));
    }

    #[test]
    fn raw_string_preserves_backslashes() {
        let source = r"a\nb";
        assert_eq!(decode(source, true), source.as_bytes().to_vec());
    }

    #[test]
    fn identical_source_short_circuits() {
        // No target bytes supplied: equality must come from the identical spelling alone.
        let left = lit("hello", false);
        let right = lit("hello", false);
        assert!(equals_target(&left, &right, None));
    }
}