// r-toml 0.0.27
//
// Regular subset of TOML
// Documentation
#![allow(non_camel_case_types)]
use automata::{lex, KeyValueIterator};
use std::io::Write;
use std::{collections::HashMap, mem::transmute};

// the automaton has been generated by a modified version of `resharp` (https://github.com/ieviev/resharp)
pub mod automata {
    include!("dfa_generated.rs");

    /// Runs the generated table-driven automaton (`DFA`) over `text_len` bytes
    /// starting at `data` and reports every tag it produces.
    ///
    /// `callback(position, tag)` is invoked once per entry of the effect list
    /// attached to the state reached after each input byte. `position` is the
    /// current offset (already advanced past that byte) minus the entry's
    /// relative offset.
    ///
    /// The walk ends when the state becomes 0 or when `text_len` bytes have
    /// been consumed. Nothing is returned, so the caller cannot tell these two
    /// outcomes apart.
    ///
    /// Caller contract (not checked here): `data` must be readable for
    /// `text_len` bytes and `text_len` must be at least 1, because the first
    /// byte is read before `pos` is ever compared with `text_len`.
    #[inline]
    fn re_lex_mrel(
        data: *const u8,
        text_len: usize,
        mut callback: impl FnMut(usize, u8),
    ) {
        unsafe {
            let pdfa = &DFA;
            let mut pos: usize = 0;
            // the walk always starts in state 1; reaching state 0 ends the loop
            let mut rax: usize = 1;
            while rax != 0 {
                // combine the current state with the table entry for the input byte
                // (presumably a byte-class id — confirm against dfa_generated.rs)
                rax = (rax << DFA_SHIFT) + pdfa[*data.byte_add(pos) as usize] as usize;
                // transition lookup: this table starts right after the 256 per-byte entries
                rax = pdfa[256 + rax] as usize;
                pos += 1;
                // per-state effect index; 0 means the state has no effects
                let eff = pdfa[DFA_OFFSET + rax];
                if eff != 0 {
                    // The effect list lives at DFA_EFF_OFFSET + eff. Its first element,
                    // doubled, is the distance from the list start to its end; the
                    // (rel, tag) pairs start one byte in and are two bytes each.
                    // None of these raw reads are bounds-checked; they rely on the
                    // generated table being well-formed.
                    let effect_ptr = pdfa.as_ptr().byte_add(DFA_EFF_OFFSET + eff as usize);
                    let end_ptr = effect_ptr.byte_add((effect_ptr.read_unaligned() as usize) << 1);
                    let mut curr_eff_ptr = effect_ptr.byte_add(1);
                    while curr_eff_ptr < end_ptr {
                        let rel = *curr_eff_ptr;
                        let tag = *curr_eff_ptr.byte_add(1);
                        // `rel` is subtracted from the already-advanced position
                        callback(pos.wrapping_sub(rel as usize), tag);
                        curr_eff_ptr = curr_eff_ptr.byte_add(2);
                    }
                }
                // stop once every input byte has been consumed
                if pos == text_len {
                    break;
                }
            }
        }
    }

    /// Tokenizes `data` and reports every key, table and value span.
    ///
    /// `on_tag(begin, end, tag)` is called for each tag greater than 2; the
    /// span runs from the position of the previously reported tag (0 at the
    /// start) up to the position of this one. Tags 1 (Ignore) and 2 (Comment)
    /// are not forwarded and only move the span start.
    ///
    /// Returns `Err(0)` only when `data` is non-empty and the automaton did not
    /// produce a single tag; otherwise `Ok(())`.
    pub fn lex(data: &[u8], mut on_tag: impl FnMut(usize, usize, u8)) -> Result<(), usize> {
        // work on a copy with a trailing newline so the last line gets flushed
        let padded: Vec<u8> = [data, &b"\n"[..]].concat();

        let mut span_start = 0usize;
        let mut saw_any_tag = false;

        re_lex_mrel(padded.as_ptr(), padded.len(), |span_end, tag| {
            saw_any_tag = true;
            // only value/key/table tags are forwarded; text is [span_start..span_end]
            if tag > 2 {
                on_tag(span_start, span_end, tag);
            }
            // every tag, forwarded or not, starts the next span here
            span_start = span_end;
        });

        if !saw_any_tag && !data.is_empty() {
            return Err(0);
        }
        Ok(())
    }

    /// Owning iterator over `(flattened key, value)` pairs.
    ///
    /// The pairs are collected up front by `KeyValueIterator::new`; iterating
    /// just drains that list.
    pub struct KeyValueIterator {
        // already-collected pairs, handed out in order
        inner: std::vec::IntoIter<(String, crate::Value)>,
    }

    impl KeyValueIterator {
        pub(crate) fn new(data: &[u8]) -> Self {
            let mut items = Vec::new();
            let mut key_buf = Vec::new();
            let _ = crate::stream(data, |k, v| {
                items.push((k.to_str(&mut key_buf, data).to_string(), v));
                key_buf.clear();
            });
            KeyValueIterator { inner: items.into_iter() }
        }
    }

    /// Yields the collected `(key, value)` pairs in the order they were stored.
    impl Iterator for KeyValueIterator {
        type Item = (String, crate::Value);

        /// Returns the next stored pair, or `None` once all have been yielded.
        fn next(&mut self) -> Option<Self::Item> {
            self.inner.next()
        }

        /// Forwards the exact remaining length of the underlying vector
        /// iterator so that `collect` and `extend` can reserve space once.
        fn size_hint(&self) -> (usize, Option<usize>) {
            self.inner.size_hint()
        }
    }
}

// --
/// Tag identifying what kind of span the lexer reported.
///
/// The discriminants are fixed by `#[repr(u8)]` and must stay equal to the raw
/// tag bytes emitted by the generated automaton: `stream` converts those bytes
/// into `Token` values directly.
#[repr(u8)]
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum Token {
    /// Tag value 0; never forwarded by `lex`/`stream`.
    NONE = 0,
    /// Ignored input; `lex` only uses it to advance the start of the next span.
    IGNORE = 1,
    /// Comment; handled by `lex` exactly like `IGNORE`.
    COMMENT = 2,
    /// Table header; `stream` records its span as the key root and resets the
    /// table index to 0.
    TABLE_STD = 3,
    /// Table header that bumps the table index in `stream` (presumably the
    /// `[[name]]` array-of-tables form — confirm against the automaton).
    TABLE_ARR = 4,
    /// Key; `stream` records its span as the key for the values that follow
    /// (the `UQ` prefix presumably stands for "unquoted").
    UQ_KEY = 5,
    // Every variant below is delivered by `stream` as the `kind` of a `Value`.
    /// Boolean true; accepted by `Value::to_bool`.
    TRUE = 6,
    /// Boolean false; accepted by `Value::to_bool`.
    FALSE = 7,
    /// Presumably an integer literal.
    INT = 8,
    /// String whose span `Value::to_str` returns as-is.
    VALID_STR = 9,
    /// Presumably a string containing escape sequences; `Value::to_str`
    /// rejects it.
    ESC_STR = 10,
    /// Presumably a date/time literal; no conversion helper exists for it.
    DATE = 11,
    /// Presumably a floating-point literal.
    FLOAT = 12,
    /// Empty string; `Value::to_str` returns `""` for it.
    EMPTYSTR = 13,
    /// Presumably a one-dimensional array of integers.
    ARR1_INT = 14,
    /// Presumably a one-dimensional array of floats.
    ARR1_FLOAT = 15,
    /// Presumably a one-dimensional array of booleans.
    ARR1_BOOL = 16,
    /// Presumably a one-dimensional array of strings.
    ARR1_STR = 17,
}

/// Location of a key inside the source buffer, expressed as byte offsets.
///
/// A key consists of an optional root (the span of the most recent table
/// header) and the key span itself. `root_end == 0` means "no root".
#[derive(Clone, Debug)]
pub struct Key {
    /// Table index; when greater than 0 it is rendered as `/<index>` after
    /// the root by [`Key::to_str`].
    pub index: usize,
    /// Start offset of the root span in the source buffer.
    pub root_begin: usize,
    /// End offset (exclusive) of the root span; 0 when there is no root.
    pub root_end: usize,
    /// Start offset of the key span in the source buffer.
    pub key_begin: usize,
    /// End offset (exclusive) of the key span.
    pub key_end: usize,
}
impl Key {
    /// Renders the key as text.
    ///
    /// * Without a root the key text is borrowed straight from `data` and
    ///   `key_buffer` is left untouched.
    /// * With a root, `root[/index].key` is appended to `key_buffer` and the
    ///   whole buffer is returned.
    ///
    /// The result may borrow from either argument, so both share the lifetime
    /// `'a`; the returned text can therefore never outlive `data`.
    ///
    /// NOTE: clearing the key buffer after use is the caller's responsibility
    ///
    /// # Panics
    ///
    /// Panics if a span lies outside `data`, if a span's end precedes its
    /// start, or if the resulting text is not valid UTF-8.
    pub fn to_str<'a>(&self, key_buffer: &'a mut Vec<u8>, data: &'a [u8]) -> &'a str {
        // Range indexing is bounds-checked and, unlike `&data[begin]`, accepts
        // an empty span positioned at the very end of `data`.
        let key_bytes = &data[self.key_begin..self.key_end];

        // no root
        if self.root_end == 0 {
            return std::str::from_utf8(key_bytes).expect("key span is not valid UTF-8");
        }

        // root.key
        key_buffer.extend_from_slice(&data[self.root_begin..self.root_end]);
        if self.index > 0 {
            write!(key_buffer, "/{}", self.index).expect("writing to a Vec<u8> cannot fail");
        }
        key_buffer.push(b'.');
        key_buffer.extend_from_slice(key_bytes);
        std::str::from_utf8(key_buffer).expect("flattened key is not valid UTF-8")
    }
}
/// A value found in the source buffer: its kind plus the byte span it covers.
#[derive(Clone, Debug)]
pub struct Value {
    /// Kind of value, as tagged by the lexer.
    pub kind: Token,
    /// Start offset of the value span in the source buffer.
    pub pos_begin: usize,
    /// End offset (exclusive) of the value span.
    pub pos_end: usize,
}
impl Value {
    /// Returns the raw text of the value span, borrowed from `data`.
    ///
    /// The result is tied to the lifetime of `data` (not of `self`), since
    /// that is the buffer it points into.
    ///
    /// # Panics
    ///
    /// Panics if the span lies outside `data`, if its end precedes its start,
    /// or if the bytes are not valid UTF-8.
    pub fn to_slice<'a>(&self, data: &'a [u8]) -> &'a str {
        // Checked slicing and checked UTF-8 validation: the fields are public
        // and `data` is arbitrary bytes, so neither can be taken on trust.
        std::str::from_utf8(&data[self.pos_begin..self.pos_end])
            .expect("value span is not valid UTF-8")
    }
    /// Parses the span text as an `i64`; `kind` is not consulted.
    pub fn to_int(&self, data: &[u8]) -> Result<i64, std::num::ParseIntError> {
        self.to_slice(data).parse::<i64>()
    }
    /// Parses the span text as an `f64`; `kind` is not consulted.
    pub fn to_float(&self, data: &[u8]) -> Result<f64, std::num::ParseFloatError> {
        self.to_slice(data).parse::<f64>()
    }
    /// Returns the string content for `VALID_STR` (the span text) and
    /// `EMPTYSTR` (`""`).
    ///
    /// # Errors
    ///
    /// Any other kind, including `ESC_STR`, is returned as the error value.
    pub fn to_str<'a>(&self, data: &'a [u8]) -> Result<&'a str, Token> {
        match self.kind {
            Token::VALID_STR => Ok(self.to_slice(data)),
            Token::EMPTYSTR => Ok(""),
            _ => Err(self.kind.clone()),
        }
    }
    /// Returns the boolean for `TRUE` / `FALSE`.
    ///
    /// # Errors
    ///
    /// Any other kind is returned as the error value.
    pub fn to_bool(&self) -> Result<bool, Token> {
        match self.kind {
            Token::TRUE => Ok(true),
            Token::FALSE => Ok(false),
            _ => Err(self.kind.clone()),
        }
    }
    // can't have a datetime conversion otherwise i'd be lying about no dependencies
}

pub fn stream(data: &[u8], mut on_key_value: impl FnMut(&Key, Value)) -> Result<(), usize> {
    let mut key = Key {
        index: 0,
        root_begin: 0,
        root_end: 0,
        key_begin: 0,
        key_end: 0,
    };
    lex(data, |prev, nextpos, tag| {
        if tag >= 3 {
            match unsafe { transmute(tag) } {
                Token::UQ_KEY => {
                    key.key_begin = prev;
                    key.key_end = nextpos;
                }
                Token::TABLE_STD => {
                    key.index = 0;
                    key.root_begin = prev;
                    key.root_end = nextpos;
                }
                Token::TABLE_ARR => {
                    key.index = key.index + 1;
                    key.root_begin = prev;
                    key.root_end = nextpos;
                }
                token => {
                    let value = Value {
                        kind: token,
                        pos_begin: prev,
                        pos_end: nextpos,
                    };
                    on_key_value(&key, value)
                }
            }
        }
    })
}

/// Parses `data` into a vector of `(flattened key, value)` pairs, in the
/// order `stream` delivers them.
///
/// # Errors
///
/// Returns whatever error `stream` reports.
pub fn to_vec(data: &[u8]) -> Result<Vec<(String, Value)>, usize> {
    let mut result: Vec<(String, Value)> = Vec::with_capacity(128);
    // scratch space used by `Key::to_str` for keys that have a root
    let mut key_buf = Vec::new();
    stream(data, |k, v| {
        result.push((k.to_str(&mut key_buf, data).to_string(), v));
        // `Key::to_str` appends to the buffer; without this reset every rooted
        // key would be prefixed with all previously rendered keys
        key_buf.clear();
    })?;
    Ok(result)
}

pub fn to_struct_vec(data: &[u8]) -> Result<Vec<(Key, Value)>, usize> {
    let mut result: Vec<(Key, Value)> = Vec::with_capacity(128);
    stream(data, |k, v| {
        result.push((k.clone(), v));
    })?;
    Ok(result)
}

/// Parses `data` into a map from flattened key to value.
///
/// When the same flattened key occurs more than once, the value delivered
/// last replaces the earlier one.
///
/// # Errors
///
/// Returns whatever error `stream` reports.
pub fn to_map(data: &[u8]) -> Result<HashMap<String, Value>, usize> {
    let mut map = HashMap::<String, Value>::new();
    // scratch space for keys assembled from root + key
    let mut scratch: Vec<u8> = Vec::new();

    stream(data, |key, value| {
        let name = String::from(key.to_str(&mut scratch, data));
        map.insert(name, value);
        // leave the scratch buffer empty for the next key
        scratch.clear();
    })?;

    Ok(map)
}

/// Parses `data` and returns an iterator over its `(key, value)` pairs.
///
/// The pairs come from `KeyValueIterator::new`, which collects them eagerly
/// and discards any error reported by `stream`.
pub fn to_iter(data: &[u8]) -> KeyValueIterator {
    KeyValueIterator::new(data)
}