Skip to main content

r_toml/
lib.rs

1#![allow(non_camel_case_types)]
2use automata::{lex, KeyValueIterator};
3use std::io::Write;
4use std::{collections::HashMap, mem::transmute};
5
6// the automaton has been generated by a modified version of `resharp` (https://github.com/ieviev/resharp)
7pub mod automata {
8    include!("dfa_generated.rs");
9
10    #[inline]
11    fn re_lex_mrel(
12        data: *const u8,
13        text_len: usize,
14        mut callback: impl FnMut(usize, u8),
15    ) {
16        unsafe {
17            let pdfa = &DFA;
18            let mut pos: usize = 0;
19            let mut rax: usize = 1;
20            while rax != 0 {
21                rax = (rax << DFA_SHIFT) + pdfa[*data.byte_add(pos) as usize] as usize;
22                rax = pdfa[256 + rax] as usize;
23                pos += 1;
24                let eff = pdfa[DFA_OFFSET + rax];
25                if eff != 0 {
26                    let effect_ptr = pdfa.as_ptr().byte_add(DFA_EFF_OFFSET + eff as usize);
27                    let end_ptr = effect_ptr.byte_add((effect_ptr.read_unaligned() as usize) << 1);
28                    let mut curr_eff_ptr = effect_ptr.byte_add(1);
29                    while curr_eff_ptr < end_ptr {
30                        let rel = *curr_eff_ptr;
31                        let tag = *curr_eff_ptr.byte_add(1);
32                        callback(pos.wrapping_sub(rel as usize), tag);
33                        curr_eff_ptr = curr_eff_ptr.byte_add(2);
34                    }
35                }
36                if pos == text_len {
37                    break;
38                }
39            }
40        }
41    }
42
43    pub fn lex(data: &[u8], mut on_tag: impl FnMut(usize, usize, u8)) -> Result<(), usize> {
44        let mut last_pos: usize = 0;
45        let mut ok = false;
46
47        // append a newline to flush the last line
48        let mut buf = Vec::with_capacity(data.len() + 1);
49        buf.extend_from_slice(data);
50        buf.push(b'\n');
51
52        re_lex_mrel(buf.as_ptr(), buf.len(), |pos, tag| {
53            ok = true;
54            // tags 1 (Ignore) and 2 (Comment) just advance last_pos
55            if tag <= 2 {
56                last_pos = pos;
57            } else {
58                // value/key/table tag: text is [last_pos..pos]
59                on_tag(last_pos, pos, tag);
60                last_pos = pos;
61            }
62        });
63
64        if ok || data.is_empty() { Ok(()) } else { Err(0) }
65    }
66
67    pub struct KeyValueIterator {
68        inner: std::vec::IntoIter<(String, crate::Value)>,
69    }
70
71    impl KeyValueIterator {
72        pub(crate) fn new(data: &[u8]) -> Self {
73            let mut items = Vec::new();
74            let mut key_buf = Vec::new();
75            let _ = crate::stream(data, |k, v| {
76                items.push((k.to_str(&mut key_buf, data).to_string(), v));
77                key_buf.clear();
78            });
79            KeyValueIterator { inner: items.into_iter() }
80        }
81    }
82
83    impl Iterator for KeyValueIterator {
84        type Item = (String, crate::Value);
85
86        fn next(&mut self) -> Option<Self::Item> {
87            self.inner.next()
88        }
89    }
90}
91
92// --
/// Token classes reported by the lexer.
///
/// The discriminants are the raw `u8` tags emitted by the generated
/// automaton, which is why the enum is `#[repr(u8)]` and the values are
/// spelled out explicitly. The enum is `Copy`, so it can be passed and
/// returned by value without `clone()`.
#[repr(u8)]
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum Token {
    /// Tag `0`; never forwarded by `lex`.
    NONE = 0,
    /// Ignored input; consumed by `lex` without being reported.
    IGNORE = 1,
    /// Comment; consumed by `lex` without being reported.
    COMMENT = 2,
    /// Table header; `stream` records it as the current root and resets the
    /// array index to `0`.
    TABLE_STD = 3,
    /// Array-of-tables header; `stream` records it as the current root and
    /// increments the array index.
    TABLE_ARR = 4,
    /// Key; `stream` records its span as the current key.
    UQ_KEY = 5,
    /// Boolean `true`; accepted by `Value::to_bool`.
    TRUE = 6,
    /// Boolean `false`; accepted by `Value::to_bool`.
    FALSE = 7,
    /// Integer value.
    INT = 8,
    /// String value accepted by `Value::to_str`.
    VALID_STR = 9,
    /// String value rejected by `Value::to_str` (presumably because it
    /// contains escape sequences — TODO confirm against the grammar).
    ESC_STR = 10,
    /// Date value.
    DATE = 11,
    /// Floating-point value.
    FLOAT = 12,
    /// Empty string; `Value::to_str` returns `""` for it.
    EMPTYSTR = 13,
    /// Presumably an array of integers — TODO confirm against the grammar.
    ARR1_INT = 14,
    /// Presumably an array of floats — TODO confirm against the grammar.
    ARR1_FLOAT = 15,
    /// Presumably an array of booleans — TODO confirm against the grammar.
    ARR1_BOOL = 16,
    /// Presumably an array of strings — TODO confirm against the grammar.
    ARR1_STR = 17,
}
115
/// Location of a key inside the source buffer, together with the table
/// header it was found under.
///
/// All `*_begin` / `*_end` fields are byte offsets (half-open ranges).
#[derive(Clone, Debug)]
pub struct Key {
    /// Array-of-tables counter: `0` after a plain table header, incremented
    /// by one for every array-of-tables header.
    pub index: usize,
    /// Start of the enclosing table header span.
    pub root_begin: usize,
    /// End of the enclosing table header span; `0` means "no enclosing table".
    pub root_end: usize,
    /// Start of the key span.
    pub key_begin: usize,
    /// End of the key span.
    pub key_end: usize,
}
impl Key {
    /// Renders the key as `key`, `root.key`, or `root/index.key`.
    ///
    /// * Without a root (`root_end == 0`) the returned string borrows
    ///   directly from `data` and `key_buffer` is left untouched.
    /// * With a root, the rendered text is *appended* to `key_buffer` and the
    ///   whole buffer is returned. `/index` is only emitted when `index > 0`.
    ///
    /// NOTE: clearing the key buffer after use is the caller's responsibility
    ///
    /// The returned string may borrow from either argument, so both share the
    /// lifetime `'a`.
    ///
    /// # Panics
    ///
    /// Panics if a span lies outside `data`, or if the rendered text is not
    /// valid UTF-8.
    pub fn to_str<'a>(&self, key_buffer: &'a mut Vec<u8>, data: &'a [u8]) -> &'a str {
        // Checked slicing: a bad span panics instead of reading out of bounds.
        let key_bytes = &data[self.key_begin..self.key_end];

        // no root
        if self.root_end == 0 {
            return std::str::from_utf8(key_bytes).expect("key is not valid UTF-8");
        }

        // root.key
        let root_bytes = &data[self.root_begin..self.root_end];
        key_buffer.extend_from_slice(root_bytes);
        if self.index > 0 {
            write!(key_buffer, "/{}", self.index).expect("writing to a Vec<u8> cannot fail");
        }
        key_buffer.push(b'.');
        key_buffer.extend_from_slice(key_bytes);
        std::str::from_utf8(key_buffer).expect("rendered key is not valid UTF-8")
    }
}
161#[derive(Clone, Debug)]
162pub struct Value {
163    pub kind: Token,
164    pub pos_begin: usize,
165    pub pos_end: usize,
166}
167impl Value {
168    pub fn to_slice(&self, data: &[u8]) -> &str {
169        unsafe {
170            std::str::from_utf8_unchecked(std::slice::from_raw_parts(
171                &data[self.pos_begin],
172                self.pos_end - self.pos_begin,
173            ))
174        }
175    }
176    pub fn to_int(&self, data: &[u8]) -> Result<i64, std::num::ParseIntError> {
177        self.to_slice(data).parse::<i64>()
178    }
179    pub fn to_float(&self, data: &[u8]) -> Result<f64, std::num::ParseFloatError> {
180        self.to_slice(data).parse::<f64>()
181    }
182    pub fn to_str(&self, data: &[u8]) -> Result<&str, Token> {
183        match self.kind {
184            Token::VALID_STR => Ok(self.to_slice(data)),
185            Token::EMPTYSTR => Ok(""),
186            _ => Err(self.kind.clone()),
187        }
188    }
189    pub fn to_bool(&self) -> Result<bool, Token> {
190        match self.kind {
191            Token::TRUE => Ok(true),
192            Token::FALSE => Ok(false),
193            _ => Err(self.kind.clone()),
194        }
195    }
196    // can't have a datetime conversion otherwise i'd be lying about no dependencies
197}
198
199pub fn stream(data: &[u8], mut on_key_value: impl FnMut(&Key, Value)) -> Result<(), usize> {
200    let mut key = Key {
201        index: 0,
202        root_begin: 0,
203        root_end: 0,
204        key_begin: 0,
205        key_end: 0,
206    };
207    lex(data, |prev, nextpos, tag| {
208        if tag >= 3 {
209            match unsafe { transmute(tag) } {
210                Token::UQ_KEY => {
211                    key.key_begin = prev;
212                    key.key_end = nextpos;
213                }
214                Token::TABLE_STD => {
215                    key.index = 0;
216                    key.root_begin = prev;
217                    key.root_end = nextpos;
218                }
219                Token::TABLE_ARR => {
220                    key.index = key.index + 1;
221                    key.root_begin = prev;
222                    key.root_end = nextpos;
223                }
224                token => {
225                    let value = Value {
226                        kind: token,
227                        pos_begin: prev,
228                        pos_end: nextpos,
229                    };
230                    on_key_value(&key, value)
231                }
232            }
233        }
234    })
235}
236
237pub fn to_vec(data: &[u8]) -> Result<Vec<(String, Value)>, usize> {
238    let mut result: Vec<(String, Value)> = Vec::with_capacity(128);
239    let mut key_buf = Vec::with_capacity(0);
240    stream(data, |k, v| {
241        result.push((k.to_str(&mut key_buf, data).to_string(), v));
242    })?;
243    Ok(result)
244}
245
246pub fn to_struct_vec(data: &[u8]) -> Result<Vec<(Key, Value)>, usize> {
247    let mut result: Vec<(Key, Value)> = Vec::with_capacity(128);
248    stream(data, |k, v| {
249        result.push((k.clone(), v));
250    })?;
251    Ok(result)
252}
253
254pub fn to_map(data: &[u8]) -> Result<HashMap<String, Value>, usize> {
255    let mut result: HashMap<String, Value> = HashMap::new();
256    let mut key_buf = Vec::with_capacity(0);
257    stream(data, |k, v| {
258        result.insert(k.to_str(&mut key_buf, data).to_string(), v);
259        key_buf.clear();
260    })?;
261    Ok(result)
262}
263
/// Returns an iterator over the `(key, value)` pairs of `data`.
///
/// Thin wrapper around `KeyValueIterator::new`.
pub fn to_iter(data: &[u8]) -> KeyValueIterator {
    KeyValueIterator::new(data)
}