toml_spanner/
de.rs

1//! Core deserialization logic that deserializes toml content to [`Value`]
2
3use crate::{
4    Span,
5    error::{Error, ErrorKind},
6    tokens::{Error as TokenError, Token, Tokenizer},
7    value::{self, Key, Value, ValueInner},
8};
9use smallvec::SmallVec;
10use std::{
11    borrow::Cow,
12    collections::{BTreeMap, btree_map::Entry},
13    ops::Range,
14};
15
/// String data taken from the input: a `Cow`, so it can be either borrowed or owned.
type DeStr<'de> = Cow<'de, str>;
/// One `key = value` entry of a table, with the value still in its intermediate [`Val`] form.
type TablePair<'de> = (Key<'de>, Val<'de>);
/// A [`SmallVec`] with inline room for five elements; used for table header paths.
type InlineVec<T> = SmallVec<[T; 5]>;
19
20/// Parses a toml string into a [`ValueInner::Table`]
21pub fn parse(s: &str) -> Result<Value<'_>, Error> {
22    if s.len() > u32::MAX as usize {
23        return Err(Error {
24            kind: ErrorKind::FileTooLarge,
25            span: Span::new(0, 0),
26            line_info: None,
27        });
28    }
29
30    let mut de = Deserializer::new(s);
31
32    let raw_tables = de.tables()?;
33    let mut ctx = DeserializeCtx {
34        table_indices: &build_table_indices(&raw_tables),
35        table_pindices: &build_table_pindices(&raw_tables),
36        raw_tables,
37        de: &de,
38    };
39    let root = ctx.deserialize_entry(
40        DeserializeTableIdx {
41            table_idx: 0,
42            depth: 0,
43            idx_range: 0..ctx.raw_tables.len(),
44        },
45        Vec::new(),
46    )?;
47
48    Ok(Value::with_span(root, Span::new(0, s.len() as u32)))
49}
50
/// Parser state: the input text together with the tokenizer that walks over it.
struct Deserializer<'a> {
    /// The complete text handed to [`Deserializer::new`].
    input: &'a str,
    /// Tokenizer created over `input`; advanced as lines and values are read.
    tokens: Tokenizer<'a>,
}
55
// State for the second pass, which turns the flat list of parsed tables into
// a nested value tree.
struct DeserializeCtx<'de, 'b> {
    // all tables of the document in source order; the `values` of a table are
    // taken out (left as `None`) once that table has been deserialized
    raw_tables: Vec<Table<'de>>,
    // maps table headers to a list of tables with that exact header
    // (the list contains indices into `raw_tables` and is ordered)
    table_indices: &'b BTreeMap<InlineVec<DeStr<'de>>, Vec<usize>>,
    // maps table headers to a list of all subtables
    // (the list contains indices into `raw_tables` and is ordered)
    table_pindices: &'b BTreeMap<InlineVec<DeStr<'de>>, Vec<usize>>,
    // the deserializer that produced `raw_tables`; used here to build errors
    de: &'b Deserializer<'de>,
}
// specifies the table/array that is currently being deserialized, namely the
// table/array with the header `raw_tables[table_idx].header[0..depth]`
// (a `depth` of 0 denotes the root table, whose header is empty)
struct DeserializeTableIdx {
    // index of the first occurrence of the desired header (even as a prefix)
    table_idx: usize,
    // number of leading header components that make up the desired header
    depth: usize,
    // range of `raw_tables` indices to consider, used to isolate subtables of
    // different array entries
    idx_range: Range<usize>,
}
76impl DeserializeTableIdx {
77    fn get_header<'de>(&self, raw_tables: &[Table<'de>]) -> InlineVec<DeStr<'de>> {
78        if self.depth == 0 {
79            return InlineVec::new();
80        }
81
82        raw_tables[self.table_idx].header[0..self.depth]
83            .iter()
84            .map(|key| key.name.clone())
85            .collect()
86    }
87}
88impl<'de, 'b> DeserializeCtx<'de, 'b> {
89    // deserialize the table/array given by `table_idx`
90    fn deserialize_entry(
91        &mut self,
92        table_idx: DeserializeTableIdx,
93        // values defined via dotted keys should be passed on to the corresponding subtable
94        additional_values: Vec<TablePair<'de>>,
95    ) -> Result<value::ValueInner<'de>, Error> {
96        let current_header = table_idx.get_header(&self.raw_tables);
97        let matching_tables = self.get_matching_tables(&current_header, &table_idx.idx_range);
98
99        let is_array = matching_tables
100            .iter()
101            .all(|idx| self.raw_tables[*idx].array)
102            && !matching_tables.is_empty();
103
104        if is_array {
105            // catch invalid cases like:
106            //   [a.b]
107            //   [[a]]
108            if table_idx.table_idx < matching_tables[0] {
109                let array_tbl = &self.raw_tables[matching_tables[0]];
110                return Err(self.de.error(
111                    array_tbl.at as usize,
112                    Some(array_tbl.end as usize),
113                    ErrorKind::RedefineAsArray,
114                ));
115            }
116            assert!(additional_values.is_empty());
117
118            let mut array = value::Array::new();
119            for (i, array_entry_idx) in matching_tables.iter().copied().enumerate() {
120                let entry_range_end = matching_tables
121                    .get(i + 1)
122                    .copied()
123                    .unwrap_or(table_idx.idx_range.end);
124
125                let span = Self::get_table_span(&self.raw_tables[array_entry_idx]);
126                let values = self.raw_tables[array_entry_idx].values.take().unwrap();
127                let array_entry = self.deserialize_as_table(
128                    &current_header,
129                    array_entry_idx..entry_range_end,
130                    values.values.into_iter(),
131                )?;
132                array.push(Value::with_span(ValueInner::Table(array_entry), span));
133            }
134            Ok(ValueInner::Array(array))
135        } else {
136            if matching_tables.len() > 1 {
137                let first_tbl = &self.raw_tables[matching_tables[0]];
138                let second_tbl = &self.raw_tables[matching_tables[1]];
139                return Err(self.de.error(
140                    second_tbl.at as usize,
141                    Some(second_tbl.end as usize),
142                    ErrorKind::DuplicateTable {
143                        name: current_header.last().unwrap().to_string(),
144                        first: Span::new(first_tbl.at, first_tbl.end),
145                    },
146                ));
147            }
148
149            let values = matching_tables
150                .first()
151                .map(|idx| {
152                    self.raw_tables[*idx]
153                        .values
154                        .take()
155                        .unwrap()
156                        .values
157                        .into_iter()
158                })
159                .unwrap_or_default()
160                .chain(additional_values);
161            let subtable =
162                self.deserialize_as_table(&current_header, table_idx.idx_range, values)?;
163
164            Ok(ValueInner::Table(subtable))
165        }
166    }
167    fn deserialize_as_table(
168        &mut self,
169        header: &[DeStr<'de>],
170        range: Range<usize>,
171        values: impl Iterator<Item = TablePair<'de>>,
172    ) -> Result<value::Table<'de>, Error> {
173        let mut table = value::Table::new();
174        let mut dotted_keys_map = BTreeMap::new();
175
176        for (key, val) in values {
177            match val.e {
178                E::DottedTable(mut tbl_vals) => {
179                    tbl_vals.span = Some(Span::new(val.start, val.end));
180                    dotted_keys_map.insert(key, tbl_vals);
181                }
182                _ => table_insert(&mut table, key, val, self.de)?,
183            }
184        }
185
186        let subtables = self.get_subtables(header, &range);
187        for &subtable_idx in subtables {
188            if self.raw_tables[subtable_idx].values.is_none() {
189                continue;
190            }
191
192            let subtable_name = &self.raw_tables[subtable_idx].header[header.len()];
193
194            let dotted_entries = match dotted_keys_map.remove_entry(subtable_name) {
195                // Detect redefinitions of tables created via dotted keys, as
196                // these are considered errors, e.g:
197                //   apple.color = "red"
198                //   [apple]  # INVALID
199                // However adding subtables is allowed:
200                //   apple.color = "red"
201                //   [apple.texture]  # VALID
202                Some((previous_key, _))
203                    if self.raw_tables[subtable_idx].header.len() == header.len() + 1 =>
204                {
205                    return Err(self.de.error(
206                        subtable_name.span.start as usize,
207                        Some(subtable_name.span.end as usize),
208                        ErrorKind::DuplicateKey {
209                            key: subtable_name.to_string(),
210                            first: previous_key.span,
211                        },
212                    ));
213                }
214                Some((_, dotted_entries)) => dotted_entries.values,
215                None => Vec::new(),
216            };
217
218            match table.entry(subtable_name.clone()) {
219                Entry::Vacant(vac) => {
220                    let subtable_span = Self::get_table_span(&self.raw_tables[subtable_idx]);
221                    let subtable_idx = DeserializeTableIdx {
222                        table_idx: subtable_idx,
223                        depth: header.len() + 1,
224                        idx_range: range.clone(),
225                    };
226                    let entry = self.deserialize_entry(subtable_idx, dotted_entries)?;
227                    vac.insert(Value::with_span(entry, subtable_span));
228                }
229                Entry::Occupied(occ) => {
230                    return Err(self.de.error(
231                        subtable_name.span.start as usize,
232                        Some(subtable_name.span.end as usize),
233                        ErrorKind::DuplicateKey {
234                            key: subtable_name.to_string(),
235                            first: occ.key().span,
236                        },
237                    ));
238                }
239            };
240        }
241
242        for (key, val) in dotted_keys_map {
243            let val_span = val.span.unwrap();
244            let val = Val {
245                e: E::DottedTable(val),
246                start: val_span.start,
247                end: val_span.end,
248            };
249            table_insert(&mut table, key, val, self.de)?;
250        }
251
252        Ok(table)
253    }
254
255    fn get_matching_tables(&self, header: &[DeStr<'de>], range: &Range<usize>) -> &'b [usize] {
256        let matching_tables = self
257            .table_indices
258            .get(header)
259            .map(Vec::as_slice)
260            .unwrap_or_default();
261        Self::get_subslice_in_range(matching_tables, range)
262    }
263    fn get_subtables(&self, header: &[DeStr<'de>], range: &Range<usize>) -> &'b [usize] {
264        let subtables = self
265            .table_pindices
266            .get(header)
267            .map(Vec::as_slice)
268            .unwrap_or_default();
269        Self::get_subslice_in_range(subtables, range)
270    }
271    fn get_subslice_in_range<'a>(slice: &'a [usize], range: &Range<usize>) -> &'a [usize] {
272        let start_idx = slice.partition_point(|idx| *idx < range.start);
273        let end_idx = slice.partition_point(|idx| *idx < range.end);
274        &slice[start_idx..end_idx]
275    }
276
277    fn get_table_span(ttable: &Table<'de>) -> Span {
278        ttable.values.as_ref().and_then(|v| v.span).map_or_else(
279            || Span::new(ttable.at, ttable.end),
280            |span| Span::new(ttable.at.min(span.start), ttable.end.max(span.end)),
281        )
282    }
283}
284
285fn to_value<'de>(val: Val<'de>, de: &Deserializer<'de>) -> Result<Value<'de>, Error> {
286    let value = match val.e {
287        E::String(s) => ValueInner::String(s),
288        E::Boolean(b) => ValueInner::Boolean(b),
289        E::Integer(i) => ValueInner::Integer(i),
290        E::Float(f) => ValueInner::Float(f),
291        E::Array(arr) => {
292            let mut varr = Vec::new();
293            for val in arr {
294                varr.push(to_value(val, de)?);
295            }
296            ValueInner::Array(varr)
297        }
298        E::DottedTable(tab) | E::InlineTable(tab) => {
299            let mut ntable = value::Table::new();
300
301            for (k, v) in tab.values {
302                table_insert(&mut ntable, k, v, de)?;
303            }
304
305            ValueInner::Table(ntable)
306        }
307    };
308
309    Ok(Value::with_span(value, Span::new(val.start, val.end)))
310}
311
312fn table_insert<'de>(
313    table: &mut value::Table<'de>,
314    key: Key<'de>,
315    val: Val<'de>,
316    de: &Deserializer<'de>,
317) -> Result<(), Error> {
318    match table.entry(key.clone()) {
319        Entry::Occupied(occ) => Err(de.error(
320            key.span.start as usize,
321            Some(key.span.end as usize),
322            ErrorKind::DuplicateKey {
323                key: key.name.to_string(),
324                first: occ.key().span,
325            },
326        )),
327        Entry::Vacant(vac) => {
328            vac.insert(to_value(val, de)?);
329            Ok(())
330        }
331    }
332}
333
334// Builds a datastructure that allows for efficient sublinear lookups. The
335// returned BTreeMap contains a mapping from table header (like [a.b.c]) to list
336// of tables with that precise name. The tables are being identified by their
337// index in the passed slice. We use a list as the implementation uses this data
338// structure for arrays as well as tables, so if any top level [[name]] array
339// contains multiple entries, there are multiple entries in the list. The lookup
340// is performed in the `SeqAccess` implementation of `MapVisitor`. The lists are
341// ordered, which we exploit in the search code by using bisection.
342fn build_table_indices<'de>(tables: &[Table<'de>]) -> BTreeMap<InlineVec<DeStr<'de>>, Vec<usize>> {
343    let mut res = BTreeMap::new();
344    for (i, table) in tables.iter().enumerate() {
345        let header = table
346            .header
347            .iter()
348            .map(|v| v.name.clone())
349            .collect::<InlineVec<_>>();
350        res.entry(header).or_insert_with(Vec::new).push(i);
351    }
352    res
353}
354
355// Builds a datastructure that allows for efficient sublinear lookups. The
356// returned BTreeMap contains a mapping from table header (like [a.b.c]) to list
357// of tables whose name starts with the specified name and is strictly longer.
358// So searching for [a.b] would give both [a.b.c.d] as well as [a.b.e], but not
359// [a.b] itself. The tables are being identified by their index in the passed
360// slice.
361//
362// A list is used for two reasons: First, the implementation also stores arrays
363// in the same data structure and any top level array of size 2 or greater
364// creates multiple entries in the list with the same shared name. Second, there
365// can be multiple tables sharing the same prefix.
366//
367// The lookup is performed in the `MapAccess` implementation of `MapVisitor`.
368// The lists are ordered, which we exploit in the search code by using
369// bisection.
370fn build_table_pindices<'de>(tables: &[Table<'de>]) -> BTreeMap<InlineVec<DeStr<'de>>, Vec<usize>> {
371    let mut res = BTreeMap::new();
372    for (i, table) in tables.iter().enumerate() {
373        let header = table
374            .header
375            .iter()
376            .map(|v| v.name.clone())
377            .collect::<InlineVec<_>>();
378        for len in 0..header.len() {
379            res.entry(header[..len].into())
380                .or_insert_with(Vec::new)
381                .push(i);
382        }
383    }
384    res
385}
386
/// One section of the document as read by the first pass: a `[header]` /
/// `[[header]]` together with the key/value lines that follow it. The
/// key/value lines that precede the first header are collected in a table with
/// an empty `header`.
struct Table<'de> {
    /// Offset where the header starts (0 for the header-less leading table).
    at: u32,
    /// Offset where header parsing stopped (0 for the header-less leading table).
    end: u32,
    /// The header path, one key per dot-separated component.
    header: InlineVec<Key<'de>>,
    /// The table's entries. `None` for the leading table until its first
    /// key/value line is seen, and again once the entries have been taken out
    /// during deserialization.
    values: Option<TableValues<'de>>,
    /// `true` when the header was written with double brackets (`[[header]]`).
    array: bool,
}
394
/// The entries of a table (section body, inline table or dotted-key table).
struct TableValues<'de> {
    /// Key/value pairs in the order they were added.
    values: Vec<TablePair<'de>>,
    /// Source span associated with the entries, when one has been recorded.
    /// For section bodies this is the union of the spans of their key/value
    /// lines.
    span: Option<Span>,
}
399
// The `allow` silences clippy's hint that this impl could be derived.
#[allow(clippy::derivable_impls)]
impl Default for TableValues<'_> {
    /// Creates an empty entry list with no recorded span.
    fn default() -> Self {
        Self {
            values: Vec::new(),
            span: None,
        }
    }
}
409
410impl<'a> Deserializer<'a> {
411    fn new(input: &'a str) -> Deserializer<'a> {
412        Deserializer {
413            tokens: Tokenizer::new(input),
414            input,
415        }
416    }
417
418    fn tables(&mut self) -> Result<Vec<Table<'a>>, Error> {
419        let mut tables = Vec::new();
420        let mut cur_table = Table {
421            at: 0,
422            end: 0,
423            header: InlineVec::new(),
424            values: None,
425            array: false,
426        };
427
428        while let Some(line) = self.line()? {
429            match line {
430                Line::Table {
431                    at,
432                    end,
433                    mut header,
434                    array,
435                } => {
436                    if !cur_table.header.is_empty() || cur_table.values.is_some() {
437                        tables.push(cur_table);
438                    }
439                    cur_table = Table {
440                        at,
441                        end,
442                        header: InlineVec::new(),
443                        values: Some(TableValues::default()),
444                        array,
445                    };
446                    while let Some(part) = header.next().map_err(|e| self.token_error(e))? {
447                        cur_table.header.push(part);
448                    }
449                    cur_table.end = header.tokens.current() as u32;
450                }
451                Line::KeyValue {
452                    key,
453                    value,
454                    at,
455                    end,
456                } => {
457                    let table_values = cur_table.values.get_or_insert_with(|| TableValues {
458                        values: Vec::new(),
459                        span: None,
460                    });
461                    self.add_dotted_key(key, value, table_values)?;
462                    match table_values.span {
463                        Some(ref mut span) => {
464                            span.start = span.start.min(at);
465                            span.end = span.end.max(end);
466                        }
467                        None => {
468                            table_values.span = Some(Span::new(at, end));
469                        }
470                    }
471                }
472            }
473        }
474        if !cur_table.header.is_empty() || cur_table.values.is_some() {
475            tables.push(cur_table);
476        }
477        Ok(tables)
478    }
479
480    fn line(&mut self) -> Result<Option<Line<'a>>, Error> {
481        loop {
482            self.eat_whitespace();
483            if self.eat_comment()? {
484                continue;
485            }
486            if self.eat(Token::Newline)? {
487                continue;
488            }
489            break;
490        }
491
492        match self.peek()? {
493            Some((_, Token::LeftBracket)) => self.table_header().map(Some),
494            Some(_) => self.key_value().map(Some),
495            None => Ok(None),
496        }
497    }
498
499    fn table_header(&mut self) -> Result<Line<'a>, Error> {
500        let start = self.tokens.current() as u32;
501        self.expect(Token::LeftBracket)?;
502        let array = self.eat(Token::LeftBracket)?;
503        let ret = Header::new(self.tokens.clone(), array);
504        self.tokens.skip_to_newline();
505        let end = self.tokens.current() as u32;
506        Ok(Line::Table {
507            at: start,
508            end,
509            header: ret,
510            array,
511        })
512    }
513
514    fn key_value(&mut self) -> Result<Line<'a>, Error> {
515        let start = self.tokens.current() as u32;
516        let key = self.dotted_key()?;
517        self.eat_whitespace();
518        self.expect(Token::Equals)?;
519        self.eat_whitespace();
520
521        let value = self.value()?;
522        let end = self.tokens.current() as u32;
523        self.eat_whitespace();
524        if !self.eat_comment()? {
525            self.eat_newline_or_eof()?;
526        }
527
528        Ok(Line::KeyValue {
529            key,
530            value,
531            at: start,
532            end,
533        })
534    }
535
536    fn value(&mut self) -> Result<Val<'a>, Error> {
537        let at = self.tokens.current();
538        let value = match self.next()? {
539            Some((Span { start, end }, Token::String { val, .. })) => Val {
540                e: E::String(val),
541                start,
542                end,
543            },
544            Some((Span { start, end }, Token::Keylike("true"))) => Val {
545                e: E::Boolean(true),
546                start,
547                end,
548            },
549            Some((Span { start, end }, Token::Keylike("false"))) => Val {
550                e: E::Boolean(false),
551                start,
552                end,
553            },
554            Some((span, Token::Keylike(key))) => self.parse_keylike(at, span, key)?,
555            Some((span, Token::Plus)) => self.number_leading_plus(span)?,
556            Some((Span { start, .. }, Token::LeftBrace)) => {
557                self.inline_table().map(|(Span { end, .. }, table)| Val {
558                    e: E::InlineTable(table),
559                    start,
560                    end,
561                })?
562            }
563            Some((Span { start, .. }, Token::LeftBracket)) => {
564                self.array().map(|(Span { end, .. }, array)| Val {
565                    e: E::Array(array),
566                    start,
567                    end,
568                })?
569            }
570            Some(token) => {
571                return Err(self.error(
572                    at,
573                    Some(token.0.end as usize),
574                    ErrorKind::Wanted {
575                        expected: "a value",
576                        found: token.1.describe(),
577                    },
578                ));
579            }
580            None => return Err(self.eof()),
581        };
582        Ok(value)
583    }
584
585    fn parse_keylike(&mut self, at: usize, span: Span, key: &'a str) -> Result<Val<'a>, Error> {
586        if key == "inf" || key == "nan" {
587            return self.number(span, key);
588        }
589
590        let first_char = key.chars().next().expect("key should not be empty here");
591        match first_char {
592            '-' | '0'..='9' => self.number(span, key),
593            _ => Err(self.error(at, Some(span.end as usize), ErrorKind::UnquotedString)),
594        }
595    }
596
597    fn number(&mut self, Span { start, end }: Span, s: &'a str) -> Result<Val<'a>, Error> {
598        let to_integer = |f| Val {
599            e: E::Integer(f),
600            start,
601            end,
602        };
603        if let Some(s) = s.strip_prefix("0x") {
604            self.integer(s, 16).map(to_integer)
605        } else if let Some(s) = s.strip_prefix("0o") {
606            self.integer(s, 8).map(to_integer)
607        } else if let Some(s) = s.strip_prefix("0b") {
608            self.integer(s, 2).map(to_integer)
609        } else if s.contains('e') || s.contains('E') {
610            self.float(s, None).map(|f| Val {
611                e: E::Float(f),
612                start,
613                end: self.tokens.current() as u32,
614            })
615        } else if self.eat(Token::Period)? {
616            let at = self.tokens.current();
617            match self.next()? {
618                Some((Span { .. }, Token::Keylike(after))) => {
619                    self.float(s, Some(after)).map(|f| Val {
620                        e: E::Float(f),
621                        start,
622                        end: self.tokens.current() as u32,
623                    })
624                }
625                _ => Err(self.error(at, Some(end as usize), ErrorKind::InvalidNumber)),
626            }
627        } else if s == "inf" {
628            Ok(Val {
629                e: E::Float(f64::INFINITY),
630                start,
631                end,
632            })
633        } else if s == "-inf" {
634            Ok(Val {
635                e: E::Float(f64::NEG_INFINITY),
636                start,
637                end,
638            })
639        } else if s == "nan" {
640            Ok(Val {
641                e: E::Float(f64::NAN.copysign(1.0)),
642                start,
643                end,
644            })
645        } else if s == "-nan" {
646            Ok(Val {
647                e: E::Float(f64::NAN.copysign(-1.0)),
648                start,
649                end,
650            })
651        } else {
652            self.integer(s, 10).map(to_integer)
653        }
654    }
655
656    fn number_leading_plus(&mut self, Span { start, end }: Span) -> Result<Val<'a>, Error> {
657        let start_token = self.tokens.current();
658        match self.next()? {
659            Some((Span { end, .. }, Token::Keylike(s))) => self.number(Span { start, end }, s),
660            _ => Err(self.error(start_token, Some(end as usize), ErrorKind::InvalidNumber)),
661        }
662    }
663
664    fn integer(&self, s: &'a str, radix: u32) -> Result<i64, Error> {
665        let allow_sign = radix == 10;
666        let allow_leading_zeros = radix != 10;
667        let (prefix, suffix) = self.parse_integer(s, allow_sign, allow_leading_zeros, radix)?;
668        let start = self.tokens.substr_offset(s);
669        if !suffix.is_empty() {
670            return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber));
671        }
672        i64::from_str_radix(prefix.replace('_', "").trim_start_matches('+'), radix)
673            .map_err(|_e| self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber))
674    }
675
676    fn parse_integer(
677        &self,
678        s: &'a str,
679        allow_sign: bool,
680        allow_leading_zeros: bool,
681        radix: u32,
682    ) -> Result<(&'a str, &'a str), Error> {
683        let start = self.tokens.substr_offset(s);
684
685        let mut first = true;
686        let mut first_zero = false;
687        let mut underscore = false;
688        let mut end = s.len();
689        let send = start + s.len();
690        for (i, c) in s.char_indices() {
691            let at = i + start;
692            if i == 0 && (c == '+' || c == '-') && allow_sign {
693                continue;
694            }
695
696            if c == '0' && first {
697                first_zero = true;
698            } else if c.is_digit(radix) {
699                if !first && first_zero && !allow_leading_zeros {
700                    return Err(self.error(at, Some(send), ErrorKind::InvalidNumber));
701                }
702                underscore = false;
703            } else if c == '_' && first {
704                return Err(self.error(at, Some(send), ErrorKind::InvalidNumber));
705            } else if c == '_' && !underscore {
706                underscore = true;
707            } else {
708                end = i;
709                break;
710            }
711            first = false;
712        }
713        if first || underscore {
714            return Err(self.error(start, Some(send), ErrorKind::InvalidNumber));
715        }
716        Ok((&s[..end], &s[end..]))
717    }
718
719    fn float(&mut self, s: &'a str, after_decimal: Option<&'a str>) -> Result<f64, Error> {
720        let (integral, mut suffix) = self.parse_integer(s, true, false, 10)?;
721        let start = self.tokens.substr_offset(integral);
722
723        let mut fraction = None;
724        if let Some(after) = after_decimal {
725            if !suffix.is_empty() {
726                return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber));
727            }
728            let (a, b) = self.parse_integer(after, false, true, 10)?;
729            fraction = Some(a);
730            suffix = b;
731        }
732
733        let mut exponent = None;
734        if suffix.starts_with('e') || suffix.starts_with('E') {
735            let (a, b) = if suffix.len() == 1 {
736                self.eat(Token::Plus)?;
737                match self.next()? {
738                    Some((_, Token::Keylike(s))) => self.parse_integer(s, false, true, 10)?,
739                    _ => {
740                        return Err(self.error(
741                            start,
742                            Some(start + s.len()),
743                            ErrorKind::InvalidNumber,
744                        ));
745                    }
746                }
747            } else {
748                self.parse_integer(&suffix[1..], true, true, 10)?
749            };
750            if !b.is_empty() {
751                return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber));
752            }
753            exponent = Some(a);
754        } else if !suffix.is_empty() {
755            return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber));
756        }
757
758        let mut number = integral
759            .trim_start_matches('+')
760            .chars()
761            .filter(|c| *c != '_')
762            .collect::<String>();
763        if let Some(fraction) = fraction {
764            number.push('.');
765            number.extend(fraction.chars().filter(|c| *c != '_'));
766        }
767        if let Some(exponent) = exponent {
768            number.push('E');
769            number.extend(exponent.chars().filter(|c| *c != '_'));
770        }
771        number
772            .parse()
773            .map_err(|_e| self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber))
774            .and_then(|n: f64| {
775                if n.is_finite() {
776                    Ok(n)
777                } else {
778                    Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber))
779                }
780            })
781    }
782
783    // TODO(#140): shouldn't buffer up this entire table in memory, it'd be
784    // great to defer parsing everything until later.
785    fn inline_table(&mut self) -> Result<(Span, TableValues<'a>), Error> {
786        let mut ret = TableValues::default();
787        self.eat_inline_table_whitespace()?;
788        if let Some(span) = self.eat_spanned(Token::RightBrace)? {
789            return Ok((span, ret));
790        }
791        loop {
792            let key = self.dotted_key()?;
793            self.eat_inline_table_whitespace()?;
794            self.expect(Token::Equals)?;
795            self.eat_inline_table_whitespace()?;
796            let value = self.value()?;
797            self.add_dotted_key(key, value, &mut ret)?;
798
799            self.eat_inline_table_whitespace()?;
800            if let Some(span) = self.eat_spanned(Token::RightBrace)? {
801                return Ok((span, ret));
802            }
803            self.expect(Token::Comma)?;
804            self.eat_inline_table_whitespace()?;
805            if let Some(span) = self.eat_spanned(Token::RightBrace)? {
806                return Ok((span, ret));
807            }
808        }
809    }
810
811    // TODO(#140): shouldn't buffer up this entire array in memory, it'd be
812    // great to defer parsing everything until later.
813    fn array(&mut self) -> Result<(Span, Vec<Val<'a>>), Error> {
814        let mut ret = Vec::new();
815
816        let intermediate = |me: &mut Deserializer<'_>| -> Result<(), Error> {
817            loop {
818                me.eat_whitespace();
819                if !me.eat(Token::Newline)? && !me.eat_comment()? {
820                    break;
821                }
822            }
823            Ok(())
824        };
825
826        loop {
827            intermediate(self)?;
828            if let Some(span) = self.eat_spanned(Token::RightBracket)? {
829                return Ok((span, ret));
830            }
831            let value = self.value()?;
832            ret.push(value);
833            intermediate(self)?;
834            if !self.eat(Token::Comma)? {
835                break;
836            }
837        }
838        intermediate(self)?;
839        let span = self.expect_spanned(Token::RightBracket)?;
840        Ok((span, ret))
841    }
842
843    fn table_key(&mut self) -> Result<Key<'a>, Error> {
844        self.tokens.table_key().map_err(|e| self.token_error(e))
845    }
846
847    fn dotted_key(&mut self) -> Result<Vec<Key<'a>>, Error> {
848        let mut result = Vec::new();
849        result.push(self.table_key()?);
850        self.eat_whitespace();
851        while self.eat(Token::Period)? {
852            self.eat_whitespace();
853            result.push(self.table_key()?);
854            self.eat_whitespace();
855        }
856        Ok(result)
857    }
858
859    /// Stores a value in the appropriate hierarchical structure positioned based on the dotted key.
860    ///
861    /// Given the following definition: `multi.part.key = "value"`, `multi` and `part` are
862    /// intermediate parts which are mapped to the relevant fields in the deserialized type's data
863    /// hierarchy.
864    ///
865    /// # Parameters
866    ///
867    /// * `key_parts`: Each segment of the dotted key, e.g. `part.one` maps to
868    ///   `vec![Cow::Borrowed("part"), Cow::Borrowed("one")].`
869    /// * `value`: The parsed value.
870    /// * `values`: The `Vec` to store the value in.
871    fn add_dotted_key(
872        &self,
873        mut key_parts: Vec<Key<'a>>,
874        value: Val<'a>,
875        values: &mut TableValues<'a>,
876    ) -> Result<(), Error> {
877        let key = key_parts.remove(0);
878        if key_parts.is_empty() {
879            values.values.push((key, value));
880            return Ok(());
881        }
882        match values
883            .values
884            .iter_mut()
885            .find(|&&mut (ref k, _)| k.name == key.name)
886        {
887            Some(&mut (
888                _,
889                Val {
890                    e: E::DottedTable(ref mut v),
891                    ..
892                },
893            )) => {
894                return self.add_dotted_key(key_parts, value, v);
895            }
896            Some(&mut (ref first, _)) => {
897                return Err(self.error(
898                    key.span.start as usize,
899                    Some(value.end as usize),
900                    ErrorKind::DottedKeyInvalidType { first: first.span },
901                ));
902            }
903            None => {}
904        }
905        // The start/end value is somewhat misleading here.
906        let table_values = Val {
907            e: E::DottedTable(TableValues::default()),
908            start: value.start,
909            end: value.end,
910        };
911        values.values.push((key, table_values));
912        let last_i = values.values.len() - 1;
913        if let (
914            _,
915            Val {
916                e: E::DottedTable(ref mut v),
917                ..
918            },
919        ) = values.values[last_i]
920        {
921            self.add_dotted_key(key_parts, value, v)?;
922        }
923        Ok(())
924    }
925
926    fn eat_whitespace(&mut self) {
927        self.tokens.eat_whitespace();
928    }
929
930    fn eat_inline_table_whitespace(&mut self) -> Result<(), Error> {
931        loop {
932            self.eat_whitespace();
933            if !self.eat(Token::Newline)? && !self.eat_comment()? {
934                break;
935            }
936        }
937        Ok(())
938    }
939
940    fn eat_comment(&mut self) -> Result<bool, Error> {
941        self.tokens.eat_comment().map_err(|e| self.token_error(e))
942    }
943
944    fn eat_newline_or_eof(&mut self) -> Result<(), Error> {
945        self.tokens
946            .eat_newline_or_eof()
947            .map_err(|e| self.token_error(e))
948    }
949
950    fn eat(&mut self, expected: Token<'a>) -> Result<bool, Error> {
951        self.tokens.eat(expected).map_err(|e| self.token_error(e))
952    }
953
954    fn eat_spanned(&mut self, expected: Token<'a>) -> Result<Option<Span>, Error> {
955        self.tokens
956            .eat_spanned(expected)
957            .map_err(|e| self.token_error(e))
958    }
959
960    fn expect(&mut self, expected: Token<'a>) -> Result<(), Error> {
961        self.tokens
962            .expect(expected)
963            .map_err(|e| self.token_error(e))
964    }
965
966    fn expect_spanned(&mut self, expected: Token<'a>) -> Result<Span, Error> {
967        self.tokens
968            .expect_spanned(expected)
969            .map_err(|e| self.token_error(e))
970    }
971
972    fn next(&mut self) -> Result<Option<(Span, Token<'a>)>, Error> {
973        self.tokens.step().map_err(|e| self.token_error(e))
974    }
975
976    fn peek(&mut self) -> Result<Option<(Span, Token<'a>)>, Error> {
977        self.tokens.peek().map_err(|e| self.token_error(e))
978    }
979
980    fn eof(&self) -> Error {
981        self.error(self.input.len(), None, ErrorKind::UnexpectedEof)
982    }
983
984    fn token_error(&self, error: TokenError) -> Error {
985        match error {
986            TokenError::InvalidCharInString(at, ch) => {
987                self.error(at, None, ErrorKind::InvalidCharInString(ch))
988            }
989            TokenError::InvalidEscape(at, ch) => self.error(at, None, ErrorKind::InvalidEscape(ch)),
990            TokenError::InvalidEscapeValue(at, len, v) => {
991                self.error(at, Some(at + len), ErrorKind::InvalidEscapeValue(v))
992            }
993            TokenError::InvalidHexEscape(at, ch) => {
994                self.error(at, None, ErrorKind::InvalidHexEscape(ch))
995            }
996            TokenError::NewlineInString(at) => {
997                self.error(at, None, ErrorKind::InvalidCharInString('\n'))
998            }
999            TokenError::Unexpected(at, ch) => self.error(at, None, ErrorKind::Unexpected(ch)),
1000            TokenError::UnterminatedString(at) => {
1001                self.error(at, None, ErrorKind::UnterminatedString)
1002            }
1003            TokenError::Wanted {
1004                at,
1005                expected,
1006                found,
1007            } => self.error(
1008                at,
1009                Some(at + found.len()),
1010                ErrorKind::Wanted { expected, found },
1011            ),
1012            TokenError::MultilineStringKey(at, end) => {
1013                self.error(at, Some(end), ErrorKind::MultilineStringKey)
1014            }
1015        }
1016    }
1017
1018    fn error(&self, start: usize, end: Option<usize>, kind: ErrorKind) -> Error {
1019        let span = Span::new(start as u32, end.unwrap_or(start + 1) as u32);
1020        let line_info = Some(self.to_linecol(start));
1021        Error {
1022            span,
1023            kind,
1024            line_info,
1025        }
1026    }
1027
1028    /// Converts a byte offset from an error message to a (line, column) pair
1029    ///
1030    /// All indexes are 0-based.
1031    fn to_linecol(&self, offset: usize) -> (usize, usize) {
1032        let mut cur = 0;
1033        // Use split_terminator instead of lines so that if there is a `\r`, it
1034        // is included in the offset calculation. The `+1` values below account
1035        // for the `\n`.
1036        for (i, line) in self.input.split_terminator('\n').enumerate() {
1037            if cur + line.len() + 1 > offset {
1038                return (i, offset - cur);
1039            }
1040            cur += line.len() + 1;
1041        }
1042        (self.input.lines().count(), 0)
1043    }
1044}
1045
1046impl std::convert::From<Error> for std::io::Error {
1047    fn from(e: Error) -> Self {
1048        std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
1049    }
1050}
1051
/// A table header or a key/value pair, as spelled out by the two variants.
///
/// NOTE(review): `at`/`end` look like byte offsets into the input delimiting the item —
/// confirm against the code that constructs these values.
enum Line<'a> {
    /// A table header.
    Table {
        at: u32,
        end: u32,
        /// Cursor over the key segments of the header.
        header: Header<'a>,
        /// NOTE(review): presumably `true` for an array-of-tables header (`[[...]]`) — confirm.
        array: bool,
    },
    /// A key/value pair.
    KeyValue {
        at: u32,
        end: u32,
        /// Segments of the (possibly dotted) key, in source order.
        key: Vec<Key<'a>>,
        /// The parsed right-hand side.
        value: Val<'a>,
    },
}
1066
1067struct Header<'a> {
1068    first: bool,
1069    array: bool,
1070    tokens: Tokenizer<'a>,
1071}
1072
1073impl<'a> Header<'a> {
1074    fn new(tokens: Tokenizer<'a>, array: bool) -> Header<'a> {
1075        Header {
1076            first: true,
1077            array,
1078            tokens,
1079        }
1080    }
1081
1082    fn next(&mut self) -> Result<Option<Key<'a>>, TokenError> {
1083        self.tokens.eat_whitespace();
1084
1085        if self.first || self.tokens.eat(Token::Period)? {
1086            self.first = false;
1087            self.tokens.eat_whitespace();
1088            self.tokens.table_key().map(Some)
1089        } else {
1090            self.tokens.expect(Token::RightBracket)?;
1091            if self.array {
1092                self.tokens.expect(Token::RightBracket)?;
1093            }
1094
1095            self.tokens.eat_whitespace();
1096            if !self.tokens.eat_comment()? {
1097                self.tokens.eat_newline_or_eof()?;
1098            }
1099            Ok(None)
1100        }
1101    }
1102}
1103
/// A parsed value together with its `start`/`end` position.
///
/// `end` is used as the end offset of an error span in `add_dotted_key`; `start` is
/// presumably the matching start offset into the input — confirm against the code that
/// constructs these values.
struct Val<'a> {
    /// The parsed payload.
    e: E<'a>,
    start: u32,
    end: u32,
}
1109
1110enum E<'a> {
1111    Integer(i64),
1112    Float(f64),
1113    Boolean(bool),
1114    String(DeStr<'a>),
1115    Array(Vec<Val<'a>>),
1116    InlineTable(TableValues<'a>),
1117    DottedTable(TableValues<'a>),
1118}
1119
1120impl E<'_> {
1121    #[allow(dead_code)]
1122    fn type_name(&self) -> &'static str {
1123        match *self {
1124            E::String(..) => "string",
1125            E::Integer(..) => "integer",
1126            E::Float(..) => "float",
1127            E::Boolean(..) => "boolean",
1128            E::Array(..) => "array",
1129            E::InlineTable(..) => "inline table",
1130            E::DottedTable(..) => "dotted table",
1131        }
1132    }
1133}