toml_span/
de.rs

1//! Core deserialization logic that deserializes toml content to [`Value`]
2
3use crate::{
4    Span,
5    error::{Error, ErrorKind},
6    tokens::{Error as TokenError, Token, Tokenizer},
7    value::{self, Key, Value, ValueInner},
8};
9use smallvec::SmallVec;
10use std::{
11    borrow::Cow,
12    collections::{BTreeMap, btree_map::Entry},
13    ops::Range,
14};
15
// string type used for key and header names: a `Cow` tied to the input
// lifetime `'de`
type DeStr<'de> = Cow<'de, str>;
// a single `key = value` entry as collected by the parser, before it is
// converted into a `Value`
type TablePair<'de> = (Key<'de>, Val<'de>);
// `SmallVec` with inline room for 5 elements; used for table headers and
// header-based map keys
type InlineVec<T> = SmallVec<[T; 5]>;
19
20/// Parses a toml string into a [`ValueInner::Table`]
21pub fn parse(s: &str) -> Result<Value<'_>, Error> {
22    let mut de = Deserializer::new(s);
23
24    let raw_tables = de.tables()?;
25    let mut ctx = DeserializeCtx {
26        table_indices: &build_table_indices(&raw_tables),
27        table_pindices: &build_table_pindices(&raw_tables),
28        raw_tables,
29        de: &de,
30    };
31    let root = ctx.deserialize_entry(
32        DeserializeTableIdx {
33            table_idx: 0,
34            depth: 0,
35            idx_range: 0..ctx.raw_tables.len(),
36        },
37        Vec::new(),
38    )?;
39
40    Ok(Value::with_span(root, Span::new(0, s.len())))
41}
42
// Parser state used to turn the input into a flat list of tables.
struct Deserializer<'a> {
    // the complete toml source handed to `Deserializer::new`
    input: &'a str,
    // tokenizer created over `input`; advanced as the parser consumes tokens
    tokens: Tokenizer<'a>,
}
47
// State shared while the flat list of parsed tables is assembled into a nested
// value tree. Both maps are built from `raw_tables` in `parse`.
struct DeserializeCtx<'de, 'b> {
    // all parsed tables in document order; the `values` of a table are taken
    // out (left as `None`) once the table has been deserialized
    raw_tables: Vec<Table<'de>>,
    // maps table headers to a list of tables with that exact header
    // (the list contains indices into `raw_tables` and is ordered)
    table_indices: &'b BTreeMap<InlineVec<DeStr<'de>>, Vec<usize>>,
    // maps table headers to a list of all subtables
    // (the list contains indices into `raw_tables` and is ordered)
    table_pindices: &'b BTreeMap<InlineVec<DeStr<'de>>, Vec<usize>>,
    // the parser that produced `raw_tables`; used here to build errors
    de: &'b Deserializer<'de>,
}
// specifies the table/array that is currently being deserialized, namely the
// table/array with the header `raw_tables[table_idx].header[0..depth]`
struct DeserializeTableIdx {
    // index of the first occurrence of the desired header (even as a prefix)
    table_idx: usize,
    // number of leading header components that make up the desired header;
    // 0 selects the empty header of the document root
    depth: usize,
    // range of `raw_tables` indices to consider, used to isolate subtables of
    // different array entries
    idx_range: Range<usize>,
}
68impl DeserializeTableIdx {
69    fn get_header<'de>(&self, raw_tables: &[Table<'de>]) -> InlineVec<DeStr<'de>> {
70        if self.depth == 0 {
71            return InlineVec::new();
72        }
73
74        raw_tables[self.table_idx].header[0..self.depth]
75            .iter()
76            .map(|key| key.name.clone())
77            .collect()
78    }
79}
80impl<'de, 'b> DeserializeCtx<'de, 'b> {
    // deserialize the table/array given by `table_idx`
    //
    // The entry is treated as an array of tables when there is at least one
    // table with exactly the current header inside `idx_range` and all of them
    // were declared as array entries; otherwise it is treated as a plain table.
    //
    // Errors:
    // * `ErrorKind::RedefineAsArray` when the header already appeared as a
    //   prefix of an earlier table before its first array declaration
    // * `ErrorKind::DuplicateTable` when a non-array header is declared more
    //   than once
    // * any error produced while deserializing the contained tables
    //
    // Panics when the entry is an array but `additional_values` is not empty.
    fn deserialize_entry(
        &mut self,
        table_idx: DeserializeTableIdx,
        // values defined via dotted keys should be passed on to the corresponding subtable
        additional_values: Vec<TablePair<'de>>,
    ) -> Result<value::ValueInner<'de>, Error> {
        let current_header = table_idx.get_header(&self.raw_tables);
        // indices of all tables declared with exactly `current_header`
        let matching_tables = self.get_matching_tables(&current_header, &table_idx.idx_range);

        // `all` is true for an empty list, hence the extra emptiness check
        let is_array = matching_tables
            .iter()
            .all(|idx| self.raw_tables[*idx].array)
            && !matching_tables.is_empty();

        if is_array {
            // catch invalid cases like:
            //   [a.b]
            //   [[a]]
            if table_idx.table_idx < matching_tables[0] {
                let array_tbl = &self.raw_tables[matching_tables[0]];
                return Err(self.de.error(
                    array_tbl.at,
                    Some(array_tbl.end),
                    ErrorKind::RedefineAsArray,
                ));
            }
            assert!(additional_values.is_empty());

            let mut array = value::Array::new();
            for (i, array_entry_idx) in matching_tables.iter().copied().enumerate() {
                // an array entry owns every table up to the next entry of the
                // same array (or up to the end of the considered range)
                let entry_range_end = matching_tables
                    .get(i + 1)
                    .copied()
                    .unwrap_or(table_idx.idx_range.end);

                // the span has to be computed before the values are taken out
                let span = Self::get_table_span(&self.raw_tables[array_entry_idx]);
                let values = self.raw_tables[array_entry_idx].values.take().unwrap();
                let array_entry = self.deserialize_as_table(
                    &current_header,
                    array_entry_idx..entry_range_end,
                    values.values.into_iter(),
                )?;
                array.push(Value::with_span(ValueInner::Table(array_entry), span));
            }
            Ok(ValueInner::Array(array))
        } else {
            // more than one declaration of a header that is not (only) an array
            if matching_tables.len() > 1 {
                let first_tbl = &self.raw_tables[matching_tables[0]];
                let second_tbl = &self.raw_tables[matching_tables[1]];
                return Err(self.de.error(
                    second_tbl.at,
                    Some(second_tbl.end),
                    ErrorKind::DuplicateTable {
                        name: current_header.last().unwrap().to_string(),
                        first: Span::new(first_tbl.at, first_tbl.end),
                    },
                ));
            }

            // values of the (at most one) explicitly declared table, followed
            // by the values that were defined through dotted keys
            let values = matching_tables
                .first()
                .map(|idx| {
                    self.raw_tables[*idx]
                        .values
                        .take()
                        .unwrap()
                        .values
                        .into_iter()
                })
                .unwrap_or_default()
                .chain(additional_values);
            let subtable =
                self.deserialize_as_table(&current_header, table_idx.idx_range, values)?;

            Ok(ValueInner::Table(subtable))
        }
    }
    // Builds the table with the given `header` from its own key/value pairs
    // (`values`) and from all tables inside `range` whose header extends
    // `header`.
    //
    // Tables created through dotted keys are held back first, so that their
    // entries can be merged into a subtable of the same name that is declared
    // with a longer header. Redeclaring such a table with a header of its own
    // is reported as `ErrorKind::DuplicateKey`, as is any other key that is
    // defined twice.
    fn deserialize_as_table(
        &mut self,
        header: &[DeStr<'de>],
        range: Range<usize>,
        values: impl Iterator<Item = TablePair<'de>>,
    ) -> Result<value::Table<'de>, Error> {
        let mut table = value::Table::new();
        // tables defined via dotted keys, keyed by their first key component
        let mut dotted_keys_map = BTreeMap::new();

        for (key, val) in values {
            match val.e {
                E::DottedTable(mut tbl_vals) => {
                    // remember the span, it is needed again if the table ends
                    // up being inserted as is (see the last loop)
                    tbl_vals.span = Some(Span::new(val.start, val.end));
                    dotted_keys_map.insert(key, tbl_vals);
                }
                _ => table_insert(&mut table, key, val, self.de)?,
            }
        }

        let subtables = self.get_subtables(header, &range);
        for &subtable_idx in subtables {
            // the values have been taken out already, i.e. this table was
            // handled as part of an earlier entry
            if self.raw_tables[subtable_idx].values.is_none() {
                continue;
            }

            // first header component below the current table
            let subtable_name = &self.raw_tables[subtable_idx].header[header.len()];

            let dotted_entries = match dotted_keys_map.remove_entry(subtable_name) {
                // Detect redefinitions of tables created via dotted keys, as
                // these are considered errors, e.g:
                //   apple.color = "red"
                //   [apple]  # INVALID
                // However adding subtables is allowed:
                //   apple.color = "red"
                //   [apple.texture]  # VALID
                Some((previous_key, _))
                    if self.raw_tables[subtable_idx].header.len() == header.len() + 1 =>
                {
                    return Err(self.de.error(
                        subtable_name.span.start,
                        Some(subtable_name.span.end),
                        ErrorKind::DuplicateKey {
                            key: subtable_name.to_string(),
                            first: previous_key.span,
                        },
                    ));
                }
                Some((_, dotted_entries)) => dotted_entries.values,
                None => Vec::new(),
            };

            match table.entry(subtable_name.clone()) {
                Entry::Vacant(vac) => {
                    let subtable_span = Self::get_table_span(&self.raw_tables[subtable_idx]);
                    let subtable_idx = DeserializeTableIdx {
                        table_idx: subtable_idx,
                        depth: header.len() + 1,
                        idx_range: range.clone(),
                    };
                    let entry = self.deserialize_entry(subtable_idx, dotted_entries)?;
                    vac.insert(Value::with_span(entry, subtable_span));
                }
                // the name is already taken by a value or by an earlier table
                Entry::Occupied(occ) => {
                    return Err(self.de.error(
                        subtable_name.span.start,
                        Some(subtable_name.span.end),
                        ErrorKind::DuplicateKey {
                            key: subtable_name.to_string(),
                            first: occ.key().span,
                        },
                    ));
                }
            };
        }

        // dotted tables that were not merged into a declared subtable are
        // inserted like any other value
        for (key, val) in dotted_keys_map {
            let val_span = val.span.unwrap();
            let val = Val {
                e: E::DottedTable(val),
                start: val_span.start,
                end: val_span.end,
            };
            table_insert(&mut table, key, val, self.de)?;
        }

        Ok(table)
    }
246
247    fn get_matching_tables(&self, header: &[DeStr<'de>], range: &Range<usize>) -> &'b [usize] {
248        let matching_tables = self
249            .table_indices
250            .get(header)
251            .map(Vec::as_slice)
252            .unwrap_or_default();
253        Self::get_subslice_in_range(matching_tables, range)
254    }
255    fn get_subtables(&self, header: &[DeStr<'de>], range: &Range<usize>) -> &'b [usize] {
256        let subtables = self
257            .table_pindices
258            .get(header)
259            .map(Vec::as_slice)
260            .unwrap_or_default();
261        Self::get_subslice_in_range(subtables, range)
262    }
263    fn get_subslice_in_range<'a>(slice: &'a [usize], range: &Range<usize>) -> &'a [usize] {
264        let start_idx = slice.partition_point(|idx| *idx < range.start);
265        let end_idx = slice.partition_point(|idx| *idx < range.end);
266        &slice[start_idx..end_idx]
267    }
268
269    fn get_table_span(ttable: &Table<'de>) -> Span {
270        ttable.values.as_ref().and_then(|v| v.span).map_or_else(
271            || Span::new(ttable.at, ttable.end),
272            |span| Span::new(ttable.at.min(span.start), ttable.end.max(span.end)),
273        )
274    }
275}
276
277fn to_value<'de>(val: Val<'de>, de: &Deserializer<'de>) -> Result<Value<'de>, Error> {
278    let value = match val.e {
279        E::String(s) => ValueInner::String(s),
280        E::Boolean(b) => ValueInner::Boolean(b),
281        E::Integer(i) => ValueInner::Integer(i),
282        E::Float(f) => ValueInner::Float(f),
283        E::Array(arr) => {
284            let mut varr = Vec::new();
285            for val in arr {
286                varr.push(to_value(val, de)?);
287            }
288            ValueInner::Array(varr)
289        }
290        E::DottedTable(tab) | E::InlineTable(tab) => {
291            let mut ntable = value::Table::new();
292
293            for (k, v) in tab.values {
294                table_insert(&mut ntable, k, v, de)?;
295            }
296
297            ValueInner::Table(ntable)
298        }
299    };
300
301    Ok(Value::with_span(value, Span::new(val.start, val.end)))
302}
303
304fn table_insert<'de>(
305    table: &mut value::Table<'de>,
306    key: Key<'de>,
307    val: Val<'de>,
308    de: &Deserializer<'de>,
309) -> Result<(), Error> {
310    match table.entry(key.clone()) {
311        Entry::Occupied(occ) => Err(de.error(
312            key.span.start,
313            Some(key.span.end),
314            ErrorKind::DuplicateKey {
315                key: key.name.to_string(),
316                first: occ.key().span,
317            },
318        )),
319        Entry::Vacant(vac) => {
320            vac.insert(to_value(val, de)?);
321            Ok(())
322        }
323    }
324}
325
326// Builds a datastructure that allows for efficient sublinear lookups. The
327// returned BTreeMap contains a mapping from table header (like [a.b.c]) to list
328// of tables with that precise name. The tables are being identified by their
329// index in the passed slice. We use a list as the implementation uses this data
330// structure for arrays as well as tables, so if any top level [[name]] array
331// contains multiple entries, there are multiple entries in the list. The lookup
332// is performed in the `SeqAccess` implementation of `MapVisitor`. The lists are
333// ordered, which we exploit in the search code by using bisection.
334fn build_table_indices<'de>(tables: &[Table<'de>]) -> BTreeMap<InlineVec<DeStr<'de>>, Vec<usize>> {
335    let mut res = BTreeMap::new();
336    for (i, table) in tables.iter().enumerate() {
337        let header = table
338            .header
339            .iter()
340            .map(|v| v.name.clone())
341            .collect::<InlineVec<_>>();
342        res.entry(header).or_insert_with(Vec::new).push(i);
343    }
344    res
345}
346
347// Builds a datastructure that allows for efficient sublinear lookups. The
348// returned BTreeMap contains a mapping from table header (like [a.b.c]) to list
349// of tables whose name starts with the specified name and is strictly longer.
350// So searching for [a.b] would give both [a.b.c.d] as well as [a.b.e], but not
351// [a.b] itself. The tables are being identified by their index in the passed
352// slice.
353//
354// A list is used for two reasons: First, the implementation also stores arrays
355// in the same data structure and any top level array of size 2 or greater
356// creates multiple entries in the list with the same shared name. Second, there
357// can be multiple tables sharing the same prefix.
358//
359// The lookup is performed in the `MapAccess` implementation of `MapVisitor`.
360// The lists are ordered, which we exploit in the search code by using
361// bisection.
362fn build_table_pindices<'de>(tables: &[Table<'de>]) -> BTreeMap<InlineVec<DeStr<'de>>, Vec<usize>> {
363    let mut res = BTreeMap::new();
364    for (i, table) in tables.iter().enumerate() {
365        let header = table
366            .header
367            .iter()
368            .map(|v| v.name.clone())
369            .collect::<InlineVec<_>>();
370        for len in 0..header.len() {
371            res.entry(header[..len].into())
372                .or_insert_with(Vec::new)
373                .push(i);
374        }
375    }
376    res
377}
378
// A table as it appears in the document: one header (`[a.b]` or `[[a.b]]`)
// together with the key/value pairs that follow it. The key/value pairs that
// precede the first header are collected in a table with an empty header.
struct Table<'de> {
    // offset at which the header starts (0 for the header-less table)
    at: usize,
    // offset just past the parsed header (0 for the header-less table)
    end: usize,
    // the key components of the header, empty for the header-less table
    header: InlineVec<Key<'de>>,
    // key/value pairs of the table; `None` until the header-less table
    // receives its first pair, and again after the values have been taken out
    // during deserialization
    values: Option<TableValues<'de>>,
    // true if the header was written with double brackets (`[[...]]`)
    array: bool,
}
386
// The key/value pairs of a table, inline table or dotted-key table.
struct TableValues<'de> {
    // the pairs in the order in which they were added
    values: Vec<TablePair<'de>>,
    // span covered by the pairs, `None` as long as none has been recorded
    span: Option<Span>,
}
391
392#[allow(clippy::derivable_impls)]
393impl Default for TableValues<'_> {
394    fn default() -> Self {
395        Self {
396            values: Vec::new(),
397            span: None,
398        }
399    }
400}
401
402impl<'a> Deserializer<'a> {
403    fn new(input: &'a str) -> Deserializer<'a> {
404        Deserializer {
405            tokens: Tokenizer::new(input),
406            input,
407        }
408    }
409
410    fn tables(&mut self) -> Result<Vec<Table<'a>>, Error> {
411        let mut tables = Vec::new();
412        let mut cur_table = Table {
413            at: 0,
414            end: 0,
415            header: InlineVec::new(),
416            values: None,
417            array: false,
418        };
419
420        while let Some(line) = self.line()? {
421            match line {
422                Line::Table {
423                    at,
424                    end,
425                    mut header,
426                    array,
427                } => {
428                    if !cur_table.header.is_empty() || cur_table.values.is_some() {
429                        tables.push(cur_table);
430                    }
431                    cur_table = Table {
432                        at,
433                        end,
434                        header: InlineVec::new(),
435                        values: Some(TableValues::default()),
436                        array,
437                    };
438                    while let Some(part) = header.next().map_err(|e| self.token_error(e))? {
439                        cur_table.header.push(part);
440                    }
441                    cur_table.end = header.tokens.current();
442                }
443                Line::KeyValue {
444                    key,
445                    value,
446                    at,
447                    end,
448                } => {
449                    let table_values = cur_table.values.get_or_insert_with(|| TableValues {
450                        values: Vec::new(),
451                        span: None,
452                    });
453                    self.add_dotted_key(key, value, table_values)?;
454                    match table_values.span {
455                        Some(ref mut span) => {
456                            span.start = span.start.min(at);
457                            span.end = span.end.max(end);
458                        }
459                        None => {
460                            table_values.span = Some(Span::new(at, end));
461                        }
462                    }
463                }
464            }
465        }
466        if !cur_table.header.is_empty() || cur_table.values.is_some() {
467            tables.push(cur_table);
468        }
469        Ok(tables)
470    }
471
472    fn line(&mut self) -> Result<Option<Line<'a>>, Error> {
473        loop {
474            self.eat_whitespace();
475            if self.eat_comment()? {
476                continue;
477            }
478            if self.eat(Token::Newline)? {
479                continue;
480            }
481            break;
482        }
483
484        match self.peek()? {
485            Some((_, Token::LeftBracket)) => self.table_header().map(Some),
486            Some(_) => self.key_value().map(Some),
487            None => Ok(None),
488        }
489    }
490
491    fn table_header(&mut self) -> Result<Line<'a>, Error> {
492        let start = self.tokens.current();
493        self.expect(Token::LeftBracket)?;
494        let array = self.eat(Token::LeftBracket)?;
495        let ret = Header::new(self.tokens.clone(), array);
496        self.tokens.skip_to_newline();
497        let end = self.tokens.current();
498        Ok(Line::Table {
499            at: start,
500            end,
501            header: ret,
502            array,
503        })
504    }
505
506    fn key_value(&mut self) -> Result<Line<'a>, Error> {
507        let start = self.tokens.current();
508        let key = self.dotted_key()?;
509        self.eat_whitespace();
510        self.expect(Token::Equals)?;
511        self.eat_whitespace();
512
513        let value = self.value()?;
514        let end = self.tokens.current();
515        self.eat_whitespace();
516        if !self.eat_comment()? {
517            self.eat_newline_or_eof()?;
518        }
519
520        Ok(Line::KeyValue {
521            key,
522            value,
523            at: start,
524            end,
525        })
526    }
527
528    fn value(&mut self) -> Result<Val<'a>, Error> {
529        let at = self.tokens.current();
530        let value = match self.next()? {
531            Some((Span { start, end }, Token::String { val, .. })) => Val {
532                e: E::String(val),
533                start,
534                end,
535            },
536            Some((Span { start, end }, Token::Keylike("true"))) => Val {
537                e: E::Boolean(true),
538                start,
539                end,
540            },
541            Some((Span { start, end }, Token::Keylike("false"))) => Val {
542                e: E::Boolean(false),
543                start,
544                end,
545            },
546            Some((span, Token::Keylike(key))) => self.parse_keylike(at, span, key)?,
547            Some((span, Token::Plus)) => self.number_leading_plus(span)?,
548            Some((Span { start, .. }, Token::LeftBrace)) => {
549                self.inline_table().map(|(Span { end, .. }, table)| Val {
550                    e: E::InlineTable(table),
551                    start,
552                    end,
553                })?
554            }
555            Some((Span { start, .. }, Token::LeftBracket)) => {
556                self.array().map(|(Span { end, .. }, array)| Val {
557                    e: E::Array(array),
558                    start,
559                    end,
560                })?
561            }
562            Some(token) => {
563                return Err(self.error(
564                    at,
565                    Some(token.0.end),
566                    ErrorKind::Wanted {
567                        expected: "a value",
568                        found: token.1.describe(),
569                    },
570                ));
571            }
572            None => return Err(self.eof()),
573        };
574        Ok(value)
575    }
576
577    fn parse_keylike(&mut self, at: usize, span: Span, key: &'a str) -> Result<Val<'a>, Error> {
578        if key == "inf" || key == "nan" {
579            return self.number(span, key);
580        }
581
582        let first_char = key.chars().next().expect("key should not be empty here");
583        match first_char {
584            '-' | '0'..='9' => self.number(span, key),
585            _ => Err(self.error(at, Some(span.end), ErrorKind::UnquotedString)),
586        }
587    }
588
589    fn number(&mut self, Span { start, end }: Span, s: &'a str) -> Result<Val<'a>, Error> {
590        let to_integer = |f| Val {
591            e: E::Integer(f),
592            start,
593            end,
594        };
595        if let Some(s) = s.strip_prefix("0x") {
596            self.integer(s, 16).map(to_integer)
597        } else if let Some(s) = s.strip_prefix("0o") {
598            self.integer(s, 8).map(to_integer)
599        } else if let Some(s) = s.strip_prefix("0b") {
600            self.integer(s, 2).map(to_integer)
601        } else if s.contains('e') || s.contains('E') {
602            self.float(s, None).map(|f| Val {
603                e: E::Float(f),
604                start,
605                end: self.tokens.current(),
606            })
607        } else if self.eat(Token::Period)? {
608            let at = self.tokens.current();
609            match self.next()? {
610                Some((Span { .. }, Token::Keylike(after))) => {
611                    self.float(s, Some(after)).map(|f| Val {
612                        e: E::Float(f),
613                        start,
614                        end: self.tokens.current(),
615                    })
616                }
617                _ => Err(self.error(at, Some(end), ErrorKind::InvalidNumber)),
618            }
619        } else if s == "inf" {
620            Ok(Val {
621                e: E::Float(f64::INFINITY),
622                start,
623                end,
624            })
625        } else if s == "-inf" {
626            Ok(Val {
627                e: E::Float(f64::NEG_INFINITY),
628                start,
629                end,
630            })
631        } else if s == "nan" {
632            Ok(Val {
633                e: E::Float(f64::NAN.copysign(1.0)),
634                start,
635                end,
636            })
637        } else if s == "-nan" {
638            Ok(Val {
639                e: E::Float(f64::NAN.copysign(-1.0)),
640                start,
641                end,
642            })
643        } else {
644            self.integer(s, 10).map(to_integer)
645        }
646    }
647
648    fn number_leading_plus(&mut self, Span { start, end }: Span) -> Result<Val<'a>, Error> {
649        let start_token = self.tokens.current();
650        match self.next()? {
651            Some((Span { end, .. }, Token::Keylike(s))) => self.number(Span { start, end }, s),
652            _ => Err(self.error(start_token, Some(end), ErrorKind::InvalidNumber)),
653        }
654    }
655
656    fn integer(&self, s: &'a str, radix: u32) -> Result<i64, Error> {
657        let allow_sign = radix == 10;
658        let allow_leading_zeros = radix != 10;
659        let (prefix, suffix) = self.parse_integer(s, allow_sign, allow_leading_zeros, radix)?;
660        let start = self.tokens.substr_offset(s);
661        if !suffix.is_empty() {
662            return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber));
663        }
664        i64::from_str_radix(prefix.replace('_', "").trim_start_matches('+'), radix)
665            .map_err(|_e| self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber))
666    }
667
668    fn parse_integer(
669        &self,
670        s: &'a str,
671        allow_sign: bool,
672        allow_leading_zeros: bool,
673        radix: u32,
674    ) -> Result<(&'a str, &'a str), Error> {
675        let start = self.tokens.substr_offset(s);
676
677        let mut first = true;
678        let mut first_zero = false;
679        let mut underscore = false;
680        let mut end = s.len();
681        let send = start + s.len();
682        for (i, c) in s.char_indices() {
683            let at = i + start;
684            if i == 0 && (c == '+' || c == '-') && allow_sign {
685                continue;
686            }
687
688            if c == '0' && first {
689                first_zero = true;
690            } else if c.is_digit(radix) {
691                if !first && first_zero && !allow_leading_zeros {
692                    return Err(self.error(at, Some(send), ErrorKind::InvalidNumber));
693                }
694                underscore = false;
695            } else if c == '_' && first {
696                return Err(self.error(at, Some(send), ErrorKind::InvalidNumber));
697            } else if c == '_' && !underscore {
698                underscore = true;
699            } else {
700                end = i;
701                break;
702            }
703            first = false;
704        }
705        if first || underscore {
706            return Err(self.error(start, Some(send), ErrorKind::InvalidNumber));
707        }
708        Ok((&s[..end], &s[end..]))
709    }
710
711    fn float(&mut self, s: &'a str, after_decimal: Option<&'a str>) -> Result<f64, Error> {
712        let (integral, mut suffix) = self.parse_integer(s, true, false, 10)?;
713        let start = self.tokens.substr_offset(integral);
714
715        let mut fraction = None;
716        if let Some(after) = after_decimal {
717            if !suffix.is_empty() {
718                return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber));
719            }
720            let (a, b) = self.parse_integer(after, false, true, 10)?;
721            fraction = Some(a);
722            suffix = b;
723        }
724
725        let mut exponent = None;
726        if suffix.starts_with('e') || suffix.starts_with('E') {
727            let (a, b) = if suffix.len() == 1 {
728                self.eat(Token::Plus)?;
729                match self.next()? {
730                    Some((_, Token::Keylike(s))) => self.parse_integer(s, false, true, 10)?,
731                    _ => {
732                        return Err(self.error(
733                            start,
734                            Some(start + s.len()),
735                            ErrorKind::InvalidNumber,
736                        ));
737                    }
738                }
739            } else {
740                self.parse_integer(&suffix[1..], true, true, 10)?
741            };
742            if !b.is_empty() {
743                return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber));
744            }
745            exponent = Some(a);
746        } else if !suffix.is_empty() {
747            return Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber));
748        }
749
750        let mut number = integral
751            .trim_start_matches('+')
752            .chars()
753            .filter(|c| *c != '_')
754            .collect::<String>();
755        if let Some(fraction) = fraction {
756            number.push('.');
757            number.extend(fraction.chars().filter(|c| *c != '_'));
758        }
759        if let Some(exponent) = exponent {
760            number.push('E');
761            number.extend(exponent.chars().filter(|c| *c != '_'));
762        }
763        number
764            .parse()
765            .map_err(|_e| self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber))
766            .and_then(|n: f64| {
767                if n.is_finite() {
768                    Ok(n)
769                } else {
770                    Err(self.error(start, Some(start + s.len()), ErrorKind::InvalidNumber))
771                }
772            })
773    }
774
775    // TODO(#140): shouldn't buffer up this entire table in memory, it'd be
776    // great to defer parsing everything until later.
777    fn inline_table(&mut self) -> Result<(Span, TableValues<'a>), Error> {
778        let mut ret = TableValues::default();
779        self.eat_whitespace();
780        if let Some(span) = self.eat_spanned(Token::RightBrace)? {
781            return Ok((span, ret));
782        }
783        loop {
784            let key = self.dotted_key()?;
785            self.eat_whitespace();
786            self.expect(Token::Equals)?;
787            self.eat_whitespace();
788            let value = self.value()?;
789            self.add_dotted_key(key, value, &mut ret)?;
790
791            self.eat_whitespace();
792            if let Some(span) = self.eat_spanned(Token::RightBrace)? {
793                return Ok((span, ret));
794            }
795            self.expect(Token::Comma)?;
796            self.eat_whitespace();
797        }
798    }
799
800    // TODO(#140): shouldn't buffer up this entire array in memory, it'd be
801    // great to defer parsing everything until later.
802    fn array(&mut self) -> Result<(Span, Vec<Val<'a>>), Error> {
803        let mut ret = Vec::new();
804
805        let intermediate = |me: &mut Deserializer<'_>| -> Result<(), Error> {
806            loop {
807                me.eat_whitespace();
808                if !me.eat(Token::Newline)? && !me.eat_comment()? {
809                    break;
810                }
811            }
812            Ok(())
813        };
814
815        loop {
816            intermediate(self)?;
817            if let Some(span) = self.eat_spanned(Token::RightBracket)? {
818                return Ok((span, ret));
819            }
820            let value = self.value()?;
821            ret.push(value);
822            intermediate(self)?;
823            if !self.eat(Token::Comma)? {
824                break;
825            }
826        }
827        intermediate(self)?;
828        let span = self.expect_spanned(Token::RightBracket)?;
829        Ok((span, ret))
830    }
831
832    fn table_key(&mut self) -> Result<Key<'a>, Error> {
833        self.tokens.table_key().map_err(|e| self.token_error(e))
834    }
835
836    fn dotted_key(&mut self) -> Result<Vec<Key<'a>>, Error> {
837        let mut result = Vec::new();
838        result.push(self.table_key()?);
839        self.eat_whitespace();
840        while self.eat(Token::Period)? {
841            self.eat_whitespace();
842            result.push(self.table_key()?);
843            self.eat_whitespace();
844        }
845        Ok(result)
846    }
847
848    /// Stores a value in the appropriate hierarchical structure positioned based on the dotted key.
849    ///
850    /// Given the following definition: `multi.part.key = "value"`, `multi` and `part` are
851    /// intermediate parts which are mapped to the relevant fields in the deserialized type's data
852    /// hierarchy.
853    ///
854    /// # Parameters
855    ///
856    /// * `key_parts`: Each segment of the dotted key, e.g. `part.one` maps to
857    ///   `vec![Cow::Borrowed("part"), Cow::Borrowed("one")].`
858    /// * `value`: The parsed value.
859    /// * `values`: The `Vec` to store the value in.
860    fn add_dotted_key(
861        &self,
862        mut key_parts: Vec<Key<'a>>,
863        value: Val<'a>,
864        values: &mut TableValues<'a>,
865    ) -> Result<(), Error> {
866        let key = key_parts.remove(0);
867        if key_parts.is_empty() {
868            values.values.push((key, value));
869            return Ok(());
870        }
871        match values
872            .values
873            .iter_mut()
874            .find(|&&mut (ref k, _)| k.name == key.name)
875        {
876            Some(&mut (
877                _,
878                Val {
879                    e: E::DottedTable(ref mut v),
880                    ..
881                },
882            )) => {
883                return self.add_dotted_key(key_parts, value, v);
884            }
885            Some(&mut (ref first, _)) => {
886                return Err(self.error(
887                    key.span.start,
888                    Some(value.end),
889                    ErrorKind::DottedKeyInvalidType { first: first.span },
890                ));
891            }
892            None => {}
893        }
894        // The start/end value is somewhat misleading here.
895        let table_values = Val {
896            e: E::DottedTable(TableValues::default()),
897            start: value.start,
898            end: value.end,
899        };
900        values.values.push((key, table_values));
901        let last_i = values.values.len() - 1;
902        if let (
903            _,
904            Val {
905                e: E::DottedTable(ref mut v),
906                ..
907            },
908        ) = values.values[last_i]
909        {
910            self.add_dotted_key(key_parts, value, v)?;
911        }
912        Ok(())
913    }
914
915    fn eat_whitespace(&mut self) {
916        self.tokens.eat_whitespace();
917    }
918
919    fn eat_comment(&mut self) -> Result<bool, Error> {
920        self.tokens.eat_comment().map_err(|e| self.token_error(e))
921    }
922
923    fn eat_newline_or_eof(&mut self) -> Result<(), Error> {
924        self.tokens
925            .eat_newline_or_eof()
926            .map_err(|e| self.token_error(e))
927    }
928
929    fn eat(&mut self, expected: Token<'a>) -> Result<bool, Error> {
930        self.tokens.eat(expected).map_err(|e| self.token_error(e))
931    }
932
933    fn eat_spanned(&mut self, expected: Token<'a>) -> Result<Option<Span>, Error> {
934        self.tokens
935            .eat_spanned(expected)
936            .map_err(|e| self.token_error(e))
937    }
938
939    fn expect(&mut self, expected: Token<'a>) -> Result<(), Error> {
940        self.tokens
941            .expect(expected)
942            .map_err(|e| self.token_error(e))
943    }
944
945    fn expect_spanned(&mut self, expected: Token<'a>) -> Result<Span, Error> {
946        self.tokens
947            .expect_spanned(expected)
948            .map_err(|e| self.token_error(e))
949    }
950
951    fn next(&mut self) -> Result<Option<(Span, Token<'a>)>, Error> {
952        self.tokens.step().map_err(|e| self.token_error(e))
953    }
954
955    fn peek(&mut self) -> Result<Option<(Span, Token<'a>)>, Error> {
956        self.tokens.peek().map_err(|e| self.token_error(e))
957    }
958
959    fn eof(&self) -> Error {
960        self.error(self.input.len(), None, ErrorKind::UnexpectedEof)
961    }
962
963    fn token_error(&self, error: TokenError) -> Error {
964        match error {
965            TokenError::InvalidCharInString(at, ch) => {
966                self.error(at, None, ErrorKind::InvalidCharInString(ch))
967            }
968            TokenError::InvalidEscape(at, ch) => self.error(at, None, ErrorKind::InvalidEscape(ch)),
969            TokenError::InvalidEscapeValue(at, len, v) => {
970                self.error(at, Some(at + len), ErrorKind::InvalidEscapeValue(v))
971            }
972            TokenError::InvalidHexEscape(at, ch) => {
973                self.error(at, None, ErrorKind::InvalidHexEscape(ch))
974            }
975            TokenError::NewlineInString(at) => {
976                self.error(at, None, ErrorKind::InvalidCharInString('\n'))
977            }
978            TokenError::Unexpected(at, ch) => self.error(at, None, ErrorKind::Unexpected(ch)),
979            TokenError::UnterminatedString(at) => {
980                self.error(at, None, ErrorKind::UnterminatedString)
981            }
982            TokenError::Wanted {
983                at,
984                expected,
985                found,
986            } => self.error(
987                at,
988                Some(at + found.len()),
989                ErrorKind::Wanted { expected, found },
990            ),
991            TokenError::MultilineStringKey(at, end) => {
992                self.error(at, Some(end), ErrorKind::MultilineStringKey)
993            }
994        }
995    }
996
997    fn error(&self, start: usize, end: Option<usize>, kind: ErrorKind) -> Error {
998        let span = Span::new(start, end.unwrap_or(start + 1));
999        let line_info = Some(self.to_linecol(start));
1000        Error {
1001            span,
1002            kind,
1003            line_info,
1004        }
1005    }
1006
1007    /// Converts a byte offset from an error message to a (line, column) pair
1008    ///
1009    /// All indexes are 0-based.
1010    fn to_linecol(&self, offset: usize) -> (usize, usize) {
1011        let mut cur = 0;
1012        // Use split_terminator instead of lines so that if there is a `\r`, it
1013        // is included in the offset calculation. The `+1` values below account
1014        // for the `\n`.
1015        for (i, line) in self.input.split_terminator('\n').enumerate() {
1016            if cur + line.len() + 1 > offset {
1017                return (i, offset - cur);
1018            }
1019            cur += line.len() + 1;
1020        }
1021        (self.input.lines().count(), 0)
1022    }
1023}
1024
1025impl std::convert::From<Error> for std::io::Error {
1026    fn from(e: Error) -> Self {
1027        std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
1028    }
1029}
1030
/// One logical line of the document: either a table header or a key/value
/// assignment.
enum Line<'a> {
    /// A table header line.
    Table {
        // NOTE(review): `at`/`end` look like the start and end offsets of
        // the header within the input; confirm against the code that
        // constructs `Line`.
        at: usize,
        end: usize,
        /// Reader that yields the header's keys one at a time.
        header: Header<'a>,
        // NOTE(review): presumably true for `[[...]]` (array-of-tables)
        // headers, mirroring `Header::array`; confirm.
        array: bool,
    },
    /// A `key = value` line.
    KeyValue {
        // NOTE(review): `at`/`end` look like the start and end offsets of
        // the assignment within the input; confirm.
        at: usize,
        end: usize,
        /// Key segments; more than one presumably means a dotted key.
        key: Vec<Key<'a>>,
        /// The parsed right-hand side.
        value: Val<'a>,
    },
}
1045
1046struct Header<'a> {
1047    first: bool,
1048    array: bool,
1049    tokens: Tokenizer<'a>,
1050}
1051
1052impl<'a> Header<'a> {
1053    fn new(tokens: Tokenizer<'a>, array: bool) -> Header<'a> {
1054        Header {
1055            first: true,
1056            array,
1057            tokens,
1058        }
1059    }
1060
1061    fn next(&mut self) -> Result<Option<Key<'a>>, TokenError> {
1062        self.tokens.eat_whitespace();
1063
1064        if self.first || self.tokens.eat(Token::Period)? {
1065            self.first = false;
1066            self.tokens.eat_whitespace();
1067            self.tokens.table_key().map(Some)
1068        } else {
1069            self.tokens.expect(Token::RightBracket)?;
1070            if self.array {
1071                self.tokens.expect(Token::RightBracket)?;
1072            }
1073
1074            self.tokens.eat_whitespace();
1075            if !self.tokens.eat_comment()? {
1076                self.tokens.eat_newline_or_eof()?;
1077            }
1078            Ok(None)
1079        }
1080    }
1081}
1082
/// A parsed value together with its position in the input.
struct Val<'a> {
    /// The value itself.
    e: E<'a>,
    // `start`/`end` are used as bounds when building error spans and when
    // seeding placeholder dotted tables (see `add_dotted_key`).
    // NOTE(review): presumably byte offsets into the input; confirm.
    start: usize,
    end: usize,
}
1088
1089enum E<'a> {
1090    Integer(i64),
1091    Float(f64),
1092    Boolean(bool),
1093    String(DeStr<'a>),
1094    Array(Vec<Val<'a>>),
1095    InlineTable(TableValues<'a>),
1096    DottedTable(TableValues<'a>),
1097}
1098
1099impl E<'_> {
1100    #[allow(dead_code)]
1101    fn type_name(&self) -> &'static str {
1102        match *self {
1103            E::String(..) => "string",
1104            E::Integer(..) => "integer",
1105            E::Float(..) => "float",
1106            E::Boolean(..) => "boolean",
1107            E::Array(..) => "array",
1108            E::InlineTable(..) => "inline table",
1109            E::DottedTable(..) => "dotted table",
1110        }
1111    }
1112}