Skip to main content

uv_toml/
lib.rs

1use std::collections::BTreeMap;
2
3use serde::{Deserialize, Deserializer};
4use toml_datetime::Datetime;
5use toml_parser::decoder::Encoding;
6use toml_parser::lexer::Token;
7use toml_parser::parser::{EventReceiver, parse_document};
8use toml_parser::{ErrorSink, Source, Span};
9
10/// Deserialize a map while ensuring all keys are unique.
11pub fn deserialize_unique_map<'de, D, K, V, F>(
12    deserializer: D,
13    error_msg: F,
14) -> Result<BTreeMap<K, V>, D::Error>
15where
16    D: Deserializer<'de>,
17    K: Deserialize<'de> + Ord,
18    V: Deserialize<'de>,
19    F: FnOnce(&K) -> String,
20{
21    struct Visitor<K, V, F>(F, std::marker::PhantomData<(K, V)>);
22
23    impl<'de, K, V, F> serde::de::Visitor<'de> for Visitor<K, V, F>
24    where
25        K: Deserialize<'de> + Ord,
26        V: Deserialize<'de>,
27        F: FnOnce(&K) -> String,
28    {
29        type Value = BTreeMap<K, V>;
30
31        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
32            formatter.write_str("a map with unique keys")
33        }
34
35        fn visit_map<M>(self, mut access: M) -> Result<Self::Value, M::Error>
36        where
37            M: serde::de::MapAccess<'de>,
38        {
39            use std::collections::btree_map::Entry;
40
41            let mut map = BTreeMap::new();
42            while let Some((key, value)) = access.next_entry::<K, V>()? {
43                match map.entry(key) {
44                    Entry::Occupied(entry) => {
45                        return Err(serde::de::Error::custom((self.0)(entry.key())));
46                    }
47                    Entry::Vacant(entry) => {
48                        entry.insert(value);
49                    }
50                }
51            }
52            Ok(map)
53        }
54    }
55
56    deserializer.deserialize_map(Visitor(error_msg, std::marker::PhantomData))
57}
58
59/// Detect TOML 1.1 specific features in a TOML document.
60///
61/// Note: This function does _not_ perform any validation.
62pub fn has_toml11_features(source: &str) -> bool {
63    let tokens: Box<[Token]> = Source::new(source).lex().collect();
64    let mut checker = DetectToml11::new(source);
65    let mut errors = None;
66    parse_document(&tokens, &mut checker, &mut errors);
67    checker.is_11()
68}
69
/// The kind of TOML structure the parser is currently inside.
#[derive(Clone, Copy, Debug)]
enum State {
    /// A regular table header (e.g. `[foo]`).
    StdTable,
    /// An array-of-tables header (e.g. `[[foo]]`).
    ArrayTable,
    /// An inline table (e.g. `{ k = "v" }`).
    InlineTable {
        /// Whether the most recent event inside this inline table was a value separator.
        trailing_sep: bool,
    },
    /// An array (e.g. `[1, 2, 3]`).
    Array,
}
82
83/// Detect TOML 1.1 specific features.
84pub struct DetectToml11<'s> {
85    /// The underlying TOML source
86    source: &'s str,
87    /// Current nesting state
88    state: Vec<State>,
89    /// Set to true when a TOML 1.1 specific feature is seen
90    toml11: bool,
91}
92
93impl<'s> DetectToml11<'s> {
94    fn new(source: &'s str) -> Self {
95        Self {
96            source,
97            state: Vec::new(),
98            toml11: false,
99        }
100    }
101
102    fn raw_at(&self, span: Span) -> &'s str {
103        &self.source[span.start()..span.end()]
104    }
105
106    fn flag_11(&mut self) {
107        self.toml11 = true;
108    }
109
110    fn set_sep(&mut self, sep: bool) {
111        if let Some(State::InlineTable { trailing_sep }) = self.state.last_mut() {
112            *trailing_sep = sep;
113        }
114    }
115
116    fn is_11(&self) -> bool {
117        self.toml11
118    }
119}
120
121impl EventReceiver for DetectToml11<'_> {
122    fn std_table_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
123        self.state.push(State::StdTable);
124    }
125
126    fn std_table_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
127        self.state.pop();
128    }
129
130    fn array_table_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
131        self.state.push(State::ArrayTable);
132    }
133
134    fn array_table_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
135        self.state.pop();
136    }
137
138    fn inline_table_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) -> bool {
139        self.state.push(State::InlineTable {
140            trailing_sep: false,
141        });
142        true
143    }
144
145    fn inline_table_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
146        if matches!(
147            self.state.last(),
148            Some(State::InlineTable { trailing_sep: true })
149        ) {
150            // TOML 1.1 introduces trailing commas in inline tables
151            self.flag_11();
152        }
153        self.state.pop();
154    }
155
156    fn array_open(&mut self, _span: Span, _error: &mut dyn ErrorSink) -> bool {
157        self.state.push(State::Array);
158        true
159    }
160
161    fn array_close(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
162        self.state.pop();
163    }
164
165    fn simple_key(&mut self, span: Span, kind: Option<Encoding>, _error: &mut dyn ErrorSink) {
166        self.set_sep(false);
167
168        if matches!(kind, Some(Encoding::BasicString | Encoding::MlBasicString))
169            && has_toml11_escapes(self.raw_at(span))
170        {
171            // TOML 1.1 introduces new escape sequences
172            self.flag_11();
173        }
174    }
175
176    fn scalar(&mut self, span: Span, kind: Option<Encoding>, _error: &mut dyn ErrorSink) {
177        self.set_sep(false);
178
179        if matches!(kind, Some(Encoding::BasicString | Encoding::MlBasicString)) {
180            if has_toml11_escapes(self.raw_at(span)) {
181                // TOML 1.1 introduces new escape sequences
182                self.flag_11();
183            }
184        } else if has_toml11_optional_second_time(self.raw_at(span)) {
185            // TOML 1.1 makes seconds optional in times and datetimes.
186            self.flag_11();
187        }
188    }
189
190    fn value_sep(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
191        self.set_sep(true);
192    }
193
194    fn newline(&mut self, _span: Span, _error: &mut dyn ErrorSink) {
195        if matches!(self.state.last(), Some(State::InlineTable { .. })) {
196            // TOML 1.1 introduces newlines in inline tables
197            self.flag_11();
198        }
199    }
200}
201
/// Check the raw text of a TOML basic string for the escape sequences that only exist in
/// TOML 1.1: `\xHH` and `\e`.
///
/// Each backslash consumes the character after it, so an escaped backslash followed by `x`
/// or `e` (as in `\\e`) is not reported.
fn has_toml11_escapes(raw: &str) -> bool {
    let mut rest = raw.chars();
    while let Some(current) = rest.next() {
        if current != '\\' {
            continue;
        }
        // Always consume the escaped character so it cannot start another escape.
        if let Some('x' | 'e') = rest.next() {
            return true;
        }
    }
    false
}
216
217/// Scan for the TOML 1.1 optional-second time syntax, such as `12:34` and `1969-06-20T20:17Z`.
218fn has_toml11_optional_second_time(raw: &str) -> bool {
219    // Non-datetime scalars, such as booleans and integers, fail to parse as date and/or time.
220    let Ok(datetime) = raw.parse::<Datetime>() else {
221        return false;
222    };
223
224    datetime
225        .time
226        .as_ref()
227        .is_some_and(|time| time.second.is_none())
228}
229
#[cfg(test)]
mod tests {
    use super::{has_toml11_escapes, has_toml11_features};

    // Escape scanning on raw basic-string snippets.

    /// A string without any backslash has no TOML 1.1 escapes.
    #[test]
    fn escapes_plain_string() {
        let raw = r#""hello world""#;
        assert!(!has_toml11_escapes(raw));
    }

    /// `\n` already exists in TOML 1.0.
    #[test]
    fn escapes_toml10_escape_n() {
        let raw = r#""hello\nworld""#;
        assert!(!has_toml11_escapes(raw));
    }

    /// `\uXXXX` already exists in TOML 1.0.
    #[test]
    fn escapes_toml10_escape_u() {
        let raw = r#""r\u00E9sum\u00E9""#;
        assert!(!has_toml11_escapes(raw));
    }

    /// `\xHH` is new in TOML 1.1.
    #[test]
    fn escapes_toml11_hex() {
        let raw = r#""val \x41""#;
        assert!(has_toml11_escapes(raw));
    }

    /// `\e` is new in TOML 1.1.
    #[test]
    fn escapes_toml11_esc() {
        let raw = r#""val \e""#;
        assert!(has_toml11_escapes(raw));
    }

    /// An escaped backslash followed by `e` is not an `\e` escape.
    #[test]
    fn escapes_double_backslash_e() {
        let raw = r#""\\e""#;
        assert!(!has_toml11_escapes(raw));
    }

    /// An escaped backslash followed by `x41` is not an `\xHH` escape.
    #[test]
    fn escapes_double_backslash_x() {
        let raw = r#""\\x41""#;
        assert!(!has_toml11_escapes(raw));
    }

    // Whole-document feature detection.

    #[test]
    fn features_plain_toml10() {
        let doc = "x = 1\ny = \"hello\"\nz = true\n";
        assert!(!has_toml11_features(doc));
    }

    #[test]
    fn features_std_table() {
        let doc = "[server]\nhost = \"localhost\"\nport = 8080\n";
        assert!(!has_toml11_features(doc));
    }

    #[test]
    fn features_array_of_tables() {
        let doc = "[[items]]\nname = \"a\"\n[[items]]\nname = \"b\"\n";
        assert!(!has_toml11_features(doc));
    }

    #[test]
    fn features_inline_table_no_trailing_comma() {
        let doc = "x = {a = 1, b = 2}\n";
        assert!(!has_toml11_features(doc));
    }

    #[test]
    fn features_trailing_comma_in_inline_table() {
        let doc = "x = {a = 1, b = 2,}\n";
        assert!(has_toml11_features(doc));
    }

    #[test]
    fn features_multiline_inline_table() {
        let doc = "x = {\n  a = 1\n}\n";
        assert!(has_toml11_features(doc));
    }

    #[test]
    fn features_multiline_inline_table_with_trailing_comma() {
        let doc = "x = {\n  a = 1,\n}\n";
        assert!(has_toml11_features(doc));
    }

    #[test]
    fn features_hex_escape() {
        let doc = "x = \"val \\x41\"\n";
        assert!(has_toml11_features(doc));
    }

    #[test]
    fn features_hex_escape_in_quoted_key() {
        let doc = "\"\\x62ar\" = \"baz\"\n";
        assert!(has_toml11_features(doc));
    }

    #[test]
    fn features_hex_escape_in_dotted_quoted_key() {
        let doc = "foo.\"\\x62ar\" = \"baz\"\n";
        assert!(has_toml11_features(doc));
    }

    #[test]
    fn features_esc_escape() {
        let doc = "x = \"val \\e\"\n";
        assert!(has_toml11_features(doc));
    }

    #[test]
    fn features_double_backslash_not_escape() {
        let doc = "x = \"\\\\e\"\n";
        assert!(!has_toml11_features(doc));
    }

    #[test]
    fn features_toml10_escape_in_value() {
        let doc = "x = \"tab\\there\"\n";
        assert!(!has_toml11_features(doc));
    }

    #[test]
    fn features_escape_in_nested_structure() {
        let doc = "[t]\na = {b = \"\\x20\",}\n";
        assert!(has_toml11_features(doc));
    }

    /// Trailing commas in arrays are already allowed in TOML 1.0.
    #[test]
    fn features_trailing_comma_in_array_is_not_11() {
        let doc = "x = [1, 2, 3,]\n";
        assert!(!has_toml11_features(doc));
    }

    /// Times and datetimes without seconds are TOML 1.1 only.
    #[test]
    fn features_optional_second_time_values() {
        let docs = [
            "x = 20:17\n",
            "x = 1969-06-20T20:17\n",
            "x = 1969-06-20 20:17\n",
            "x = 1969-06-20T20:17Z\n",
            "x = 1969-06-20T20:17z\n",
            "x = 1969-06-20T20:17-07:00\n",
        ];
        for doc in docs {
            assert!(has_toml11_features(doc), "not detected as TOML 1.1: {doc:?}");
        }
    }

    /// Times with seconds, and plain dates, are valid TOML 1.0.
    #[test]
    fn features_toml10_time_values_are_not_11() {
        let docs = [
            "x = 20:17:00\n",
            "x = 1969-06-20T20:17:00Z\n",
            "x = 1969-06-20\n",
        ];
        for doc in docs {
            assert!(!has_toml11_features(doc), "wrongly detected as TOML 1.1: {doc:?}");
        }
    }
}
364}