kq/
kdlrs.rs

1// The code of this file is modified from https://github.com/kdl-org/kdl-rs
2// (mainly from https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs)
3use kdl::KdlValue;
4use nom::branch::alt;
5use nom::bytes::complete::{tag, take_until, take_until1, take_while_m_n};
6use nom::character::complete::{anychar, char, none_of, one_of};
7use nom::combinator::{eof, map, map_opt, map_res, not, opt, recognize, value};
8use nom::multi::{fold_many0, many0, many1, many_till};
9use nom::sequence::{delimited, preceded, terminated, tuple};
10use nom::IResult;
11
12/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L148-L151
13/// `identifier := bare_identifier | string`
14///
15// fn identifier(input: &str) -> IResult<&str, String, KdlParseError<&str>> {
16pub(crate) fn identifier(input: &str) -> IResult<&str, String> {
17    alt((string, (map(bare_identifier, String::from))))(input)
18}
19
20/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L124-L142
21/// `bare_identifier := ((identifier-char - digit - sign) identifier-char* | sign ((identifier-char - digit) identifier-char*)?) - keyword`
22///
23// fn bare_identifier(input: &str) -> IResult<&str, &str, KdlParseError<&str>>> {
24fn bare_identifier(input: &str) -> IResult<&str, &str> {
25    // fn left(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
26    fn left(input: &str) -> IResult<&str, ()> {
27        not(keyword)(input)?;
28        not(one_of("0123456789"))(input)?;
29        not(one_of("+-"))(input)?;
30        let (input, _) = identifier_char(input)?;
31        let (input, _) = many0(identifier_char)(input)?;
32        Ok((input, ()))
33    }
34    // fn right(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
35    fn right(input: &str) -> IResult<&str, ()> {
36        let (input, _) = one_of("+-")(input)?;
37        not(keyword)(input)?;
38        not(one_of("0123456789"))(input)?;
39        let (input, _) = opt(many1(identifier_char))(input)?;
40        Ok((input, ()))
41    }
42    recognize(alt((left, right)))(input)
43}
44
45/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L144-L146
46/// `string := '"' character* '"'`
47///
48// fn keyword(input: &str) -> IResult<&str, String, KdlParseError<&str>> {
49fn keyword(input: &str) -> IResult<&str, String> {
50    map(alt((tag("true"), tag("false"), tag("null"))), String::from)(input)
51}
52
53/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L118-L122
54/// `identifier_char := unicode - linespace - [\/(){}<>;[]=,"]
55///
56// fn identifier_char(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
57fn identifier_char(input: &str) -> IResult<&str, &str> {
58    not(linespace)(input)?;
59    recognize(none_of(r#"\/(){}<>;[]=,""#))(input)
60}
61
62/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L118-L122
63/// `linespace := newline | ws | single-line-comment`
64///
65// fn linespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
66fn linespace(input: &str) -> IResult<&str, ()> {
67    value((), alt((newline, whitespace, single_line_comment)))(input)
68}
69
70/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L473-L487
71/// `newline := All line-break unicode white_space
72///
73// fn newline(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
74fn newline(input: &str) -> IResult<&str, ()> {
75    value(
76        (),
77        alt((
78            tag("\r\n"),
79            tag("\r"),
80            tag("\n"),
81            tag("\u{0085}"),
82            tag("\u{000C}"),
83            tag("\u{2028}"),
84            tag("\u{2029}"),
85        )),
86    )(input)
87}
88
89/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L437-L448
90/// `ws := bom | unicode-space | multi-line-comment`
91///
92// fn whitespace(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
93pub(crate) fn whitespace(input: &str) -> IResult<&str, ()> {
94    // TODO: bom?
95    value(
96        (),
97        alt((
98            tag("\u{FEFF}"),
99            unicode_space,
100            recognize(multi_line_comment),
101        )),
102    )(input)
103}
104
105/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L400-L405
106/// `single-line-comment := '//' ('\r' [^\n] | [^\r\n])* (newline | eof)`
107///
108// fn single_line_comment(input: &str) -> IResult<&str, (), KdlParseError<&str>> {
109fn single_line_comment(input: &str) -> IResult<&str, ()> {
110    let (input, _) = tag("//")(input)?;
111    let (input, _) = many_till(value((), anychar), alt((newline, value((), eof))))(input)?;
112    Ok((input, ()))
113}
114
115/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L407-L411
116/// `multi-line-comment := '/*' commented-block
117///
118// fn multi_line_comment(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
119fn multi_line_comment(input: &str) -> IResult<&str, &str> {
120    let (input, _) = tag("/*")(input)?;
121    commented_block(input)
122}
123
124/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L413-L422
125/// `commented-block := '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block`
126///
127// fn commented_block(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
128fn commented_block(input: &str) -> IResult<&str, &str> {
129    alt((
130        tag("*/"),
131        terminated(
132            alt((multi_line_comment, take_until1("*/"), tag("*"), tag("/"))),
133            commented_block,
134        ),
135    ))(input)
136}
137
138/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L450-L471
139///
140// fn unicode_space(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
141fn unicode_space(input: &str) -> IResult<&str, &str> {
142    alt((
143        tag(" "),
144        tag("\t"),
145        tag("\u{00A0}"),
146        tag("\u{1680}"),
147        tag("\u{2000}"),
148        tag("\u{2001}"),
149        tag("\u{2002}"),
150        tag("\u{2003}"),
151        tag("\u{2004}"),
152        tag("\u{2005}"),
153        tag("\u{2006}"),
154        tag("\u{2007}"),
155        tag("\u{2008}"),
156        tag("\u{2009}"),
157        tag("\u{200A}"),
158        tag("\u{202F}"),
159        tag("\u{205F}"),
160        tag("\u{3000}"),
161    ))(input)
162}
163
164/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L213-L223
165/// `string := '"' character* '"'`
166///
167// fn string(input: &str) -> IResult<&str, String, KdlParseError<&str>> {
168fn string(input: &str) -> IResult<&str, String> {
169    delimited(
170        char('"'),
171        fold_many0(character, String::new, |mut acc, ch| {
172            acc.push(ch);
173            acc
174        }),
175        char('"'),
176    )(input)
177}
178
179/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L225-L228
180/// `character := '\' escape | [^\"]`
181///
182// fn character(input: &str) -> IResult<&str, char, KdlParseError<&str>> {
183fn character(input: &str) -> IResult<&str, char> {
184    alt((preceded(char('\\'), escape), none_of("\\\"")))(input)
185}
186
/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L237-L247
/// Maps the character following a backslash to the character it stands for.
///
/// Returns `None` when `input` is not one of the single-character escapes
/// `"`, `\`, `/`, `b`, `f`, `n`, `r`, `t`.
///
/// (instead of building a map by phf, use a function with pattern matching)
fn escape_chars(input: char) -> Option<char> {
    let unescaped = match input {
        // These escapes stand for themselves.
        '"' | '\\' | '/' => input,
        'b' => '\u{08}',
        'f' => '\u{0C}',
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        _ => return None,
    };
    Some(unescaped)
}
204
205/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L249-L255
206/// `escape := ["\\/bfnrt] | 'u{' hex-digit{1, 6} '}'`
207///
208// fn escape(input: &str) -> IResult<&str, char, KdlParseError<&str>> {
209fn escape(input: &str) -> IResult<&str, char> {
210    alt((
211        delimited(tag("u{"), unicode, char('}')),
212        map_opt(anychar, escape_chars),
213    ))(input)
214}
215
216// fn unicode(input: &str) -> IResult<&str, char, KdlParseError<&str>> {
217fn unicode(input: &str) -> IResult<&str, char> {
218    map_opt(
219        map_res(
220            take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()),
221            |hex| u32::from_str_radix(hex, 16),
222        ),
223        std::char::from_u32,
224    )(input)
225}
226
227/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L180-L190
228/// `value := type-annotation? (string | raw_string | number | boolean | 'null'`)
229///
230// fn node_value(input: &str) -> IResult<&str, KdlValue, KdlParseError<&str>> {
231pub(crate) fn node_value(input: &str) -> IResult<&str, KdlValue> {
232    // let (input, _ty) = opt(type_annotation)(input)?;
233    alt((
234        map(string, KdlValue::String),
235        map(raw_string, |s| KdlValue::String(s.into())),
236        number,
237        boolean,
238        value(KdlValue::Null, tag("null")),
239    ))(input)
240}
241
242/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L267-L278
243/// `raw-string := 'r' raw-string-hash`
244/// `raw-string-hash := '#' raw-string-hash '#' | raw-string-quotes`
245/// `raw-string-quotes := '"' .* '"'`
246///
247// fn raw_string(input: &str) -> IResult<&str, &str, KdlParseError<&str>> {
248fn raw_string(input: &str) -> IResult<&str, &str> {
249    let (input, _) = char('r')(input)?;
250    let (input, hashes) = recognize(many0(char('#')))(input)?;
251    let (input, _) = char('"')(input)?;
252    let close = format!("\"{}", hashes);
253    let (input, string) = take_until(&close[..])(input)?;
254    let (input, _) = tag(&close[..])(input)?;
255    Ok((input, string))
256}
257
258/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L280-L289
259/// `number := decimal | hex | octal | binary`
260///
261// fn number(input: &str) -> IResult<&str, KdlValue, KdlParseError<&str>> {
262fn number(input: &str) -> IResult<&str, KdlValue> {
263    alt((
264        map(hexadecimal, KdlValue::Int),
265        map(octal, KdlValue::Int),
266        map(binary, KdlValue::Int),
267        map(float, KdlValue::Float),
268        map(integer, KdlValue::Int),
269    ))(input)
270}
271
272/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L331-L343
273///
274// fn sign(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
275fn sign(input: &str) -> IResult<&str, i64> {
276    let (input, sign) = opt(alt((char('+'), char('-'))))(input)?;
277    let mult = if let Some(sign) = sign {
278        if sign == '+' {
279            1
280        } else {
281            -1
282        }
283    } else {
284        1
285    };
286    Ok((input, mult))
287}
288
289/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L345-L358
290/// `hex := sign? '0x' [0-9a-fA-F] [0-9a-fA-F_]*`
291///
292// fn hexadecimal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
293fn hexadecimal(input: &str) -> IResult<&str, i64> {
294    let (input, sign) = sign(input)?;
295    map_res(
296        preceded(
297            alt((tag("0x"), tag("0X"))),
298            recognize(many1(terminated(
299                one_of("0123456789abcdefABCDEF"),
300                many0(char('_')),
301            ))),
302        ),
303        move |out: &str| i64::from_str_radix(&str::replace(out, "_", ""), 16).map(|x| x * sign),
304    )(input)
305}
306
307/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L360-L370
308///
309/// `octal := sign? '0o' [0-7] [0-7_]*`
310// fn octal(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
311fn octal(input: &str) -> IResult<&str, i64> {
312    let (input, sign) = sign(input)?;
313    map_res(
314        preceded(
315            alt((tag("0o"), tag("0O"))),
316            recognize(many1(terminated(one_of("01234567"), many0(char('_'))))),
317        ),
318        move |out: &str| i64::from_str_radix(&str::replace(out, "_", ""), 8).map(|x| x * sign),
319    )(input)
320}
321
322/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L372-L382
323///
324// fn binary(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
325fn binary(input: &str) -> IResult<&str, i64> {
326    let (input, sign) = sign(input)?;
327    map_res(
328        preceded(
329            alt((tag("0b"), tag("0B"))),
330            recognize(many1(terminated(one_of("01"), many0(char('_'))))),
331        ),
332        move |out: &str| i64::from_str_radix(&str::replace(out, "_", ""), 2).map(|x| x * sign),
333    )(input)
334}
335
336/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L384-L390
337/// `boolean := 'true' | 'false'`
338///
339// fn boolean(input: &str) -> IResult<&str, KdlValue, KdlParseError<&str>> {
340fn boolean(input: &str) -> IResult<&str, KdlValue> {
341    alt((
342        value(KdlValue::Boolean(true), tag("true")),
343        value(KdlValue::Boolean(false), tag("false")),
344    ))(input)
345}
346
347/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L291-L311
348/// ```text
349/// decimal := integer ('.' [0-9]+)? exponent?
350/// exponent := ('e' | 'E') integer
351/// integer := sign? [1-9] [0-9_]*
352/// sign := '+' | '-'
353/// ```
354///
355// fn float(input: &str) -> IResult<&str, f64, KdlParseError<&str>> {
356fn float(input: &str) -> IResult<&str, f64> {
357    map_res(
358        alt((
359            recognize(tuple((
360                integer,
361                opt(preceded(char('.'), integer)),
362                one_of("eE"),
363                opt(one_of("+-")),
364                integer,
365            ))),
366            recognize(tuple((integer, char('.'), integer))),
367        )),
368        |x| str::replace(x, "_", "").parse::<f64>(),
369    )(input)
370}
371
372/// https://github.com/kdl-org/kdl-rs/blob/v3.0.0/src/parser.rs#L313-L329
373/// ```text
374/// decimal := integer ('.' [0-9]+)? exponent?
375/// exponent := ('e' | 'E') integer
376/// integer := sign? [1-9] [0-9_]*
377/// sign := '+' | '-'
378/// ```
379///
380// fn integer(input: &str) -> IResult<&str, i64, KdlParseError<&str>> {
381fn integer(input: &str) -> IResult<&str, i64> {
382    let (input, sign) = sign(input)?;
383    map_res(
384        recognize(many1(terminated(one_of("0123456789"), many0(char('_'))))),
385        move |out: &str| {
386            str::replace(out, "_", "")
387                .parse::<i64>()
388                .map(move |x| x * sign)
389        },
390    )(input)
391}