beancount_parser_lima/
parsers.rs

1use crate::{
2    lexer::Token,
3    options::{BeancountOption, BeancountOptionError, ParserOptions, DEFAULT_LONG_STRING_MAXLINES},
4    types::*,
5};
6use chumsky::{input::BorrowInput, prelude::*};
7use either::Either;
8use rust_decimal::Decimal;
9use std::{
10    collections::{hash_map, HashMap, HashSet},
11    iter::once,
12    ops::Deref,
13    path::Path,
14};
15use time::Date;
16
17/// Matches all the includes in the file, ignoring everything else.
18pub(crate) fn includes<'s, I>() -> impl Parser<'s, I, Vec<String>, Extra<'s>>
19where
20    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
21{
22    (just(Token::Include).ignore_then(string()).map(Some))
23        .or(any_ref().map(|_| None))
24        .repeated()
25        .collect::<Vec<_>>()
26        .map(|includes| {
27            includes
28                .into_iter()
29                .filter_map(|s| s.as_ref().map(|s| s.to_string()))
30                .collect::<Vec<_>>()
31        })
32}
33
34/// Matches the whole file.
35pub(crate) fn file<'s, I>(
36    source_path: Option<&'s Path>,
37) -> impl Parser<'s, I, Vec<Spanned<Declaration<'s>>>, Extra<'s>>
38where
39    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
40{
41    declaration(source_path).repeated().collect::<Vec<_>>()
42}
43
44/// Matches a [Declaration], and returns with Span.
45pub(crate) fn declaration<'s, I>(
46    source_path: Option<&'s Path>,
47) -> impl Parser<'s, I, Spanned<Declaration<'s>>, Extra<'s>>
48where
49    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
50{
51    use Declaration::*;
52
53    choice((directive().map(Directive), pragma(source_path).map(Pragma)))
54        .map_with(spanned_extra)
55        .recover_with(skip_then_retry_until(any_ref().ignored(), end()))
56}
57
58/// Matches a [Directive].
59pub(crate) fn directive<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
60where
61    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
62{
63    choice((
64        transaction().labelled("transaction").as_context(),
65        choice((
66            price(),
67            balance(),
68            open(),
69            close(),
70            commodity(),
71            pad(),
72            document(),
73            note(),
74            event(),
75            query(),
76            custom(),
77        ))
78        .labelled("directive")
79        .as_context(),
80    ))
81}
82
83/// Matches a [Pragma].
84pub(crate) fn pragma<'s, I>(
85    source_path: Option<&'s Path>,
86) -> impl Parser<'s, I, Pragma<'s>, Extra<'s>>
87where
88    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
89{
90    choice((
91        just(Token::Pushtag)
92            .ignore_then(tag())
93            .map_with(|tag, e| Pragma::Pushtag(spanned(tag, e.span()))),
94        just(Token::Poptag)
95            .ignore_then(tag())
96            .map_with(|tag, e| Pragma::Poptag(spanned(tag, e.span()))),
97        just(Token::Pushmeta)
98            .ignore_then(meta_key_value())
99            .map(Pragma::Pushmeta),
100        just(Token::Popmeta)
101            .ignore_then(key())
102            .then_ignore(just(Token::Colon))
103            .map_with(|key, e| Pragma::Popmeta(spanned(key, e.span()))),
104        just(Token::Include)
105            .ignore_then(string().map_with(|path, e| Pragma::Include(spanned(path, e.span())))),
106        option(source_path).map(Pragma::Option),
107        just(Token::Plugin)
108            .ignore_then(string().map_with(spanned_extra))
109            .then(string().map_with(spanned_extra).or_not())
110            .map(|(module_name, config)| {
111                Pragma::Plugin(Plugin {
112                    module_name,
113                    config,
114                })
115            }),
116    ))
117    .then_ignore(just(Token::Eol))
118    .labelled("directive") // yeah, pragma is not a user-facing concept
119    .as_context()
120}
121
122/// Matches a [BeancountOption], failing if the option cannot be processed.
123pub(crate) fn option<'s, I>(
124    source_path: Option<&'s Path>,
125) -> impl Parser<'s, I, BeancountOption<'s>, Extra<'s>>
126where
127    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
128{
129    just(Token::Option)
130        .ignore_then(string().map_with(|name, e| spanned(name, e.span())))
131        .then(string().map_with(|value, e| spanned(value, e.span())))
132        .try_map_with(move |(name, value), e| {
133            use BeancountOptionError::*;
134
135            let opt = BeancountOption::parse(name, value, source_path).map_err(|e| match e {
136                UnknownOption => Rich::custom(name.span, e.to_string()),
137                BadValue(_) => Rich::custom(value.span, e.to_string()),
138            });
139
140            if let Ok(opt) = opt {
141                let parser_state: &mut extra::SimpleState<ParserState> = e.state();
142                parser_state
143                    .options
144                    .assimilate(opt)
145                    .map_err(|e| Rich::custom(value.span, e.to_string()))
146            } else {
147                opt
148            }
149
150            // TODO
151            // match parser_options.assimilate(&opt.name, &opt.value) {
152            //     Ok(()) => Ok(opt),
153            //     // TODO report location of duplicate option
154            //     Err(ref e @ DuplicateOption(ref _span)) => {
155            //         Err(Rich::custom(name.span, e.to_string()))
156            //     }
157            //     Err(ref e @ UnknownOption) => Err(Rich::custom(name.span, e.to_string())),
158            //     Err(ref e @ BadValue(_)) => Err(Rich::custom(value.span, e.to_string())),
159            //     // TODO report location of duplicate value
160            //     Err(ref e @ DuplicateValue(ref _span)) => {
161            //         Err(Rich::custom(value.span, e.to_string()))
162            //     }
163            // }
164        })
165}
166
167/// Matches a transaction, including metadata and postings, over several lines.
168pub(crate) fn transaction<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
169where
170    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
171{
172    group((
173        transaction_header_line(),
174        metadata().map_with(spanned_extra),
175        posting().repeated().collect::<Vec<_>>(),
176    ))
177    .validate(
178        |((date, flag, (payee, narration), (tags, links)), mut metadata, postings),
179         _span,
180         emitter| {
181            metadata.merge_tags(&tags, emitter);
182            metadata.merge_links(&links, emitter);
183
184            Directive {
185                date,
186                metadata,
187                variant: DirectiveVariant::Transaction(Transaction {
188                    flag,
189                    payee,
190                    narration,
191                    postings,
192                }),
193            }
194        },
195    )
196}
197
198type TransactionHeaderLine<'s> = (
199    Spanned<Date>,
200    Spanned<Flag>,
201    (Option<Spanned<&'s str>>, Option<Spanned<&'s str>>),
202    (HashSet<Spanned<Tag<'s>>>, HashSet<Spanned<Link<'s>>>),
203);
204
205/// Matches the first line of a transaction.
206fn transaction_header_line<'s, I>() -> impl Parser<'s, I, TransactionHeaderLine<'s>, Extra<'s>>
207where
208    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
209{
210    group((
211        date().map_with(spanned_extra),
212        txn().map_with(spanned_extra),
213        // payee and narration get special handling in case one is omitted
214        group((
215            string().map_with(spanned_extra).or_not(),
216            string().map_with(spanned_extra).or_not(),
217        ))
218        .map(|(s1, s2)| match (s1, s2) {
219            // a single string is narration
220            (Some(s1), None) => (None, Some(s1)),
221            (s1, s2) => (s1, s2),
222        })
223        .map(|(payee, narration)| {
224            (
225                replace_some_empty_with_none(payee),
226                replace_some_empty_with_none(narration),
227            )
228        }),
229        tags_links(),
230    ))
231    .then_ignore(just(Token::Eol))
232}
233
234fn replace_some_empty_with_none(s: Option<Spanned<&str>>) -> Option<Spanned<&str>> {
235    match s {
236        Some(maybe_empty) => {
237            if maybe_empty.is_empty() {
238                None
239            } else {
240                s
241            }
242        }
243        None => None,
244    }
245}
246
247/// Matches a price directive, including metadata, over several lines.
248pub(crate) fn price<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
249where
250    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
251{
252    group((
253        date().map_with(spanned_extra),
254        just(Token::Price),
255        currency().map_with(spanned_extra),
256        amount().map_with(spanned_extra),
257        tags_links(),
258    ))
259    .then_ignore(just(Token::Eol))
260    .then(metadata().map_with(spanned_extra))
261    .validate(
262        |((date, _, currency, amount, (tags, links)), mut metadata), _span, emitter| {
263            metadata.merge_tags(&tags, emitter);
264            metadata.merge_links(&links, emitter);
265            Directive {
266                date,
267                metadata,
268                variant: DirectiveVariant::Price(Price { currency, amount }),
269            }
270        },
271    )
272    .labelled("price")
273    .as_context()
274}
275
276/// Matches a balance directive, including metadata, over several lines.
277pub(crate) fn balance<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
278where
279    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
280{
281    group((
282        date().map_with(spanned_extra),
283        just(Token::Balance),
284        account().map_with(spanned_extra),
285        amount_with_tolerance().map_with(spanned_extra),
286        tags_links(),
287    ))
288    .then_ignore(just(Token::Eol))
289    .then(metadata().map_with(spanned_extra))
290    .validate(
291        |((date, _, account, atol, (tags, links)), mut metadata), _span, emitter| {
292            metadata.merge_tags(&tags, emitter);
293            metadata.merge_links(&links, emitter);
294            Directive {
295                date,
296                metadata,
297                variant: DirectiveVariant::Balance(Balance { account, atol }),
298            }
299        },
300    )
301    .labelled("balance")
302    .as_context()
303}
304
305/// Matches a open, including metadata, over several lines.
306pub(crate) fn open<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
307where
308    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
309{
310    group((open_header_line(), metadata().map_with(spanned_extra))).validate(
311        |((date, account, currencies, booking, (tags, links)), mut metadata), _span, emitter| {
312            metadata.merge_tags(&tags, emitter);
313            metadata.merge_links(&links, emitter);
314
315            Directive {
316                date,
317                metadata,
318                variant: DirectiveVariant::Open(Open {
319                    account,
320                    currencies,
321                    booking,
322                }),
323            }
324        },
325    )
326}
327
328type OpenHeaderLine<'s> = (
329    Spanned<Date>,
330    Spanned<Account<'s>>,
331    HashSet<Spanned<Currency<'s>>>,
332    Option<Spanned<Booking>>,
333    (HashSet<Spanned<Tag<'s>>>, HashSet<Spanned<Link<'s>>>),
334);
335
336/// Matches the first line of a open.
337fn open_header_line<'s, I>() -> impl Parser<'s, I, OpenHeaderLine<'s>, Extra<'s>>
338where
339    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
340{
341    group((
342        date().map_with(spanned_extra),
343        just(Token::Open),
344        account().map_with(spanned_extra),
345        currency_list(),
346        booking().map_with(spanned_extra).or_not(),
347        tags_links(),
348    ))
349    .then_ignore(just(Token::Eol))
350    .map(|(date, _, account, currency, booking, tags_links)| {
351        (date, account, currency, booking, tags_links)
352    })
353}
354
355/// Matches zero or more currencies, comma-separated.
356fn currency_list<'s, I>() -> impl Parser<'s, I, HashSet<Spanned<Currency<'s>>>, Extra<'s>>
357where
358    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
359{
360    group((
361        currency().map_with(spanned_extra),
362        (just(Token::Comma).ignore_then(currency().map_with(spanned_extra)))
363            .repeated()
364            .collect::<Vec<_>>(),
365    ))
366    .validate(|(first_currency, mut currencies), _span, emitter| {
367        currencies.push(first_currency);
368        currencies
369            .into_iter()
370            .fold(HashSet::new(), |mut currencies, currency| {
371                if currencies.contains(&currency) {
372                    emitter.emit(Rich::custom(
373                        currency.span,
374                        format!("duplicate currency {}", currency),
375                    ))
376                } else {
377                    currencies.insert(currency);
378                }
379
380                currencies
381            })
382    })
383    .or_not()
384    .map(|currencies| currencies.unwrap_or_default())
385}
386
387/// Matches a [Account].
388fn account<'s, I>() -> impl Parser<'s, I, Account<'s>, Extra<'s>>
389where
390    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
391{
392    let s = select_ref!(Token::Account(s) => *s);
393
394    s.try_map_with(|s, e| {
395        let span = e.span();
396        //
397        // look up the account type name to see which account type it is currently mapped to
398        let parser_state: &mut extra::SimpleState<ParserState> = e.state();
399        let account_type_names = &parser_state.options.account_type_names;
400
401        Account::new(s, account_type_names).map_err(|e| Rich::custom(span, e.to_string()))
402    })
403}
404
405/// Matches a [Booking].
406fn booking<'s, I>() -> impl Parser<'s, I, Booking, Extra<'s>>
407where
408    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
409{
410    string().try_map(|s, span| Booking::try_from(s).map_err(|e| Rich::custom(span, e.to_string())))
411}
412
413/// Matches a close, including metadata, over several lines.
414pub(crate) fn close<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
415where
416    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
417{
418    group((
419        date().map_with(spanned_extra),
420        just(Token::Close),
421        account().map_with(spanned_extra),
422        tags_links(),
423    ))
424    .then_ignore(just(Token::Eol))
425    .then(metadata().map_with(spanned_extra))
426    .validate(
427        |((date, _, account, (tags, links)), mut metadata), _span, emitter| {
428            metadata.merge_tags(&tags, emitter);
429            metadata.merge_links(&links, emitter);
430
431            Directive {
432                date,
433                metadata,
434                variant: DirectiveVariant::Close(Close { account }),
435            }
436        },
437    )
438}
439
440/// Matches a commodity, including metadata, over several lines.
441pub(crate) fn commodity<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
442where
443    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
444{
445    group((
446        date().map_with(spanned_extra),
447        just(Token::Commodity),
448        currency().map_with(spanned_extra),
449        tags_links(),
450    ))
451    .then_ignore(just(Token::Eol))
452    .then(metadata().map_with(spanned_extra))
453    .validate(
454        |((date, _, currency, (tags, links)), mut metadata), _span, emitter| {
455            metadata.merge_tags(&tags, emitter);
456            metadata.merge_links(&links, emitter);
457
458            Directive {
459                date,
460                metadata,
461                variant: DirectiveVariant::Commodity(Commodity { currency }),
462            }
463        },
464    )
465}
466
467/// Matches a pad, including metadata, over several lines.
468pub(crate) fn pad<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
469where
470    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
471{
472    group((
473        date().map_with(spanned_extra),
474        just(Token::Pad),
475        account().map_with(spanned_extra),
476        account().map_with(spanned_extra),
477        tags_links(),
478    ))
479    .then_ignore(just(Token::Eol))
480    .then(metadata().map_with(spanned_extra))
481    .validate(
482        |((date, _, account, source, (tags, links)), mut metadata), _span, emitter| {
483            metadata.merge_tags(&tags, emitter);
484            metadata.merge_links(&links, emitter);
485
486            Directive {
487                date,
488                metadata,
489                variant: DirectiveVariant::Pad(Pad { account, source }),
490            }
491        },
492    )
493}
494
495/// Matches a document, including metadata, over several lines.
496pub(crate) fn document<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
497where
498    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
499{
500    group((
501        date().map_with(spanned_extra),
502        just(Token::Document),
503        account().map_with(spanned_extra),
504        string().map_with(spanned_extra),
505        tags_links(),
506    ))
507    .then_ignore(just(Token::Eol))
508    .then(metadata().map_with(spanned_extra))
509    .validate(
510        |((date, _, account, path, (tags, links)), mut metadata), _span, emitter| {
511            metadata.merge_tags(&tags, emitter);
512            metadata.merge_links(&links, emitter);
513
514            Directive {
515                date,
516                metadata,
517                variant: DirectiveVariant::Document(Document { account, path }),
518            }
519        },
520    )
521}
522
523/// Matches a note, including metadata, over several lines.
524pub(crate) fn note<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
525where
526    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
527{
528    group((
529        date().map_with(spanned_extra),
530        just(Token::Note),
531        account().map_with(spanned_extra),
532        string().map_with(spanned_extra),
533        tags_links(),
534    ))
535    .then_ignore(just(Token::Eol))
536    .then(metadata().map_with(spanned_extra))
537    .validate(
538        |((date, _, account, comment, (tags, links)), mut metadata), _span, emitter| {
539            metadata.merge_tags(&tags, emitter);
540            metadata.merge_links(&links, emitter);
541
542            Directive {
543                date,
544                metadata,
545                variant: DirectiveVariant::Note(Note { account, comment }),
546            }
547        },
548    )
549}
550
551/// Matches an event, including metadata, over several lines.
552pub(crate) fn event<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
553where
554    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
555{
556    group((
557        date().map_with(spanned_extra),
558        just(Token::Event),
559        string().map_with(spanned_extra),
560        string().map_with(spanned_extra),
561        tags_links(),
562    ))
563    .then_ignore(just(Token::Eol))
564    .then(metadata().map_with(spanned_extra))
565    .validate(
566        |((date, _, event_type, description, (tags, links)), mut metadata), _span, emitter| {
567            metadata.merge_tags(&tags, emitter);
568            metadata.merge_links(&links, emitter);
569
570            Directive {
571                date,
572                metadata,
573                variant: DirectiveVariant::Event(Event {
574                    event_type,
575                    description,
576                }),
577            }
578        },
579    )
580}
581
582/// Matches a query, including metadata, over several lines.
583pub(crate) fn query<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
584where
585    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
586{
587    group((
588        date().map_with(spanned_extra),
589        just(Token::Query),
590        string().map_with(spanned_extra),
591        string().map_with(spanned_extra),
592        tags_links(),
593    ))
594    .then_ignore(just(Token::Eol))
595    .then(metadata().map_with(spanned_extra))
596    .validate(
597        |((date, _, name, content, (tags, links)), mut metadata), _span, emitter| {
598            metadata.merge_tags(&tags, emitter);
599            metadata.merge_links(&links, emitter);
600
601            Directive {
602                date,
603                metadata,
604                variant: DirectiveVariant::Query(Query { name, content }),
605            }
606        },
607    )
608}
609
610/// Matches a custom, including metadata, over several lines.
611pub(crate) fn custom<'s, I>() -> impl Parser<'s, I, Directive<'s>, Extra<'s>>
612where
613    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
614{
615    group((
616        date().map_with(spanned_extra),
617        just(Token::Custom),
618        string().map_with(spanned_extra),
619        meta_value()
620            .map_with(spanned_extra)
621            .repeated()
622            .collect::<Vec<_>>(),
623    ))
624    .then_ignore(just(Token::Eol))
625    .then(metadata().map_with(spanned_extra))
626    .map(|((date, _, type_, values), metadata)| Directive {
627        date,
628        metadata,
629        variant: DirectiveVariant::Custom(Custom { type_, values }),
630    })
631}
632
633/// Matches the `txn` keyword or a flag.
634pub(crate) fn txn<'s, I>() -> impl Parser<'s, I, Flag, Extra<'s>>
635where
636    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
637{
638    choice((just(Token::Txn).to(Flag::default()), flag()))
639}
640
641/// Matches any flag, dedicated or overloaded
642pub(crate) fn flag<'s, I>() -> impl Parser<'s, I, Flag, Extra<'s>>
643where
644    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
645{
646    let dedicated_flag = select_ref!(Token::DedicatedFlag(flag) => *flag);
647
648    choice((
649        dedicated_flag,
650        just(Token::Asterisk).to(Flag::Asterisk),
651        just(Token::Hash).to(Flag::Hash),
652    ))
653}
654
655/// Matches a [Posting] complete with [Metadata] over several lines.
656fn posting<'s, I>() -> impl Parser<'s, I, Spanned<Posting<'s>>, Extra<'s>>
657where
658    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
659{
660    just(Token::Indent)
661        .ignore_then(
662            group((
663                flag().map_with(spanned_extra).or_not(),
664                account().map_with(spanned_extra),
665                expr_value().map_with(spanned_extra).or_not(),
666                currency().map_with(spanned_extra).or_not(),
667                cost_spec().or_not().map_with(|cost_spec, e| {
668                    cost_spec.map(|cost_spec| spanned(cost_spec, e.span()))
669                }),
670                price_annotation().or_not().map_with(|price_spec, e| {
671                    price_spec.map(|price_spec| spanned(price_spec, e.span()))
672                }),
673            ))
674            .map_with(spanned_extra)
675            .then_ignore(just(Token::Eol))
676            .then(metadata().map_with(spanned_extra))
677            .map(
678                |(
679                    Spanned {
680                        item: (flag, account, amount, currency, cost_spec, price_annotation),
681                        span: posting_span_without_metadata,
682                    },
683                    metadata,
684                )| {
685                    spanned(
686                        Posting {
687                            flag,
688                            account,
689                            amount,
690                            currency,
691                            cost_spec,
692                            price_annotation,
693                            metadata,
694                        },
695                        posting_span_without_metadata,
696                    )
697                },
698            ),
699        )
700        .labelled("posting")
701        .as_context()
702}
703
704/// Matches [Metadata], over several lines.
705fn metadata<'s, I>() -> impl Parser<'s, I, Metadata<'s>, Extra<'s>>
706where
707    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
708{
709    use Metadatum::*;
710
711    metadatum_line()
712        .repeated()
713        .collect::<Vec<_>>()
714        .validate(|metadata, _span, emitter| {
715            // collate by type of metadatum
716            metadata
717                .into_iter()
718                .fold(Metadata::default(), |mut m, item| match item {
719                    KeyValue(kv) => {
720                        use hash_map::Entry::*;
721
722                        let MetaKeyValue { key, value } = kv.item;
723
724                        let key_span = key.span;
725                        match m.key_values.entry(key) {
726                            Occupied(entry) => emitter.emit(Rich::custom(
727                                key_span,
728                                format!("duplicate key {}", entry.key()),
729                            )),
730                            Vacant(entry) => {
731                                entry.insert(value);
732                            }
733                        }
734
735                        m
736                    }
737                    Tag(tag) => {
738                        if m.tags.contains(&tag) {
739                            emitter.emit(Rich::custom(tag.span, format!("duplicate tag {}", tag)))
740                        } else {
741                            m.tags.insert(tag);
742                        }
743
744                        m
745                    }
746                    Link(link) => {
747                        if m.links.contains(&link) {
748                            emitter
749                                .emit(Rich::custom(link.span, format!("duplicate link {}", link)))
750                        } else {
751                            m.links.insert(link);
752                        }
753
754                        m
755                    }
756                })
757        })
758}
759
760/// A single instance of [Metadata]
761enum Metadatum<'a> {
762    KeyValue(Spanned<MetaKeyValue<'a>>),
763    Tag(Spanned<Tag<'a>>),
764    Link(Spanned<Link<'a>>),
765}
766
767/// Matches a single Metadatum on a single line.
768fn meta_key_value<'s, I>() -> impl Parser<'s, I, MetaKeyValue<'s>, Extra<'s>>
769where
770    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
771{
772    key()
773        .map_with(spanned_extra)
774        .then(just(Token::Colon).ignore_then(meta_value().or_not().map_with(spanned_extra)))
775        .map(|(key, value)| MetaKeyValue {
776            key,
777            value: value.map_into(|value| value.unwrap_or(MetaValue::Simple(SimpleValue::Null))),
778        })
779}
780
781/// Matches a single Metadatum on a single line.
782fn metadatum_line<'s, I>() -> impl Parser<'s, I, Metadatum<'s>, Extra<'s>>
783where
784    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
785{
786    use Metadatum::*;
787
788    just(Token::Indent)
789        .ignore_then(
790            choice((
791                meta_key_value().map_with(spanned_extra).map(KeyValue),
792                tag().map_with(spanned_extra).map(Tag),
793                link().map_with(spanned_extra).map(Link),
794            ))
795            .then_ignore(just(Token::Eol)),
796        )
797        .labelled("metadata")
798        .as_context()
799}
800
801/// Matches a non-empty [MetaValue].
802pub(crate) fn meta_value<'s, I>() -> impl Parser<'s, I, MetaValue<'s>, Extra<'s>>
803where
804    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
805{
806    use MetaValue::*;
807
808    // try for amount first
809    choice((amount().map(Amount), simple_value().map(Simple)))
810}
811
812/// Matches a non-empty [SimpleValue].
813pub(crate) fn simple_value<'s, I>() -> impl Parser<'s, I, SimpleValue<'s>, Extra<'s>>
814where
815    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
816{
817    use SimpleValue::*;
818
819    choice((
820        string().map(String),
821        currency().map(Currency),
822        account().map(Account),
823        tag().map(Tag),
824        link().map(Link),
825        date().map(Date),
826        bool().map(Bool),
827        just(Token::Null).to(Null),
828        expr_value().map(Expr),
829    ))
830}
831
832pub(crate) fn amount<'s, I>() -> impl Parser<'s, I, Amount<'s>, Extra<'s>>
833where
834    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
835{
836    group((
837        expr_value().map_with(spanned_extra),
838        currency().map_with(spanned_extra),
839    ))
840    .map(Amount::new)
841}
842
843pub(crate) fn amount_with_tolerance<'s, I>(
844) -> impl Parser<'s, I, AmountWithTolerance<'s>, Extra<'s>>
845where
846    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
847{
848    choice((
849        amount().map_with(|amount, e| AmountWithTolerance::new((spanned_extra(amount, e), None))),
850        group((
851            expr_value().map_with(spanned_extra),
852            just(Token::Tilde),
853            decimal().map_with(spanned_extra),
854            currency().map_with(spanned_extra),
855        ))
856        .map_with(|(number, _, tolerance, currency), e| {
857            AmountWithTolerance::new((
858                spanned_extra(Amount::new((number, currency)), e),
859                Some(tolerance),
860            ))
861        }),
862    ))
863}
864
865pub(crate) fn loose_amount<'s, I>() -> impl Parser<'s, I, LooseAmount<'s>, Extra<'s>>
866where
867    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
868{
869    group((
870        expr_value().map_with(spanned_extra).or_not(),
871        currency().map_with(spanned_extra).or_not(),
872    ))
873    .map(LooseAmount::new)
874}
875
876pub(crate) fn compound_amount<'s, I>() -> impl Parser<'s, I, CompoundAmount<'s>, Extra<'s>>
877where
878    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
879{
880    use CompoundAmount::*;
881
882    choice((
883        (compound_expr().then(currency())).map(|(amount, cur)| CurrencyAmount(amount, cur)),
884        compound_expr().map(BareAmount),
885        just(Token::Hash) // bare currency may or may not be preceeded by hash
886            .or_not()
887            .ignore_then(currency().map(BareCurrency)),
888    ))
889}
890
891pub(crate) fn compound_expr<'s, I>() -> impl Parser<'s, I, CompoundExprValue, Extra<'s>>
892where
893    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
894{
895    use CompoundExprValue::*;
896
897    choice((
898        // try for both per-unit and total first
899        expr_value()
900            .then_ignore(just(Token::Hash))
901            .then(expr_value())
902            .map(|(per_unit, total)| PerUnitAndTotal(per_unit, total)),
903        expr_value().then_ignore(just(Token::Hash)).map(PerUnit),
904        expr_value().map(PerUnit),
905        just(Token::Hash).ignore_then(expr_value()).map(Total),
906    ))
907}
908
909pub(crate) fn scoped_expr<'s, I>() -> impl Parser<'s, I, ScopedExprValue, Extra<'s>>
910where
911    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
912{
913    use ScopedExprValue::*;
914
915    choice((
916        expr_value().then_ignore(just(Token::Hash)).map(PerUnit),
917        expr_value().map(PerUnit),
918        just(Token::Hash).ignore_then(expr_value()).map(Total),
919    ))
920}
921
922pub(crate) fn price_annotation<'s, I>() -> impl Parser<'s, I, PriceSpec<'s>, Extra<'s>>
923where
924    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
925{
926    use PriceSpec::*;
927
928    fn scope(amount: ExprValue, is_total: bool) -> ScopedExprValue {
929        use ScopedExprValue::*;
930
931        if is_total {
932            Total(amount)
933        } else {
934            PerUnit(amount)
935        }
936    }
937
938    group((
939        choice((just(Token::At).to(false), just(Token::AtAt).to(true))),
940        expr_value().or_not(),
941        currency().or_not(),
942    ))
943    .try_map(|(is_total, amount, cur), _span| match (amount, cur) {
944        (Some(amount), Some(cur)) => Ok(CurrencyAmount(scope(amount, is_total), cur)),
945        (Some(amount), None) => Ok(BareAmount(scope(amount, is_total))),
946        (None, Some(cur)) => Ok(BareCurrency(cur)),
947        (None, None) => Ok(Unspecified),
948    })
949}
950
951/// Matches a [CostSpec].
952/// For now we only match the new syntax of single braces.
953fn cost_spec<'s, I>() -> impl Parser<'s, I, CostSpec<'s>, Extra<'s>>
954where
955    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
956{
957    use self::CompoundAmount::*;
958    use CostComp::*;
959
960    just(Token::Lcurl)
961        .ignore_then(
962            group((
963                cost_comp().map_with(spanned_extra),
964                (just(Token::Comma).ignore_then(cost_comp().map_with(spanned_extra)))
965                    .repeated()
966                    .collect::<Vec<_>>(),
967            ))
968            .or_not(), // allow for empty cost spec
969        )
970        .then_ignore(just(Token::Rcurl))
971        .try_map(move |cost_spec, span| {
972            let mut builder = match cost_spec {
973                Some((head, tail)) => {
974                    once(head).chain(tail).fold(
975                        // accumulate the `CostComp`s in a `CostSpecBuilder`
976                        CostSpecBuilder::default(),
977                        |builder, cost_comp| match cost_comp.item {
978                            CompoundAmount(compound_amount) => match compound_amount {
979                                BareCurrency(cur) => builder.currency(cur, cost_comp.span),
980                                BareAmount(amount) => builder.compound_expr(amount, cost_comp.span),
981                                CurrencyAmount(amount, cur) => builder
982                                    .compound_expr(amount, cost_comp.span)
983                                    .currency(cur, cost_comp.span),
984                            },
985                            Date(date) => builder.date(date, cost_comp.span),
986                            Label(s) => builder.label(s, cost_comp.span),
987                            Merge => builder.merge(cost_comp.span),
988                        },
989                    )
990                }
991                None => CostSpecBuilder::default(),
992            };
993            builder
994                .build()
995                .map_err(|e| Rich::custom(span, e.to_string()))
996        })
997}
998
999#[derive(PartialEq, Eq, Clone, Debug)]
1000/// One component of a cost specification.
1001/// Setting a field type multiple times is rejected by methods in [CostSpec].
1002enum CostComp<'a> {
1003    CompoundAmount(CompoundAmount<'a>),
1004    Date(Date),
1005    Label(&'a str),
1006    Merge,
1007}
1008
1009/// Matches one component of a [CostSpec].
1010fn cost_comp<'s, I>() -> impl Parser<'s, I, CostComp<'s>, Extra<'s>>
1011where
1012    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1013{
1014    use CostComp::*;
1015
1016    choice((
1017        compound_amount().map(CompoundAmount),
1018        date().map(Date),
1019        string().map(Label),
1020        just(Token::Asterisk).to(Merge),
1021    ))
1022}
1023
1024/// Matches zero or more tags or links.
1025/// Duplicates are errors.
1026pub(crate) fn tags_links<'s, I>(
1027) -> impl Parser<'s, I, (HashSet<Spanned<Tag<'s>>>, HashSet<Spanned<Link<'s>>>), Extra<'s>>
1028where
1029    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1030{
1031    choice((
1032        tag().map_with(spanned_extra).map(Either::Left),
1033        link().map_with(spanned_extra).map(Either::Right),
1034    ))
1035    .repeated()
1036    .collect::<Vec<_>>()
1037    .validate(|tags_or_links, _span, emitter| {
1038        tags_or_links.into_iter().fold(
1039            (HashSet::new(), HashSet::new()),
1040            |(mut tags, mut links), item| match item {
1041                Either::Left(tag) => {
1042                    if tags.contains(&tag) {
1043                        emitter.emit(Rich::custom(tag.span, format!("duplicate tag {}", tag)))
1044                    } else {
1045                        tags.insert(tag);
1046                    }
1047
1048                    (tags, links)
1049                }
1050                Either::Right(link) => {
1051                    if links.contains(&link) {
1052                        emitter.emit(Rich::custom(link.span, format!("duplicate link {}", link)))
1053                    } else {
1054                        links.insert(link);
1055                    }
1056
1057                    (tags, links)
1058                }
1059            },
1060        )
1061    })
1062}
1063
1064/// Matches a bool
1065pub(crate) fn bool<'s, I>() -> impl Parser<'s, I, bool, Extra<'s>>
1066where
1067    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1068{
1069    choice((just(Token::True).to(true), just(Token::False).to(false)))
1070}
1071
1072/// Match and evaluate an expression
1073pub(crate) fn expr_value<'s, I>() -> impl Parser<'s, I, ExprValue, Extra<'s>>
1074where
1075    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1076{
1077    expr().map(ExprValue::from)
1078}
1079
1080/// Match an expression
1081pub(crate) fn expr<'s, I>() -> impl Parser<'s, I, Expr, Extra<'s>>
1082where
1083    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1084{
1085    use Token::*;
1086
1087    recursive(|expr| {
1088        // Match a parenthesized expression
1089        let parens = expr
1090            .clone()
1091            .delimited_by(just(Lparen), just(Rparen))
1092            .map(|x| Expr::Paren(Box::new(x)));
1093
1094        // Match a bare number
1095        let number = select_ref! { Number(x) => Expr::Value(*x) };
1096
1097        // Match a factor of an expression
1098        let factor = choice((just(Minus), just(Plus)))
1099            .or_not()
1100            .then(number.or(parens.clone()))
1101            .map(|(negated, x)| {
1102                if negated.is_some_and(|tok| tok == Minus) {
1103                    Expr::Neg(Box::new(x))
1104                } else {
1105                    x
1106                }
1107            });
1108
1109        // Match a product of factors
1110        let product = factor.clone().foldl(
1111            choice((
1112                just(Asterisk).to(Expr::Mul as fn(_, _) -> _),
1113                just(Slash).to(Expr::Div as fn(_, _) -> _),
1114            ))
1115            .then(factor.clone())
1116            .repeated(),
1117            |lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs)),
1118        );
1119
1120        // Match an expression
1121        product.clone().foldl(
1122            choice((
1123                just(Plus).to(Expr::Add as fn(_, _) -> _),
1124                just(Minus).to(Expr::Sub as fn(_, _) -> _),
1125            ))
1126            .then(product.clone())
1127            .repeated(),
1128            |lhs, (op, rhs)| op(Box::new(lhs), Box::new(rhs)),
1129        )
1130    })
1131}
1132
1133/// Matches a Tag
1134fn tag<'s, I>() -> impl Parser<'s, I, Tag<'s>, Extra<'s>>
1135where
1136    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1137{
1138    let tag = select_ref!(Token::Tag(s) => *s);
1139    tag.try_map(|s, span| {
1140        TagOrLinkIdentifier::try_from(s)
1141            .map(Tag)
1142            .map_err(|e| Rich::custom(span, e.to_string()))
1143    })
1144}
1145
1146/// Matches a Link
1147fn link<'s, I>() -> impl Parser<'s, I, Link<'s>, Extra<'s>>
1148where
1149    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1150{
1151    let link = select_ref!(Token::Link(s) => *s);
1152    link.try_map(|s, span| {
1153        TagOrLinkIdentifier::try_from(s)
1154            .map(Link)
1155            .map_err(|e| Rich::custom(span, e.to_string()))
1156    })
1157}
1158
1159/// Matches a Key.
1160/// Note that we may have to hijack another token and use it as a key,
1161/// since keywords do get used as metadata keys.
1162fn key<'s, I>() -> impl Parser<'s, I, Key<'s>, Extra<'s>>
1163where
1164    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1165{
1166    let key = select_ref!(Token::Key(s) => *s);
1167
1168    key.try_map(|s, span| Key::try_from(s).map_err(|e| Rich::custom(span, e.to_string())))
1169}
1170
1171/// Matches a Currency
1172fn currency<'s, I>() -> impl Parser<'s, I, Currency<'s>, Extra<'s>>
1173where
1174    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1175{
1176    let currency = select_ref!(Token::Currency(s) => *s);
1177    currency.try_map(|s, span| Currency::try_from(s).map_err(|e| Rich::custom(span, e.to_string())))
1178}
1179
1180/// Matches a Date
1181fn date<'s, I>() -> impl Parser<'s, I, Date, Extra<'s>>
1182where
1183    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1184{
1185    select_ref!(Token::Date(date) => *date)
1186}
1187
1188/// Matches a Decimal
1189fn decimal<'s, I>() -> impl Parser<'s, I, Decimal, Extra<'s>>
1190where
1191    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1192{
1193    select_ref!(Token::Number(x) => *x)
1194}
1195
1196/// Matches a string
1197fn string<'s, I>() -> impl Parser<'s, I, &'s str, Extra<'s>>
1198where
1199    I: BorrowInput<'s, Token = Token<'s>, Span = Span>,
1200{
1201    let string = select_ref!(Token::StringLiteral(s) => s.deref());
1202
1203    string.map_with(|s, e| {
1204        let span = e.span();
1205        let simple_state: &mut extra::SimpleState<ParserState> = e.state();
1206        let parser_state: &mut ParserState = simple_state;
1207        let ParserState { warnings, options } = parser_state;
1208        let line_count = s.chars().filter(|c| *c == '\n').count() + 1;
1209        let long_string_maxlines = options.long_string_maxlines.as_ref().map(|n| *n.item()).unwrap_or(DEFAULT_LONG_STRING_MAXLINES);
1210        if line_count > long_string_maxlines {
1211            let option_span = options.long_string_maxlines.as_ref().map(|s| s.source.value);
1212            let is_default = option_span.is_none();
1213            let warning = Warning::new(
1214                "string too long",
1215                format!(
1216                    "exceeds long_string_maxlines({}{}) - hint: would require option \"long_string_maxlines\" \"{}\"",
1217                    if is_default { "default " } else { "" },
1218                    long_string_maxlines,
1219                    line_count
1220                ),
1221                span,
1222            );
1223
1224            if let Some(option_span) = option_span {
1225                warnings.push(warning.related_to_named_span("max allowed", option_span));
1226            } else {
1227                warnings.push(warning)
1228            }
1229        }
1230        s
1231    })
1232}
1233
1234impl<'a> Metadata<'a> {
1235    pub(crate) fn merge_tags<E>(&mut self, tags: &HashSet<Spanned<Tag<'a>>>, emitter: &mut E)
1236    where
1237        E: Emit<ParserError<'a>>,
1238    {
1239        for tag in tags {
1240            match self.tags.get(tag) {
1241                None => {
1242                    self.tags.insert(*tag);
1243                }
1244                Some(existing_tag) => {
1245                    let error = Rich::custom(existing_tag.span, format!("duplicate tag {}", tag));
1246                    // TODO: label the error in context, type annotations need fixing for chumsky 1.0.0-alpha7 to alpha8 transition
1247                    // LabelError::<
1248                    //     chumsky::input::WithContext<
1249                    //         Span,
1250                    //         chumsky::input::SpannedInput<Token<'_>, Span, &[(Token<'_>, Span)]>,
1251                    //     >,
1252                    //     &str,
1253                    // >::in_context(&mut error, "tag", tag.span);
1254                    emitter.emit(error);
1255                }
1256            }
1257        }
1258    }
1259
1260    // Augment only for tags which are not already present, others silently ignored.
1261    // This is so that tags attached to directives take precedence over the push stack.
1262    pub(crate) fn augment_tags(&mut self, tags: &HashMap<Spanned<Tag<'a>>, Vec<Spanned<Tag<'a>>>>) {
1263        for (tag, spans) in tags.iter() {
1264            if !self.tags.contains(tag) {
1265                let most_recently_pushed_tag = spans.last().unwrap_or(tag);
1266                self.tags.insert(*most_recently_pushed_tag);
1267            }
1268        }
1269    }
1270
1271    pub(crate) fn merge_links<E>(&mut self, links: &HashSet<Spanned<Link<'a>>>, emitter: &mut E)
1272    where
1273        E: Emit<ParserError<'a>>,
1274    {
1275        for link in links {
1276            match self.links.get(link) {
1277                None => {
1278                    self.links.insert(*link);
1279                }
1280                Some(existing_link) => {
1281                    let error =
1282                        Rich::custom(existing_link.span, format!("duplicate link {}", link));
1283                    // TODO: label the error in context, type annotations need fixing for chumsky 1.0.0-alpha7 to alpha8 transition
1284                    // LabelError::<
1285                    //     chumsky::input::WithContext<
1286                    //         Span,
1287                    //         chumsky::input::SpannedInput<Token<'_>, Span, &[(Token<'_>, Span)]>,
1288                    //     >,
1289                    //     &str,
1290                    // >::in_context(&mut error, "link", link.span);
1291                    emitter.emit(error);
1292                }
1293            }
1294        }
1295    }
1296
1297    // Augment only for keys which are not already present, others silently ignored.
1298    // This is so that key/values attached to directives take precedence over the push stack.
1299    pub(crate) fn augment_key_values(
1300        &mut self,
1301        key_values: &HashMap<Spanned<Key<'a>>, Vec<(Span, Spanned<MetaValue<'a>>)>>,
1302    ) {
1303        for (key, values) in key_values {
1304            if !self.key_values.contains_key(key) {
1305                let (key_span, value) = values.last().unwrap();
1306                self.key_values.insert(
1307                    spanned(*key.item(), *key_span),
1308                    // Sadly we do have to clone the value here, so we can
1309                    // merge in metadata key/values from the push/pop stack
1310                    // without consuming it.
1311                    value.clone(),
1312                );
1313            }
1314        }
1315    }
1316}
1317
1318type ParserError<'a> = Rich<'a, Token<'a>, Span>;
1319
1320impl From<ParserError<'_>> for Error {
1321    fn from(error: ParserError) -> Self {
1322        let error = error.map_token(|tok| tok.to_string());
1323
1324        Error::with_contexts(
1325            error.to_string(),
1326            error.reason().to_string(),
1327            *error.span(),
1328            error
1329                .contexts()
1330                .map(|(label, span)| (label.to_string(), *span))
1331                .collect(),
1332        )
1333    }
1334}
1335
1336// the state we thread through the parsers
1337#[derive(Default, Debug)]
1338pub(crate) struct ParserState<'a> {
1339    pub(crate) options: ParserOptions<'a>,
1340    pub(crate) warnings: Vec<Warning>,
1341}
1342
1343// our ParserExtra with our error and state types
1344pub(crate) type Extra<'a> = extra::Full<ParserError<'a>, extra::SimpleState<ParserState<'a>>, ()>;
1345
/// A sink for errors of type `E`.
///
/// Abstracting over the sink lets our own functions emit errors without being tied to
/// one particular emitter.
pub(crate) trait Emit<E> {
    /// Hands `err` over to the sink.
    fn emit(&mut self, err: E);
}
1350
1351impl<E> Emit<E> for chumsky::input::Emitter<E> {
1352    fn emit(&mut self, err: E) {
1353        self.emit(err)
1354    }
1355}
1356
1357// simple collection of errors in a Vec
1358impl<E> Emit<E> for Vec<Error>
1359where
1360    E: Into<Error>,
1361{
1362    fn emit(&mut self, err: E) {
1363        self.push(err.into())
1364    }
1365}
1366// a degenerate error sink
1367struct NullEmitter;
1368
1369impl<E> Emit<E> for NullEmitter {
1370    fn emit(&mut self, _err: E) {}
1371}
1372
1373mod tests;