beancount_parser/
lib.rs

1#![cfg_attr(docsrs, feature(doc_auto_cfg))]
2
3//! A parsing library for the [beancount language](https://beancount.github.io/docs/beancount_language_syntax.html)
4//!
5//! # Usage
6//!
7//! Use [`parse`] to get an instance of [`BeancountFile`].
8//!
9//! This is generic over the decimal type. The examples use `f64` as a decimal type.
10//! You may also use `Decimal` from the [rust_decimal crate].
11//!
12//! [rust_decimal crate]: https://docs.rs/rust_decimal
13//!
14//! ```
15//! use beancount_parser::{BeancountFile, DirectiveContent};
16//!
17//! # fn main() -> Result<(), beancount_parser::Error> {
18//! let input = r#"
19//! 2023-05-20 * "Coffee beans"
20//!   Expenses:Groceries   10 CHF
21//!   Assets:Checking
22//! "#;
23//!
24//! // Parse into the `BeancountFile` struct:
25//! let beancount: BeancountFile<f64> = input.parse()?;
26//!
27//! let directive = &beancount.directives[0];
28//! assert_eq!(directive.date.year, 2023);
29//! assert_eq!(directive.date.month, 5);
30//! assert_eq!(directive.date.day, 20);
31//!
32//! let DirectiveContent::Transaction(trx) = &directive.content else {
33//!     panic!("was not a transaction")
34//! };
35//! assert_eq!(trx.narration.as_deref(), Some("Coffee beans"));
36//! assert_eq!(trx.postings[0].account.as_str(), "Expenses:Groceries");
37//! assert_eq!(trx.postings[0].amount.as_ref().unwrap().value, 10.0);
38//! assert_eq!(trx.postings[0].amount.as_ref().unwrap().currency.as_str(), "CHF");
39//! assert_eq!(trx.postings[1].account.as_str(), "Assets:Checking");
40//! assert_eq!(trx.postings[1].amount, None);
41//! # Ok(()) }
42//! ```
43
44use std::{collections::HashSet, fs::File, io::Read, path::PathBuf, str::FromStr};
45
46use nom::{
47    branch::alt,
48    bytes::complete::{tag, take_while},
49    character::complete::{char, line_ending, not_line_ending, space0, space1},
50    combinator::{all_consuming, cut, eof, iterator, map, not, opt, value},
51    sequence::{delimited, preceded, terminated, tuple},
52    Finish, Parser,
53};
54use nom_locate::position;
55
56use crate::iterator::Iter;
57pub use crate::{
58    account::{Account, Balance, Close, Open, Pad},
59    amount::{Amount, Currency, Decimal, Price},
60    date::Date,
61    error::{ConversionError, Error, ReadFileError},
62    event::Event,
63    transaction::{Cost, Link, Posting, PostingPrice, Tag, Transaction},
64};
65
66#[deprecated(note = "use `metadata::Value` instead", since = "1.0.0-beta.3")]
67#[doc(hidden)]
68pub type MetadataValue<D> = metadata::Value<D>;
69
70mod account;
71mod amount;
72mod date;
73mod error;
74mod event;
75mod iterator;
76pub mod metadata;
77mod transaction;
78
79/// Parse the input beancount file and return an instance of [`BeancountFile`] on success
80///
81/// It is generic over the [`Decimal`] type `D`.
82///
83/// See the root crate documentation for an example.
84///
85/// # Errors
86///
87/// Returns an [`Error`] in case of invalid beancount syntax found.
88pub fn parse<D: Decimal>(input: &str) -> Result<BeancountFile<D>, Error> {
89    input.parse()
90}
91
92/// Parse the beancount file and return an iterator over `Result<Entry<D>, Result>`
93///
94/// It is generic over the [`Decimal`] type `D`.
95///
96/// See [`Entry`]
97///
98/// # Errors
99///
100/// The iterator will emit an [`Error`] in case of invalid beancount syntax found.
101pub fn parse_iter<'a, D: Decimal + 'a>(
102    input: &'a str,
103) -> impl Iterator<Item = Result<Entry<D>, Error>> + 'a {
104    Iter::new(input, iterator(Span::new(input), entry::<D>))
105}
106
107impl<D: Decimal> FromStr for BeancountFile<D> {
108    type Err = Error;
109    fn from_str(input: &str) -> Result<Self, Self::Err> {
110        parse_iter(input).collect()
111    }
112}
113
114/// Read the files from disk and parse their content.
115///
116/// It follows the `include` directives found.
117///
118/// # Errors
119///
120/// Returns an error if any file could not be read (IO error)
121/// or if there is a beancount syntax error in any file read
122pub fn read_files<D: Decimal, F: FnMut(Entry<D>)>(
123    files: impl IntoIterator<Item = PathBuf>,
124    mut on_entry: F,
125) -> Result<(), ReadFileError> {
126    let mut loaded: HashSet<PathBuf> = HashSet::new();
127    let mut pending: Vec<PathBuf> = files
128        .into_iter()
129        .map(|p| p.canonicalize())
130        .collect::<Result<_, _>>()?;
131    let mut buffer = String::new();
132    while let Some(path) = pending.pop() {
133        if loaded.contains(&path) {
134            continue;
135        }
136        loaded.insert(path.clone());
137        buffer.clear();
138        File::open(&path)?.read_to_string(&mut buffer)?;
139        for result in parse_iter::<D>(&buffer) {
140            let entry = result?;
141            match entry {
142                Entry::Include(include) => {
143                    let path = if include.is_relative() {
144                        let Some(parent) = path.parent() else {
145                            unreachable!("there must be a parent if the file was valid")
146                        };
147                        parent.join(include)
148                    } else {
149                        include
150                    };
151                    let path = path.canonicalize()?;
152                    if !loaded.contains(&path) {
153                        pending.push(path);
154                    }
155                }
156                entry => on_entry(entry),
157            }
158        }
159    }
160    Ok(())
161}
162
/// Main struct representing a parsed beancount file.
///
/// To get an instance of this, use [`parse`].
///
/// The type parameter `D` is the decimal type used by the contained directives.
///
/// For an example, look at the root crate documentation.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct BeancountFile<D> {
    /// List of beancount options
    ///
    /// See: <https://beancount.github.io/docs/beancount_language_syntax.html#options>
    pub options: Vec<BeanOption>,
    /// Paths of include directives
    ///
    /// See: <https://beancount.github.io/docs/beancount_language_syntax.html#includes>
    pub includes: Vec<PathBuf>,
    /// List of [`Directive`] found in the file
    pub directives: Vec<Directive<D>>,
}
182
183impl<D> Default for BeancountFile<D> {
184    fn default() -> Self {
185        Self {
186            options: Vec::new(),
187            includes: Vec::new(),
188            directives: Vec::new(),
189        }
190    }
191}
192
193impl<D> BeancountFile<D> {
194    /// Returns the first value found for the option
195    ///
196    /// If the option is declared multiple times, this function returns the first one found.
197    ///
198    /// See [`Self::options`] to get all declared options.
199    ///
200    /// Syntax: <https://beancount.github.io/docs/beancount_language_syntax.html#options>
201    ///
202    /// # Example
203    ///
204    /// ```
205    /// use beancount_parser::BeancountFile;
206    /// let input = r#"
207    /// option "favorite_color" "blue"
208    /// option "operating_currency" "CHF"
209    /// option "operating_currency" "PLN"
210    /// "#;
211    /// let beancount: BeancountFile<f64> = input.parse().unwrap();
212    /// assert_eq!(beancount.option("favorite_color"), Some("blue"));
213    /// assert_eq!(beancount.option("operating_currency"), Some("CHF"));
214    /// assert_eq!(beancount.option("foo"), None);
215    /// ```
216    #[must_use]
217    pub fn option(&self, key: &str) -> Option<&str> {
218        self.options
219            .iter()
220            .find(|opt| opt.name == key)
221            .map(|opt| &opt.value[..])
222    }
223}
224
225impl<D> Extend<Entry<D>> for BeancountFile<D> {
226    fn extend<T: IntoIterator<Item = Entry<D>>>(&mut self, iter: T) {
227        for entry in iter {
228            match entry {
229                Entry::Directive(d) => self.directives.push(d),
230                Entry::Option(o) => self.options.push(o),
231                Entry::Include(p) => self.includes.push(p),
232            }
233        }
234    }
235}
236
237impl<D> FromIterator<Entry<D>> for BeancountFile<D> {
238    fn from_iter<T: IntoIterator<Item = Entry<D>>>(iter: T) -> Self {
239        let mut file = BeancountFile::default();
240        file.extend(iter);
241        file
242    }
243}
244
/// A beancount "directive"
///
/// It has fields common to all directives, and a [`Directive::content`] field with
/// a different content for each directive type.
///
/// A single directive can also be parsed on its own through the [`FromStr`] implementation.
///
/// ```
/// # use beancount_parser::{BeancountFile, DirectiveContent};
/// let input = r#"
/// 2022-01-01 open Assets:Cash
/// 2022-01-01 * "Grocery shopping"
///   Expenses:Groceries  10 CHF
///   Assets:Cash
/// "#;
/// let beancount: BeancountFile<f64> = input.parse().unwrap();
/// assert_eq!(beancount.directives.len(), 2);
/// for directive in beancount.directives {
///    println!("line: {}", directive.line_number);
///    println!("metadata: {:#?}", directive.metadata);
///    match directive.content {
///       DirectiveContent::Open(open) => println!("open account directive: {open:?}"),
///       DirectiveContent::Transaction(trx) => println!("transaction: {trx:?}"),
///       other => println!("unknown directive: {other:?}"),
///    }
/// }
/// ```
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct Directive<D> {
    /// Date of the directive
    pub date: Date,
    /// Content of the directive that is specific to each directive type
    pub content: DirectiveContent<D>,
    /// Metadata associated to the directive
    ///
    /// See the [`metadata`] module for more
    pub metadata: metadata::Map<D>,
    /// Line number where the directive was found in the input file
    ///
    /// This is the line on which the date of the directive starts.
    pub line_number: u32,
}
284
285impl<D: Decimal> FromStr for Directive<D> {
286    type Err = Error;
287    fn from_str(s: &str) -> Result<Self, Self::Err> {
288        match all_consuming(directive)(Span::new(s)).finish() {
289            Ok((_, d)) => Ok(d),
290            Err(err) => Err(Error::new(s, err.input)),
291        }
292    }
293}
294
/// Directive specific content
///
/// There is one variant per supported kind of directive.
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum DirectiveContent<D> {
    /// Content of a transaction
    Transaction(Transaction<D>),
    /// Content of a `price` directive
    Price(Price<D>),
    /// Content of a `balance` directive
    Balance(Balance<D>),
    /// Content of an `open` directive
    Open(Open),
    /// Content of a `close` directive
    Close(Close),
    /// Content of a `pad` directive
    Pad(Pad),
    /// Currency declared by a `commodity` directive
    Commodity(Currency),
    /// Content of an `event` directive
    Event(Event),
}
309
// Parser input: the source text wrapped in a `nom_locate::LocatedSpan`.
type Span<'a> = nom_locate::LocatedSpan<&'a str>;
// Result type of the parser functions: `nom::IResult` over `Span` with nom's default error type.
type IResult<'a, O> = nom::IResult<Span<'a>, O>;
312
/// Entry in the beancount syntax
///
/// It is more general than [`Directive`], as an entry can also be an option or an include.
#[allow(missing_docs)]
#[non_exhaustive]
#[derive(Debug, Clone)]
pub enum Entry<D> {
    /// A dated directive
    Directive(Directive<D>),
    /// An `option` declaration
    Option(BeanOption),
    /// Path given to an `include` directive
    Include(PathBuf),
}
324
/// Internal result of the `entry` parser.
///
/// It has the same variants as [`Entry`], plus variants for the lines
/// that are recognized but not exposed as a public entry.
enum RawEntry<D> {
    /// A dated directive
    Directive(Directive<D>),
    /// An `option` declaration
    Option(BeanOption),
    /// Path given to an `include` directive
    Include(PathBuf),
    /// Tag given to a `pushtag` operation
    PushTag(Tag),
    /// Tag given to a `poptag` operation
    PopTag(Tag),
    /// Any other line, which is consumed without being interpreted
    Comment,
}
333
/// A beancount option
///
/// See: <https://beancount.github.io/docs/beancount_language_syntax.html#options>
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct BeanOption {
    /// Name of the option
    pub name: String,
    /// Value of the option
    pub value: String,
}
345
346fn entry<D: Decimal>(input: Span<'_>) -> IResult<'_, RawEntry<D>> {
347    alt((
348        directive.map(RawEntry::Directive),
349        option.map(|(name, value)| RawEntry::Option(BeanOption { name, value })),
350        include.map(|p| RawEntry::Include(p)),
351        tag_stack_operation,
352        line.map(|()| RawEntry::Comment),
353    ))(input)
354}
355
356fn directive<D: Decimal>(input: Span<'_>) -> IResult<'_, Directive<D>> {
357    let (input, position) = position(input)?;
358    let (input, date) = date::parse(input)?;
359    let (input, _) = cut(space1)(input)?;
360    let (input, (content, metadata)) = alt((
361        map(transaction::parse, |(t, m)| {
362            (DirectiveContent::Transaction(t), m)
363        }),
364        tuple((
365            terminated(
366                alt((
367                    map(
368                        preceded(tag("price"), cut(preceded(space1, amount::price))),
369                        DirectiveContent::Price,
370                    ),
371                    map(
372                        preceded(tag("balance"), cut(preceded(space1, account::balance))),
373                        DirectiveContent::Balance,
374                    ),
375                    map(
376                        preceded(tag("open"), cut(preceded(space1, account::open))),
377                        DirectiveContent::Open,
378                    ),
379                    map(
380                        preceded(tag("close"), cut(preceded(space1, account::close))),
381                        DirectiveContent::Close,
382                    ),
383                    map(
384                        preceded(tag("pad"), cut(preceded(space1, account::pad))),
385                        DirectiveContent::Pad,
386                    ),
387                    map(
388                        preceded(tag("commodity"), cut(preceded(space1, amount::currency))),
389                        DirectiveContent::Commodity,
390                    ),
391                    map(
392                        preceded(tag("event"), cut(preceded(space1, event::parse))),
393                        DirectiveContent::Event,
394                    ),
395                )),
396                end_of_line,
397            ),
398            metadata::parse,
399        )),
400    ))(input)?;
401    Ok((
402        input,
403        Directive {
404            date,
405            content,
406            metadata,
407            line_number: position.location_line(),
408        },
409    ))
410}
411
412fn option(input: Span<'_>) -> IResult<'_, (String, String)> {
413    let (input, _) = tag("option")(input)?;
414    let (input, key) = preceded(space1, string)(input)?;
415    let (input, value) = preceded(space1, string)(input)?;
416    let (input, ()) = end_of_line(input)?;
417    Ok((input, (key, value)))
418}
419
420fn include(input: Span<'_>) -> IResult<'_, PathBuf> {
421    let (input, _) = tag("include")(input)?;
422    let (input, path) = cut(delimited(space1, string, end_of_line))(input)?;
423    Ok((input, path.into()))
424}
425
426fn tag_stack_operation<D>(input: Span<'_>) -> IResult<'_, RawEntry<D>> {
427    alt((
428        preceded(tuple((tag("pushtag"), space1)), transaction::parse_tag).map(RawEntry::PushTag),
429        preceded(tuple((tag("poptag"), space1)), transaction::parse_tag).map(RawEntry::PopTag),
430    ))(input)
431}
432
433fn end_of_line(input: Span<'_>) -> IResult<'_, ()> {
434    let (input, _) = space0(input)?;
435    let (input, _) = opt(comment)(input)?;
436    let (input, _) = alt((line_ending, eof))(input)?;
437    Ok((input, ()))
438}
439
440fn comment(input: Span<'_>) -> IResult<'_, ()> {
441    let (input, _) = char(';')(input)?;
442    let (input, _) = not_line_ending(input)?;
443    Ok((input, ()))
444}
445
446fn line(input: Span<'_>) -> IResult<'_, ()> {
447    let (input, _) = not_line_ending(input)?;
448    let (input, _) = line_ending(input)?;
449    Ok((input, ()))
450}
451
452fn empty_line(input: Span<'_>) -> IResult<'_, ()> {
453    let (input, ()) = not(eof)(input)?;
454    end_of_line(input)
455}
456
457fn string(input: Span<'_>) -> IResult<'_, String> {
458    let (input, _) = char('"')(input)?;
459    let mut string = String::new();
460    let take_data = take_while(|c: char| c != '"' && c != '\\');
461    let (mut input, mut part) = take_data(input)?;
462    while !part.fragment().is_empty() {
463        string.push_str(part.fragment());
464        let (new_input, escaped) =
465            opt(alt((value('"', tag("\\\"")), value('\\', tag("\\\\")))))(input)?;
466        let Some(escaped) = escaped else { break };
467        string.push(escaped);
468        let (new_input, new_part) = take_data(new_input)?;
469        input = new_input;
470        part = new_part;
471    }
472    let (input, _) = char('"')(input)?;
473    Ok((input, string))
474}
475
// Error type shared by the chumsky-based parsers (only compiled for tests).
#[cfg(test)]
type ChumskyError = chumsky::error::Simple<char>;

// Shorthand for a chumsky parser over `char` input that reports `ChumskyError`.
#[cfg(test)]
trait ChumskyParser<O>: chumsky::Parser<char, O, Error = ChumskyError> {}

// Blanket implementation: every matching chumsky parser is a `ChumskyParser`.
#[cfg(test)]
impl<O, P: chumsky::Parser<char, O, Error = ChumskyError>> ChumskyParser<O> for P {}
484
// chumsky-based parsers, only compiled for tests.
#[cfg(test)]
mod chumksy {
    use chumsky::prelude::*;

    use crate::ChumskyParser;

    /// Parser of a double-quoted string, yielding its unescaped content.
    ///
    /// Inside the quotes, `\"` yields a double quote and `\\` yields a backslash.
    /// Any other character that is not a double quote is taken as is.
    pub(crate) fn string() -> impl ChumskyParser<String> {
        choice((just("\\\"").to('"'), just("\\\\").to('\\'), just('"').not()))
            .repeated()
            .delimited_by(just('"'), just('"'))
            .collect()
            .labelled("string")
    }

    #[cfg(test)]
    mod tests {
        use super::*;
        use rstest::rstest;

        // Each case is `(input, expected unescaped content)`.
        #[rstest]
        #[case::empty("\"\"", "")]
        #[case::normal("\"hello\"", "hello")]
        #[case::escaped_quote("\"hello \\\"world\\\"\"", "hello \"world\"")]
        #[case::escaped_backslash("\"hello\\\\world\"", "hello\\world")]
        fn should_parse_valid_string(#[case] input: &str, #[case] expected: &str) {
            // `then_ignore(end())` requires the whole input to be consumed.
            let string: String = string().then_ignore(end()).parse(input).unwrap();
            assert_eq!(string, expected);
        }

        // Inputs that must be rejected: missing opening or closing quote.
        #[rstest]
        #[case::nothing("")]
        #[case::not_quoted("hello")]
        #[case::not_closed("\"hello")]
        #[case::not_closed_escaped("\"hello\\\"")]
        fn should_not_parse_invalid_string(#[case] input: &str) {
            let result: Result<String, _> = string().then_ignore(end()).parse(input);
            assert!(result.is_err(), "{result:?}");
        }
    }
}