Skip to main content

beancount_parser/
lib.rs

1//! A parsing library for the [beancount language](https://beancount.github.io/docs/beancount_language_syntax.html)
2//!
3//! # Usage
4//!
5//! Use [`parse`] to get an instance of [`BeancountFile`].
6//!
7//! This is generic over the decimal type. The examples use `f64` as a decimal type.
8//! You may also use `Decimal` from the [rust_decimal crate].
9//!
10//! [rust_decimal crate]: https://docs.rs/rust_decimal
11//!
12//! ```
13//! use beancount_parser::{BeancountFile, DirectiveContent};
14//!
15//! # fn main() -> Result<(), beancount_parser::Error> {
16//! let input = r#"
17//! 2023-05-20 * "Coffee beans"
18//!   Expenses:Groceries   10 CHF
19//!   Assets:Checking
20//! "#;
21//!
22//! // Parse into the `BeancountFile` struct:
23//! let beancount: BeancountFile<f64> = input.parse()?;
24//!
25//! let directive = &beancount.directives[0];
26//! assert_eq!(directive.date.year, 2023);
27//! assert_eq!(directive.date.month, 5);
28//! assert_eq!(directive.date.day, 20);
29//!
30//! let DirectiveContent::Transaction(trx) = &directive.content else {
31//!     panic!("was not a transaction")
32//! };
33//! assert_eq!(trx.narration.as_deref(), Some("Coffee beans"));
34//! assert_eq!(trx.postings[0].account.as_str(), "Expenses:Groceries");
35//! assert_eq!(trx.postings[0].amount.as_ref().unwrap().value, 10.0);
36//! assert_eq!(trx.postings[0].amount.as_ref().unwrap().currency.as_str(), "CHF");
37//! assert_eq!(trx.postings[1].account.as_str(), "Assets:Checking");
38//! assert_eq!(trx.postings[1].amount, None);
39//! # Ok(()) }
40//! ```
41
42use std::{collections::HashSet, fs::File, io::Read, path::PathBuf, str::FromStr};
43
44use nom::{
45    branch::alt,
46    bytes::complete::{tag, take_while},
47    character::complete::{char, line_ending, not_line_ending, space0, space1},
48    combinator::{all_consuming, cut, eof, iterator, map, not, opt, value},
49    sequence::{delimited, preceded, terminated},
50    Finish, Parser,
51};
52use nom_locate::position;
53
54pub use crate::{
55    account::{Account, Balance, Close, Open, Pad},
56    amount::{Amount, Currency, Decimal, Price},
57    date::Date,
58    error::{ConversionError, Error},
59    event::Event,
60    transaction::{Cost, Link, Posting, PostingPrice, Tag, Transaction},
61};
62use crate::{
63    error::{ReadFileErrorContent, ReadFileErrorV2},
64    iterator::Iter,
65};
66
// Backward-compatibility alias for `metadata::Value`.
// Deprecated and hidden from the generated documentation; new code should name
// `metadata::Value` directly.
#[deprecated(note = "use `metadata::Value` instead", since = "1.0.0-beta.3")]
#[doc(hidden)]
pub type MetadataValue<D> = metadata::Value<D>;
70
71mod account;
72mod amount;
73mod date;
74mod error;
75mod event;
76mod iterator;
77pub mod metadata;
78mod transaction;
79
80/// Parse the input beancount file and return an instance of [`BeancountFile`] on success
81///
82/// It is generic over the [`Decimal`] type `D`.
83///
84/// See the root crate documentation for an example.
85///
86/// # Errors
87///
88/// Returns an [`Error`] in case of invalid beancount syntax found.
89pub fn parse<D: Decimal>(input: &str) -> Result<BeancountFile<D>, Error> {
90    input.parse()
91}
92
93/// Parse the beancount file and return an iterator over `Result<Entry<D>, Result>`
94///
95/// It is generic over the [`Decimal`] type `D`.
96///
97/// See [`Entry`]
98///
99/// # Errors
100///
101/// The iterator will emit an [`Error`] in case of invalid beancount syntax found.
102pub fn parse_iter<'a, D: Decimal + 'a>(
103    input: &'a str,
104) -> impl Iterator<Item = Result<Entry<D>, Error>> + 'a {
105    Iter::new(input, iterator(Span::new(input), entry::<D>))
106}
107
108impl<D: Decimal> FromStr for BeancountFile<D> {
109    type Err = Error;
110    fn from_str(input: &str) -> Result<Self, Self::Err> {
111        parse_iter(input).collect()
112    }
113}
114
115/// Read the files from disk and parse their content. Invokes `on_entry` for each entries found.
116///
117/// It follows the `include` directives found.
118///
119/// # Errors
120///
121/// Returns an error if any file could not be read (IO error)
122/// or if there is a beancount syntax error in any file read
123#[allow(deprecated)]
124#[deprecated(
125    since = "2.4.0",
126    note = "use `read_files_v2` or `read_files_to_vec` instead"
127)]
128pub fn read_files<D: Decimal, F: FnMut(Entry<D>)>(
129    files: impl IntoIterator<Item = PathBuf>,
130    on_entry: F,
131) -> Result<(), error::ReadFileError> {
132    read_files_v2(files, on_entry).map_err(|err| match err.error {
133        ReadFileErrorContent::Io(err) => error::ReadFileError::Io(err),
134        ReadFileErrorContent::Syntax(err) => error::ReadFileError::Syntax(err),
135    })
136}
137
138/// Read the files from disk and parse their content. Returns a [`Vec`] of each entries found.
139///
140/// It follows the `include` directives found.
141///
142/// # Errors
143///
144/// Returns an error if any file could not be read (IO error)
145/// or if there is a beancount syntax error in any file read
146pub fn read_files_to_vec<D: Decimal>(
147    files: impl IntoIterator<Item = PathBuf>,
148) -> Result<Vec<Entry<D>>, ReadFileErrorV2> {
149    let mut vec = Vec::new();
150    read_files_v2(files, |entry| vec.push(entry))?;
151    Ok(vec)
152}
153
154/// Read the files from disk and parse their content.
155///
156/// It follows the `include` directives found.
157///
158/// # Errors
159///
160/// Returns an error if any file could not be read (IO error)
161/// or if there is a beancount syntax error in any file read
162pub fn read_files_v2<D: Decimal, F: FnMut(Entry<D>)>(
163    files: impl IntoIterator<Item = PathBuf>,
164    mut on_entry: F,
165) -> Result<(), ReadFileErrorV2> {
166    let mut loaded: HashSet<PathBuf> = HashSet::new();
167    let mut pending: Vec<PathBuf> = files
168        .into_iter()
169        .map(|p| {
170            p.canonicalize()
171                .map_err(|err| ReadFileErrorV2::from_io(p, err))
172        })
173        .collect::<Result<_, _>>()?;
174    let mut buffer = String::new();
175    while let Some(path) = pending.pop() {
176        if loaded.contains(&path) {
177            continue;
178        }
179        loaded.insert(path.clone());
180        buffer.clear();
181        File::open(&path)
182            .and_then(|mut f| f.read_to_string(&mut buffer))
183            .map_err(|err| ReadFileErrorV2::from_io(path.clone(), err))?;
184        for result in parse_iter::<D>(&buffer) {
185            let entry = match result {
186                Ok(entry) => entry,
187                Err(err) => return Err(ReadFileErrorV2::from_syntax(path, err)),
188            };
189            match entry {
190                Entry::Include(include) => {
191                    let path = if include.is_relative() {
192                        let Some(parent) = path.parent() else {
193                            unreachable!("there must be a parent if the file was valid")
194                        };
195                        parent.join(&include)
196                    } else {
197                        include
198                    };
199                    let path = path
200                        .canonicalize()
201                        .map_err(|err| ReadFileErrorV2::from_io(path, err))?;
202                    if !loaded.contains(&path) {
203                        pending.push(path.clone());
204                    }
205                    on_entry(Entry::Include(path));
206                }
207                entry => on_entry(entry),
208            }
209        }
210    }
211    Ok(())
212}
213
/// Main struct representing a parsed beancount file.
///
/// To get an instance of this, use [`parse`], the [`FromStr`] implementation
/// (`input.parse()`), or [`BeancountFile::read_files`] to load from disk.
///
/// It can also be built from, or extended with, an iterator of [`Entry`].
///
/// For an example, look at the root crate documentation.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct BeancountFile<D> {
    /// List of beancount options, in the order they were added
    ///
    /// See: <https://beancount.github.io/docs/beancount_language_syntax.html#options>
    pub options: Vec<BeanOption>,
    /// Paths of include directives, in the order they were added
    ///
    /// See: <https://beancount.github.io/docs/beancount_language_syntax.html#includes>
    pub includes: Vec<PathBuf>,
    /// List of [`Directive`] found in the file, in the order they were added
    pub directives: Vec<Directive<D>>,
}
233
234impl<D> Default for BeancountFile<D> {
235    fn default() -> Self {
236        Self {
237            options: Vec::new(),
238            includes: Vec::new(),
239            directives: Vec::new(),
240        }
241    }
242}
243
244impl<D> BeancountFile<D> {
245    /// Returns the first value found for the option
246    ///
247    /// If the option is declared multiple times, this function returns the first one found.
248    ///
249    /// See [`Self::options`] to get all declared options.
250    ///
251    /// Syntax: <https://beancount.github.io/docs/beancount_language_syntax.html#options>
252    ///
253    /// # Example
254    ///
255    /// ```
256    /// use beancount_parser::BeancountFile;
257    /// let input = r#"
258    /// option "favorite_color" "blue"
259    /// option "operating_currency" "CHF"
260    /// option "operating_currency" "PLN"
261    /// "#;
262    /// let beancount: BeancountFile<f64> = input.parse().unwrap();
263    /// assert_eq!(beancount.option("favorite_color"), Some("blue"));
264    /// assert_eq!(beancount.option("operating_currency"), Some("CHF"));
265    /// assert_eq!(beancount.option("foo"), None);
266    /// ```
267    #[must_use]
268    pub fn option(&self, key: &str) -> Option<&str> {
269        self.options
270            .iter()
271            .find(|opt| opt.name == key)
272            .map(|opt| &opt.value[..])
273    }
274}
275
276impl<D: Decimal> BeancountFile<D> {
277    /// Read from files
278    ///
279    /// # Errors
280    ///
281    /// Return an error if any of the files cannot be read or contains a syntax error
282    pub fn read_files(files: impl IntoIterator<Item = PathBuf>) -> Result<Self, ReadFileErrorV2> {
283        let mut file = BeancountFile::default();
284        read_files_v2(files, |entry| {
285            file.extend(std::iter::once(entry));
286        })?;
287        Ok(file)
288    }
289}
290
291impl<D> Extend<Entry<D>> for BeancountFile<D> {
292    fn extend<T: IntoIterator<Item = Entry<D>>>(&mut self, iter: T) {
293        for entry in iter {
294            match entry {
295                Entry::Directive(d) => self.directives.push(d),
296                Entry::Option(o) => self.options.push(o),
297                Entry::Include(p) => self.includes.push(p),
298            }
299        }
300    }
301}
302
303impl<D> FromIterator<Entry<D>> for BeancountFile<D> {
304    fn from_iter<T: IntoIterator<Item = Entry<D>>>(iter: T) -> Self {
305        let mut file = BeancountFile::default();
306        file.extend(iter);
307        file
308    }
309}
310
/// A beancount "directive"
///
/// It has fields common to all directives, and a [`Directive::content`] field with
/// a different content for each directive type.
///
/// A single directive can also be parsed on its own through the [`FromStr`] implementation.
///
/// ```
/// # use beancount_parser::{BeancountFile, DirectiveContent};
/// let input = r#"
/// 2022-01-01 open Assets:Cash
/// 2022-01-01 * "Grocery shopping"
///   Expenses:Groceries  10 CHF
///   Assets:Cash
/// "#;
/// let beancount: BeancountFile<f64> = input.parse().unwrap();
/// assert_eq!(beancount.directives.len(), 2);
/// for directive in beancount.directives {
///    println!("line: {}", directive.line_number);
///    println!("metadata: {:#?}", directive.metadata);
///    match directive.content {
///       DirectiveContent::Open(open) => println!("open account directive: {open:?}"),
///       DirectiveContent::Transaction(trx) => println!("transaction: {trx:?}"),
///       other => println!("unknown directive: {other:?}"),
///    }
/// }
/// ```
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct Directive<D> {
    /// Date of the directive
    pub date: Date,
    /// Content of the directive that is specific to each directive type
    pub content: DirectiveContent<D>,
    /// Metadata associated to the directive
    ///
    /// See the [`metadata`] module for more
    pub metadata: metadata::Map<D>,
    /// Line number where the directive was found in the input file
    ///
    /// This is the line on which the date of the directive starts.
    pub line_number: u32,
}
350
351impl<D: Decimal> FromStr for Directive<D> {
352    type Err = Error;
353    fn from_str(s: &str) -> Result<Self, Self::Err> {
354        match all_consuming(directive).parse(Span::new(s)).finish() {
355            Ok((_, d)) => Ok(d),
356            Err(err) => Err(Error::new(s, err.input)),
357        }
358    }
359}
360
/// Directive specific content
///
/// Each variant corresponds to one kind of dated directive recognized by the parser.
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum DirectiveContent<D> {
    /// Content of a transaction directive
    Transaction(Transaction<D>),
    /// Content of a `price` directive
    Price(Price<D>),
    /// Content of a `balance` directive
    Balance(Balance<D>),
    /// Content of an `open` directive
    Open(Open),
    /// Content of a `close` directive
    Close(Close),
    /// Content of a `pad` directive
    Pad(Pad),
    /// Currency declared by a `commodity` directive
    Commodity(Currency),
    /// Content of an `event` directive
    Event(Event),
}
375
/// Parser input: a string slice wrapped by `nom_locate` so the position can be tracked.
type Span<'a> = nom_locate::LocatedSpan<&'a str>;
/// Result type of the parsers in this crate, operating on a [`Span`] and producing `O`.
type IResult<'a, O> = nom::IResult<Span<'a>, O>;
378
/// Entry in the beancount syntax
///
/// It is more general than [`Directive`], as an entry can also be an option or an include.
#[allow(missing_docs)]
#[non_exhaustive]
#[derive(Debug, Clone)]
pub enum Entry<D> {
    /// A dated directive
    Directive(Directive<D>),
    /// An `option` declaration
    Option(BeanOption),
    /// Path given to an `include` directive
    Include(PathBuf),
}
390
// Low-level result of parsing one entry, as produced by the `entry` parser.
//
// In addition to what the public `Entry` can hold, it has variants for the
// `pushtag`/`poptag` operations and for lines that are not otherwise recognized.
// NOTE(review): the conversion to `Entry` presumably happens in the `iterator`
// module, which is not visible here; confirm.
enum RawEntry<D> {
    // A dated directive
    Directive(Directive<D>),
    // An `option` declaration
    Option(BeanOption),
    // Path given to an `include` directive
    Include(PathBuf),
    // Tag given to a `pushtag` operation
    PushTag(Tag),
    // Tag given to a `poptag` operation
    PopTag(Tag),
    // Any other line (the `entry` parser falls back to this when nothing else matches)
    Comment,
}
399
/// A beancount option
///
/// It is made of the two quoted strings that follow the `option` keyword.
///
/// See: <https://beancount.github.io/docs/beancount_language_syntax.html#options>
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct BeanOption {
    /// Name of the option
    pub name: String,
    /// Value of the option
    pub value: String,
}
411
412fn entry<D: Decimal>(input: Span<'_>) -> IResult<'_, RawEntry<D>> {
413    alt((
414        directive.map(RawEntry::Directive),
415        option.map(|(name, value)| RawEntry::Option(BeanOption { name, value })),
416        include.map(|p| RawEntry::Include(p)),
417        tag_stack_operation,
418        line.map(|()| RawEntry::Comment),
419    ))
420    .parse(input)
421}
422
423fn directive<D: Decimal>(input: Span<'_>) -> IResult<'_, Directive<D>> {
424    let (input, position) = position(input)?;
425    let (input, date) = date::parse(input)?;
426    let (input, _) = cut(space1).parse(input)?;
427    let (input, (content, metadata)) = alt((
428        map(transaction::parse, |(t, m)| {
429            (DirectiveContent::Transaction(t), m)
430        }),
431        (
432            terminated(
433                alt((
434                    map(
435                        preceded(tag("price"), cut(preceded(space1, amount::price))),
436                        DirectiveContent::Price,
437                    ),
438                    map(
439                        preceded(tag("balance"), cut(preceded(space1, account::balance))),
440                        DirectiveContent::Balance,
441                    ),
442                    map(
443                        preceded(tag("open"), cut(preceded(space1, account::open))),
444                        DirectiveContent::Open,
445                    ),
446                    map(
447                        preceded(tag("close"), cut(preceded(space1, account::close))),
448                        DirectiveContent::Close,
449                    ),
450                    map(
451                        preceded(tag("pad"), cut(preceded(space1, account::pad))),
452                        DirectiveContent::Pad,
453                    ),
454                    map(
455                        preceded(tag("commodity"), cut(preceded(space1, amount::currency))),
456                        DirectiveContent::Commodity,
457                    ),
458                    map(
459                        preceded(tag("event"), cut(preceded(space1, event::parse))),
460                        DirectiveContent::Event,
461                    ),
462                )),
463                end_of_line,
464            ),
465            metadata::parse,
466        ),
467    ))
468    .parse(input)?;
469    Ok((
470        input,
471        Directive {
472            date,
473            content,
474            metadata,
475            line_number: position.location_line(),
476        },
477    ))
478}
479
480fn option(input: Span<'_>) -> IResult<'_, (String, String)> {
481    let (input, _) = tag("option")(input)?;
482    let (input, key) = preceded(space1, string).parse(input)?;
483    let (input, value) = preceded(space1, string).parse(input)?;
484    let (input, ()) = end_of_line(input)?;
485    Ok((input, (key, value)))
486}
487
488fn include(input: Span<'_>) -> IResult<'_, PathBuf> {
489    let (input, _) = tag("include")(input)?;
490    let (input, path) = cut(delimited(space1, string, end_of_line)).parse(input)?;
491    Ok((input, path.into()))
492}
493
494fn tag_stack_operation<D>(input: Span<'_>) -> IResult<'_, RawEntry<D>> {
495    alt((
496        preceded((tag("pushtag"), space1), transaction::parse_tag).map(RawEntry::PushTag),
497        preceded((tag("poptag"), space1), transaction::parse_tag).map(RawEntry::PopTag),
498    ))
499    .parse(input)
500}
501
502fn end_of_line(input: Span<'_>) -> IResult<'_, ()> {
503    let (input, _) = space0(input)?;
504    let (input, _) = opt(comment).parse(input)?;
505    let (input, _) = alt((line_ending, eof)).parse(input)?;
506    Ok((input, ()))
507}
508
509fn comment(input: Span<'_>) -> IResult<'_, ()> {
510    let (input, _) = char(';')(input)?;
511    let (input, _) = not_line_ending(input)?;
512    Ok((input, ()))
513}
514
515fn line(input: Span<'_>) -> IResult<'_, ()> {
516    let (input, _) = not_line_ending(input)?;
517    let (input, _) = line_ending(input)?;
518    Ok((input, ()))
519}
520
521fn empty_line(input: Span<'_>) -> IResult<'_, ()> {
522    let (input, ()) = not(eof).parse(input)?;
523    end_of_line(input)
524}
525
526fn string(input: Span<'_>) -> IResult<'_, String> {
527    let (input, _) = char('"')(input)?;
528    let mut string = String::new();
529    let mut take_data = take_while(|c: char| c != '"' && c != '\\');
530    let (mut input, mut part) = take_data.parse(input)?;
531    while !part.fragment().is_empty() {
532        string.push_str(part.fragment());
533        let (new_input, escaped) =
534            opt(alt((value('"', tag("\\\"")), value('\\', tag("\\\\"))))).parse_complete(input)?;
535        let Some(escaped) = escaped else { break };
536        string.push(escaped);
537        let (new_input, new_part) = take_data.parse(new_input)?;
538        input = new_input;
539        part = new_part;
540    }
541    let (input, _) = char('"')(input)?;
542    Ok((input, string))
543}