okane_core/
load.rs

//! Contains the functions to load a Ledger file,
//! recursively resolving the `include` directives.
3
4use std::{
5    borrow::Cow,
6    collections::HashMap,
7    path::{self, Path, PathBuf},
8};
9
10use crate::{parse, syntax};
11
/// Error caused by [Loader::load].
///
/// Variants that carry a [`PathBuf`] record the path that was being processed
/// when the failure happened.
#[derive(thiserror::Error, Debug)]
pub enum LoadError {
    /// An IO operation failed for the given path.
    /// Also used when an `include` glob does not match any file.
    #[error("failed to perform IO on file {1}")]
    IO(#[source] std::io::Error, PathBuf),
    /// The content of the given file could not be parsed.
    #[error("failed to parse file {1}")]
    Parse(#[source] parse::ParseError, PathBuf),
    /// The file being loaded has no parent directory,
    /// so an `include` in it cannot be resolved relative to it.
    #[error("loading file path {0} doesn't have parent, maybe filesystem root is passed")]
    RootLoadingPath(PathBuf),
    /// The resolved include path could not be converted into a UTF-8 string.
    #[error("invalid Unicode path is not supported: {0}")]
    InvalidUnicodePath(String),
    /// The include pattern was rejected by the `glob` crate.
    #[error("invalid glob pattern specified")]
    InvalidIncludeGlob(#[from] glob::PatternError),
    /// The `glob` crate reported an error while enumerating the matches.
    #[error("failed to match glob pattern")]
    GlobFailure(#[from] glob::GlobError),
}
28
/// Loader loads a given Ledger file, recursively following `include` directives,
/// and hands over every entry as [`syntax::LedgerEntry`] together with the file path
/// and the parse context, so that errors can be pointed out in a user friendly manner.
pub struct Loader<F: FileSystem> {
    /// Path of the root file to load.
    source: PathBuf,
    /// Renderer passed to the parse options for error output.
    error_style: annotate_snippets::Renderer,
    /// File system used to canonicalize paths, read files and expand globs.
    filesystem: F,
}
36
37/// Creates a new [`Loader`] instance with [`ProdFileSystem`].
38pub fn new_loader(source: PathBuf) -> Loader<ProdFileSystem> {
39    Loader::new(source, ProdFileSystem)
40}
41
42impl<F: FileSystem> Loader<F> {
43    /// Create a new instance of `Loader` to load the given path.
44    ///
45    /// It might look weird to have the source path as a `Loader` member,
46    /// but that would give future flexibility to support loading from stdio/network without include,
47    /// or completely static one.
48    pub fn new(source: PathBuf, filesystem: F) -> Self {
49        Self {
50            source,
51            error_style: annotate_snippets::Renderer::styled(),
52            filesystem,
53        }
54    }
55
56    /// Create a new instance with the given `renderer`.
57    pub fn with_error_renderer(self, renderer: annotate_snippets::Renderer) -> Self {
58        Self {
59            source: self.source,
60            error_style: renderer,
61            filesystem: self.filesystem,
62        }
63    }
64
65    /// Returns a [`annotate_snippets::Renderer`] instance.
66    pub(crate) fn error_style(&self) -> &annotate_snippets::Renderer {
67        &self.error_style
68    }
69
70    /// Loads [syntax::LedgerEntry] and invoke callback on every entry,
71    /// recursively resolving `include` directives.
72    pub fn load<T, E, Deco>(&self, mut callback: T) -> Result<(), E>
73    where
74        T: FnMut(&Path, &parse::ParsedContext<'_>, &syntax::LedgerEntry<'_, Deco>) -> Result<(), E>,
75        E: std::error::Error + From<LoadError>,
76        Deco: syntax::decoration::Decoration,
77    {
78        let popts = parse::ParseOptions::default().with_error_style(self.error_style.clone());
79        self.load_impl(&popts, &self.source, &mut callback)
80    }
81
82    fn load_impl<T, E, Deco>(
83        &self,
84        parse_options: &parse::ParseOptions,
85        path: &Path,
86        callback: &mut T,
87    ) -> Result<(), E>
88    where
89        T: FnMut(&Path, &parse::ParsedContext<'_>, &syntax::LedgerEntry<'_, Deco>) -> Result<(), E>,
90        E: std::error::Error + From<LoadError>,
91        Deco: syntax::decoration::Decoration,
92    {
93        let path: Cow<'_, Path> = self.filesystem.canonicalize_path(path);
94        let content = self
95            .filesystem
96            .file_content_utf8(&path)
97            .map_err(|err| LoadError::IO(err, path.clone().into_owned()))?;
98        for parsed in parse::parse_ledger(parse_options, &content) {
99            let (ctx, entry) =
100                parsed.map_err(|e| LoadError::Parse(e, path.clone().into_owned()))?;
101            match entry {
102                syntax::LedgerEntry::Include(p) => {
103                    let include_path: PathBuf = p.0.as_ref().into();
104                    let target: String = path
105                        .as_ref()
106                        .parent()
107                        .ok_or_else(|| LoadError::RootLoadingPath(path.as_ref().to_owned()))?
108                        .join(include_path)
109                        .into_os_string()
110                        .into_string()
111                        .map_err(|x| {
112                            LoadError::InvalidUnicodePath(format!("{}", PathBuf::from(x).display()))
113                        })?;
114                    let mut paths: Vec<PathBuf> = self.filesystem.glob(&target)?;
115                    if paths.is_empty() {
116                        return Err(LoadError::IO(
117                            std::io::Error::new(
118                                std::io::ErrorKind::NotFound,
119                                format!("glob {} does not hit any files", target),
120                            ),
121                            PathBuf::from(target),
122                        )
123                        .into());
124                    }
125                    paths.sort_unstable();
126                    for path in &paths {
127                        self.load_impl(parse_options, path, callback)?;
128                    }
129                    Ok(())
130                }
131                _ => callback(&path, &ctx, &entry),
132            }?;
133        }
134        Ok(())
135    }
136}
137
/// Interface to abstract file system.
/// Normally you want to use [ProdFileSystem].
pub trait FileSystem {
    /// Canonicalizes the given path.
    ///
    /// The result is a [`Cow`], so an implementation may return the input as is
    /// or an owned, rewritten path.
    fn canonicalize_path<'a>(&self, path: &'a Path) -> Cow<'a, Path>;

    /// Loads the file at the given path and returns its content as a UTF-8 `String`.
    fn file_content_utf8<P: AsRef<Path>>(&self, path: P) -> Result<String, std::io::Error>;

    /// Returns all paths matching the given glob.
    /// Paths can be in arbitrary order, so the caller must sort them before use
    /// when it needs a deterministic order.
    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, LoadError>;
}
151
/// [FileSystem] that reads files from the local file system.
pub struct ProdFileSystem;
154
155impl FileSystem for ProdFileSystem {
156    fn canonicalize_path<'a>(&self, path: &'a Path) -> Cow<'a, Path> {
157        std::fs::canonicalize(path)
158            .map(|x| {
159                if x == path {
160                    Cow::Borrowed(path)
161                } else {
162                    Cow::Owned(x)
163                }
164            })
165            .unwrap_or_else(|x| {
166                log::warn!(
167                    "failed to canonicalize path {}, likeky to fail to load: {}",
168                    path.display(),
169                    x
170                );
171                path.into()
172            })
173    }
174
175    fn file_content_utf8<P: AsRef<Path>>(&self, path: P) -> Result<String, std::io::Error> {
176        std::fs::read_to_string(path)
177    }
178
179    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, LoadError> {
180        let paths: Vec<PathBuf> = glob::glob_with(pattern, glob_match_options())?
181            .collect::<Result<Vec<_>, glob::GlobError>>()?;
182        Ok(paths)
183    }
184}
185
/// Match options used by the [FileSystem] implementations in this file to expand `include` globs.
///
/// All the flags are enabled: matching is case sensitive, path separators must be
/// matched by a literal separator, and a leading `.` must be matched by a literal `.`.
const fn glob_match_options() -> glob::MatchOptions {
    glob::MatchOptions {
        case_sensitive: true,
        require_literal_separator: true,
        require_literal_leading_dot: true,
    }
}
193
/// [FileSystem] with given set of filename and content mapping.
/// It won't cause any actual file read.
///
/// The map keys are the file paths, and the values are the raw bytes of the file content.
pub struct FakeFileSystem(HashMap<PathBuf, Vec<u8>>);
197
198impl From<HashMap<PathBuf, Vec<u8>>> for FakeFileSystem {
199    fn from(value: HashMap<PathBuf, Vec<u8>>) -> Self {
200        Self(value)
201    }
202}
203
204impl FileSystem for FakeFileSystem {
205    fn canonicalize_path<'a>(&self, path: &'a Path) -> Cow<'a, Path> {
206        let mut ret = PathBuf::new();
207        for pc in path.components() {
208            match pc {
209                path::Component::CurDir => (),
210                path::Component::ParentDir => {
211                    if !ret.pop() {
212                        log::warn!(
213                            "failed to pop parent, maybe wrong path given: {}",
214                            path.display()
215                        );
216                    }
217                }
218                _ => ret.push(pc),
219            }
220        }
221        Cow::Owned(ret)
222    }
223
224    fn file_content_utf8<P: AsRef<Path>>(&self, path: P) -> Result<String, std::io::Error> {
225        let path = path.as_ref();
226        self.0
227            .get(path)
228            .ok_or(std::io::Error::new(
229                std::io::ErrorKind::NotFound,
230                format!("fake file {} not found", path.display()),
231            ))
232            .and_then(|x| {
233                String::from_utf8(x.clone())
234                    .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))
235            })
236    }
237
238    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, LoadError> {
239        let pattern = glob::Pattern::new(pattern)?;
240        let mut paths: Vec<PathBuf> = self
241            .0
242            .keys()
243            .filter(|x| pattern.matches_path_with(x, glob_match_options()))
244            .cloned()
245            .collect();
246        paths.sort_by(|x, y| y.cmp(x));
247        Ok(paths)
248    }
249}
250
#[cfg(test)]
mod tests {
    use super::*;

    use std::{borrow::Borrow, path::Path, vec::Vec};

    use indoc::indoc;
    use maplit::hashmap;
    use pretty_assertions::assert_eq;

    /// Parses every `(path, content)` pair with the default parse options,
    /// and returns the entries in input order, each tagged with the given path.
    fn parse_static_ledger_entry(
        input: &[(&Path, &'static str)],
    ) -> Result<Vec<(PathBuf, syntax::plain::LedgerEntry<'static>)>, parse::ParseError> {
        let opts = parse::ParseOptions::default();
        input
            .iter()
            .flat_map(|(p, content)| {
                parse::parse_ledger(&opts, content)
                    .map(|elem| elem.map(|(_ctx, entry)| (p.to_path_buf(), entry)))
            })
            .collect()
    }

    /// Runs the loader and collects every `(path, entry)` passed to the callback,
    /// in the order the callback was invoked.
    fn parse_into_vec<L, F>(
        loader: L,
    ) -> Result<Vec<(PathBuf, syntax::plain::LedgerEntry<'static>)>, LoadError>
    where
        L: Borrow<Loader<F>>,
        F: FileSystem,
    {
        let mut ret: Vec<(PathBuf, syntax::plain::LedgerEntry<'static>)> = Vec::new();
        loader.borrow().load(|path, _ctx, entry| {
            ret.push((path.to_owned(), entry.to_static()));
            Ok::<(), LoadError>(())
        })?;
        Ok(ret)
    }

    /// Loads `testdata/load/recursive.ledger` through the real file system,
    /// and checks that the entries of the included files arrive in the expected order.
    #[test]
    fn load_valid_input_real_file() {
        // The test data lives next to the crate, under the parent of CARGO_MANIFEST_DIR.
        let mut testdata_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        assert!(
            testdata_dir.pop(),
            "CARGO_MANIFEST_DIR={} must have parent dir",
            testdata_dir.display()
        );
        testdata_dir.push("testdata/load");
        // The loader reports canonicalized paths, so the expectations are canonicalized as well.
        let root = testdata_dir
            .join("recursive.ledger")
            .canonicalize()
            .unwrap();
        let child1 = testdata_dir.join("child1.ledger").canonicalize().unwrap();
        let child2 = testdata_dir
            .join("sub/child2.ledger")
            .canonicalize()
            .unwrap();
        let child3 = testdata_dir.join("child3.ledger").canonicalize().unwrap();
        let child4 = testdata_dir
            .join("sub/child4.ledger")
            .canonicalize()
            .unwrap();
        let want = parse_static_ledger_entry(&[
            (
                &root,
                indoc! {"
            ; Demonstrates include feature including glob, parent dir, ...

            account Expenses:Grocery
                note スーパーマーケットで買ったやつ全部
                ; comment
                alias Expenses:CVS

            2024/01/01 Initial Balance
                Equity:Opening Balance                  -1000.00 CHF
                Assets:Bank:ZKB                          1000.00 CHF
            "},
            ),
            (
                &child2,
                indoc! {"
            2024/01/01 * Complicated salary
                Income:Salary                          -3,000.00 CHF
                Assets:Bank:ZKB                         2,500.00 CHF
                Expenses:Income Tax                       312.34 CHF
                Expenses:Social Tax                        37.66 CHF
                Assets:Fixed:年金                         150.00 CHF
            "},
            ),
            (
                &child3,
                indoc! {"
            2024/03/01 * SBB CFF FFS
                Assets:Bank:ZKB                            -5.60 CHF
                Expenses:Travel:Train                       5.60 CHF
            "},
            ),
            (
                &child2,
                indoc! {"
            2024/01/25 ! RSU
                ; TODO: FMV not determined
                Income:RSU                    (-50.0000 * 100.23 USD)
                Expenses:Income Tax
                Assets:Broker                            40.0000 OKANE @ 100.23 USD
            "},
            ),
            (
                &child4,
                indoc! {"
            2024/7/1 * Send money
                Assets:Bank:ZKB                         -1000.00 CHF
                Assets:Wire:Wise                         1000.00 CHF
            "},
            ),
            (
                &child1,
                indoc! {"
            2024/05/01 * Migros
                Expenses:Grocery                          -10.00 CHF
                Assets:Bank:ZKB                            10.00 CHF
            "},
            ),
        ])
        .expect("test input parse must not fail");
        let got = parse_into_vec(new_loader(root.clone())).expect("failed to parse the test data");
        assert_eq!(want, got);
    }

    /// Loads a chain of includes from a [`FakeFileSystem`], including a glob include.
    #[test]
    fn load_valid_fake() {
        let fake = hashmap! {
            PathBuf::from("/path/to/root.ledger") => indoc! {"
                include child1.ledger
            "}.as_bytes().to_vec(),
            PathBuf::from("/path/to/child1.ledger") => indoc! {"
                include sub/*.ledger
            "}.as_bytes().to_vec(),
            PathBuf::from("/path/to/sub/child2.ledger") => "".as_bytes().to_vec(),
            PathBuf::from("/path/to/sub/child3.ledger") => indoc! {"
                ; comment here
            "}.as_bytes().to_vec(),
            // Dot file with unparsable content: loading fails if the glob ever picks it up.
            PathBuf::from("/path/to/sub/.unloaded.ledger") => indoc! {"
                completely invalid file, should not be loaded
            "}.as_bytes().to_vec(),
        };

        // Only child3 contributes an entry, as the other loaded files hold nothing but includes.
        let want = parse_static_ledger_entry(&[(
            Path::new("/path/to/sub/child3.ledger"),
            indoc! {"
            ; comment here
            "},
        )])
        .expect("test input parse must not fail");

        let got = parse_into_vec(Loader::new(
            PathBuf::from("/path/to/root.ledger"),
            FakeFileSystem::from(fake),
        ))
        .expect("parse failed");
        assert_eq!(want, got);
    }

    /// Loading a root file that does not exist must fail with a NotFound IO error.
    #[test]
    fn load_non_existing_file() {
        let fake = hashmap! {
            PathBuf::from("/path/to/root.ledger") => indoc! {"
                ; foo
            "}.as_bytes().to_vec(),
        };

        let got_err = parse_into_vec(Loader::new(
            PathBuf::from("/path/to/not_found.ledger"),
            FakeFileSystem::from(fake),
        ))
        .unwrap_err();

        match got_err {
            LoadError::IO(e, _) => assert!(
                e.kind() == std::io::ErrorKind::NotFound,
                "should cause NotFound IO error: got {:?}",
                e
            ),
            _ => panic!("unexpected error: {:?}", got_err),
        }
    }

    /// Including a file that does not exist must fail with a NotFound IO error,
    /// which the loader raises when the include glob matches nothing.
    #[test]
    fn load_include_non_existing_file() {
        let fake = hashmap! {
            PathBuf::from("/path/to/root.ledger") => indoc! {"
                include non_existing.ledger
            "}.as_bytes().to_vec(),
        };

        let got_err = parse_into_vec(Loader::new(
            PathBuf::from("/path/to/root.ledger"),
            FakeFileSystem::from(fake),
        ))
        .expect_err("parse failed");

        match got_err {
            LoadError::IO(e, _) => assert!(
                e.kind() == std::io::ErrorKind::NotFound,
                "should cause NotFound IO error: got {:?}",
                e
            ),
            _ => panic!("unexpected error: {:?}", got_err),
        }
    }
}