okane_core/
load.rs

//! Contains the functions to load a Ledger file,
//! recursively resolving the `include` directives.
3
4use std::{
5    borrow::Cow,
6    collections::HashMap,
7    path::{self, Path, PathBuf},
8};
9
10use crate::{parse, syntax};
11
12/// Error caused by [Loader::load].
13#[derive(thiserror::Error, Debug)]
14pub enum LoadError {
15    #[error("failed to perform IO on file {1}")]
16    IO(#[source] std::io::Error, PathBuf),
17    #[error("failed to parse file {1}")]
18    Parse(#[source] parse::ParseError, PathBuf),
19    #[error("loading file path {0} doesn't have parent, maybe filesystem root is passed")]
20    RootLoadingPath(PathBuf),
21    #[error("invalid Unicode path is not supported: {0}")]
22    InvalidUnicodePath(String),
23    #[error("invalid glob pattern specified")]
24    InvalidIncludeGlob(#[from] glob::PatternError),
25    #[error("failed to match glob pattern")]
26    GlobFailure(#[from] glob::GlobError),
27}
28
29/// Loader is an object to keep loading a given file and may recusrively load them as `repr::LedgerEntry`,
30/// with the metadata about filename or line/column to point the error in a user friendly manner.
31pub struct Loader<F: FileSystem> {
32    source: PathBuf,
33    error_style: annotate_snippets::Renderer,
34    filesystem: F,
35}
36
37/// Creates a new [`Loader`] instance with [`ProdFileSystem`].
38pub fn new_loader(source: PathBuf) -> Loader<ProdFileSystem> {
39    Loader::new(source, ProdFileSystem)
40}
41
42impl<F: FileSystem> Loader<F> {
43    /// Create a new instance of `Loader` to load the given path.
44    ///
45    /// It might look weird to have the source path as a `Loader` member,
46    /// but that would give future flexibility to support loading from stdio/network without include,
47    /// or completely static one.
48    pub fn new(source: PathBuf, filesystem: F) -> Self {
49        Self {
50            source,
51            error_style: annotate_snippets::Renderer::styled(),
52            filesystem,
53        }
54    }
55
56    /// Create a new instance with the given `renderer`.
57    pub fn with_error_renderer(self, renderer: annotate_snippets::Renderer) -> Self {
58        Self {
59            source: self.source,
60            error_style: renderer,
61            filesystem: self.filesystem,
62        }
63    }
64
65    /// Returns a [`annotate_snippets::Renderer`] instance.
66    pub(crate) fn error_style(&self) -> &annotate_snippets::Renderer {
67        &self.error_style
68    }
69
70    /// Loads [syntax::LedgerEntry] and invoke callback on every entry,
71    /// recursively resolving `include` directives.
72    pub fn load<T, E, Deco>(&self, mut callback: T) -> Result<(), E>
73    where
74        T: FnMut(&Path, &parse::ParsedContext<'_>, &syntax::LedgerEntry<'_, Deco>) -> Result<(), E>,
75        E: std::error::Error + From<LoadError>,
76        Deco: syntax::decoration::Decoration,
77    {
78        let popts = parse::ParseOptions::default().with_error_style(self.error_style.clone());
79        self.load_impl(&popts, &self.source, &mut callback)
80    }
81
82    fn load_impl<T, E, Deco>(
83        &self,
84        parse_options: &parse::ParseOptions,
85        path: &Path,
86        callback: &mut T,
87    ) -> Result<(), E>
88    where
89        T: FnMut(&Path, &parse::ParsedContext<'_>, &syntax::LedgerEntry<'_, Deco>) -> Result<(), E>,
90        E: std::error::Error + From<LoadError>,
91        Deco: syntax::decoration::Decoration,
92    {
93        let path: Cow<'_, Path> = F::canonicalize_path(path);
94        let content = self
95            .filesystem
96            .file_content_utf8(&path)
97            .map_err(|err| LoadError::IO(err, path.clone().into_owned()))?;
98        for parsed in parse::parse_ledger(parse_options, &content) {
99            let (ctx, entry) =
100                parsed.map_err(|e| LoadError::Parse(e, path.clone().into_owned()))?;
101            match entry {
102                syntax::LedgerEntry::Include(p) => {
103                    let include_path: PathBuf = p.0.as_ref().into();
104                    let target: String = path
105                        .as_ref()
106                        .parent()
107                        .ok_or_else(|| LoadError::RootLoadingPath(path.as_ref().to_owned()))?
108                        .join(include_path)
109                        .into_os_string()
110                        .into_string()
111                        .map_err(|x| {
112                            LoadError::InvalidUnicodePath(format!("{}", PathBuf::from(x).display()))
113                        })?;
114                    let mut paths: Vec<PathBuf> = self.filesystem.glob(&target)?;
115                    if paths.is_empty() {
116                        return Err(LoadError::IO(
117                            std::io::Error::new(
118                                std::io::ErrorKind::NotFound,
119                                format!("glob {} does not hit any files", target),
120                            ),
121                            PathBuf::from(target),
122                        )
123                        .into());
124                    }
125                    log::debug!("glob {} hit {} files", target, paths.len());
126                    paths.sort_unstable();
127                    for path in &paths {
128                        self.load_impl(parse_options, path, callback)?;
129                    }
130                    Ok(())
131                }
132                _ => callback(&path, &ctx, &entry),
133            }?;
134        }
135        Ok(())
136    }
137}
138
139/// Interface to abstract file system.
140/// Normally you want to use [ProdFileSystem].
141pub trait FileSystem {
142    /// canonicalize the given path.
143    fn canonicalize_path<'a>(path: &'a Path) -> Cow<'a, Path>;
144
145    /// Load the given path and returns it as UTF-8 String.
146    fn file_content_utf8<P: AsRef<Path>>(&self, path: P) -> Result<String, std::io::Error>;
147
148    /// Returns all paths matching the given glob.
149    /// Paths can be in arbitrary order, and caller must sort it beforehand.
150    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, LoadError>;
151}
152
153/// [FileSystem] to regularly reads the files recursively in the local files.
154pub struct ProdFileSystem;
155
156impl FileSystem for ProdFileSystem {
157    fn canonicalize_path<'a>(path: &'a Path) -> Cow<'a, Path> {
158        dunce::canonicalize(path)
159            .map(|x| {
160                if x == path {
161                    Cow::Borrowed(path)
162                } else {
163                    Cow::Owned(x)
164                }
165            })
166            .unwrap_or_else(|x| {
167                log::warn!(
168                    "failed to canonicalize path {}, likeky to fail to load: {}",
169                    path.display(),
170                    x
171                );
172                path.into()
173            })
174    }
175
176    fn file_content_utf8<P: AsRef<Path>>(&self, path: P) -> Result<String, std::io::Error> {
177        std::fs::read_to_string(path)
178    }
179
180    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, LoadError> {
181        let paths: Vec<PathBuf> = glob::glob_with(pattern, glob_match_options())?
182            .collect::<Result<Vec<_>, glob::GlobError>>()?;
183        Ok(paths)
184    }
185}
186
187const fn glob_match_options() -> glob::MatchOptions {
188    glob::MatchOptions {
189        case_sensitive: true,
190        require_literal_separator: true,
191        require_literal_leading_dot: true,
192    }
193}
194
/// [FileSystem] backed by an in-memory mapping from file path to raw content.
/// It won't cause any actual file read.
pub struct FakeFileSystem(HashMap<PathBuf, Vec<u8>>);

impl From<HashMap<PathBuf, Vec<u8>>> for FakeFileSystem {
    /// Wraps the given path-to-content mapping as is.
    fn from(files: HashMap<PathBuf, Vec<u8>>) -> Self {
        FakeFileSystem(files)
    }
}
204
205impl FileSystem for FakeFileSystem {
206    fn canonicalize_path<'a>(path: &'a Path) -> Cow<'a, Path> {
207        let mut components = Vec::new();
208        for pc in path.components() {
209            match pc {
210                path::Component::CurDir => (),
211                path::Component::ParentDir => {
212                    if components.pop().is_none() {
213                        log::warn!(
214                            "failed to pop parent, maybe wrong path given: {}",
215                            path.display()
216                        );
217                    }
218                }
219                path::Component::RootDir => components.push("/"),
220                path::Component::Prefix(_) => log::info!("ignore prefix: {:?}", pc),
221                path::Component::Normal(pc) => {
222                    components.push(pc.to_str().unwrap_or("invalid-unicode-component"))
223                }
224            }
225        }
226        Cow::Owned(components.join("/").into())
227    }
228
229    fn file_content_utf8<P: AsRef<Path>>(&self, path: P) -> Result<String, std::io::Error> {
230        let path = path.as_ref();
231        self.0
232            .get(path)
233            .ok_or(std::io::Error::new(
234                std::io::ErrorKind::NotFound,
235                format!("fake file {} not found", path.display()),
236            ))
237            .and_then(|x| {
238                String::from_utf8(x.clone())
239                    .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))
240            })
241    }
242
243    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, LoadError> {
244        let pattern = glob::Pattern::new(pattern)?;
245        let mut paths: Vec<PathBuf> = self
246            .0
247            .keys()
248            .filter(|x| pattern.matches_path_with(x, glob_match_options()))
249            .cloned()
250            .collect();
251        paths.sort_by(|x, y| y.cmp(x));
252        Ok(paths)
253    }
254}
255
#[cfg(test)]
mod tests {
    use super::*;

    use std::{borrow::Borrow, path::Path, vec::Vec};

    use indoc::indoc;
    use maplit::hashmap;
    use pretty_assertions::assert_eq;

    /// Parses every `(path, content)` pair with the default options and
    /// returns all the entries in order, each tagged with its path.
    fn parse_static_ledger_entry(
        input: &[(&Path, &'static str)],
    ) -> Result<Vec<(PathBuf, syntax::plain::LedgerEntry<'static>)>, parse::ParseError> {
        let opts = parse::ParseOptions::default();
        let mut entries: Vec<(PathBuf, syntax::plain::LedgerEntry<'static>)> = Vec::new();
        for (path, content) in input {
            for parsed in parse::parse_ledger(&opts, content) {
                let (_ctx, entry) = parsed?;
                entries.push((path.to_path_buf(), entry));
            }
        }
        Ok(entries)
    }

    /// Runs the loader and collects every entry passed to the callback,
    /// each tagged with the path it was loaded from.
    fn parse_into_vec<L, F>(
        loader: L,
    ) -> Result<Vec<(PathBuf, syntax::plain::LedgerEntry<'static>)>, LoadError>
    where
        L: Borrow<Loader<F>>,
        F: FileSystem,
    {
        let mut collected: Vec<(PathBuf, syntax::plain::LedgerEntry<'static>)> = Vec::new();
        let loader: &Loader<F> = loader.borrow();
        loader.load(|path, _ctx, entry| {
            collected.push((path.to_owned(), entry.to_static()));
            Ok::<(), LoadError>(())
        })?;
        Ok(collected)
    }

    /// Loads the checked-in test data through the real file system.
    #[test]
    fn load_valid_input_real_file() {
        let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
        let workspace_dir = manifest_dir.parent().unwrap_or_else(|| {
            panic!(
                "CARGO_MANIFEST_DIR={} must have parent dir",
                manifest_dir.display()
            )
        });
        let testdata_dir = dunce::canonicalize(workspace_dir.join("testdata/load")).unwrap();
        let sub_dir = testdata_dir.join("sub");
        let root = testdata_dir.join("recursive.ledger");
        let child1 = testdata_dir.join("child1.ledger");
        let child2 = sub_dir.join("child2.ledger");
        let child3 = testdata_dir.join("child3.ledger");
        let child4 = sub_dir.join("child4.ledger");
        let want = parse_static_ledger_entry(&[
            (
                &root,
                indoc! {"
            ; Demonstrates include feature including glob, parent dir, ...

            account Expenses:Grocery
                note スーパーマーケットで買ったやつ全部
                ; comment
                alias Expenses:CVS

            2024/01/01 Initial Balance
                Equity:Opening Balance                  -1000.00 CHF
                Assets:Bank:ZKB                          1000.00 CHF
            "},
            ),
            (
                &child2,
                indoc! {"
            2024/01/01 * Complicated salary
                Income:Salary                          -3,000.00 CHF
                Assets:Bank:ZKB                         2,500.00 CHF
                Expenses:Income Tax                       312.34 CHF
                Expenses:Social Tax                        37.66 CHF
                Assets:Fixed:年金                         150.00 CHF
            "},
            ),
            (
                &child3,
                indoc! {"
            2024/03/01 * SBB CFF FFS
                Assets:Bank:ZKB                            -5.60 CHF
                Expenses:Travel:Train                       5.60 CHF
            "},
            ),
            (
                &child2,
                indoc! {"
            2024/01/25 ! RSU
                ; TODO: FMV not determined
                Income:RSU                    (-50.0000 * 100.23 USD)
                Expenses:Income Tax
                Assets:Broker                            40.0000 OKANE @ 100.23 USD
            "},
            ),
            (
                &child4,
                indoc! {"
            2024/7/1 * Send money
                Assets:Bank:ZKB                         -1000.00 CHF
                Assets:Wire:Wise                         1000.00 CHF
            "},
            ),
            (
                &child1,
                indoc! {"
            2024/05/01 * Migros
                Expenses:Grocery                          -10.00 CHF
                Assets:Bank:ZKB                            10.00 CHF
            "},
            ),
        ])
        .expect("test input parse must not fail");
        let got = parse_into_vec(new_loader(root.clone())).expect("failed to parse the test data");
        assert_eq!(want, got);
    }

    /// Nested includes with a glob are resolved against the fake file system,
    /// and the dot file is not picked up by the wildcard.
    #[test]
    fn load_valid_fake() {
        let files = hashmap! {
            PathBuf::from("path/to/root.ledger") => indoc! {"
                include child1.ledger
            "}.as_bytes().to_vec(),
            PathBuf::from("path/to/child1.ledger") => indoc! {"
                include sub/*.ledger
            "}.as_bytes().to_vec(),
            PathBuf::from("path/to/sub/child2.ledger") => "".as_bytes().to_vec(),
            PathBuf::from("path/to/sub/child3.ledger") => indoc! {"
                ; comment here
            "}.as_bytes().to_vec(),
            PathBuf::from("path/to/sub/.unloaded.ledger") => indoc! {"
                completely invalid file, should not be loaded
            "}.as_bytes().to_vec(),
        };

        let want = parse_static_ledger_entry(&[(
            Path::new("path/to/sub/child3.ledger"),
            indoc! {"
            ; comment here
            "},
        )])
        .expect("test input parse must not fail");

        let loader = Loader::new(
            PathBuf::from("path/to/root.ledger"),
            FakeFileSystem::from(files),
        );
        let got = parse_into_vec(loader).expect("parse failed");
        assert_eq!(want, got);
    }

    /// Loading a root file which is not registered fails with a NotFound IO error.
    #[test]
    fn load_non_existing_file() {
        let files = hashmap! {
            PathBuf::from("/path/to/root.ledger") => indoc! {"
                ; foo
            "}.as_bytes().to_vec(),
        };

        let loader = Loader::new(
            PathBuf::from("/path/to/not_found.ledger"),
            FakeFileSystem::from(files),
        );
        let got_err = parse_into_vec(loader).unwrap_err();

        if let LoadError::IO(e, _) = &got_err {
            assert!(
                e.kind() == std::io::ErrorKind::NotFound,
                "should cause NotFound IO error: got {:?}",
                e
            );
        } else {
            panic!("unexpected error: {:?}", got_err);
        }
    }

    /// Including a file which is not registered fails with a NotFound IO error.
    #[test]
    fn load_include_non_existing_file() {
        let files = hashmap! {
            PathBuf::from("/path/to/root.ledger") => indoc! {"
                include non_existing.ledger
            "}.as_bytes().to_vec(),
        };

        let loader = Loader::new(
            PathBuf::from("/path/to/root.ledger"),
            FakeFileSystem::from(files),
        );
        let got_err = parse_into_vec(loader).expect_err("parse failed");

        if let LoadError::IO(e, _) = &got_err {
            assert!(
                e.kind() == std::io::ErrorKind::NotFound,
                "should cause NotFound IO error: got {:?}",
                e
            );
        } else {
            panic!("unexpected error: {:?}", got_err);
        }
    }

    mod fake_file_system {
        use super::*;

        use pretty_assertions::assert_eq;

        /// A path without `.` or `..` is returned as is.
        #[test]
        fn canonicalize_simple() {
            let got = FakeFileSystem::canonicalize_path(Path::new("path/to/file"));
            assert_eq!(got, Cow::Owned::<Path>(PathBuf::from("path/to/file")));
        }

        /// `.` is dropped and `..` removes the preceding component.
        #[test]
        fn canonicalize_up_and_current() {
            let got = FakeFileSystem::canonicalize_path(Path::new("path/to/sub/./.././file"));
            assert_eq!(got, Cow::Owned::<Path>(PathBuf::from("path/to/file")));
        }
    }
}