Skip to main content

okane_core/
load.rs

//! Contains the functions to load a Ledger file,
//! recursively resolving its `include` directives.
3
4use std::{
5    borrow::Cow,
6    collections::HashMap,
7    path::{self, Path, PathBuf},
8};
9
10use crate::{parse, syntax};
11
12/// Error caused by [Loader::load].
13#[derive(thiserror::Error, Debug)]
14pub enum LoadError {
15    #[error("failed to perform IO on file {1}")]
16    IO(#[source] std::io::Error, PathBuf),
17    #[error("failed to parse file {1}")]
18    Parse(#[source] parse::ParseError, PathBuf),
19    #[error("loading file path {0} doesn't have parent, maybe filesystem root is passed")]
20    RootLoadingPath(PathBuf),
21    #[error("invalid Unicode path is not supported: {0}")]
22    InvalidUnicodePath(String),
23    #[error("invalid glob pattern specified")]
24    InvalidIncludeGlob(#[from] glob::PatternError),
25    #[error("failed to match glob pattern")]
26    GlobFailure(#[from] glob::GlobError),
27}
28
29/// Loader is an object to keep loading a given file and may recusrively load them as `repr::LedgerEntry`,
30/// with the metadata about filename or line/column to point the error in a user friendly manner.
31pub struct Loader<F: FileSystem> {
32    source: PathBuf,
33    error_style: annotate_snippets::Renderer,
34    filesystem: F,
35}
36
37/// Creates a new [`Loader`] instance with [`ProdFileSystem`].
38pub fn new_loader(source: PathBuf) -> Loader<ProdFileSystem> {
39    Loader::new(source, ProdFileSystem)
40}
41
42impl<F: FileSystem> Loader<F> {
43    /// Create a new instance of `Loader` to load the given path.
44    ///
45    /// It might look weird to have the source path as a `Loader` member,
46    /// but that would give future flexibility to support loading from stdio/network without include,
47    /// or completely static one.
48    pub fn new(source: PathBuf, filesystem: F) -> Self {
49        Self {
50            source,
51            error_style: annotate_snippets::Renderer::styled(),
52            filesystem,
53        }
54    }
55
56    /// Create a new instance with the given `renderer`.
57    pub fn with_error_renderer(self, renderer: annotate_snippets::Renderer) -> Self {
58        Self {
59            source: self.source,
60            error_style: renderer,
61            filesystem: self.filesystem,
62        }
63    }
64
65    /// Returns a [`annotate_snippets::Renderer`] instance.
66    pub(crate) fn error_style(&self) -> &annotate_snippets::Renderer {
67        &self.error_style
68    }
69
70    /// Returns a `filesystem` reference.
71    pub(crate) fn filesystem(&self) -> &F {
72        &self.filesystem
73    }
74
75    /// Loads [syntax::LedgerEntry] and invoke callback on every entry,
76    /// recursively resolving `include` directives.
77    pub fn load<T, E, Deco>(&self, mut callback: T) -> Result<(), E>
78    where
79        T: FnMut(&Path, &parse::ParsedContext<'_>, &syntax::LedgerEntry<'_, Deco>) -> Result<(), E>,
80        E: std::error::Error + From<LoadError>,
81        Deco: syntax::decoration::Decoration,
82    {
83        let popts = parse::ParseOptions::default().with_error_style(self.error_style.clone());
84        self.load_impl(&popts, &self.source, &mut callback)
85    }
86
87    fn load_impl<T, E, Deco>(
88        &self,
89        parse_options: &parse::ParseOptions,
90        path: &Path,
91        callback: &mut T,
92    ) -> Result<(), E>
93    where
94        T: FnMut(&Path, &parse::ParsedContext<'_>, &syntax::LedgerEntry<'_, Deco>) -> Result<(), E>,
95        E: std::error::Error + From<LoadError>,
96        Deco: syntax::decoration::Decoration,
97    {
98        let path: Cow<'_, Path> = F::canonicalize_path(path);
99        let content = self
100            .filesystem
101            .file_content_utf8(&path)
102            .map_err(|err| LoadError::IO(err, path.clone().into_owned()))?;
103        for parsed in parse::parse_ledger(parse_options, &content) {
104            let (ctx, entry) =
105                parsed.map_err(|e| LoadError::Parse(e, path.clone().into_owned()))?;
106            match entry {
107                syntax::LedgerEntry::Include(p) => {
108                    let include_path: PathBuf = p.0.as_ref().into();
109                    let target: String = path
110                        .as_ref()
111                        .parent()
112                        .ok_or_else(|| LoadError::RootLoadingPath(path.as_ref().to_owned()))?
113                        .join(include_path)
114                        .into_os_string()
115                        .into_string()
116                        .map_err(|x| {
117                            LoadError::InvalidUnicodePath(format!("{}", PathBuf::from(x).display()))
118                        })?;
119                    let mut paths: Vec<PathBuf> = self.filesystem.glob(&target)?;
120                    if paths.is_empty() {
121                        return Err(LoadError::IO(
122                            std::io::Error::new(
123                                std::io::ErrorKind::NotFound,
124                                format!("glob {} does not hit any files", target),
125                            ),
126                            PathBuf::from(target),
127                        )
128                        .into());
129                    }
130                    log::debug!("glob {} hit {} files", target, paths.len());
131                    paths.sort_unstable();
132                    for path in &paths {
133                        self.load_impl(parse_options, path, callback)?;
134                    }
135                    Ok(())
136                }
137                _ => callback(&path, &ctx, &entry),
138            }?;
139        }
140        Ok(())
141    }
142}
143
144/// Interface to abstract file system.
145/// Normally you want to use [ProdFileSystem].
146pub trait FileSystem {
147    /// canonicalize the given path.
148    fn canonicalize_path<'a>(path: &'a Path) -> Cow<'a, Path>;
149
150    /// Load the given path and returns it as UTF-8 String.
151    fn file_content_utf8<P: AsRef<Path>>(&self, path: P) -> Result<String, std::io::Error>;
152
153    /// Returns all paths matching the given glob.
154    /// Paths can be in arbitrary order, and caller must sort it beforehand.
155    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, LoadError>;
156}
157
/// [FileSystem] backed by the real, local file system.
pub struct ProdFileSystem;
160
161impl FileSystem for ProdFileSystem {
162    fn canonicalize_path<'a>(path: &'a Path) -> Cow<'a, Path> {
163        dunce::canonicalize(path)
164            .map(|x| {
165                if x == path {
166                    Cow::Borrowed(path)
167                } else {
168                    Cow::Owned(x)
169                }
170            })
171            .unwrap_or_else(|x| {
172                log::warn!(
173                    "failed to canonicalize path {}, likeky to fail to load: {}",
174                    path.display(),
175                    x
176                );
177                path.into()
178            })
179    }
180
181    fn file_content_utf8<P: AsRef<Path>>(&self, path: P) -> Result<String, std::io::Error> {
182        std::fs::read_to_string(path)
183    }
184
185    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, LoadError> {
186        let paths: Vec<PathBuf> = glob::glob_with(pattern, glob_match_options())?
187            .collect::<Result<Vec<_>, glob::GlobError>>()?;
188        Ok(paths)
189    }
190}
191
192const fn glob_match_options() -> glob::MatchOptions {
193    glob::MatchOptions {
194        case_sensitive: true,
195        require_literal_separator: true,
196        require_literal_leading_dot: true,
197    }
198}
199
/// In-memory [FileSystem] backed by a map from file path to file content.
/// It never reads any actual file.
pub struct FakeFileSystem(HashMap<PathBuf, Vec<u8>>);
203
204impl From<HashMap<PathBuf, Vec<u8>>> for FakeFileSystem {
205    fn from(value: HashMap<PathBuf, Vec<u8>>) -> Self {
206        Self(value)
207    }
208}
209
210impl FileSystem for FakeFileSystem {
211    fn canonicalize_path<'a>(path: &'a Path) -> Cow<'a, Path> {
212        let mut components = Vec::new();
213        for pc in path.components() {
214            match pc {
215                path::Component::CurDir => (),
216                path::Component::ParentDir => {
217                    if components.pop().is_none() {
218                        log::warn!(
219                            "failed to pop parent, maybe wrong path given: {}",
220                            path.display()
221                        );
222                    }
223                }
224                path::Component::RootDir => components.push("/"),
225                path::Component::Prefix(_) => log::info!("ignore prefix: {:?}", pc),
226                path::Component::Normal(pc) => {
227                    components.push(pc.to_str().unwrap_or("invalid-unicode-component"))
228                }
229            }
230        }
231        Cow::Owned(components.join("/").into())
232    }
233
234    fn file_content_utf8<P: AsRef<Path>>(&self, path: P) -> Result<String, std::io::Error> {
235        let path = path.as_ref();
236        self.0
237            .get(path)
238            .ok_or(std::io::Error::new(
239                std::io::ErrorKind::NotFound,
240                format!("fake file {} not found", path.display()),
241            ))
242            .and_then(|x| {
243                String::from_utf8(x.clone())
244                    .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))
245            })
246    }
247
248    fn glob(&self, pattern: &str) -> Result<Vec<PathBuf>, LoadError> {
249        let pattern = glob::Pattern::new(pattern)?;
250        let mut paths: Vec<PathBuf> = self
251            .0
252            .keys()
253            .filter(|x| pattern.matches_path_with(x, glob_match_options()))
254            .cloned()
255            .collect();
256        paths.sort_by(|x, y| y.cmp(x));
257        Ok(paths)
258    }
259}
260
#[cfg(test)]
mod tests {
    use super::*;

    use std::{borrow::Borrow, path::Path, vec::Vec};

    use indoc::indoc;
    use maplit::hashmap;
    use pretty_assertions::assert_eq;

    /// Parses each `(path, content)` pair on its own and returns all entries in input order,
    /// each tagged with the path it came from. Stops at the first parse error.
    fn parse_static_ledger_entry(
        input: &[(&Path, &'static str)],
    ) -> Result<Vec<(PathBuf, syntax::plain::LedgerEntry<'static>)>, parse::ParseError> {
        let opts = parse::ParseOptions::default();
        let mut entries = Vec::new();
        for (p, content) in input {
            for parsed in parse::parse_ledger(&opts, content) {
                let (_ctx, entry) = parsed?;
                entries.push((p.to_path_buf(), entry));
            }
        }
        Ok(entries)
    }

    /// Runs the loader and collects every entry it reports, together with its source path.
    fn parse_into_vec<L, F>(
        loader: L,
    ) -> Result<Vec<(PathBuf, syntax::plain::LedgerEntry<'static>)>, LoadError>
    where
        L: Borrow<Loader<F>>,
        F: FileSystem,
    {
        let mut collected: Vec<(PathBuf, syntax::plain::LedgerEntry<'static>)> = Vec::new();
        let loader: &Loader<F> = loader.borrow();
        loader.load(|path, _ctx, entry| {
            collected.push((path.to_owned(), entry.to_static()));
            Ok::<(), LoadError>(())
        })?;
        Ok(collected)
    }

    /// Loads `testdata/load/recursive.ledger` from disk and checks that entries arrive
    /// in include order, each attributed to the file that contains it.
    #[test]
    fn load_valid_input_real_file() {
        let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
        let workspace_dir = manifest_dir.parent().unwrap_or_else(|| {
            panic!(
                "CARGO_MANIFEST_DIR={} must have parent dir",
                manifest_dir.display()
            )
        });
        let testdata_dir = dunce::canonicalize(workspace_dir.join("testdata/load")).unwrap();
        let sub_dir = testdata_dir.join("sub");
        let root = testdata_dir.join("recursive.ledger");
        let child1 = testdata_dir.join("child1.ledger");
        let child2 = sub_dir.join("child2.ledger");
        let child3 = testdata_dir.join("child3.ledger");
        let child4 = sub_dir.join("child4.ledger");
        let want = parse_static_ledger_entry(&[
            (
                &root,
                indoc! {"
            ; Demonstrates include feature including glob, parent dir, ...

            account Expenses:Grocery
                note スーパーマーケットで買ったやつ全部
                ; comment
                alias Expenses:CVS

            2024/01/01 Initial Balance
                Equity:Opening Balance                  -1000.00 CHF
                Assets:Bank:ZKB                          1000.00 CHF
            "},
            ),
            (
                &child2,
                indoc! {"
            2024/01/01 * Complicated salary
                Income:Salary                          -3,000.00 CHF
                Assets:Bank:ZKB                         2,500.00 CHF
                Expenses:Income Tax                       312.34 CHF
                Expenses:Social Tax                        37.66 CHF
                Assets:Fixed:年金                         150.00 CHF
            "},
            ),
            (
                &child3,
                indoc! {"
            2024/03/01 * SBB CFF FFS
                Assets:Bank:ZKB                            -5.60 CHF
                Expenses:Travel:Train                       5.60 CHF
            "},
            ),
            (
                &child2,
                indoc! {"
            2024/01/25 ! RSU
                ; TODO: FMV not determined
                Income:RSU                    (-50.0000 * 100.23 USD)
                Expenses:Income Tax
                Assets:Broker                            40.0000 OKANE @ 100.23 USD
            "},
            ),
            (
                &child4,
                indoc! {"
            2024/7/1 * Send money
                Assets:Bank:ZKB                         -1000.00 CHF
                Assets:Wire:Wise                         1000.00 CHF
            "},
            ),
            (
                &child1,
                indoc! {"
            2024/05/01 * Migros
                Expenses:Grocery                          -10.00 CHF
                Assets:Bank:ZKB                            10.00 CHF
            "},
            ),
        ])
        .expect("test input parse must not fail");
        let loader = new_loader(root.clone());
        let got = parse_into_vec(loader).expect("failed to parse the test data");
        assert_eq!(want, got);
    }

    /// Nested includes and a glob resolve against the fake file system;
    /// the dot-file must not be picked up by `*.ledger`.
    #[test]
    fn load_valid_fake() {
        let fake = hashmap! {
            PathBuf::from("path/to/root.ledger") => indoc! {"
                include child1.ledger
            "}.as_bytes().to_owned(),
            PathBuf::from("path/to/child1.ledger") => indoc! {"
                include sub/*.ledger
            "}.as_bytes().to_owned(),
            PathBuf::from("path/to/sub/child2.ledger") => Vec::new(),
            PathBuf::from("path/to/sub/child3.ledger") => indoc! {"
                ; comment here
            "}.as_bytes().to_owned(),
            PathBuf::from("path/to/sub/.unloaded.ledger") => indoc! {"
                completely invalid file, should not be loaded
            "}.as_bytes().to_owned(),
        };

        let want = parse_static_ledger_entry(&[(
            Path::new("path/to/sub/child3.ledger"),
            indoc! {"
            ; comment here
            "},
        )])
        .expect("test input parse must not fail");

        let loader = Loader::new(
            PathBuf::from("path/to/root.ledger"),
            FakeFileSystem::from(fake),
        );
        let got = parse_into_vec(loader).expect("parse failed");
        assert_eq!(want, got);
    }

    /// Loading a top-level file which doesn't exist reports a `NotFound` IO error.
    #[test]
    fn load_non_existing_file() {
        let fake = hashmap! {
            PathBuf::from("/path/to/root.ledger") => indoc! {"
                ; foo
            "}.as_bytes().to_owned(),
        };

        let loader = Loader::new(
            PathBuf::from("/path/to/not_found.ledger"),
            FakeFileSystem::from(fake),
        );
        let got_err = parse_into_vec(loader).unwrap_err();

        match &got_err {
            LoadError::IO(e, _) => assert!(
                e.kind() == std::io::ErrorKind::NotFound,
                "should cause NotFound IO error: got {:?}",
                e
            ),
            _ => panic!("unexpected error: {:?}", got_err),
        }
    }

    /// An `include` which matches no file reports a `NotFound` IO error.
    #[test]
    fn load_include_non_existing_file() {
        let fake = hashmap! {
            PathBuf::from("/path/to/root.ledger") => indoc! {"
                include non_existing.ledger
            "}.as_bytes().to_owned(),
        };

        let loader = Loader::new(
            PathBuf::from("/path/to/root.ledger"),
            FakeFileSystem::from(fake),
        );
        let got_err = parse_into_vec(loader).expect_err("parse failed");

        match &got_err {
            LoadError::IO(e, _) => assert!(
                e.kind() == std::io::ErrorKind::NotFound,
                "should cause NotFound IO error: got {:?}",
                e
            ),
            _ => panic!("unexpected error: {:?}", got_err),
        }
    }

    mod fake_file_system {
        use super::*;

        use pretty_assertions::assert_eq;

        /// A path without `.` or `..` comes back unchanged.
        #[test]
        fn canonicalize_simple() {
            let got = FakeFileSystem::canonicalize_path(Path::new("path/to/file"));
            let want = Cow::Owned::<Path>(PathBuf::from("path/to/file"));
            assert_eq!(got, want);
        }

        /// `.` is dropped and `..` removes the component before it.
        #[test]
        fn canonicalize_up_and_current() {
            let got = FakeFileSystem::canonicalize_path(Path::new("path/to/sub/./.././file"));
            let want = Cow::Owned::<Path>(PathBuf::from("path/to/file"));
            assert_eq!(got, want);
        }
    }
}