1#![allow(dead_code)]
3#![recursion_limit = "256"]
4#![doc = include_str!("../README.md")]
5
6use ariadne::{Color, Label, Report};
86use chumsky::prelude::{Input, Parser};
87use lazy_format::lazy_format;
88use lexer::{lex, Token};
89use parsers::{file, includes, ParserState};
90use sort::SortIteratorAdaptor;
91use std::{
92 collections::{HashMap, HashSet, VecDeque},
93 ffi::OsStr,
94 fmt::{self, Formatter},
95 fs::File,
96 io::{self, Read, Write},
97 iter::once,
98 path::{Path, PathBuf},
99};
100
101pub use crate::{trim::trim_trailing_whitespace, types::*};
102
/// The root Beancount source together with every source reachable from it via
/// `include` pragmas.
///
/// All sources are read when the value is constructed. An include that cannot
/// be loaded is recorded per path rather than failing construction.
#[derive(Clone)]
pub struct BeancountSources {
    /// Path of the root file, or `None` when the root was supplied as a string.
    root_path: Option<PathBuf>,
    /// Source id assigned to the root (`SourceId::default()`).
    root_source_id: SourceId,
    /// Full text of the root source.
    root_content: String,
    /// Byte offset of each `char` in `root_content`, used to convert byte
    /// spans into character spans.
    root_content_char_indices: Vec<usize>,
    /// Outcome of loading each included path, keyed by the resolved (not
    /// canonicalized) path.
    included_content: HashMap<PathBuf, IncludedSource>,
    /// Display name of each source, indexed by source id; the root's entry is
    /// its path, or `"inline"` when there is no root path.
    source_id_strings: Vec<String>,
}
127
/// Outcome of attempting to load one included path.
#[derive(Clone, Debug)]
enum IncludedSource {
    /// Successfully read: its source id, its full text, and the byte offset of
    /// each `char` in that text.
    Content(SourceId, String, Vec<usize>),
    /// The file could not be read; holds the I/O error rendered as a string.
    IoError(String),
    /// The path was treated as a repeat of a source already seen, so it was
    /// not read.
    Duplicate,
}
134
135fn get_includes(content: &str, source_id: SourceId) -> Vec<String> {
137 fn get_includes_for_tokens(
138 tokens: Vec<(Token, Span)>,
139 source_id: SourceId,
140 end_of_input: Span,
141 ) -> Vec<String> {
142 let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
143
144 let spanned_tokens = tokens
145 .map(end_of_input, |(t, s)| (t, s))
146 .with_context(source_id);
147
148 includes()
150 .parse_with_state(spanned_tokens, &mut parser_state)
151 .into_output()
152 .unwrap_or_default()
153 }
154
155 let tokens = lex_with_source(source_id, content);
156 get_includes_for_tokens(tokens, source_id, end_of_input(source_id, content))
157}
158
/// Returns the directory containing `p`.
///
/// Yields `None` when `p` has no parent, and also when the parent is the empty
/// path (as for a bare file name such as `ledger.beancount`).
fn path_dir(p: &Path) -> Option<&Path> {
    p.parent().filter(|dir| dir.as_os_str() != OsStr::new(""))
}
169
170fn resolve_included_path(including_path: Option<&PathBuf>, included_path: &Path) -> PathBuf {
172 match including_path.and_then(|p| path_dir(p.as_ref())) {
173 Some(p) => p.join(included_path),
174 None => included_path.to_path_buf(),
175 }
176}
177
178impl BeancountSources {
179 fn try_read_with_includes(root_path: PathBuf) -> io::Result<Self> {
180 let root_content = read(&root_path)?;
181 Ok(Self::read_with_includes(Some(root_path), root_content))
182 }
183
184 fn read_with_includes(root_path: Option<PathBuf>, root_content: String) -> Self {
185 let root_source_id = SourceId::default();
186 let root_source_id_string = root_path
187 .as_ref()
188 .map(|p| p.to_string_lossy().into())
189 .unwrap_or("inline".to_string());
190 let mut source_id_strings = Vec::from([root_source_id_string]);
191
192 let mut pending_paths = get_includes(&root_content, root_source_id)
193 .into_iter()
194 .map(|included_path| resolve_included_path(root_path.as_ref(), included_path.as_ref()))
195 .collect::<VecDeque<_>>();
196
197 let mut included_content: HashMap<PathBuf, IncludedSource> = HashMap::new();
198
199 let mut canonical_paths =
201 HashSet::from([root_path.as_ref().and_then(|p| p.canonicalize().ok())]);
202
203 while !pending_paths.is_empty() {
204 let path = pending_paths.pop_front().unwrap();
205 let canonical_path = path.canonicalize().ok();
206
207 if canonical_paths.contains(&canonical_path) {
208 included_content
210 .entry(path)
211 .or_insert(IncludedSource::Duplicate);
212 } else {
213 canonical_paths.insert(canonical_path);
214
215 let source_id = SourceId::from(source_id_strings.len());
216 source_id_strings.push(path.to_string_lossy().into());
217
218 let included_source = read(&path).map_or_else(
219 |e| IncludedSource::IoError(e.to_string()),
220 |c| {
221 let char_indices = c.char_indices().map(|(i, _)| i).collect::<Vec<_>>();
225 IncludedSource::Content(source_id, c, char_indices)
226 },
227 );
228
229 included_content.insert(path.clone(), included_source);
232 let included_source = included_content.get(&path).unwrap();
233
234 if let IncludedSource::Content(_, content, _) = included_source {
235 let mut includes = get_includes(content, source_id)
236 .into_iter()
237 .map(|included_path| {
238 resolve_included_path(Some(&path), included_path.as_ref())
239 })
240 .collect::<VecDeque<_>>();
241 pending_paths.append(&mut includes);
242 }
243 }
244 }
245
246 let root_content_char_indices = root_content
247 .char_indices()
248 .map(|(i, _)| i)
249 .collect::<Vec<_>>();
250
251 Self {
252 root_path,
253 root_source_id,
254 root_content,
255 root_content_char_indices,
256 included_content,
257 source_id_strings,
258 }
259 }
260
261 pub fn write<W, E, K>(&self, mut w: W, errors_or_warnings: Vec<E>) -> io::Result<()>
262 where
263 W: Write + Copy,
264 E: Into<AnnotatedErrorOrWarning<K>>,
265 K: ErrorOrWarningKind,
266 {
267 for error_or_warning in errors_or_warnings.into_iter() {
268 use chumsky::span::Span;
269 let AnnotatedErrorOrWarning {
270 error_or_warning,
271 annotation,
272 } = error_or_warning.into();
273 let (src_id, span) = self.source_id_string_and_adjusted_span(&error_or_warning.span);
274 let color = error_or_warning.color();
275 let report_kind = error_or_warning.report_kind();
276
277 Report::build(report_kind, (src_id.clone(), (span.start()..span.end())))
278 .with_message(error_or_warning.message)
279 .with_labels(Some(
280 Label::new((src_id, (span.start()..span.end())))
281 .with_message(error_or_warning.reason)
282 .with_color(color),
283 ))
284 .with_labels(error_or_warning.contexts.into_iter().map(|(label, span)| {
285 let (src_id, span) = self.source_id_string_and_adjusted_span(&span);
286 Label::new((src_id, (span.start()..span.end())))
287 .with_message(lazy_format!("in this {}", label))
288 .with_color(Color::Yellow)
289 }))
290 .with_labels(error_or_warning.related.into_iter().map(|(label, span)| {
291 let (src_id, span) = self.source_id_string_and_adjusted_span(&span);
292 Label::new((src_id, (span.start()..span.end())))
293 .with_message(lazy_format!("{}", label))
294 .with_color(Color::Yellow)
295 }))
296 .finish()
297 .write(ariadne::sources(self.sources()), w)?;
298
299 if let Some(annotation) = annotation {
300 w.write_fmt(core::format_args!("{}\n", &annotation))?;
302 }
303 }
304 Ok(())
305 }
306
307 fn byte_to_rune(&self, char_indices: &[usize], byte_span: Span) -> Span {
308 let mut rune_span = byte_span;
309 rune_span.start = char_indices.partition_point(|&i| i < byte_span.start);
310 rune_span.end = char_indices.partition_point(|&i| i < byte_span.end);
311 rune_span
312 }
313
314 fn source_id_string_and_adjusted_span(&self, span: &Span) -> (String, Span) {
315 use chumsky::span::Span;
316 let source_id = span.context();
317 let source_id_str = self.source_id_string(source_id);
318 let empty_char_indices = Vec::default();
319 let (source_content, source_content_char_indices) = if source_id == self.root_source_id {
320 (self.root_content.as_str(), &self.root_content_char_indices)
321 } else if let IncludedSource::Content(_, content, content_char_indices) =
322 self.included_content.get(Path::new(source_id_str)).unwrap()
323 {
324 (content.as_str(), content_char_indices)
325 } else {
326 ("", &empty_char_indices)
327 };
328
329 let trimmed = trimmed_span(source_content, span);
330 let rune_span = self.byte_to_rune(source_content_char_indices, trimmed);
331
332 (source_id_str.to_string(), rune_span)
333 }
334
335 fn source_id_string(&self, source_id: SourceId) -> &str {
336 self.source_id_strings[Into::<usize>::into(source_id)].as_str()
337 }
338
339 fn sources(&self) -> Vec<(String, &str)> {
340 once((
341 self.source_id_string(self.root_source_id).to_string(),
342 self.root_content.as_str(),
343 ))
344 .chain(
345 self.included_content
346 .iter()
347 .filter_map(|(_, included_source)| {
348 if let IncludedSource::Content(source_id, content, _) = included_source {
349 Some((
350 self.source_id_string(*source_id).to_string(),
351 content.as_str(),
352 ))
353 } else {
354 None
355 }
356 }),
357 )
358 .collect()
359 }
360
361 fn content_iter(&self) -> impl Iterator<Item = (SourceId, Option<&Path>, &str)> {
362 once((
363 self.root_source_id,
364 self.root_path.as_deref(),
365 self.root_content.as_str(),
366 ))
367 .chain(
368 self.included_content
369 .iter()
370 .filter_map(|(pathbuf, included_source)| {
371 if let IncludedSource::Content(source_id, content, _) = included_source {
372 Some((*source_id, Some(pathbuf.as_path()), content.as_str()))
373 } else {
374 None
375 }
376 }),
377 )
378 }
379
380 fn error_path_iter(&self) -> impl Iterator<Item = (Option<&Path>, String)> {
381 self.included_content
382 .iter()
383 .filter_map(|(pathbuf, included_source)| {
384 if let IncludedSource::IoError(e) = included_source {
385 Some((Some(pathbuf.as_path()), e.clone()))
386 } else {
387 None
388 }
389 })
390 }
391}
392
393impl TryFrom<PathBuf> for BeancountSources {
394 type Error = io::Error;
395
396 fn try_from(source_path: PathBuf) -> io::Result<Self> {
397 Self::try_read_with_includes(source_path)
398 }
399}
400
401impl TryFrom<&Path> for BeancountSources {
402 type Error = io::Error;
403
404 fn try_from(source_path: &Path) -> io::Result<Self> {
405 Self::try_read_with_includes(source_path.to_owned())
406 }
407}
408
409impl From<String> for BeancountSources {
410 fn from(source_string: String) -> Self {
411 Self::read_with_includes(None, source_string)
412 }
413}
414
415impl From<&str> for BeancountSources {
416 fn from(source_string: &str) -> Self {
417 Self::read_with_includes(None, source_string.to_owned())
418 }
419}
420
421impl std::fmt::Debug for BeancountSources {
422 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
423 writeln!(f, "BeancountSources(",)?;
424
425 for (path, included_source) in &self.included_content {
426 match included_source {
427 IncludedSource::Content(source_id, content, _) => writeln!(
428 f,
429 " {} ok len {},",
430 self.source_id_string(*source_id),
431 content.len()
432 )?,
433 IncludedSource::IoError(e) => writeln!(f, " {:?} err {},", path, e)?,
434 IncludedSource::Duplicate => writeln!(f, " {:?} duplicate include", path)?,
435 }
436 }
437
438 writeln!(f, ")",)
439 }
440}
441
442pub fn lex_with_source(source_id: SourceId, s: &str) -> Vec<(Token, Span)> {
443 lex(s)
444 .map(|(tok, span)| (tok, chumsky::span::Span::new(source_id, span)))
445 .collect::<Vec<_>>()
446}
447
/// Reads the whole file at `file_path` into a `String`.
///
/// # Errors
///
/// Returns any error raised while opening the file or reading it as UTF-8
/// text.
fn read<P>(file_path: P) -> io::Result<String>
where
    P: AsRef<Path>,
{
    let mut file_content = String::new();
    File::open(&file_path)?.read_to_string(&mut file_content)?;
    Ok(file_content)
}
459
/// A lexed token paired with its span.
type SpannedToken<'t> = (Token<'t>, Span);
461
/// Parser over a set of [`BeancountSources`].
///
/// Lexing happens in [`BeancountParser::new`]; parsing happens in
/// [`BeancountParser::parse`].
pub struct BeancountParser<'s> {
    /// The sources being parsed; tokens borrow their text.
    sources: &'s BeancountSources,
    /// Token lists lexed from `sources` when the parser is constructed.
    tokenized_sources: Vec<Vec<SpannedToken<'s>>>,
}
508
/// The result of a parse that produced no errors.
#[derive(Debug)]
pub struct ParseSuccess<'t> {
    /// Directives from all sources, sorted by date, with `Balance` directives
    /// ahead of other directives on the same date.
    pub directives: Vec<Spanned<Directive<'t>>>,
    /// Options accumulated over the parse.
    pub options: Options<'t>,
    /// Plugins collected from plugin pragmas.
    pub plugins: Vec<Plugin<'t>>,
    /// Warnings raised while parsing; their presence does not make the parse fail.
    pub warnings: Vec<Warning>,
}
517
/// The result of a parse that produced at least one error.
#[derive(Debug)]
pub struct ParseError {
    /// Errors from parsing declarations and from processing pragmas.
    pub errors: Vec<Error>,
    /// Warnings raised alongside the errors.
    pub warnings: Vec<Warning>,
}
524
/// Output of parsing every source: the declarations of each source keyed by its
/// path (`None` for a root without a path), the options built from the parser
/// state, the errors, and the warnings.
type ParseDeclarationsResult<'t> = (
    HashMap<Option<&'t Path>, Vec<Spanned<Declaration<'t>>>>,
    Options<'t>,
    Vec<Error>,
    Vec<Warning>,
);
532
533impl<'s> BeancountParser<'s> {
534 pub fn new(sources: &'s BeancountSources) -> Self {
536 let mut tokenized_sources = Vec::new();
537
538 for (source_id, _path, content) in sources.content_iter() {
539 tokenized_sources.push(lex_with_source(source_id, content));
540 }
541
542 BeancountParser {
543 sources,
544 tokenized_sources,
545 }
546 }
547
548 pub fn parse(&self) -> Result<ParseSuccess, ParseError> {
550 let (parsed_sources, options, mut errors, warnings) = self.parse_declarations();
551 let error_paths = self.sources.error_path_iter().collect::<HashMap<_, _>>();
552 let mut p = PragmaProcessor::new(self.root_path(), parsed_sources, error_paths, options);
553
554 let directives = p
557 .by_ref()
558 .sort(|d| {
559 (
560 *d.item().date().item(),
561 !matches!(d.variant(), DirectiveVariant::Balance(_)),
563 )
564 })
565 .collect::<Vec<_>>();
566 let (options, plugins, mut pragma_errors) = p.result();
567 errors.append(&mut pragma_errors);
568
569 if errors.is_empty() {
570 Ok(ParseSuccess {
571 directives,
572 options,
573 plugins,
574 warnings,
575 })
576 } else {
577 Err(ParseError { errors, warnings })
578 }
579 }
580
581 fn root_path(&self) -> Option<&'s Path> {
582 self.sources.root_path.as_deref()
583 }
584
585 fn parse_declarations(&self) -> ParseDeclarationsResult {
588 let mut all_outputs = HashMap::new();
589 let mut all_errors = Vec::new();
590 let mut parser_state = chumsky::extra::SimpleState(ParserState::default());
591
592 for (source_id, source_path, content) in self.sources.content_iter() {
593 let i_source: usize = source_id.into();
594 let tokens = &self.tokenized_sources[i_source];
595
596 let spanned_tokens = tokens
597 .map(end_of_input(source_id, content), |(t, s)| (t, s))
598 .with_context(source_id);
599
600 let (output, errors) = file(source_path)
601 .parse_with_state(spanned_tokens, &mut parser_state)
602 .into_output_errors();
603
604 all_outputs.insert(source_path, output.unwrap_or(Vec::new()));
605 all_errors.extend(errors);
606 }
607
608 let ParserState { options, warnings } = parser_state.0;
609
610 (
611 all_outputs,
612 Options::new(options),
613 all_errors.into_iter().map(Error::from).collect(),
614 warnings,
615 )
616 }
617}
618
/// Iterator adaptor that walks parsed declarations, applying pragmas as it goes
/// and yielding only directives.
#[derive(Debug)]
struct PragmaProcessor<'s> {
    /// Path of the source whose declarations are currently being consumed.
    current_path: Option<PathBuf>,
    /// Declarations of the current source that have not been consumed yet.
    current_declarations: VecDeque<Spanned<Declaration<'s>>>,
    /// Sources suspended while one of their includes is processed, most recent
    /// at the front.
    stacked: VecDeque<(Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>)>,
    /// Parsed sources that have not yet been pulled in by an include.
    remaining: HashMap<Option<PathBuf>, VecDeque<Spanned<Declaration<'s>>>>,
    /// Error text for each path that could not be read.
    error_paths: HashMap<Option<PathBuf>, String>,
    /// Span of the include that pulled in each file, keyed by canonical path.
    include_span_by_canonical_path: HashMap<PathBuf, Span>,
    /// Tags currently pushed; the value holds further pushes of the same tag.
    tags: HashMap<Spanned<Tag<'s>>, Vec<Spanned<Tag<'s>>>>,
    /// Meta keys currently pushed, each with the stack of values pushed for it
    /// and the span of the key at each push.
    meta_key_values: HashMap<Spanned<Key<'s>>, Vec<(Span, Spanned<MetaValue<'s>>)>>,
    /// Options, updated by each option pragma encountered.
    options: Options<'s>,
    /// Plugins collected from plugin pragmas.
    plugins: Vec<Plugin<'s>>,
    /// Errors raised while processing pragmas.
    errors: Vec<Error>,
}
638
639impl<'s> PragmaProcessor<'s> {
640 fn new(
641 root_path: Option<&Path>,
642 parsed_sources: HashMap<Option<&Path>, Vec<Spanned<Declaration<'s>>>>,
643 error_paths: HashMap<Option<&Path>, String>,
644 options: Options<'s>,
645 ) -> Self {
646 let mut remaining = parsed_sources
647 .into_iter()
648 .map(|(path, declarations)| {
649 (path.map(|p| p.to_path_buf()), VecDeque::from(declarations))
650 })
651 .collect::<HashMap<_, _>>();
652 let error_paths = error_paths
653 .into_iter()
654 .map(|(path, e)| (path.map(|p| p.to_path_buf()), e))
655 .collect::<HashMap<_, _>>();
656
657 let current_path = root_path.map(|p| p.to_path_buf());
658 let current_declarations = remaining.remove(¤t_path).unwrap();
659
660 PragmaProcessor {
661 current_path,
662 current_declarations,
663 stacked: VecDeque::new(),
664 remaining,
665 error_paths,
666 include_span_by_canonical_path: HashMap::default(),
667 tags: HashMap::new(),
668 meta_key_values: HashMap::new(),
669 options,
670 plugins: Vec::new(),
671 errors: Vec::new(),
672 }
673 }
674
675 fn result(self) -> (Options<'s>, Vec<Plugin<'s>>, Vec<Error>) {
676 let mut errors = self.errors;
678
679 for (key, _value) in self.meta_key_values {
680 let e = Error::new(
681 "invalid pushmeta",
682 "missing corresponding popmeta",
683 key.span,
684 );
685 errors.push(e);
686 }
687
688 for (tag, others) in self.tags {
689 let e = Error::new("invalid pushtag", "missing corresponding poptag", tag.span);
690 errors.push(e);
691 for other in others {
692 let e = Error::new(
693 "invalid pushtag",
694 "missing corresponding poptag",
695 other.span,
696 );
697 errors.push(e);
698 }
699 }
700
701 (self.options, self.plugins, errors)
702 }
703}
704
705impl<'s> Iterator for PragmaProcessor<'s> {
706 type Item = Spanned<Directive<'s>>;
707
708 fn next(&mut self) -> Option<Self::Item> {
709 match self.current_declarations.pop_front() {
710 Some(declaration) => {
711 match declaration.item {
712 Declaration::Directive(mut directive) => {
713 directive.metadata.augment_tags(&self.tags);
714 directive.metadata.augment_key_values(&self.meta_key_values);
715
716 Some(spanned(directive, declaration.span))
717 }
718
719 Declaration::Pragma(pragma) => {
720 use Pragma::*;
721
722 match pragma {
723 Pushtag(tag) => match self.tags.get_mut(&tag) {
724 Some(others) => {
725 others.push(tag);
726 }
727 None => {
728 self.tags.insert(tag, Vec::default());
729 }
730 },
731 Poptag(tag) => {
732 let mut last_tag = false;
733
734 match self.tags.get_mut(&tag) {
735 Some(others) => {
736 if others.is_empty() {
737 last_tag = true;
738 } else {
740 others.pop();
741 }
742 }
743 None => {
744 let e = Error::new(
745 "invalid poptag",
746 "missing corresponding pushtag",
747 tag.span,
748 );
749 self.errors.push(e);
750 }
751 }
752
753 if last_tag {
754 self.tags.remove(&tag);
755 }
756 }
757 Pushmeta(meta) => match self.meta_key_values.get_mut(&meta.key) {
758 Some(values) => {
759 values.push((meta.key.span, meta.value));
760 }
761 None => {
762 self.meta_key_values
763 .insert(meta.key, vec![(meta.key.span, meta.value)]);
764 }
765 },
766 Popmeta(meta) => {
767 let mut last_meta = false;
768
769 match self.meta_key_values.get_mut(&meta) {
770 Some(values) => {
771 values.pop();
772 if values.is_empty() {
773 last_meta = true;
774 }
776 }
777 None => {
778 let e = Error::new(
779 "invalid popmeta",
780 "missing corresponding pushmeta",
781 meta.span,
782 );
783 self.errors.push(e);
784 }
785 }
786
787 if last_meta {
788 self.meta_key_values.remove(&meta);
789 }
790 }
791 Include(relpath) => {
792 let (path, span) = (
793 Some(resolve_included_path(
794 self.current_path.as_ref(),
795 AsRef::<Path>::as_ref(*relpath.item()),
796 )),
797 *relpath.span(),
798 );
799 let canonical_path =
800 path.as_ref().and_then(|p| p.canonicalize().ok());
801
802 match self.remaining.remove_entry(&path) {
803 Some((included_path, included_declarations)) => {
804 let stacked_path = std::mem::replace(
805 &mut self.current_path,
806 included_path,
807 );
808 let stacked_declarations = std::mem::replace(
809 &mut self.current_declarations,
810 included_declarations,
811 );
812 self.stacked
813 .push_front((stacked_path, stacked_declarations));
814
815 if let Some(canonical_path) = canonical_path {
817 self.include_span_by_canonical_path
818 .insert(canonical_path, span);
819 }
820 }
821
822 None => {
823 let e = match self.error_paths.get(&path) {
825 Some(e) => {
826 Error::new("can't read file", e.to_string(), span)
827 }
828 None => {
829 let e = Error::new(
830 "duplicate include",
831 "file already included",
832 span,
833 );
834
835 if let Some(span) = canonical_path.and_then(|p| {
837 self.include_span_by_canonical_path.get(&p)
838 }) {
839 e.related_to_named_span("file", *span)
840 } else {
841 e
842 }
843 }
844 };
845 self.errors.push(e);
846 }
847 }
848 }
849
850 Option(opt) => {
851 if let Err(e) = self.options.assimilate(opt) {
852 self.errors.push(e);
853 }
854 }
855
856 Plugin(plugin) => self.plugins.push(plugin),
857 }
858
859 self.next()
861 }
862 }
863 }
864 None => match self.stacked.pop_front() {
865 Some((path, declarations)) => {
866 self.current_path = path;
867 self.current_declarations = declarations;
868 self.next()
869 }
870 None => None,
871 },
872 }
873 }
874}
875
876fn end_of_input(source_id: SourceId, s: &str) -> Span {
877 chumsky::span::Span::new(source_id, s.len()..s.len())
878}
879
880fn trimmed_span(source: &str, span: &Span) -> Span {
881 let mut trimmed = *span;
882 trimmed.end = trim_trailing_whitespace(source, span.start, span.end);
883 trimmed
884}
885
886#[cfg(test)]
887pub use lexer::bare_lex;
888mod format;
889mod lexer;
890pub use options::Options;
891mod options;
892mod parsers;
893mod sort;
894mod trim;
895pub mod types;