use std::{
cell::RefCell,
path::{Path, PathBuf},
rc::Rc,
string::ToString,
};
use tracing::instrument;
mod blocks;
mod constants;
mod error;
pub(crate) mod grammar;
mod model;
mod options;
mod parsed;
mod preprocessor;
mod safe_mode;
mod warning;
pub(crate) use grammar::{InlinePreprocessorParserState, ProcessedContent, inline_preprocessing};
use preprocessor::Preprocessor;
pub use error::{Error, Positioning, SourceLocation};
pub use grammar::parse_text_for_quotes;
pub use model::{
Admonition, AdmonitionVariant, Anchor, AttributeName, AttributeValue, Attribution, Audio,
Author, Autolink, Block, BlockMetadata, Bold, Button, CalloutList, CalloutListItem, CalloutRef,
CalloutRefKind, CiteTitle, ColumnFormat, ColumnStyle, ColumnWidth, Comment, CrossReference,
CurvedApostrophe, CurvedQuotation, DelimitedBlock, DelimitedBlockType, DescriptionList,
DescriptionListItem, DiscreteHeader, Document, DocumentAttribute, DocumentAttributes,
ElementAttributes, Footnote, Form, HEADER, Header, Highlight, HorizontalAlignment, ICON_SIZES,
Icon, Image, IndexTerm, IndexTermKind, InlineMacro, InlineNode, Italic, Keyboard, LineBreak,
Link, ListItem, ListItemCheckedStatus, Location, MAX_SECTION_LEVELS, MAX_TOC_LEVELS, Mailto,
Menu, Monospace, NORMAL, OrderedList, PageBreak, Paragraph, Pass, PassthroughKind, Plain,
Position, Raw, Role, Section, Source, SourceUrl, StandaloneCurvedApostrophe, Stem, StemContent,
StemNotation, Subscript, Substitution, SubstitutionOp, SubstitutionSpec, Subtitle, Superscript,
Table, TableColumn, TableOfContents, TableRow, ThematicBreak, Title, TocEntry,
UNNUMBERED_SECTION_STYLES, UnorderedList, Url, VERBATIM, Verbatim, VerticalAlignment, Video,
inlines_to_string, strip_quotes, substitute,
};
pub use options::{Options, OptionsBuilder, SafeMode};
pub use parsed::{OwnedSource, ParseInlineResult, ParseResult};
pub use warning::{Warning, WarningKind};
/// Builder-style handle that pairs borrowed input text with the [`Options`]
/// used to parse it.
///
/// Create one with [`Parser::new`], optionally replace the options with
/// [`Parser::with_options`], then consume it with [`Parser::parse`] or
/// [`Parser::parse_inline`].
#[derive(Debug)]
pub struct Parser<'input> {
/// Text to parse, borrowed for `'input`.
input: &'input str,
/// Options passed by reference to the free `parse` / `parse_inline` functions.
options: Options<'input>,
}
impl<'input> Parser<'input> {
    /// Creates a parser over `input` that uses `Options::default()`.
    #[must_use]
    pub fn new(input: &'input str) -> Self {
        let options = Options::default();
        Self { input, options }
    }

    /// Returns the parser with its options replaced by `options`.
    ///
    /// The input text is carried over unchanged.
    #[must_use]
    pub fn with_options(self, options: Options<'input>) -> Self {
        Self { options, ..self }
    }

    /// Consumes the parser and parses its input through the free [`parse`] function.
    ///
    /// # Errors
    ///
    /// Returns whatever error [`parse`] reports for this input and these options.
    pub fn parse(self) -> Result<ParseResult, Error> {
        let Self { input, options } = self;
        parse(input, &options)
    }

    /// Consumes the parser and parses its input through the free [`parse_inline`] function.
    ///
    /// # Errors
    ///
    /// Returns whatever error [`parse_inline`] reports for this input and these options.
    pub fn parse_inline(self) -> Result<ParseInlineResult, Error> {
        let Self { input, options } = self;
        parse_inline(input, &options)
    }
}
/// Preprocesses the content supplied by `reader` and parses the result as a document.
///
/// The preprocessor and the grammar stage share a single warnings list. No
/// file path is associated with the input (`None` is forwarded to the
/// grammar stage).
///
/// # Errors
///
/// Returns the preprocessor's error if `Preprocessor::process_reader` fails,
/// otherwise whatever the grammar stage reports.
#[instrument(skip(reader))]
pub fn parse_from_reader<R: std::io::Read>(
    reader: R,
    options: &Options<'_>,
) -> Result<ParseResult, Error> {
    // One shared sink: a clone of the handle goes to the preprocessor, the
    // original is handed to the grammar stage below.
    let warnings: Rc<RefCell<Vec<Warning>>> = Rc::default();

    // The "preprocess" span is only active while the preprocessor runs.
    let preprocessed = tracing::info_span!("preprocess")
        .in_scope(|| Preprocessor::process_reader(reader, options, Rc::clone(&warnings)))?;

    let text: Box<str> = preprocessed.text.into_owned().into_boxed_str();
    let _grammar_span = tracing::info_span!("grammar_parse", input_len = text.len()).entered();
    parse_input(
        text,
        options.clone(),
        None,
        preprocessed.leveloffset_ranges,
        preprocessed.source_ranges,
        warnings,
    )
}
/// Preprocesses `input` and parses the result as a document.
///
/// The preprocessor and the grammar stage share a single warnings list. No
/// file path is associated with the input (`None` is forwarded to the
/// grammar stage).
///
/// # Errors
///
/// Returns the preprocessor's error if `Preprocessor::process` fails,
/// otherwise whatever the grammar stage reports.
#[instrument]
pub fn parse(input: &str, options: &Options<'_>) -> Result<ParseResult, Error> {
    // One shared sink: a clone of the handle goes to the preprocessor, the
    // original is handed to the grammar stage below.
    let warnings: Rc<RefCell<Vec<Warning>>> = Rc::default();

    // The "preprocess" span is only active while the preprocessor runs.
    let preprocessed = tracing::info_span!("preprocess")
        .in_scope(|| Preprocessor::process(input, options, Rc::clone(&warnings)))?;

    let text: Box<str> = preprocessed.text.into_owned().into_boxed_str();
    let _grammar_span = tracing::info_span!("grammar_parse", input_len = text.len()).entered();
    parse_input(
        text,
        options.clone(),
        None,
        preprocessed.leveloffset_ranges,
        preprocessed.source_ranges,
        warnings,
    )
}
/// Reads the file at `file_path`, preprocesses it and parses the result as a document.
///
/// The file is loaded with `preprocessor::read_and_decode_file`, and the path
/// is passed both to the preprocessor and (as an owned `PathBuf`) to the
/// grammar stage, where it becomes the parser state's current file.
///
/// # Errors
///
/// Returns an error if the file cannot be read/decoded, if
/// `Preprocessor::process_with_file` fails, or if the grammar stage reports one.
#[instrument(skip(file_path))]
pub fn parse_file<P: AsRef<Path>>(
    file_path: P,
    options: &Options<'_>,
) -> Result<ParseResult, Error> {
    let path: &Path = file_path.as_ref();
    let raw = preprocessor::read_and_decode_file(path, None)?;

    // One shared sink: a clone of the handle goes to the preprocessor, the
    // original is handed to the grammar stage below.
    let warnings: Rc<RefCell<Vec<Warning>>> = Rc::default();

    // The "preprocess" span is only active while the preprocessor runs.
    let preprocessed = tracing::info_span!("preprocess").in_scope(|| {
        Preprocessor::process_with_file(&raw, path, options, Rc::clone(&warnings))
    })?;

    let text: Box<str> = preprocessed.text.into_owned().into_boxed_str();
    let _grammar_span = tracing::info_span!("grammar_parse", input_len = text.len()).entered();
    parse_input(
        text,
        options.clone(),
        Some(path.to_path_buf()),
        preprocessed.leveloffset_ranges,
        preprocessed.source_ranges,
        warnings,
    )
}
/// Converts a PEG parse error into a [`SourceLocation`].
///
/// The error's byte offset is looked up in `state.source_ranges`, searching
/// from the last range backwards and taking the first one that contains it.
/// On a hit, the reported file is that range's file and the line is the
/// range's `start_line` plus the number of `'\n'` characters between the
/// range's `start_offset` and the error offset. Otherwise the location falls
/// back to `state.current_file` and the line reported by PEG. The column is
/// always the one reported by PEG.
fn peg_error_to_source_location(
    error: &peg::error::ParseError<peg::str::LineCol>,
    state: &grammar::ParserState,
) -> SourceLocation {
    let location = &error.location;
    let containing_range = state
        .source_ranges
        .iter()
        .rev()
        .find(|candidate| candidate.contains(location.offset));

    let (file, line) = match containing_range {
        Some(range) => {
            // If the slice cannot be taken (`get` returns `None`), treat it
            // as zero newlines rather than failing.
            let newlines_before_error = state
                .input
                .get(range.start_offset..location.offset)
                .map_or(0, |prefix| prefix.matches('\n').count());
            (
                Some(range.file.clone()),
                range.start_line + newlines_before_error,
            )
        }
        None => (state.current_file.clone(), location.line),
    };

    SourceLocation {
        file,
        positioning: Positioning::Position(Position {
            line,
            column: location.column,
        }),
    }
}
#[instrument(skip_all)]
fn parse_input(
input: Box<str>,
options: Options<'_>,
file_path: Option<PathBuf>,
leveloffset_ranges: Vec<model::LeveloffsetRange>,
source_ranges: Vec<model::SourceRange>,
warnings_handle: Rc<RefCell<Vec<Warning>>>,
) -> Result<ParseResult, Error> {
tracing::trace!(?input, "post preprocessor");
let owner = parsed::OwnedInput::new(input);
let options_owned = options.into_static();
let warnings_for_state = Rc::clone(&warnings_handle);
ParseResult::try_new(owner, warnings_handle, move |owner| {
let mut state = grammar::ParserState::new(&owner.source, &owner.arena);
state.document_attributes = Rc::new(options_owned.document_attributes.clone());
state.options = Rc::new(options_owned);
state.current_file = file_path;
state.leveloffset_ranges = leveloffset_ranges;
state.source_ranges = source_ranges;
state.warnings = warnings_for_state;
let result = match grammar::document_parser::document(&owner.source, &mut state) {
Ok(Ok(doc)) => Ok(doc),
Ok(Err(e)) => Err(e),
Err(error) => {
tracing::error!(?error, "error parsing document content");
let source_location = peg_error_to_source_location(&error, &state);
Err(Error::Parse(Box::new(source_location), error.to_string()))
}
};
state.emit_warnings();
result
})
}
#[instrument]
pub fn parse_inline(input: &str, options: &Options<'_>) -> Result<ParseInlineResult, Error> {
tracing::trace!(?input, "post preprocessor");
let owner = parsed::OwnedInput::new(input.into());
let options_owned = options.clone().into_static();
let warnings_handle: Rc<RefCell<Vec<Warning>>> = Rc::new(RefCell::new(Vec::new()));
let warnings_for_state = Rc::clone(&warnings_handle);
ParseInlineResult::try_new(owner, warnings_handle, move |owner| {
let mut state = grammar::ParserState::new(&owner.source, &owner.arena);
state.document_attributes = Rc::new(options_owned.document_attributes.clone());
state.options = Rc::new(options_owned);
state.warnings = warnings_for_state;
let result = match grammar::inline_parser::inlines(&owner.source, &mut state) {
Ok(inlines) => Ok(inlines),
Err(error) => {
tracing::error!(?error, "error parsing inline content");
Err(Error::Parse(
Box::new(peg_error_to_source_location(&error, &state)),
error.to_string(),
))
}
};
state.emit_warnings();
result
})
}
#[cfg(test)]
mod proptests;
#[cfg(test)]
#[allow(clippy::unwrap_used)]
#[allow(clippy::panic)]
#[allow(clippy::expect_used)]
mod tests {
use std::{fs, path::PathBuf};
use pretty_assertions::assert_eq;
use super::*;
fn read_file_contents_with_extension(path: &PathBuf, ext: &str) -> Result<String, Error> {
let test_file_path = path.with_extension(ext);
let file_contents = fs::read_to_string(&test_file_path).inspect_err(
|e| tracing::warn!(?path, ?test_file_path, error = %e, "test file not found"),
)?;
Ok(file_contents)
}
/// Fixture-driven test: every `.adoc` file under `fixtures/tests` is parsed in
/// unsafe mode and compared against a sibling file.
///
/// A successful parse must serialize (pretty JSON) to exactly the contents of
/// the sibling `.json` file; a failed parse must render to the trimmed
/// contents of the sibling `.error` file.
#[rstest::rstest]
#[tracing_test::traced_test]
fn test_with_fixtures(#[files("fixtures/tests/**/*.adoc")] path: PathBuf) -> Result<(), Error> {
    let options = Options::builder().with_safe_mode(SafeMode::Unsafe).build();

    let result = match parse_file(&path, &options) {
        Ok(result) => result,
        Err(parse_error) => {
            // Failure path: compare the rendered error with the `.error` fixture.
            let file_contents = read_file_contents_with_extension(&path, "error")?;
            let expected = file_contents.trim();
            assert_eq!(expected, parse_error.to_string());
            return Ok(());
        }
    };

    // Success path: compare the serialized document with the `.json` fixture.
    let expected = read_file_contents_with_extension(&path, "json")?;
    let actual =
        serde_json::to_string_pretty(result.document()).expect("could not serialize result");
    assert_eq!(expected, actual);
    Ok(())
}
#[cfg(test)]
mod empty_document_tests {
    use crate::{Options, parse};

    /// Whitespace-only inputs must parse, and the document location must be
    /// ordered (`absolute_start <= absolute_end`) and pass `validate`.
    #[test]
    fn test_whitespace_only_documents() {
        for input in ["\n", "\n\n", "\t", " \n\t\n ", " ", "\n\n\t"] {
            let options = Options::default();
            let parsed = parse(input, &options)
                .unwrap_or_else(|e| panic!("Failed to parse {input:?}: {e}"));
            let doc = parsed.document();
            assert!(
                doc.location.absolute_start <= doc.location.absolute_end,
                "Failed for input {input:?}: absolute_start {} > absolute_end {}",
                doc.location.absolute_start,
                doc.location.absolute_end
            );
            if let Err(e) = doc.location.validate(input) {
                panic!("Location validation failed for {input:?}: {e}")
            }
        }
    }

    /// Inputs with leading whitespace followed by content must parse, with an
    /// ordered document location that passes `validate`.
    #[test]
    fn test_document_with_content_after_whitespace() {
        for input in ["\n\nHello", "\t\tWorld", " \n = Title"] {
            let options = Options::default();
            let parsed = match parse(input, &options) {
                Ok(parsed) => parsed,
                Err(_) => panic!("Should parse {input:?}"),
            };
            let doc = parsed.document();
            assert!(
                doc.location.absolute_start <= doc.location.absolute_end,
                "Failed for input {input:?}: absolute_start {} > absolute_end {}",
                doc.location.absolute_start,
                doc.location.absolute_end
            );
            if let Err(e) = doc.location.validate(input) {
                panic!("Location validation failed for {input:?}: {e}")
            }
        }
    }

    /// For non-ASCII inputs that parse, the document's absolute offsets must
    /// fall on UTF-8 character boundaries and the location must pass
    /// `validate`. A parse failure is only printed, not treated as a test
    /// failure.
    #[test]
    fn test_unicode_characters() {
        for input in ["😀", "א", "Hello 世界", "\u{200b}"] {
            let options = Options::default();
            let parsed = match parse(input, &options) {
                Ok(parsed) => parsed,
                Err(e) => {
                    println!("Failed to parse {input:?}: {e} (this might be expected)");
                    continue;
                }
            };
            let doc = parsed.document();
            assert!(
                input.is_char_boundary(doc.location.absolute_start),
                "Absolute start {} not on UTF-8 boundary for {input:?}",
                doc.location.absolute_start,
            );
            assert!(
                input.is_char_boundary(doc.location.absolute_end),
                "Absolute end {} not on UTF-8 boundary for {input:?}",
                doc.location.absolute_end,
            );
            if let Err(e) = doc.location.validate(input) {
                panic!("Location validation failed for {input:?}: {e}");
            }
        }
    }
}
mod warning_deduplication_tests {
    use crate::{Options, parse};

    /// Two references to the same counter must yield exactly two "not
    /// supported" warnings (one per position), and every warning on the
    /// result must carry a source location.
    #[test]
    #[tracing_test::traced_test]
    fn counter_reference_peg_backtracking_does_not_duplicate() {
        let input = "= Title\n\n{counter:hits} then {counter:hits} again";
        let result = parse(input, &Options::default()).expect("should parse");

        let counter_warnings = result
            .warnings()
            .iter()
            .map(|warning| warning.kind.to_string())
            .filter(|rendered| rendered.contains("not supported and will be removed"))
            .count();
        assert_eq!(
            counter_warnings,
            2,
            "expected 2 counter warnings (one per position), got {counter_warnings}: {:?}",
            result.warnings(),
        );

        let any_without_location = result
            .warnings()
            .iter()
            .any(|warning| warning.source_location().is_none());
        assert!(
            !any_without_location,
            "counter warnings must carry locations",
        );
    }

    /// Warnings for two different counter references must both reach the log.
    #[test]
    #[tracing_test::traced_test]
    fn distinct_warnings_all_emitted() {
        let input = "= Title\n\n{counter:a} and {counter2:b}";
        let _doc = parse(input, &Options::default()).expect("should parse");

        assert!(logs_contain(
            "Counters ({counter:a}) are not supported and will be removed from output"
        ));
        assert!(logs_contain(
            "Counters ({counter2:b}) are not supported and will be removed from output"
        ));
    }
}
mod parse_result_tests {
    use crate::{Options, WarningKind, parse, parse_file};

    /// An `include::` directive whose target does not exist must surface as a
    /// "file is missing" warning on the parse result.
    #[test]
    fn missing_include_warning_surfaces_on_parse_result() {
        use std::io::Write;

        // Process-unique file name: a fixed name in the shared temp directory
        // lets concurrently running test processes overwrite or delete each
        // other's fixture.
        let tmp = std::env::temp_dir().join(format!(
            "acdc_test_missing_include_{}.adoc",
            std::process::id()
        ));
        let mut f = std::fs::File::create(&tmp).expect("create tmp");
        writeln!(
            f,
            "= Doc Title\n\ninclude::definitely-missing-{}.adoc[]\n",
            std::process::id()
        )
        .expect("write tmp");
        drop(f);

        let options = Options::default();
        let outcome = parse_file(&tmp, &options);
        // Best-effort cleanup happens before the result is unwrapped so a
        // parse failure does not leave the fixture behind.
        let _ = std::fs::remove_file(&tmp);
        let result = outcome.expect("should parse");

        let has_missing_include = result
            .warnings()
            .iter()
            .any(|w| w.kind.to_string().contains("file is missing"));
        assert!(
            has_missing_include,
            "expected missing-include warning, got: {:?}",
            result.warnings(),
        );
    }

    /// A document whose first section heading is `===` directly under the
    /// title must produce exactly one `SectionLevelOutOfSequence` warning with
    /// `got: 2`, carrying a source location.
    #[test]
    fn section_level_out_of_sequence_surfaces_on_parse_result() {
        let input = "= Doc Title\n\n=== Starts at level 2\n\nContent\n";
        let options = Options::default();
        let result = parse(input, &options).expect("document should parse");
        assert_eq!(
            result.warnings().len(),
            1,
            "expected exactly one warning, got: {:?}",
            result.warnings(),
        );
        let warning = result.warnings().first().expect("asserted non-empty");
        assert!(
            matches!(
                &warning.kind,
                WarningKind::SectionLevelOutOfSequence { got: 2, .. },
            ),
            "unexpected warning kind: {:?}",
            warning.kind,
        );
        assert!(
            warning.source_location().is_some(),
            "warning should carry a source location",
        );
    }

    /// A well-formed document must parse with an empty warnings list.
    #[test]
    fn valid_document_has_no_warnings() {
        let input = "= Doc Title\n\n== First\n\nContent\n";
        let options = Options::default();
        let result = parse(input, &options).expect("document should parse");
        assert!(
            result.warnings().is_empty(),
            "expected no warnings, got: {:?}",
            result.warnings(),
        );
    }
}
mod attribute_resolution_tests {
    use std::borrow::Cow;

    use crate::{AttributeValue, Options, parse};

    /// When `bar` is defined before `foo` references it, `foo` stores the
    /// resolved value.
    #[test]
    fn test_definition_time_resolution_bar_defined_first() {
        let input = r":bar: resolved-bar
:foo: {bar}
{foo}
";
        let parsed = parse(input, &Options::default()).expect("should parse");
        let expected = AttributeValue::String(Cow::Borrowed("resolved-bar"));
        assert_eq!(parsed.document().attributes.get("foo"), Some(&expected));
    }

    /// When `bar` is defined only after `foo` references it, `foo` keeps the
    /// literal `{bar}` text.
    #[test]
    fn test_definition_time_resolution_bar_defined_after() {
        let input = r":foo: {bar}
:bar: resolved-bar
{foo}
";
        let parsed = parse(input, &Options::default()).expect("should parse");
        let expected = AttributeValue::String(Cow::Borrowed("{bar}"));
        assert_eq!(parsed.document().attributes.get("foo"), Some(&expected));
    }

    /// A chain `a -> b -> c` defined in dependency order resolves every link
    /// to the final value.
    #[test]
    fn test_chained_attribute_resolution() {
        let input = r":c: final-value
:b: {c}
:a: {b}
{a}
";
        let parsed = parse(input, &Options::default()).expect("should parse");
        let doc = parsed.document();
        let expected = AttributeValue::String(Cow::Borrowed("final-value"));
        // Checked in definition order: c, then b, then a.
        for name in ["c", "b", "a"] {
            assert_eq!(doc.attributes.get(name), Some(&expected));
        }
    }
}
}