#![deny(clippy::pedantic)]
#![warn(clippy::all)]
use std::{
path::{Path, PathBuf},
string::ToString,
};
use tracing::instrument;
mod blocks;
mod constants;
mod error;
pub(crate) mod grammar;
mod model;
mod options;
mod preprocessor;
mod safe_mode;
pub(crate) use grammar::{InlinePreprocessorParserState, ProcessedContent, inline_preprocessing};
use preprocessor::Preprocessor;
pub use error::{Error, Positioning, SourceLocation};
pub use grammar::parse_text_for_quotes;
pub use model::{
Admonition, AdmonitionVariant, Anchor, AttributeName, AttributeValue, Attribution, Audio,
Author, Autolink, Block, BlockMetadata, Bold, Button, CalloutList, CalloutListItem, CalloutRef,
CalloutRefKind, CiteTitle, ColumnFormat, ColumnStyle, ColumnWidth, Comment, CrossReference,
CurvedApostrophe, CurvedQuotation, DelimitedBlock, DelimitedBlockType, DescriptionList,
DescriptionListItem, DiscreteHeader, Document, DocumentAttribute, DocumentAttributes,
ElementAttributes, Footnote, Form, HEADER, Header, Highlight, HorizontalAlignment, ICON_SIZES,
Icon, Image, IndexTerm, IndexTermKind, InlineMacro, InlineNode, Italic, Keyboard, LineBreak,
Link, ListItem, ListItemCheckedStatus, Location, MAX_SECTION_LEVELS, MAX_TOC_LEVELS, Mailto,
Menu, Monospace, NORMAL, OrderedList, PageBreak, Paragraph, Pass, PassthroughKind, Plain,
Position, Raw, Role, Section, Source, SourceUrl, StandaloneCurvedApostrophe, Stem, StemContent,
StemNotation, Subscript, Substitution, SubstitutionOp, SubstitutionSpec, Subtitle, Superscript,
Table, TableColumn, TableOfContents, TableRow, ThematicBreak, Title, TocEntry,
UNNUMBERED_SECTION_STYLES, UnorderedList, Url, VERBATIM, Verbatim, VerticalAlignment, Video,
inlines_to_string, strip_quotes, substitute,
};
pub use options::{Options, OptionsBuilder, SafeMode};
/// Builder-style entry point that pairs a borrowed input string with the
/// [`Options`] used to parse it.
///
/// Create one with [`Parser::new`], optionally replace the options with
/// [`Parser::with_options`], then consume it with [`Parser::parse`] or
/// [`Parser::parse_inline`].
#[derive(Debug)]
pub struct Parser<'input> {
/// Source text handed to the parsing functions; borrowed for `'input`.
input: &'input str,
/// Options forwarded to the parsing functions.
options: Options,
}
impl<'input> Parser<'input> {
    /// Creates a parser over `input` configured with [`Options::default`].
    #[must_use]
    pub fn new(input: &'input str) -> Self {
        let options = Options::default();
        Self { input, options }
    }

    /// Returns this parser with its options replaced by `options`.
    ///
    /// The input is carried over unchanged.
    #[must_use]
    pub fn with_options(self, options: Options) -> Self {
        Self { options, ..self }
    }

    /// Consumes the parser and parses its input as a full document.
    ///
    /// Delegates to the crate-level [`crate::parse`] function.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Error`] the crate-level [`crate::parse`] function reports.
    pub fn parse(self) -> Result<Document, Error> {
        let Self { input, options } = self;
        parse(input, &options)
    }

    /// Consumes the parser and parses its input as inline content only.
    ///
    /// Delegates to the crate-level [`crate::parse_inline`] function.
    ///
    /// # Errors
    ///
    /// Returns whatever [`Error`] the crate-level [`crate::parse_inline`] function reports.
    pub fn parse_inline(self) -> Result<Vec<InlineNode>, Error> {
        let Self { input, options } = self;
        parse_inline(input, &options)
    }
}
#[instrument(skip(reader))]
pub fn parse_from_reader<R: std::io::Read>(
reader: R,
options: &Options,
) -> Result<Document, Error> {
let result = Preprocessor.process_reader(reader, options)?;
parse_input(
&result.text,
options,
None,
result.leveloffset_ranges,
result.source_ranges,
)
}
/// Parses a document from an in-memory string.
///
/// `input` is first run through the preprocessor; the preprocessed text and the
/// level-offset and source ranges it produced are then handed to the document parser.
/// No file path is associated with the parse.
///
/// # Errors
///
/// Returns an [`Error`] if preprocessing fails or if the preprocessed text cannot be
/// parsed as a document.
#[instrument]
pub fn parse(input: &str, options: &Options) -> Result<Document, Error> {
    let preprocessed = Preprocessor.process(input, options)?;
    let leveloffset_ranges = preprocessed.leveloffset_ranges;
    let source_ranges = preprocessed.source_ranges;
    parse_input(
        &preprocessed.text,
        options,
        None,
        leveloffset_ranges,
        source_ranges,
    )
}
#[instrument(skip(file_path))]
pub fn parse_file<P: AsRef<Path>>(file_path: P, options: &Options) -> Result<Document, Error> {
let path = file_path.as_ref().to_path_buf();
let result = Preprocessor.process_file(file_path, options)?;
parse_input(
&result.text,
options,
Some(path),
result.leveloffset_ranges,
result.source_ranges,
)
}
/// Converts a PEG parse error into a [`SourceLocation`].
///
/// The source ranges recorded in `state` are searched from last to first for one that
/// contains the error offset. When one is found, the reported file is that range's file
/// and the line is the range's starting line plus the number of newlines between the
/// range start and the error offset. Otherwise the location falls back to the state's
/// current file and the line reported by the PEG error. The column always comes from
/// the PEG error.
fn peg_error_to_source_location(
    error: &peg::error::ParseError<peg::str::LineCol>,
    state: &grammar::ParserState,
) -> SourceLocation {
    let location = &error.location;
    let enclosing_range = state
        .source_ranges
        .iter()
        .rev()
        .find(|candidate| candidate.contains(location.offset));

    let (file, line) = match enclosing_range {
        Some(range) => {
            // An invalid slice (e.g. offset before the range start) counts as zero lines.
            let newlines_before_error = state
                .input
                .get(range.start_offset..location.offset)
                .map_or(0, |text| text.bytes().filter(|byte| *byte == b'\n').count());
            (
                Some(range.file.clone()),
                range.start_line + newlines_before_error,
            )
        }
        None => (state.current_file.clone(), location.line),
    };

    SourceLocation {
        file,
        positioning: Positioning::Position(Position {
            line,
            column: location.column,
        }),
    }
}
#[instrument]
fn parse_input(
input: &str,
options: &Options,
file_path: Option<PathBuf>,
leveloffset_ranges: Vec<model::LeveloffsetRange>,
source_ranges: Vec<model::SourceRange>,
) -> Result<Document, Error> {
tracing::trace!(?input, "post preprocessor");
let mut state = grammar::ParserState::new(input);
state.document_attributes = options.document_attributes.clone();
state.options = options.clone();
state.current_file = file_path;
state.leveloffset_ranges = leveloffset_ranges;
state.source_ranges = source_ranges;
let result = match grammar::document_parser::document(input, &mut state) {
Ok(doc) => doc,
Err(error) => {
tracing::error!(?error, "error parsing document content");
let source_location = peg_error_to_source_location(&error, &state);
Err(Error::Parse(Box::new(source_location), error.to_string()))
}
};
state.emit_warnings();
result
}
#[instrument]
pub fn parse_inline(input: &str, options: &Options) -> Result<Vec<InlineNode>, Error> {
tracing::trace!(?input, "post preprocessor");
let mut state = grammar::ParserState::new(input);
state.document_attributes = options.document_attributes.clone();
state.options = options.clone();
let result = match grammar::inline_parser::inlines(
input,
&mut state,
0,
&grammar::BlockParsingMetadata::default(),
) {
Ok(inlines) => Ok(inlines),
Err(error) => {
tracing::error!(?error, "error parsing inline content");
Err(Error::Parse(
Box::new(peg_error_to_source_location(&error, &state)),
error.to_string(),
))
}
};
state.emit_warnings();
result
}
#[cfg(test)]
mod proptests;
#[cfg(test)]
// Tests deliberately use `unwrap`/`expect`/`panic!`: a failure should abort the test
// with a descriptive message rather than be propagated.
#[allow(clippy::unwrap_used)]
#[allow(clippy::panic)]
#[allow(clippy::expect_used)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Reads the file that sits next to `path` but carries the extension `ext`.
    ///
    /// A warning is logged when the file cannot be read, and the I/O error is returned
    /// converted into the crate's [`Error`].
    fn read_file_contents_with_extension(
        path: &std::path::Path,
        ext: &str,
    ) -> Result<String, Error> {
        let test_file_path = path.with_extension(ext);
        let file_contents = std::fs::read_to_string(&test_file_path).inspect_err(
            |e| tracing::warn!(?path, ?test_file_path, error = %e, "test file not found"),
        )?;
        Ok(file_contents)
    }

    /// Parses every `.adoc` fixture and compares the outcome with its sibling file:
    /// `.json` holds the expected pretty-printed document when parsing succeeds,
    /// `.error` holds the expected error message when it fails.
    #[rstest::rstest]
    #[tracing_test::traced_test]
    fn test_with_fixtures(
        #[files("fixtures/tests/**/*.adoc")] path: std::path::PathBuf,
    ) -> Result<(), Error> {
        let options = Options::builder().with_safe_mode(SafeMode::Unsafe).build();

        match parse_file(&path, &options) {
            Ok(result) => {
                let expected = read_file_contents_with_extension(&path, "json")?;
                let actual =
                    serde_json::to_string_pretty(&result).expect("could not serialize result");
                assert_eq!(expected, actual);
            }
            Err(e) => {
                let file_contents = read_file_contents_with_extension(&path, "error")?;
                let expected = file_contents.trim();
                assert_eq!(expected, e.to_string());
            }
        }
        Ok(())
    }

    /// Location sanity checks for empty, whitespace-only and non-ASCII documents.
    mod empty_document_tests {
        use crate::{Options, parse};

        /// Whitespace-only input must parse and produce a valid, non-inverted location.
        #[test]
        fn test_whitespace_only_documents() {
            let test_cases = vec![
                "\n", "\n\n", "\t", " \n\t\n ", " ",
                "\n\n\t",
            ];

            for input in test_cases {
                let options = Options::default();
                let result = parse(input, &options);

                match result {
                    Ok(doc) => {
                        assert!(
                            doc.location.absolute_start <= doc.location.absolute_end,
                            "Failed for input {input:?}: absolute_start {} > absolute_end {}",
                            doc.location.absolute_start,
                            doc.location.absolute_end
                        );
                        doc.location.validate(input).unwrap_or_else(|e| {
                            panic!("Location validation failed for {input:?}: {e}")
                        });
                    }
                    Err(e) => {
                        panic!("Failed to parse {input:?}: {e}");
                    }
                }
            }
        }

        /// Content preceded by whitespace must parse and produce a valid location.
        #[test]
        fn test_document_with_content_after_whitespace() {
            let test_cases = vec!["\n\nHello", "\t\tWorld", " \n = Title"];

            for input in test_cases {
                let options = Options::default();
                let doc =
                    parse(input, &options).unwrap_or_else(|_| panic!("Should parse {input:?}"));

                assert!(
                    doc.location.absolute_start <= doc.location.absolute_end,
                    "Failed for input {input:?}: absolute_start {} > absolute_end {}",
                    doc.location.absolute_start,
                    doc.location.absolute_end
                );
                doc.location
                    .validate(input)
                    .unwrap_or_else(|e| panic!("Location validation failed for {input:?}: {e}"));
            }
        }

        /// When multi-byte input parses, the document's absolute offsets must land on
        /// UTF-8 character boundaries. A parse failure is only printed, not asserted.
        #[test]
        fn test_unicode_characters() {
            let test_cases = vec![
                "😀",         // 4-byte UTF-8 character
                "א",          // 2-byte UTF-8 character
                "Hello 世界", // ASCII mixed with 3-byte characters
                "\u{200b}",   // zero-width space
            ];

            for input in test_cases {
                let options = Options::default();
                let result = parse(input, &options);

                match result {
                    Ok(doc) => {
                        assert!(
                            input.is_char_boundary(doc.location.absolute_start),
                            "Absolute start {} not on UTF-8 boundary for {input:?}",
                            doc.location.absolute_start,
                        );
                        assert!(
                            input.is_char_boundary(doc.location.absolute_end),
                            "Absolute end {} not on UTF-8 boundary for {input:?}",
                            doc.location.absolute_end,
                        );
                        doc.location.validate(input).unwrap_or_else(|e| {
                            panic!("Location validation failed for {input:?}: {e}");
                        });
                    }
                    Err(e) => {
                        println!("Failed to parse {input:?}: {e} (this might be expected)");
                    }
                }
            }
        }
    }

    /// Checks on how often parser warnings are logged.
    mod warning_deduplication_tests {
        use crate::{Options, parse};

        /// Referencing the same counter twice must log the warning exactly once.
        #[test]
        #[tracing_test::traced_test]
        fn counter_reference_emits_single_warning() {
            let input = "= Title\n\n{counter:hits} then {counter:hits} again";
            let options = Options::default();
            let _doc = parse(input, &options).expect("should parse");

            assert!(logs_contain("Counters"));
            logs_assert(|lines: &[&str]| {
                let count = lines
                    .iter()
                    .filter(|l| l.contains("not supported and will be removed"))
                    .count();
                if count == 1 {
                    Ok(())
                } else {
                    Err(format!("expected exactly 1 counter warning, got {count}"))
                }
            });
        }

        /// Two different counter references must each log their own warning.
        #[test]
        #[tracing_test::traced_test]
        fn distinct_warnings_all_emitted() {
            let input = "= Title\n\n{counter:a} and {counter2:b}";
            let options = Options::default();
            let _doc = parse(input, &options).expect("should parse");

            assert!(logs_contain(
                "Counters ({counter:a}) are not supported and will be removed from output"
            ));
            assert!(logs_contain(
                "Counters ({counter2:b}) are not supported and will be removed from output"
            ));
        }
    }

    /// Checks that attribute references are resolved when the attribute is defined.
    mod attribute_resolution_tests {
        use crate::{AttributeValue, Options, parse};

        /// `{bar}` inside `foo` is expanded because `bar` is already defined.
        // The raw-string continuation lines stay at column 0: indenting them would
        // change the parsed input.
        #[test]
        fn test_definition_time_resolution_bar_defined_first() {
            let input = r":bar: resolved-bar
:foo: {bar}
{foo}
";
            let options = Options::default();
            let doc = parse(input, &options).expect("should parse");

            assert_eq!(
                doc.attributes.get("foo"),
                Some(&AttributeValue::String("resolved-bar".to_string()))
            );
        }

        /// `{bar}` inside `foo` stays literal because `bar` is defined afterwards.
        #[test]
        fn test_definition_time_resolution_bar_defined_after() {
            let input = r":foo: {bar}
:bar: resolved-bar
{foo}
";
            let options = Options::default();
            let doc = parse(input, &options).expect("should parse");

            assert_eq!(
                doc.attributes.get("foo"),
                Some(&AttributeValue::String("{bar}".to_string()))
            );
        }

        /// A chain `a -> b -> c` resolves fully when defined in dependency order.
        #[test]
        fn test_chained_attribute_resolution() {
            let input = r":c: final-value
:b: {c}
:a: {b}
{a}
";
            let options = Options::default();
            let doc = parse(input, &options).expect("should parse");

            assert_eq!(
                doc.attributes.get("c"),
                Some(&AttributeValue::String("final-value".to_string()))
            );
            assert_eq!(
                doc.attributes.get("b"),
                Some(&AttributeValue::String("final-value".to_string()))
            );
            assert_eq!(
                doc.attributes.get("a"),
                Some(&AttributeValue::String("final-value".to_string()))
            );
        }
    }
}