use std::{
cell::{Cell, RefCell},
collections::HashMap,
};
use peg::parser;
use crate::{
AttributeValue, DocumentAttributes, Error, Location, Pass, PassthroughKind, Position,
Substitution, grammar::LineMap, model::substitution::parse_substitution,
};
/// Bookkeeping shared by the `inline_preprocessing` grammar rules.
///
/// The grammar receives this state by shared reference, so every field the rules
/// update while parsing is wrapped in `Cell` / `RefCell`.
#[derive(Debug)]
pub(crate) struct InlinePreprocessorParserState<'a> {
/// Number of passthrough placeholders emitted so far; its current value is the
/// index embedded in the next placeholder.
pub(crate) pass_found_count: Cell<usize>,
/// Passthroughs extracted from the input, in the order they were matched.
pub(crate) passthroughs: RefCell<Vec<Pass>>,
/// Locations of expanded attribute references, keyed by the number of source-map
/// replacements recorded at the time the reference was expanded.
pub(crate) attributes: RefCell<HashMap<usize, Location>>,
/// Running byte offset of the parser; advanced by the byte length of consumed text.
pub(crate) current_offset: Cell<usize>,
/// Used together with `full_input` to turn `current_offset` into a `Position`.
pub(crate) line_map: LineMap,
/// Text handed to `LineMap::offset_to_position` alongside `current_offset`.
pub(crate) full_input: &'a str,
/// Replacements recorded while preprocessing (see `SourceMap`).
pub(crate) source_map: RefCell<SourceMap>,
/// Text whose bytes the single-plus passthrough rule inspects to validate the
/// characters around a match.
pub(crate) input: RefCell<&'a str>,
/// Offset subtracted from `current_offset` to obtain an index into `input`.
pub(crate) substring_start_offset: Cell<usize>,
/// De-duplicated warning messages collected during parsing.
pub(crate) warnings: RefCell<Vec<String>>,
/// When `false`, passthrough rules return their matched text instead of
/// extracting it into a placeholder.
pub(crate) macros_enabled: bool,
/// When `false`, attribute references are returned verbatim instead of expanded.
pub(crate) attributes_enabled: bool,
}
impl<'a> InlinePreprocessorParserState<'a> {
pub(crate) fn new(
input: &'a str,
line_map: LineMap,
full_input: &'a str,
macros_enabled: bool,
attributes_enabled: bool,
) -> Self {
Self {
pass_found_count: Cell::new(0),
passthroughs: RefCell::new(Vec::new()),
attributes: RefCell::new(HashMap::new()),
current_offset: Cell::new(0),
line_map,
full_input,
source_map: RefCell::new(SourceMap::default()),
input: RefCell::new(input),
substring_start_offset: Cell::new(0),
warnings: RefCell::new(Vec::new()),
macros_enabled,
attributes_enabled,
}
}
pub(crate) fn new_all_enabled(input: &'a str, line_map: LineMap, full_input: &'a str) -> Self {
Self::new(input, line_map, full_input, true, true)
}
pub(crate) fn set_initial_position(&mut self, _location: &Location, absolute_offset: usize) {
self.substring_start_offset.set(absolute_offset);
self.current_offset.set(absolute_offset);
}
fn get_position(&self) -> Position {
self.line_map
.offset_to_position(self.current_offset.get(), self.full_input)
}
fn get_offset(&self) -> usize {
self.current_offset.get()
}
fn advance(&self, s: &str) {
self.current_offset.set(self.current_offset.get() + s.len());
}
fn advance_by(&self, n: usize) {
self.current_offset.set(self.current_offset.get() + n);
}
pub(crate) fn add_warning(&self, message: String) {
let mut warnings = self.warnings.borrow_mut();
if !warnings.contains(&message) {
warnings.push(message);
}
}
pub(crate) fn drain_warnings(&self) -> Vec<String> {
self.warnings.borrow_mut().drain(..).collect()
}
fn parse_pass_macro_parts(full: &str) -> (&str, &str, Vec<Substitution>) {
let subs_end = full[5..].find('[').unwrap_or(0);
let subs_str = &full[5..5 + subs_end];
let content = &full[5 + subs_end + 1..full.len() - 1];
let substitutions = if subs_str.is_empty() {
Vec::new()
} else {
subs_str
.split(',')
.filter_map(|s| parse_substitution(s.trim()))
.collect()
};
(subs_str, content, substitutions)
}
fn expand_disabled_pass_macro(
&self,
full: &str,
document_attributes: &DocumentAttributes,
) -> String {
let (subs_str, content, substitutions) = Self::parse_pass_macro_parts(full);
let has_attr_subs = substitutions
.iter()
.any(|s| matches!(s, Substitution::Attributes | Substitution::Normal));
if !has_attr_subs {
self.advance(full);
return full.to_string();
}
let expanded = inline_preprocessing::attribute_reference_substitutions(
content,
document_attributes,
self,
)
.unwrap_or_else(|_| content.to_string());
let reconstructed = format!("pass:{subs_str}[{expanded}]");
let absolute_start = self.get_offset();
self.advance(full);
if reconstructed.chars().count() != full.chars().count() {
self.source_map.borrow_mut().add_replacement(
absolute_start,
absolute_start + full.len(),
reconstructed.chars().count(),
ProcessedKind::Attribute,
);
}
reconstructed
}
fn calculate_location(&self, start: Position, content: &str, padding: usize) -> Location {
let absolute_start = self.get_offset();
self.advance(content);
self.advance_by(padding);
Location {
absolute_start,
absolute_end: self.get_offset(),
start,
end: self.get_position(),
}
}
}
/// Result of running the inline preprocessor over a piece of text.
#[derive(Debug)]
pub(crate) struct ProcessedContent {
/// The preprocessed text (the concatenation of every matched inline piece).
pub text: String,
/// Copy of the passthroughs collected in the parser state.
pub passthroughs: Vec<Pass>,
/// Copy of the source map collected in the parser state.
pub(crate) source_map: SourceMap,
}
/// One recorded substitution of a source span by generated text.
#[derive(Debug, Clone)]
pub(crate) struct Replacement {
/// Start offset of the replaced span in the original input.
pub absolute_start: usize,
/// End offset of the replaced span in the original input.
pub absolute_end: usize,
// `processed_end` is stored as `absolute_start + physical_length` by
// `SourceMap::add_replacement`; `kind` tells `map_position` how to resolve a
// position that falls inside the replacement.
pub processed_end: usize, pub kind: ProcessedKind,
}
/// Collection of replacements used to translate positions in preprocessed text
/// back to positions in the original input.
#[derive(Debug, Clone, Default)]
pub(crate) struct SourceMap {
/// Recorded replacements; `add_replacement` keeps them sorted by `absolute_start`.
pub replacements: Vec<Replacement>,
}
/// Kind of replacement recorded in the source map; `SourceMap::map_position`
/// resolves positions inside a replacement differently for each kind.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum ProcessedKind {
/// Text produced by expanding an attribute reference.
Attribute,
/// Placeholder standing in for an extracted passthrough.
Passthrough,
}
/// Converts an offset to `i32`, logging and returning an error when it does not fit.
///
/// `context` names the value being converted and is only used in the log entry.
fn to_signed(value: usize, context: &str) -> Result<i32, Error> {
    match i32::try_from(value) {
        Ok(signed) => Ok(signed),
        Err(e) => {
            tracing::error!(value, context, error = %e, "position overflow");
            Err(e.into())
        }
    }
}
/// Converts a signed position back to `usize`, logging and returning an error
/// when the value is negative.
///
/// `context` names the value being converted and is only used in the log entry.
fn to_unsigned(value: i32, context: &str) -> Result<usize, Error> {
    match usize::try_from(value) {
        Ok(unsigned) => Ok(unsigned),
        Err(e) => {
            tracing::error!(value, context, error = %e, "negative position");
            Err(e.into())
        }
    }
}
impl SourceMap {
    /// Records that the original span `absolute_start..absolute_end` was replaced
    /// by text of `physical_length`, keeping the list ordered by start offset.
    pub(crate) fn add_replacement(
        &mut self,
        absolute_start: usize,
        absolute_end: usize,
        physical_length: usize,
        kind: ProcessedKind,
    ) {
        let processed_end = absolute_start + physical_length;
        let entry = Replacement {
            absolute_start,
            absolute_end,
            processed_end,
            kind,
        };
        self.replacements.push(entry);
        self.replacements.sort_by_key(|entry| entry.absolute_start);
    }

    /// Maps `pos` back to an offset in the original input.
    ///
    /// Replacements are walked in order. Each one that ends before `pos`
    /// contributes its length difference to a running shift. A position that
    /// falls inside a replacement resolves to the start of the original span for
    /// attributes, and to the last byte of the original span (or the shifted
    /// position) for passthroughs.
    ///
    /// # Errors
    ///
    /// Returns an error if an offset does not fit in `i32` or the computed
    /// position is negative.
    pub(crate) fn map_position(&self, pos: usize) -> Result<usize, Error> {
        let target = to_signed(pos, "pos")?;
        let mut shift: i32 = 0;

        for rep in &self.replacements {
            let start = to_signed(rep.absolute_start, "rep.absolute_start")?;
            let end = to_signed(rep.absolute_end, "rep.absolute_end")?;
            let processed_end = to_signed(rep.processed_end, "rep.processed_end")?;

            if target <= start {
                break;
            }

            if target >= processed_end {
                // Entirely before `pos`: accumulate the size difference.
                shift += processed_end - end;
                continue;
            }

            return match rep.kind {
                ProcessedKind::Attribute => Ok(rep.absolute_start),
                ProcessedKind::Passthrough => {
                    if target >= end {
                        Ok(rep.absolute_end - 1)
                    } else {
                        to_unsigned(target - shift, "within_passthrough")
                    }
                }
            };
        }

        to_unsigned(target - shift, "final_position")
    }
}
parser!(
pub(crate) grammar inline_preprocessing(document_attributes: &DocumentAttributes, state: &InlinePreprocessorParserState) for str {
// Entry point: parses one or more inline pieces and bundles the joined text with
// copies of the passthroughs and source map accumulated in `state`.
pub rule run() -> ProcessedContent
    = content:inlines()+ {
        let text = content.concat();
        let passthroughs = state.passthroughs.borrow().clone();
        let source_map = state.source_map.borrow().clone();
        ProcessedContent { text, passthroughs, source_map }
    }
// One inline piece. Alternatives are tried in the order listed (PEG ordered
// choice), with `unprocessed_text` as the fallback for plain text. `quiet!`
// suppresses the inner rules' expectations so a failure is reported with the
// single message given to `expected!`.
rule inlines() -> String = quiet!{
kbd_macro()
/ monospace()
/ passthrough()
/ counter_reference()
/ attribute_reference()
/ unprocessed_text()
} / expected!("inlines parser failed")
// Monospace spans (double- or single-backtick) are returned unchanged; matching
// them here keeps their content away from the passthrough/attribute rules.
rule monospace() -> String
    = text:$("``" (!"``" [_])+ "``" / "`" [^('`' | ' ' | '\t' | '\n')] [^'`']* "`") {
        state.advance(text);
        tracing::debug!(text, "monospace matched");
        String::from(text)
    }
// `kbd:[...]` macros are returned unchanged; only the running offset moves.
rule kbd_macro() -> String
    = text:$("kbd:[" (!"]" [_])* "]") {
        let unchanged = String::from(text);
        state.advance(text);
        unchanged
    }
// Counter references (`{counter:name}`, `{counter2:name}`, optionally followed by
// `:initial`) are unsupported: a warning is recorded and the reference is dropped
// from the output.
//
// The reference is still consumed from the input, so the running offset must move
// past it and the removal must be recorded in the source map; otherwise every
// later `Location` and `map_position` result would be off by its length.
rule counter_reference() -> String
    = "{"
    counter_type:$("counter2" / "counter") ":"
    name:$(['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-']+)
    initial:(":" value:$(['a'..='z' | 'A'..='Z' | '0'..='9']+) { value })?
    "}"
    {
        state.add_warning(format!(
            "Counters ({{{counter_type}:{name}}}) are not supported and will be removed from output"
        ));

        // Byte length of everything this rule consumed, delimiters included.
        let initial_len = initial.map_or(0, |value| ":".len() + value.len());
        let consumed = "{".len()
            + counter_type.len()
            + ":".len()
            + name.len()
            + initial_len
            + "}".len();

        let absolute_start = state.get_offset();
        state.advance_by(consumed);
        // The reference is replaced by zero characters of output.
        state.source_map.borrow_mut().add_replacement(
            absolute_start,
            absolute_start + consumed,
            0,
            ProcessedKind::Attribute,
        );

        String::new()
    }
// Expands `{name}`.
//
// * Attributes disabled: the reference is returned verbatim.
// * Character-replacement names (`lt`, `amp`, `plus`, ...) with a string value:
//   the value is stored as a passthrough and a numbered placeholder is emitted.
// * Other string values: the value is inlined and recorded as an `Attribute`
//   replacement.
// * `Bool(true)`: the reference expands to nothing.
// * Anything else: the reference is left as written.
rule attribute_reference() -> String
    = start:position() "{" attribute_name:attribute_name() "}" {
        // `attribute_name` excludes the braces, so they are re-added whenever the
        // reference is echoed back.
        let literal = || format!("{{{attribute_name}}}");

        if !state.attributes_enabled {
            let text = literal();
            state.advance(&text);
            return text;
        }

        // The padding of 2 covers the surrounding braces.
        let location = state.calculate_location(start, attribute_name, 2);
        let is_char_ref = matches!(
            attribute_name,
            "lt" | "gt" | "amp"
            | "plus" | "pp" | "cpp" | "cxx"
            | "asterisk" | "backtick" | "caret" | "tilde"
            | "vbar" | "startsb" | "endsb" | "backslash"
            | "two-colons" | "two-semicolons"
            | "apos" | "quot"
        );

        // Records an `Attribute` replacement of `len` characters, then files the
        // location under the replacement count reached after that insertion.
        let record_attribute = |len: usize, location: Location| {
            let mut attributes = state.attributes.borrow_mut();
            state.source_map.borrow_mut().add_replacement(
                location.absolute_start,
                location.absolute_end,
                len,
                ProcessedKind::Attribute,
            );
            attributes.insert(state.source_map.borrow().replacements.len(), location);
        };

        match document_attributes.get(attribute_name) {
            Some(AttributeValue::String(value)) if is_char_ref => {
                state.passthroughs.borrow_mut().push(Pass {
                    text: Some(value.clone()),
                    substitutions: Vec::new(),
                    location: location.clone(),
                    kind: PassthroughKind::AttributeRef,
                });
                let index = state.pass_found_count.get();
                let new_content = format!("\u{FFFD}\u{FFFD}\u{FFFD}{}\u{FFFD}\u{FFFD}\u{FFFD}", index);
                state.source_map.borrow_mut().add_replacement(
                    location.absolute_start,
                    location.absolute_end,
                    new_content.chars().count(),
                    ProcessedKind::Passthrough,
                );
                state.pass_found_count.set(index + 1);
                new_content
            }
            Some(AttributeValue::String(value)) => {
                record_attribute(value.chars().count(), location);
                value.clone()
            }
            Some(AttributeValue::Bool(true)) => {
                record_attribute(0, location);
                String::new()
            }
            _ => literal(),
        }
    }
// Returns the slice of input matched by `attribute_name_pattern`. The captured
// `start` position is not used by the action.
rule attribute_name() -> &'input str
= start:position() attribute_name:$(attribute_name_pattern()) {
attribute_name
}
// Any passthrough form. Longer delimiters are tried first so that `+++` is not
// consumed as `++` or `+`; the `pass:` macro is tried last.
rule passthrough() -> String = quiet!{
triple_plus_passthrough() / double_plus_passthrough() / single_plus_passthrough() / pass_macro()
} / expected!("passthrough parser failed")
// Constrained passthrough: `+content+`.
//
// The grammar alone cannot see the byte *before* the opening `+`, so after a
// syntactic match the action inspects `state.input` to check that the span sits on
// valid boundaries. When it does not, the matched text is returned unchanged
// (the offset is still advanced past it).
rule single_plus_passthrough() -> String
    = start:position() start_offset:byte_offset()
    "+"
    content:$(![(' '|'\t'|'\n'|'\r')] (!("+" &([' '|'\t'|'\n'|'\r'|','|';'|'"'|'.'|'?'|'!'|':'|')'|']'|'}'|'/'|'-'|'<'|'>'] / ![_])) [_])*)
    "+"
    {
        if !state.macros_enabled {
            let text = format!("+{content}+");
            state.advance(&text);
            return text;
        }

        // Index of the opening `+` within `state.input`.
        let relative_offset = start_offset - state.substring_start_offset.get();

        let (valid_boundary, trailing_valid) = {
            let input = state.input.borrow();
            let bytes = input.as_bytes();

            // Start of input, or preceded by whitespace / punctuation.
            let valid_boundary = relative_offset == 0
                || matches!(
                    bytes.get(relative_offset - 1).copied(),
                    Some(
                        b' ' | b'\t' | b'\n' | b'\r' | b'(' | b'{' | b'[' | b')' | b'}' | b']'
                        | b'/' | b'-' | b'|' | b',' | b';' | b'.' | b'?' | b'!' | b'\''
                        | b'"' | b'<' | b'>'
                    )
                );

            // End of input, or followed by whitespace / punctuation.
            let after_closing_plus = relative_offset + 1 + content.len() + 1;
            let trailing_valid = match bytes.get(after_closing_plus).copied() {
                None => true,
                Some(next_byte) => matches!(
                    next_byte,
                    b' ' | b'\t' | b'\n' | b'\r' | b',' | b';' | b'"' | b'.' | b'?' | b'!'
                    | b':' | b')' | b']' | b'}' | b'/' | b'-' | b'<' | b'>'
                ),
            };

            (valid_boundary, trailing_valid)
        };

        // The padding of 2 covers the two `+` delimiters. The offset is advanced
        // whether or not the boundaries turn out to be valid.
        let location = state.calculate_location(start, content, 2);
        if !(valid_boundary && trailing_valid) {
            return format!("+{content}+");
        }

        state.passthroughs.borrow_mut().push(Pass {
            text: Some(content.to_string()),
            substitutions: vec![Substitution::SpecialChars],
            location: location.clone(),
            kind: PassthroughKind::Single,
        });

        let index = state.pass_found_count.get();
        let new_content = format!("\u{FFFD}\u{FFFD}\u{FFFD}{}\u{FFFD}\u{FFFD}\u{FFFD}", index);
        state.source_map.borrow_mut().add_replacement(
            location.absolute_start,
            location.absolute_end,
            new_content.chars().count(),
            ProcessedKind::Passthrough,
        );
        state.pass_found_count.set(index + 1);
        new_content
    }
// Unconstrained passthrough: `++content++`. The content is stored as a
// passthrough with special-character substitution and replaced in the output by
// a numbered placeholder.
rule double_plus_passthrough() -> String
    = start:position() "++" content:$((!"++" [_])+) "++" {
        if !state.macros_enabled {
            // Build the verbatim text once and reuse it for both the offset
            // update and the return value.
            let text = format!("++{content}++");
            state.advance(&text);
            return text;
        }

        // The padding of 4 covers the two `++` delimiters.
        let location = state.calculate_location(start, content, 4);
        state.passthroughs.borrow_mut().push(Pass {
            text: Some(content.to_string()),
            substitutions: vec![Substitution::SpecialChars],
            location: location.clone(),
            kind: PassthroughKind::Double,
        });

        let index = state.pass_found_count.get();
        let new_content = format!("\u{FFFD}\u{FFFD}\u{FFFD}{}\u{FFFD}\u{FFFD}\u{FFFD}", index);
        state.source_map.borrow_mut().add_replacement(
            location.absolute_start,
            location.absolute_end,
            new_content.chars().count(),
            ProcessedKind::Passthrough,
        );
        state.pass_found_count.set(index + 1);
        new_content
    }
// Raw passthrough: `+++content+++`. The content is stored as a passthrough with
// no substitutions and replaced in the output by a numbered placeholder.
rule triple_plus_passthrough() -> String
    = start:position() "+++" content:$((!"+++" [_])+) "+++" {
        if !state.macros_enabled {
            let text = format!("+++{content}+++");
            state.advance(&text);
            return text;
        }

        // The padding of 6 covers the two `+++` delimiters.
        let location = state.calculate_location(start, content, 6);
        state.passthroughs.borrow_mut().push(Pass {
            text: Some(content.to_string()),
            substitutions: Vec::new(),
            location: location.clone(),
            kind: PassthroughKind::Triple,
        });

        let index = state.pass_found_count.get();
        let new_content = format!("\u{FFFD}\u{FFFD}\u{FFFD}{}\u{FFFD}\u{FFFD}\u{FFFD}", index);
        state.source_map.borrow_mut().add_replacement(
            location.absolute_start,
            location.absolute_end,
            new_content.chars().count(),
            ProcessedKind::Passthrough,
        );
        state.pass_found_count.set(index + 1);
        new_content
    }
// `pass:SUBS[content]` macro. With macros disabled the work is delegated to
// `expand_disabled_pass_macro`; otherwise the content (with attribute references
// expanded when the substitution list asks for it) is stored as a passthrough and
// a numbered placeholder is emitted.
rule pass_macro() -> String
    = start:position() full:$("pass:" substitutions() "[" [^']']* "]") {
        if !state.macros_enabled {
            return state.expand_disabled_pass_macro(full, document_attributes);
        }

        let (subs_str, raw_content, substitutions) =
            InlinePreprocessorParserState::parse_pass_macro_parts(full);

        // Everything in the match that is not content: "pass:", the substitution
        // list, and the two brackets.
        let padding = "pass:".len() + subs_str.len() + "[".len() + "]".len();
        let location = state.calculate_location(start, raw_content, padding);

        let expand_attributes = substitutions
            .iter()
            .any(|sub| matches!(sub, Substitution::Attributes | Substitution::Normal));
        let content = if expand_attributes {
            // Fall back to the raw content if the nested parse fails.
            match inline_preprocessing::attribute_reference_substitutions(raw_content, document_attributes, state) {
                Ok(expanded) => expanded,
                Err(_) => raw_content.to_string(),
            }
        } else {
            raw_content.to_string()
        };

        state.passthroughs.borrow_mut().push(Pass {
            text: Some(content),
            substitutions,
            location: location.clone(),
            kind: PassthroughKind::Macro,
        });

        let index = state.pass_found_count.get();
        let new_content = format!("\u{FFFD}\u{FFFD}\u{FFFD}{}\u{FFFD}\u{FFFD}\u{FFFD}", index);
        state.source_map.borrow_mut().add_replacement(
            location.absolute_start,
            location.absolute_end,
            new_content.chars().count(),
            ProcessedKind::Passthrough,
        );
        state.pass_found_count.set(index + 1);
        new_content
    }
// Comma-separated substitution names (possibly none). Names that
// `parse_substitution` does not recognise are dropped.
rule substitutions() -> Vec<Substitution>
    = subs:$(substitution_value() ** ",") {
        match subs {
            "" => Vec::new(),
            list => list
                .split(',')
                .filter_map(|name| parse_substitution(name.trim()))
                .collect(),
        }
    }
// A single substitution name: one or more ASCII letters or digits.
rule substitution_value() -> &'input str
= $(['a'..='z' | 'A'..='Z' | '0'..='9']+)
// Plain text: consumes characters up to (not including) the first position where
// one of the special inline patterns would match, and returns them unchanged.
rule unprocessed_text() -> String
    = text:$((!(passthrough_pattern() / counter_reference_pattern() / attribute_reference_pattern() / kbd_macro_pattern() / monospace_pattern()) [_])+) {
        let plain = String::from(text);
        state.advance(text);
        plain
    }
// Value-less look-ahead patterns. `unprocessed_text` / `unprocessed_text_content`
// use them in negative look-ahead to decide where plain text stops, so each one
// mirrors the syntax of the rule that actually consumes the construct.

// `{counter:name}` / `{counter2:name}` with an optional `:initial` suffix.
rule counter_reference_pattern() = "{" ("counter2" / "counter") ":" ['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-']+ (":" ['a'..='z' | 'A'..='Z' | '0'..='9']+)? "}"
// `{name}`.
rule attribute_reference_pattern() = "{" attribute_name_pattern() "}"
// Attribute names: ASCII letters, digits, '-' and '_'.
rule attribute_name_pattern() = ['a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_']+
// `kbd:[...]`.
rule kbd_macro_pattern() = "kbd:[" (!"]" [_])* "]"
// Double-backtick or single-backtick monospace span.
rule monospace_pattern() =
"``" (!"``" [_])+ "``" /
"`" [^('`' | ' ' | '\t' | '\n')] [^'`']* "`"
// Any passthrough form; longest delimiter first, then the `pass:` macro.
rule passthrough_pattern() =
"+++" (!("+++") [_])+ "+++" /
"++" (!("++") [_])+ "++" /
"+" ![' '|'\t'|'\n'|'\r'] (!("+" &([' '|'\t'|'\n'|'\r'|','|';'|'"'|'.'|'?'|'!'|':'|')'|']'|'}'|'/'|'-'|'<'|'>'] / ![_])) [_])* "+" /
"pass:" substitutions()? "[" [^']']* "]"
// Secondary entry point used on `pass:` macro content: expands attribute
// references and leaves all other text alone. It does not touch `state`.
pub rule attribute_reference_substitutions() -> String
    = content:(attribute_reference_content() / unprocessed_text_content())+ {
        content.concat()
    }
// Expands `{name}` inside `pass:` macro content.
//
// String values are inlined. A `Bool(true)` attribute expands to nothing, which
// matches what the `attribute_reference` rule does for the same attribute outside
// a pass macro. Any other case leaves the reference as written.
rule attribute_reference_content() -> String
    = "{" attribute_name:attribute_name() "}" {
        match document_attributes.get(attribute_name) {
            Some(AttributeValue::String(value)) => value.clone(),
            Some(AttributeValue::Bool(true)) => String::new(),
            _ => format!("{{{attribute_name}}}"),
        }
    }
// Plain text inside `pass:` macro content: everything up to the next passthrough
// or attribute-reference pattern, returned unchanged.
rule unprocessed_text_content() -> String
    = text:$((!(passthrough_pattern() / attribute_reference_pattern()) [_])+) {
        String::from(text)
    }
// Matches any single character.
rule ANY() = [_]
// Zero-width rule: yields the line/column position of the parser state's running offset.
rule position() -> Position = { state.get_position() }
// Zero-width rule: yields the parser state's running byte offset.
rule byte_offset() -> usize = { state.get_offset() }
}
);
#[cfg(test)]
#[allow(clippy::panic, clippy::indexing_slicing)]
mod tests {
use super::*;
use crate::DocumentAttributes;
/// Builds the document attributes shared by most tests: `s`, `version`, `title`.
fn setup_attributes() -> DocumentAttributes {
    let mut attributes = DocumentAttributes::default();
    for (name, value) in [
        ("s", "link:/nonono"),
        ("version", "1.0"),
        ("title", "My Title"),
    ] {
        attributes.insert(name.into(), AttributeValue::String(value.into()));
    }
    attributes
}
fn setup_state(content: &str) -> InlinePreprocessorParserState<'_> {
InlinePreprocessorParserState {
pass_found_count: Cell::new(0),
passthroughs: RefCell::new(Vec::new()),
attributes: RefCell::new(HashMap::new()),
current_offset: Cell::new(0),
line_map: LineMap::new(content),
full_input: content,
source_map: RefCell::new(SourceMap::default()),
input: RefCell::new(content),
substring_start_offset: Cell::new(0),
warnings: RefCell::new(Vec::new()),
macros_enabled: true,
attributes_enabled: true,
}
}
/// A lone `+hello+` becomes placeholder 0 and is recorded as a single passthrough.
#[test]
fn test_preprocess_inline_passthrough_single() -> Result<(), Error> {
    let input = "+hello+";
    let attributes = setup_attributes();
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(
        processed.text,
        "\u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD}"
    );
    assert_eq!(state.pass_found_count.get(), 1);

    // Inspect the passthroughs held by the state itself rather than the copy in
    // the result.
    let collected = state.passthroughs.into_inner();
    assert_eq!(collected.len(), 1);
    let first = collected
        .first()
        .unwrap_or_else(|| panic!("expected first passthrough"));
    assert_eq!(first.text.as_deref(), Some("hello"));
    assert_eq!(first.kind, PassthroughKind::Single);
    Ok(())
}
/// A lone `++hello++` becomes placeholder 0 and is recorded as a double passthrough.
#[test]
fn test_preprocess_inline_passthrough_double() -> Result<(), Error> {
    let input = "++hello++";
    let attributes = setup_attributes();
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(
        processed.text,
        "\u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD}"
    );
    assert_eq!(processed.passthroughs.len(), 1);

    let first = processed
        .passthroughs
        .first()
        .unwrap_or_else(|| panic!("expected first passthrough"));
    assert_eq!(first.text.as_deref(), Some("hello"));
    assert_eq!(first.kind, PassthroughKind::Double);
    Ok(())
}
/// A lone `+++hello+++` becomes placeholder 0 and is recorded as a triple passthrough.
#[test]
fn test_preprocess_inline_passthrough_triple() -> Result<(), Error> {
    let input = "+++hello+++";
    let attributes = setup_attributes();
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(
        processed.text,
        "\u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD}"
    );
    assert_eq!(processed.passthroughs.len(), 1);

    let first = processed
        .passthroughs
        .first()
        .unwrap_or_else(|| panic!("expected first passthrough"));
    assert_eq!(first.text.as_deref(), Some("hello"));
    assert_eq!(first.kind, PassthroughKind::Triple);
    Ok(())
}
/// Only the balanced `+hello+` is extracted; the trailing ` world+` stays as text.
#[test]
fn test_preprocess_inline_passthrough_single_plus() -> Result<(), Error> {
    let input = "+hello+ world+";
    let attributes = setup_attributes();
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(
        processed.text,
        "\u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD} world+"
    );
    assert_eq!(processed.passthroughs.len(), 1);

    let first = processed
        .passthroughs
        .first()
        .unwrap_or_else(|| panic!("expected first passthrough"));
    assert_eq!(first.text.as_deref(), Some("hello"));
    assert_eq!(first.kind, PassthroughKind::Single);
    Ok(())
}
/// Two passthroughs on the third line of the input get consecutive placeholders
/// and locations expressed in offsets and line/column of the original text.
#[test]
fn test_preprocess_inline_passthrough_multiple() -> Result<(), Error> {
    let input = "Something\n\nHere is some +*bold*+ text and ++**more bold**++ text.";
    let attributes = setup_attributes();
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(
        processed.text,
        "Something\n\nHere is some \u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD} text and \u{FFFD}\u{FFFD}\u{FFFD}1\u{FFFD}\u{FFFD}\u{FFFD} text."
    );
    assert_eq!(processed.passthroughs.len(), 2);

    let first = processed
        .passthroughs
        .first()
        .unwrap_or_else(|| panic!("expected first passthrough"));
    assert_eq!(first.text.as_deref(), Some("*bold*"));
    let loc = &first.location;
    assert_eq!(loc.absolute_start, 24);
    assert_eq!(loc.absolute_end, 32);
    assert_eq!((loc.start.line, loc.start.column), (3, 14));
    assert_eq!((loc.end.line, loc.end.column), (3, 22));

    let second = processed
        .passthroughs
        .get(1)
        .unwrap_or_else(|| panic!("expected second passthrough"));
    assert_eq!(second.text.as_deref(), Some("**more bold**"));
    let loc = &second.location;
    assert_eq!(loc.absolute_start, 42);
    assert_eq!(loc.absolute_end, 59);
    assert_eq!((loc.start.line, loc.start.column), (3, 32));
    assert_eq!((loc.end.line, loc.end.column), (3, 49));
    Ok(())
}
/// `{s}` expands to a link target and the source map translates positions in the
/// expanded text back to the original.
#[test]
fn test_preprocess_attribute_in_link() -> Result<(), Error> {
    let input = "The {s}[syntax page] provides complete stuff.";
    let attributes = setup_attributes();
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(
        processed.text,
        "The link:/nonono[syntax page] provides complete stuff."
    );

    // (position in processed text, expected position in original text)
    for (processed_pos, original_pos) in [(15, 4), (16, 7), (30, 21)] {
        assert_eq!(processed.source_map.map_position(processed_pos)?, original_pos);
    }
    Ok(())
}
/// Two attribute references in one line are both expanded and mapped back.
#[test]
fn test_preprocess_inline_in_attributes() -> Result<(), Error> {
    let input = "Version {version} of {title}";
    let attributes = setup_attributes();
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(processed.text, "Version 1.0 of My Title");

    // (position in processed text, expected position in original text)
    for (processed_pos, original_pos) in [(8, 8), (15, 21)] {
        assert_eq!(processed.source_map.map_position(processed_pos)?, original_pos);
    }
    Ok(())
}
/// An attribute outside a passthrough is expanded while the same reference inside
/// a passthrough is kept verbatim.
#[test]
fn test_preprocess_complex_example() -> Result<(), Error> {
    let input = "Check the {s}[syntax page] and +this {s} won't expand+ for details.";
    let attributes = setup_attributes();
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(
        processed.text,
        "Check the link:/nonono[syntax page] and \u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD} for details."
    );
    assert_eq!(processed.passthroughs.len(), 1);

    let first = processed
        .passthroughs
        .first()
        .unwrap_or_else(|| panic!("expected first passthrough"));
    assert_eq!(first.text.as_deref(), Some("this {s} won't expand"));

    assert_eq!(processed.source_map.map_position(10)?, 10);
    Ok(())
}
/// Attribute references inside a single-plus passthrough are not expanded, even
/// when the attribute's value itself refers to other attributes.
#[test]
fn test_nested_passthrough_with_nested_attributes() -> Result<(), Error> {
    let mut attributes = setup_attributes();
    for (name, value) in [("nested1", "{version}"), ("nested2", "{nested1}")] {
        attributes.insert(name.into(), AttributeValue::String(value.into()));
    }
    let input = "Here is a +special {nested2} value+ to test.";
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(
        processed.text,
        "Here is a \u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD} to test."
    );
    assert_eq!(processed.passthroughs.len(), 1);

    let first = processed
        .passthroughs
        .first()
        .unwrap_or_else(|| panic!("expected first passthrough"));
    assert_eq!(first.text.as_deref(), Some("special {nested2} value"));
    assert_eq!(first.location.absolute_start, 10);
    assert_eq!(first.location.absolute_end, 35);
    Ok(())
}
/// A ` +` at the end of a line is not mistaken for the start of a passthrough.
#[test]
fn test_line_breaks() -> Result<(), Error> {
    let input = "This is a test +\nwith a line break.";
    let attributes = setup_attributes();
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert!(processed.passthroughs.is_empty());
    assert_eq!(processed.text, "This is a test +\nwith a line break.");
    Ok(())
}
/// Adjacent `+<tag>+text+</tag>+` sequences: pins the extracted spans and checks
/// that both carry the special-characters substitution.
#[test]
fn test_section_with_passthrough() -> Result<(), Error> {
    let input = "= Document Title\nHello +<h1>+World+</h1>+ of +<u>+Gemini+</u>+";
    let attributes = setup_attributes();
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(
        processed.text,
        "= Document Title\nHello \u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD}</h1>+ of \u{FFFD}\u{FFFD}\u{FFFD}1\u{FFFD}\u{FFFD}\u{FFFD}</u>+"
    );
    assert_eq!(processed.passthroughs.len(), 2);

    let first_pass = processed
        .passthroughs
        .first()
        .unwrap_or_else(|| panic!("expected first passthrough"));
    let second_pass = processed
        .passthroughs
        .get(1)
        .unwrap_or_else(|| panic!("expected second passthrough"));

    assert_eq!(first_pass.text.as_deref(), Some("<h1>+World"));
    assert_eq!(second_pass.text.as_deref(), Some("<u>+Gemini"));
    for pass in [first_pass, second_pass] {
        assert!(pass.substitutions.contains(&Substitution::SpecialChars));
    }
    Ok(())
}
/// `pass:q,a[...]` expands attribute references inside the macro content and
/// records both requested substitutions on the passthrough.
#[test]
fn test_pass_macro_with_mixed_content() -> Result<(), Error> {
    let mut attributes = setup_attributes();
    attributes.insert("docname".into(), AttributeValue::String("test-doc".into()));
    let input = "The text pass:q,a[<u>underline _{docname}_</u>] is underlined.";
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(
        processed.text,
        "The text \u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD} is underlined."
    );
    assert_eq!(processed.passthroughs.len(), 1);

    let pass = processed
        .passthroughs
        .first()
        .unwrap_or_else(|| panic!("expected first passthrough"));
    assert_eq!(pass.text.as_deref(), Some("<u>underline _test-doc_</u>"));
    for expected in [Substitution::Quotes, Substitution::Attributes] {
        assert!(pass.substitutions.contains(&expected));
    }
    assert_eq!(pass.location.absolute_start, 9);
    assert_eq!(pass.location.absolute_end, 47);

    // (position in processed text, expected position in original text)
    for (processed_pos, original_pos) in [(9, 9), (24, 55)] {
        assert_eq!(processed.source_map.map_position(processed_pos)?, original_pos);
    }
    Ok(())
}
/// Single, double and triple passthroughs mixed with an attribute reference: the
/// placeholders are numbered in order and positions map back correctly.
#[test]
fn test_all_passthroughs_with_attribute() -> Result<(), Error> {
    let mut attributes = setup_attributes();
    attributes.insert("meh".into(), AttributeValue::String("1.0".into()));
    let input = "1 +2+, ++3++ {meh} and +++4+++ are all numbers.";
    let state = setup_state(input);

    let processed = inline_preprocessing::run(input, &attributes, &state)?;
    assert_eq!(
        processed.text,
        "1 \u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD}, \u{FFFD}\u{FFFD}\u{FFFD}1\u{FFFD}\u{FFFD}\u{FFFD} 1.0 and \u{FFFD}\u{FFFD}\u{FFFD}2\u{FFFD}\u{FFFD}\u{FFFD} are all numbers."
    );
    assert_eq!(processed.passthroughs.len(), 3);

    let first = processed
        .passthroughs
        .first()
        .unwrap_or_else(|| panic!("expected first passthrough"));
    let second = processed
        .passthroughs
        .get(1)
        .unwrap_or_else(|| panic!("expected second passthrough"));
    let third = processed
        .passthroughs
        .get(2)
        .unwrap_or_else(|| panic!("expected third passthrough"));

    assert_eq!(first.kind, PassthroughKind::Single);
    assert_eq!(second.kind, PassthroughKind::Double);
    assert_eq!(third.kind, PassthroughKind::Triple);
    assert_eq!(first.text.as_deref(), Some("2"));
    assert_eq!(second.text.as_deref(), Some("3"));
    assert_eq!(third.text.as_deref(), Some("4"));

    // (position in processed text, expected position in original text)
    for (processed_pos, original_pos) in [(2, 2), (5, 4), (24, 20), (48, 44)] {
        assert_eq!(processed.source_map.map_position(processed_pos)?, original_pos);
    }
    Ok(())
}
/// Single-plus passthroughs extend to the last `+` that is followed by a valid
/// boundary, so inner `+` characters stay part of the content.
#[test]
fn test_greedy_matching_single_plus_passthrough() -> Result<(), Error> {
    let attributes = setup_attributes();

    // (input, expected passthrough contents in order)
    let cases: [(&str, &[&str]); 6] = [
        ("Test +A+B+ end", &["A+B"]),
        ("Test +A+ +B+ end", &["A", "B"]),
        ("Test +A+B+C+D+ end", &["A+B+C+D"]),
        ("Test +<em>+text+ end", &["<em>+text"]),
        ("Look +here+there+, ok", &["here+there"]),
        (
            "Hello +<h1>+World+</h1>+ and +<u>+Gemini+</u>+ end",
            &["<h1>+World", "<u>+Gemini"],
        ),
    ];

    for (input, expected) in cases {
        let state = setup_state(input);
        let processed = inline_preprocessing::run(input, &attributes, &state)?;
        assert_eq!(processed.passthroughs.len(), expected.len());
        for (pass, want) in processed.passthroughs.iter().zip(expected) {
            assert_eq!(pass.text.as_deref(), Some(*want));
        }
    }
    Ok(())
}
// Exercises every character-replacement attribute with a default
// `DocumentAttributes`; the expectations below therefore rely on those defaults
// defining each referenced name.
#[test]
fn test_all_character_replacement_attributes() -> Result<(), Error> {
let attributes = DocumentAttributes::default();
let input = concat!(
"{empty}{blank}{sp}{nbsp}{zwsp}{wj}",
"{apos}{quot}{lsquo}{rsquo}{ldquo}{rdquo}",
"{deg}{plus}{brvbar}{vbar}{amp}{lt}{gt}",
"{startsb}{endsb}{caret}{asterisk}{tilde}{backslash}{backtick}",
"{two-colons}{two-semicolons}{cpp}{cxx}{pp}"
);
let state = setup_state(input);
let result = inline_preprocessing::run(input, &attributes, &state)?;
// Placeholder text emitted for the i-th passthrough.
let p = |i: usize| format!("\u{FFFD}\u{FFFD}\u{FFFD}{i}\u{FFFD}\u{FFFD}\u{FFFD}");
// The expected string follows the input order: names expected to be inlined
// appear as literal text, and each "{}" slot is filled, in order, with the
// placeholder of a name expected to become a passthrough.
let expected = format!(
concat!(
"", "", " ", "\u{00A0}", "\u{200B}", "\u{2060}",
"{}", "{}", "\u{2018}", "\u{2019}", "\u{201C}", "\u{201D}",
"\u{00B0}", "{}", "\u{00A6}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", "{}", ),
p(0),
p(1),
p(2),
p(3),
p(4),
p(5),
p(6),
p(7),
p(8),
p(9),
p(10),
p(11),
p(12),
p(13),
p(14),
p(15),
p(16),
p(17),
p(18),
);
assert_eq!(
result.text, expected,
"Character replacement attributes did not produce expected values"
);
assert_eq!(
result.passthroughs.len(),
19,
"Should have 19 passthroughs for all ASCII char replacement attributes"
);
// Spot-check stored values: index 0 is {apos}, 2 is {plus}, 4 is {amp}, 16 is {cpp}.
assert_eq!(result.passthroughs[0].text.as_deref(), Some("'")); assert_eq!(result.passthroughs[2].text.as_deref(), Some("+")); assert_eq!(result.passthroughs[4].text.as_deref(), Some("&")); assert_eq!(result.passthroughs[16].text.as_deref(), Some("C++"));
Ok(())
}
/// Character-replacement attributes embedded in ordinary text, using the default
/// document attributes.
#[test]
fn test_character_replacement_in_context() -> Result<(), Error> {
    /// Runs the preprocessor over `input` with default document attributes.
    fn preprocess(input: &str) -> Result<ProcessedContent, Error> {
        let attributes = DocumentAttributes::default();
        let state = setup_state(input);
        let processed = inline_preprocessing::run(input, &attributes, &state)?;
        Ok(processed)
    }

    // Inlined replacements.
    assert_eq!(
        preprocess("The temperature is 100{deg}F")?.text,
        "The temperature is 100\u{00B0}F"
    );

    // Replacements routed through passthrough placeholders.
    let brackets = preprocess("Use {startsb}option{endsb} syntax")?;
    assert_eq!(
        brackets.text,
        "Use \u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD}option\u{FFFD}\u{FFFD}\u{FFFD}1\u{FFFD}\u{FFFD}\u{FFFD} syntax"
    );
    assert_eq!(brackets.passthroughs.len(), 2);
    assert_eq!(brackets.passthroughs[0].text.as_deref(), Some("["));
    assert_eq!(brackets.passthroughs[1].text.as_deref(), Some("]"));

    assert_eq!(
        preprocess("{ldquo}Hello{rdquo}")?.text,
        "\u{201C}Hello\u{201D}"
    );

    // Attributes that expand to nothing.
    assert_eq!(preprocess("before{empty}after")?.text, "beforeafter");
    assert_eq!(preprocess("before{blank}after")?.text, "beforeafter");

    let cpp = preprocess("{cpp} is same as {cxx}")?;
    assert_eq!(
        cpp.text,
        "\u{FFFD}\u{FFFD}\u{FFFD}0\u{FFFD}\u{FFFD}\u{FFFD} is same as \u{FFFD}\u{FFFD}\u{FFFD}1\u{FFFD}\u{FFFD}\u{FFFD}"
    );
    assert_eq!(cpp.passthroughs.len(), 2);
    assert_eq!(cpp.passthroughs[0].text.as_deref(), Some("C++"));
    assert_eq!(cpp.passthroughs[1].text.as_deref(), Some("C++"));
    Ok(())
}
/// A `{counter:...}` reference produces no output text and leaves exactly one
/// warning on the state, mentioning both `counter` and the counter's name.
#[test]
fn test_counter_reference_collects_warning() -> Result<(), Error> {
    let source = "Count: {counter:mycount}";
    let attributes = setup_attributes();
    let state = setup_state(source);

    let processed = inline_preprocessing::run(source, &attributes, &state)?;
    assert_eq!(processed.text, "Count: ");

    let collected = state.warnings.borrow();
    assert_eq!(collected.len(), 1);

    // The single warning must name both the construct and the counter.
    let message = &collected[0];
    for needle in ["counter", "mycount"] {
        assert!(message.contains(needle));
    }
    Ok(())
}
/// Two identical `{counter:hits}` references are expected to leave only a
/// single warning on the state, and both are removed from the output text.
#[test]
fn test_duplicate_counter_references_produce_single_warning() -> Result<(), Error> {
    let source = "{counter:hits} and {counter:hits}";
    let attributes = setup_attributes();
    let state = setup_state(source);

    let processed = inline_preprocessing::run(source, &attributes, &state)?;
    assert_eq!(processed.text, " and ");

    let warning_count = state.warnings.borrow().len();
    assert_eq!(
        warning_count, 1,
        "identical counter warnings should be deduplicated"
    );
    Ok(())
}
/// A `{counter:a}` and a `{counter2:b}` reference are expected to leave two
/// warnings on the state, and both are removed from the output text.
#[test]
fn test_distinct_counter_references_produce_separate_warnings() -> Result<(), Error> {
    let source = "{counter:a} and {counter2:b}";
    let attributes = setup_attributes();
    let state = setup_state(source);

    let processed = inline_preprocessing::run(source, &attributes, &state)?;
    assert_eq!(processed.text, " and ");

    let warning_count = state.warnings.borrow().len();
    assert_eq!(
        warning_count, 2,
        "different counter warnings should both be collected"
    );
    Ok(())
}
fn setup_state_macros_disabled(content: &str) -> InlinePreprocessorParserState<'_> {
InlinePreprocessorParserState {
pass_found_count: Cell::new(0),
passthroughs: RefCell::new(Vec::new()),
attributes: RefCell::new(HashMap::new()),
current_offset: Cell::new(0),
line_map: LineMap::new(content),
full_input: content,
source_map: RefCell::new(SourceMap::default()),
input: RefCell::new(content),
substring_start_offset: Cell::new(0),
warnings: RefCell::new(Vec::new()),
macros_enabled: false,
attributes_enabled: true,
}
}
/// With macros disabled, `pass:a[...]` stays in the output as literal text
/// while `{version}` inside it is expected to appear as `1.0`; no passthrough
/// is recorded on the state.
#[test]
fn test_pass_macro_a_with_macros_disabled_expands_attributes() -> Result<(), Error> {
    let source = "pass:a[{version}]";
    let attributes = setup_attributes();
    let state = setup_state_macros_disabled(source);

    let processed = inline_preprocessing::run(source, &attributes, &state)?;
    assert_eq!(processed.text, "pass:a[1.0]");

    let recorded = state.passthroughs.borrow();
    assert!(recorded.is_empty());
    Ok(())
}
/// With macros disabled, a `pass:[...]` without substitutions is expected to
/// come through unchanged, including the `{version}` reference inside it; no
/// passthrough is recorded on the state.
#[test]
fn test_pass_macro_no_subs_with_macros_disabled_preserves_attributes() -> Result<(), Error> {
    let source = "pass:[{version}]";
    let attributes = setup_attributes();
    let state = setup_state_macros_disabled(source);

    let processed = inline_preprocessing::run(source, &attributes, &state)?;
    assert_eq!(processed.text, "pass:[{version}]");

    let recorded = state.passthroughs.borrow();
    assert!(recorded.is_empty());
    Ok(())
}
/// With macros disabled, `pass:q[text]` is expected to come through unchanged
/// and no passthrough is recorded on the state.
#[test]
fn test_pass_macro_q_with_macros_disabled_preserves_content() -> Result<(), Error> {
    let source = "pass:q[text]";
    let attributes = setup_attributes();
    let state = setup_state_macros_disabled(source);

    let processed = inline_preprocessing::run(source, &attributes, &state)?;
    assert_eq!(processed.text, "pass:q[text]");

    let recorded = state.passthroughs.borrow();
    assert!(recorded.is_empty());
    Ok(())
}
/// With macros disabled, `pass:a,q[...]` stays in the output as literal text
/// while `{version}` inside it is expected to appear as `1.0`; no passthrough
/// is recorded on the state.
#[test]
fn test_pass_macro_a_q_with_macros_disabled_expands_attributes() -> Result<(), Error> {
    let source = "pass:a,q[{version}]";
    let attributes = setup_attributes();
    let state = setup_state_macros_disabled(source);

    let processed = inline_preprocessing::run(source, &attributes, &state)?;
    assert_eq!(processed.text, "pass:a,q[1.0]");

    let recorded = state.passthroughs.borrow();
    assert!(recorded.is_empty());
    Ok(())
}
}