pub mod tokens;
use tokens::{Token, TokenType};
use crate::errors::ScannerError;
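/// Streaming tokenizer for AsciiDoc-style markup. `start` and `current` are
/// byte offsets into `source`; `line` and `startcol` track the position
/// reported on emitted tokens, and `file_stack` records the chain of included
/// files each token came from.
///
/// A minimal sketch of driving the scanner (marked `ignore` since the
/// surrounding crate paths are not assumed here):
///
/// ```ignore
/// let tokens: Result<Vec<Token>, ScannerError> = Scanner::new("== Title\n").collect();
/// ```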
#[derive(Debug)]
pub struct Scanner<'a> {
pub source: &'a str,
start: usize,
startcol: usize,
current: usize,
line: usize,
scanning: bool,
expecting_tag_end: bool,
file_stack: Vec<String>,
}
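/// Iterating a `Scanner` yields tokens until the source is exhausted, then
/// emits one final token (`Token::final_token`) before returning `None`.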
impl Iterator for Scanner<'_> {
type Item = Result<Token, ScannerError>;
fn next(&mut self) -> Option<Self::Item> {
if !self.is_at_end() {
self.start = self.current;
return Some(self.scan_token());
}
if self.scanning {
self.scanning = false;
return Some(Ok(Token::final_token(self.line, self.file_stack.clone())));
}
None
}
}
impl<'a> Scanner<'a> {
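/// Creates a scanner at the start of `source` with an empty include-file
/// stack.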
pub fn new(source: &'a str) -> Self {
Scanner {
source,
start: 0,
startcol: 1,
current: 0,
line: 1,
scanning: true,
expecting_tag_end: false,
file_stack: vec![],
}
}
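/// Like `new`, but seeds the include-file stack so tokens scanned from an
/// included file report their origin.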
pub fn new_with_stack(source: &'a str, file_stack: Vec<String>) -> Self {
Scanner {
source,
start: 0,
startcol: 1,
current: 0,
line: 1,
scanning: true,
expecting_tag_end: false,
file_stack,
}
}
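/// Consumes one character and dispatches on it; anything that does not begin
/// a recognized markup element falls through to `add_text_until_next_markup`.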
fn scan_token(&mut self) -> Result<Token, ScannerError> {
let c = self.source.as_bytes()[self.current] as char;
self.current += 1;
match c {
'\n' => self.add_token(TokenType::NewLineChar, false, 1),
'\'' => {
if self.starts_end_tag_line() {
self.add_tag()
} else if self.starts_repeated_char_line(c, 3) {
self.current += 2;
self.add_token(TokenType::ThematicBreak, false, 0)
} else if ['\0', ' ', '\n'].contains(&self.peek_back()) && self.peek() == '`' {
self.current += 1;
self.add_token(TokenType::OpenSingleQuote, true, 0)
} else if ["s ", "s\n"].contains(&self.peeks_ahead(2))
|| (self.peeks_ahead(2) == "\0" && self.peek() == 's')
{
self.add_token(TokenType::CharRef, false, 0)
} else {
self.add_text_until_next_markup()
}
}
'<' => {
if self.starts_end_tag_line() {
self.add_tag()
} else if self.starts_repeated_char_line(c, 3) {
self.current += 2;
self.add_token(TokenType::PageBreak, false, 0)
} else if ['-', '='].contains(&self.peek()) {
self.current += 1;
self.add_token(TokenType::CharRef, false, 0)
} else if self.peek() == '<' {
self.add_cross_reference()
} else if self.starts_new_line() && self.starts_code_callout_list_item() {
self.current += 1;
self.add_token(TokenType::CodeCalloutListItem, true, 0)
} else if self.starts_code_callout() {
self.add_token(TokenType::CodeCallout, true, 0)
} else {
self.add_text_until_next_markup()
}
}
'+' | '*' | '-' | '_' | '/' | '=' | '.' => {
if self.starts_end_tag_line() {
self.add_tag()
} else if self.starts_new_line() && self.starts_repeated_char_line(c, 4) {
self.current += 3;
self.add_token(TokenType::block_from_char(c)?, false, 0)
} else {
match c {
'=' => {
if self.peek() == '>' {
self.current += 1;
self.add_token(TokenType::CharRef, false, 0)
} else if self.starts_new_line() {
self.add_heading()
} else {
self.add_text_until_next_markup()
}
}
'-' => {
if self.peek() == '>' {
self.current += 1;
self.add_token(TokenType::CharRef, false, 0)
} else if self.starts_text_symbol_replace_emdash() {
self.current += 1;
self.add_token(TokenType::CharRef, false, 0)
} else if self.starts_new_line() && self.starts_repeated_char_line(c, 2)
{
self.current += 1;
self.add_token(TokenType::OpenBlock, false, 0)
} else {
self.add_text_until_next_markup()
}
}
'*' => {
if self.starts_new_line() && self.peek() == ' ' {
self.add_list_item(TokenType::UnorderedListItem)
} else {
self.handle_inline_formatting(
c,
TokenType::Strong,
TokenType::UnconstrainedStrong,
)
}
}
'/' => {
if self.starts_end_tag_line() {
self.add_tag()
} else if self.starts_new_line() && self.peek() == '/' {
if ["/ tag::", "/ end::"].contains(&self.peeks_ahead(7)) {
self.current += 2;
self.add_token(TokenType::Comment, true, 0)
} else {
while self.peek() != '\n' && !self.is_at_end() {
self.current += 1;
}
self.add_token(TokenType::Comment, true, 0)
}
} else {
self.add_text_until_next_markup()
}
}
'+' => {
if self.starts_new_line() && self.peek() == '\n' {
self.add_token(TokenType::BlockContinuation, false, 0)
} else if self.peek_back() == ' ' && self.peek() == '\n' {
self.add_token(TokenType::LineContinuation, false, 0)
} else {
self.handle_inline_formatting(
c,
TokenType::Literal,
TokenType::UnconstrainedLiteral,
)
}
}
'_' => self.handle_inline_formatting(
c,
TokenType::Emphasis,
TokenType::UnconstrainedEmphasis,
),
'.' => {
if self.peeks_ahead(2) == ".." {
self.current += 2;
self.add_token(TokenType::CharRef, false, 0)
} else if self.starts_new_line() {
if self.peek() == ' ' {
self.add_list_item(TokenType::OrderedListItem)
} else {
self.add_token(TokenType::BlockLabel, false, 0)
}
} else {
self.add_text_until_next_markup()
}
}
_ => self.add_text_until_next_markup(),
}
}
}
'[' => {
if self.starts_new_line() && self.peek() == '[' {
self.add_block_anchor()
} else if self.starts_attribution_line() {
match self.source.as_bytes()[self.start + 1] as char {
'N' => self.add_token(TokenType::NotePara, true, 0),
'T' => self.add_token(TokenType::TipPara, true, 0),
'I' => self.add_token(TokenType::ImportantPara, true, 0),
'C' => self.add_token(TokenType::CautionPara, true, 0),
'W' => self.add_token(TokenType::WarningPara, true, 0),
_ => self.add_token(TokenType::ElementAttributes, true, 0),
}
} else if self.peek() == '.' {
self.add_inline_style()
} else {
self.add_text_until_next_markup()
}
}
'`' => {
if ["' ", "'\n"].contains(&self.peeks_ahead(2))
|| (self.peeks_ahead(2) == "\0" && self.peek() == '\'')
{
self.current += 1;
self.add_token(TokenType::CloseSingleQuote, true, 0)
} else if ["\" ", "\"\n"].contains(&self.peeks_ahead(2))
|| (self.peeks_ahead(2) == "\0" && self.peek() == '\"')
{
self.current += 1;
self.add_token(TokenType::CloseDoubleQuote, true, 0)
} else {
self.handle_inline_formatting(
c,
TokenType::Monospace,
TokenType::UnconstrainedMonospace,
)
}
}
'^' => self.add_token(TokenType::Superscript, false, 0),
'~' => self.add_token(TokenType::Subscript, false, 0),
'#' => {
if self.starts_end_tag_line() {
self.add_tag()
} else {
self.handle_inline_formatting(c, TokenType::Mark, TokenType::UnconstrainedMark)
}
}
':' => {
if self.starts_new_line() && self.starts_attr() {
while self.peek() != '\n' && !self.is_at_end() {
self.current += 1
}
self.add_token(TokenType::Attribute, false, 0)
} else if self.peek_back() != ' ' && [": ", ":\n"].contains(&self.peeks_ahead(2)) {
self.current += 2;
self.add_token(TokenType::DescriptionListMarker, false, 0)
} else {
self.add_text_until_next_markup()
}
}
'(' => {
if self.starts_text_symbol_replace_parens() {
self.handle_text_symbol_replacement_parens()
} else {
self.add_text_until_next_markup()
}
}
'f' => {
if self.peeks_ahead(9) == "ootnote:[" {
self.current += 9;
self.add_token(TokenType::FootnoteMacro, false, 0)
} else {
self.add_text_until_next_markup()
}
}
'p' => {
if self.peeks_ahead(5) == "ass:[" {
self.current += 5;
self.add_token(TokenType::PassthroughInlineMacro, false, 0)
} else {
self.add_text_until_next_markup()
}
}
'h' => {
if self.peeks_ahead(3) == "ttp" {
self.add_link()
} else {
self.add_text_until_next_markup()
}
}
'i' => {
if self.starts_new_line() && self.peeks_ahead(8) == "nclude::" {
self.add_include()
} else if self.starts_new_line() && self.peeks_ahead(6) == "mage::" {
self.add_block_image()
} else if self.peeks_ahead(5) == "mage:"
&& self.peeks_ahead(6) != "mage::"
&& self.peeks_ahead(6) != "mage: "
{
self.add_inline_image()
} else {
self.add_text_until_next_markup()
}
}
't' => {
if self.peeks_ahead(4) == "ag::" {
self.add_tag()
} else {
self.add_text_until_next_markup()
}
}
'e' => {
if self.peeks_ahead(4) == "nd::" {
self.add_tag()
} else {
self.add_text_until_next_markup()
}
}
'N' => {
if self.peeks_ahead(5) == "OTE: " {
self.current += 5;
self.add_token(TokenType::NotePara, true, 0)
} else {
self.add_text_until_next_markup()
}
}
'T' => {
if self.peeks_ahead(4) == "IP: " {
self.current += 4;
self.add_token(TokenType::TipPara, true, 0)
} else {
self.add_text_until_next_markup()
}
}
'I' => {
if self.peeks_ahead(10) == "MPORTANT: " {
self.current += 10;
self.add_token(TokenType::ImportantPara, true, 0)
} else {
self.add_text_until_next_markup()
}
}
'C' => {
if self.peeks_ahead(8) == "AUTION: " {
self.current += 8;
self.add_token(TokenType::CautionPara, true, 0)
} else {
self.add_text_until_next_markup()
}
}
'W' => {
if self.peeks_ahead(8) == "ARNING: " {
self.current += 8;
self.add_token(TokenType::WarningPara, true, 0)
} else {
self.add_text_until_next_markup()
}
}
']' => self.add_token(TokenType::InlineMacroClose, true, 0),
'&' => {
if self.starts_charref() {
while self.peek().is_alphanumeric() && !self.is_at_end() {
self.current += 1
}
self.current += 1;
self.add_token(TokenType::CharRef, true, 0)
} else {
self.add_text_until_next_markup()
}
}
'{' => {
if self.starts_attribute_reference() {
while (self.peek().is_alphanumeric() || self.peek() == '-') && !self.is_at_end()
{
self.current += 1
}
self.current += 1;
self.add_token(TokenType::AttributeReference, false, 0)
} else {
self.add_text_until_next_markup()
}
}
'|' => {
if self.starts_new_line() && self.peeks_ahead(3) == "===" {
self.current += 3;
if ['\n', '\0'].contains(&self.peek()) {
self.add_token(TokenType::Table, false, 0)
} else {
self.add_text_until_next_markup()
}
} else if self.starts_new_line() || self.peek_back() == ' ' {
self.add_table_cell()
} else {
self.add_text_until_next_markup()
}
}
'\"' => {
if ['\0', ' ', '\n'].contains(&self.peek_back()) && self.peek() == '`' {
self.current += 1;
self.add_token(TokenType::OpenDoubleQuote, true, 0)
} else {
self.add_text_until_next_markup()
}
}
' ' | '%' => {
if self.starts_end_tag_line() {
self.add_tag()
} else {
self.add_text_until_next_markup()
}
}
_ => self.add_text_until_next_markup(),
}
}
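/// Builds a token from the `start..current` slice and updates the column
/// bookkeeping; a nonzero `advance_line_after` bumps `self.line` and resets
/// the column, for tokens that end a line.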
fn add_token(
&mut self,
token_type: TokenType,
include_literal: bool,
advance_line_after: usize,
) -> Result<Token, ScannerError> {
let text = &self.source[self.start..self.current];
let mut literal = None;
if include_literal {
literal = Some(text.to_string())
}
let token_start = self.startcol;
let mut token: Token;
if advance_line_after != 0 {
let token_line = self.line;
self.line += advance_line_after;
self.startcol = 1;
token = Token {
token_type,
lexeme: text.to_string(),
literal,
line: token_line,
startcol: token_start,
endcol: token_start + text.len() - 1,
file_stack: self.file_stack.clone(),
}
} else {
self.startcol = token_start + text.len();
token = Token {
token_type,
lexeme: text.to_string(),
literal,
line: self.line,
startcol: token_start,
endcol: token_start + text.len() - 1,
file_stack: self.file_stack.clone(),
}
}
token.validate();
Ok(token)
}
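/// Scans the remaining `=` markers plus the following space; one to five
/// markers map to `Heading1` through `Heading5`, and longer runs are a
/// `HeadingLevelError`.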
fn add_heading(&mut self) -> Result<Token, ScannerError> {
while self.peek() == '=' {
self.current += 1
}
self.current += 1;
match self.current - self.start - 1 {
1 => self.add_token(TokenType::Heading1, false, 0),
2 => self.add_token(TokenType::Heading2, false, 0),
3 => self.add_token(TokenType::Heading3, false, 0),
4 => self.add_token(TokenType::Heading4, false, 0),
5 => self.add_token(TokenType::Heading5, false, 0),
_ => Err(ScannerError::HeadingLevelError(self.line)),
}
}
fn add_list_item(&mut self, list_item_token: TokenType) -> Result<Token, ScannerError> {
self.current += 1;
self.add_token(list_item_token, false, 0)
}
fn add_block_image(&mut self) -> Result<Token, ScannerError> {
while self.peek() != ']' {
self.current += 1
}
self.current += 1;
self.add_token(TokenType::BlockImageMacro, true, 0)
}
fn add_inline_image(&mut self) -> Result<Token, ScannerError> {
while self.peek() != ']' {
self.current += 1
}
self.current += 1;
self.add_token(TokenType::InlineImageMacro, true, 0)
}
fn add_include(&mut self) -> Result<Token, ScannerError> {
while self.peek() != ']' {
self.current += 1
}
self.current += 1;
self.add_token(TokenType::Include, true, 0)
}
fn add_table_cell(&mut self) -> Result<Token, ScannerError> {
while !['\n', '\0', '|'].contains(&self.peek()) {
self.current += 1
}
if self.peek() == '|' && self.source.as_bytes()[self.current - 1] as char != ' ' {
self.current += 1;
return self.add_table_cell();
}
self.add_token(TokenType::TableCell, true, 0)
}
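/// Scans a URL entered from its `http` prefix. A following `[` turns it into
/// a `LinkMacro`; otherwise the URL ends at whitespace, and sentence
/// punctuation followed by whitespace is left to be scanned as text.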
fn add_link(&mut self) -> Result<Token, ScannerError> {
let hyperlink_allowed_punctuation = ['.', '#', ':', '/', '?', '=', '&'];
while !['\0', ' '].contains(&self.peek()) && !self.peek().is_ascii_punctuation() {
self.current += 1
}
if self.peek() == '[' {
self.current += 1;
self.add_token(TokenType::LinkMacro, true, 0)
} else if ['\0', ' '].contains(&self.peek()) {
self.add_token(TokenType::Hyperlink, true, 0)
} else if hyperlink_allowed_punctuation.contains(&self.peek()) {
if let Some(second_char) = self.peeks_ahead(2).chars().last() {
if ['\0', ' '].contains(&second_char) {
self.add_token(TokenType::Hyperlink, true, 0)
} else {
self.current += 1;
self.add_link()
}
} else {
self.current += 1;
self.add_link()
}
} else {
self.add_token(TokenType::Hyperlink, true, 0)
}
}
fn add_inline_style(&mut self) -> Result<Token, ScannerError> {
while self.peek() != ']' {
self.current += 1
}
self.current += 1;
self.add_token(TokenType::InlineStyle, true, 0)
}
fn add_cross_reference(&mut self) -> Result<Token, ScannerError> {
while self.peeks_ahead(2) != ">>" && !self.is_at_end() {
self.current += 1
}
self.current += 2;
self.add_token(TokenType::CrossReference, true, 0)
}
fn add_block_anchor(&mut self) -> Result<Token, ScannerError> {
while self.peeks_ahead(3) != "]]\n" && !self.is_at_end() {
self.current += 1
}
self.current += 2;
self.add_token(TokenType::BlockAnchor, true, 0)
}
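/// Consumes plain text up to (but not including) the next character that
/// could begin markup, emitting it as a single `Text` token.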
fn add_text_until_next_markup(&mut self) -> Result<Token, ScannerError> {
while ![
'\n', '*', '_', '`', '^', '~', 'f', 'p', 'h', ']', '[', ':', '#', 'N', 'T', 'I', 'C',
'W', '&', '{', '+', 'i', '<', '\'', '\"', '-', 't', 'e',
]
.contains(&self.peek())
&& !self.is_at_end()
{
self.current += 1;
}
self.add_token(TokenType::Text, true, 0)
}
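/// Scans a `tag::name[]` or `end::name[]` directive through its closing `]`,
/// then skips the rest of the line (e.g. a trailing comment closer) so the
/// directive consumes the whole line.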
fn add_tag(&mut self) -> Result<Token, ScannerError> {
let tag_type: TokenType;
if self.expecting_tag_end {
while self.peek() != 'e' {
self.current += 1;
}
self.start = self.current;
self.startcol = self.start;
}
while self.peek() != ']' && !self.is_at_end() {
self.current += 1;
}
self.current += 1;
if (self.source[self.start..self.current]).contains("tag") {
tag_type = TokenType::StartTag;
self.expecting_tag_end = true;
} else if (self.source[self.start..self.current]).contains("end") {
tag_type = TokenType::EndTag;
self.expecting_tag_end = false;
} else {
return Err(ScannerError::TagError(
self.source[self.start..self.current].to_string(),
));
}
let result = self.add_token(tag_type, true, 1);
while self.peek() != '\n' && !self.is_at_end() {
self.current += 1
}
if self.peek() == '\n' {
self.current += 1
}
result
}
fn starts_new_line(&self) -> bool {
self.start == 0 || self.source.as_bytes()[self.start - 1] == b'\n'
}
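/// True when the just-consumed character is followed by `delimiter_len - 1`
/// more copies of itself and a newline, completing a full-line delimiter of
/// `delimiter_len` identical characters.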
fn starts_repeated_char_line(&self, c: char, delimiter_len: usize) -> bool {
let mut expected_block = String::from(c).repeat(delimiter_len - 1);
expected_block.push('\n');
self.current + delimiter_len <= self.source.len()
&& self.peeks_ahead(delimiter_len) == expected_block
}
fn starts_attr(&mut self) -> bool {
let current_placeholder = self.current;
while ![' ', '\n', ':'].contains(&self.peek()) && !self.is_at_end() {
self.current += 1
}
let check = self.peek() == ':';
self.current = current_placeholder;
check
}
fn starts_end_tag_line(&mut self) -> bool {
self.expecting_tag_end && self.peek_line().contains("end::")
}
fn starts_attribute_reference(&mut self) -> bool {
let current_placeholder = self.current;
while (self.peek().is_alphanumeric() || self.peek() == '-') && !self.is_at_end() {
self.current += 1
}
let check = self.peek() == '}';
self.current = current_placeholder;
check
}
fn starts_charref(&mut self) -> bool {
let current_placeholder = self.current;
while self.peek().is_alphanumeric() && !self.is_at_end() {
self.current += 1
}
let check = self.peek() == ';';
self.current = current_placeholder;
check
}
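/// Decides whether an inline marker such as `*` or `_` doubles up as an
/// unconstrained marker, acts as a constrained marker (based on the
/// characters immediately before and after it), or is plain text.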
fn handle_inline_formatting(
&mut self,
c: char,
constrained: TokenType,
unconstrained: TokenType,
) -> Result<Token, ScannerError> {
let inline_markup_chars = ['*', '_', '`', '+', '^', '~', '#'];
let mut end_of_inline_markers = vec![
' ', '\0', '.', ',', ';', ':', '\n', ')', '"', '!', '?', '\'', ']', '…', '“', '”', '‘',
'’',
];
let mut beginning_of_inline_markers = vec![' ', '\n', '\0', ']', '(', '"', '['];
end_of_inline_markers.extend_from_slice(&inline_markup_chars);
beginning_of_inline_markers.extend_from_slice(&inline_markup_chars);
if self.peek() == ' ' && self.peek_back() == ' ' {
self.add_text_until_next_markup()
} else if self.peek() == c {
self.current += 1;
self.add_token(unconstrained, false, 0)
} else if end_of_inline_markers.contains(&self.peek())
|| (beginning_of_inline_markers.contains(&self.peek_back()) && self.peek() != c)
{
self.add_token(constrained, false, 0)
} else {
self.add_text_until_next_markup()
}
}
fn handle_text_symbol_replacement_parens(&mut self) -> Result<Token, ScannerError> {
while self.peek() != ')' {
self.current += 1
}
self.current += 1;
self.add_token(TokenType::CharRef, true, 0)
}
fn starts_attribution_line(&mut self) -> bool {
let current_placeholder = self.current;
while self.peek() != '\n' && !self.is_at_end() {
self.current += 1;
}
if self.starts_new_line() && self.source.as_bytes()[self.current - 1] as char == ']' {
true
} else {
self.current = current_placeholder;
false
}
}
fn starts_code_callout_list_item(&mut self) -> bool {
while self.peek() != '>' {
if self.peek().is_ascii_digit() {
self.current += 1;
} else {
return false;
}
}
self.current += 1;
self.peek() == ' '
}
fn starts_code_callout(&mut self) -> bool {
while self.peek() != '>' {
if self.peek().is_ascii_digit() {
self.current += 1;
} else {
return false;
}
}
self.current += 1;
self.peek() == '\n'
}
fn starts_text_symbol_replace_parens(&mut self) -> bool {
["C)", "R)"].contains(&self.peeks_ahead(2)) || self.peeks_ahead(3) == "TM)"
}
fn starts_text_symbol_replace_emdash(&mut self) -> bool {
let after_char = self.peeks_ahead(2).chars().last().unwrap();
self.peek() == '-' && (after_char.is_alphanumeric() || [' ', '\0'].contains(&after_char))
}
fn is_at_end(&self) -> bool {
self.current >= self.source.len()
}
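/// Returns the byte at the cursor as a `char`, or `'\0'` at end of input or
/// in the middle of a multi-byte character.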
fn peek(&self) -> char {
if self.is_at_end() || !self.source.is_char_boundary(self.current) {
return '\0';
}
self.source.as_bytes()[self.current] as char
}
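/// Returns the byte just before the token start (`self.start`), not the
/// cursor; `'\0'` at the very beginning of the source or on a non-boundary
/// byte.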
fn peek_back(&self) -> char {
if self.start == 0 || !self.source.is_char_boundary(self.start - 1) {
return '\0';
}
self.source.as_bytes()[self.start - 1] as char
}
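/// Returns the next `count` bytes as a `&str`, or `"\0"` when the span runs
/// past the end of the source or would split a multi-byte character.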
fn peeks_ahead(&self, count: usize) -> &str {
if self.is_at_end()
|| self.current + count > self.source.len()
|| !self.source.is_char_boundary(self.current + count)
{
return "\0";
}
&self.source[self.current..self.current + count]
}
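/// Returns the remainder of the current line, excluding the trailing
/// newline.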
fn peek_line(&self) -> &str {
let mut count = 1;
while !['\n', '\0'].contains(&self.peeks_ahead(count).chars().last().unwrap()) {
count += 1
}
self.peeks_ahead(count - 1)
}
}
#[cfg(test)]
mod tests {
use crate::scanner::tokens::TokenType;
use rstest::rstest;
use super::*;
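/// Scans `markup`, drops the trailing end-of-input token, and compares the
/// rest against `expected_tokens`.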
fn scan_and_assert_eq(markup: &str, expected_tokens: Vec<Token>) {
let s = Scanner::new(markup);
let mut scanned_tokens: Vec<Token> = s
.into_iter()
.filter_map(|result| result.as_ref().ok().cloned())
.collect();
let _ = scanned_tokens.pop();
assert_eq!(scanned_tokens, expected_tokens);
}
fn newline_token_at(line: usize, col: usize) -> Token {
Token::new_default(
TokenType::NewLineChar,
"\n".to_string(),
None,
line,
col,
col,
)
}
#[test]
fn newline() {
let markup = "\n".to_string();
let expected_tokens = vec![newline_token_at(1, 1)];
scan_and_assert_eq(&markup, expected_tokens)
}
#[rstest]
#[case("++++\n", TokenType::PassthroughBlock)]
#[case("****\n", TokenType::SidebarBlock)]
#[case("----\n", TokenType::SourceBlock)]
#[case("____\n", TokenType::QuoteVerseBlock)]
#[case("____\n", TokenType::QuoteVerseBlock)]
#[case("====\n", TokenType::ExampleBlock)]
#[case("....\n", TokenType::LiteralBlock)]
fn fenced_block_delimiter_start(#[case] markup: &str, #[case] expected_token: TokenType) {
let expected_tokens = vec![
Token::new_default(
expected_token,
markup[..4].to_string(),
None,
1,
1,
4,
),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 5, 5),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[rstest]
#[case("\n\n++++\n", TokenType::PassthroughBlock)]
#[case("\n\n****\n", TokenType::SidebarBlock)]
#[case("\n\n----\n", TokenType::SourceBlock)]
#[case("\n\n____\n", TokenType::QuoteVerseBlock)]
#[case("\n\n////\n", TokenType::CommentBlock)]
#[case("\n\n====\n", TokenType::ExampleBlock)]
#[case("\n\n....\n", TokenType::LiteralBlock)]
fn fenced_block_delimiter_new_block(#[case] markup: &str, #[case] expected_token: TokenType) {
let expected_tokens = vec![
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 1, 1),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 2, 1, 1),
Token::new_default(
expected_token,
markup[2..6].to_string(),
None,
3,
1,
4,
),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 3, 5, 5),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn open_block_beginning() {
let markup = "--\n";
let expected_tokens = vec![
Token::new_default(
TokenType::OpenBlock,
markup[..2].to_string(),
None,
1,
1,
2,
),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 3, 3),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn open_block_new_block() {
let markup = "\n\n--\n";
let expected_tokens = vec![
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 1, 1),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 2, 1, 1),
Token::new_default(TokenType::OpenBlock, "--".to_string(), None, 3, 1, 2),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 3, 3, 3),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[rstest]
#[case("* Foo\n".to_string(), TokenType::UnorderedListItem)]
#[case(". Foo\n".to_string(), TokenType::OrderedListItem)]
fn single_list_items(#[case] markup: String, #[case] expected_token: TokenType) {
let mut delimiter = markup
.split_whitespace()
.next()
.unwrap()
.to_string();
delimiter.push(' ');
let expected_tokens = vec![
Token::new_default(expected_token, delimiter, None, 1, 1, 2),
Token::new_default(
TokenType::Text,
"Foo".to_string(),
Some("Foo".to_string()),
1,
3,
5,
),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 6, 6),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[rstest]
#[case::unordered("* Foo\n* Bar".to_string(), TokenType::UnorderedListItem)]
#[case::ordered(". Foo\n. Bar".to_string(), TokenType::OrderedListItem)]
fn multiple_list_items(#[case] markup: String, #[case] expected_token: TokenType) {
let mut delimiter = markup
.split_whitespace()
.next()
.unwrap()
.to_string();
delimiter.push(' ');
let expected_tokens = vec![
Token::new_default(expected_token, delimiter.clone(), None, 1, 1, 2),
Token::new_default(
TokenType::Text,
"Foo".to_string(),
Some("Foo".to_string()),
1,
3,
5,
),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 6, 6),
Token::new_default(expected_token, delimiter, None, 2, 1, 2),
Token::new_default(
TokenType::Text,
"Bar".to_string(),
Some("Bar".to_string()),
2,
3,
5,
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[test]
fn block_introduction() {
let markup = ".Baz\n";
let title = "Baz".to_string();
let expected_tokens = vec![
Token::new_default(TokenType::BlockLabel, ".".to_string(), None, 1, 1, 1),
Token::new_default(TokenType::Text, title.clone(), Some(title), 1, 2, 4),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 5, 5),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[rstest]
#[case("\n\n= Foo\n".to_string(), TokenType::Heading1, 1)]
#[case("\n\n== Foo\n".to_string(), TokenType::Heading2, 2)]
#[case("\n\n=== Foo\n".to_string(), TokenType::Heading3, 3)]
#[case("\n\n==== Foo\n".to_string(), TokenType::Heading4, 4)]
#[case("\n\n===== Foo\n".to_string(), TokenType::Heading5, 5)]
fn headings_after_block(
#[case] markup: String,
#[case] expected_token: TokenType,
#[case] heading_level: usize,
) {
let mut lexeme = "=".to_string().repeat(heading_level);
lexeme.push(' ');
let expected_tokens = vec![
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 1, 1),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 2, 1, 1),
Token::new_default(expected_token, lexeme.clone(), None, 3, 1, lexeme.len()),
Token::new_default(
TokenType::Text,
"Foo".to_string(),
Some("Foo".to_string()),
3,
lexeme.len() + 1,
lexeme.len() + 3,
),
Token::new_default(
TokenType::NewLineChar,
"\n".to_string(),
None,
3,
lexeme.len() + 4,
lexeme.len() + 4,
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[rstest]
#[case("\n\n'''\n".to_string(), TokenType::ThematicBreak)]
#[case("\n\n<<<\n".to_string(), TokenType::PageBreak)]
fn breaks(#[case] markup: String, #[case] expected_token: TokenType) {
let expected_tokens = vec![
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 1, 1),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 2, 1, 1),
Token::new_default(
expected_token,
markup[2..5].to_string(),
None,
3,
1,
3,
),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 3, 4, 4),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[test]
fn comments() {
let comment_line = "// Some text or other".to_string();
let markup = "\n".to_string() + &comment_line + "\n";
let expected_tokens = vec![
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 1, 1),
Token::new_default(
TokenType::Comment,
comment_line.clone(),
Some(comment_line.clone()),
2,
1,
comment_line.len(),
),
Token::new_default(
TokenType::NewLineChar,
"\n".to_string(),
None,
2,
comment_line.len() + 1,
comment_line.len() + 1,
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[rstest]
#[case::quote("[quote]\n", TokenType::ElementAttributes)]
#[case::quote("[#class.role]\n", TokenType::ElementAttributes)]
#[case("[quote, Georges Perec]\n", TokenType::ElementAttributes)]
#[case("[verse]\n", TokenType::ElementAttributes)]
#[case(
"[verse, Audre Lorde, A Litany for Survival]\n",
TokenType::ElementAttributes
)]
#[case("[source]\n", TokenType::ElementAttributes)]
#[case::role("[role=\"foo\"]\n", TokenType::ElementAttributes)]
#[case::role_dot("[.foo]\n", TokenType::ElementAttributes)]
#[case("[NOTE]\n", TokenType::NotePara)]
#[case("[TIP]\n", TokenType::TipPara)]
#[case("[IMPORTANT]\n", TokenType::ImportantPara)]
#[case("[CAUTION]\n", TokenType::CautionPara)]
#[case("[WARNING]\n", TokenType::WarningPara)]
fn attribute_lines(#[case] markup: &str, #[case] expected_token: TokenType) {
let markup_len = markup[..markup.len() - 1].len();
let expected_tokens = vec![
Token::new_default(
expected_token,
markup[..markup.len() - 1].to_string(),
Some(markup[..markup.len() - 1].to_string()),
1,
1,
markup_len,
),
Token::new_default(
TokenType::NewLineChar,
"\n".to_string(),
None,
1,
markup.len(),
markup.len(),
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn block_continuation() {
let markup = "* Foo\n+\nBar".to_string();
let expected_tokens = vec![
Token::new_default(
TokenType::UnorderedListItem,
"* ".to_string(),
None,
1,
1,
2,
),
Token::new_default(
TokenType::Text,
"Foo".to_string(),
Some("Foo".to_string()),
1,
3,
5,
),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 6, 6),
Token::new_default(TokenType::BlockContinuation, "+".to_string(), None, 2, 1, 1),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 2, 2, 2),
Token::new_default(
TokenType::Text,
"Bar".to_string(),
Some("Bar".to_string()),
3,
1,
3,
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[test]
fn line_continuation() {
let markup = "Foo +\nBar".to_string();
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Foo ".to_string(),
Some("Foo ".to_string()),
1,
1,
4,
),
Token::new_default(TokenType::LineContinuation, "+".to_string(), None, 1, 5, 5),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 6, 6),
Token::new_default(
TokenType::Text,
"Bar".to_string(),
Some("Bar".to_string()),
2,
1,
3,
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[rstest]
#[case('*', TokenType::Strong)]
#[case('_', TokenType::Emphasis)]
#[case('`', TokenType::Monospace)]
#[case('+', TokenType::Literal)]
#[case('^', TokenType::Superscript)]
#[case('~', TokenType::Subscript)]
#[case('#', TokenType::Mark)]
fn inline_formatting(#[case] markup_char: char, #[case] expected_token: TokenType) {
let markup = format!("Somx {}bar{} bar.", markup_char, markup_char);
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx ".to_string(),
Some("Somx ".to_string()),
1,
1,
5,
),
Token::new_default(expected_token, markup_char.to_string(), None, 1, 6, 6),
Token::new_default(
TokenType::Text,
"bar".to_string(),
Some("bar".to_string()),
1,
7,
9,
),
Token::new_default(expected_token, markup_char.to_string(), None, 1, 10, 10),
Token::new_default(
TokenType::Text,
" bar.".to_string(),
Some(" bar.".to_string()),
1,
11,
markup.len(),
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[rstest]
#[case::strong(String::from("**"), TokenType::UnconstrainedStrong)]
#[case::emphasis(String::from("__"), TokenType::UnconstrainedEmphasis)]
#[case::monospace(String::from("``"), TokenType::UnconstrainedMonospace)]
#[case::mark(String::from("##"), TokenType::UnconstrainedMark)]
#[case::mark(String::from("++"), TokenType::UnconstrainedLiteral)]
fn inline_formatting_doubles(#[case] markup_str: String, #[case] expected_token: TokenType) {
let markup = format!("Somx{}bar{}bar.", markup_str, markup_str);
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx".to_string(),
Some("Somx".to_string()),
1,
1,
4,
),
Token::new_default(expected_token, markup_str.clone(), None, 1, 5, 6),
Token::new_default(
TokenType::Text,
"bar".to_string(),
Some("bar".to_string()),
1,
7,
9,
),
Token::new_default(expected_token, markup_str, None, 1, 10, 11),
Token::new_default(
TokenType::Text,
"bar.".to_string(),
Some("bar.".to_string()),
1,
12,
15,
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[rstest]
#[case('*', TokenType::Strong)]
#[case('`', TokenType::Monospace)]
#[case('+', TokenType::Literal)]
#[case('^', TokenType::Superscript)]
#[case('~', TokenType::Subscript)]
#[case('#', TokenType::Mark)]
fn inline_formatting_by_other(#[case] markup_char: char, #[case] expected_token: TokenType) {
let markup = format!("Somx {}_bar_{} bar.", markup_char, markup_char);
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx ".to_string(),
Some("Somx ".to_string()),
1,
1,
5,
),
Token::new_default(expected_token, markup_char.to_string(), None, 1, 6, 6),
Token::new_default(TokenType::Emphasis, "_".to_string(), None, 1, 7, 7),
Token::new_default(
TokenType::Text,
"bar".to_string(),
Some("bar".to_string()),
1,
8,
10,
),
Token::new_default(TokenType::Emphasis, "_".to_string(), None, 1, 11, 11),
Token::new_default(expected_token, markup_char.to_string(), None, 1, 12, 12),
Token::new_default(
TokenType::Text,
" bar.".to_string(),
Some(" bar.".to_string()),
1,
13,
markup.len(),
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[rstest]
#[case('*')]
#[case('_')]
#[case('`')]
#[case('+')]
#[case('#')]
fn inline_formatting_by_self_is_text(#[case] markup_char: char) {
let markup = format!(" {} ", markup_char);
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
" ".to_string(),
Some(" ".to_string()),
1,
1,
1,
),
Token::new_default(
TokenType::Text,
format!("{} ", markup_char),
Some(format!("{} ", markup_char)),
1,
2,
3,
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[rstest]
#[case("footnote:[", TokenType::FootnoteMacro)]
#[case("pass:[", TokenType::PassthroughInlineMacro)]
fn inline_macros(#[case] markup_check: &str, #[case] expected_token: TokenType) {
let markup = format!("Somx {}bar]", &markup_check);
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx ".to_string(),
Some("Somx ".to_string()),
1,
1,
5,
),
Token::new_default(
expected_token,
markup_check.to_string(),
None,
1,
6,
6 + markup_check.len() - 1,
),
Token::new_default(
TokenType::Text,
"bar".to_string(),
Some("bar".to_string()),
1,
6 + markup_check.len(),
6 + markup_check.len() + 2,
),
Token::new_default(
TokenType::InlineMacroClose,
"]".to_string(),
Some("]".to_string()),
1,
markup.len(),
markup.len(),
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[rstest]
#[case("NOTE", TokenType::NotePara)]
#[case("TIP", TokenType::TipPara)]
#[case("IMPORTANT", TokenType::ImportantPara)]
#[case("CAUTION", TokenType::CautionPara)]
#[case("WARNING", TokenType::WarningPara)]
fn inline_admonitions(#[case] markup_check: &str, #[case] expected_token: TokenType) {
let markup = format!("{}: bar.", markup_check);
let expected_tokens = vec![
Token::new_default(
expected_token,
format!("{}: ", markup_check),
Some(format!("{}: ", markup_check)),
1,
1,
markup_check.len() + 2,
),
Token::new_default(
TokenType::Text,
"bar.".to_string(),
Some("bar.".to_string()),
1,
markup_check.len() + 3,
markup_check.len() + 6,
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[test]
fn description_list_mark_space() {
let markup = "Txrm:: DS";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Txrm".to_string(),
Some("Txrm".to_string()),
1,
1,
4,
),
Token::new_default(
TokenType::DescriptionListMarker,
":: ".to_string(),
None,
1,
5,
7,
),
Token::new_default(
TokenType::Text,
"DS".to_string(),
Some("DS".to_string()),
1,
8,
9,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn description_list_mark_newline() {
let markup = "Txrm::\nDS";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Txrm".to_string(),
Some("Txrm".to_string()),
1,
1,
4,
),
Token::new_default(
TokenType::DescriptionListMarker,
"::\n".to_string(),
None,
1,
5,
7,
),
Token::new_default(
TokenType::Text,
"DS".to_string(),
Some("DS".to_string()),
1,
8,
9,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn description_list_mark_not_if_space_before() {
let markup = "Txrm :: bar";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Txrm ".to_string(),
Some("Txrm ".to_string()),
1,
1,
5,
),
Token::new_default(
TokenType::Text,
":".to_string(),
Some(":".to_string()),
1,
6,
6,
),
Token::new_default(
TokenType::Text,
": bar".to_string(),
Some(": bar".to_string()),
1,
7,
11,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn link_with_text() {
let markup = "Somx http://example.com[bar]";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx ".to_string(),
Some("Somx ".to_string()),
1,
1,
5,
),
Token::new_default(
TokenType::LinkMacro,
"http://example.com[".to_string(),
Some("http://example.com[".to_string()),
1,
6,
24,
),
Token::new_default(
TokenType::Text,
"bar".to_string(),
Some("bar".to_string()),
1,
25,
27,
),
Token::new_default(
TokenType::InlineMacroClose,
"]".to_string(),
Some("]".to_string()),
1,
28,
28,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn link_no_text() {
let markup = "Somx http://example.com[]";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx ".to_string(),
Some("Somx ".to_string()),
1,
1,
5,
),
Token::new_default(
TokenType::LinkMacro,
"http://example.com[".to_string(),
Some("http://example.com[".to_string()),
1,
6,
24,
),
Token::new_default(
TokenType::InlineMacroClose,
"]".to_string(),
Some("]".to_string()),
1,
25,
25,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn link_no_attribute_fencing() {
let markup = "Somx http://example.com";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx ".to_string(),
Some("Somx ".to_string()),
1,
1,
5,
),
Token::new_default(
TokenType::Hyperlink,
"http://example.com".to_string(),
Some("http://example.com".to_string()),
1,
6,
23,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn link_no_attribute_fencing_more_text() {
let markup = "Somx http://example.com Somx";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx ".to_string(),
Some("Somx ".to_string()),
1,
1,
5,
),
Token::new_default(
TokenType::Hyperlink,
"http://example.com".to_string(),
Some("http://example.com".to_string()),
1,
6,
23,
),
Token::new_default(
TokenType::Text,
" Somx".to_string(),
Some(" Somx".to_string()),
1,
24,
28,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[rstest]
#[case(". ")]
#[case(".")]
#[case(", ")]
#[case(",")]
#[case(": ")]
#[case("? ")]
#[case(". ")]
#[case("/ ")]
#[case(":")]
#[case("?")]
#[case(".")]
#[case("/")]
fn link_no_attribute_fencing_sentence(#[case] end_: &str) {
let markup = format!("Somx http://example.com{}", end_);
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx ".to_string(),
Some("Somx ".to_string()),
1,
1,
5,
),
Token::new_default(
TokenType::Hyperlink,
"http://example.com".to_string(),
Some("http://example.com".to_string()),
1,
6,
23,
),
Token::new_default(
TokenType::Text,
end_.to_string(),
Some(end_.to_string()),
1,
24,
23 + end_.len(),
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[test]
fn block_image_with_alt() {
let markup = "image::path/to/img.png[alt text]";
let expected_tokens = vec![Token::new_default(
TokenType::BlockImageMacro,
"image::path/to/img.png[alt text]".to_string(),
Some("image::path/to/img.png[alt text]".to_string()),
1,
1,
32,
)];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn inline_image() {
let markup = "Somx image:path/to/img.png[]";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx ".to_string(),
Some("Somx ".to_string()),
1,
1,
5,
),
Token::new_default(
TokenType::InlineImageMacro,
"image:path/to/img.png[]".to_string(),
Some("image:path/to/img.png[]".to_string()),
1,
6,
28,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn no_inline_image_if_double_colon() {
let markup = "Somx image::bar.png[]";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx ".to_string(),
Some("Somx ".to_string()),
1,
1,
5,
),
Token::new_default(
TokenType::Text,
"imag".to_string(),
Some("imag".to_string()),
1,
6,
9,
),
Token::new_default(
TokenType::Text,
"e".to_string(),
Some("e".to_string()),
1,
10,
10,
),
Token::new_default(
TokenType::Text,
":".to_string(),
Some(":".to_string()),
1,
11,
11,
),
Token::new_default(
TokenType::Text,
":bar.".to_string(),
Some(":bar.".to_string()),
1,
12,
16,
),
Token::new_default(
TokenType::Text,
"png".to_string(),
Some("png".to_string()),
1,
17,
19,
),
Token::new_default(
TokenType::Text,
"[".to_string(),
Some("[".to_string()),
1,
20,
20,
),
Token::new_default(
TokenType::InlineMacroClose,
"]".to_string(),
Some("]".to_string()),
1,
21,
21,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn inline_style_in_line() {
let markup = "Somx [.style]#foo#";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Somx ".to_string(),
Some("Somx ".to_string()),
1,
1,
5,
),
Token::new_default(
TokenType::InlineStyle,
"[.style]".to_string(),
Some("[.style]".to_string()),
1,
6,
13,
),
Token::new_default(TokenType::Mark, "#".to_string(), None, 1, 14, 14),
Token::new_default(
TokenType::Text,
"foo".to_string(),
Some("foo".to_string()),
1,
15,
17,
),
Token::new_default(TokenType::Mark, "#".to_string(), None, 1, 18, 18),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn inline_style_new_line() {
let markup = "\n[.style]#foox#";
let expected_tokens = vec![
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 1, 1),
Token::new_default(
TokenType::InlineStyle,
"[.style]".to_string(),
Some("[.style]".to_string()),
2,
1,
8,
),
Token::new_default(TokenType::Mark, "#".to_string(), None, 2, 9, 9),
Token::new_default(
TokenType::Text,
"foox".to_string(),
Some("foox".to_string()),
2,
10,
13,
),
Token::new_default(TokenType::Mark, "#".to_string(), None, 2, 14, 14),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn inline_style_new_block() {
let markup = "\n\n[.style]#foo#";
let expected_tokens = vec![
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 1, 1),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 2, 1, 1),
Token::new_default(
TokenType::InlineStyle,
"[.style]".to_string(),
Some("[.style]".to_string()),
3,
1,
8,
),
Token::new_default(TokenType::Mark, "#".to_string(), None, 3, 9, 9),
Token::new_default(
TokenType::Text,
"foo".to_string(),
Some("foo".to_string()),
3,
10,
12,
),
Token::new_default(TokenType::Mark, "#".to_string(), None, 3, 13, 13),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn inline_charref() {
let markup = "bar—bar";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"bar".to_string(),
Some("bar".to_string()),
1,
1,
3,
),
Token::new_default(
TokenType::CharRef,
"—".to_string(),
Some("—".to_string()),
1,
4,
10,
),
Token::new_default(
TokenType::Text,
"bar".to_string(),
Some("bar".to_string()),
1,
11,
13,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn inline_charref_spaces() {
let markup = "bar — bar";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"bar ".to_string(),
Some("bar ".to_string()),
1,
1,
4,
),
Token::new_default(
TokenType::CharRef,
"—".to_string(),
Some("—".to_string()),
1,
5,
11,
),
Token::new_default(
TokenType::Text,
" bar".to_string(),
Some(" bar".to_string()),
1,
12,
15,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn document_attribute_name_value() {
let markup = ":foo: bar\n";
let expected_tokens = vec![
Token::new_default(
TokenType::Attribute,
String::from(":foo: bar"),
None,
1,
1,
9,
),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 10, 10),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn inline_attr_reference() {
let markup = "bar{foo}bar";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"bar".to_string(),
Some("bar".to_string()),
1,
1,
3,
),
Token::new_default(
TokenType::AttributeReference,
"{foo}".to_string(),
None,
1,
4,
8,
),
Token::new_default(
TokenType::Text,
"bar".to_string(),
Some("bar".to_string()),
1,
9,
11,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn cross_reference() {
let markup = "<<foo_bar>>";
let expected_tokens = vec![Token::new_default(
TokenType::CrossReference,
"<<foo_bar>>".to_string(),
Some("<<foo_bar>>".to_string()),
1,
1,
11,
)];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn simple_include() {
let markup = "include::partial.adoc[]";
let expected_tokens = vec![Token::new_default(
TokenType::Include,
"include::partial.adoc[]".to_string(),
Some("include::partial.adoc[]".to_string()),
1,
1,
23,
)];
scan_and_assert_eq(markup, expected_tokens);
}
#[rstest]
#[case::newline("\n")]
#[case::start_of_file("")]
fn block_anchor(#[case] beginning: &str) {
let mut addition: usize = 0;
let mut expected_tokens: Vec<Token> = vec![];
if beginning == "\n" {
addition = 1;
expected_tokens = vec![Token::new_default(
TokenType::NewLineChar,
"\n".to_string(),
None,
1,
1,
1,
)];
}
let markup = format!("{beginning}[[foo]]\n");
expected_tokens.extend(vec![
Token::new_default(
TokenType::BlockAnchor,
"[[foo]]".to_string(),
Some("[[foo]]".to_string()),
1 + addition,
1,
7,
),
Token::new_default(
TokenType::NewLineChar,
"\n".to_string(),
None,
1 + addition,
8,
8,
),
]);
scan_and_assert_eq(&markup, expected_tokens);
}
#[test]
fn simple_table() {
let markup = "[cols=\"1,1\"]\n|===\n|cell one\n|cell two\n|===";
let expected_tokens = vec![
Token::new_default(
TokenType::ElementAttributes,
"[cols=\"1,1\"]".to_string(),
Some("[cols=\"1,1\"]".to_string()),
1,
1,
12,
),
newline_token_at(1, 13),
Token::new_default(TokenType::Table, "|===".to_string(), None, 2, 1, 4),
newline_token_at(2, 5),
Token::new_default(
TokenType::TableCell,
"|cell one".to_string(),
Some("|cell one".to_string()),
3,
1,
9,
),
newline_token_at(3, 10),
Token::new_default(
TokenType::TableCell,
"|cell two".to_string(),
Some("|cell two".to_string()),
4,
1,
9,
),
newline_token_at(4, 10),
Token::new_default(TokenType::Table, "|===".to_string(), None, 5, 1, 4),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn simple_table_cols_same_line() {
let markup = "[cols=\"1,1\"]\n|===\n|cell one |cell two\n|===";
let expected_tokens = vec![
Token::new_default(
TokenType::ElementAttributes,
"[cols=\"1,1\"]".to_string(),
Some("[cols=\"1,1\"]".to_string()),
1,
1,
12,
),
newline_token_at(1, 13),
Token::new_default(TokenType::Table, "|===".to_string(), None, 2, 1, 4),
newline_token_at(2, 5),
Token::new_default(
TokenType::TableCell,
"|cell one ".to_string(),
Some("|cell one ".to_string()),
3,
1,
10,
),
Token::new_default(
TokenType::TableCell,
"|cell two".to_string(),
Some("|cell two".to_string()),
3,
11,
19,
),
newline_token_at(3, 20),
Token::new_default(TokenType::Table, "|===".to_string(), None, 4, 1, 4),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn scan_odd_boundaried_text() {
let markup = "rxgular shis…\n";
let expected_tokens = vec![
Token {
token_type: TokenType::Text,
lexeme: "rxgular s".into(),
literal: Some("rxgular s".into()),
line: 1,
startcol: 1,
endcol: 9,
file_stack: vec![],
},
Token {
token_type: TokenType::Text,
lexeme: "h".into(),
literal: Some("h".into()),
line: 1,
startcol: 10,
endcol: 10,
file_stack: vec![],
},
Token {
token_type: TokenType::Text,
lexeme: "is…".into(),
literal: Some("is…".into()),
line: 1,
startcol: 11,
endcol: 15,
file_stack: vec![],
},
Token {
token_type: TokenType::NewLineChar,
lexeme: "\n".into(),
literal: None,
line: 1,
startcol: 16,
endcol: 16,
file_stack: vec![],
},
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn picks_up_code_callouts_behind_inline_comment() {
let markup = "bar // <1>\n";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"bar // ".to_string(),
Some("bar // ".to_string()),
1,
1,
7,
),
Token::new_default(
TokenType::CodeCallout,
"<1>".to_string(),
Some("<1>".to_string()),
1,
8,
10,
),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 11, 11),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn code_callout_list() {
let markup = "<1> Bar";
let expected_tokens = vec![
Token::new_default(
TokenType::CodeCalloutListItem,
"<1> ".to_string(),
Some("<1> ".to_string()),
1,
1,
4,
),
Token::new_default(
TokenType::Text,
"Bar".to_string(),
Some("Bar".to_string()),
1,
5,
7,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn typographers_apostrophe() {
let markup = "Sam's ";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Sam".to_string(),
Some("Sam".to_string()),
1,
1,
3,
),
Token::new_default(
TokenType::CharRef,
"'".to_string(),
Some("’".to_string()),
1,
4,
4,
),
Token::new_default(
TokenType::Text,
"s ".to_string(),
Some("s ".to_string()),
1,
5,
6,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn typographers_apostrophe_newline() {
let markup = "Sam's\n";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Sam".to_string(),
Some("Sam".to_string()),
1,
1,
3,
),
Token::new_default(
TokenType::CharRef,
"'".to_string(),
Some("’".to_string()),
1,
4,
4,
),
Token::new_default(
TokenType::Text,
"s".to_string(),
Some("s".to_string()),
1,
5,
5,
),
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 6, 6),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn typographers_apostrophe_eof() {
let markup = "Sam's";
let expected_tokens = vec![
Token::new_default(
TokenType::Text,
"Sam".to_string(),
Some("Sam".to_string()),
1,
1,
3,
),
Token::new_default(
TokenType::CharRef,
"'".to_string(),
Some("’".to_string()),
1,
4,
4,
),
Token::new_default(
TokenType::Text,
"s".to_string(),
Some("s".to_string()),
1,
5,
5,
),
];
scan_and_assert_eq(markup, expected_tokens);
}
#[rstest]
#[case::double_quotes("\"`", "`\"", TokenType::OpenDoubleQuote, TokenType::CloseDoubleQuote)]
#[case::single_quotes("'`", "`'", TokenType::OpenSingleQuote, TokenType::CloseSingleQuote)]
fn typographers_quotes(
#[case] open_markup: &str,
#[case] close_markup: &str,
#[case] open_token: TokenType,
#[case] close_token: TokenType,
) {
let markup = format!("{}bar{}", open_markup, close_markup);
let expected_tokens = vec![
Token::new_default(
open_token,
open_markup.to_string(),
Some(open_markup.to_string()),
1,
1,
2,
),
Token::new_default(
TokenType::Text,
"bar".to_string(),
Some("bar".to_string()),
1,
3,
5,
),
Token::new_default(
close_token,
close_markup.to_string(),
Some(close_markup.to_string()),
1,
6,
7,
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[rstest]
#[case("(C)", "©")]
#[case("(R)", "®")]
#[case("(TM)", "™")]
#[case("...", "…")]
#[case("->", "→")]
#[case("=>", "⇒")]
#[case("<-", "←")]
#[case("<=", "⇐")]
fn character_replacements_minus_emdash(#[case] markup: &str, #[case] replacement: &str) {
let expected_tokens = vec![Token::new_default(
TokenType::CharRef,
markup.to_string(),
Some(replacement.to_string()),
1,
1,
markup.len(),
)];
scan_and_assert_eq(markup, expected_tokens);
}
#[test]
fn character_replacements_emdash_alone() {
let markup = "--".to_string();
let expected_tokens = vec![Token::new_default(
TokenType::CharRef,
"--".to_string(),
Some("—".to_string()),
1,
1,
markup.len(),
)];
scan_and_assert_eq(&markup, expected_tokens);
}
#[test]
fn character_replacements_emdash_space() {
let markup = "-- ".to_string();
let expected_tokens = vec![
Token::new_default(
TokenType::CharRef,
"--".to_string(),
Some("—".to_string()),
1,
1,
2,
),
Token::new_default(
TokenType::Text,
" ".to_string(),
Some(" ".to_string()),
1,
3,
3,
),
];
scan_and_assert_eq(&markup, expected_tokens);
}
#[rstest]
#[case("1", true)]
#[case("a", true)]
#[case("(", false)]
fn character_replacements_emdash_alpha_numeric(#[case] next_char: &str, #[case] pass: bool) {
let markup = format!("--{}", next_char);
let expected = Token::new_default(
TokenType::CharRef,
"--".to_string(),
Some("—".to_string()),
1,
1,
2,
);
let scanned: Vec<Token> = Scanner::new(&markup)
.filter_map(|result| result.as_ref().ok().cloned())
.collect();
if pass {
assert_eq!(scanned.first().unwrap(), &expected)
} else {
assert_ne!(scanned.first().unwrap(), &expected)
}
}
#[test]
fn character_replacements_emdash_inline() {
let markup = "This --".to_string();
let expected = Token::new_default(
TokenType::CharRef,
"--".to_string(),
Some("—".to_string()),
1,
6,
7,
);
let mut scanned: Vec<Token> = Scanner::new(&markup)
.filter_map(|result| result.as_ref().ok().cloned())
.collect();
scanned.pop();
assert_eq!(scanned.last().unwrap(), &expected)
}
#[rstest]
#[case("tag", TokenType::StartTag)]
#[case("end", TokenType::EndTag)]
fn simple_tag(#[case] marker: &str, #[case] tag: TokenType) {
let markup = format!("{}::foo[]", marker);
let expected = Token::new_default(tag, markup.clone(), Some(markup.clone()), 1, 1, 10);
let mut scanned: Vec<Token> = Scanner::new(&markup)
.filter_map(|result| result.as_ref().ok().cloned())
.collect();
scanned.pop();
assert_eq!(scanned.last().unwrap(), &expected)
}
#[rstest]
#[case("<!-- ", " -->")] #[case("// ", "")] #[case("# ", "")] fn contextual_tag_start(#[case] context_start: &str, #[case] context_end: &str) {
let tag = String::from("tag::foo[]");
let markup = format!("{}{}{}", context_start, tag, context_end);
let tag_col_start = context_start.len() + 1;
let tag_col_end = tag_col_start + tag.len() - 1;
let expected = Token::new_default(
TokenType::StartTag,
tag.clone(),
Some(tag.clone()),
1,
tag_col_start,
tag_col_end,
);
let scanned: Vec<Token> = Scanner::new(&markup)
.filter_map(|result| result.as_ref().ok().cloned())
.collect();
assert!(scanned.contains(&expected))
}
#[rstest]
#[case("<!-- ", " -->")] #[case("// ", "")] #[case("# ", "")] fn contextual_tag_end(#[case] context_start: &str, #[case] context_end: &str) {
let tag = String::from("end::foo[]");
let markup = format!("\n{}{}{}", context_start, tag, context_end);
let tag_col_start = context_start.len() + 1;
let tag_col_end = tag_col_start + tag.len() - 1;
let expected = vec![
Token::new_default(TokenType::NewLineChar, "\n".to_string(), None, 1, 1, 1),
Token::new_default(
TokenType::EndTag,
tag.clone(),
Some(tag.clone()),
2,
tag_col_start,
tag_col_end,
),
];
let mut scanner = Scanner::new(&markup);
scanner.expecting_tag_end = true;
let mut scanned: Vec<Token> = scanner
.filter_map(|result| result.as_ref().ok().cloned())
.collect();
scanned.pop();
assert_eq!(scanned, expected)
}
}