use crate::errors::TokenError;
use crate::graph::nodes::Location;
/// A single lexed token: its kind, raw source text, optional normalized
/// replacement text, and its position in the (possibly included) source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
// The kind of token (see `TokenType`).
pub token_type: TokenType,
// `lexeme` is the raw source text; `literal` is an optional normalized
// replacement (e.g. a CharRef glyph set by `validate`); `line` is the
// 1-based source line — but see `Default`, which uses 0; TODO confirm.
pub lexeme: String, pub literal: Option<String>, pub line: usize,
// First column of the token on `line`.
pub startcol: usize,
// Last column of the token on `line`.
pub endcol: usize,
// Stack of include-file names the token originated from; empty when the
// token comes from the top-level document.
pub file_stack: Vec<String>,
}
impl Default for Token {
    /// The default token: an EOF with a NUL lexeme at line 0,
    /// columns 1..1, and no include stack.
    fn default() -> Self {
        Self {
            token_type: TokenType::Eof,
            lexeme: String::from("\0"),
            literal: None,
            line: 0,
            startcol: 1,
            endcol: 1,
            file_stack: Vec::new(),
        }
    }
}
impl Token {
    /// Builds a token with every field supplied explicitly.
    pub fn new(
        token_type: TokenType,
        lexeme: String,
        literal: Option<String>,
        line: usize,
        startcol: usize,
        endcol: usize,
        file_stack: Vec<String>,
    ) -> Self {
        Token {
            token_type,
            lexeme,
            literal,
            line,
            startcol,
            endcol,
            file_stack,
        }
    }

    /// Like [`Token::new`] but with an empty include `file_stack`.
    pub fn new_default(
        token_type: TokenType,
        lexeme: String,
        literal: Option<String>,
        line: usize,
        startcol: usize,
        endcol: usize,
    ) -> Self {
        Token {
            token_type,
            lexeme,
            literal,
            line,
            startcol,
            endcol,
            file_stack: vec![],
        }
    }

    /// The synthetic EOF token emitted after the last real token on `line`.
    pub fn final_token(line: usize, file_stack: Vec<String>) -> Self {
        Token {
            token_type: TokenType::Eof,
            lexeme: "".to_string(),
            literal: None,
            line,
            startcol: 0,
            endcol: 0,
            file_stack,
        }
    }

    /// This token's kind (cheap `Copy`).
    pub fn token_type(&self) -> TokenType {
        self.token_type
    }

    /// Text this token contributes to output.
    ///
    /// Curly-quote tokens render as their Unicode glyphs; for everything
    /// else the normalized `literal` (when present) takes precedence over
    /// the raw `lexeme`.
    pub fn text(&self) -> String {
        match self.token_type() {
            TokenType::OpenDoubleQuote => "“".into(),
            TokenType::CloseDoubleQuote => "”".into(),
            TokenType::OpenSingleQuote => "‘".into(),
            TokenType::CloseSingleQuote => "’".into(),
            _ => self.literal.clone().unwrap_or_else(|| self.lexeme.clone()),
        }
    }

    /// Location of the token's first character.
    pub fn first_location(&self) -> Location {
        Location::new(self.line, self.startcol, self.file_stack.clone())
    }

    /// Location of the token's last character.
    pub fn last_location(&self) -> Location {
        Location::new(self.line, self.endcol, self.file_stack.clone())
    }

    /// Start and end locations, in that order.
    pub fn locations(&self) -> Vec<Location> {
        vec![self.first_location(), self.last_location()]
    }

    /// Shifts this token's position by `lines` and `cols`.
    ///
    /// `lines` appears to be a 1-based line count (1 means "same line");
    /// the previous `lines - 1` underflowed (panicked) for `lines == 0`,
    /// which `saturating_sub` now treats the same as 1 — confirm callers
    /// never rely on that panic.
    pub fn update_token_loc_offsets_by(&mut self, lines: usize, cols: usize) {
        self.line += lines.saturating_sub(1);
        self.startcol += cols;
        self.endcol += cols;
    }

    /// Whether this token kind can appear inside inline content.
    pub fn is_inline(&self) -> bool {
        // The original pattern began with a stray `|` placed as
        // `|TokenType::Comment|` — legal (a leading pattern vert) but
        // misleading; normalized to a plain alternation here.
        matches!(
            self.token_type(),
            TokenType::Comment
                | TokenType::Text
                | TokenType::Emphasis
                | TokenType::Strong
                | TokenType::Monospace
                | TokenType::Mark
                | TokenType::NewLineChar
                | TokenType::UnconstrainedEmphasis
                | TokenType::UnconstrainedStrong
                | TokenType::UnconstrainedMonospace
                | TokenType::UnconstrainedMark
                | TokenType::CharRef
                | TokenType::InlineStyle
                | TokenType::InlineMacroClose
        )
    }

    /// Whether this token may appear in the document header: a level-1
    /// heading, an attribute entry, or any inline token.
    pub fn can_be_in_document_header(&self) -> bool {
        matches!(
            self.token_type(),
            TokenType::Heading1 | TokenType::Attribute
        ) || self.is_inline()
    }

    /// For `StartTag`/`EndTag` tokens, extracts NAME from lexemes of the
    /// form `tag::NAME[]` / `end::NAME[]` (5-byte prefix, 2-byte `[]`
    /// suffix). Returns `None` for other token kinds.
    ///
    /// Uses checked slicing so a malformed or too-short lexeme yields
    /// `None` instead of the previous out-of-bounds slice panic.
    pub fn tag(&self) -> Option<String> {
        if matches!(self.token_type(), TokenType::StartTag | TokenType::EndTag) {
            return self
                .lexeme
                .get(5..self.lexeme.len().saturating_sub(2))
                .map(str::to_string);
        }
        None
    }

    /// Post-lexing fixups:
    ///
    /// * anchors/cross-references containing a space or newline are
    ///   demoted to plain `Text`;
    /// * recognized character references get their replacement glyph
    ///   stored in `literal`.
    pub fn validate(&mut self) {
        match self.token_type() {
            TokenType::BlockAnchor | TokenType::CrossReference => {
                if self.lexeme.contains(' ') || self.lexeme.contains('\n') {
                    self.token_type = TokenType::Text
                }
            }
            TokenType::CharRef => match self.lexeme.as_str() {
                "(C)" => self.literal = Some("©".into()),
                "(R)" => self.literal = Some("®".into()),
                "(TM)" => self.literal = Some("™".into()),
                "..." => self.literal = Some("…".into()),
                "->" => self.literal = Some("→".into()),
                "=>" => self.literal = Some("⇒".into()),
                "<-" => self.literal = Some("←".into()),
                "<=" => self.literal = Some("⇐".into()),
                "--" => self.literal = Some("—".into()),
                "'" => self.literal = Some("’".into()),
                _ => {}
            },
            _ => {}
        }
    }
}
/// Every kind of token the lexer can emit.
///
/// Variant declaration order is preserved from the original so derived
/// discriminants are unchanged.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    // --- line-level structure ---
    NewLineChar,
    LineContinuation,
    ThematicBreak,
    PageBreak,
    Comment,
    // --- delimited blocks ---
    PassthroughBlock,
    SidebarBlock,
    SourceBlock,
    QuoteVerseBlock,
    CommentBlock,
    LiteralBlock,
    ExampleBlock,
    OpenBlock,
    // --- lists and callouts ---
    OrderedListItem,
    UnorderedListItem,
    DescriptionListMarker,
    CodeCallout,
    CodeCalloutListItem,
    BlockLabel,
    // --- headings ---
    Heading1,
    Heading2,
    Heading3,
    Heading4,
    Heading5,
    // --- admonition paragraphs ---
    NotePara,
    TipPara,
    ImportantPara,
    CautionPara,
    WarningPara,
    BlockContinuation,
    // --- tables ---
    Table,
    TableCell,
    // --- constrained inline formatting ---
    Strong,
    Emphasis,
    Monospace,
    Literal,
    Mark,
    Superscript,
    Subscript,
    // --- unconstrained inline formatting ---
    UnconstrainedStrong,
    UnconstrainedEmphasis,
    UnconstrainedMonospace,
    UnconstrainedLiteral,
    UnconstrainedMark,
    // --- macros ---
    BlockImageMacro,
    InlineImageMacro,
    LinkMacro,
    FootnoteMacro,
    PassthroughInlineMacro,
    InlineMacroClose,
    // --- plain content ---
    Hyperlink,
    Email,
    Text,
    CharRef,
    // --- typographic quotes ---
    OpenDoubleQuote,
    CloseDoubleQuote,
    OpenSingleQuote,
    CloseSingleQuote,
    Eof,
    InlineStyle,
    // --- includes and tag filtering ---
    Include,
    StartTag,
    EndTag,
    // --- anchors, attributes, references ---
    BlockAnchor,
    ElementAttributes,
    CrossReference,
    Attribute,
    AttributeReference,
}
impl TokenType {
    /// Maps a delimiter character to its delimited-block token type.
    ///
    /// # Errors
    /// Returns [`TokenError::CharacterMatch`] for characters that do not
    /// open a delimited block.
    pub(crate) fn block_from_char(c: char) -> Result<TokenType, TokenError> {
        let block = match c {
            '+' => Self::PassthroughBlock,
            '*' => Self::SidebarBlock,
            '-' => Self::SourceBlock,
            '_' => Self::QuoteVerseBlock,
            '/' => Self::CommentBlock,
            '=' => Self::ExampleBlock,
            '.' => Self::LiteralBlock,
            _ => return Err(TokenError::CharacterMatch),
        };
        Ok(block)
    }

    /// Whether a newline immediately following this token should be
    /// discarded.
    pub(crate) fn clears_newline_after(&self) -> bool {
        matches!(
            self,
            Self::ElementAttributes
                | Self::SidebarBlock
                | Self::OpenBlock
                | Self::QuoteVerseBlock
                | Self::ExampleBlock
        )
    }
}
#[cfg(test)]
mod tests {
    use super::{Token, TokenType};
    use rstest::rstest;

    /// Anchors/cross-references containing a space must demote to `Text`.
    #[rstest]
    #[case::cross_references(TokenType::CrossReference)]
    #[case::block_anchor(TokenType::BlockAnchor)]
    fn invalid_space_invalidates_to_text(#[case] token_type: TokenType) {
        let mut token = Token::new(token_type, " ".to_string(), None, 1, 1, 1, vec![]);
        token.validate();
        assert_eq!(token.token_type(), TokenType::Text)
    }

    /// `tag()` extracts NAME from a `tag::NAME[]` start-tag lexeme.
    #[rstest]
    #[case("foo")]
    #[case("longer_than_foo")]
    #[case("a")]
    fn start_tag_extraction(#[case] tag: &str) {
        let lexeme = format!("tag::{}[]", tag);
        let token = Token::new(
            TokenType::StartTag,
            lexeme.clone(),
            Some(lexeme),
            1,
            1,
            1,
            vec![],
        );
        assert_eq!(token.tag(), Some(tag.to_string()))
    }

    /// `tag()` extracts NAME from an `end::NAME[]` end-tag lexeme.
    #[rstest]
    #[case("foo")]
    #[case("longer_than_foo")]
    #[case("a")]
    fn end_tag_extraction(#[case] tag: &str) {
        let lexeme = format!("end::{}[]", tag);
        let token = Token::new(
            // Fix: previously constructed with `TokenType::StartTag`, so
            // the `EndTag` branch of `tag()` was never exercised.
            TokenType::EndTag,
            lexeme.clone(),
            Some(lexeme),
            1,
            1,
            1,
            vec![],
        );
        assert_eq!(token.tag(), Some(tag.to_string()))
    }
}