use std::collections::{HashSet};
use std::fmt::{Debug, Display, Formatter};
use std::ops::Deref;
use std::rc::Rc;
use std::slice::Iter;
use crate::{Dictionary, Pattern, Value, Table, Text, Element, Composition, Attribute, AttributeValue, Entry};
use crate::lex::{lex, LexError, Position, Token};
/// Parses `document` as a single expression.
///
/// The text is tokenized and handed to `parse_expression_document`. The
/// parse only succeeds when the cursor ends up on the end token; anything
/// left over is reported as an expectation error for `TokenType::End`.
pub fn parse_expression_str(document: &str) -> Result<ParsedValue> {
    let tokens = tokenize(document)?;
    let mut interned = HashSet::new();
    let mut cursor = TokenIter::new(tokens.iter());
    let value = parse_expression_document(&mut cursor, &mut interned)?;
    match cursor.t0 {
        Token::End(..) => Ok(value),
        _ => cursor.expectation_error(&[TokenType::End]),
    }
}
/// Parses `document` as a dictionary.
///
/// The text is tokenized and handed to `parse_dictionary_document`. Any
/// token other than the end token remaining afterwards is reported as an
/// expectation error for `TokenType::End`.
pub fn parse_dictionary_str(document: &str) -> Result<ParsedDictionary> {
    let tokens = tokenize(document)?;
    let mut interned = HashSet::new();
    let mut cursor = TokenIter::new(tokens.iter());
    let dictionary = parse_dictionary_document(&mut cursor, &mut interned)?;
    if matches!(cursor.t0, Token::End(..)) {
        Ok(dictionary)
    } else {
        cursor.expectation_error(&[TokenType::End])
    }
}
/// Parses `document` as a table.
///
/// The text is tokenized and handed to `parse_table_document`. Any token
/// other than the end token remaining afterwards is reported as an
/// expectation error for `TokenType::End`.
pub fn parse_table_str(document: &str) -> Result<ParsedTable> {
    let tokens = tokenize(document)?;
    let mut interned = HashSet::new();
    let mut cursor = TokenIter::new(tokens.iter());
    let table = parse_table_document(&mut cursor, &mut interned)?;
    let fully_consumed = matches!(cursor.t0, Token::End(..));
    if !fully_consumed {
        return cursor.expectation_error(&[TokenType::End]);
    }
    Ok(table)
}
fn tokenize(document: &str) -> Result<Vec<Token>> {
let chars = document.chars();
let tokens = match lex(chars) {
Ok(tokens) => tokens,
Err(error) => {
return match error {
LexError::EscapeEOS => Err(ParseError::EscapingEndOfStream),
LexError::UnclosedQuotation(at) => Err(ParseError::UnclosedQuotation(at)),
LexError::InvalidEscapeSequence(at) => Err(ParseError::InvalidEscapeSequence(at)),
LexError::InvalidHashSequence(at) => Err(ParseError::IllegalHashSequence(at)),
LexError::UnclosedTextBlockTag(at) => Err(ParseError::UnclosedTextBlockTag(at)),
LexError::InvalidTextBlockConfiguration(at) => Err(ParseError::InvalidTextBlockConfiguration(at)),
};
}
};
Ok(tokens)
}
/// Result alias used by the parsing functions in this file; the error type is always `ParseError`.
pub type Result<T> = std::result::Result<T, ParseError>;
/// Cursor over a slice of lexed tokens that keeps a window of four tokens.
///
/// `t0` is the current token; `t1`, `t2` and `t3` are the tokens that follow
/// it, in that order.
struct TokenIter<'a> {
// Tokens that have not been loaded into the window yet.
tokens: Iter<'a, Token>,
// Current token.
t0: &'a Token,
// First lookahead token.
t1: &'a Token,
// Second lookahead token.
t2: &'a Token,
// Third lookahead token.
t3: &'a Token,
}
impl<'a> TokenIter<'a> {
    /// Creates a cursor and loads the first four tokens into the window.
    ///
    /// Slots that cannot be filled from `tokens` keep repeating the last
    /// token that was loaded (or a placeholder end token at position zero
    /// when `tokens` is empty).
    fn new(tokens: Iter<'a, Token>) -> Self {
        let default = &Token::End(Position { index: 0, line: 0, column: 0 });
        let mut iter = TokenIter { tokens, t0: default, t1: default, t2: default, t3: default };
        for _ in 0..4 {
            iter.next();
        }
        iter
    }

    /// Advances the window by one token.
    fn next(&mut self) {
        self.t0 = self.t1;
        self.t1 = self.t2;
        self.t2 = self.t3;
        self.refill_lookahead();
    }

    /// Loads the next token into `t3`, dropping whitespace tokens that
    /// directly follow a whitespace token in `t2`, so the window never holds
    /// two adjacent whitespace tokens. Once the stream is exhausted `t3`
    /// repeats `t2`.
    ///
    /// NOTE(review): presumably the lexer always ends the stream with a
    /// non-whitespace end token; if the stream ended on whitespace this loop
    /// would not terminate — confirm against `lex`.
    fn refill_lookahead(&mut self) {
        loop {
            self.t3 = self.tokens.next().unwrap_or(self.t2);
            let both_whitespace = self.t2.to_type() == TokenType::Whitespace
                && self.t3.to_type() == TokenType::Whitespace;
            if !both_whitespace {
                break;
            }
        }
    }

    /// Advances past whitespace at the current position.
    ///
    /// Returns `true` when at least one whitespace token was skipped.
    fn skip_whitespace(&mut self) -> bool {
        let mut skipped = false;
        while matches!(self.t0, Token::Whitespace(..)) {
            skipped = true;
            self.next();
        }
        skipped
    }

    /// Position of the current token.
    fn position(&self) -> Position {
        self.t0.position()
    }

    /// Returns the current token, or the token after it when the current
    /// token is whitespace. The current position is not advanced.
    fn peek_next_glyph_token(&mut self) -> &Token {
        match self.t0 {
            Token::Whitespace(..) => {
                self.skip_lookahead_whitespace();
                self.t1
            }
            token => token,
        }
    }

    /// Removes whitespace from the first lookahead slot.
    ///
    /// The window is shifted (`t1 <- t2 <- t3 <- stream`) so that token order
    /// is preserved. Refilling `t1` directly from the stream would place a
    /// later token in front of the ones already held in `t2` and `t3`.
    fn skip_lookahead_whitespace(&mut self) {
        while matches!(self.t1, Token::Whitespace(..)) {
            self.t1 = self.t2;
            self.t2 = self.t3;
            self.refill_lookahead();
        }
    }

    /// Skips whitespace and then consumes one more token.
    fn consume_next_glyph_token(&mut self) {
        self.skip_whitespace();
        self.next();
    }

    /// Type of the current token.
    fn token_type(&self) -> TokenType {
        self.t0.to_type()
    }

    /// Builds an `Expected` error stating that one of `token_type` was
    /// expected at the current position but the current token was found.
    fn expectation_error<T>(&self, token_type: &'static [TokenType]) -> Result<T> {
        Err(ParseError::Expected(token_type, self.token_type(), self.position()))
    }
}
/// Parses an expression surrounded by optional whitespace.
///
/// When the first non-whitespace token cannot start an expression, a nil
/// value is returned that spans from the initial position to the position
/// reached after skipping the leading whitespace.
fn parse_expression_document(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedValue> {
    let from = iter.position();
    iter.skip_whitespace();
    match iter.t0 {
        Token::Word(..)
        | Token::Quotation(..)
        | Token::TextBlock(..)
        | Token::LeftBracket(..)
        | Token::LeftSquare(..)
        | Token::LeftAngle(..)
        | Token::Tilde(..) => {
            let value = parse_expression(iter, strings)?;
            iter.skip_whitespace();
            Ok(value)
        }
        _ => Ok(ParsedValue::nil(from, iter.position())),
    }
}
/// Parses a dictionary surrounded by optional whitespace.
///
/// When the first non-whitespace token cannot start a dictionary, an empty
/// dictionary is returned and nothing else is consumed.
fn parse_dictionary_document(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedDictionary> {
    iter.skip_whitespace();
    let starts_dictionary = matches!(
        iter.t0,
        Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::RightAngle(..)
    );
    if !starts_dictionary {
        return Ok(ParsedDictionary::empty());
    }
    let dictionary = parse_dictionary(iter, strings)?;
    iter.skip_whitespace();
    Ok(dictionary)
}
/// Parses a table surrounded by optional whitespace.
///
/// When the first non-whitespace token cannot start a table, an empty table
/// is returned and nothing else is consumed.
fn parse_table_document(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedTable> {
    iter.skip_whitespace();
    match iter.t0 {
        Token::Word(..)
        | Token::Quotation(..)
        | Token::TextBlock(..)
        | Token::LeftBracket(..)
        | Token::LeftSquare(..)
        | Token::LeftAngle(..)
        | Token::Tilde(..)
        | Token::Bar(..)
        | Token::RightAngle(..) => {
            let table = parse_table(iter, strings)?;
            iter.skip_whitespace();
            Ok(table)
        }
        _ => Ok(ParsedTable::empty()),
    }
}
/// Parses an expression: a term sequence optionally followed by further
/// term sequences, each introduced by `whitespace colon whitespace`.
///
/// Without such a continuation the head term sequence is returned as is.
/// With a continuation, the result depends on the head:
/// - a pattern without arguments receives the following term sequences as
///   its arguments;
/// - otherwise a single-row table is returned holding the head followed by
///   the following term sequences.
///
/// Returns an `Expected` error when the current token cannot start an
/// expression.
fn parse_expression(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedValue> {
if !matches!(iter.t0, Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::LeftBracket(..) | Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..)) {
return iter.expectation_error(&[TokenType::Word, TokenType::Quotation, TokenType::TextBlock, TokenType::LeftBracket, TokenType::LeftSquare, TokenType::LeftAngle, TokenType::Tilde]);
}
let from = iter.position();
let head = parse_term_sequence(iter, strings)?;
// A continuation needs the whole four-token window to match:
// whitespace, colon, whitespace, then a token that can start a term.
let c0 = matches!(iter.t0, Token::Whitespace(..));
let c1 = matches!(iter.t1, Token::Colon(..));
let c2 = matches!(iter.t2, Token::Whitespace(..));
let c3 = matches!(iter.t3, Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::LeftBracket(..) | Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..));
if !(c0 && c1 && c2 && c3) {
return Ok(head);
}
let mut tail = vec![];
loop {
// Consume the whitespace, the colon and the whitespace after it.
iter.next(); iter.next(); iter.next();
let e = parse_term_sequence(iter, strings)?;
tail.push(e);
let c0 = matches!(iter.t0, Token::Whitespace(..));
let c1 = matches!(iter.t1, Token::Colon(..));
let c2 = matches!(iter.t2, Token::Whitespace(..));
let c3 = matches!(iter.t3, Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::LeftBracket(..) | Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..));
if !(c0 && c1 && c2 && c3) {
break;
}
}
let to = iter.position();
// NOTE(review): the `from`/`to` bound by this pattern shadow the outer
// ones inside the `if let` body, so a pattern returned from here keeps the
// span of the head alone rather than one that covers the tail — confirm
// that this is intended.
if let ParsedValue::Pattern(ParsedPattern { name, attributes, mut arguments }, from, to) = head {
if arguments.len() == 0 {
return Ok(ParsedValue::Pattern(ParsedPattern { name, attributes, arguments: tail }, from, to));
} else {
// The head pattern already has arguments: keep it as the first cell.
tail.insert(0, ParsedValue::Pattern(ParsedPattern { name, attributes, arguments }, from, to));
}
} else {
tail.insert(0, head);
}
// Head plus tail form one row, so the column count equals the cell count.
let len = tail.len();
Ok(ParsedValue::Table(ParsedTable { elements: tail, columns: len }, from, to))
}
/// Parses a run of adjacent terms (text, bracketed structures, bracketed
/// tables and patterns) and combines them with `ParsedValue::from_terms`.
///
/// For every pair of consecutive terms it is recorded whether whitespace
/// separated them. A tilde between two terms clears that flag again.
///
/// Returns an `Expected` error when the current token cannot start a term.
fn parse_term_sequence(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedValue> {
let mut terms: Vec<ParsedValue> = vec![];
// One flag per gap between consecutive terms, filled in by `push_term`.
let mut whitespace = vec![];
let from = iter.position();
// Whether whitespace was skipped since the previous term.
let mut after_whitespace = false;
if !matches!(iter.t0, Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::LeftBracket(..) | Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..)) {
return iter.expectation_error(&[TokenType::Word, TokenType::Quotation, TokenType::TextBlock, TokenType::LeftBracket, TokenType::LeftSquare, TokenType::LeftAngle, TokenType::Tilde]);
}
loop {
match iter.t0 {
Token::Word(from, word) => {
// Merge the following words into one text term: whitespace between
// words becomes a single space, a tilde joins them without a space.
let mut text = String::new();
text.push_str(word);
let mut to = from;
iter.next();
let mut interspace = false;
loop {
match iter.t0 {
Token::Word(at, word) => {
iter.next();
if interspace {
text.push(' ');
interspace = false;
}
text.push_str(word);
to = at;
}
Token::Whitespace(..) => {
// Only continue the text when a word or tilde follows the whitespace.
if !matches!(iter.peek_next_glyph_token(), Token::Word(..) | Token::Tilde(..)) {
break;
}
iter.skip_whitespace();
interspace = true;
}
Token::Tilde(..) => {
iter.next();
iter.skip_whitespace();
interspace = false;
}
_ => break,
}
}
// This `to` shadows the one tracked in the loop above.
let to = iter.position();
let str = store_str(strings, &text);
let text = ParsedText { str };
let text = ParsedValue::Text(text, *from, to);
push_term(&mut terms, &mut whitespace, &mut after_whitespace, text);
}
Token::Quotation(..) | Token::TextBlock(..) => {
let from = iter.position();
let text = parse_text_token(iter, strings)?;
let text = ParsedText { str: text };
let to = iter.position();
let text = ParsedValue::Text(text, from, to);
push_term(&mut terms, &mut whitespace, &mut after_whitespace, text);
},
Token::LeftBracket(..) => push_term(&mut terms, &mut whitespace, &mut after_whitespace, parse_bracketed_structure(iter, strings)?),
Token::LeftSquare(..) => push_term(&mut terms, &mut whitespace, &mut after_whitespace, parse_bracketed_table(iter, strings)?),
Token::LeftAngle(..) => push_term(&mut terms, &mut whitespace, &mut after_whitespace, parse_pattern(iter, strings)?),
Token::Whitespace(..) => {
// Whitespace only belongs to the sequence when another term follows it.
if !matches!(iter.peek_next_glyph_token(), Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::LeftBracket(..) | Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..)) {
break;
}
iter.skip_whitespace();
after_whitespace = true;
}
Token::Tilde(..) => {
// A tilde discards the whitespace seen so far and any that follows it.
iter.next();
iter.skip_whitespace();
after_whitespace = false;
}
_ => break,
}
};
let to = iter.position();
return Ok(ParsedValue::from_terms(from, to, terms, whitespace));
// Appends `component` to `terms`. For every term but the first, records
// whether whitespace preceded it and resets the flag.
fn push_term(terms: &mut Vec<ParsedValue>, whitespace: &mut Vec<bool>, after_whitespace: &mut bool, component: ParsedValue) {
if terms.len() != 0 {
if *after_whitespace {
whitespace.push(true);
} else {
whitespace.push(false);
};
*after_whitespace = false;
};
terms.push(component);
}
}
/// Dispatches on the current token: a text token starts a flow dictionary,
/// a right angle starts a bullet dictionary, anything else is an error.
fn parse_dictionary(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedDictionary> {
    if matches!(iter.t0, Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..)) {
        parse_flow_dictionary(iter, strings)
    } else if matches!(iter.t0, Token::RightAngle(..)) {
        parse_bullet_dictionary(iter, strings)
    } else {
        iter.expectation_error(&[TokenType::Word, TokenType::Quotation, TokenType::TextBlock, TokenType::RightAngle])
    }
}
/// Parses `key: value` entries separated by semicolons.
///
/// The colon must directly follow the key token. A trailing semicolon that
/// is not followed by another key is consumed and ends the dictionary.
fn parse_flow_dictionary(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedDictionary> {
    match iter.t0 {
        Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) => {}
        _ => return iter.expectation_error(&[TokenType::Word, TokenType::Quotation, TokenType::TextBlock]),
    }
    let mut entries = Vec::new();
    loop {
        let key = parse_text_token(iter, strings)?;
        match iter.t0 {
            Token::Colon(..) => iter.next(),
            _ => return iter.expectation_error(&[TokenType::Colon]),
        };
        iter.skip_whitespace();
        let value = parse_expression(iter, strings)?;
        entries.push(ParsedEntry(key, value));

        let separator_follows = matches!(iter.peek_next_glyph_token(), Token::Semicolon(..));
        if !separator_follows {
            break;
        }
        iter.consume_next_glyph_token();

        let key_follows = matches!(
            iter.peek_next_glyph_token(),
            Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..)
        );
        if !key_follows {
            break;
        }
        iter.skip_whitespace();
    }
    Ok(ParsedDictionary { entries })
}
/// Parses entries that are each introduced by a right angle:
/// `> key: value > key: value ...`.
///
/// The dictionary ends as soon as the next non-whitespace token is not a
/// right angle.
fn parse_bullet_dictionary(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedDictionary> {
    match iter.t0 {
        Token::RightAngle(..) => {}
        _ => return iter.expectation_error(&[TokenType::RightAngle]),
    }
    let mut entries = Vec::new();
    loop {
        // Step over the bullet that introduces this entry.
        iter.next();
        iter.skip_whitespace();
        let key = parse_text_token(iter, strings)?;
        match iter.t0 {
            Token::Colon(..) => iter.next(),
            _ => return iter.expectation_error(&[TokenType::Colon]),
        };
        iter.skip_whitespace();
        let value = parse_expression(iter, strings)?;
        entries.push(ParsedEntry(key, value));

        let bullet_follows = matches!(iter.peek_next_glyph_token(), Token::RightAngle(..));
        if !bullet_follows {
            break;
        }
        iter.skip_whitespace();
    }
    Ok(ParsedDictionary { entries })
}
fn parse_table(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedTable> {
match iter.peek_next_glyph_token() {
Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::LeftBracket(..)| Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..) => parse_flow_table(iter, strings),
Token::Bar(..) => {
let (entries, columns) = parse_grid_table(iter, strings)?;
Ok(ParsedTable { elements: entries, columns })
},
Token::RightAngle(..) => {
let (entries, columns) = parse_bullet_table(iter, strings)?;
Ok(ParsedTable { elements: entries, columns })
}
Token::Whitespace(..) => unreachable!(),
_ => {
iter.skip_whitespace();
iter.expectation_error(&[TokenType::Word, TokenType::Quotation, TokenType::TextBlock, TokenType::LeftBracket, TokenType::LeftSquare, TokenType::LeftAngle, TokenType::Tilde, TokenType::Bar, TokenType::RightAngle])
}
}
}
/// Parses a table whose cells are separated by bars and whose rows are
/// separated by semicolons.
///
/// The first row fixes the column count. Every later row must have the same
/// number of cells, otherwise `ParseError::ExpectedColumns` is returned. A
/// trailing semicolon that is not followed by an expression ends the table.
fn parse_flow_table(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedTable> {
let mut entries = vec![];
let mut columns = 0;
// First row: count the cells to establish the column count.
loop { let entry = parse_expression(iter, strings)?;
entries.push(entry);
columns += 1;
match iter.peek_next_glyph_token() {
Token::Semicolon(..) => {
iter.consume_next_glyph_token();
// Another row only follows when an expression comes after the semicolon.
if matches!(iter.peek_next_glyph_token(), Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::LeftBracket(..) | Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..)) {
iter.skip_whitespace();
break;
} else {
return Ok(ParsedTable { elements: entries, columns });
};
}
Token::Bar(..) => {
iter.consume_next_glyph_token();
iter.skip_whitespace();
}
_ => {
return Ok(ParsedTable { elements: entries, columns });
}
}
};
// Remaining rows: `c` counts the cells of the current row.
loop {
let mut c = 0;
loop {
let entry = parse_expression(iter, strings)?;
entries.push(entry);
c += 1;
match iter.peek_next_glyph_token() {
Token::Semicolon(..) => {
iter.consume_next_glyph_token();
if c != columns {
return Err(ParseError::ExpectedColumns(iter.position(), c, columns));
};
if matches!(iter.peek_next_glyph_token(), Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::LeftBracket(..) | Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..)) {
iter.skip_whitespace();
break;
} else {
return Ok(ParsedTable { elements: entries, columns });
};
}
Token::Bar(..) => {
iter.consume_next_glyph_token();
iter.skip_whitespace();
},
_ => {
// The table ends here; the last row must still be complete.
if c != columns {
return Err(ParseError::ExpectedColumns(iter.position(), c, columns));
};
return Ok(ParsedTable { elements: entries, columns });
}
}
};
}
}
/// Parses a table in which every cell is introduced by a bar.
///
/// A bar that is followed by another bar (with optional whitespace between
/// them) ends the current row. Returns the cells in row order together with
/// the column count fixed by the first row.
///
/// Errors: `ZeroColumns` when the first row has no cells, `ExpectedColumns`
/// when a later row has a different number of cells than the first, and an
/// `Expected` error when a bar is missing where a cell should start.
fn parse_grid_table(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<(Vec<ParsedValue>, usize)> {
let mut entries = vec![];
let mut columns = 0;
// First row: count the cells to establish the column count.
loop { if !matches!(iter.t0, Token::Bar(..)) {
return iter.expectation_error(&[TokenType::Bar]);
};
iter.next();
match iter.peek_next_glyph_token() {
Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::LeftBracket(..) | Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..) => {
iter.skip_whitespace();
let entry = parse_expression(iter, strings)?;
columns += 1;
entries.push(entry);
iter.skip_whitespace();
}
Token::Bar(..) => {
// The bar just consumed closed the row; the next bar opens a new one.
iter.skip_whitespace();
if columns == 0 {
return Err(ParseError::ZeroColumns(iter.position()));
};
break;
}
_ => {
// Nothing that continues the table follows: the table ends after the first row.
if columns == 0 {
return Err(ParseError::ZeroColumns(iter.position()));
};
return Ok((entries, columns));
}
}
};
// Remaining rows: `c` counts the cells of the current row.
loop { let mut c = 0;
loop {
if !matches!(iter.t0, Token::Bar(..)) {
return iter.expectation_error(&[TokenType::Bar]);
};
iter.next();
match iter.peek_next_glyph_token() {
Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::LeftBracket(..) | Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..) => {
iter.skip_whitespace();
let entry = parse_expression(iter, strings)?;
c += 1;
entries.push(entry);
iter.skip_whitespace();
}
Token::Bar(..) => {
iter.skip_whitespace();
if c != columns {
return Err(ParseError::ExpectedColumns(iter.position(), c, columns));
};
break;
}
_ => {
if c != columns {
return Err(ParseError::ExpectedColumns(iter.position(), c, columns));
};
return Ok((entries, columns));
}
};
};
};
}
/// Parses a table in which every row is introduced by a right angle and
/// further cells of a row are introduced by a bar.
///
/// Returns the cells in row order together with the column count fixed by
/// the first row. A later row with a different number of cells yields
/// `ParseError::ExpectedColumns`.
fn parse_bullet_table(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<(Vec<ParsedValue>, usize)> {
let mut entries = vec![];
// The first row always holds at least the cell after its bullet.
let mut columns = 1;
if !matches!(iter.t0, Token::RightAngle(..)) {
return iter.expectation_error(&[TokenType::RightAngle]);
}
iter.next();
iter.skip_whitespace();
let entry = parse_expression(iter, strings)?;
entries.push(entry);
// Rest of the first row: every bar adds a column.
loop { if !matches!(iter.peek_next_glyph_token(), Token::RightAngle(..) | Token::Bar(..)) {
return Ok((entries, columns));
}
iter.skip_whitespace();
match iter.t0 {
Token::Bar(..) => {
columns += 1;
iter.next();
iter.skip_whitespace();
let entry = parse_expression(iter, strings)?;
entries.push(entry);
}
// A new bullet starts the second row.
Token::RightAngle(..) => break,
_ => unreachable!(),
}
}
// Remaining rows: the current token is the bullet of the row; `c` counts its cells.
loop { let mut c = 1;
iter.next();
iter.skip_whitespace();
let entry = parse_expression(iter, strings)?;
entries.push(entry);
loop {
if !matches!(iter.peek_next_glyph_token(), Token::RightAngle(..) | Token::Bar(..)) {
// The table ends here; the last row must still be complete.
return if columns == c {
Ok((entries, columns))
} else {
Err(ParseError::ExpectedColumns(iter.position(), c, columns))
}
}
iter.skip_whitespace();
match iter.t0 {
Token::Bar(..) => {
c += 1;
iter.next();
iter.skip_whitespace();
let entry = parse_expression(iter, strings)?;
entries.push(entry);
}
Token::RightAngle(..) => {
if columns == c {
break;
} else {
return Err(ParseError::ExpectedColumns(iter.position(), c, columns));
}
}
_ => unreachable!(),
}
}
}
}
/// Parses a tag of the form `<name attributes>` and returns the interned
/// name together with the attributes.
///
/// The name must be a word directly after the left angle. A name starting
/// with `#` is rejected with `ParseError::PatternHashName`.
fn parse_tag(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<(Rc<str>, Vec<ParsedAttribute>)> {
    match iter.t0 {
        Token::LeftAngle(..) => iter.next(),
        _ => return iter.expectation_error(&[TokenType::LeftAngle]),
    };
    let raw_name = if let Token::Word(_, word) = iter.t0 {
        word
    } else {
        return iter.expectation_error(&[TokenType::Word]);
    };
    iter.next();
    let name = store_str(strings, raw_name);
    if name.starts_with("#") {
        return Err(ParseError::PatternHashName(iter.position()));
    }
    // A tag closed directly after its name has no attributes.
    if let Token::RightAngle(..) = iter.t0 {
        iter.next();
        return Ok((name, Vec::new()));
    }
    iter.skip_whitespace();
    let attributes = parse_attributes(iter, strings)?;
    iter.skip_whitespace();
    match iter.t0 {
        Token::RightAngle(..) => {
            iter.next();
            Ok((name, attributes))
        }
        _ => iter.expectation_error(&[TokenType::RightAngle]),
    }
}
/// Parses one or more attributes. Each is a word key, optionally followed
/// directly by a colon and a text value.
///
/// Parsing stops when the next non-whitespace token is not a word.
fn parse_attributes(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<Vec<ParsedAttribute>> {
    if !matches!(iter.t0, Token::Word(..)) {
        return iter.expectation_error(&[TokenType::Word]);
    }
    let mut attributes = Vec::new();
    loop {
        // The current token is a word: checked above on entry, and by the
        // lookahead below on every further iteration.
        let raw_key = match iter.t0 {
            Token::Word(_, key) => key,
            _ => unreachable!(),
        };
        iter.next();
        let key = store_str(strings, raw_key);
        let value = if matches!(iter.t0, Token::Colon(..)) {
            iter.next();
            iter.skip_whitespace();
            Some(parse_text_token(iter, strings)?)
        } else {
            None
        };
        attributes.push(ParsedAttribute(key, value));

        let word_follows = matches!(iter.peek_next_glyph_token(), Token::Word(..));
        if !word_follows {
            return Ok(attributes);
        }
        iter.skip_whitespace();
    }
}
/// Parses a tag and, when a colon directly follows it, the colon-separated
/// arguments of the pattern.
fn parse_pattern(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedValue> {
    let from = iter.position();
    let (name, attributes) = parse_tag(iter, strings)?;
    let arguments = match iter.t0 {
        Token::Colon(..) => parse_arguments(iter, strings)?,
        _ => Vec::new(),
    };
    let pattern = ParsedPattern { name, attributes, arguments };
    Ok(ParsedValue::Pattern(pattern, from, iter.position()))
}
/// Parses the arguments of a pattern; each argument is introduced by a
/// colon with no whitespace around it.
///
/// An argument is a single text token, a bracketed structure, a bracketed
/// table or a tag (which becomes a pattern without arguments). A diamond
/// followed by a colon makes the pattern that follows the final argument.
fn parse_arguments(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<Vec<ParsedValue>> {
    if !matches!(iter.t0, Token::Colon(..)) {
        return iter.expectation_error(&[TokenType::Colon]);
    }
    let mut arguments = Vec::new();
    loop {
        // Consume the colon that introduces this argument.
        iter.next();
        let argument = match iter.t0 {
            Token::Word(.., s) | Token::Quotation(.., s) | Token::TextBlock(.., s) => {
                let from = iter.position();
                iter.next();
                let to = iter.position();
                ParsedValue::Text(ParsedText { str: store_str(strings, s) }, from, to)
            }
            Token::LeftBracket(..) => parse_bracketed_structure(iter, strings)?,
            Token::LeftSquare(..) => parse_bracketed_table(iter, strings)?,
            Token::LeftAngle(..) => {
                let from = iter.position();
                let (name, attributes) = parse_tag(iter, strings)?;
                let name = store_str(strings, &name);
                let to = iter.position();
                ParsedValue::Pattern(ParsedPattern { name, attributes, arguments: Vec::new() }, from, to)
            }
            Token::Diamond(..) => {
                iter.next();
                if !matches!(iter.t0, Token::Colon(..)) {
                    return iter.expectation_error(&[TokenType::Colon]);
                }
                iter.next();
                // The pattern after `<>:` takes all remaining arguments itself.
                arguments.push(parse_pattern(iter, strings)?);
                return Ok(arguments);
            }
            _ => return iter.expectation_error(&[TokenType::Word, TokenType::Quotation, TokenType::TextBlock, TokenType::LeftBracket, TokenType::LeftSquare, TokenType::LeftAngle, TokenType::Diamond]),
        };
        arguments.push(argument);
        if !matches!(iter.t0, Token::Colon(..)) {
            return Ok(arguments);
        }
    }
}
fn parse_bracketed_structure(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedValue> {
if !matches!(iter.t0, Token::LeftBracket(..)) {
return iter.expectation_error(&[TokenType::LeftBracket]);
};
let from = iter.position();
iter.next();
iter.skip_whitespace();
match iter.t0 {
Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) => {
match iter.t1 {
Token::Colon(..) => {
let dictionary = parse_dictionary(iter, strings)?;
iter.skip_whitespace();
if !matches!(iter.t0, Token::RightBracket(..)) {
return iter.expectation_error(&[TokenType::RightBracket]);
}
iter.next();
let to = iter.position();
Ok(ParsedValue::Dictionary(dictionary, from, to))
}
Token::Whitespace(..) | Token::Word(..) | Token::Quotation(..) | Token::TextBlock(..) | Token::LeftBracket(..) | Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..) | Token::RightBracket(..) => {
let value = parse_expression(iter, strings);
iter.skip_whitespace();
if !matches!(iter.t0, Token::RightBracket(..)) {
return iter.expectation_error(&[TokenType::RightBracket]);
}
iter.next();
value
}
_ => {
iter.next();
return iter.expectation_error(&[TokenType::Colon, TokenType::Whitespace, TokenType::Word, TokenType::Quotation, TokenType::TextBlock, TokenType::LeftBracket, TokenType::LeftSquare, TokenType::LeftAngle, TokenType::Tilde, TokenType::RightBracket]);
}
}
}
Token::LeftBracket(..) | Token::LeftSquare(..) | Token::LeftAngle(..) | Token::Tilde(..) => { let value = parse_expression(iter, strings)?;
iter.skip_whitespace();
if !matches!(iter.t0, Token::RightBracket(..)) {
return iter.expectation_error(&[TokenType::RightBracket]);
};
iter.next();
Ok(value)
}
Token::RightBracket(..) => { let dictionary = ParsedDictionary::empty();
iter.next();
let to = iter.position();
Ok(ParsedValue::Dictionary(dictionary, from, to))
}
Token::RightAngle(..) => { let dictionary = parse_dictionary(iter, strings)?;
iter.skip_whitespace();
if !matches!(iter.t0, Token::RightBracket(..)) {
return iter.expectation_error(&[TokenType::RightBracket]);
}
iter.next();
let to = iter.position();
Ok(ParsedValue::Dictionary(dictionary, from, to))
}
Token::Whitespace(..) => unreachable!(),
_ => return iter.expectation_error(&[TokenType::Word, TokenType::Quotation, TokenType::TextBlock, TokenType::LeftBracket, TokenType::LeftSquare, TokenType::LeftAngle, TokenType::Tilde, TokenType::RightBracket, TokenType::RightAngle]),
}
}
/// Parses a `[ ... ]` table. `[]` yields an empty table; otherwise the
/// content is parsed with `parse_table` and must be followed by the closing
/// square bracket.
fn parse_bracketed_table(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<ParsedValue> {
    let from = iter.position();
    if !matches!(iter.t0, Token::LeftSquare(..)) {
        return iter.expectation_error(&[TokenType::LeftSquare]);
    }
    iter.next();
    iter.skip_whitespace();
    match iter.t0 {
        Token::RightSquare(..) => {
            iter.next();
            let to = iter.position();
            Ok(ParsedValue::Table(ParsedTable::empty(), from, to))
        }
        Token::Word(..)
        | Token::Quotation(..)
        | Token::TextBlock(..)
        | Token::LeftBracket(..)
        | Token::LeftSquare(..)
        | Token::LeftAngle(..)
        | Token::Tilde(..)
        | Token::Bar(..)
        | Token::RightAngle(..) => {
            let table = parse_table(iter, strings)?;
            iter.skip_whitespace();
            match iter.t0 {
                Token::RightSquare(..) => {
                    iter.next();
                    let to = iter.position();
                    Ok(ParsedValue::Table(table, from, to))
                }
                _ => iter.expectation_error(&[TokenType::RightSquare]),
            }
        }
        // Whitespace was skipped just above.
        Token::Whitespace(..) => unreachable!(),
        _ => iter.expectation_error(&[TokenType::Word, TokenType::Quotation, TokenType::TextBlock, TokenType::LeftBracket, TokenType::LeftSquare, TokenType::LeftAngle, TokenType::Tilde, TokenType::Bar, TokenType::RightAngle, TokenType::RightSquare]),
    }
}
/// Consumes a single word, quotation or text block token and returns its
/// interned text. Any other token yields an `Expected` error and is left
/// unconsumed.
fn parse_text_token(iter: &mut TokenIter, strings: &mut HashSet<Rc<str>>) -> Result<Rc<str>> {
    if let Token::Word(_, text) | Token::Quotation(_, text) | Token::TextBlock(_, text) = iter.t0 {
        iter.next();
        return Ok(store_str(strings, text));
    }
    iter.expectation_error(&[TokenType::Word, TokenType::Quotation, TokenType::TextBlock])
}
/// Interns `candidate` in `strings`.
///
/// Returns a handle to the already stored string when an equal one exists.
/// Otherwise a new shared string is allocated, stored, and returned.
fn store_str(strings: &mut HashSet<Rc<str>>, candidate: &str) -> Rc<str> {
    match strings.get(candidate) {
        Some(existing) => Rc::clone(existing),
        None => {
            let interned: Rc<str> = Rc::from(candidate);
            strings.insert(Rc::clone(&interned));
            interned
        }
    }
}
/// Errors produced while lexing or parsing a document.
#[derive(Clone)]
pub enum ParseError {
/// Lexer error `LexError::EscapeEOS`; reported as "Escaping EOS.".
EscapingEndOfStream,
/// A quotation was not closed.
UnclosedQuotation(Position),
/// A grid table row without any cells.
ZeroColumns(Position),
/// A table row with the wrong number of cells: position, number of cells
/// found, number of columns expected.
ExpectedColumns(Position, usize, usize),
/// An unexpected token: the accepted token types, the token type found,
/// and its position.
Expected(&'static[TokenType], TokenType, Position),
/// Lexer error `LexError::InvalidEscapeSequence`.
InvalidEscapeSequence(Position),
/// Lexer error `LexError::InvalidHashSequence`.
IllegalHashSequence(Position),
/// A pattern name starting with `#`.
PatternHashName(Position),
/// Lexer error `LexError::UnclosedTextBlockTag`.
UnclosedTextBlockTag(Position),
/// Lexer error `LexError::InvalidTextBlockConfiguration`.
InvalidTextBlockConfiguration(Position),
}
/// Debug output is the human-readable message built by `error_to_string`.
impl Debug for ParseError {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let message = error_to_string(self);
        f.write_str(&message)
    }
}
pub fn error_to_string(error: &ParseError) -> String {
match error {
ParseError::EscapingEndOfStream => {
format!("Escaping EOS.")
}
ParseError::UnclosedQuotation(at) => {
format!("Unclosed quote at {}:{}.", at.line, at.column)
}
ParseError::ZeroColumns(at) => {
format!("Row with zero columns at {}:{}.", at.line, at.column)
}
ParseError::ExpectedColumns(at, c, columns) => {
format!("Expected {} columns but found {} at {}:{}.", columns, c, at.line, at.column)
}
ParseError::Expected(expected, found, at) => {
format!("Expected {} but found {} at {}:{}.", list(expected), found, at.line, at.column)
}
ParseError::InvalidEscapeSequence(at) => {
format!("Encountered unknown escape sequence at {}:{}.", at.line, at.column)
}
ParseError::IllegalHashSequence(at) => {
format!("Encountered unknown hash sequence at {}:{}.", at.line, at.column)
}
ParseError::PatternHashName(at) => {
format!("Pattern name cannot start with hash sign at {}:{}", at.line, at.column)
}
ParseError::UnclosedTextBlockTag(at) => {
format!("Encountered end in multiline quote opening at {}:{}", at.line, at.column)
}
ParseError::InvalidTextBlockConfiguration(at) => {
format!("Encountered invalid configuration in multiline quote opening at {}:{}", at.line, at.column)
}
}
}
/// Joins the display forms of `expected` with `" | "`; an empty slice gives
/// an empty string.
fn list(expected: &[TokenType]) -> String {
    expected
        .iter()
        .map(|token_type| format!("{}", token_type))
        .collect::<Vec<String>>()
        .join(" | ")
}
/// A parsed value. Every variant carries two positions, returned by
/// `ParsedValue::from` and `ParsedValue::to` respectively.
#[derive(Clone)]
pub enum ParsedValue {
/// The absence of a value.
Nil(Position, Position),
/// A text value.
Text(ParsedText, Position, Position),
/// A dictionary of key-value entries.
Dictionary(ParsedDictionary, Position, Position),
/// A table of values.
Table(ParsedTable, Position, Position),
/// Two or more adjacent terms.
Composition(ParsedComposition, Position, Position),
/// A named pattern with attributes and arguments.
Pattern(ParsedPattern, Position, Position),
}
impl ParsedValue {
    /// Creates a nil value with the given positions.
    pub fn nil(from: Position, to: Position) -> Self {
        ParsedValue::Nil(from, to)
    }

    /// Builds a value from a sequence of terms.
    ///
    /// No terms give nil and a single term is returned unchanged. Two or
    /// more terms give a composition, in which case `whitespace` must hold
    /// exactly one flag per gap between terms.
    ///
    /// # Panics
    ///
    /// Panics when there are two or more terms and `whitespace.len()` is not
    /// `terms.len() - 1`.
    pub fn from_terms(from: Position, to: Position, mut terms: Vec<ParsedValue>, whitespace: Vec<bool>) -> Self {
        match terms.len() {
            0 => ParsedValue::Nil(from, to),
            1 => terms.pop().unwrap(),
            len => {
                assert_eq!(len - 1, whitespace.len());
                let composition = ParsedComposition { components: terms, whitespace };
                ParsedValue::Composition(composition, from, to)
            }
        }
    }

    /// The first of the two positions stored with the value.
    pub fn from(&self) -> Position {
        match self {
            ParsedValue::Nil(start, _)
            | ParsedValue::Text(_, start, _)
            | ParsedValue::Dictionary(_, start, _)
            | ParsedValue::Table(_, start, _)
            | ParsedValue::Composition(_, start, _)
            | ParsedValue::Pattern(_, start, _) => start.clone(),
        }
    }

    /// The second of the two positions stored with the value.
    pub fn to(&self) -> Position {
        match self {
            ParsedValue::Nil(_, end)
            | ParsedValue::Text(_, _, end)
            | ParsedValue::Dictionary(_, _, end)
            | ParsedValue::Table(_, _, end)
            | ParsedValue::Composition(_, _, end)
            | ParsedValue::Pattern(_, _, end) => end.clone(),
        }
    }
}
impl Value<ParsedValue, ParsedText, ParsedDictionary, ParsedTable, ParsedComposition, ParsedPattern> for ParsedValue {
fn as_text(&self) -> Option<&ParsedText> {
if let ParsedValue::Text(t, ..) = self {
Some(t)
} else {
None
}
}
fn as_dictionary(&self) -> Option<&ParsedDictionary> {
if let ParsedValue::Dictionary(d, ..) = self {
Some(d)
} else {
None
}
}
fn as_table(&self) -> Option<&ParsedTable> {
if let ParsedValue::Table(t, ..) = self {
Some(t)
} else {
None
}
}
fn as_composition(&self) -> Option<&ParsedComposition> {
if let ParsedValue::Composition(d, ..) = self {
Some(d)
} else {
None
}
}
fn as_pattern(&self) -> Option<&ParsedPattern> {
if let ParsedValue::Pattern(d, ..) = self {
Some(d)
} else {
None
}
}
fn is_nil(&self) -> bool {
matches!(self, ParsedValue::Nil(..))
}
fn is_text(&self) -> bool {
matches!(self, ParsedValue::Text(..))
}
fn is_dictionary(&self) -> bool {
matches!(self, ParsedValue::Dictionary(..))
}
fn is_table(&self) -> bool {
matches!(self, ParsedValue::Table(..))
}
fn is_composition(&self) -> bool {
matches!(self, ParsedValue::Composition(..))
}
fn is_pattern(&self) -> bool {
matches!(self, ParsedValue::Pattern(..))
}
}
/// A text value holding a shared string.
#[derive(PartialEq, Eq, Clone)]
pub struct ParsedText { pub str: Rc<str> }
impl Text<ParsedValue, Self, ParsedDictionary, ParsedTable, ParsedComposition, ParsedPattern> for ParsedText {
    /// Borrows the text as a string slice.
    fn as_str(&self) -> &str {
        &*self.str
    }
}
/// A dictionary: a list of key-value entries in the order they were parsed.
#[derive(Clone)]
pub struct ParsedDictionary {
// Entries in parse order.
pub entries: Vec<ParsedEntry>,
}
impl ParsedDictionary {
pub fn empty() -> Self {
ParsedDictionary { entries: vec![] }
}
}
impl Dictionary<ParsedValue, ParsedText, Self, ParsedTable, ParsedComposition, ParsedPattern> for ParsedDictionary {
    type EntryIterator<'b> = EntryIterator<'b>;

    /// Number of entries.
    fn len(&self) -> usize {
        self.entries.len()
    }

    /// Whether the dictionary has no entries.
    fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// The entry at `index`, or `None` when `index` is out of range.
    fn get(&self, index: usize) -> Option<Entry<ParsedValue>> {
        self.entries
            .get(index)
            .map(|entry| Entry(&entry.0, &entry.1))
    }

    /// Iterates over the entries in order.
    fn iter(&self) -> Self::EntryIterator<'_> {
        EntryIterator(self.entries.iter())
    }
}
/// A dictionary entry: the key and its value.
#[derive(Clone)]
pub struct ParsedEntry(Rc<str>, ParsedValue);
/// Iterator over the entries of a `ParsedDictionary`, wrapping a slice iterator.
pub struct EntryIterator<'a>(Iter<'a, ParsedEntry>);
impl<'a> Iterator for EntryIterator<'a> {
    type Item = Entry<'a, ParsedValue>;

    /// Yields the next entry as a borrowed key-value pair.
    fn next(&mut self) -> Option<Self::Item> {
        self.0.next().map(|entry| Entry(&entry.0, &entry.1))
    }
}
/// A table whose cells are stored row by row in a flat vector.
#[derive(Clone)]
pub struct ParsedTable {
// Cells in row-major order: the cell at (row, column) is at `row * columns + column`.
pub elements: Vec<ParsedValue>,
// Number of cells per row; 0 for the empty table.
pub columns: usize,
}
impl ParsedTable {
pub fn empty() -> Self {
ParsedTable { elements: vec![], columns: 0 }
}
}
impl Table<ParsedValue, ParsedText, ParsedDictionary, ParsedTable, ParsedComposition, ParsedPattern> for ParsedTable {
    type RowIterator<'b> = RowIterator<'b>;
    type EntryIterator<'b> = Iter<'b, ParsedValue>;

    /// Total number of cells.
    fn len(&self) -> usize {
        self.elements.len()
    }

    /// Number of cells per row.
    fn columns(&self) -> usize {
        self.columns
    }

    /// Number of rows; 0 when the table has zero columns.
    fn rows(&self) -> usize {
        if self.columns != 0 {
            self.elements.len() / self.columns
        } else {
            0
        }
    }

    /// Whether the table has no cells.
    fn is_empty(&self) -> bool {
        self.elements.is_empty()
    }

    /// Whether the table has at most one column.
    fn is_list(&self) -> bool {
        self.columns <= 1
    }

    /// Whether the table has at most one row.
    fn is_tuple(&self) -> bool {
        self.rows() <= 1
    }

    /// The cell at (`row`, `column`), or `None` when either index is out of
    /// range.
    fn get_entry(&self, row: usize, column: usize) -> Option<&ParsedValue> {
        // Without this check an out-of-range column would silently address
        // a cell in a later row.
        if column >= self.columns {
            return None;
        }
        let index = row.checked_mul(self.columns)?.checked_add(column)?;
        self.elements.get(index)
    }

    /// The cells of row `row`, or `None` when the row does not exist.
    fn get_row(&self, row: usize) -> Option<&[ParsedValue]> {
        if self.columns == 0 {
            return None;
        }
        // Row `row` occupies `row * columns .. (row + 1) * columns`.
        let start = row.checked_mul(self.columns)?;
        let end = start.checked_add(self.columns)?;
        self.elements.get(start..end)
    }

    /// Iterates over all cells in row-major order.
    fn iter_entries(&self) -> Self::EntryIterator<'_> {
        self.elements.iter()
    }

    /// Iterates over the rows.
    fn iter_rows(&self) -> Self::RowIterator<'_> {
        RowIterator { columns: self.columns, iter: self.elements.iter() }
    }
}
/// Iterator over the rows of a `ParsedTable`.
pub struct RowIterator<'a> {
// Number of cells taken from `iter` for each row.
columns: usize,
// Remaining cells in row-major order.
iter: Iter<'a, ParsedValue>,
}
impl<'a> Iterator for RowIterator<'a> {
    type Item = Box<[&'a ParsedValue]>;

    /// Yields the next complete row.
    ///
    /// Returns `None` when fewer than `columns` cells remain, and always
    /// when the table has zero columns. Without the zero-column check an
    /// empty table would yield empty rows forever.
    fn next(&mut self) -> Option<Self::Item> {
        if self.columns == 0 {
            return None;
        }
        let row: Vec<&'a ParsedValue> = self.iter.by_ref().take(self.columns).collect();
        if row.len() < self.columns {
            return None;
        }
        Some(row.into_boxed_slice())
    }
}
/// A parsed composition: a sequence of components plus, for each gap between
/// neighbouring components, a flag saying whether a space separates them.
#[derive(Clone)]
pub struct ParsedComposition {
    /// The solid components, in order.
    components: Vec<ParsedValue>,
    /// `whitespace[i]` is read after `components[i]` has been yielded; `true`
    /// makes the element iterator emit a space before the next component.
    whitespace: Vec<bool>,
}

// No inherent methods yet.
impl ParsedComposition {}
/// `Composition` access to a `ParsedComposition`.
impl Composition<ParsedValue, ParsedText, ParsedDictionary, ParsedTable, Self, ParsedPattern> for ParsedComposition {
    type ElementIterator<'b> = ElementIterator<'b>;

    /// Returns the number of components plus the number of whitespace slots.
    fn len(&self) -> usize {
        // NOTE(review): this counts every whitespace slot, including `false`
        // ones for which `iter` yields no `Element::Space` — confirm whether
        // `len` is meant to equal the number of elements `iter` produces.
        self.components.len() + self.whitespace.len()
    }

    /// Returns the `index`-th element in the order produced by `iter` (solid
    /// components interleaved with the spaces between them), or `None` when
    /// `index` is past the last element.
    fn get(&self, index: usize) -> Option<Element<&ParsedValue>> {
        // An empty composition has no elements; answer without touching the
        // iterator.
        if self.components.is_empty() {
            return None;
        }
        self.iter().nth(index)
    }

    /// Returns an iterator positioned before the first component.
    fn iter(&self) -> Self::ElementIterator<'_> {
        ElementIterator {
            components: &self.components,
            whitespace: &self.whitespace,
            index: 0,
            after_component: false,
        }
    }
}
/// Iterator over the elements of a composition: every component as
/// `Element::Solid`, with an `Element::Space` between two neighbouring
/// components whenever the whitespace flag for that gap is set.
pub struct ElementIterator<'b> {
    /// The components to yield.
    components: &'b [ParsedValue],
    /// `whitespace[i]` tells whether a space follows `components[i]`.
    whitespace: &'b [bool],
    /// Index of the component yielded last, or to be yielded next.
    index: usize,
    /// `true` once `components[index]` has been yielded.
    after_component: bool,
}

impl<'b> Iterator for ElementIterator<'b> {
    type Item = Element<&'b ParsedValue>;

    /// Yields the next element, or `None` after the last component (and
    /// immediately for an empty composition).
    fn next(&mut self) -> Option<Self::Item> {
        if self.after_component {
            // `components[index]` is already out; stop if it was the last.
            // Written as an addition so an empty slice cannot underflow.
            if self.index + 1 >= self.components.len() {
                return None;
            }
            let whitespace = self.whitespace[self.index];
            self.index += 1;
            if whitespace {
                // Emit the separating space first; the component at the new
                // `index` follows on the next call.
                self.after_component = false;
                return Some(Element::Space);
            }
        }
        // `get` (rather than indexing) makes an empty composition end here
        // instead of panicking.
        let component = self.components.get(self.index)?;
        self.after_component = true;
        Some(Element::Solid(component))
    }
}
/// A parsed pattern: a name together with its attributes and its arguments.
#[derive(Clone)]
pub struct ParsedPattern {
/// The pattern's name, returned by `name()`.
pub name: Rc<str>,
/// Attributes in stored order; looked up by key or by position.
pub attributes: Vec<ParsedAttribute>,
/// Arguments in stored order; `len()` reports how many there are.
pub arguments: Vec<ParsedValue>,
}
impl Pattern<ParsedValue, ParsedText, ParsedDictionary, ParsedTable, ParsedComposition, Self> for ParsedPattern {
type ArgumentIterator<'b> = Iter<'b, ParsedValue>;
type AttributeIterator<'b> = AttributeIterator<'b>;
fn name(&self) -> &str {
&self.name
}
fn len(&self) -> usize {
self.arguments.len()
}
fn has_attributes(&self) -> bool {
!self.attributes.is_empty()
}
fn has_arguments(&self) -> bool {
!self.arguments.is_empty()
}
fn get(&self, index: usize) -> Option<&ParsedValue> {
self.arguments.get(index)
}
fn get_attribute_by(&self, key: &str) -> Option<AttributeValue<'_>> {
for attribute in &self.attributes {
if key.eq(attribute.key().deref()) {
return match attribute {
ParsedAttribute(_, Some(v)) => Some(AttributeValue(Some(&v))),
ParsedAttribute(_, None) => Some(AttributeValue(None)),
};
}
}
None
}
fn get_attribute_at(&self, index: usize) -> Option<Attribute<'_>> {
if let Some(attribute) = self.attributes.get(index) {
match attribute {
ParsedAttribute(k, Some(v)) => Some(Attribute(&k, Some(&v))),
ParsedAttribute(k, None) => Some(Attribute(&k, None)),
}
} else {
None
}
}
fn iter_arguments(&self) -> Self::ArgumentIterator<'_> {
self.arguments.iter()
}
fn iter_attributes(&self) -> Self::AttributeIterator<'_> {
AttributeIterator { iter: self.attributes.iter() }
}
}
/// An attribute of a pattern: a key (field 0) and an optional value
/// (field 1), both stored as shared strings.
#[derive(Clone)]
pub struct ParsedAttribute(Rc<str>, Option<Rc<str>>);

impl ParsedAttribute {
    /// Returns another `Rc` handle to the key; the string itself is shared,
    /// not copied.
    fn key(&self) -> Rc<str> {
        let ParsedAttribute(key, _) = self;
        Rc::clone(key)
    }
}
/// Iterator over a slice of `ParsedAttribute` that yields each one as a
/// borrowed `Attribute`.
pub struct AttributeIterator<'a> {
    /// Remaining attributes, in stored order.
    iter: Iter<'a, ParsedAttribute>,
}

impl<'a> Iterator for AttributeIterator<'a> {
    type Item = Attribute<'a>;

    /// Borrows the key and the optional value of the next attribute; returns
    /// `None` once the slice is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let ParsedAttribute(key, value) = self.iter.next()?;
        Some(match value {
            Some(value) => Attribute(key, Some(value)),
            None => Attribute(key, None),
        })
    }
}
/// The kind of a token, without any payload. Used to name the token kinds
/// that were expected when reporting an error.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TokenType {
    Whitespace,
    Word,
    Quotation,
    TextBlock,
    Colon,
    Semicolon,
    Bar,
    Tilde,
    Diamond,
    LeftBracket,
    RightBracket,
    LeftSquare,
    RightSquare,
    LeftAngle,
    RightAngle,
    End,
}

impl Display for TokenType {
    /// Writes the symbol for punctuation kinds and the kind's name for the
    /// others.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            TokenType::Whitespace => "Whitespace",
            TokenType::Word => "Word",
            TokenType::Quotation => "Quotation",
            TokenType::TextBlock => "TextBlock",
            TokenType::Colon => ":",
            TokenType::Semicolon => ";",
            TokenType::Bar => "|",
            TokenType::Tilde => "~",
            TokenType::Diamond => "<>",
            TokenType::LeftBracket => "{",
            TokenType::RightBracket => "}",
            TokenType::LeftSquare => "[",
            TokenType::RightSquare => "]",
            TokenType::LeftAngle => "<",
            TokenType::RightAngle => ">",
            TokenType::End => "End",
        };
        f.write_str(text)
    }
}
impl Token {
fn to_type(&self) -> TokenType {
match self {
Token::Whitespace(..) => TokenType::Whitespace,
Token::Word(..) => TokenType::Word,
Token::Quotation(..) => TokenType::Quotation,
Token::TextBlock(..) => TokenType::TextBlock,
Token::Colon(..) => TokenType::Colon,
Token::Semicolon(..) => TokenType::Semicolon,
Token::Bar(..) => TokenType::Bar,
Token::Tilde(..) => TokenType::Tilde,
Token::Diamond(..) => TokenType::Diamond,
Token::LeftBracket(..) => TokenType::LeftBracket,
Token::RightBracket(..) => TokenType::RightBracket,
Token::LeftSquare(..) => TokenType::LeftSquare,
Token::RightSquare(..) => TokenType::RightSquare,
Token::LeftAngle(..) => TokenType::LeftAngle,
Token::RightAngle(..) => TokenType::RightAngle,
Token::End(..) => TokenType::End,
}
}
}