use smart_default::SmartDefault;
use std::iter::Peekable;
use std::str::CharIndices;
use nu_errors::ParseError;
use nu_source::{HasSpan, Span, Spanned, SpannedItem};
use super::token_group::TokenBuilder;
use super::tokens::{
CommandBuilder, CommentsBuilder, GroupBuilder, LiteBlock, LiteCommand, LiteComment,
PipelineBuilder, TokenContents,
};
/// Character stream consumed by the lexing functions in this file: a peekable
/// iterator over the `(byte index, char)` pairs of the input string.
type Input<'t> = Peekable<CharIndices<'t>>;
/// A single lexed token: its contents paired with a span.
#[derive(Debug, Clone)]
pub struct Token {
/// The kind of token, together with any payload the variant carries.
pub contents: TokenContents,
/// Span recorded for this token; `lex` in this file builds it from
/// `span_offset` plus byte indices into the input.
pub span: Span,
}
impl Token {
pub fn new(contents: TokenContents, span: Span) -> Token {
Token { contents, span }
}
}
/// The kinds of bracketed block the lexer tracks while scanning a token.
#[derive(Clone, Copy)]
enum BlockKind {
    /// Opened by `(`.
    Paren,
    /// Opened by `{`.
    CurlyBracket,
    /// Opened by `[`.
    SquareBracket,
}

impl BlockKind {
    /// Returns the character that closes a block of this kind.
    fn closing(self) -> char {
        match self {
            Self::CurlyBracket => '}',
            Self::SquareBracket => ']',
            Self::Paren => ')',
        }
    }
}
pub fn baseline(src: &mut Input, span_offset: usize) -> (Spanned<String>, Option<ParseError>) {
let mut token_contents = String::new();
let start_offset = if let Some((pos, _)) = src.peek() {
*pos
} else {
0
};
let mut quote_start: Option<char> = None;
let mut in_comment = false;
let mut block_level: Vec<BlockKind> = vec![];
fn is_termination(block_level: &[BlockKind], c: char) -> bool {
block_level.is_empty() && (c.is_whitespace() || c == '|' || c == ';' || c == '#')
}
while let Some((_, c)) = src.peek() {
let c = *c;
if quote_start.is_some() {
if Some(c) == quote_start {
quote_start = None;
}
} else if c == '#' {
if is_termination(&block_level, c) {
break;
}
in_comment = true;
} else if c == '\n' {
in_comment = false;
if is_termination(&block_level, c) {
break;
}
} else if in_comment {
if is_termination(&block_level, c) {
break;
}
} else if c == '\'' || c == '"' || c == '`' {
quote_start = Some(c);
} else if c == '[' {
block_level.push(BlockKind::SquareBracket);
} else if c == ']' {
if let Some(BlockKind::SquareBracket) = block_level.last() {
let _ = block_level.pop();
}
} else if c == '{' {
block_level.push(BlockKind::CurlyBracket);
} else if c == '}' {
if let Some(BlockKind::CurlyBracket) = block_level.last() {
let _ = block_level.pop();
}
} else if c == '(' {
block_level.push(BlockKind::Paren);
} else if c == ')' {
if let Some(BlockKind::Paren) = block_level.last() {
let _ = block_level.pop();
}
} else if is_termination(&block_level, c) {
break;
}
token_contents.push(c);
let _ = src.next();
}
let span = Span::new(
start_offset + span_offset,
start_offset + span_offset + token_contents.len(),
);
if let Some(block) = block_level.last() {
let delim: char = (*block).closing();
let cause = ParseError::unexpected_eof(delim.to_string(), span);
while let Some(bk) = block_level.pop() {
token_contents.push(bk.closing());
}
return (token_contents.spanned(span), Some(cause));
}
if let Some(delimiter) = quote_start {
token_contents.push(delimiter);
return (
token_contents.spanned(span),
Some(ParseError::unexpected_eof(delimiter.to_string(), span)),
);
}
if token_contents.is_empty() {
return (
token_contents.spanned(span),
Some(ParseError::unexpected_eof("command".to_string(), span)),
);
}
(token_contents.spanned(span), None)
}
/// Reads a comment from `input` up to (but not including) the next `'\n'`.
///
/// The first item of `input` is skipped without being inspected; the caller
/// in this file invokes this function when that item is the `#` that starts
/// the comment. `hash_offset` is the offset used for that skipped character,
/// so the comment text is given spans starting at `hash_offset + 1`.
///
/// If the text begins with whitespace, the whitespace prefix and the rest of
/// the text are passed separately to `LiteComment::new_with_ws`; otherwise
/// the whole text is passed to `LiteComment::new`.
fn parse_comment(input: &mut Input, hash_offset: usize) -> LiteComment {
    input.next();

    let mut text = String::new();
    let mut leading_ws_bytes = 0;
    let mut in_leading_ws = true;

    while let Some(&(_, ch)) = input.peek() {
        if ch == '\n' {
            break;
        }
        if in_leading_ws {
            if ch.is_whitespace() {
                // Counted in bytes so it can be used to split `text` below.
                leading_ws_bytes += ch.len_utf8();
            } else {
                in_leading_ws = false;
            }
        }
        text.push(ch);
        input.next();
    }

    let text_start = hash_offset + 1;

    if leading_ws_bytes == 0 {
        let text_end = text_start + text.len();
        return LiteComment::new(text.spanned(Span::new(text_start, text_end)));
    }

    // `leading_ws_bytes` is a sum of whole-character lengths, so it always
    // falls on a character boundary.
    let body = text.split_off(leading_ws_bytes);
    let ws = text;
    let body_start = text_start + leading_ws_bytes;
    let body_end = body_start + body.len();

    LiteComment::new_with_ws(
        ws.spanned(Span::new(text_start, body_start)),
        body.spanned(Span::new(body_start, body_end)),
    )
}
/// Accumulator used by `parse_block` to assemble tokens into commands,
/// pipelines and groups.
#[derive(SmartDefault)]
struct BlockParser {
// Groups that have already been closed (see `close_group`).
groups: TokenBuilder<GroupBuilder>,
// Group currently being filled with pipelines.
group: GroupBuilder,
// Pipeline currently being filled with commands.
pipeline: PipelineBuilder,
// Command currently being filled with baseline parts.
command: CommandBuilder,
// Last token handed to `consumed`; `eol` checks it for a trailing pipe.
prev_token: Option<Token>,
// Comments collected since the last command was closed or the last blank
// line; `close_command` attaches them to the command it builds.
prev_comments: CommentsBuilder,
// `ws_len()` of the first comment in the current run; passed to `unindent`
// for every comment added to that run.
prev_comment_indent: usize,
}
impl BlockParser {
fn consumed(&mut self, token: Token) {
self.prev_token = Some(token);
}
fn success(mut self) -> (LiteBlock, Option<ParseError>) {
self.close_group();
(LiteBlock::new(self.groups.map(|g| g.into())), None)
}
fn fail(self, error: ParseError) -> (LiteBlock, Option<ParseError>) {
(LiteBlock::new(self.groups.map(|g| g.into())), Some(error))
}
fn comment(&mut self, token: &LiteComment) {
if self.prev_comments.is_empty() {
self.prev_comment_indent = token.ws_len();
}
self.prev_comments
.push(token.unindent(self.prev_comment_indent));
}
fn eoleol(&mut self) {
self.prev_comment_indent = 0;
self.prev_comments.take();
self.eol();
}
fn eol(&mut self) {
if let Some(prev) = &self.prev_token {
if let TokenContents::Pipe = prev.contents {
return;
}
}
self.close_group();
}
fn pipe(&mut self) -> Result<(), ()> {
match self.close_command() {
None => Err(()),
Some(command) => {
self.pipeline.push(command);
Ok(())
}
}
}
fn semicolon(&mut self) {
self.close_pipeline();
}
fn baseline(&mut self, part: Spanned<String>) {
self.command.push(part);
}
fn close_command(&mut self) -> Option<LiteCommand> {
let command = self.command.take()?;
let command = LiteCommand {
parts: command.into(),
comments: self.prev_comments.take().map(|c| c.into()),
};
self.prev_comment_indent = 0;
Some(command)
}
fn close_pipeline(&mut self) {
if let Some(command) = self.close_command() {
self.pipeline.push(command);
}
if let Some(pipeline) = self.pipeline.take() {
self.group.push(pipeline);
}
}
fn close_group(&mut self) {
self.close_pipeline();
if let Some(group) = self.group.take() {
self.groups.push(group);
}
}
}
/// Assembles a flat list of tokens into a `LiteBlock`.
///
/// Tokens are fed one by one into a `BlockParser`:
/// * baseline tokens become parts of the current command;
/// * comments are collected for the next command that gets closed;
/// * `;` closes the current pipeline;
/// * `|` closes the current command, and fails with an `extra_tokens` error
///   spanning the pipe when there is no command to close;
/// * an end-of-line closes the current group (unless it follows a pipe); two
///   end-of-line tokens in a row are consumed together and additionally drop
///   the pending comments.
///
/// On failure the block built from the groups closed so far is returned
/// together with the error.
pub fn parse_block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
    let mut parser = BlockParser::default();
    let mut stream = tokens.iter().peekable();

    while let Some(current) = stream.next() {
        match &current.contents {
            TokenContents::Comment(comment) => parser.comment(comment),
            TokenContents::Baseline(part) => {
                parser.baseline(part.to_string().spanned(current.span));
            }
            TokenContents::Semicolon => {
                parser.semicolon();
            }
            TokenContents::Pipe => {
                if parser.pipe().is_err() {
                    let offending = "|".to_string().spanned(current.span);
                    return parser.fail(ParseError::extra_tokens(offending));
                }
            }
            TokenContents::EOL => {
                let blank_line_follows = match stream.peek() {
                    Some(Token {
                        contents: TokenContents::EOL,
                        ..
                    }) => true,
                    _ => false,
                };
                if blank_line_follows {
                    // Swallow the second end-of-line so it is not seen again.
                    stream.next();
                    parser.eoleol();
                } else {
                    parser.eol();
                }
            }
        }
        parser.consumed(current.clone());
    }

    parser.success()
}
pub fn lex(input: &str, span_offset: usize) -> (Vec<Token>, Option<ParseError>) {
let mut char_indices = input.char_indices().peekable();
let mut error = None;
let mut output = vec![];
let mut is_complete = true;
while let Some((idx, c)) = char_indices.peek() {
if *c == '|' {
let idx = *idx;
let prev_idx = idx;
let _ = char_indices.next();
if let Some((idx, c)) = char_indices.peek() {
if *c == '|' {
let idx = *idx;
let _ = char_indices.next();
output.push(Token::new(
TokenContents::Baseline("||".into()),
Span::new(span_offset + prev_idx, span_offset + idx + 1),
));
continue;
}
}
output.push(Token::new(
TokenContents::Pipe,
Span::new(span_offset + idx, span_offset + idx + 1),
));
is_complete = false;
} else if *c == ';' {
if !is_complete && error.is_none() {
error = Some(ParseError::extra_tokens(
";".to_string().spanned(Span::new(*idx, idx + 1)),
));
}
let idx = *idx;
let _ = char_indices.next();
output.push(Token::new(
TokenContents::Semicolon,
Span::new(span_offset + idx, span_offset + idx + 1),
));
} else if *c == '\n' || *c == '\r' {
let idx = *idx;
let _ = char_indices.next();
output.push(Token::new(
TokenContents::EOL,
Span::new(span_offset + idx, span_offset + idx + 1),
));
} else if *c == '#' {
let idx = *idx;
let comment = parse_comment(&mut char_indices, idx);
let span = comment.span();
output.push(Token::new(TokenContents::Comment(comment), span));
} else if c.is_whitespace() {
let _ = char_indices.next();
} else {
let (result, err) = baseline(&mut char_indices, span_offset);
if error.is_none() {
error = err;
}
is_complete = true;
let Spanned { item, span } = result;
output.push(Token::new(TokenContents::Baseline(item), span));
}
}
(output, error)
}