use std::str::CharIndices;
use std::{fmt, iter::Peekable};
use nu_source::{Span, Spanned, SpannedItem};
use nu_errors::ParseError;
type Input<'t> = Peekable<CharIndices<'t>>;
/// A single lexed token: what kind of token it is and the span it covers.
#[derive(Debug)]
pub struct Token {
/// The kind of token, carrying its text for the variants that have any.
pub contents: TokenContents,
/// The span attached to the token when it was created.
pub span: Span,
}
impl Token {
pub fn new(contents: TokenContents, span: Span) -> Token {
Token { contents, span }
}
}
/// The kinds of token produced by the lexer.
#[derive(Debug, PartialEq, is_enum_variant)]
pub enum TokenContents {
/// A free-form piece of text (a bare word, quoted string, bracketed group, ...).
Baseline(String),
/// The text of a comment, without the leading `#`.
Comment(String),
/// A `|` separator.
Pipe,
/// A `;` separator.
Semicolon,
/// An end-of-line marker.
EOL,
}
impl fmt::Display for TokenContents {
    /// Writes the token as text: baseline text verbatim, comments prefixed
    /// with `#`, and the fixed tokens as `|`, `;` and the two characters `\n`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let fixed = match self {
            TokenContents::Baseline(text) => return f.write_str(text),
            TokenContents::Comment(text) => {
                f.write_str("#")?;
                return f.write_str(text);
            }
            TokenContents::Pipe => "|",
            TokenContents::Semicolon => ";",
            // Rendered as a backslash followed by `n`, not as a real newline.
            TokenContents::EOL => "\\n",
        };
        f.write_str(fixed)
    }
}
/// A single command: its parts plus any comments attached to it.
#[derive(Debug, Clone)]
pub struct LiteCommand {
/// The pieces of the command, each with its span, in the order they were pushed.
pub parts: Vec<Spanned<String>>,
/// Comments attached to the command, or `None` when there are none.
pub comments: Option<Vec<Spanned<String>>>,
}
impl LiteCommand {
    /// Creates a command with no parts and no comments.
    fn new() -> LiteCommand {
        LiteCommand {
            comments: None,
            parts: Vec::new(),
        }
    }

    /// Returns the attached comments joined with `\n`, or an empty string
    /// when the command has no comments.
    pub fn comments_joined(&self) -> String {
        self.comments
            .as_ref()
            .map(|lines| {
                lines
                    .iter()
                    .map(|line| line.item.as_str())
                    .collect::<Vec<_>>()
                    .join("\n")
            })
            .unwrap_or_default()
    }

    /// Returns `true` when the command has no parts.
    pub fn is_empty(&self) -> bool {
        self.parts.is_empty()
    }

    /// Returns `true` when the command has at least one part.
    pub fn has_content(&self) -> bool {
        !self.parts.is_empty()
    }

    /// Appends `item` as the next part of the command.
    pub fn push(&mut self, item: Spanned<String>) {
        self.parts.push(item)
    }

    /// Returns the span from the start of the first part to the end of the
    /// last part; a missing bound (no parts) is reported as `0`.
    pub(crate) fn span(&self) -> Span {
        let start = self.parts.first().map_or(0, |part| part.span.start());
        let end = self.parts.last().map_or(0, |part| part.span.end());
        Span::new(start, end)
    }
}
/// A sequence of commands, in the order they were pushed.
#[derive(Debug, Clone)]
pub struct LitePipeline {
/// The commands that make up the pipeline.
pub commands: Vec<LiteCommand>,
}
impl Default for LitePipeline {
fn default() -> Self {
Self::new()
}
}
impl LitePipeline {
    /// Creates a pipeline with no commands.
    pub fn new() -> Self {
        Self {
            commands: Vec::new(),
        }
    }

    /// Returns `true` when the pipeline has no commands.
    pub fn is_empty(&self) -> bool {
        self.commands.is_empty()
    }

    /// Returns `true` when the pipeline has at least one command.
    pub fn has_content(&self) -> bool {
        !self.is_empty()
    }

    /// Appends `item` as the next command of the pipeline.
    pub fn push(&mut self, item: LiteCommand) {
        self.commands.push(item)
    }

    /// Returns the span from the start of the first command to the end of
    /// the last one, or `0..0` when there are no commands.
    pub(crate) fn span(&self) -> Span {
        match (self.commands.first(), self.commands.last()) {
            (Some(first), Some(last)) => Span::new(first.span().start(), last.span().end()),
            _ => Span::new(0, 0),
        }
    }
}
/// A sequence of pipelines, in the order they were pushed.
#[derive(Debug, Clone)]
pub struct LiteGroup {
/// The pipelines that make up the group.
pub pipelines: Vec<LitePipeline>,
}
impl Default for LiteGroup {
fn default() -> Self {
Self::new()
}
}
impl LiteGroup {
    /// Creates a group with no pipelines.
    pub fn new() -> Self {
        Self {
            pipelines: Vec::new(),
        }
    }

    /// Returns `true` when the group has no pipelines.
    pub fn is_empty(&self) -> bool {
        self.pipelines.is_empty()
    }

    /// Returns `true` when the group has at least one pipeline.
    pub fn has_content(&self) -> bool {
        !self.is_empty()
    }

    /// Appends `item` as the next pipeline of the group.
    pub fn push(&mut self, item: LitePipeline) {
        self.pipelines.push(item)
    }

    /// Returns the span from the start of the first pipeline to the end of
    /// the last one, or `0..0` when there are no pipelines.
    #[cfg(test)]
    pub(crate) fn span(&self) -> Span {
        match (self.pipelines.first(), self.pipelines.last()) {
            (Some(first), Some(last)) => Span::new(first.span().start(), last.span().end()),
            _ => Span::new(0, 0),
        }
    }
}
/// A sequence of groups, in the order they were pushed.
#[derive(Debug, Clone)]
pub struct LiteBlock {
/// The groups that make up the block.
pub block: Vec<LiteGroup>,
}
impl LiteBlock {
pub fn new(block: Vec<LiteGroup>) -> Self {
Self { block }
}
pub fn is_empty(&self) -> bool {
self.block.is_empty()
}
pub fn push(&mut self, item: LiteGroup) {
self.block.push(item)
}
#[cfg(test)]
pub(crate) fn span(&self) -> Span {
let start = if !self.block.is_empty() {
self.block[0].span().start()
} else {
0
};
if let Some((last, _)) = self.block[..].split_last() {
Span::new(start, last.span().end())
} else {
Span::new(start, 0)
}
}
}
/// The kinds of bracketed region that can be open while scanning a token.
#[derive(Clone, Copy)]
enum BlockKind {
    /// A region closed by `)`.
    Paren,
    /// A region closed by `}`.
    CurlyBracket,
    /// A region closed by `]`.
    SquareBracket,
}

impl BlockKind {
    /// Returns the character that closes this kind of block.
    fn closing(self) -> char {
        match self {
            Self::CurlyBracket => '}',
            Self::SquareBracket => ']',
            Self::Paren => ')',
        }
    }
}
pub fn baseline(src: &mut Input, span_offset: usize) -> (Spanned<String>, Option<ParseError>) {
let mut token_contents = String::new();
let start_offset = if let Some((pos, _)) = src.peek() {
*pos
} else {
0
};
let mut quote_start: Option<char> = None;
let mut block_level: Vec<BlockKind> = vec![];
fn is_termination(block_level: &[BlockKind], c: char) -> bool {
block_level.is_empty() && (c.is_whitespace() || c == '|' || c == ';' || c == '#')
}
while let Some((_, c)) = src.peek() {
let c = *c;
if quote_start.is_some() {
if Some(c) == quote_start {
quote_start = None;
}
} else if c == '\'' || c == '"' || c == '`' {
quote_start = Some(c);
} else if c == '[' {
block_level.push(BlockKind::SquareBracket);
} else if c == ']' {
if let Some(BlockKind::SquareBracket) = block_level.last() {
let _ = block_level.pop();
}
} else if c == '{' {
block_level.push(BlockKind::CurlyBracket);
} else if c == '}' {
if let Some(BlockKind::CurlyBracket) = block_level.last() {
let _ = block_level.pop();
}
} else if c == '(' {
block_level.push(BlockKind::Paren);
} else if c == ')' {
if let Some(BlockKind::Paren) = block_level.last() {
let _ = block_level.pop();
}
} else if is_termination(&block_level, c) {
break;
}
token_contents.push(c);
let _ = src.next();
}
let span = Span::new(
start_offset + span_offset,
start_offset + span_offset + token_contents.len(),
);
if let Some(block) = block_level.last() {
let delim: char = (*block).closing();
let cause = ParseError::unexpected_eof(delim.to_string(), span);
while let Some(bk) = block_level.pop() {
token_contents.push(bk.closing());
}
return (token_contents.spanned(span), Some(cause));
}
if let Some(delimiter) = quote_start {
token_contents.push(delimiter);
return (
token_contents.spanned(span),
Some(ParseError::unexpected_eof(delimiter.to_string(), span)),
);
}
if token_contents.is_empty() {
return (
token_contents.spanned(span),
Some(ParseError::unexpected_eof("command".to_string(), span)),
);
}
(token_contents.spanned(span), None)
}
pub fn block(tokens: Vec<Token>) -> (LiteBlock, Option<ParseError>) {
let mut groups = vec![];
let mut group = LiteGroup::new();
let mut pipeline = LitePipeline::new();
let mut command = LiteCommand::new();
let mut prev_comments = None;
let mut prev_comment_indent = 0;
let mut prev_token: Option<Token> = None;
fn finish_command(
prev_comments: &mut Option<Vec<Spanned<String>>>,
command: &mut LiteCommand,
pipeline: &mut LitePipeline,
) {
if let Some(prev_comments_) = prev_comments {
command.comments = Some(prev_comments_.clone());
*prev_comments = None;
}
pipeline.push(command.clone());
*command = LiteCommand::new();
}
for token in tokens {
match &token.contents {
TokenContents::EOL => {
if let Some(prev) = &prev_token {
if let TokenContents::Pipe = prev.contents {
continue;
}
if let TokenContents::EOL = prev.contents {
prev_comments = None
}
}
if command.has_content() {
finish_command(&mut prev_comments, &mut command, &mut pipeline);
}
if pipeline.has_content() {
group.push(pipeline);
pipeline = LitePipeline::new();
}
if group.has_content() {
groups.push(group);
group = LiteGroup::new();
}
}
TokenContents::Pipe => {
if command.has_content() {
finish_command(&mut prev_comments, &mut command, &mut pipeline);
} else {
return (
LiteBlock::new(groups),
Some(ParseError::extra_tokens(
"|".to_string().spanned(token.span),
)),
);
}
}
TokenContents::Semicolon => {
if command.has_content() {
finish_command(&mut prev_comments, &mut command, &mut pipeline);
}
if pipeline.has_content() {
group.push(pipeline);
pipeline = LitePipeline::new();
}
}
TokenContents::Baseline(bare) => {
command.push(bare.to_string().spanned(token.span));
}
TokenContents::Comment(comment) => {
if prev_comments.is_none() {
if let Some((i, _)) = comment.chars().enumerate().find(|(_, ch)| *ch != ' ') {
prev_comment_indent = i;
}
}
let comment: String = comment
.chars()
.enumerate()
.skip_while(|(i, ch)| *i < prev_comment_indent && *ch == ' ')
.map(|(_, ch)| ch)
.collect();
let comment_span = Span::new(token.span.end() - comment.len(), token.span.end());
prev_comments
.get_or_insert(vec![])
.push(comment.spanned(comment_span));
}
}
prev_token = Some(token);
}
if command.has_content() {
finish_command(&mut prev_comments, &mut command, &mut pipeline)
}
if pipeline.has_content() {
group.push(pipeline);
}
if group.has_content() {
groups.push(group);
}
(LiteBlock::new(groups), None)
}
pub fn lex(input: &str, span_offset: usize) -> (Vec<Token>, Option<ParseError>) {
let mut char_indices = input.char_indices().peekable();
let mut error = None;
let mut output = vec![];
let mut is_complete = true;
while let Some((idx, c)) = char_indices.peek() {
if *c == '|' {
let idx = *idx;
let prev_idx = idx;
let _ = char_indices.next();
if let Some((idx, c)) = char_indices.peek() {
if *c == '|' {
let idx = *idx;
let _ = char_indices.next();
output.push(Token::new(
TokenContents::Baseline("||".into()),
Span::new(span_offset + prev_idx, span_offset + idx + 1),
));
continue;
}
}
output.push(Token::new(
TokenContents::Pipe,
Span::new(span_offset + idx, span_offset + idx + 1),
));
is_complete = false;
} else if *c == ';' {
if !is_complete && error.is_none() {
error = Some(ParseError::extra_tokens(
";".to_string().spanned(Span::new(*idx, idx + 1)),
));
}
let idx = *idx;
let _ = char_indices.next();
output.push(Token::new(
TokenContents::Semicolon,
Span::new(span_offset + idx, span_offset + idx + 1),
));
} else if *c == '\n' || *c == '\r' {
let idx = *idx;
let _ = char_indices.next();
output.push(Token::new(
TokenContents::EOL,
Span::new(span_offset + idx, span_offset + idx + 1),
));
} else if *c == '#' {
let comment_start = *idx + 1;
let mut comment = String::new();
char_indices.next();
while let Some((_, c)) = char_indices.peek() {
if *c == '\n' {
break;
}
comment.push(*c);
let _ = char_indices.next();
}
let token = Token::new(
TokenContents::Comment(comment.clone()),
Span::new(
span_offset + comment_start,
span_offset + comment_start + comment.len(),
),
);
output.push(token);
} else if c.is_whitespace() {
let _ = char_indices.next();
} else {
let (result, err) = baseline(&mut char_indices, span_offset);
if error.is_none() {
error = err;
}
is_complete = true;
let Spanned { item, span } = result;
output.push(Token::new(TokenContents::Baseline(item), span));
}
}
(output, error)
}
// Unit tests for the lexer (`lex`) and the lite parser (`block`).
#[cfg(test)]
mod tests {
use super::*;
// Shorthand for building the expected `Span` in assertions.
fn span(left: usize, right: usize) -> Span {
Span::new(left, right)
}
// Tests for how `lex` delimits individual tokens.
mod bare {
use super::*;
// Bare words separated by spaces: the first token covers only the first word.
#[test]
fn simple_1() {
let input = "foo bar baz";
let (result, err) = lex(input, 0);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 3));
}
// A quoted string containing a space is a single token.
#[test]
fn simple_2() {
let input = "'foo bar' baz";
let (result, err) = lex(input, 0);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 9));
}
// A different quote character inside a quoted string does not end it.
#[test]
fn simple_3() {
let input = "'foo\" bar' baz";
let (result, err) = lex(input, 0);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 10));
}
// A square-bracketed group containing a space is a single token.
#[test]
fn simple_4() {
let input = "[foo bar] baz";
let (result, err) = lex(input, 0);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 9));
}
// Text directly following a closing quote stays in the same token.
#[test]
fn simple_5() {
let input = "'foo 'bar baz";
let (result, err) = lex(input, 0);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 9));
}
// An empty quote pair directly followed by text forms one token.
#[test]
fn simple_6() {
let input = "''foo baz";
let (result, err) = lex(input, 0);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 5));
}
// An empty quote pair on its own is a token.
#[test]
fn simple_7() {
let input = "'' foo";
let (result, err) = lex(input, 0);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 2));
}
// Leading whitespace is skipped; the span starts at the first quote.
#[test]
fn simple_8() {
let input = " '' foo";
let (result, err) = lex(input, 0);
assert!(err.is_none());
assert_eq!(result[0].span, span(1, 3));
}
// Same as above with a non-empty quoted string.
#[test]
fn simple_9() {
let input = " 'foo' foo";
let (result, err) = lex(input, 0);
assert!(err.is_none());
assert_eq!(result[0].span, span(1, 6));
}
// A bracketed list with a comma and a space is a single token.
#[test]
fn simple_10() {
let input = "[foo, bar]";
let (result, err) = lex(input, 0);
assert!(err.is_none());
assert_eq!(result[0].span, span(0, 10));
}
// The second token (after the leading newline's EOL) is the comment; its
// text and span exclude the leading `#`.
#[test]
fn lex_comment() {
let input = r#"
#A comment
def e [] {echo hi}
"#;
let (result, err) = lex(input, 0);
assert!(err.is_none());
assert_eq!(result[1].span, span(2, 11));
assert_eq!(
result[1].contents,
TokenContents::Comment("A comment".to_string())
);
}
// An unterminated quote in a later token does not change the first token's span.
#[test]
fn ignore_future() {
let input = "foo 'bar";
let (result, _) = lex(input, 0);
assert_eq!(result[0].span, span(0, 3));
}
// An unterminated quote is reported as an error.
#[test]
fn invalid_1() {
let input = "'foo bar";
let (_, err) = lex(input, 0);
assert!(err.is_some());
}
// An unterminated quote is reported as an error (single word).
#[test]
fn invalid_2() {
let input = "'bar";
let (_, err) = lex(input, 0);
assert!(err.is_some());
}
// An unterminated quote after leading whitespace is reported as an error.
#[test]
fn invalid_4() {
let input = " 'bar";
let (_, err) = lex(input, 0);
assert!(err.is_some());
}
}
// Tests for how `block` groups lexed tokens.
mod lite_parse {
use super::*;
// `;` splits one group into two pipelines; spans cover each pipeline's commands.
#[test]
fn pipeline() {
let (result, err) = lex("cmd1 | cmd2 ; deploy", 0);
assert!(err.is_none());
let (result, err) = block(result);
assert!(err.is_none());
assert_eq!(result.span(), span(0, 20));
assert_eq!(result.block[0].pipelines[0].span(), span(0, 11));
assert_eq!(result.block[0].pipelines[1].span(), span(14, 20));
}
// A single word yields one group, one pipeline, one command, one part.
#[test]
fn simple_1() {
let (result, err) = lex("foo", 0);
assert!(err.is_none());
let (result, err) = block(result);
assert!(err.is_none());
assert_eq!(result.block.len(), 1);
assert_eq!(result.block[0].pipelines.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 1);
assert_eq!(
result.block[0].pipelines[0].commands[0].parts[0].span,
span(0, 3)
);
}
// The span offset passed to `lex` is added to the resulting spans.
#[test]
fn simple_offset() {
let (result, err) = lex("foo", 10);
assert!(err.is_none());
let (result, err) = block(result);
assert!(err.is_none());
assert_eq!(result.block[0].pipelines.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 1);
assert_eq!(
result.block[0].pipelines[0].commands[0].parts[0].span,
span(10, 13)
);
}
// An unterminated quote yields an EOF error, but the tokens are still
// returned, with the missing closing quote appended to the last part.
#[test]
fn incomplete_result() {
let (result, err) = lex("my_command \"foo' --test", 10);
assert!(matches!(err.unwrap().reason(), nu_errors::ParseErrorReason::Eof { .. }));
let (result, _) = block(result);
assert_eq!(result.block.len(), 1);
assert_eq!(result.block[0].pipelines.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
assert_eq!(
result.block[0].pipelines[0].commands[0].parts[0].item,
"my_command"
);
assert_eq!(
result.block[0].pipelines[0].commands[0].parts[1].item,
"\"foo' --test\""
);
}
// Comments directly above a command are attached to it, with the leading
// space of each comment stripped.
#[test]
fn command_with_comment() {
let code = r#"
# My echo
# * It's much better :)
def my_echo [arg] { echo $arg }
"#;
let (result, err) = lex(code, 0);
assert!(err.is_none());
let (result, err) = block(result);
assert!(err.is_none());
assert_eq!(result.block.len(), 1);
assert_eq!(result.block[0].pipelines.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 4);
assert_eq!(
result.block[0].pipelines[0].commands[0].comments,
Some(vec![
"My echo".to_string().spanned(Span::new(3, 10)),
"* It's much better :)"
.to_string()
.spanned(Span::new(13, 34))
])
);
}
// A comment separated from the command by an empty line is not attached.
// NOTE(review): the raw string as shown has no empty line between the
// comment and `echo 42`, which this assertion relies on; confirm the
// blank line is present in the actual file.
#[test]
fn discarded_comment() {
let code = r#"
# This comment gets discarded, because of the following empty line
echo 42
"#;
let (result, err) = lex(code, 0);
assert!(err.is_none());
let (result, err) = block(result);
assert!(err.is_none());
assert_eq!(result.block.len(), 1);
assert_eq!(result.block[0].pipelines.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
assert_eq!(result.block[0].pipelines[0].commands[0].comments, None);
}
}
// When the first comment has no leading space, no leading space is stripped
// from the following comments either.
// NOTE(review): the expected span 60..94 is 34 bytes long while the second
// expected string as shown is 32 characters; confirm that runs of spaces in
// the literals were not collapsed.
#[test]
fn no_discarded_white_space_start_of_comment() {
let code = r#"
#No white_space at firt line ==> No white_space discarded
# Starting space is not discarded
echo 42
"#;
let (result, err) = lex(code, 0);
assert!(err.is_none());
let (result, err) = block(result);
assert!(err.is_none());
assert_eq!(result.block.len(), 1);
assert_eq!(result.block[0].pipelines.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
assert_eq!(
result.block[0].pipelines[0].commands[0].comments,
Some(vec![
"No white_space at firt line ==> No white_space discarded"
.to_string()
.spanned(Span::new(2, 58)),
" Starting space is not discarded"
.to_string()
.spanned(Span::new(60, 94)),
])
);
}
// The indentation of the first comment caps how many leading spaces are
// stripped from each comment in the run.
// NOTE(review): the expected spans (4..20 and 42..58) imply two spaces after
// `#` on the first and third comment lines; confirm that runs of spaces in
// the raw string were not collapsed.
#[test]
fn multiple_discarded_white_space_start_of_comment() {
let code = r#"
# Discard 2 spaces
# Discard 1 space
# Discard 2 spaces
echo 42
"#;
let (result, err) = lex(code, 0);
assert!(err.is_none());
let (result, err) = block(result);
assert!(err.is_none());
assert_eq!(result.block.len(), 1);
assert_eq!(result.block[0].pipelines.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands.len(), 1);
assert_eq!(result.block[0].pipelines[0].commands[0].parts.len(), 2);
assert_eq!(
result.block[0].pipelines[0].commands[0].comments,
Some(vec![
"Discard 2 spaces".to_string().spanned(Span::new(4, 20)),
"Discard 1 space".to_string().spanned(Span::new(23, 38)),
"Discard 2 spaces".to_string().spanned(Span::new(42, 58)),
])
);
}
}