use crate::tokens::{char_tokens, LexTok};
use std::collections::VecDeque;
#[derive(Debug, Clone, Copy, Default)]
pub struct LexFlags {
pub zle: bool,
pub newline: bool,
pub comments_keep: bool,
pub comments_strip: bool,
pub active: bool,
}
#[derive(Debug, Clone)]
struct LexBuf {
data: String,
siz: usize,
}
impl LexBuf {
fn new() -> Self {
LexBuf {
data: String::with_capacity(256),
siz: 256,
}
}
fn clear(&mut self) {
self.data.clear();
}
fn add(&mut self, c: char) {
self.data.push(c);
if self.data.len() >= self.siz {
self.siz *= 2;
self.data.reserve(self.siz - self.data.len());
}
}
#[allow(dead_code)]
fn add_str(&mut self, s: &str) {
self.data.push_str(s);
}
fn len(&self) -> usize {
self.data.len()
}
fn as_str(&self) -> &str {
&self.data
}
#[allow(dead_code)]
fn into_string(self) -> String {
self.data
}
#[allow(dead_code)]
fn last_char(&self) -> Option<char> {
self.data.chars().last()
}
fn pop(&mut self) -> Option<char> {
self.data.pop()
}
}
#[derive(Debug, Clone)]
pub struct HereDoc {
pub terminator: String,
pub strip_tabs: bool,
pub content: String,
pub quoted: bool,
pub processed: bool,
}
pub struct ZshLexer<'a> {
pub(crate) input: &'a str,
pub(crate) pos: usize,
unget_buf: VecDeque<char>,
pub tokstr: Option<String>,
pub tok: LexTok,
pub tokfd: i32,
pub toklineno: u64,
pub lineno: u64,
pub lexstop: bool,
pub incmdpos: bool,
pub incond: i32,
pub incondpat: bool,
pub incasepat: i32,
pub inredir: bool,
pub oldpos: bool,
pub infor: i32,
inrepeat: i32,
pub intypeset: bool,
dbparens: bool,
pub noaliases: bool,
pub nocorrect: i32,
pub nocomments: bool,
pub lexflags: LexFlags,
pub isfirstln: bool,
#[allow(dead_code)]
isfirstch: bool,
pub heredocs: Vec<HereDoc>,
heredoc_pending: u8,
lexbuf: LexBuf,
pub isnewlin: i32,
pub error: Option<String>,
global_iterations: usize,
recursion_depth: usize,
pub lex_add_raw: i32,
lexbuf_raw: LexBuf,
}
const MAX_LEXER_RECURSION: usize = 200;
#[derive(Debug, Clone)]
pub struct AliasInfo {
pub text: String,
pub in_use: bool,
pub global: bool,
}
pub trait AliasResolver {
fn lookup_alias(&self, name: &str) -> Option<AliasInfo>;
fn lookup_suffix_alias(&self, suffix: &str) -> Option<AliasInfo>;
fn lookup_reswd(&self, name: &str) -> Option<LexTok>;
fn mark_in_use(&mut self, name: &str, in_use: bool);
}
#[derive(Debug, Clone)]
pub struct LexStack {
pub dbparens: bool,
pub isfirstln: bool,
pub isfirstch: bool,
pub lexflags: LexFlags,
pub tok: LexTok,
pub tokstr: Option<String>,
pub lexbuf_data: String,
pub lexbuf_siz: usize,
pub lexstop: bool,
pub toklineno: u64,
}
impl Default for LexStack {
fn default() -> Self {
LexStack {
dbparens: false,
isfirstln: false,
isfirstch: false,
lexflags: LexFlags::default(),
tok: LexTok::Endinput,
tokstr: None,
lexbuf_data: String::new(),
lexbuf_siz: 256,
lexstop: false,
toklineno: 0,
}
}
}
impl<'a> ZshLexer<'a> {
pub fn new(input: &'a str) -> Self {
ZshLexer {
input,
pos: 0,
unget_buf: VecDeque::new(),
tokstr: None,
tok: LexTok::Endinput,
tokfd: -1,
toklineno: 1,
lineno: 1,
lexstop: false,
incmdpos: true,
incond: 0,
incondpat: false,
incasepat: 0,
inredir: false,
oldpos: true,
infor: 0,
inrepeat: 0,
intypeset: false,
dbparens: false,
noaliases: false,
nocorrect: 0,
nocomments: false,
lexflags: LexFlags::default(),
isfirstln: true,
isfirstch: true,
heredocs: Vec::new(),
heredoc_pending: 0,
lexbuf: LexBuf::new(),
isnewlin: 0,
error: None,
global_iterations: 0,
recursion_depth: 0,
lex_add_raw: 0,
lexbuf_raw: LexBuf::new(),
}
}
pub fn zshlex_raw_add(&mut self, c: char) {
if self.lex_add_raw == 0 {
return;
}
self.lexbuf_raw.add(c);
}
pub fn exalias<R: AliasResolver>(&mut self, resolver: &mut R) -> bool {
if self.tokstr.is_none() {
if self.tok == LexTok::Newlin {
return false;
}
let text = match self.tok {
LexTok::Semi => ";",
LexTok::Amper => "&",
LexTok::Bar => "|",
_ => return false,
};
return self.check_alias(resolver, text);
}
let tokstr = self.tokstr.clone().unwrap();
let lextext = if has_token(&tokstr) {
untokenize(&tokstr)
} else {
tokstr.clone()
};
if self.lexflags.zle {
let zp = self.lexflags;
self.gotword();
if zp.zle && !self.lexflags.zle {
return false;
}
}
if self.tok == LexTok::String {
if self.check_alias(resolver, &lextext) {
return true;
}
if self.incmdpos || lextext == "}" {
if let Some(rwtok) = resolver.lookup_reswd(&lextext) {
self.tok = rwtok;
if rwtok == LexTok::Repeat {
self.inrepeat = 1;
}
if rwtok == LexTok::Dinbrack {
self.incond = 1;
}
}
} else if self.incond > 0 && lextext == "]]" {
self.tok = LexTok::Doutbrack;
self.incond = 0;
} else if self.incond == 1 && lextext == "!" {
self.tok = LexTok::Bang;
}
}
false
}
fn check_alias<R: AliasResolver>(&mut self, resolver: &mut R, lextext: &str) -> bool {
if lextext.is_empty() {
return false;
}
if self.noaliases {
return false;
}
if let Some(alias) = resolver.lookup_alias(lextext) {
if !alias.in_use && (alias.global || (self.incmdpos && self.tok == LexTok::String)) {
if !self.lexstop {
if let Some(c) = self.peek() {
if !Self::is_blank(c) {
self.inject_alias_text(" ");
}
}
}
self.inject_alias_text(&alias.text);
resolver.mark_in_use(lextext, true);
self.lexstop = false;
return true;
}
}
if self.incmdpos {
if let Some(dot_pos) = lextext.rfind('.') {
if dot_pos > 0 && dot_pos + 1 < lextext.len() {
let suffix = &lextext[dot_pos + 1..];
if let Some(alias) = resolver.lookup_suffix_alias(suffix) {
if !alias.in_use {
self.inject_alias_text(&alias.text);
self.inject_alias_text(" ");
self.inject_alias_text(lextext);
resolver.mark_in_use(suffix, true);
self.lexstop = false;
return true;
}
}
}
}
}
false
}
fn inject_alias_text(&mut self, text: &str) {
for c in text.chars().rev() {
self.unget_buf.push_front(c);
}
}
pub fn zshlex_raw_back(&mut self) {
if self.lex_add_raw == 0 {
return;
}
self.lexbuf_raw.pop();
}
pub fn zshlex_raw_mark(&self, offset: i64) -> i64 {
if self.lex_add_raw == 0 {
return 0;
}
(self.lexbuf_raw.len() as i64) + offset
}
pub fn zshlex_raw_back_to_mark(&mut self, mark: i64) {
if self.lex_add_raw == 0 {
return;
}
let m = mark.max(0) as usize;
self.lexbuf_raw.data.truncate(m);
}
pub fn take_raw_buf(&mut self) -> String {
std::mem::take(&mut self.lexbuf_raw.data)
}
pub fn lex_context_save(&mut self, ls: &mut LexStack) {
ls.dbparens = self.dbparens;
ls.isfirstln = self.isfirstln;
ls.isfirstch = self.isfirstch;
ls.lexflags = self.lexflags;
ls.tok = self.tok;
ls.tokstr = self.tokstr.take();
ls.lexbuf_data = std::mem::take(&mut self.lexbuf.data);
ls.lexbuf_siz = self.lexbuf.siz;
ls.lexstop = self.lexstop;
ls.toklineno = self.toklineno;
self.tokstr = None;
self.lexbuf.data.clear();
self.lexbuf.siz = 256;
}
pub fn lex_context_restore(&mut self, ls: &mut LexStack) {
self.dbparens = ls.dbparens;
self.isfirstln = ls.isfirstln;
self.isfirstch = ls.isfirstch;
self.lexflags = ls.lexflags;
self.tok = ls.tok;
self.tokstr = ls.tokstr.take();
self.lexbuf.data = std::mem::take(&mut ls.lexbuf_data);
self.lexbuf.siz = ls.lexbuf_siz;
self.lexstop = ls.lexstop;
self.toklineno = ls.toklineno;
}
pub fn lexinit(&mut self) {
self.nocorrect = 0;
self.dbparens = false;
self.lexstop = false;
self.tok = LexTok::Endinput;
}
#[inline]
fn check_recursion(&mut self) -> bool {
if self.recursion_depth > MAX_LEXER_RECURSION {
self.error = Some("lexer exceeded max recursion depth".to_string());
self.lexstop = true;
true
} else {
false
}
}
#[inline]
fn check_iterations(&mut self) -> bool {
self.global_iterations += 1;
if self.global_iterations > 50_000 {
self.error = Some("lexer exceeded 50K iterations".to_string());
self.lexstop = true;
self.tok = LexTok::Lexerr;
true
} else {
false
}
}
fn hgetc(&mut self) -> Option<char> {
if self.check_iterations() {
return None;
}
if let Some(c) = self.unget_buf.pop_front() {
if c == '\n' {
self.lineno += 1;
}
return Some(c);
}
let c = self.input[self.pos..].chars().next()?;
self.pos += c.len_utf8();
if c == '\n' {
self.lineno += 1;
}
Some(c)
}
fn hungetc(&mut self, c: char) {
self.unget_buf.push_front(c);
if c == '\n' && self.lineno > 1 {
self.lineno -= 1;
}
self.lexstop = false;
}
#[allow(dead_code)]
fn peek(&mut self) -> Option<char> {
if let Some(&c) = self.unget_buf.front() {
return Some(c);
}
self.input[self.pos..].chars().next()
}
fn add(&mut self, c: char) {
self.lexbuf.add(c);
}
fn is_blank(c: char) -> bool {
c == ' ' || c == '\t'
}
fn try_numeric_range_glob(&mut self) -> Option<String> {
let mut buf: Vec<char> = Vec::new();
loop {
match self.hgetc() {
Some(c) if c.is_ascii_digit() => buf.push(c),
Some(c) => {
buf.push(c);
break;
}
None => break,
}
}
if buf.last() != Some(&'-') {
for c in buf.iter().rev() {
self.hungetc(*c);
}
return None;
}
loop {
match self.hgetc() {
Some(c) if c.is_ascii_digit() => buf.push(c),
Some(c) => {
buf.push(c);
break;
}
None => break,
}
}
if buf.last() != Some(&'>') {
for c in buf.iter().rev() {
self.hungetc(*c);
}
return None;
}
Some(buf.into_iter().collect())
}
fn is_inblank(c: char) -> bool {
matches!(c, ' ' | '\t' | '\x0b' | '\x0c' | '\r')
}
fn is_digit(c: char) -> bool {
c.is_ascii_digit()
}
#[allow(dead_code)]
fn is_ident_start(c: char) -> bool {
c.is_ascii_alphabetic() || c == '_'
}
fn is_ident(c: char) -> bool {
c.is_ascii_alphanumeric() || c == '_'
}
pub fn zshlex(&mut self) {
if self.tok == LexTok::Lexerr {
return;
}
loop {
if self.inrepeat > 0 {
self.inrepeat += 1;
}
if self.inrepeat == 3 {
self.incmdpos = true;
}
self.tok = self.gettok();
break;
}
self.nocorrect &= 1;
if self.tok == LexTok::Newlin || self.tok == LexTok::Endinput {
self.process_heredocs();
}
if self.tok != LexTok::Newlin {
self.isnewlin = 0;
} else {
self.isnewlin = if self.pos < self.input.len() { -1 } else { 1 };
}
if self.tok == LexTok::Semi || (self.tok == LexTok::Newlin && !self.lexflags.newline) {
self.tok = LexTok::Seper;
}
if self.tok == LexTok::String {
if let Some(ref s) = self.tokstr {
if s == "{" && self.incmdpos {
self.tok = LexTok::Inbrace;
} else if s == "}" {
self.tok = LexTok::Outbrace;
} else if self.incasepat == 0 {
self.check_reserved_word();
}
}
}
if self.heredoc_pending > 0 && self.tok == LexTok::String {
if let Some(ref terminator) = self.tokstr {
let strip_tabs = self.heredoc_pending == 2;
let quoted = terminator.contains('\u{9d}')
|| terminator.contains('\u{9e}')
|| terminator.contains('\u{9f}')
|| terminator.starts_with('\'')
|| terminator.starts_with('"');
let term = terminator
.chars()
.filter(|c| {
*c != '\''
&& *c != '"'
&& *c != '\u{9d}'
&& *c != '\u{9e}'
&& *c != '\u{9f}'
})
.collect::<String>();
self.heredocs.push(HereDoc {
terminator: term,
strip_tabs,
content: String::new(),
quoted,
processed: false,
});
}
self.heredoc_pending = 0;
}
if self.incond > 0 {
if let Some(ref s) = self.tokstr {
if s == "="
|| s == "=="
|| s == "!="
|| s == "=~"
|| s == "\u{8d}"
|| s == "\u{8d}\u{8d}"
|| s == "!\u{8d}"
|| s == "\u{8d}~"
|| s == "\u{8d}\u{98}"
{
self.incondpat = true;
} else if self.incondpat {
}
}
match self.tok {
LexTok::Doutbrack
| LexTok::Damper
| LexTok::Dbar
| LexTok::Inpar
| LexTok::Outpar
| LexTok::Bang => {
self.incondpat = false;
}
_ => {}
}
} else {
self.incondpat = false;
}
match self.tok {
LexTok::Seper
| LexTok::Newlin
| LexTok::Semi
| LexTok::Dsemi
| LexTok::Semiamp
| LexTok::Semibar
| LexTok::Amper
| LexTok::Amperbang
| LexTok::Inpar
| LexTok::Inbrace
| LexTok::Dbar
| LexTok::Damper
| LexTok::Baramp
| LexTok::Inoutpar
| LexTok::Doloop
| LexTok::Then
| LexTok::Elif
| LexTok::Else
| LexTok::Doutbrack
| LexTok::Func => {
self.incmdpos = true;
}
LexTok::Bar
if self.incasepat <= 0 => {
self.incmdpos = true;
}
LexTok::String
| LexTok::Typeset
| LexTok::Envarray
| LexTok::Outpar
| LexTok::Case
| LexTok::Dinbrack => {
self.incmdpos = false;
}
_ => {}
}
if self.tok != LexTok::Dinpar {
self.infor = if self.tok == LexTok::For { 2 } else { 0 };
}
if self.tok.is_redirop()
|| self.tok == LexTok::For
|| self.tok == LexTok::Foreach
|| self.tok == LexTok::Select
{
self.inredir = true;
self.oldpos = self.incmdpos;
self.incmdpos = false;
} else if self.inredir {
self.incmdpos = self.oldpos;
self.inredir = false;
}
}
fn process_heredocs(&mut self) {
let n = self.heredocs.len();
for i in 0..n {
if self.heredocs[i].processed || self.heredocs[i].terminator.is_empty() {
continue;
}
let strip_tabs = self.heredocs[i].strip_tabs;
let terminator = self.heredocs[i].terminator.clone();
let mut content = String::new();
let mut line_count = 0;
loop {
line_count += 1;
if line_count > 10000 {
self.error = Some("heredoc exceeded 10000 lines".to_string());
self.tok = LexTok::Lexerr;
return;
}
let line = self.read_line();
if line.is_none() {
self.error = Some("here document too large or unterminated".to_string());
self.tok = LexTok::Lexerr;
return;
}
let line = line.unwrap();
let check_line = if strip_tabs {
line.trim_start_matches('\t')
} else {
line.as_str()
};
if check_line.trim_end_matches('\n') == terminator {
break;
}
if strip_tabs {
content.push_str(check_line);
} else {
content.push_str(&line);
}
}
self.heredocs[i].content = content;
self.heredocs[i].processed = true;
}
}
fn read_line(&mut self) -> Option<String> {
let mut line = String::new();
loop {
match self.hgetc() {
Some(c) => {
line.push(c);
if c == '\n' {
break;
}
}
None => {
if line.is_empty() {
return None;
}
break;
}
}
}
Some(line)
}
fn gettok(&mut self) -> LexTok {
self.tokstr = None;
self.tokfd = -1;
let mut ws_iterations = 0;
loop {
ws_iterations += 1;
if ws_iterations > 100_000 {
self.error = Some("gettok: infinite loop in whitespace skip".to_string());
return LexTok::Lexerr;
}
let c = match self.hgetc() {
Some(c) => c,
None => {
self.lexstop = true;
return if self.error.is_some() {
LexTok::Lexerr
} else {
LexTok::Endinput
};
}
};
if !Self::is_blank(c) {
self.hungetc(c);
break;
}
}
let c = match self.hgetc() {
Some(c) => c,
None => {
self.lexstop = true;
return LexTok::Endinput;
}
};
self.toklineno = self.lineno;
self.isfirstln = false;
if self.dbparens {
return self.lex_arith(c);
}
if Self::is_digit(c) {
let d = self.hgetc();
match d {
Some('&') => {
let e = self.hgetc();
if e == Some('>') {
self.tokfd = (c as u8 - b'0') as i32;
self.hungetc('>');
return self.lex_initial('&');
}
if let Some(e) = e {
self.hungetc(e);
}
self.hungetc('&');
}
Some('>') | Some('<') => {
self.tokfd = (c as u8 - b'0') as i32;
return self.lex_initial(d.unwrap());
}
Some(d) => {
self.hungetc(d);
}
None => {}
}
self.lexstop = false;
}
self.lex_initial(c)
}
fn lex_arith(&mut self, c: char) -> LexTok {
self.lexbuf.clear();
self.hungetc(c);
let end_char = if self.infor > 0 { ';' } else { ')' };
if self.dquote_parse(end_char, false).is_err() {
return LexTok::Lexerr;
}
self.tokstr = Some(self.lexbuf.as_str().to_string());
if !self.lexstop && self.infor > 0 {
self.infor -= 1;
return LexTok::Dinpar;
}
match self.hgetc() {
Some(')') => {
self.dbparens = false;
LexTok::Doutpar
}
c => {
if let Some(c) = c {
self.hungetc(c);
}
LexTok::Lexerr
}
}
}
fn lex_initial(&mut self, c: char) -> LexTok {
if c == '#' && !self.nocomments {
return self.lex_comment();
}
match c {
'\\' => {
let d = self.hgetc();
if d == Some('\n') {
return self.gettok();
}
if let Some(d) = d {
self.hungetc(d);
}
self.lexstop = false;
self.gettokstr(c, false)
}
'\n' => LexTok::Newlin,
';' => {
let d = self.hgetc();
match d {
Some(';') => LexTok::Dsemi,
Some('&') => LexTok::Semiamp,
Some('|') => LexTok::Semibar,
_ => {
if let Some(d) = d {
self.hungetc(d);
}
self.lexstop = false;
LexTok::Semi
}
}
}
'&' => {
let d = self.hgetc();
match d {
Some('&') => LexTok::Damper,
Some('!') | Some('|') => LexTok::Amperbang,
Some('>') => {
self.tokfd = self.tokfd.max(0);
let e = self.hgetc();
match e {
Some('!') | Some('|') => LexTok::Outangampbang,
Some('>') => {
let f = self.hgetc();
match f {
Some('!') | Some('|') => LexTok::Doutangampbang,
_ => {
if let Some(f) = f {
self.hungetc(f);
}
self.lexstop = false;
LexTok::Doutangamp
}
}
}
_ => {
if let Some(e) = e {
self.hungetc(e);
}
self.lexstop = false;
LexTok::Ampoutang
}
}
}
_ => {
if let Some(d) = d {
self.hungetc(d);
}
self.lexstop = false;
LexTok::Amper
}
}
}
'|' => {
let d = self.hgetc();
match d {
Some('|') if self.incasepat <= 0 => LexTok::Dbar,
Some('&') => LexTok::Baramp,
_ => {
if let Some(d) = d {
self.hungetc(d);
}
self.lexstop = false;
LexTok::Bar
}
}
}
'(' => {
let d = self.hgetc();
match d {
Some('(') => {
if self.infor > 0 {
self.dbparens = true;
return LexTok::Dinpar;
}
if self.incmdpos {
self.lexbuf.clear();
match self.cmd_or_math() {
CmdOrMath::Math => {
self.tokstr = Some(self.lexbuf.as_str().to_string());
return LexTok::Dinpar;
}
CmdOrMath::Cmd => {
self.tokstr = None;
return LexTok::Inpar;
}
CmdOrMath::Err => return LexTok::Lexerr,
}
}
self.hungetc('(');
self.lexstop = false;
self.gettokstr('(', false)
}
Some(')') => LexTok::Inoutpar,
_ => {
if let Some(d) = d {
self.hungetc(d);
}
self.lexstop = false;
if self.incond == 1 || self.incmdpos || self.incasepat >= 1 {
LexTok::Inpar
} else {
self.gettokstr('(', false)
}
}
}
}
')' => LexTok::Outpar,
'{' => {
let next = self.hgetc();
let next_is_close = matches!(next, Some('}'));
if self.incmdpos {
let is_brace_group = match next {
Some(' ') | Some('\t') | Some('\n') | Some('}') | None => true,
_ => false,
};
if let Some(ch) = next {
self.hungetc(ch);
}
if is_brace_group {
self.tokstr = Some("{".to_string());
LexTok::Inbrace
} else {
self.gettokstr(c, false)
}
} else if next_is_close {
if let Some(ch) = next {
self.hungetc(ch);
}
self.tokstr = Some("{".to_string());
LexTok::Inbrace
} else {
if let Some(ch) = next {
self.hungetc(ch);
}
self.gettokstr(c, false)
}
}
'}' => {
self.tokstr = Some("}".to_string());
LexTok::Outbrace
}
'[' => {
if self.incasepat > 0 {
self.gettokstr(c, false)
} else if self.incmdpos {
let next = self.hgetc();
if next == Some('[') {
self.tokstr = Some("[[".to_string());
self.incond = 1;
return LexTok::Dinbrack;
}
if let Some(ch) = next {
self.hungetc(ch);
}
self.tokstr = Some("[".to_string());
LexTok::String
} else {
self.gettokstr(c, false)
}
}
']' => {
if self.incond > 0 {
let next = self.hgetc();
if next == Some(']') {
self.tokstr = Some("]]".to_string());
self.incond = 0;
return LexTok::Doutbrack;
}
if let Some(ch) = next {
self.hungetc(ch);
}
}
self.gettokstr(c, false)
}
'<' => {
if self.incondpat || self.incasepat > 0 {
self.gettokstr(c, false)
} else {
self.lex_inang()
}
}
'>' => {
if self.incondpat || self.incasepat > 0 {
self.gettokstr(c, false)
} else {
self.lex_outang()
}
}
_ => self.gettokstr(c, false),
}
}
fn lex_comment(&mut self) -> LexTok {
if self.lexflags.comments_keep {
self.lexbuf.clear();
self.add('#');
}
loop {
let c = self.hgetc();
match c {
Some('\n') | None => break,
Some(c) => {
if self.lexflags.comments_keep {
self.add(c);
}
}
}
}
if self.lexflags.comments_keep {
self.tokstr = Some(self.lexbuf.as_str().to_string());
if !self.lexstop {
self.hungetc('\n');
}
return LexTok::String;
}
if self.lexflags.comments_strip && self.lexstop {
return LexTok::Endinput;
}
LexTok::Newlin
}
fn lex_inang(&mut self) -> LexTok {
let d = self.hgetc();
match d {
Some('(') => {
self.hungetc('(');
self.lexstop = false;
self.gettokstr('<', false)
}
Some('>') => LexTok::Inoutang,
Some('<') => {
let e = self.hgetc();
match e {
Some('(') => {
self.hungetc('(');
self.hungetc('<');
LexTok::Inang
}
Some('<') => LexTok::Trinang,
Some('-') => {
self.heredoc_pending = 2; LexTok::Dinangdash
}
_ => {
if let Some(e) = e {
self.hungetc(e);
}
self.lexstop = false;
self.heredoc_pending = 1; LexTok::Dinang
}
}
}
Some('&') => LexTok::Inangamp,
_ => {
if let Some(d) = d {
self.hungetc(d);
}
self.lexstop = false;
LexTok::Inang
}
}
}
fn lex_outang(&mut self) -> LexTok {
let d = self.hgetc();
match d {
Some('(') => {
self.hungetc('(');
self.lexstop = false;
self.gettokstr('>', false)
}
Some('&') => {
let e = self.hgetc();
match e {
Some('!') | Some('|') => LexTok::Outangampbang,
_ => {
if let Some(e) = e {
self.hungetc(e);
}
self.lexstop = false;
LexTok::Outangamp
}
}
}
Some('!') | Some('|') => LexTok::Outangbang,
Some('>') => {
let e = self.hgetc();
match e {
Some('&') => {
let f = self.hgetc();
match f {
Some('!') | Some('|') => LexTok::Doutangampbang,
_ => {
if let Some(f) = f {
self.hungetc(f);
}
self.lexstop = false;
LexTok::Doutangamp
}
}
}
Some('!') | Some('|') => LexTok::Doutangbang,
Some('(') => {
self.hungetc('(');
self.hungetc('>');
LexTok::Outang
}
_ => {
if let Some(e) = e {
self.hungetc(e);
}
self.lexstop = false;
LexTok::Doutang
}
}
}
_ => {
if let Some(d) = d {
self.hungetc(d);
}
self.lexstop = false;
LexTok::Outang
}
}
}
fn gettokstr(&mut self, c: char, sub: bool) -> LexTok {
let mut bct = 0; let mut pct = 0; let mut brct = 0; let mut in_brace_param = 0;
let mut peek = LexTok::String;
let mut intpos = 1;
let mut unmatched = '\0';
let mut c = c;
const MAX_ITERATIONS: usize = 100_000;
let mut iterations = 0;
if !sub {
self.lexbuf.clear();
}
loop {
iterations += 1;
if iterations > MAX_ITERATIONS {
self.error = Some("gettokstr exceeded maximum iterations".to_string());
return LexTok::Lexerr;
}
let inbl = Self::is_inblank(c);
if inbl && in_brace_param == 0 && pct == 0 {
break;
}
match c {
')' => {
if in_brace_param > 0 || sub {
self.add(char_tokens::OUTPAR);
} else if pct > 0 {
pct -= 1;
self.add(char_tokens::OUTPAR);
} else {
break;
}
}
'|' => {
if pct == 0 && in_brace_param == 0 {
if sub {
self.add(c);
} else {
break;
}
} else {
self.add(char_tokens::BAR);
}
}
'$' => {
let e = self.hgetc();
match e {
Some('\\') => {
let f = self.hgetc();
if f != Some('\n') {
if let Some(f) = f {
self.hungetc(f);
}
self.hungetc('\\');
self.add(char_tokens::STRING);
} else {
continue;
}
}
Some('[') => {
self.add(char_tokens::STRING);
self.add(char_tokens::INBRACK);
if self.dquote_parse(']', sub).is_err() {
peek = LexTok::Lexerr;
break;
}
self.add(char_tokens::OUTBRACK);
}
Some('(') => {
self.add(char_tokens::STRING);
match self.cmd_or_math_sub() {
CmdOrMath::Cmd => self.add(char_tokens::OUTPAR),
CmdOrMath::Math => self.add(char_tokens::OUTPARMATH),
CmdOrMath::Err => {
peek = LexTok::Lexerr;
break;
}
}
}
Some('{') => {
self.add(c);
self.add(char_tokens::INBRACE);
bct += 1;
if in_brace_param == 0 {
in_brace_param = bct;
}
}
Some('\'') => {
self.add(char_tokens::QSTRING);
self.add(char_tokens::SNULL);
loop {
let ch = self.hgetc();
match ch {
Some('\'') => break,
Some('\\') => {
self.add(char_tokens::BNULL);
match self.hgetc() {
Some(n) => self.add(n),
None => {
self.lexstop = true;
unmatched = '\'';
peek = LexTok::Lexerr;
break;
}
}
}
Some(ch) => self.add(ch),
None => {
self.lexstop = true;
unmatched = '\'';
peek = LexTok::Lexerr;
break;
}
}
}
if unmatched != '\0' {
break;
}
self.add(char_tokens::SNULL);
}
Some('"') => {
self.add(char_tokens::QSTRING);
self.add(char_tokens::DNULL);
if self.dquote_parse('"', sub).is_err() {
peek = LexTok::Lexerr;
break;
}
self.add(char_tokens::DNULL);
}
_ => {
if let Some(e) = e {
self.hungetc(e);
}
self.lexstop = false;
self.add(char_tokens::STRING);
}
}
}
'[' => {
if in_brace_param == 0 {
brct += 1;
}
self.add(char_tokens::INBRACK);
}
']' => {
if in_brace_param == 0 && brct > 0 {
brct -= 1;
}
self.add(char_tokens::OUTBRACK);
}
'(' => {
if in_brace_param == 0 && !sub {
let e = self.hgetc();
if let Some(ch) = e {
self.hungetc(ch);
}
self.lexstop = false;
if e == Some(')') {
break;
}
}
if in_brace_param == 0 {
pct += 1;
}
self.add(char_tokens::INPAR);
}
'{' => {
bct += 1;
self.add(c);
}
'}' => {
if in_brace_param > 0 {
if bct == in_brace_param {
in_brace_param = 0;
}
bct -= 1;
self.add(char_tokens::OUTBRACE);
} else if bct > 0 {
bct -= 1;
self.add(c);
} else {
break;
}
}
'>' => {
if in_brace_param > 0 || sub || self.incondpat || self.incasepat > 0 {
self.add(c);
} else {
let e = self.hgetc();
if e != Some('(') {
if let Some(e) = e {
self.hungetc(e);
}
self.lexstop = false;
break;
}
self.add(char_tokens::OUTANGPROC);
if self.skip_command_sub().is_err() {
peek = LexTok::Lexerr;
break;
}
self.add(char_tokens::OUTPAR);
}
}
'<' => {
if in_brace_param > 0 || sub || self.incondpat || self.incasepat > 0 {
self.add(c);
} else if let Some(range_chars) = self.try_numeric_range_glob() {
self.add(c);
for ch in range_chars.chars() {
self.add(ch);
}
} else {
let e = self.hgetc();
if e != Some('(') {
if let Some(e) = e {
self.hungetc(e);
}
self.lexstop = false;
break;
}
self.add(char_tokens::INANG);
if self.skip_command_sub().is_err() {
peek = LexTok::Lexerr;
break;
}
self.add(char_tokens::OUTPAR);
}
}
'=' => {
if !sub {
if intpos > 0 {
let e = self.hgetc();
if e == Some('(') {
self.add(char_tokens::EQUALS);
if self.skip_command_sub().is_err() {
peek = LexTok::Lexerr;
break;
}
self.add(char_tokens::OUTPAR);
} else {
if let Some(e) = e {
self.hungetc(e);
}
self.lexstop = false;
self.add(char_tokens::EQUALS);
}
} else if peek != LexTok::Envstring
&& (self.incmdpos || self.intypeset)
&& bct == 0
&& brct == 0
&& self.incasepat == 0
{
let tok_so_far = self.lexbuf.as_str().to_string();
if self.is_valid_assignment_target(&tok_so_far) {
let next = self.hgetc();
if next == Some('(') {
self.tokstr = Some(self.lexbuf.as_str().to_string());
return LexTok::Envarray;
}
if let Some(next) = next {
self.hungetc(next);
}
self.lexstop = false;
peek = LexTok::Envstring;
intpos = 2;
self.add(char_tokens::EQUALS);
} else {
self.add(char_tokens::EQUALS);
}
} else {
self.add(char_tokens::EQUALS);
}
} else {
self.add(char_tokens::EQUALS);
}
}
'\\' => {
let next = self.hgetc();
if next == Some('\n') {
let next = self.hgetc();
if let Some(next) = next {
c = next;
continue;
}
break;
} else {
self.add(char_tokens::BNULL);
if let Some(next) = next {
self.add(next);
}
}
}
'\'' => {
self.add(char_tokens::SNULL);
loop {
let ch = self.hgetc();
match ch {
Some('\'') => break,
Some(ch) => self.add(ch),
None => {
self.lexstop = true;
unmatched = '\'';
peek = LexTok::Lexerr;
break;
}
}
}
if unmatched != '\0' {
break;
}
self.add(char_tokens::SNULL);
}
'"' => {
self.add(char_tokens::DNULL);
if self.dquote_parse('"', sub).is_err() {
unmatched = '"';
if !self.lexflags.active {
peek = LexTok::Lexerr;
}
break;
}
self.add(char_tokens::DNULL);
}
'`' => {
self.add(char_tokens::TICK);
loop {
let ch = self.hgetc();
match ch {
Some('`') => break,
Some('\\') => {
let next = self.hgetc();
match next {
Some('\n') => continue, Some(c) if c == '`' || c == '\\' || c == '$' => {
self.add(char_tokens::BNULL);
self.add(c);
}
Some(c) => {
self.add('\\');
self.add(c);
}
None => break,
}
}
Some(ch) => self.add(ch),
None => {
self.lexstop = true;
unmatched = '`';
peek = LexTok::Lexerr;
break;
}
}
}
if unmatched != '\0' {
break;
}
self.add(char_tokens::TICK);
}
'~' => {
self.add(char_tokens::TILDE);
}
'#' => {
self.add(char_tokens::POUND);
}
'^' => {
self.add(char_tokens::HAT);
}
'*' => {
self.add(char_tokens::STAR);
}
'?' => {
self.add(char_tokens::QUEST);
}
',' if bct > in_brace_param => {
self.add(char_tokens::COMMA);
}
'-' => {
self.add(char_tokens::DASH);
}
'!' if brct > 0 => {
self.add(char_tokens::BANG);
}
'\n' | ';' | '&' => {
break;
}
_ => {
self.add(c);
}
}
c = match self.hgetc() {
Some(c) => c,
None => {
self.lexstop = true;
break;
}
};
if intpos > 0 {
intpos -= 1;
}
}
if !self.lexstop {
self.hungetc(c);
}
if unmatched != '\0' && !self.lexflags.active {
self.error = Some(format!("unmatched {}", unmatched));
}
if in_brace_param > 0 {
self.error = Some("closing brace expected".to_string());
}
self.tokstr = Some(self.lexbuf.as_str().to_string());
peek
}
fn is_valid_assignment_target(&self, s: &str) -> bool {
let mut chars = s.chars().peekable();
if let Some(&c) = chars.peek() {
if char_tokens::is_token(c) {
return false;
}
}
if let Some(&c) = chars.peek() {
if c.is_ascii_digit() {
while let Some(&c) = chars.peek() {
if !c.is_ascii_digit() {
break;
}
chars.next();
}
return chars.peek().is_none();
}
}
let mut has_ident = false;
while let Some(&c) = chars.peek() {
if c == char_tokens::INBRACK || c == '[' {
break;
}
if c == '+' {
chars.next();
return chars.peek().is_none() || chars.peek() == Some(&'=');
}
if !Self::is_ident(c) && c != char_tokens::STRING && !char_tokens::is_token(c) {
return false;
}
has_ident = true;
chars.next();
}
has_ident
}
fn dquote_parse(&mut self, endchar: char, sub: bool) -> Result<(), ()> {
self.recursion_depth += 1;
if self.check_recursion() {
self.recursion_depth -= 1;
return Err(());
}
let result = self.dquote_parse_inner(endchar, sub);
self.recursion_depth -= 1;
result
}
fn dquote_parse_inner(&mut self, endchar: char, sub: bool) -> Result<(), ()> {
let mut pct = 0; let mut brct = 0; let mut bct = 0; let mut intick = false; let is_math = endchar == ')' || endchar == ']' || self.infor > 0;
const MAX_ITERATIONS: usize = 100_000;
let mut iterations = 0;
loop {
iterations += 1;
if iterations > MAX_ITERATIONS {
self.error = Some("dquote_parse exceeded maximum iterations".to_string());
return Err(());
}
let c = self.hgetc();
let c = match c {
Some(c) if c == endchar && !intick && bct == 0 => {
if is_math && (pct > 0 || brct > 0) {
self.add(c);
if c == ')' {
pct -= 1;
} else if c == ']' {
brct -= 1;
}
continue;
}
return Ok(());
}
Some(c) => c,
None => {
self.lexstop = true;
return Err(());
}
};
match c {
'\\' => {
let next = self.hgetc();
match next {
Some('\n') if !sub => continue, Some(c)
if c == '$'
|| c == '\\'
|| (c == '}' && !intick && bct > 0)
|| c == endchar
|| c == '`'
|| (endchar == ']'
&& (c == '['
|| c == ']'
|| c == '('
|| c == ')'
|| c == '{'
|| c == '}'
|| (c == '"' && sub))) =>
{
self.add(char_tokens::BNULL);
self.add(c);
}
Some(c) => {
self.add('\\');
self.hungetc(c);
continue;
}
None => {
self.add('\\');
}
}
}
'$' => {
if intick {
self.add(c);
continue;
}
let next = self.hgetc();
match next {
Some('(') => {
self.add(char_tokens::QSTRING);
match self.cmd_or_math_sub() {
CmdOrMath::Cmd => self.add(char_tokens::OUTPAR),
CmdOrMath::Math => self.add(char_tokens::OUTPARMATH),
CmdOrMath::Err => return Err(()),
}
}
Some('[') => {
self.add(char_tokens::STRING);
self.add(char_tokens::INBRACK);
self.dquote_parse(']', sub)?;
self.add(char_tokens::OUTBRACK);
}
Some('{') => {
self.add(char_tokens::QSTRING);
self.add(char_tokens::INBRACE);
bct += 1;
}
Some('$') => {
self.add(char_tokens::QSTRING);
self.add('$');
}
_ => {
if let Some(next) = next {
self.hungetc(next);
}
self.lexstop = false;
self.add(char_tokens::QSTRING);
}
}
}
'}' => {
if intick || bct == 0 {
self.add(c);
} else {
self.add(char_tokens::OUTBRACE);
bct -= 1;
}
}
'`' => {
self.add(char_tokens::QTICK);
intick = !intick;
}
'(' => {
if !is_math || bct == 0 {
pct += 1;
}
self.add(c);
}
')' => {
if !is_math || bct == 0 {
if pct == 0 && is_math {
return Err(());
}
pct -= 1;
}
self.add(c);
}
'[' => {
if !is_math || bct == 0 {
brct += 1;
}
self.add(c);
}
']' => {
if !is_math || bct == 0 {
if brct == 0 && is_math {
return Err(());
}
brct -= 1;
}
self.add(c);
}
'"' => {
if intick || (endchar != '"' && bct == 0) {
self.add(c);
} else if bct > 0 {
self.add(char_tokens::DNULL);
self.dquote_parse('"', sub)?;
self.add(char_tokens::DNULL);
} else {
return Err(());
}
}
_ => {
self.add(c);
}
}
}
}
fn cmd_or_math(&mut self) -> CmdOrMath {
let oldlen = self.lexbuf.len();
if self.dquote_parse(')', false).is_err() {
while self.lexbuf.len() > oldlen {
if let Some(c) = self.lexbuf.pop() {
self.hungetc(c);
}
}
self.hungetc('(');
self.lexstop = false;
return if self.skip_command_sub().is_err() {
CmdOrMath::Err
} else {
CmdOrMath::Cmd
};
}
let c = self.hgetc();
if c == Some(')') {
return CmdOrMath::Math;
}
if let Some(c) = c {
self.hungetc(c);
}
self.lexstop = false;
while self.lexbuf.len() > oldlen {
if let Some(c) = self.lexbuf.pop() {
self.hungetc(c);
}
}
self.hungetc('(');
if self.skip_command_sub().is_err() {
CmdOrMath::Err
} else {
CmdOrMath::Cmd
}
}
fn cmd_or_math_sub(&mut self) -> CmdOrMath {
const MAX_CONTINUATIONS: usize = 10_000;
let mut continuations = 0;
loop {
continuations += 1;
if continuations > MAX_CONTINUATIONS {
self.error = Some("cmd_or_math_sub: too many line continuations".to_string());
return CmdOrMath::Err;
}
let c = self.hgetc();
if c == Some('\\') {
let c2 = self.hgetc();
if c2 != Some('\n') {
if let Some(c2) = c2 {
self.hungetc(c2);
}
self.hungetc('\\');
self.lexstop = false;
return if self.skip_command_sub().is_err() {
CmdOrMath::Err
} else {
CmdOrMath::Cmd
};
}
continue;
}
if c == Some('(') {
let lexpos = self.lexbuf.len();
self.add(char_tokens::INPAR);
self.add('(');
if self.dquote_parse(')', false).is_ok() {
let c2 = self.hgetc();
if c2 == Some(')') {
self.add(')');
return CmdOrMath::Math;
}
if let Some(c2) = c2 {
self.hungetc(c2);
}
}
while self.lexbuf.len() > lexpos {
if let Some(ch) = self.lexbuf.pop() {
self.hungetc(ch);
}
}
self.hungetc('(');
self.lexstop = false;
} else {
if let Some(c) = c {
self.hungetc(c);
}
self.lexstop = false;
}
return if self.skip_command_sub().is_err() {
CmdOrMath::Err
} else {
CmdOrMath::Cmd
};
}
}
fn skip_command_sub(&mut self) -> Result<(), ()> {
let mut pct = 1;
let mut start = true;
const MAX_ITERATIONS: usize = 100_000;
let mut iterations = 0;
self.add(char_tokens::INPAR);
loop {
iterations += 1;
if iterations > MAX_ITERATIONS {
self.error = Some("skip_command_sub exceeded maximum iterations".to_string());
return Err(());
}
let c = self.hgetc();
let c = match c {
Some(c) => c,
None => {
self.lexstop = true;
return Err(());
}
};
let iswhite = Self::is_inblank(c);
match c {
'(' => {
pct += 1;
self.add(c);
}
')' => {
pct -= 1;
if pct == 0 {
return Ok(());
}
self.add(c);
}
'\\' => {
self.add(c);
if let Some(c) = self.hgetc() {
self.add(c);
}
}
'\'' => {
self.add(c);
loop {
let ch = self.hgetc();
match ch {
Some('\'') => {
self.add('\'');
break;
}
Some(ch) => self.add(ch),
None => {
self.lexstop = true;
return Err(());
}
}
}
}
'"' => {
self.add(c);
loop {
let ch = self.hgetc();
match ch {
Some('"') => {
self.add('"');
break;
}
Some('\\') => {
self.add('\\');
if let Some(ch) = self.hgetc() {
self.add(ch);
}
}
Some(ch) => self.add(ch),
None => {
self.lexstop = true;
return Err(());
}
}
}
}
'`' => {
self.add(c);
loop {
let ch = self.hgetc();
match ch {
Some('`') => {
self.add('`');
break;
}
Some('\\') => {
self.add('\\');
if let Some(ch) = self.hgetc() {
self.add(ch);
}
}
Some(ch) => self.add(ch),
None => {
self.lexstop = true;
return Err(());
}
}
}
}
'#' if start => {
self.add(c);
loop {
let ch = self.hgetc();
match ch {
Some('\n') => {
self.add('\n');
break;
}
Some(ch) => self.add(ch),
None => break,
}
}
}
_ => {
self.add(c);
}
}
start = iswhite;
}
}
pub fn ctxtlex(&mut self) {
self.zshlex();
match self.tok {
LexTok::Seper
| LexTok::Newlin
| LexTok::Semi
| LexTok::Dsemi
| LexTok::Semiamp
| LexTok::Semibar
| LexTok::Amper
| LexTok::Amperbang
| LexTok::Inpar
| LexTok::Inbrace
| LexTok::Dbar
| LexTok::Damper
| LexTok::Bar
| LexTok::Baramp
| LexTok::Inoutpar
| LexTok::Doloop
| LexTok::Then
| LexTok::Elif
| LexTok::Else
| LexTok::Doutbrack => {
self.incmdpos = true;
}
LexTok::String
| LexTok::Typeset
| LexTok::Envarray
| LexTok::Outpar
| LexTok::Case
| LexTok::Dinbrack => {
self.incmdpos = false;
}
_ => {}
}
if self.tok != LexTok::Dinpar {
self.infor = if self.tok == LexTok::For { 2 } else { 0 };
}
if self.tok.is_redirop()
|| self.tok == LexTok::For
|| self.tok == LexTok::Foreach
|| self.tok == LexTok::Select
{
self.inredir = true;
self.oldpos = self.incmdpos;
self.incmdpos = false;
} else if self.inredir {
self.incmdpos = self.oldpos;
self.inredir = false;
}
}
pub fn gotword(&mut self) {
self.lexflags = LexFlags::default();
}
pub fn register_heredoc(&mut self, terminator: String, strip_tabs: bool) {
self.heredocs.push(HereDoc {
terminator,
strip_tabs,
content: String::new(),
quoted: false,
processed: false,
});
}
pub fn check_reserved_word(&mut self) -> bool {
if let Some(ref tokstr) = self.tokstr {
if self.incmdpos || (tokstr == "}" && self.tok == LexTok::String) {
if let Some(tok) = crate::tokens::lookup_reserved_word(tokstr) {
self.tok = tok;
if tok == LexTok::Repeat {
self.inrepeat = 1;
}
if tok == LexTok::Dinbrack {
self.incond = 1;
}
return true;
}
if tokstr == "]]" && self.incond > 0 {
self.tok = LexTok::Doutbrack;
self.incond = 0;
return true;
}
}
if self.incond > 0 && tokstr == "]]" {
self.tok = LexTok::Doutbrack;
self.incond = 0;
return true;
}
if self.incond == 1 && tokstr == "!" {
self.tok = LexTok::Bang;
return true;
}
}
false
}
}
enum CmdOrMath {
Cmd,
Math,
Err,
}
pub fn isnumglob(input: &str, pos: usize) -> bool {
let chars: Vec<char> = input[pos..].chars().collect();
let mut i = 0;
let mut expect_close = false;
while i < chars.len() {
let c = chars[i];
if c.is_ascii_digit() {
i += 1;
} else if c == '-' && !expect_close {
expect_close = true;
i += 1;
} else if c == '>' && expect_close {
return true;
} else {
break;
}
}
false
}
pub fn parsestrnoerr(s: &str) -> Result<String, String> {
parsestr_inner(s)
}
pub fn parsestr(s: &str) -> Result<String, String> {
parsestr_inner(s)
}
fn parsestr_inner(s: &str) -> Result<String, String> {
let mut result = String::with_capacity(s.len());
let chars: Vec<char> = s.chars().collect();
let mut i = 0;
while i < chars.len() {
let c = chars[i];
match c {
'\\' => {
i += 1;
if i < chars.len() {
let next = chars[i];
match next {
'$' | '\\' | '`' | '"' | '\n' => {
result.push(char_tokens::BNULL);
result.push(next);
}
_ => {
result.push('\\');
result.push(next);
}
}
} else {
result.push('\\');
}
}
'$' => {
result.push(char_tokens::QSTRING);
if i + 1 < chars.len() {
let next = chars[i + 1];
if next == '{' {
result.push(char_tokens::INBRACE);
i += 1;
} else if next == '(' {
result.push(char_tokens::INPAR);
i += 1;
}
}
}
'`' => {
result.push(char_tokens::QTICK);
}
_ => {
result.push(c);
}
}
i += 1;
}
Ok(result)
}
pub fn parse_subscript(s: &str, endchar: char) -> Option<usize> {
if s.is_empty() || s.starts_with(endchar) {
return None;
}
let chars: Vec<char> = s.chars().collect();
let mut i = 0;
let mut depth = 0;
let mut in_dquote = false;
let mut in_squote = false;
while i < chars.len() {
let c = chars[i];
if in_squote {
if c == '\'' {
in_squote = false;
}
i += 1;
continue;
}
if in_dquote {
if c == '"' {
in_dquote = false;
} else if c == '\\' && i + 1 < chars.len() {
i += 1; }
i += 1;
continue;
}
match c {
'\\' => {
i += 1; }
'\'' => {
in_squote = true;
}
'"' => {
in_dquote = true;
}
'[' | '(' => {
depth += 1;
}
']' | ')' => {
if depth > 0 {
depth -= 1;
} else if c == endchar {
return Some(i);
}
}
_ => {}
}
if c == endchar && depth == 0 {
return Some(i);
}
i += 1;
}
None
}
pub fn parse_subst_string(s: &str) -> Result<String, String> {
if s.is_empty() {
return Ok(String::new());
}
let mut result = String::with_capacity(s.len());
let chars: Vec<char> = s.chars().collect();
let mut i = 0;
while i < chars.len() {
let c = chars[i];
match c {
'\\' => {
result.push(char_tokens::BNULL);
i += 1;
if i < chars.len() {
result.push(chars[i]);
}
}
'\'' => {
result.push(char_tokens::SNULL);
i += 1;
while i < chars.len() && chars[i] != '\'' {
result.push(chars[i]);
i += 1;
}
result.push(char_tokens::SNULL);
}
'"' => {
result.push(char_tokens::DNULL);
i += 1;
while i < chars.len() && chars[i] != '"' {
if chars[i] == '\\' && i + 1 < chars.len() {
result.push(char_tokens::BNULL);
i += 1;
result.push(chars[i]);
} else if chars[i] == '$' {
result.push(char_tokens::QSTRING);
} else {
result.push(chars[i]);
}
i += 1;
}
result.push(char_tokens::DNULL);
}
'$' => {
result.push(char_tokens::STRING);
if i + 1 < chars.len() {
match chars[i + 1] {
'{' => {
result.push(char_tokens::INBRACE);
i += 1;
}
'(' => {
result.push(char_tokens::INPAR);
i += 1;
}
_ => {}
}
}
}
'*' => result.push(char_tokens::STAR),
'?' => result.push(char_tokens::QUEST),
'[' => result.push(char_tokens::INBRACK),
']' => result.push(char_tokens::OUTBRACK),
'{' => result.push(char_tokens::INBRACE),
'}' => result.push(char_tokens::OUTBRACE),
'~' => result.push(char_tokens::TILDE),
'#' => result.push(char_tokens::POUND),
'^' => result.push(char_tokens::HAT),
_ => result.push(c),
}
i += 1;
}
Ok(result)
}
pub fn untokenize_preserve_quotes(s: &str) -> String {
let mut result = String::with_capacity(s.len() + 4);
for c in s.chars() {
let cu = c as u32;
if (0x83..=0x9f).contains(&cu) {
match c {
c if c == char_tokens::POUND => result.push('#'),
c if c == char_tokens::STRING => result.push('$'),
c if c == char_tokens::HAT => result.push('^'),
c if c == char_tokens::STAR => result.push('*'),
c if c == char_tokens::INPAR => result.push('('),
c if c == char_tokens::OUTPAR => result.push(')'),
c if c == char_tokens::INPARMATH => result.push('('),
c if c == char_tokens::OUTPARMATH => result.push(')'),
c if c == char_tokens::QSTRING => result.push('$'),
c if c == char_tokens::EQUALS => result.push('='),
c if c == char_tokens::BAR => result.push('|'),
c if c == char_tokens::INBRACE => result.push('{'),
c if c == char_tokens::OUTBRACE => result.push('}'),
c if c == char_tokens::INBRACK => result.push('['),
c if c == char_tokens::OUTBRACK => result.push(']'),
c if c == char_tokens::TICK => result.push('`'),
c if c == char_tokens::INANG => result.push('<'),
c if c == char_tokens::OUTANG => result.push('>'),
c if c == char_tokens::OUTANGPROC => result.push('>'),
c if c == char_tokens::QUEST => result.push('?'),
c if c == char_tokens::TILDE => result.push('~'),
c if c == char_tokens::QTICK => result.push('`'),
c if c == char_tokens::COMMA => result.push(','),
c if c == char_tokens::DASH => result.push('-'),
c if c == char_tokens::BANG => result.push('!'),
c if c == char_tokens::SNULL => result.push('\''),
c if c == char_tokens::DNULL => result.push('"'),
c if c == char_tokens::BNULL => result.push('\\'),
_ => {
let idx = c as usize;
if idx < char_tokens::ZTOKENS.len() {
result.push(char_tokens::ZTOKENS.chars().nth(idx).unwrap_or(c));
} else {
result.push(c);
}
}
}
} else {
result.push(c);
}
}
result
}
pub fn untokenize(s: &str) -> String {
let mut result = String::with_capacity(s.len());
let chars: Vec<char> = s.chars().collect();
let mut i = 0;
while i < chars.len() {
let c = chars[i];
let cu = c as u32;
if (0x83..=0x9f).contains(&cu) {
match c {
c if c == char_tokens::POUND => result.push('#'),
c if c == char_tokens::STRING => result.push('$'),
c if c == char_tokens::HAT => result.push('^'),
c if c == char_tokens::STAR => result.push('*'),
c if c == char_tokens::INPAR => result.push('('),
c if c == char_tokens::OUTPAR => result.push(')'),
c if c == char_tokens::INPARMATH => result.push('('),
c if c == char_tokens::OUTPARMATH => result.push(')'),
c if c == char_tokens::QSTRING => result.push('$'),
c if c == char_tokens::EQUALS => result.push('='),
c if c == char_tokens::BAR => result.push('|'),
c if c == char_tokens::INBRACE => result.push('{'),
c if c == char_tokens::OUTBRACE => result.push('}'),
c if c == char_tokens::INBRACK => result.push('['),
c if c == char_tokens::OUTBRACK => result.push(']'),
c if c == char_tokens::TICK => result.push('`'),
c if c == char_tokens::INANG => result.push('<'),
c if c == char_tokens::OUTANG => result.push('>'),
c if c == char_tokens::OUTANGPROC => result.push('>'),
c if c == char_tokens::QUEST => result.push('?'),
c if c == char_tokens::TILDE => result.push('~'),
c if c == char_tokens::QTICK => result.push('`'),
c if c == char_tokens::COMMA => result.push(','),
c if c == char_tokens::DASH => result.push('-'),
c if c == char_tokens::BANG => result.push('!'),
c if c == char_tokens::SNULL
|| c == char_tokens::DNULL
|| c == char_tokens::BNULL =>
{
}
_ => {
let idx = c as usize;
if idx < char_tokens::ZTOKENS.len() {
result.push(char_tokens::ZTOKENS.chars().nth(idx).unwrap_or(c));
} else {
result.push(c);
}
}
}
} else {
result.push(c);
}
i += 1;
}
result
}
pub fn has_token(s: &str) -> bool {
s.chars().any(|c| (c as u32) < 32)
}
pub fn tokens_to_printable(s: &str) -> String {
untokenize(s)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_simple_command() {
let mut lexer = ZshLexer::new("echo hello");
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
assert_eq!(lexer.tokstr, Some("echo".to_string()));
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
assert_eq!(lexer.tokstr, Some("hello".to_string()));
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::Endinput);
}
#[test]
fn test_pipeline() {
let mut lexer = ZshLexer::new("ls | grep foo");
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::Bar);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
}
#[test]
fn test_redirections() {
let mut lexer = ZshLexer::new("echo > file");
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::Outang);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
}
#[test]
fn test_heredoc() {
let mut lexer = ZshLexer::new("cat << EOF");
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::Dinang);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
}
#[test]
fn test_single_quotes() {
let mut lexer = ZshLexer::new("echo 'hello world'");
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
assert!(lexer.tokstr.is_some());
}
#[test]
fn test_function_tokens() {
let mut lexer = ZshLexer::new("function foo { }");
lexer.zshlex();
assert_eq!(
lexer.tok,
LexTok::Func,
"expected Func, got {:?}",
lexer.tok
);
lexer.zshlex();
assert_eq!(
lexer.tok,
LexTok::String,
"expected String for 'foo', got {:?}",
lexer.tok
);
assert_eq!(lexer.tokstr, Some("foo".to_string()));
lexer.zshlex();
assert_eq!(
lexer.tok,
LexTok::Inbrace,
"expected Inbrace, got {:?} tokstr={:?}",
lexer.tok,
lexer.tokstr
);
lexer.zshlex();
assert_eq!(
lexer.tok,
LexTok::Outbrace,
"expected Outbrace, got {:?} tokstr={:?} incmdpos={}",
lexer.tok,
lexer.tokstr,
lexer.incmdpos
);
}
#[test]
fn test_double_quotes() {
let mut lexer = ZshLexer::new("echo \"hello $name\"");
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
assert!(lexer.tokstr.is_some());
}
#[test]
fn test_command_substitution() {
let mut lexer = ZshLexer::new("echo $(pwd)");
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
}
#[test]
fn test_env_assignment() {
let mut lexer = ZshLexer::new("FOO=bar echo");
lexer.incmdpos = true;
lexer.zshlex();
assert_eq!(
lexer.tok,
LexTok::Envstring,
"tok={:?} tokstr={:?}",
lexer.tok,
lexer.tokstr
);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
}
#[test]
fn test_array_assignment() {
let mut lexer = ZshLexer::new("arr=(a b c)");
lexer.incmdpos = true;
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::Envarray);
}
#[test]
fn test_process_substitution() {
let mut lexer = ZshLexer::new("diff <(ls) >(cat)");
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
}
#[test]
fn test_arithmetic() {
let mut lexer = ZshLexer::new("echo $((1+2))");
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
lexer.zshlex();
assert_eq!(lexer.tok, LexTok::String);
}
#[test]
fn test_semicolon_variants() {
let mut lexer = ZshLexer::new("case x in a) cmd;; b) cmd;& c) cmd;| esac");
loop {
lexer.zshlex();
if lexer.tok == LexTok::Dsemi || lexer.tok == LexTok::Endinput {
break;
}
}
assert_eq!(lexer.tok, LexTok::Dsemi);
loop {
lexer.zshlex();
if lexer.tok == LexTok::Semiamp || lexer.tok == LexTok::Endinput {
break;
}
}
assert_eq!(lexer.tok, LexTok::Semiamp);
loop {
lexer.zshlex();
if lexer.tok == LexTok::Semibar || lexer.tok == LexTok::Endinput {
break;
}
}
assert_eq!(lexer.tok, LexTok::Semibar);
}
}