Skip to main content

zsh/
lexer.rs

1//! Zsh lexical analyzer - Direct port from zsh/Src/lex.c
2//!
3//! This lexer tokenizes zsh shell input into a stream of tokens.
4//! It handles all zsh-specific syntax including:
5//! - Single/double/dollar quotes
6//! - Command substitution $(...)  and `...`
7//! - Arithmetic $((...))
8//! - Parameter expansion ${...}
9//! - Process substitution <(...) >(...)
10//! - Here documents
11//! - All redirection operators
12//! - Comments
13//! - Continuation lines
14
15use crate::tokens::{char_tokens, LexTok};
16use std::collections::VecDeque;
17
/// Switches that tune how the lexer reports tokens.
///
/// All flags default to `false`.
#[derive(Debug, Clone, Copy, Default)]
pub struct LexFlags {
    /// Lexing on behalf of ZLE (the line editor), e.g. for completion.
    pub zle: bool,
    /// Report newlines as `Newlin` tokens; when unset, `zshlex` folds them
    /// into `Seper`.
    pub newline: bool,
    /// Return each comment's text as a `String` token instead of dropping it.
    pub comments_keep: bool,
    /// Drop comments; a comment that runs to the end of input then yields
    /// `Endinput` rather than a newline token.
    pub comments_strip: bool,
    /// Lexing is active (set when driven from bufferwords).
    pub active: bool,
}
32
/// Growable text buffer in which the lexer assembles the current token.
///
/// `siz` mirrors the C lexer's explicit buffer size: it is the growth
/// threshold that `add` doubles whenever the stored text reaches it.
#[derive(Debug, Clone)]
struct LexBuf {
    /// Token text accumulated so far.
    data: String,
    /// Current growth threshold in bytes (always kept above `data.len()` by `add`).
    siz: usize,
}

impl LexBuf {
    /// Create an empty buffer with room for 256 bytes.
    fn new() -> Self {
        LexBuf {
            data: String::with_capacity(256),
            siz: 256,
        }
    }

    /// Discard the buffered text (the allocation is kept).
    fn clear(&mut self) {
        self.data.clear();
    }

    /// Append one character, growing the threshold when it is reached.
    fn add(&mut self, c: char) {
        self.data.push(c);
        let len = self.data.len();
        if len >= self.siz {
            // `add_str` can push the length far beyond the threshold, so a
            // single doubling is not always enough; keep doubling until the
            // threshold is ahead of the length again. This keeps the
            // subtraction below from underflowing.
            while self.siz <= len {
                self.siz = self.siz.max(1).saturating_mul(2);
            }
            self.data.reserve(self.siz - len);
        }
    }

    /// Append a whole string slice (does not touch the growth threshold).
    #[allow(dead_code)]
    fn add_str(&mut self, s: &str) {
        self.data.push_str(s);
    }

    /// Length of the buffered text in bytes.
    fn len(&self) -> usize {
        self.data.len()
    }

    /// Borrow the buffered text.
    fn as_str(&self) -> &str {
        &self.data
    }

    /// Consume the buffer and return the accumulated text.
    #[allow(dead_code)]
    fn into_string(self) -> String {
        self.data
    }

    /// Last character of the buffered text, if any.
    #[allow(dead_code)]
    fn last_char(&self) -> Option<char> {
        // Decode from the back instead of walking the whole string.
        self.data.chars().next_back()
    }

    /// Remove and return the last character, if any.
    fn pop(&mut self) -> Option<char> {
        self.data.pop()
    }
}
87
/// One here-document announced by `<<` or `<<-` whose body has not been
/// attached yet.
#[derive(Debug, Clone)]
pub struct HereDoc {
    /// Word that ends the document (surrounding quotes already removed by
    /// the lexer when the entry is registered).
    pub terminator: String,
    /// `true` for the `<<-` form: leading tabs are ignored when looking for
    /// the terminator line.
    pub strip_tabs: bool,
    /// Body text of the document; empty when the entry is created.
    pub content: String,
}
95
96/// The Zsh Lexer
97pub struct ZshLexer<'a> {
98    /// Input source
99    input: &'a str,
100    /// Current position in input
101    pos: usize,
102    /// Look-ahead buffer for ungotten characters
103    unget_buf: VecDeque<char>,
104    /// Current token string
105    pub tokstr: Option<String>,
106    /// Current token type
107    pub tok: LexTok,
108    /// File descriptor for redirections (e.g., 2> means fd=2)
109    pub tokfd: i32,
110    /// Line number at start of current token
111    pub toklineno: u64,
112    /// Current line number
113    pub lineno: u64,
114    /// Lexer has stopped (EOF or error)
115    pub lexstop: bool,
116    /// In command position (can accept reserved words)
117    pub incmdpos: bool,
118    /// In condition [[ ... ]]
119    pub incond: i32,
120    /// In pattern context (RHS of == != =~ in [[ ]])
121    pub incondpat: bool,
122    /// In case pattern
123    pub incasepat: i32,
124    /// In redirection
125    pub inredir: bool,
126    /// After 'for' keyword
127    pub infor: i32,
128    /// After 'repeat' keyword
129    inrepeat: i32,
130    /// Parsing typeset arguments
131    pub intypeset: bool,
132    /// Inside (( ... )) arithmetic
133    dbparens: bool,
134    /// Disable alias expansion
135    pub noaliases: bool,
136    /// Disable spelling correction
137    pub nocorrect: i32,
138    /// Disable comment recognition
139    pub nocomments: bool,
140    /// Lexer flags
141    pub lexflags: LexFlags,
142    /// Whether this is the first line
143    pub isfirstln: bool,
144    /// Whether this is the first char of command
145    #[allow(dead_code)]
146    isfirstch: bool,
147    /// Pending here-documents
148    pub heredocs: Vec<HereDoc>,
149    /// Expecting heredoc terminator (0 = no, 1 = <<, 2 = <<-)
150    heredoc_pending: u8,
151    /// Token buffer
152    lexbuf: LexBuf,
153    /// After newline
154    pub isnewlin: i32,
155    /// Error message if any
156    pub error: Option<String>,
157    /// Global iteration counter for infinite loop detection
158    global_iterations: usize,
159    /// Recursion depth counter
160    recursion_depth: usize,
161}
162
/// Deepest nesting the lexer accepts before it reports an error and stops.
const MAX_LEXER_RECURSION: usize = 200;
164
165impl<'a> ZshLexer<'a> {
166    /// Create a new lexer for the given input
167    pub fn new(input: &'a str) -> Self {
168        ZshLexer {
169            input,
170            pos: 0,
171            unget_buf: VecDeque::new(),
172            tokstr: None,
173            tok: LexTok::Endinput,
174            tokfd: -1,
175            toklineno: 1,
176            lineno: 1,
177            lexstop: false,
178            incmdpos: true,
179            incond: 0,
180            incondpat: false,
181            incasepat: 0,
182            inredir: false,
183            infor: 0,
184            inrepeat: 0,
185            intypeset: false,
186            dbparens: false,
187            noaliases: false,
188            nocorrect: 0,
189            nocomments: false,
190            lexflags: LexFlags::default(),
191            isfirstln: true,
192            isfirstch: true,
193            heredocs: Vec::new(),
194            heredoc_pending: 0,
195            lexbuf: LexBuf::new(),
196            isnewlin: 0,
197            error: None,
198            global_iterations: 0,
199            recursion_depth: 0,
200        }
201    }
202
203    /// Check recursion depth; returns true if exceeded
204    #[inline]
205    fn check_recursion(&mut self) -> bool {
206        if self.recursion_depth > MAX_LEXER_RECURSION {
207            self.error = Some("lexer exceeded max recursion depth".to_string());
208            self.lexstop = true;
209            true
210        } else {
211            false
212        }
213    }
214
215    /// Check and increment global iteration counter; returns true if limit exceeded
216    #[inline]
217    fn check_iterations(&mut self) -> bool {
218        self.global_iterations += 1;
219        if self.global_iterations > 50_000 {
220            self.error = Some("lexer exceeded 50K iterations".to_string());
221            self.lexstop = true;
222            self.tok = LexTok::Lexerr;
223            true
224        } else {
225            false
226        }
227    }
228
229    /// Get next character from input
230    fn hgetc(&mut self) -> Option<char> {
231        if self.check_iterations() {
232            return None;
233        }
234
235        if let Some(c) = self.unget_buf.pop_front() {
236            return Some(c);
237        }
238
239        let c = self.input[self.pos..].chars().next()?;
240        self.pos += c.len_utf8();
241
242        if c == '\n' {
243            self.lineno += 1;
244        }
245
246        Some(c)
247    }
248
249    /// Put character back into input
250    fn hungetc(&mut self, c: char) {
251        self.unget_buf.push_front(c);
252        if c == '\n' && self.lineno > 1 {
253            self.lineno -= 1;
254        }
255        self.lexstop = false;
256    }
257
258    /// Peek at next character without consuming
259    #[allow(dead_code)]
260    fn peek(&mut self) -> Option<char> {
261        if let Some(&c) = self.unget_buf.front() {
262            return Some(c);
263        }
264        self.input[self.pos..].chars().next()
265    }
266
267    /// Add character to token buffer
268    fn add(&mut self, c: char) {
269        self.lexbuf.add(c);
270    }
271
/// `true` for the two characters that separate words on a line: space and tab.
fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
276
/// `true` for in-line whitespace: space, tab, vertical tab, form feed and
/// carriage return. Newline is deliberately not included.
fn is_inblank(c: char) -> bool {
    [' ', '\t', '\x0b', '\x0c', '\r'].contains(&c)
}
281
/// `true` for the ASCII decimal digits `0` through `9` only.
fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
286
/// `true` if `c` may begin an identifier: an ASCII letter or an underscore.
#[allow(dead_code)]
fn is_ident_start(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
292
/// `true` if `c` may appear inside an identifier: an ASCII letter, an ASCII
/// digit or an underscore.
fn is_ident(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z' | '_')
}
297
298    /// Main lexer entry point - get next token
299    pub fn zshlex(&mut self) {
300        if self.tok == LexTok::Lexerr {
301            return;
302        }
303
304        // Note: Do NOT reset global_iterations here - it must accumulate across all
305        // zshlex calls in a parse to prevent infinite loops in the parser
306
307        loop {
308            if self.inrepeat > 0 {
309                self.inrepeat += 1;
310            }
311            if self.inrepeat == 3 {
312                self.incmdpos = true;
313            }
314
315            self.tok = self.gettok();
316
317            // Handle alias expansion would go here
318            break;
319        }
320
321        self.nocorrect &= 1;
322
323        // Handle here-documents at end of line
324        if self.tok == LexTok::Newlin || self.tok == LexTok::Endinput {
325            self.process_heredocs();
326        }
327
328        if self.tok != LexTok::Newlin {
329            self.isnewlin = 0;
330        } else {
331            self.isnewlin = if self.pos < self.input.len() { -1 } else { 1 };
332        }
333
334        if self.tok == LexTok::Semi || (self.tok == LexTok::Newlin && !self.lexflags.newline) {
335            self.tok = LexTok::Seper;
336        }
337
338        // Check for reserved words when in command position
339        // Also check for "{" and "}" which are special in many contexts
340        if self.tok == LexTok::String {
341            if let Some(ref s) = self.tokstr {
342                if s == "{" {
343                    self.tok = LexTok::Inbrace;
344                } else if s == "}" {
345                    self.tok = LexTok::Outbrace;
346                } else if self.incasepat == 0 {
347                    // Skip reserved word checking in case pattern context
348                    // Words like "time", "end", etc. should be patterns, not reserved words
349                    self.check_reserved_word();
350                }
351            }
352        }
353
354        // If we were expecting a heredoc terminator, register it now
355        if self.heredoc_pending > 0 && self.tok == LexTok::String {
356            if let Some(ref terminator) = self.tokstr {
357                let strip_tabs = self.heredoc_pending == 2;
358                // Handle quoted terminators (e.g., 'EOF' or "EOF")
359                let term = terminator
360                    .trim_matches(|c| c == '\'' || c == '"')
361                    .to_string();
362                self.heredocs.push(HereDoc {
363                    terminator: term,
364                    strip_tabs,
365                    content: String::new(),
366                });
367            }
368            self.heredoc_pending = 0;
369        }
370
371        // Track pattern context inside [[ ... ]] - after = == != =~ the RHS is a pattern
372        if self.incond > 0 {
373            if let Some(ref s) = self.tokstr {
374                // Check if this token is a comparison operator
375                // Note: single = is also a comparison operator in [[ ]]
376                // The internal marker \u{8d} is used for =
377                if s == "="
378                    || s == "=="
379                    || s == "!="
380                    || s == "=~"
381                    || s == "\u{8d}"
382                    || s == "\u{8d}\u{8d}"
383                    || s == "!\u{8d}"
384                    || s == "\u{8d}~"
385                {
386                    self.incondpat = true;
387                } else if self.incondpat {
388                    // We were in pattern context, now we've consumed the pattern
389                    // Reset after the pattern token is consumed
390                    // But actually, pattern can span multiple tokens, so we should
391                    // stay in pattern mode until ]] or && or ||
392                }
393            }
394            // Reset pattern context on ]] or logical operators
395            if self.tok == LexTok::Doutbrack {
396                self.incondpat = false;
397            }
398        } else {
399            self.incondpat = false;
400        }
401
402        // Update command position for next token based on current token
403        // Note: In case patterns (incasepat > 0), | is a pattern separator, not pipeline,
404        // so we don't set incmdpos after Bar in that context
405        match self.tok {
406            LexTok::Seper
407            | LexTok::Newlin
408            | LexTok::Semi
409            | LexTok::Dsemi
410            | LexTok::Semiamp
411            | LexTok::Semibar
412            | LexTok::Amper
413            | LexTok::Amperbang
414            | LexTok::Inpar
415            | LexTok::Inbrace
416            | LexTok::Dbar
417            | LexTok::Damper
418            | LexTok::Baramp
419            | LexTok::Inoutpar
420            | LexTok::Doloop
421            | LexTok::Then
422            | LexTok::Elif
423            | LexTok::Else
424            | LexTok::Doutbrack
425            | LexTok::Func => {
426                self.incmdpos = true;
427            }
428            LexTok::Bar => {
429                // In case patterns, | is a pattern separator - don't change incmdpos
430                if self.incasepat <= 0 {
431                    self.incmdpos = true;
432                }
433            }
434            LexTok::String
435            | LexTok::Typeset
436            | LexTok::Envarray
437            | LexTok::Outpar
438            | LexTok::Case
439            | LexTok::Dinbrack => {
440                self.incmdpos = false;
441            }
442            _ => {}
443        }
444
445        // Track 'for' keyword for C-style for loop: for (( init; cond; step ))
446        // When we see 'for', set infor=2 to expect the init and cond parts
447        // Each Dinpar (after semicolon in arithmetic) decrements it
448        if self.tok != LexTok::Dinpar {
449            self.infor = if self.tok == LexTok::For { 2 } else { 0 };
450        }
451
452        // Handle redirection context
453        let oldpos = self.incmdpos;
454        if self.tok.is_redirop()
455            || self.tok == LexTok::For
456            || self.tok == LexTok::Foreach
457            || self.tok == LexTok::Select
458        {
459            self.inredir = true;
460            self.incmdpos = false;
461        } else if self.inredir {
462            self.incmdpos = oldpos;
463            self.inredir = false;
464        }
465    }
466
467    /// Process pending here-documents
468    fn process_heredocs(&mut self) {
469        let heredocs = std::mem::take(&mut self.heredocs);
470
471        for mut hdoc in heredocs {
472            let mut content = String::new();
473            let mut line_count = 0;
474
475            loop {
476                line_count += 1;
477                if line_count > 10000 {
478                    self.error = Some("heredoc exceeded 10000 lines".to_string());
479                    self.tok = LexTok::Lexerr;
480                    return;
481                }
482
483                let line = self.read_line();
484                if line.is_none() {
485                    self.error = Some("here document too large or unterminated".to_string());
486                    self.tok = LexTok::Lexerr;
487                    return;
488                }
489
490                let line = line.unwrap();
491                let check_line = if hdoc.strip_tabs {
492                    line.trim_start_matches('\t')
493                } else {
494                    &line
495                };
496
497                if check_line.trim_end_matches('\n') == hdoc.terminator {
498                    break;
499                }
500
501                content.push_str(&line);
502            }
503
504            hdoc.content = content;
505        }
506    }
507
508    /// Read a line from input (returns partial line at EOF)
509    fn read_line(&mut self) -> Option<String> {
510        let mut line = String::new();
511
512        loop {
513            match self.hgetc() {
514                Some(c) => {
515                    line.push(c);
516                    if c == '\n' {
517                        break;
518                    }
519                }
520                None => {
521                    // EOF - return partial line if any
522                    if line.is_empty() {
523                        return None;
524                    }
525                    break;
526                }
527            }
528        }
529
530        Some(line)
531    }
532
533    /// Get the next token
534    fn gettok(&mut self) -> LexTok {
535        self.tokstr = None;
536        self.tokfd = -1;
537
538        // Skip whitespace
539        let mut ws_iterations = 0;
540        loop {
541            ws_iterations += 1;
542            if ws_iterations > 100_000 {
543                self.error = Some("gettok: infinite loop in whitespace skip".to_string());
544                return LexTok::Lexerr;
545            }
546            let c = match self.hgetc() {
547                Some(c) => c,
548                None => {
549                    self.lexstop = true;
550                    return if self.error.is_some() {
551                        LexTok::Lexerr
552                    } else {
553                        LexTok::Endinput
554                    };
555                }
556            };
557
558            if !Self::is_blank(c) {
559                self.hungetc(c);
560                break;
561            }
562        }
563
564        let c = match self.hgetc() {
565            Some(c) => c,
566            None => {
567                self.lexstop = true;
568                return LexTok::Endinput;
569            }
570        };
571
572        self.toklineno = self.lineno;
573        self.isfirstln = false;
574
575        // Handle (( ... )) arithmetic
576        if self.dbparens {
577            return self.lex_arith(c);
578        }
579
580        // Handle digit followed by redirection
581        if Self::is_digit(c) {
582            let d = self.hgetc();
583            match d {
584                Some('&') => {
585                    let e = self.hgetc();
586                    if e == Some('>') {
587                        self.tokfd = (c as u8 - b'0') as i32;
588                        self.hungetc('>');
589                        return self.lex_initial('&');
590                    }
591                    if let Some(e) = e {
592                        self.hungetc(e);
593                    }
594                    self.hungetc('&');
595                }
596                Some('>') | Some('<') => {
597                    self.tokfd = (c as u8 - b'0') as i32;
598                    return self.lex_initial(d.unwrap());
599                }
600                Some(d) => {
601                    self.hungetc(d);
602                }
603                None => {}
604            }
605            self.lexstop = false;
606        }
607
608        self.lex_initial(c)
609    }
610
611    /// Lex (( ... )) arithmetic expression
612    fn lex_arith(&mut self, c: char) -> LexTok {
613        self.lexbuf.clear();
614        self.hungetc(c);
615
616        let end_char = if self.infor > 0 { ';' } else { ')' };
617        if self.dquote_parse(end_char, false).is_err() {
618            return LexTok::Lexerr;
619        }
620
621        self.tokstr = Some(self.lexbuf.as_str().to_string());
622
623        if !self.lexstop && self.infor > 0 {
624            self.infor -= 1;
625            return LexTok::Dinpar;
626        }
627
628        // Check for closing ))
629        match self.hgetc() {
630            Some(')') => {
631                self.dbparens = false;
632                LexTok::Doutpar
633            }
634            c => {
635                if let Some(c) = c {
636                    self.hungetc(c);
637                }
638                LexTok::Lexerr
639            }
640        }
641    }
642
643    /// Handle initial character of token
644    fn lex_initial(&mut self, c: char) -> LexTok {
645        // Handle comments
646        if c == '#' && !self.nocomments {
647            return self.lex_comment();
648        }
649
650        match c {
651            '\\' => {
652                let d = self.hgetc();
653                if d == Some('\n') {
654                    // Line continuation - get next token
655                    return self.gettok();
656                }
657                if let Some(d) = d {
658                    self.hungetc(d);
659                }
660                self.lexstop = false;
661                self.gettokstr(c, false)
662            }
663
664            '\n' => LexTok::Newlin,
665
666            ';' => {
667                let d = self.hgetc();
668                match d {
669                    Some(';') => LexTok::Dsemi,
670                    Some('&') => LexTok::Semiamp,
671                    Some('|') => LexTok::Semibar,
672                    _ => {
673                        if let Some(d) = d {
674                            self.hungetc(d);
675                        }
676                        self.lexstop = false;
677                        LexTok::Semi
678                    }
679                }
680            }
681
682            '&' => {
683                let d = self.hgetc();
684                match d {
685                    Some('&') => LexTok::Damper,
686                    Some('!') | Some('|') => LexTok::Amperbang,
687                    Some('>') => {
688                        self.tokfd = self.tokfd.max(0);
689                        let e = self.hgetc();
690                        match e {
691                            Some('!') | Some('|') => LexTok::Outangampbang,
692                            Some('>') => {
693                                let f = self.hgetc();
694                                match f {
695                                    Some('!') | Some('|') => LexTok::Doutangampbang,
696                                    _ => {
697                                        if let Some(f) = f {
698                                            self.hungetc(f);
699                                        }
700                                        self.lexstop = false;
701                                        LexTok::Doutangamp
702                                    }
703                                }
704                            }
705                            _ => {
706                                if let Some(e) = e {
707                                    self.hungetc(e);
708                                }
709                                self.lexstop = false;
710                                LexTok::Ampoutang
711                            }
712                        }
713                    }
714                    _ => {
715                        if let Some(d) = d {
716                            self.hungetc(d);
717                        }
718                        self.lexstop = false;
719                        LexTok::Amper
720                    }
721                }
722            }
723
724            '|' => {
725                let d = self.hgetc();
726                match d {
727                    Some('|') if self.incasepat <= 0 => LexTok::Dbar,
728                    Some('&') => LexTok::Baramp,
729                    _ => {
730                        if let Some(d) = d {
731                            self.hungetc(d);
732                        }
733                        self.lexstop = false;
734                        LexTok::Bar
735                    }
736                }
737            }
738
739            '(' => {
740                let d = self.hgetc();
741                match d {
742                    Some('(') => {
743                        if self.infor > 0 {
744                            self.dbparens = true;
745                            return LexTok::Dinpar;
746                        }
747                        if self.incmdpos {
748                            // Could be (( arithmetic )) or ( subshell )
749                            self.lexbuf.clear();
750                            match self.cmd_or_math() {
751                                CmdOrMath::Math => {
752                                    self.tokstr = Some(self.lexbuf.as_str().to_string());
753                                    return LexTok::Dinpar;
754                                }
755                                CmdOrMath::Cmd => {
756                                    self.tokstr = None;
757                                    return LexTok::Inpar;
758                                }
759                                CmdOrMath::Err => return LexTok::Lexerr,
760                            }
761                        }
762                        self.hungetc('(');
763                        self.lexstop = false;
764                        self.gettokstr('(', false)
765                    }
766                    Some(')') => LexTok::Inoutpar,
767                    _ => {
768                        if let Some(d) = d {
769                            self.hungetc(d);
770                        }
771                        self.lexstop = false;
772                        // In pattern context (after == != =~ in [[ ]]), ( is part of pattern
773                        // In case pattern context, ( at start is optional delimiter, not pattern
774                        // incasepat == 1 means "at start of pattern", > 1 means "inside pattern"
775                        if self.incondpat || self.incasepat > 1 {
776                            self.gettokstr('(', false)
777                        } else if self.incond == 1 || self.incmdpos || self.incasepat == 1 {
778                            LexTok::Inpar
779                        } else {
780                            self.gettokstr('(', false)
781                        }
782                    }
783                }
784            }
785
786            ')' => LexTok::Outpar,
787
788            '{' => {
789                // { is a command group only if followed by whitespace or newline
790                // {a,b} is brace expansion, not a command group
791                if self.incmdpos {
792                    let next = self.hgetc();
793                    let is_brace_group = match next {
794                        Some(' ') | Some('\t') | Some('\n') | None => true,
795                        _ => false,
796                    };
797                    if let Some(ch) = next {
798                        self.hungetc(ch);
799                    }
800                    if is_brace_group {
801                        self.tokstr = Some("{".to_string());
802                        LexTok::Inbrace
803                    } else {
804                        self.gettokstr(c, false)
805                    }
806                } else {
807                    self.gettokstr(c, false)
808                }
809            }
810
811            '}' => {
812                // } at start of token is always Outbrace (ends command group)
813                // Inside a word, } would be handled by gettokstr but we never reach here mid-word
814                self.tokstr = Some("}".to_string());
815                LexTok::Outbrace
816            }
817
818            '[' => {
819                // [[ is a conditional expression start
820                // [ can also be a command (test builtin) or array subscript
821                // In case patterns (incasepat > 0), [ is part of glob pattern like [yY]
822                if self.incasepat > 0 {
823                    self.gettokstr(c, false)
824                } else if self.incmdpos {
825                    let next = self.hgetc();
826                    if next == Some('[') {
827                        // [[ - double bracket conditional
828                        self.tokstr = Some("[[".to_string());
829                        self.incond = 1;
830                        return LexTok::Dinbrack;
831                    }
832                    // Single [ - either test command or start of glob pattern
833                    if let Some(ch) = next {
834                        self.hungetc(ch);
835                    }
836                    self.tokstr = Some("[".to_string());
837                    LexTok::String
838                } else {
839                    self.gettokstr(c, false)
840                }
841            }
842
843            ']' => {
844                // ]] ends a conditional expression started by [[
845                if self.incond > 0 {
846                    let next = self.hgetc();
847                    if next == Some(']') {
848                        self.tokstr = Some("]]".to_string());
849                        self.incond = 0;
850                        return LexTok::Doutbrack;
851                    }
852                    if let Some(ch) = next {
853                        self.hungetc(ch);
854                    }
855                }
856                self.gettokstr(c, false)
857            }
858
859            '<' => {
860                // In pattern context, < is literal (e.g., <-> in glob)
861                if self.incondpat || self.incasepat > 0 {
862                    self.gettokstr(c, false)
863                } else {
864                    self.lex_inang()
865                }
866            }
867
868            '>' => {
869                // In pattern context, > is literal
870                if self.incondpat || self.incasepat > 0 {
871                    self.gettokstr(c, false)
872                } else {
873                    self.lex_outang()
874                }
875            }
876
877            _ => self.gettokstr(c, false),
878        }
879    }
880
881    /// Lex comment
882    fn lex_comment(&mut self) -> LexTok {
883        if self.lexflags.comments_keep {
884            self.lexbuf.clear();
885            self.add('#');
886        }
887
888        loop {
889            let c = self.hgetc();
890            match c {
891                Some('\n') | None => break,
892                Some(c) => {
893                    if self.lexflags.comments_keep {
894                        self.add(c);
895                    }
896                }
897            }
898        }
899
900        if self.lexflags.comments_keep {
901            self.tokstr = Some(self.lexbuf.as_str().to_string());
902            if !self.lexstop {
903                self.hungetc('\n');
904            }
905            return LexTok::String;
906        }
907
908        if self.lexflags.comments_strip && self.lexstop {
909            return LexTok::Endinput;
910        }
911
912        LexTok::Newlin
913    }
914
915    /// Lex < and variants
916    fn lex_inang(&mut self) -> LexTok {
917        let d = self.hgetc();
918        match d {
919            Some('(') => {
920                // Process substitution <(...)
921                self.hungetc('(');
922                self.lexstop = false;
923                return self.gettokstr('<', false);
924            }
925            Some('>') => return LexTok::Inoutang,
926            Some('<') => {
927                let e = self.hgetc();
928                match e {
929                    Some('(') => {
930                        self.hungetc('(');
931                        self.hungetc('<');
932                        return LexTok::Inang;
933                    }
934                    Some('<') => return LexTok::Trinang,
935                    Some('-') => {
936                        self.heredoc_pending = 2; // <<- expects terminator next
937                        return LexTok::Dinangdash;
938                    }
939                    _ => {
940                        if let Some(e) = e {
941                            self.hungetc(e);
942                        }
943                        self.lexstop = false;
944                        self.heredoc_pending = 1; // << expects terminator next
945                        return LexTok::Dinang;
946                    }
947                }
948            }
949            Some('&') => return LexTok::Inangamp,
950            _ => {
951                if let Some(d) = d {
952                    self.hungetc(d);
953                }
954                self.lexstop = false;
955                return LexTok::Inang;
956            }
957        }
958    }
959
960    /// Lex > and variants
961    fn lex_outang(&mut self) -> LexTok {
962        let d = self.hgetc();
963        match d {
964            Some('(') => {
965                // Process substitution >(...)
966                self.hungetc('(');
967                self.lexstop = false;
968                return self.gettokstr('>', false);
969            }
970            Some('&') => {
971                let e = self.hgetc();
972                match e {
973                    Some('!') | Some('|') => return LexTok::Outangampbang,
974                    _ => {
975                        if let Some(e) = e {
976                            self.hungetc(e);
977                        }
978                        self.lexstop = false;
979                        return LexTok::Outangamp;
980                    }
981                }
982            }
983            Some('!') | Some('|') => return LexTok::Outangbang,
984            Some('>') => {
985                let e = self.hgetc();
986                match e {
987                    Some('&') => {
988                        let f = self.hgetc();
989                        match f {
990                            Some('!') | Some('|') => return LexTok::Doutangampbang,
991                            _ => {
992                                if let Some(f) = f {
993                                    self.hungetc(f);
994                                }
995                                self.lexstop = false;
996                                return LexTok::Doutangamp;
997                            }
998                        }
999                    }
1000                    Some('!') | Some('|') => return LexTok::Doutangbang,
1001                    Some('(') => {
1002                        self.hungetc('(');
1003                        self.hungetc('>');
1004                        return LexTok::Outang;
1005                    }
1006                    _ => {
1007                        if let Some(e) = e {
1008                            self.hungetc(e);
1009                        }
1010                        self.lexstop = false;
1011                        return LexTok::Doutang;
1012                    }
1013                }
1014            }
1015            _ => {
1016                if let Some(d) = d {
1017                    self.hungetc(d);
1018                }
1019                self.lexstop = false;
1020                return LexTok::Outang;
1021            }
1022        }
1023    }
1024
1025    /// Get rest of token string
1026    fn gettokstr(&mut self, c: char, sub: bool) -> LexTok {
1027        let mut bct = 0; // brace count
1028        let mut pct = 0; // parenthesis count
1029        let mut brct = 0; // bracket count
1030        let mut in_brace_param = 0;
1031        let mut peek = LexTok::String;
1032        let mut intpos = 1;
1033        let mut unmatched = '\0';
1034        let mut c = c;
1035        const MAX_ITERATIONS: usize = 100_000;
1036        let mut iterations = 0;
1037
1038        if !sub {
1039            self.lexbuf.clear();
1040        }
1041
1042        loop {
1043            iterations += 1;
1044            if iterations > MAX_ITERATIONS {
1045                self.error = Some("gettokstr exceeded maximum iterations".to_string());
1046                return LexTok::Lexerr;
1047            }
1048
1049            let inbl = Self::is_inblank(c);
1050
1051            if inbl && in_brace_param == 0 && pct == 0 {
1052                // Whitespace outside brace param ends token
1053                break;
1054            }
1055
1056            match c {
1057                // Whitespace is handled above for most cases
1058                ')' => {
1059                    if in_brace_param > 0 || sub {
1060                        self.add(char_tokens::OUTPAR);
1061                    } else if pct > 0 {
1062                        pct -= 1;
1063                        self.add(char_tokens::OUTPAR);
1064                    } else {
1065                        break;
1066                    }
1067                }
1068
1069                '|' => {
1070                    if pct == 0 && in_brace_param == 0 {
1071                        if sub {
1072                            self.add(c);
1073                        } else {
1074                            break;
1075                        }
1076                    } else {
1077                        self.add(char_tokens::BAR);
1078                    }
1079                }
1080
1081                '$' => {
1082                    let e = self.hgetc();
1083                    match e {
1084                        Some('\\') => {
1085                            let f = self.hgetc();
1086                            if f != Some('\n') {
1087                                if let Some(f) = f {
1088                                    self.hungetc(f);
1089                                }
1090                                self.hungetc('\\');
1091                                self.add(char_tokens::STRING);
1092                            } else {
1093                                // Line continuation after $
1094                                continue;
1095                            }
1096                        }
1097                        Some('[') => {
1098                            // $[...] arithmetic
1099                            self.add(char_tokens::STRING);
1100                            self.add(char_tokens::INBRACK);
1101                            if self.dquote_parse(']', sub).is_err() {
1102                                peek = LexTok::Lexerr;
1103                                break;
1104                            }
1105                            self.add(char_tokens::OUTBRACK);
1106                        }
1107                        Some('(') => {
1108                            // $(...) or $((...))
1109                            self.add(char_tokens::STRING);
1110                            match self.cmd_or_math_sub() {
1111                                CmdOrMath::Cmd => self.add(char_tokens::OUTPAR),
1112                                CmdOrMath::Math => self.add(char_tokens::OUTPARMATH),
1113                                CmdOrMath::Err => {
1114                                    peek = LexTok::Lexerr;
1115                                    break;
1116                                }
1117                            }
1118                        }
1119                        Some('{') => {
1120                            self.add(c);
1121                            self.add(char_tokens::INBRACE);
1122                            bct += 1;
1123                            if in_brace_param == 0 {
1124                                in_brace_param = bct;
1125                            }
1126                        }
1127                        _ => {
1128                            if let Some(e) = e {
1129                                self.hungetc(e);
1130                            }
1131                            self.lexstop = false;
1132                            self.add(char_tokens::STRING);
1133                        }
1134                    }
1135                }
1136
1137                '[' => {
1138                    if in_brace_param == 0 {
1139                        brct += 1;
1140                    }
1141                    self.add(char_tokens::INBRACK);
1142                }
1143
1144                ']' => {
1145                    if in_brace_param == 0 && brct > 0 {
1146                        brct -= 1;
1147                    }
1148                    self.add(char_tokens::OUTBRACK);
1149                }
1150
1151                '(' => {
1152                    if in_brace_param == 0 {
1153                        pct += 1;
1154                    }
1155                    self.add(char_tokens::INPAR);
1156                }
1157
1158                '{' => {
1159                    // Track braces for both ${...} param expansion and {...} brace expansion
1160                    bct += 1;
1161                    self.add(c);
1162                }
1163
1164                '}' => {
1165                    if in_brace_param > 0 {
1166                        if bct == in_brace_param {
1167                            in_brace_param = 0;
1168                        }
1169                        bct -= 1;
1170                        self.add(char_tokens::OUTBRACE);
1171                    } else if bct > 0 {
1172                        // Closing a brace expansion like {a,b}
1173                        bct -= 1;
1174                        self.add(c);
1175                    } else {
1176                        break;
1177                    }
1178                }
1179
1180                '>' => {
1181                    // In pattern context (incondpat), > is literal
1182                    if in_brace_param > 0 || sub || self.incondpat || self.incasepat > 0 {
1183                        self.add(c);
1184                    } else {
1185                        let e = self.hgetc();
1186                        if e != Some('(') {
1187                            if let Some(e) = e {
1188                                self.hungetc(e);
1189                            }
1190                            self.lexstop = false;
1191                            break;
1192                        }
1193                        // >(...)
1194                        self.add(char_tokens::OUTANGPROC);
1195                        if self.skip_command_sub().is_err() {
1196                            peek = LexTok::Lexerr;
1197                            break;
1198                        }
1199                        self.add(char_tokens::OUTPAR);
1200                    }
1201                }
1202
1203                '<' => {
1204                    // In pattern context (incondpat), < is literal
1205                    if in_brace_param > 0 || sub || self.incondpat || self.incasepat > 0 {
1206                        self.add(c);
1207                    } else {
1208                        let e = self.hgetc();
1209                        if e != Some('(') {
1210                            if let Some(e) = e {
1211                                self.hungetc(e);
1212                            }
1213                            self.lexstop = false;
1214                            break;
1215                        }
1216                        // <(...)
1217                        self.add(char_tokens::INANG);
1218                        if self.skip_command_sub().is_err() {
1219                            peek = LexTok::Lexerr;
1220                            break;
1221                        }
1222                        self.add(char_tokens::OUTPAR);
1223                    }
1224                }
1225
1226                '=' => {
1227                    if !sub {
1228                        if intpos > 0 {
1229                            // At start of token, check for =(...) process substitution
1230                            let e = self.hgetc();
1231                            if e == Some('(') {
1232                                self.add(char_tokens::EQUALS);
1233                                if self.skip_command_sub().is_err() {
1234                                    peek = LexTok::Lexerr;
1235                                    break;
1236                                }
1237                                self.add(char_tokens::OUTPAR);
1238                            } else {
1239                                if let Some(e) = e {
1240                                    self.hungetc(e);
1241                                }
1242                                self.lexstop = false;
1243                                self.add(char_tokens::EQUALS);
1244                            }
1245                        } else if peek != LexTok::Envstring
1246                            && (self.incmdpos || self.intypeset)
1247                            && bct == 0
1248                            && brct == 0
1249                            && self.incasepat == 0
1250                        {
1251                            // Check for VAR=value assignment (but not in case pattern context)
1252                            let tok_so_far = self.lexbuf.as_str().to_string();
1253                            if self.is_valid_assignment_target(&tok_so_far) {
1254                                let next = self.hgetc();
1255                                if next == Some('(') {
1256                                    // VAR=(...) array assignment - include '=' in tokstr
1257                                    self.add(char_tokens::EQUALS);
1258                                    self.tokstr = Some(self.lexbuf.as_str().to_string());
1259                                    return LexTok::Envarray;
1260                                }
1261                                if let Some(next) = next {
1262                                    self.hungetc(next);
1263                                }
1264                                self.lexstop = false;
1265                                peek = LexTok::Envstring;
1266                                intpos = 2;
1267                                self.add(char_tokens::EQUALS);
1268                            } else {
1269                                self.add(char_tokens::EQUALS);
1270                            }
1271                        } else {
1272                            self.add(char_tokens::EQUALS);
1273                        }
1274                    } else {
1275                        self.add(char_tokens::EQUALS);
1276                    }
1277                }
1278
1279                '\\' => {
1280                    let next = self.hgetc();
1281                    if next == Some('\n') {
1282                        // Line continuation
1283                        let next = self.hgetc();
1284                        if let Some(next) = next {
1285                            c = next;
1286                            continue;
1287                        }
1288                        break;
1289                    } else {
1290                        self.add(char_tokens::BNULL);
1291                        if let Some(next) = next {
1292                            self.add(next);
1293                        }
1294                    }
1295                }
1296
1297                '\'' => {
1298                    // Single quoted string - everything literal until '
1299                    self.add(char_tokens::SNULL);
1300                    loop {
1301                        let ch = self.hgetc();
1302                        match ch {
1303                            Some('\'') => break,
1304                            Some(ch) => self.add(ch),
1305                            None => {
1306                                self.lexstop = true;
1307                                unmatched = '\'';
1308                                peek = LexTok::Lexerr;
1309                                break;
1310                            }
1311                        }
1312                    }
1313                    if unmatched != '\0' {
1314                        break;
1315                    }
1316                    self.add(char_tokens::SNULL);
1317                }
1318
1319                '"' => {
1320                    // Double quoted string
1321                    self.add(char_tokens::DNULL);
1322                    if self.dquote_parse('"', sub).is_err() {
1323                        unmatched = '"';
1324                        if !self.lexflags.active {
1325                            peek = LexTok::Lexerr;
1326                        }
1327                        break;
1328                    }
1329                    self.add(char_tokens::DNULL);
1330                }
1331
1332                '`' => {
1333                    // Backtick command substitution
1334                    self.add(char_tokens::TICK);
1335                    loop {
1336                        let ch = self.hgetc();
1337                        match ch {
1338                            Some('`') => break,
1339                            Some('\\') => {
1340                                let next = self.hgetc();
1341                                match next {
1342                                    Some('\n') => continue, // Line continuation
1343                                    Some(c) if c == '`' || c == '\\' || c == '$' => {
1344                                        self.add(char_tokens::BNULL);
1345                                        self.add(c);
1346                                    }
1347                                    Some(c) => {
1348                                        self.add('\\');
1349                                        self.add(c);
1350                                    }
1351                                    None => break,
1352                                }
1353                            }
1354                            Some(ch) => self.add(ch),
1355                            None => {
1356                                self.lexstop = true;
1357                                unmatched = '`';
1358                                peek = LexTok::Lexerr;
1359                                break;
1360                            }
1361                        }
1362                    }
1363                    if unmatched != '\0' {
1364                        break;
1365                    }
1366                    self.add(char_tokens::TICK);
1367                }
1368
1369                '~' => {
1370                    self.add(char_tokens::TILDE);
1371                }
1372
1373                '#' => {
1374                    self.add(char_tokens::POUND);
1375                }
1376
1377                '^' => {
1378                    self.add(char_tokens::HAT);
1379                }
1380
1381                '*' => {
1382                    self.add(char_tokens::STAR);
1383                }
1384
1385                '?' => {
1386                    self.add(char_tokens::QUEST);
1387                }
1388
1389                ',' => {
1390                    if bct > in_brace_param {
1391                        self.add(char_tokens::COMMA);
1392                    } else {
1393                        self.add(c);
1394                    }
1395                }
1396
1397                '-' => {
1398                    self.add(char_tokens::DASH);
1399                }
1400
1401                '!' => {
1402                    if brct > 0 {
1403                        self.add(char_tokens::BANG);
1404                    } else {
1405                        self.add(c);
1406                    }
1407                }
1408
1409                // Terminators
1410                '\n' | ';' | '&' => {
1411                    break;
1412                }
1413
1414                _ => {
1415                    self.add(c);
1416                }
1417            }
1418
1419            c = match self.hgetc() {
1420                Some(c) => c,
1421                None => {
1422                    self.lexstop = true;
1423                    break;
1424                }
1425            };
1426
1427            if intpos > 0 {
1428                intpos -= 1;
1429            }
1430        }
1431
1432        // Put back the character that ended the token
1433        if !self.lexstop {
1434            self.hungetc(c);
1435        }
1436
1437        if unmatched != '\0' && !self.lexflags.active {
1438            self.error = Some(format!("unmatched {}", unmatched));
1439        }
1440
1441        if in_brace_param > 0 {
1442            self.error = Some("closing brace expected".to_string());
1443        }
1444
1445        self.tokstr = Some(self.lexbuf.as_str().to_string());
1446        peek
1447    }
1448
1449    /// Check if a string is a valid assignment target (identifier or array ref)
1450    fn is_valid_assignment_target(&self, s: &str) -> bool {
1451        let mut chars = s.chars().peekable();
1452
1453        // Check for leading digit (invalid)
1454        if let Some(&c) = chars.peek() {
1455            if c.is_ascii_digit() {
1456                // Could be array index, check rest
1457                while let Some(&c) = chars.peek() {
1458                    if !c.is_ascii_digit() {
1459                        break;
1460                    }
1461                    chars.next();
1462                }
1463                return chars.peek().is_none();
1464            }
1465        }
1466
1467        // Check identifier
1468        let mut has_ident = false;
1469        while let Some(&c) = chars.peek() {
1470            if c == char_tokens::INBRACK || c == '[' {
1471                break;
1472            }
1473            if c == '+' {
1474                // foo+=value
1475                chars.next();
1476                return chars.peek().is_none() || chars.peek() == Some(&'=');
1477            }
1478            if !Self::is_ident(c) && c != char_tokens::STRING && !char_tokens::is_token(c) {
1479                return false;
1480            }
1481            has_ident = true;
1482            chars.next();
1483        }
1484
1485        has_ident
1486    }
1487
1488    /// Parse double-quoted string content
1489    fn dquote_parse(&mut self, endchar: char, sub: bool) -> Result<(), ()> {
1490        self.recursion_depth += 1;
1491        if self.check_recursion() {
1492            self.recursion_depth -= 1;
1493            return Err(());
1494        }
1495
1496        let result = self.dquote_parse_inner(endchar, sub);
1497        self.recursion_depth -= 1;
1498        result
1499    }
1500
    /// Scan input up to the terminating `endchar`, appending the text to the
    /// token buffer with `$`, `` ` ``, `${`/`}` and escapes replaced by their
    /// `char_tokens` markers.
    ///
    /// # Parameters
    /// * `endchar` - character that ends the scan. It is consumed but not
    ///   added to the buffer.
    /// * `sub` - passed on to nested `dquote_parse` calls; it also affects
    ///   how backslash-newline and `\"` (inside `[...]`) are treated.
    ///
    /// # Returns
    /// `Ok(())` once `endchar` is read outside backticks and outside `${...}`
    /// (and, in math mode, with no open parentheses or brackets). `Err(())`
    /// when the input ends first (`lexstop` is set), when a nested
    /// substitution fails, when the iteration cap is exceeded (`self.error`
    /// is set), or in math mode on a `)` / `]` with no matching opener.
    fn dquote_parse_inner(&mut self, endchar: char, sub: bool) -> Result<(), ()> {
        let mut pct = 0; // parenthesis count
        let mut brct = 0; // bracket count
        let mut bct = 0; // brace count (for ${...})
        let mut intick = false; // inside backtick
        // Math mode: scanning `(...)` / `[...]` arithmetic, or `infor` is set.
        let is_math = endchar == ')' || endchar == ']' || self.infor > 0;
        // Guard against runaway loops on pathological input.
        const MAX_ITERATIONS: usize = 100_000;
        let mut iterations = 0;

        loop {
            iterations += 1;
            if iterations > MAX_ITERATIONS {
                self.error = Some("dquote_parse exceeded maximum iterations".to_string());
                return Err(());
            }
            let c = self.hgetc();
            let c = match c {
                // The end character only counts outside backticks and `${...}`.
                Some(c) if c == endchar && !intick && bct == 0 => {
                    if is_math && (pct > 0 || brct > 0) {
                        // Still inside nested parens/brackets of the
                        // expression: keep the character and carry on.
                        self.add(c);
                        if c == ')' {
                            pct -= 1;
                        } else if c == ']' {
                            brct -= 1;
                        }
                        continue;
                    }
                    return Ok(());
                }
                Some(c) => c,
                None => {
                    // Input ended before the terminator was found.
                    self.lexstop = true;
                    return Err(());
                }
            };

            match c {
                '\\' => {
                    let next = self.hgetc();
                    match next {
                        Some('\n') if !sub => continue, // Line continuation
                        // Characters for which the backslash is a real escape:
                        // it is recorded as BNULL followed by the character.
                        Some(c)
                            if c == '$'
                                || c == '\\'
                                || (c == '}' && !intick && bct > 0)
                                || c == endchar
                                || c == '`'
                                || (endchar == ']'
                                    && (c == '['
                                        || c == ']'
                                        || c == '('
                                        || c == ')'
                                        || c == '{'
                                        || c == '}'
                                        || (c == '"' && sub))) =>
                        {
                            self.add(char_tokens::BNULL);
                            self.add(c);
                        }
                        Some(c) => {
                            // The backslash stays literal; the following
                            // character is pushed back and handled on the
                            // next iteration.
                            self.add('\\');
                            self.hungetc(c);
                            continue;
                        }
                        None => {
                            // Trailing backslash; the next read reports the
                            // end of input.
                            self.add('\\');
                        }
                    }
                }

                '$' => {
                    if intick {
                        // Inside backticks `$` is kept as-is.
                        self.add(c);
                        continue;
                    }
                    let next = self.hgetc();
                    match next {
                        Some('(') => {
                            // $(...) or $((...))
                            self.add(char_tokens::QSTRING);
                            match self.cmd_or_math_sub() {
                                CmdOrMath::Cmd => self.add(char_tokens::OUTPAR),
                                CmdOrMath::Math => self.add(char_tokens::OUTPARMATH),
                                CmdOrMath::Err => return Err(()),
                            }
                        }
                        Some('[') => {
                            // $[...] arithmetic
                            self.add(char_tokens::STRING);
                            self.add(char_tokens::INBRACK);
                            self.dquote_parse(']', sub)?;
                            self.add(char_tokens::OUTBRACK);
                        }
                        Some('{') => {
                            // ${...}: tracked by `bct` until the matching `}`.
                            self.add(char_tokens::QSTRING);
                            self.add(char_tokens::INBRACE);
                            bct += 1;
                        }
                        Some('$') => {
                            // `$$`
                            self.add(char_tokens::QSTRING);
                            self.add('$');
                        }
                        _ => {
                            // Plain `$`: push the look-ahead back.
                            if let Some(next) = next {
                                self.hungetc(next);
                            }
                            self.lexstop = false;
                            self.add(char_tokens::QSTRING);
                        }
                    }
                }

                '}' => {
                    if intick || bct == 0 {
                        self.add(c);
                    } else {
                        // Closes a `${`.
                        self.add(char_tokens::OUTBRACE);
                        bct -= 1;
                    }
                }

                '`' => {
                    // Each backtick toggles the in-backtick state.
                    self.add(char_tokens::QTICK);
                    intick = !intick;
                }

                '(' => {
                    // In math mode, parens inside `${...}` are not counted.
                    if !is_math || bct == 0 {
                        pct += 1;
                    }
                    self.add(c);
                }

                ')' => {
                    if !is_math || bct == 0 {
                        if pct == 0 && is_math {
                            // Unbalanced `)` in an arithmetic expression.
                            return Err(());
                        }
                        pct -= 1;
                    }
                    self.add(c);
                }

                '[' => {
                    // In math mode, brackets inside `${...}` are not counted.
                    if !is_math || bct == 0 {
                        brct += 1;
                    }
                    self.add(c);
                }

                ']' => {
                    if !is_math || bct == 0 {
                        if brct == 0 && is_math {
                            // Unbalanced `]` in an arithmetic expression.
                            return Err(());
                        }
                        brct -= 1;
                    }
                    self.add(c);
                }

                '"' => {
                    if intick || (endchar != '"' && bct == 0) {
                        self.add(c);
                    } else if bct > 0 {
                        // A quoted string nested inside `${...}`.
                        self.add(char_tokens::DNULL);
                        self.dquote_parse('"', sub)?;
                        self.add(char_tokens::DNULL);
                    } else {
                        // Remaining case: `"` is the end character, outside
                        // backticks and `${...}`. The end-character check at
                        // the top of the loop consumes that combination
                        // first, so this arm is not expected to run.
                        return Err(());
                    }
                }

                _ => {
                    self.add(c);
                }
            }
        }
    }
1677
1678    /// Determine if (( is arithmetic or command
1679    fn cmd_or_math(&mut self) -> CmdOrMath {
1680        let oldlen = self.lexbuf.len();
1681
1682        self.add(char_tokens::INPAR);
1683        self.add('(');
1684
1685        if self.dquote_parse(')', false).is_err() {
1686            // Back up and try as command
1687            while self.lexbuf.len() > oldlen {
1688                if let Some(c) = self.lexbuf.pop() {
1689                    self.hungetc(c);
1690                }
1691            }
1692            self.hungetc('(');
1693            self.lexstop = false;
1694            return if self.skip_command_sub().is_err() {
1695                CmdOrMath::Err
1696            } else {
1697                CmdOrMath::Cmd
1698            };
1699        }
1700
1701        // Check for closing )
1702        let c = self.hgetc();
1703        if c == Some(')') {
1704            self.add(')');
1705            return CmdOrMath::Math;
1706        }
1707
1708        // Not math, back up
1709        if let Some(c) = c {
1710            self.hungetc(c);
1711        }
1712        self.lexstop = false;
1713
1714        // Back up token
1715        while self.lexbuf.len() > oldlen {
1716            if let Some(c) = self.lexbuf.pop() {
1717                self.hungetc(c);
1718            }
1719        }
1720        self.hungetc('(');
1721
1722        if self.skip_command_sub().is_err() {
1723            CmdOrMath::Err
1724        } else {
1725            CmdOrMath::Cmd
1726        }
1727    }
1728
1729    /// Parse $(...) or $((...))
1730    fn cmd_or_math_sub(&mut self) -> CmdOrMath {
1731        const MAX_CONTINUATIONS: usize = 10_000;
1732        let mut continuations = 0;
1733
1734        loop {
1735            continuations += 1;
1736            if continuations > MAX_CONTINUATIONS {
1737                self.error = Some("cmd_or_math_sub: too many line continuations".to_string());
1738                return CmdOrMath::Err;
1739            }
1740
1741            let c = self.hgetc();
1742            if c == Some('\\') {
1743                let c2 = self.hgetc();
1744                if c2 != Some('\n') {
1745                    if let Some(c2) = c2 {
1746                        self.hungetc(c2);
1747                    }
1748                    self.hungetc('\\');
1749                    self.lexstop = false;
1750                    return if self.skip_command_sub().is_err() {
1751                        CmdOrMath::Err
1752                    } else {
1753                        CmdOrMath::Cmd
1754                    };
1755                }
1756                // Line continuation, try again (loop instead of recursion)
1757                continue;
1758            }
1759
1760            // Not a line continuation, process normally
1761            if c == Some('(') {
1762                // Might be $((...))
1763                let lexpos = self.lexbuf.len();
1764                self.add(char_tokens::INPAR);
1765                self.add('(');
1766
1767                if self.dquote_parse(')', false).is_ok() {
1768                    let c2 = self.hgetc();
1769                    if c2 == Some(')') {
1770                        self.add(')');
1771                        return CmdOrMath::Math;
1772                    }
1773                    if let Some(c2) = c2 {
1774                        self.hungetc(c2);
1775                    }
1776                }
1777
1778                // Not math, restore and parse as command
1779                while self.lexbuf.len() > lexpos {
1780                    if let Some(ch) = self.lexbuf.pop() {
1781                        self.hungetc(ch);
1782                    }
1783                }
1784                self.hungetc('(');
1785                self.lexstop = false;
1786            } else {
1787                if let Some(c) = c {
1788                    self.hungetc(c);
1789                }
1790                self.lexstop = false;
1791            }
1792
1793            return if self.skip_command_sub().is_err() {
1794                CmdOrMath::Err
1795            } else {
1796                CmdOrMath::Cmd
1797            };
1798        }
1799    }
1800
1801    /// Skip over command substitution (...), adding chars to token
1802    fn skip_command_sub(&mut self) -> Result<(), ()> {
1803        let mut pct = 1;
1804        let mut start = true;
1805        const MAX_ITERATIONS: usize = 100_000;
1806        let mut iterations = 0;
1807
1808        self.add(char_tokens::INPAR);
1809
1810        loop {
1811            iterations += 1;
1812            if iterations > MAX_ITERATIONS {
1813                self.error = Some("skip_command_sub exceeded maximum iterations".to_string());
1814                return Err(());
1815            }
1816
1817            let c = self.hgetc();
1818            let c = match c {
1819                Some(c) => c,
1820                None => {
1821                    self.lexstop = true;
1822                    return Err(());
1823                }
1824            };
1825
1826            let iswhite = Self::is_inblank(c);
1827
1828            match c {
1829                '(' => {
1830                    pct += 1;
1831                    self.add(c);
1832                }
1833                ')' => {
1834                    pct -= 1;
1835                    if pct == 0 {
1836                        return Ok(());
1837                    }
1838                    self.add(c);
1839                }
1840                '\\' => {
1841                    self.add(c);
1842                    if let Some(c) = self.hgetc() {
1843                        self.add(c);
1844                    }
1845                }
1846                '\'' => {
1847                    self.add(c);
1848                    loop {
1849                        let ch = self.hgetc();
1850                        match ch {
1851                            Some('\'') => {
1852                                self.add('\'');
1853                                break;
1854                            }
1855                            Some(ch) => self.add(ch),
1856                            None => {
1857                                self.lexstop = true;
1858                                return Err(());
1859                            }
1860                        }
1861                    }
1862                }
1863                '"' => {
1864                    self.add(c);
1865                    loop {
1866                        let ch = self.hgetc();
1867                        match ch {
1868                            Some('"') => {
1869                                self.add('"');
1870                                break;
1871                            }
1872                            Some('\\') => {
1873                                self.add('\\');
1874                                if let Some(ch) = self.hgetc() {
1875                                    self.add(ch);
1876                                }
1877                            }
1878                            Some(ch) => self.add(ch),
1879                            None => {
1880                                self.lexstop = true;
1881                                return Err(());
1882                            }
1883                        }
1884                    }
1885                }
1886                '`' => {
1887                    self.add(c);
1888                    loop {
1889                        let ch = self.hgetc();
1890                        match ch {
1891                            Some('`') => {
1892                                self.add('`');
1893                                break;
1894                            }
1895                            Some('\\') => {
1896                                self.add('\\');
1897                                if let Some(ch) = self.hgetc() {
1898                                    self.add(ch);
1899                                }
1900                            }
1901                            Some(ch) => self.add(ch),
1902                            None => {
1903                                self.lexstop = true;
1904                                return Err(());
1905                            }
1906                        }
1907                    }
1908                }
1909                '#' => {
1910                    if start {
1911                        self.add(c);
1912                        // Skip comment to end of line
1913                        loop {
1914                            let ch = self.hgetc();
1915                            match ch {
1916                                Some('\n') => {
1917                                    self.add('\n');
1918                                    break;
1919                                }
1920                                Some(ch) => self.add(ch),
1921                                None => break,
1922                            }
1923                        }
1924                    } else {
1925                        self.add(c);
1926                    }
1927                }
1928                _ => {
1929                    self.add(c);
1930                }
1931            }
1932
1933            start = iswhite;
1934        }
1935    }
1936
1937    /// Update parser state after lexing based on token type
1938    pub fn ctxtlex(&mut self) {
1939        self.zshlex();
1940
1941        match self.tok {
1942            LexTok::Seper
1943            | LexTok::Newlin
1944            | LexTok::Semi
1945            | LexTok::Dsemi
1946            | LexTok::Semiamp
1947            | LexTok::Semibar
1948            | LexTok::Amper
1949            | LexTok::Amperbang
1950            | LexTok::Inpar
1951            | LexTok::Inbrace
1952            | LexTok::Dbar
1953            | LexTok::Damper
1954            | LexTok::Bar
1955            | LexTok::Baramp
1956            | LexTok::Inoutpar
1957            | LexTok::Doloop
1958            | LexTok::Then
1959            | LexTok::Elif
1960            | LexTok::Else
1961            | LexTok::Doutbrack => {
1962                self.incmdpos = true;
1963            }
1964
1965            LexTok::String
1966            | LexTok::Typeset
1967            | LexTok::Envarray
1968            | LexTok::Outpar
1969            | LexTok::Case
1970            | LexTok::Dinbrack => {
1971                self.incmdpos = false;
1972            }
1973
1974            _ => {}
1975        }
1976
1977        if self.tok != LexTok::Dinpar {
1978            self.infor = if self.tok == LexTok::For { 2 } else { 0 };
1979        }
1980
1981        let oldpos = self.incmdpos;
1982        if self.tok.is_redirop()
1983            || self.tok == LexTok::For
1984            || self.tok == LexTok::Foreach
1985            || self.tok == LexTok::Select
1986        {
1987            self.inredir = true;
1988            self.incmdpos = false;
1989        } else if self.inredir {
1990            self.incmdpos = oldpos;
1991            self.inredir = false;
1992        }
1993    }
1994
1995    /// Register a heredoc to be processed at next newline
1996    pub fn register_heredoc(&mut self, terminator: String, strip_tabs: bool) {
1997        self.heredocs.push(HereDoc {
1998            terminator,
1999            strip_tabs,
2000            content: String::new(),
2001        });
2002    }
2003
2004    /// Check for reserved word
2005    pub fn check_reserved_word(&mut self) -> bool {
2006        if let Some(ref tokstr) = self.tokstr {
2007            if self.incmdpos || (tokstr == "}" && self.tok == LexTok::String) {
2008                if let Some(tok) = crate::tokens::lookup_reserved_word(tokstr) {
2009                    self.tok = tok;
2010                    if tok == LexTok::Repeat {
2011                        self.inrepeat = 1;
2012                    }
2013                    if tok == LexTok::Dinbrack {
2014                        self.incond = 1;
2015                    }
2016                    return true;
2017                }
2018                if tokstr == "]]" && self.incond > 0 {
2019                    self.tok = LexTok::Doutbrack;
2020                    self.incond = 0;
2021                    return true;
2022                }
2023            }
2024        }
2025        false
2026    }
2027}
2028
/// Outcome of deciding whether a doubled opening parenthesis starts an
/// arithmetic expression or a command.
enum CmdOrMath {
    /// The text was scanned as a command.
    Cmd,
    /// The text was lexed as arithmetic and closed by `))`.
    Math,
    /// Lexing failed.
    Err,
}
2035
2036// ============================================================================
2037// Additional parsing functions ported from lex.c
2038// ============================================================================
2039
/// Check whether `input`, starting at byte offset `pos`, continues with valid
/// numeric globbing syntax (`[0-9]*-[0-9]*>`). `pos` should point just after
/// the opening `<`.
///
/// The accepted shape is: optional digits, exactly one `-`, optional digits,
/// then `>`. Anything after the `>` is ignored. The input is only inspected,
/// never modified.
///
/// Returns `false` (rather than panicking) when `pos` is past the end of
/// `input` or does not fall on a character boundary.
///
/// Port of isnumglob() from lex.c
pub fn isnumglob(input: &str, pos: usize) -> bool {
    // `get` rejects out-of-range and mid-character offsets instead of panicking.
    let rest = match input.get(pos..) {
        Some(rest) => rest,
        None => return false,
    };

    let mut seen_dash = false;
    for c in rest.chars() {
        match c {
            '0'..='9' => {}
            '-' if !seen_dash => seen_dash = true,
            // A `>` closes the range only once the `-` has been seen.
            '>' => return seen_dash,
            // Any other character, including a second `-`, is not part of the syntax.
            _ => return false,
        }
    }

    // Ran out of input before the closing `>`.
    false
}
2066
2067/// Tokenize a string as if in double quotes.
2068/// This is usually called before singsub().
2069///
2070/// Port of parsestr() / parsestrnoerr() from lex.c
2071pub fn parsestr(s: &str) -> Result<String, String> {
2072    let mut result = String::with_capacity(s.len());
2073    let chars: Vec<char> = s.chars().collect();
2074    let mut i = 0;
2075
2076    while i < chars.len() {
2077        let c = chars[i];
2078        match c {
2079            '\\' => {
2080                i += 1;
2081                if i < chars.len() {
2082                    let next = chars[i];
2083                    match next {
2084                        '$' | '\\' | '`' | '"' | '\n' => {
2085                            result.push(char_tokens::BNULL);
2086                            result.push(next);
2087                        }
2088                        _ => {
2089                            result.push('\\');
2090                            result.push(next);
2091                        }
2092                    }
2093                } else {
2094                    result.push('\\');
2095                }
2096            }
2097            '$' => {
2098                result.push(char_tokens::QSTRING);
2099                if i + 1 < chars.len() {
2100                    let next = chars[i + 1];
2101                    if next == '{' {
2102                        result.push(char_tokens::INBRACE);
2103                        i += 1;
2104                    } else if next == '(' {
2105                        result.push(char_tokens::INPAR);
2106                        i += 1;
2107                    }
2108                }
2109            }
2110            '`' => {
2111                result.push(char_tokens::QTICK);
2112            }
2113            _ => {
2114                result.push(c);
2115            }
2116        }
2117        i += 1;
2118    }
2119
2120    Ok(result)
2121}
2122
/// Find the end of a subscript in `s`.
///
/// `s` starts just after the opening bracket. The scan skips backslash-escaped
/// characters, single- and double-quoted spans, and balanced `[...]` / `(...)`
/// groups nested inside the subscript, and stops at the first `endchar` that
/// is outside all of them.
///
/// Returns the index of that terminating `endchar`, counted in characters
/// (not bytes) from the start of `s`. Returns `None` when `s` is empty, when
/// it starts with `endchar` (empty subscript), or when no terminator is found.
///
/// Port of parse_subscript() from lex.c
pub fn parse_subscript(s: &str, endchar: char) -> Option<usize> {
    if s.is_empty() || s.starts_with(endchar) {
        return None;
    }

    let chars: Vec<char> = s.chars().collect();
    let mut i = 0;
    let mut depth = 0usize;
    let mut in_dquote = false;
    let mut in_squote = false;

    while i < chars.len() {
        let c = chars[i];

        if in_squote {
            // Nothing is special inside single quotes except the closing quote.
            if c == '\'' {
                in_squote = false;
            }
            i += 1;
            continue;
        }

        if in_dquote {
            if c == '"' {
                in_dquote = false;
            } else if c == '\\' && i + 1 < chars.len() {
                i += 1; // skip escaped char
            }
            i += 1;
            continue;
        }

        match c {
            '\\' => i += 1, // skip next char
            '\'' => in_squote = true,
            '"' => in_dquote = true,
            '[' | '(' => depth += 1,
            ']' | ')' if depth > 0 => {
                // Closes a group opened inside the subscript. It can never be
                // the terminator, even when this brings the depth back to 0.
                depth -= 1;
                i += 1;
                continue;
            }
            _ => {}
        }

        if c == endchar && depth == 0 {
            return Some(i);
        }

        i += 1;
    }

    None
}
2191
2192/// Tokenize a string as if it were a normal command-line argument
2193/// but it may contain separators. Used for ${...%...} substitutions.
2194///
2195/// Port of parse_subst_string() from lex.c
2196pub fn parse_subst_string(s: &str) -> Result<String, String> {
2197    if s.is_empty() {
2198        return Ok(String::new());
2199    }
2200
2201    let mut result = String::with_capacity(s.len());
2202    let chars: Vec<char> = s.chars().collect();
2203    let mut i = 0;
2204
2205    while i < chars.len() {
2206        let c = chars[i];
2207        match c {
2208            '\\' => {
2209                result.push(char_tokens::BNULL);
2210                i += 1;
2211                if i < chars.len() {
2212                    result.push(chars[i]);
2213                }
2214            }
2215            '\'' => {
2216                result.push(char_tokens::SNULL);
2217                i += 1;
2218                while i < chars.len() && chars[i] != '\'' {
2219                    result.push(chars[i]);
2220                    i += 1;
2221                }
2222                result.push(char_tokens::SNULL);
2223            }
2224            '"' => {
2225                result.push(char_tokens::DNULL);
2226                i += 1;
2227                while i < chars.len() && chars[i] != '"' {
2228                    if chars[i] == '\\' && i + 1 < chars.len() {
2229                        result.push(char_tokens::BNULL);
2230                        i += 1;
2231                        result.push(chars[i]);
2232                    } else if chars[i] == '$' {
2233                        result.push(char_tokens::QSTRING);
2234                    } else {
2235                        result.push(chars[i]);
2236                    }
2237                    i += 1;
2238                }
2239                result.push(char_tokens::DNULL);
2240            }
2241            '$' => {
2242                result.push(char_tokens::STRING);
2243                if i + 1 < chars.len() {
2244                    match chars[i + 1] {
2245                        '{' => {
2246                            result.push(char_tokens::INBRACE);
2247                            i += 1;
2248                        }
2249                        '(' => {
2250                            result.push(char_tokens::INPAR);
2251                            i += 1;
2252                        }
2253                        _ => {}
2254                    }
2255                }
2256            }
2257            '*' => result.push(char_tokens::STAR),
2258            '?' => result.push(char_tokens::QUEST),
2259            '[' => result.push(char_tokens::INBRACK),
2260            ']' => result.push(char_tokens::OUTBRACK),
2261            '{' => result.push(char_tokens::INBRACE),
2262            '}' => result.push(char_tokens::OUTBRACE),
2263            '~' => result.push(char_tokens::TILDE),
2264            '#' => result.push(char_tokens::POUND),
2265            '^' => result.push(char_tokens::HAT),
2266            _ => result.push(c),
2267        }
2268        i += 1;
2269    }
2270
2271    Ok(result)
2272}
2273
2274/// Untokenize a string - convert tokenized chars back to original
2275///
2276/// Port of untokenize() from exec.c (but used by lexer too)
2277pub fn untokenize(s: &str) -> String {
2278    let mut result = String::with_capacity(s.len());
2279    let chars: Vec<char> = s.chars().collect();
2280    let mut i = 0;
2281
2282    while i < chars.len() {
2283        let c = chars[i];
2284        // Check if it's a token character (in the special range)
2285        if (c as u32) < 32 {
2286            // Convert token back to original character
2287            match c {
2288                c if c == char_tokens::POUND => result.push('#'),
2289                c if c == char_tokens::STRING => result.push('$'),
2290                c if c == char_tokens::HAT => result.push('^'),
2291                c if c == char_tokens::STAR => result.push('*'),
2292                c if c == char_tokens::INPAR => result.push('('),
2293                c if c == char_tokens::OUTPAR => result.push(')'),
2294                c if c == char_tokens::INPARMATH => result.push('('),
2295                c if c == char_tokens::OUTPARMATH => result.push(')'),
2296                c if c == char_tokens::QSTRING => result.push('$'),
2297                c if c == char_tokens::EQUALS => result.push('='),
2298                c if c == char_tokens::BAR => result.push('|'),
2299                c if c == char_tokens::INBRACE => result.push('{'),
2300                c if c == char_tokens::OUTBRACE => result.push('}'),
2301                c if c == char_tokens::INBRACK => result.push('['),
2302                c if c == char_tokens::OUTBRACK => result.push(']'),
2303                c if c == char_tokens::TICK => result.push('`'),
2304                c if c == char_tokens::INANG => result.push('<'),
2305                c if c == char_tokens::OUTANG => result.push('>'),
2306                c if c == char_tokens::QUEST => result.push('?'),
2307                c if c == char_tokens::TILDE => result.push('~'),
2308                c if c == char_tokens::QTICK => result.push('`'),
2309                c if c == char_tokens::COMMA => result.push(','),
2310                c if c == char_tokens::DASH => result.push('-'),
2311                c if c == char_tokens::BANG => result.push('!'),
2312                c if c == char_tokens::SNULL
2313                    || c == char_tokens::DNULL
2314                    || c == char_tokens::BNULL =>
2315                {
2316                    // Null markers - skip
2317                }
2318                _ => {
2319                    // Unknown token, try ztokens lookup
2320                    let idx = c as usize;
2321                    if idx < char_tokens::ZTOKENS.len() {
2322                        result.push(char_tokens::ZTOKENS.chars().nth(idx).unwrap_or(c));
2323                    } else {
2324                        result.push(c);
2325                    }
2326                }
2327            }
2328        } else {
2329            result.push(c);
2330        }
2331        i += 1;
2332    }
2333
2334    result
2335}
2336
/// Report whether `s` contains at least one character whose code point is
/// below 32, which is the range this module treats as token markers.
pub fn has_token(s: &str) -> bool {
    // In UTF-8 a byte below 0x20 can only be a character below 32 encoded on
    // its own, so scanning bytes is equivalent to scanning chars here.
    s.bytes().any(|byte| byte < 0x20)
}
2341
2342/// Convert token characters to their printable form for display
2343pub fn tokens_to_printable(s: &str) -> String {
2344    untokenize(s)
2345}
2346
#[cfg(test)]
mod tests {
    use super::*;

    /// Upper bound on tokens pulled while scanning for a particular one, so a
    /// lexer that stops making progress fails the test instead of hanging it.
    const MAX_SCAN: usize = 64;

    /// Lex one token and return its type.
    fn next_tok(lexer: &mut ZshLexer) -> LexTok {
        lexer.zshlex();
        lexer.tok
    }

    /// Lex `input` from the start and check the leading token types in order.
    fn assert_leading_tokens(input: &str, expected: &[LexTok]) {
        let mut lexer = ZshLexer::new(input);
        for (idx, want) in expected.iter().enumerate() {
            let got = next_tok(&mut lexer);
            assert_eq!(got, *want, "token #{} of {:?}", idx, input);
        }
    }

    /// Pull tokens until `target` or end of input is produced and return the
    /// token that stopped the scan.
    fn scan_for(lexer: &mut ZshLexer, target: LexTok) -> LexTok {
        for _ in 0..MAX_SCAN {
            let tok = next_tok(lexer);
            if tok == target || tok == LexTok::Endinput {
                return tok;
            }
        }
        panic!(
            "lexer produced {} tokens without reaching {:?} or end of input",
            MAX_SCAN, target
        );
    }

    #[test]
    fn test_simple_command() {
        let mut lexer = ZshLexer::new("echo hello");

        assert_eq!(next_tok(&mut lexer), LexTok::String);
        assert_eq!(lexer.tokstr, Some("echo".to_string()));

        assert_eq!(next_tok(&mut lexer), LexTok::String);
        assert_eq!(lexer.tokstr, Some("hello".to_string()));

        assert_eq!(next_tok(&mut lexer), LexTok::Endinput);
    }

    #[test]
    fn test_pipeline() {
        assert_leading_tokens(
            "ls | grep foo",
            &[LexTok::String, LexTok::Bar, LexTok::String, LexTok::String],
        );
    }

    #[test]
    fn test_redirections() {
        assert_leading_tokens(
            "echo > file",
            &[LexTok::String, LexTok::Outang, LexTok::String],
        );
    }

    #[test]
    fn test_heredoc() {
        assert_leading_tokens(
            "cat << EOF",
            &[LexTok::String, LexTok::Dinang, LexTok::String],
        );
    }

    #[test]
    fn test_single_quotes() {
        let mut lexer = ZshLexer::new("echo 'hello world'");
        assert_eq!(next_tok(&mut lexer), LexTok::String);

        assert_eq!(next_tok(&mut lexer), LexTok::String);
        // Should contain Snull markers around literal content
        assert!(lexer.tokstr.is_some());
    }

    #[test]
    fn test_function_tokens() {
        let mut lexer = ZshLexer::new("function foo { }");

        lexer.zshlex();
        assert_eq!(
            lexer.tok,
            LexTok::Func,
            "expected Func, got {:?}",
            lexer.tok
        );

        lexer.zshlex();
        assert_eq!(
            lexer.tok,
            LexTok::String,
            "expected String for 'foo', got {:?}",
            lexer.tok
        );
        assert_eq!(lexer.tokstr, Some("foo".to_string()));

        lexer.zshlex();
        assert_eq!(
            lexer.tok,
            LexTok::Inbrace,
            "expected Inbrace, got {:?} tokstr={:?}",
            lexer.tok,
            lexer.tokstr
        );

        lexer.zshlex();
        assert_eq!(
            lexer.tok,
            LexTok::Outbrace,
            "expected Outbrace, got {:?} tokstr={:?} incmdpos={}",
            lexer.tok,
            lexer.tokstr,
            lexer.incmdpos
        );
    }

    #[test]
    fn test_double_quotes() {
        let mut lexer = ZshLexer::new("echo \"hello $name\"");
        assert_eq!(next_tok(&mut lexer), LexTok::String);

        assert_eq!(next_tok(&mut lexer), LexTok::String);
        // Should contain tokenized content
        assert!(lexer.tokstr.is_some());
    }

    #[test]
    fn test_command_substitution() {
        assert_leading_tokens("echo $(pwd)", &[LexTok::String, LexTok::String]);
    }

    #[test]
    fn test_env_assignment() {
        let mut lexer = ZshLexer::new("FOO=bar echo");
        lexer.incmdpos = true;

        lexer.zshlex();
        assert_eq!(
            lexer.tok,
            LexTok::Envstring,
            "tok={:?} tokstr={:?}",
            lexer.tok,
            lexer.tokstr
        );

        assert_eq!(next_tok(&mut lexer), LexTok::String);
    }

    #[test]
    fn test_array_assignment() {
        let mut lexer = ZshLexer::new("arr=(a b c)");
        lexer.incmdpos = true;
        assert_eq!(next_tok(&mut lexer), LexTok::Envarray);
    }

    #[test]
    fn test_process_substitution() {
        // `<(ls)` and `>(cat)` are each tokenized into a single string.
        assert_leading_tokens(
            "diff <(ls) >(cat)",
            &[LexTok::String, LexTok::String, LexTok::String],
        );
    }

    #[test]
    fn test_arithmetic() {
        assert_leading_tokens("echo $((1+2))", &[LexTok::String, LexTok::String]);
    }

    #[test]
    fn test_semicolon_variants() {
        let mut lexer = ZshLexer::new("case x in a) cmd;; b) cmd;& c) cmd;| esac");

        // The three case terminators must show up in source order.
        assert_eq!(scan_for(&mut lexer, LexTok::Dsemi), LexTok::Dsemi);
        assert_eq!(scan_for(&mut lexer, LexTok::Semiamp), LexTok::Semiamp);
        assert_eq!(scan_for(&mut lexer, LexTok::Semibar), LexTok::Semibar);
    }

    #[test]
    fn test_isnumglob_basic() {
        assert!(isnumglob("<1-10>", 1));
        assert!(isnumglob("->", 0));
        assert!(!isnumglob("12>", 0));
        assert!(!isnumglob("1-2", 0));
    }

    #[test]
    fn test_parse_subscript_basic() {
        assert_eq!(parse_subscript("1]", ']'), Some(1));
        assert_eq!(parse_subscript("'a]'b]", ']'), Some(5));
        assert_eq!(parse_subscript("", ']'), None);
        assert_eq!(parse_subscript("]", ']'), None);
        assert_eq!(parse_subscript("abc", ']'), None);
    }

    #[test]
    fn test_has_token() {
        assert!(has_token("a\tb"));
        assert!(!has_token("abc"));
        assert!(!has_token(""));
    }

    #[test]
    fn test_plain_text_passes_through() {
        // Text without special characters is unaffected by (un)tokenizing.
        assert_eq!(parsestr("abc"), Ok("abc".to_string()));
        assert_eq!(parse_subst_string("abc"), Ok("abc".to_string()));
        assert_eq!(untokenize("abc"), "abc");
        assert_eq!(tokens_to_printable("abc"), "abc");
    }
}