1use crate::tokens::{char_tokens, LexTok};
16use std::collections::VecDeque;
17
/// Switches that alter how the lexer treats its input.
///
/// All flags default to `false`. `PartialEq`/`Eq` are derived so that flag
/// sets can be compared directly (for example in tests or when saving and
/// restoring lexer state).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct LexFlags {
    /// NOTE(review): presumably set when lexing on behalf of the line editor;
    /// not read in this part of the file — confirm against callers.
    pub zle: bool,
    /// When set, `zshlex` leaves `Newlin` tokens as they are instead of
    /// folding them into `Seper`.
    pub newline: bool,
    /// When set, a comment is returned as a `String` token containing its text.
    pub comments_keep: bool,
    /// When set, a comment that runs to the end of input yields `Endinput`
    /// instead of `Newlin`.
    pub comments_strip: bool,
    /// When set, an unmatched quote in a word is tolerated: no error message
    /// is recorded and an unmatched double quote does not turn the token into
    /// `Lexerr`.
    pub active: bool,
}
32
/// Growable text buffer in which the lexer assembles the current token.
#[derive(Debug, Clone)]
struct LexBuf {
    /// Accumulated token text.
    data: String,
    /// Capacity target; doubled whenever `data` catches up with it.
    siz: usize,
}

impl LexBuf {
    /// Creates an empty buffer with room for 256 bytes.
    fn new() -> Self {
        LexBuf {
            data: String::with_capacity(256),
            siz: 256,
        }
    }

    /// Empties the buffer while keeping its allocation.
    fn clear(&mut self) {
        self.data.clear();
    }

    /// Appends one character, growing the capacity target geometrically.
    fn add(&mut self, c: char) {
        self.data.push(c);
        let len = self.data.len();
        if len >= self.siz {
            // `add_str` can push the length far past `siz`, so keep doubling
            // until the target is ahead of the length again. A single doubling
            // followed by `siz - len` would underflow in that situation.
            while self.siz <= len {
                self.siz = self.siz.max(1).saturating_mul(2);
            }
            self.data.reserve(self.siz - len);
        }
    }

    /// Appends a whole string slice.
    #[allow(dead_code)]
    fn add_str(&mut self, s: &str) {
        self.data.push_str(s);
    }

    /// Length of the buffered text in bytes.
    fn len(&self) -> usize {
        self.data.len()
    }

    /// Borrows the buffered text.
    fn as_str(&self) -> &str {
        &self.data
    }

    /// Consumes the buffer and returns the buffered text.
    #[allow(dead_code)]
    fn into_string(self) -> String {
        self.data
    }

    /// Returns the last buffered character, if any.
    #[allow(dead_code)]
    fn last_char(&self) -> Option<char> {
        // Decode from the back instead of walking the whole string.
        self.data.chars().next_back()
    }

    /// Removes and returns the last buffered character, if any.
    fn pop(&mut self) -> Option<char> {
        self.data.pop()
    }
}
87
/// A here-document registered by the lexer after a `<<` or `<<-` operator.
///
/// `Default`, `PartialEq` and `Eq` are derived so that values can be built
/// incrementally and compared.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct HereDoc {
    /// Word that ends the here-document, with surrounding quotes removed.
    pub terminator: String,
    /// `true` for the `<<-` form, where leading tabs are ignored when looking
    /// for the terminator line.
    pub strip_tabs: bool,
    /// Body text of the here-document; empty until it has been read.
    pub content: String,
}
95
/// Character-level lexer state for zsh-style shell input.
///
/// Characters are pulled with `hgetc`, may be pushed back with `hungetc`,
/// and `zshlex` produces one token per call into `tok` / `tokstr`.
pub struct ZshLexer<'a> {
    /// Source text being lexed.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    pos: usize,
    /// Pushed-back characters; they are consumed (front first) before `input`.
    unget_buf: VecDeque<char>,
    /// Text of the current token, when the token carries any.
    pub tokstr: Option<String>,
    /// Type of the current token.
    pub tok: LexTok,
    /// File descriptor digit that prefixed a redirection operator; `-1` when
    /// there was none.
    pub tokfd: i32,
    /// Value of `lineno` captured when the current token started.
    pub toklineno: u64,
    /// Current line number, starting at 1.
    pub lineno: u64,
    /// Set when input ran out or a lexer limit was hit; cleared by `hungetc`.
    pub lexstop: bool,
    /// Whether the next token is in command position; updated by `zshlex`
    /// after every token.
    pub incmdpos: bool,
    /// Greater than zero while inside `[[ ... ]]`.
    pub incond: i32,
    /// Set inside a condition after a comparison operator such as `=` or `=~`.
    pub incondpat: bool,
    /// Case-pattern state. Only read in this part of the file —
    /// NOTE(review): presumably maintained by the parser; confirm.
    pub incasepat: i32,
    /// Set after a redirection operator, `for`, `foreach` or `select`.
    pub inredir: bool,
    /// Set to 2 by `zshlex` after `for`; counted down by `lex_arith`.
    pub infor: i32,
    /// While positive, `zshlex` increments it per token and forces command
    /// position when it reaches 3.
    inrepeat: i32,
    /// Allows assignment detection in words outside command position.
    pub intypeset: bool,
    /// True while the lexer is inside a `((` that followed `for`.
    dbparens: bool,
    /// NOTE(review): not used in this part of the file; presumably disables
    /// alias expansion — confirm.
    pub noaliases: bool,
    /// Masked with 1 by `zshlex` after every token.
    pub nocorrect: i32,
    /// When set, `#` does not start a comment.
    pub nocomments: bool,
    /// Behaviour switches, see `LexFlags`.
    pub lexflags: LexFlags,
    /// True until the first non-blank character of the input has been read.
    pub isfirstln: bool,
    /// Not used in this part of the file.
    #[allow(dead_code)]
    isfirstch: bool,
    /// Here-documents registered by `zshlex` and not yet read.
    pub heredocs: Vec<HereDoc>,
    /// 0 = no here-document expected, 1 = after `<<`, 2 = after `<<-`.
    heredoc_pending: u8,
    /// Buffer in which the current token's text is assembled.
    lexbuf: LexBuf,
    /// 0 when the last token was not a newline, -1 for a newline with more
    /// input after it, 1 for a newline at the end of the input.
    pub isnewlin: i32,
    /// Message describing the most recent lexing error, if any.
    pub error: Option<String>,
    /// Number of `hgetc` calls made so far; used as a runaway guard.
    global_iterations: usize,
    /// Current nesting depth of `dquote_parse`.
    recursion_depth: usize,
}
162
/// Deepest `dquote_parse` nesting allowed; `check_recursion` reports an error
/// once `recursion_depth` exceeds this value.
const MAX_LEXER_RECURSION: usize = 200;
164
165impl<'a> ZshLexer<'a> {
166 pub fn new(input: &'a str) -> Self {
168 ZshLexer {
169 input,
170 pos: 0,
171 unget_buf: VecDeque::new(),
172 tokstr: None,
173 tok: LexTok::Endinput,
174 tokfd: -1,
175 toklineno: 1,
176 lineno: 1,
177 lexstop: false,
178 incmdpos: true,
179 incond: 0,
180 incondpat: false,
181 incasepat: 0,
182 inredir: false,
183 infor: 0,
184 inrepeat: 0,
185 intypeset: false,
186 dbparens: false,
187 noaliases: false,
188 nocorrect: 0,
189 nocomments: false,
190 lexflags: LexFlags::default(),
191 isfirstln: true,
192 isfirstch: true,
193 heredocs: Vec::new(),
194 heredoc_pending: 0,
195 lexbuf: LexBuf::new(),
196 isnewlin: 0,
197 error: None,
198 global_iterations: 0,
199 recursion_depth: 0,
200 }
201 }
202
203 #[inline]
205 fn check_recursion(&mut self) -> bool {
206 if self.recursion_depth > MAX_LEXER_RECURSION {
207 self.error = Some("lexer exceeded max recursion depth".to_string());
208 self.lexstop = true;
209 true
210 } else {
211 false
212 }
213 }
214
215 #[inline]
217 fn check_iterations(&mut self) -> bool {
218 self.global_iterations += 1;
219 if self.global_iterations > 50_000 {
220 self.error = Some("lexer exceeded 50K iterations".to_string());
221 self.lexstop = true;
222 self.tok = LexTok::Lexerr;
223 true
224 } else {
225 false
226 }
227 }
228
229 fn hgetc(&mut self) -> Option<char> {
231 if self.check_iterations() {
232 return None;
233 }
234
235 if let Some(c) = self.unget_buf.pop_front() {
236 return Some(c);
237 }
238
239 let c = self.input[self.pos..].chars().next()?;
240 self.pos += c.len_utf8();
241
242 if c == '\n' {
243 self.lineno += 1;
244 }
245
246 Some(c)
247 }
248
249 fn hungetc(&mut self, c: char) {
251 self.unget_buf.push_front(c);
252 if c == '\n' && self.lineno > 1 {
253 self.lineno -= 1;
254 }
255 self.lexstop = false;
256 }
257
258 #[allow(dead_code)]
260 fn peek(&mut self) -> Option<char> {
261 if let Some(&c) = self.unget_buf.front() {
262 return Some(c);
263 }
264 self.input[self.pos..].chars().next()
265 }
266
267 fn add(&mut self, c: char) {
269 self.lexbuf.add(c);
270 }
271
/// True for a space or a tab.
fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
276
/// True for in-word blanks: space, tab, vertical tab, form feed and
/// carriage return. A newline is not included.
fn is_inblank(c: char) -> bool {
    c == ' ' || c == '\t' || c == '\x0b' || c == '\x0c' || c == '\r'
}
281
/// True for the ASCII digits `0` through `9`.
fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
286
/// True when `c` may begin an identifier: an ASCII letter or `_`.
#[allow(dead_code)]
fn is_ident_start(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
292
/// True when `c` may appear inside an identifier: an ASCII letter, an ASCII
/// digit or `_`.
fn is_ident(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
297
298 pub fn zshlex(&mut self) {
300 if self.tok == LexTok::Lexerr {
301 return;
302 }
303
304 loop {
308 if self.inrepeat > 0 {
309 self.inrepeat += 1;
310 }
311 if self.inrepeat == 3 {
312 self.incmdpos = true;
313 }
314
315 self.tok = self.gettok();
316
317 break;
319 }
320
321 self.nocorrect &= 1;
322
323 if self.tok == LexTok::Newlin || self.tok == LexTok::Endinput {
325 self.process_heredocs();
326 }
327
328 if self.tok != LexTok::Newlin {
329 self.isnewlin = 0;
330 } else {
331 self.isnewlin = if self.pos < self.input.len() { -1 } else { 1 };
332 }
333
334 if self.tok == LexTok::Semi || (self.tok == LexTok::Newlin && !self.lexflags.newline) {
335 self.tok = LexTok::Seper;
336 }
337
338 if self.tok == LexTok::String {
341 if let Some(ref s) = self.tokstr {
342 if s == "{" {
343 self.tok = LexTok::Inbrace;
344 } else if s == "}" {
345 self.tok = LexTok::Outbrace;
346 } else if self.incasepat == 0 {
347 self.check_reserved_word();
350 }
351 }
352 }
353
354 if self.heredoc_pending > 0 && self.tok == LexTok::String {
356 if let Some(ref terminator) = self.tokstr {
357 let strip_tabs = self.heredoc_pending == 2;
358 let term = terminator
360 .trim_matches(|c| c == '\'' || c == '"')
361 .to_string();
362 self.heredocs.push(HereDoc {
363 terminator: term,
364 strip_tabs,
365 content: String::new(),
366 });
367 }
368 self.heredoc_pending = 0;
369 }
370
371 if self.incond > 0 {
373 if let Some(ref s) = self.tokstr {
374 if s == "="
378 || s == "=="
379 || s == "!="
380 || s == "=~"
381 || s == "\u{8d}"
382 || s == "\u{8d}\u{8d}"
383 || s == "!\u{8d}"
384 || s == "\u{8d}~"
385 {
386 self.incondpat = true;
387 } else if self.incondpat {
388 }
393 }
394 if self.tok == LexTok::Doutbrack {
396 self.incondpat = false;
397 }
398 } else {
399 self.incondpat = false;
400 }
401
402 match self.tok {
406 LexTok::Seper
407 | LexTok::Newlin
408 | LexTok::Semi
409 | LexTok::Dsemi
410 | LexTok::Semiamp
411 | LexTok::Semibar
412 | LexTok::Amper
413 | LexTok::Amperbang
414 | LexTok::Inpar
415 | LexTok::Inbrace
416 | LexTok::Dbar
417 | LexTok::Damper
418 | LexTok::Baramp
419 | LexTok::Inoutpar
420 | LexTok::Doloop
421 | LexTok::Then
422 | LexTok::Elif
423 | LexTok::Else
424 | LexTok::Doutbrack
425 | LexTok::Func => {
426 self.incmdpos = true;
427 }
428 LexTok::Bar => {
429 if self.incasepat <= 0 {
431 self.incmdpos = true;
432 }
433 }
434 LexTok::String
435 | LexTok::Typeset
436 | LexTok::Envarray
437 | LexTok::Outpar
438 | LexTok::Case
439 | LexTok::Dinbrack => {
440 self.incmdpos = false;
441 }
442 _ => {}
443 }
444
445 if self.tok != LexTok::Dinpar {
449 self.infor = if self.tok == LexTok::For { 2 } else { 0 };
450 }
451
452 let oldpos = self.incmdpos;
454 if self.tok.is_redirop()
455 || self.tok == LexTok::For
456 || self.tok == LexTok::Foreach
457 || self.tok == LexTok::Select
458 {
459 self.inredir = true;
460 self.incmdpos = false;
461 } else if self.inredir {
462 self.incmdpos = oldpos;
463 self.inredir = false;
464 }
465 }
466
467 fn process_heredocs(&mut self) {
469 let heredocs = std::mem::take(&mut self.heredocs);
470
471 for mut hdoc in heredocs {
472 let mut content = String::new();
473 let mut line_count = 0;
474
475 loop {
476 line_count += 1;
477 if line_count > 10000 {
478 self.error = Some("heredoc exceeded 10000 lines".to_string());
479 self.tok = LexTok::Lexerr;
480 return;
481 }
482
483 let line = self.read_line();
484 if line.is_none() {
485 self.error = Some("here document too large or unterminated".to_string());
486 self.tok = LexTok::Lexerr;
487 return;
488 }
489
490 let line = line.unwrap();
491 let check_line = if hdoc.strip_tabs {
492 line.trim_start_matches('\t')
493 } else {
494 &line
495 };
496
497 if check_line.trim_end_matches('\n') == hdoc.terminator {
498 break;
499 }
500
501 content.push_str(&line);
502 }
503
504 hdoc.content = content;
505 }
506 }
507
508 fn read_line(&mut self) -> Option<String> {
510 let mut line = String::new();
511
512 loop {
513 match self.hgetc() {
514 Some(c) => {
515 line.push(c);
516 if c == '\n' {
517 break;
518 }
519 }
520 None => {
521 if line.is_empty() {
523 return None;
524 }
525 break;
526 }
527 }
528 }
529
530 Some(line)
531 }
532
533 fn gettok(&mut self) -> LexTok {
535 self.tokstr = None;
536 self.tokfd = -1;
537
538 let mut ws_iterations = 0;
540 loop {
541 ws_iterations += 1;
542 if ws_iterations > 100_000 {
543 self.error = Some("gettok: infinite loop in whitespace skip".to_string());
544 return LexTok::Lexerr;
545 }
546 let c = match self.hgetc() {
547 Some(c) => c,
548 None => {
549 self.lexstop = true;
550 return if self.error.is_some() {
551 LexTok::Lexerr
552 } else {
553 LexTok::Endinput
554 };
555 }
556 };
557
558 if !Self::is_blank(c) {
559 self.hungetc(c);
560 break;
561 }
562 }
563
564 let c = match self.hgetc() {
565 Some(c) => c,
566 None => {
567 self.lexstop = true;
568 return LexTok::Endinput;
569 }
570 };
571
572 self.toklineno = self.lineno;
573 self.isfirstln = false;
574
575 if self.dbparens {
577 return self.lex_arith(c);
578 }
579
580 if Self::is_digit(c) {
582 let d = self.hgetc();
583 match d {
584 Some('&') => {
585 let e = self.hgetc();
586 if e == Some('>') {
587 self.tokfd = (c as u8 - b'0') as i32;
588 self.hungetc('>');
589 return self.lex_initial('&');
590 }
591 if let Some(e) = e {
592 self.hungetc(e);
593 }
594 self.hungetc('&');
595 }
596 Some('>') | Some('<') => {
597 self.tokfd = (c as u8 - b'0') as i32;
598 return self.lex_initial(d.unwrap());
599 }
600 Some(d) => {
601 self.hungetc(d);
602 }
603 None => {}
604 }
605 self.lexstop = false;
606 }
607
608 self.lex_initial(c)
609 }
610
611 fn lex_arith(&mut self, c: char) -> LexTok {
613 self.lexbuf.clear();
614 self.hungetc(c);
615
616 let end_char = if self.infor > 0 { ';' } else { ')' };
617 if self.dquote_parse(end_char, false).is_err() {
618 return LexTok::Lexerr;
619 }
620
621 self.tokstr = Some(self.lexbuf.as_str().to_string());
622
623 if !self.lexstop && self.infor > 0 {
624 self.infor -= 1;
625 return LexTok::Dinpar;
626 }
627
628 match self.hgetc() {
630 Some(')') => {
631 self.dbparens = false;
632 LexTok::Doutpar
633 }
634 c => {
635 if let Some(c) = c {
636 self.hungetc(c);
637 }
638 LexTok::Lexerr
639 }
640 }
641 }
642
643 fn lex_initial(&mut self, c: char) -> LexTok {
645 if c == '#' && !self.nocomments {
647 return self.lex_comment();
648 }
649
650 match c {
651 '\\' => {
652 let d = self.hgetc();
653 if d == Some('\n') {
654 return self.gettok();
656 }
657 if let Some(d) = d {
658 self.hungetc(d);
659 }
660 self.lexstop = false;
661 self.gettokstr(c, false)
662 }
663
664 '\n' => LexTok::Newlin,
665
666 ';' => {
667 let d = self.hgetc();
668 match d {
669 Some(';') => LexTok::Dsemi,
670 Some('&') => LexTok::Semiamp,
671 Some('|') => LexTok::Semibar,
672 _ => {
673 if let Some(d) = d {
674 self.hungetc(d);
675 }
676 self.lexstop = false;
677 LexTok::Semi
678 }
679 }
680 }
681
682 '&' => {
683 let d = self.hgetc();
684 match d {
685 Some('&') => LexTok::Damper,
686 Some('!') | Some('|') => LexTok::Amperbang,
687 Some('>') => {
688 self.tokfd = self.tokfd.max(0);
689 let e = self.hgetc();
690 match e {
691 Some('!') | Some('|') => LexTok::Outangampbang,
692 Some('>') => {
693 let f = self.hgetc();
694 match f {
695 Some('!') | Some('|') => LexTok::Doutangampbang,
696 _ => {
697 if let Some(f) = f {
698 self.hungetc(f);
699 }
700 self.lexstop = false;
701 LexTok::Doutangamp
702 }
703 }
704 }
705 _ => {
706 if let Some(e) = e {
707 self.hungetc(e);
708 }
709 self.lexstop = false;
710 LexTok::Ampoutang
711 }
712 }
713 }
714 _ => {
715 if let Some(d) = d {
716 self.hungetc(d);
717 }
718 self.lexstop = false;
719 LexTok::Amper
720 }
721 }
722 }
723
724 '|' => {
725 let d = self.hgetc();
726 match d {
727 Some('|') if self.incasepat <= 0 => LexTok::Dbar,
728 Some('&') => LexTok::Baramp,
729 _ => {
730 if let Some(d) = d {
731 self.hungetc(d);
732 }
733 self.lexstop = false;
734 LexTok::Bar
735 }
736 }
737 }
738
739 '(' => {
740 let d = self.hgetc();
741 match d {
742 Some('(') => {
743 if self.infor > 0 {
744 self.dbparens = true;
745 return LexTok::Dinpar;
746 }
747 if self.incmdpos {
748 self.lexbuf.clear();
750 match self.cmd_or_math() {
751 CmdOrMath::Math => {
752 self.tokstr = Some(self.lexbuf.as_str().to_string());
753 return LexTok::Dinpar;
754 }
755 CmdOrMath::Cmd => {
756 self.tokstr = None;
757 return LexTok::Inpar;
758 }
759 CmdOrMath::Err => return LexTok::Lexerr,
760 }
761 }
762 self.hungetc('(');
763 self.lexstop = false;
764 self.gettokstr('(', false)
765 }
766 Some(')') => LexTok::Inoutpar,
767 _ => {
768 if let Some(d) = d {
769 self.hungetc(d);
770 }
771 self.lexstop = false;
772 if self.incondpat || self.incasepat > 1 {
776 self.gettokstr('(', false)
777 } else if self.incond == 1 || self.incmdpos || self.incasepat == 1 {
778 LexTok::Inpar
779 } else {
780 self.gettokstr('(', false)
781 }
782 }
783 }
784 }
785
786 ')' => LexTok::Outpar,
787
788 '{' => {
789 if self.incmdpos {
792 let next = self.hgetc();
793 let is_brace_group = match next {
794 Some(' ') | Some('\t') | Some('\n') | None => true,
795 _ => false,
796 };
797 if let Some(ch) = next {
798 self.hungetc(ch);
799 }
800 if is_brace_group {
801 self.tokstr = Some("{".to_string());
802 LexTok::Inbrace
803 } else {
804 self.gettokstr(c, false)
805 }
806 } else {
807 self.gettokstr(c, false)
808 }
809 }
810
811 '}' => {
812 self.tokstr = Some("}".to_string());
815 LexTok::Outbrace
816 }
817
818 '[' => {
819 if self.incasepat > 0 {
823 self.gettokstr(c, false)
824 } else if self.incmdpos {
825 let next = self.hgetc();
826 if next == Some('[') {
827 self.tokstr = Some("[[".to_string());
829 self.incond = 1;
830 return LexTok::Dinbrack;
831 }
832 if let Some(ch) = next {
834 self.hungetc(ch);
835 }
836 self.tokstr = Some("[".to_string());
837 LexTok::String
838 } else {
839 self.gettokstr(c, false)
840 }
841 }
842
843 ']' => {
844 if self.incond > 0 {
846 let next = self.hgetc();
847 if next == Some(']') {
848 self.tokstr = Some("]]".to_string());
849 self.incond = 0;
850 return LexTok::Doutbrack;
851 }
852 if let Some(ch) = next {
853 self.hungetc(ch);
854 }
855 }
856 self.gettokstr(c, false)
857 }
858
859 '<' => {
860 if self.incondpat || self.incasepat > 0 {
862 self.gettokstr(c, false)
863 } else {
864 self.lex_inang()
865 }
866 }
867
868 '>' => {
869 if self.incondpat || self.incasepat > 0 {
871 self.gettokstr(c, false)
872 } else {
873 self.lex_outang()
874 }
875 }
876
877 _ => self.gettokstr(c, false),
878 }
879 }
880
881 fn lex_comment(&mut self) -> LexTok {
883 if self.lexflags.comments_keep {
884 self.lexbuf.clear();
885 self.add('#');
886 }
887
888 loop {
889 let c = self.hgetc();
890 match c {
891 Some('\n') | None => break,
892 Some(c) => {
893 if self.lexflags.comments_keep {
894 self.add(c);
895 }
896 }
897 }
898 }
899
900 if self.lexflags.comments_keep {
901 self.tokstr = Some(self.lexbuf.as_str().to_string());
902 if !self.lexstop {
903 self.hungetc('\n');
904 }
905 return LexTok::String;
906 }
907
908 if self.lexflags.comments_strip && self.lexstop {
909 return LexTok::Endinput;
910 }
911
912 LexTok::Newlin
913 }
914
915 fn lex_inang(&mut self) -> LexTok {
917 let d = self.hgetc();
918 match d {
919 Some('(') => {
920 self.hungetc('(');
922 self.lexstop = false;
923 return self.gettokstr('<', false);
924 }
925 Some('>') => return LexTok::Inoutang,
926 Some('<') => {
927 let e = self.hgetc();
928 match e {
929 Some('(') => {
930 self.hungetc('(');
931 self.hungetc('<');
932 return LexTok::Inang;
933 }
934 Some('<') => return LexTok::Trinang,
935 Some('-') => {
936 self.heredoc_pending = 2; return LexTok::Dinangdash;
938 }
939 _ => {
940 if let Some(e) = e {
941 self.hungetc(e);
942 }
943 self.lexstop = false;
944 self.heredoc_pending = 1; return LexTok::Dinang;
946 }
947 }
948 }
949 Some('&') => return LexTok::Inangamp,
950 _ => {
951 if let Some(d) = d {
952 self.hungetc(d);
953 }
954 self.lexstop = false;
955 return LexTok::Inang;
956 }
957 }
958 }
959
960 fn lex_outang(&mut self) -> LexTok {
962 let d = self.hgetc();
963 match d {
964 Some('(') => {
965 self.hungetc('(');
967 self.lexstop = false;
968 return self.gettokstr('>', false);
969 }
970 Some('&') => {
971 let e = self.hgetc();
972 match e {
973 Some('!') | Some('|') => return LexTok::Outangampbang,
974 _ => {
975 if let Some(e) = e {
976 self.hungetc(e);
977 }
978 self.lexstop = false;
979 return LexTok::Outangamp;
980 }
981 }
982 }
983 Some('!') | Some('|') => return LexTok::Outangbang,
984 Some('>') => {
985 let e = self.hgetc();
986 match e {
987 Some('&') => {
988 let f = self.hgetc();
989 match f {
990 Some('!') | Some('|') => return LexTok::Doutangampbang,
991 _ => {
992 if let Some(f) = f {
993 self.hungetc(f);
994 }
995 self.lexstop = false;
996 return LexTok::Doutangamp;
997 }
998 }
999 }
1000 Some('!') | Some('|') => return LexTok::Doutangbang,
1001 Some('(') => {
1002 self.hungetc('(');
1003 self.hungetc('>');
1004 return LexTok::Outang;
1005 }
1006 _ => {
1007 if let Some(e) = e {
1008 self.hungetc(e);
1009 }
1010 self.lexstop = false;
1011 return LexTok::Doutang;
1012 }
1013 }
1014 }
1015 _ => {
1016 if let Some(d) = d {
1017 self.hungetc(d);
1018 }
1019 self.lexstop = false;
1020 return LexTok::Outang;
1021 }
1022 }
1023 }
1024
/// Lexes a word starting with character `c` into `lexbuf`.
///
/// Characters are appended to the buffer, with many special characters
/// replaced by their `char_tokens` markers, until a character that ends the
/// word is met. That character is pushed back unless input ran out.
///
/// * `c`   - first character of the word (already consumed).
/// * `sub` - when true the buffer is not cleared first, `)` and `|` never end
///   the word, and `<`, `>`, `=` get no special treatment.
///
/// Returns `String` for an ordinary word, `Envstring` when the word is a
/// scalar assignment, `Envarray` when `name=(` was seen, and `Lexerr` on an
/// unmatched quote, a failed substitution or the iteration limit. `tokstr` is
/// set to the buffer contents on every path except the iteration-limit
/// return.
fn gettokstr(&mut self, c: char, sub: bool) -> LexTok {
    // Nesting depth of `{`, `(` and `[`; `in_brace_param` holds the brace
    // depth at which a `${` was opened (0 when not inside one).
    let mut bct = 0;
    let mut pct = 0;
    let mut brct = 0;
    let mut in_brace_param = 0;
    // Token type to return; starts as a plain word.
    let mut peek = LexTok::String;
    // Positive while at the first character of the word, or at the first
    // character after an assignment `=` (it is decremented per character).
    let mut intpos = 1;
    // Quote character left open at end of input, `'\0'` when none.
    let mut unmatched = '\0';
    let mut c = c;
    const MAX_ITERATIONS: usize = 100_000;
    let mut iterations = 0;

    if !sub {
        self.lexbuf.clear();
    }

    loop {
        iterations += 1;
        if iterations > MAX_ITERATIONS {
            self.error = Some("gettokstr exceeded maximum iterations".to_string());
            return LexTok::Lexerr;
        }

        let inbl = Self::is_inblank(c);

        // A blank ends the word unless inside `${...}` or parentheses.
        if inbl && in_brace_param == 0 && pct == 0 {
            break;
        }

        match c {
            ')' => {
                if in_brace_param > 0 || sub {
                    self.add(char_tokens::OUTPAR);
                } else if pct > 0 {
                    pct -= 1;
                    self.add(char_tokens::OUTPAR);
                } else {
                    // Unbalanced `)` ends the word.
                    break;
                }
            }

            '|' => {
                if pct == 0 && in_brace_param == 0 {
                    if sub {
                        self.add(c);
                    } else {
                        break;
                    }
                } else {
                    self.add(char_tokens::BAR);
                }
            }

            '$' => {
                let e = self.hgetc();
                match e {
                    Some('\\') => {
                        let f = self.hgetc();
                        if f != Some('\n') {
                            if let Some(f) = f {
                                self.hungetc(f);
                            }
                            self.hungetc('\\');
                            self.add(char_tokens::STRING);
                        } else {
                            // `$` followed by a line continuation: look at
                            // what follows the continuation instead.
                            continue;
                        }
                    }
                    Some('[') => {
                        // `$[...]`
                        self.add(char_tokens::STRING);
                        self.add(char_tokens::INBRACK);
                        if self.dquote_parse(']', sub).is_err() {
                            peek = LexTok::Lexerr;
                            break;
                        }
                        self.add(char_tokens::OUTBRACK);
                    }
                    Some('(') => {
                        // `$(...)` or `$((...))`
                        self.add(char_tokens::STRING);
                        match self.cmd_or_math_sub() {
                            CmdOrMath::Cmd => self.add(char_tokens::OUTPAR),
                            CmdOrMath::Math => self.add(char_tokens::OUTPARMATH),
                            CmdOrMath::Err => {
                                peek = LexTok::Lexerr;
                                break;
                            }
                        }
                    }
                    Some('{') => {
                        // `${`: remember the brace depth at which the
                        // parameter expansion started.
                        // NOTE(review): the raw `$` is stored here while the
                        // other branches store `char_tokens::STRING` —
                        // confirm this is intended.
                        self.add(c);
                        self.add(char_tokens::INBRACE);
                        bct += 1;
                        if in_brace_param == 0 {
                            in_brace_param = bct;
                        }
                    }
                    _ => {
                        if let Some(e) = e {
                            self.hungetc(e);
                        }
                        self.lexstop = false;
                        self.add(char_tokens::STRING);
                    }
                }
            }

            '[' => {
                if in_brace_param == 0 {
                    brct += 1;
                }
                self.add(char_tokens::INBRACK);
            }

            ']' => {
                if in_brace_param == 0 && brct > 0 {
                    brct -= 1;
                }
                self.add(char_tokens::OUTBRACK);
            }

            '(' => {
                if in_brace_param == 0 {
                    pct += 1;
                }
                self.add(char_tokens::INPAR);
            }

            '{' => {
                bct += 1;
                self.add(c);
            }

            '}' => {
                if in_brace_param > 0 {
                    // Closing the brace that opened `${` leaves
                    // parameter-expansion mode.
                    if bct == in_brace_param {
                        in_brace_param = 0;
                    }
                    bct -= 1;
                    self.add(char_tokens::OUTBRACE);
                } else if bct > 0 {
                    bct -= 1;
                    self.add(c);
                } else {
                    // Unbalanced `}` ends the word.
                    break;
                }
            }

            '>' => {
                if in_brace_param > 0 || sub || self.incondpat || self.incasepat > 0 {
                    self.add(c);
                } else {
                    // Only `>(` continues the word; a bare `>` ends it.
                    let e = self.hgetc();
                    if e != Some('(') {
                        if let Some(e) = e {
                            self.hungetc(e);
                        }
                        self.lexstop = false;
                        break;
                    }
                    self.add(char_tokens::OUTANGPROC);
                    if self.skip_command_sub().is_err() {
                        peek = LexTok::Lexerr;
                        break;
                    }
                    self.add(char_tokens::OUTPAR);
                }
            }

            '<' => {
                if in_brace_param > 0 || sub || self.incondpat || self.incasepat > 0 {
                    self.add(c);
                } else {
                    // Only `<(` continues the word; a bare `<` ends it.
                    let e = self.hgetc();
                    if e != Some('(') {
                        if let Some(e) = e {
                            self.hungetc(e);
                        }
                        self.lexstop = false;
                        break;
                    }
                    self.add(char_tokens::INANG);
                    if self.skip_command_sub().is_err() {
                        peek = LexTok::Lexerr;
                        break;
                    }
                    self.add(char_tokens::OUTPAR);
                }
            }

            '=' => {
                if !sub {
                    if intpos > 0 {
                        // `=` at the start of the word or right after an
                        // assignment `=`: check for `=(...)`.
                        let e = self.hgetc();
                        if e == Some('(') {
                            self.add(char_tokens::EQUALS);
                            if self.skip_command_sub().is_err() {
                                peek = LexTok::Lexerr;
                                break;
                            }
                            self.add(char_tokens::OUTPAR);
                        } else {
                            if let Some(e) = e {
                                self.hungetc(e);
                            }
                            self.lexstop = false;
                            self.add(char_tokens::EQUALS);
                        }
                    } else if peek != LexTok::Envstring
                        && (self.incmdpos || self.intypeset)
                        && bct == 0
                        && brct == 0
                        && self.incasepat == 0
                    {
                        // Possible assignment: the text so far must be a
                        // valid assignment target.
                        let tok_so_far = self.lexbuf.as_str().to_string();
                        if self.is_valid_assignment_target(&tok_so_far) {
                            let next = self.hgetc();
                            if next == Some('(') {
                                // `name=(`: array assignment; the token ends
                                // here and the `(` stays consumed.
                                self.add(char_tokens::EQUALS);
                                self.tokstr = Some(self.lexbuf.as_str().to_string());
                                return LexTok::Envarray;
                            }
                            if let Some(next) = next {
                                self.hungetc(next);
                            }
                            self.lexstop = false;
                            peek = LexTok::Envstring;
                            // Keep `intpos` positive for the character that
                            // follows the `=`.
                            intpos = 2;
                            self.add(char_tokens::EQUALS);
                        } else {
                            self.add(char_tokens::EQUALS);
                        }
                    } else {
                        self.add(char_tokens::EQUALS);
                    }
                } else {
                    self.add(char_tokens::EQUALS);
                }
            }

            '\\' => {
                let next = self.hgetc();
                if next == Some('\n') {
                    // Line continuation: carry on with the character after
                    // the newline, or end the word at end of input.
                    let next = self.hgetc();
                    if let Some(next) = next {
                        c = next;
                        continue;
                    }
                    break;
                } else {
                    self.add(char_tokens::BNULL);
                    if let Some(next) = next {
                        self.add(next);
                    }
                }
            }

            '\'' => {
                // Single quotes: copy everything verbatim up to the closing
                // quote.
                self.add(char_tokens::SNULL);
                loop {
                    let ch = self.hgetc();
                    match ch {
                        Some('\'') => break,
                        Some(ch) => self.add(ch),
                        None => {
                            self.lexstop = true;
                            unmatched = '\'';
                            peek = LexTok::Lexerr;
                            break;
                        }
                    }
                }
                if unmatched != '\0' {
                    break;
                }
                self.add(char_tokens::SNULL);
            }

            '"' => {
                self.add(char_tokens::DNULL);
                if self.dquote_parse('"', sub).is_err() {
                    unmatched = '"';
                    // With `lexflags.active` an open double quote is
                    // tolerated and the token type is left unchanged.
                    if !self.lexflags.active {
                        peek = LexTok::Lexerr;
                    }
                    break;
                }
                self.add(char_tokens::DNULL);
            }

            '`' => {
                self.add(char_tokens::TICK);
                loop {
                    let ch = self.hgetc();
                    match ch {
                        Some('`') => break,
                        Some('\\') => {
                            let next = self.hgetc();
                            match next {
                                // Backslash-newline is dropped.
                                Some('\n') => continue,
                                // Backslash before a backtick, backslash or
                                // `$` becomes a BNULL marker.
                                Some(c) if c == '`' || c == '\\' || c == '$' => {
                                    self.add(char_tokens::BNULL);
                                    self.add(c);
                                }
                                Some(c) => {
                                    self.add('\\');
                                    self.add(c);
                                }
                                None => break,
                            }
                        }
                        Some(ch) => self.add(ch),
                        None => {
                            self.lexstop = true;
                            unmatched = '`';
                            peek = LexTok::Lexerr;
                            break;
                        }
                    }
                }
                if unmatched != '\0' {
                    break;
                }
                self.add(char_tokens::TICK);
            }

            '~' => {
                self.add(char_tokens::TILDE);
            }

            '#' => {
                self.add(char_tokens::POUND);
            }

            '^' => {
                self.add(char_tokens::HAT);
            }

            '*' => {
                self.add(char_tokens::STAR);
            }

            '?' => {
                self.add(char_tokens::QUEST);
            }

            ',' => {
                // A comma gets its marker only inside braces that are not
                // the `${` braces.
                if bct > in_brace_param {
                    self.add(char_tokens::COMMA);
                } else {
                    self.add(c);
                }
            }

            '-' => {
                self.add(char_tokens::DASH);
            }

            '!' => {
                if brct > 0 {
                    self.add(char_tokens::BANG);
                } else {
                    self.add(c);
                }
            }

            // Command separators end the word.
            '\n' | ';' | '&' => {
                break;
            }

            _ => {
                self.add(c);
            }
        }

        c = match self.hgetc() {
            Some(c) => c,
            None => {
                self.lexstop = true;
                break;
            }
        };

        if intpos > 0 {
            intpos -= 1;
        }
    }

    // Give back the character that ended the word.
    if !self.lexstop {
        self.hungetc(c);
    }

    if unmatched != '\0' && !self.lexflags.active {
        self.error = Some(format!("unmatched {}", unmatched));
    }

    if in_brace_param > 0 {
        self.error = Some("closing brace expected".to_string());
    }

    self.tokstr = Some(self.lexbuf.as_str().to_string());
    peek
}
1448
1449 fn is_valid_assignment_target(&self, s: &str) -> bool {
1451 let mut chars = s.chars().peekable();
1452
1453 if let Some(&c) = chars.peek() {
1455 if c.is_ascii_digit() {
1456 while let Some(&c) = chars.peek() {
1458 if !c.is_ascii_digit() {
1459 break;
1460 }
1461 chars.next();
1462 }
1463 return chars.peek().is_none();
1464 }
1465 }
1466
1467 let mut has_ident = false;
1469 while let Some(&c) = chars.peek() {
1470 if c == char_tokens::INBRACK || c == '[' {
1471 break;
1472 }
1473 if c == '+' {
1474 chars.next();
1476 return chars.peek().is_none() || chars.peek() == Some(&'=');
1477 }
1478 if !Self::is_ident(c) && c != char_tokens::STRING && !char_tokens::is_token(c) {
1479 return false;
1480 }
1481 has_ident = true;
1482 chars.next();
1483 }
1484
1485 has_ident
1486 }
1487
1488 fn dquote_parse(&mut self, endchar: char, sub: bool) -> Result<(), ()> {
1490 self.recursion_depth += 1;
1491 if self.check_recursion() {
1492 self.recursion_depth -= 1;
1493 return Err(());
1494 }
1495
1496 let result = self.dquote_parse_inner(endchar, sub);
1497 self.recursion_depth -= 1;
1498 result
1499 }
1500
/// Copies input into `lexbuf` up to the closing `endchar`, handling the
/// constructs that stay active inside double quotes and arithmetic:
/// backslash escapes, `$` expansions, backticks and nested brackets.
///
/// * `endchar` - terminator: `"` for a double-quoted string, `)` / `]` (or
///   `;` inside a `for` header) for arithmetic.
/// * `sub`     - passed on to nested parses; when set, backslash-newline is
///   not removed.
///
/// The terminator is consumed but not added to the buffer. Returns `Err(())`
/// at end of input, on an unbalanced `)` / `]` in arithmetic mode, on a
/// failed nested substitution, or when the iteration limit is hit.
fn dquote_parse_inner(&mut self, endchar: char, sub: bool) -> Result<(), ()> {
    // Nesting depth of `(`, `[` and `${`.
    let mut pct = 0;
    let mut brct = 0;
    let mut bct = 0;
    // True between a pair of backticks.
    let mut intick = false;
    // Arithmetic mode: brackets must balance before the terminator counts.
    let is_math = endchar == ')' || endchar == ']' || self.infor > 0;
    const MAX_ITERATIONS: usize = 100_000;
    let mut iterations = 0;

    loop {
        iterations += 1;
        if iterations > MAX_ITERATIONS {
            self.error = Some("dquote_parse exceeded maximum iterations".to_string());
            return Err(());
        }
        let c = self.hgetc();
        let c = match c {
            // The terminator only counts outside backticks and `${...}`.
            Some(c) if c == endchar && !intick && bct == 0 => {
                if is_math && (pct > 0 || brct > 0) {
                    // Still inside nested brackets: this closes one of them.
                    self.add(c);
                    if c == ')' {
                        pct -= 1;
                    } else if c == ']' {
                        brct -= 1;
                    }
                    continue;
                }
                return Ok(());
            }
            Some(c) => c,
            None => {
                self.lexstop = true;
                return Err(());
            }
        };

        match c {
            '\\' => {
                let next = self.hgetc();
                match next {
                    // Line continuation.
                    Some('\n') if !sub => continue,
                    // Characters for which the backslash is an escape; it is
                    // stored as a BNULL marker.
                    Some(c)
                        if c == '$'
                            || c == '\\'
                            || (c == '}' && !intick && bct > 0)
                            || c == endchar
                            || c == '`'
                            || (endchar == ']'
                                && (c == '['
                                    || c == ']'
                                    || c == '('
                                    || c == ')'
                                    || c == '{'
                                    || c == '}'
                                    || (c == '"' && sub))) =>
                    {
                        self.add(char_tokens::BNULL);
                        self.add(c);
                    }
                    // Any other character: keep the backslash literally and
                    // process the character normally on the next iteration.
                    Some(c) => {
                        self.add('\\');
                        self.hungetc(c);
                        continue;
                    }
                    None => {
                        self.add('\\');
                    }
                }
            }

            '$' => {
                // Inside backticks `$` is copied unchanged.
                if intick {
                    self.add(c);
                    continue;
                }
                let next = self.hgetc();
                match next {
                    Some('(') => {
                        self.add(char_tokens::QSTRING);
                        match self.cmd_or_math_sub() {
                            CmdOrMath::Cmd => self.add(char_tokens::OUTPAR),
                            CmdOrMath::Math => self.add(char_tokens::OUTPARMATH),
                            CmdOrMath::Err => return Err(()),
                        }
                    }
                    Some('[') => {
                        self.add(char_tokens::STRING);
                        self.add(char_tokens::INBRACK);
                        self.dquote_parse(']', sub)?;
                        self.add(char_tokens::OUTBRACK);
                    }
                    Some('{') => {
                        self.add(char_tokens::QSTRING);
                        self.add(char_tokens::INBRACE);
                        bct += 1;
                    }
                    Some('$') => {
                        self.add(char_tokens::QSTRING);
                        self.add('$');
                    }
                    _ => {
                        if let Some(next) = next {
                            self.hungetc(next);
                        }
                        self.lexstop = false;
                        self.add(char_tokens::QSTRING);
                    }
                }
            }

            '}' => {
                if intick || bct == 0 {
                    self.add(c);
                } else {
                    self.add(char_tokens::OUTBRACE);
                    bct -= 1;
                }
            }

            '`' => {
                self.add(char_tokens::QTICK);
                intick = !intick;
            }

            '(' => {
                if !is_math || bct == 0 {
                    pct += 1;
                }
                self.add(c);
            }

            ')' => {
                if !is_math || bct == 0 {
                    // An unbalanced `)` ends an arithmetic parse with an
                    // error.
                    if pct == 0 && is_math {
                        return Err(());
                    }
                    pct -= 1;
                }
                self.add(c);
            }

            '[' => {
                if !is_math || bct == 0 {
                    brct += 1;
                }
                self.add(c);
            }

            ']' => {
                if !is_math || bct == 0 {
                    // An unbalanced `]` ends an arithmetic parse with an
                    // error.
                    if brct == 0 && is_math {
                        return Err(());
                    }
                    brct -= 1;
                }
                self.add(c);
            }

            '"' => {
                if intick || (endchar != '"' && bct == 0) {
                    self.add(c);
                } else if bct > 0 {
                    // A quoted string nested inside `${...}`.
                    self.add(char_tokens::DNULL);
                    self.dquote_parse('"', sub)?;
                    self.add(char_tokens::DNULL);
                } else {
                    return Err(());
                }
            }

            _ => {
                self.add(c);
            }
        }
    }
}
1677
1678 fn cmd_or_math(&mut self) -> CmdOrMath {
1680 let oldlen = self.lexbuf.len();
1681
1682 self.add(char_tokens::INPAR);
1683 self.add('(');
1684
1685 if self.dquote_parse(')', false).is_err() {
1686 while self.lexbuf.len() > oldlen {
1688 if let Some(c) = self.lexbuf.pop() {
1689 self.hungetc(c);
1690 }
1691 }
1692 self.hungetc('(');
1693 self.lexstop = false;
1694 return if self.skip_command_sub().is_err() {
1695 CmdOrMath::Err
1696 } else {
1697 CmdOrMath::Cmd
1698 };
1699 }
1700
1701 let c = self.hgetc();
1703 if c == Some(')') {
1704 self.add(')');
1705 return CmdOrMath::Math;
1706 }
1707
1708 if let Some(c) = c {
1710 self.hungetc(c);
1711 }
1712 self.lexstop = false;
1713
1714 while self.lexbuf.len() > oldlen {
1716 if let Some(c) = self.lexbuf.pop() {
1717 self.hungetc(c);
1718 }
1719 }
1720 self.hungetc('(');
1721
1722 if self.skip_command_sub().is_err() {
1723 CmdOrMath::Err
1724 } else {
1725 CmdOrMath::Cmd
1726 }
1727 }
1728
1729 fn cmd_or_math_sub(&mut self) -> CmdOrMath {
1731 const MAX_CONTINUATIONS: usize = 10_000;
1732 let mut continuations = 0;
1733
1734 loop {
1735 continuations += 1;
1736 if continuations > MAX_CONTINUATIONS {
1737 self.error = Some("cmd_or_math_sub: too many line continuations".to_string());
1738 return CmdOrMath::Err;
1739 }
1740
1741 let c = self.hgetc();
1742 if c == Some('\\') {
1743 let c2 = self.hgetc();
1744 if c2 != Some('\n') {
1745 if let Some(c2) = c2 {
1746 self.hungetc(c2);
1747 }
1748 self.hungetc('\\');
1749 self.lexstop = false;
1750 return if self.skip_command_sub().is_err() {
1751 CmdOrMath::Err
1752 } else {
1753 CmdOrMath::Cmd
1754 };
1755 }
1756 continue;
1758 }
1759
1760 if c == Some('(') {
1762 let lexpos = self.lexbuf.len();
1764 self.add(char_tokens::INPAR);
1765 self.add('(');
1766
1767 if self.dquote_parse(')', false).is_ok() {
1768 let c2 = self.hgetc();
1769 if c2 == Some(')') {
1770 self.add(')');
1771 return CmdOrMath::Math;
1772 }
1773 if let Some(c2) = c2 {
1774 self.hungetc(c2);
1775 }
1776 }
1777
1778 while self.lexbuf.len() > lexpos {
1780 if let Some(ch) = self.lexbuf.pop() {
1781 self.hungetc(ch);
1782 }
1783 }
1784 self.hungetc('(');
1785 self.lexstop = false;
1786 } else {
1787 if let Some(c) = c {
1788 self.hungetc(c);
1789 }
1790 self.lexstop = false;
1791 }
1792
1793 return if self.skip_command_sub().is_err() {
1794 CmdOrMath::Err
1795 } else {
1796 CmdOrMath::Cmd
1797 };
1798 }
1799 }
1800
1801 fn skip_command_sub(&mut self) -> Result<(), ()> {
1803 let mut pct = 1;
1804 let mut start = true;
1805 const MAX_ITERATIONS: usize = 100_000;
1806 let mut iterations = 0;
1807
1808 self.add(char_tokens::INPAR);
1809
1810 loop {
1811 iterations += 1;
1812 if iterations > MAX_ITERATIONS {
1813 self.error = Some("skip_command_sub exceeded maximum iterations".to_string());
1814 return Err(());
1815 }
1816
1817 let c = self.hgetc();
1818 let c = match c {
1819 Some(c) => c,
1820 None => {
1821 self.lexstop = true;
1822 return Err(());
1823 }
1824 };
1825
1826 let iswhite = Self::is_inblank(c);
1827
1828 match c {
1829 '(' => {
1830 pct += 1;
1831 self.add(c);
1832 }
1833 ')' => {
1834 pct -= 1;
1835 if pct == 0 {
1836 return Ok(());
1837 }
1838 self.add(c);
1839 }
1840 '\\' => {
1841 self.add(c);
1842 if let Some(c) = self.hgetc() {
1843 self.add(c);
1844 }
1845 }
1846 '\'' => {
1847 self.add(c);
1848 loop {
1849 let ch = self.hgetc();
1850 match ch {
1851 Some('\'') => {
1852 self.add('\'');
1853 break;
1854 }
1855 Some(ch) => self.add(ch),
1856 None => {
1857 self.lexstop = true;
1858 return Err(());
1859 }
1860 }
1861 }
1862 }
1863 '"' => {
1864 self.add(c);
1865 loop {
1866 let ch = self.hgetc();
1867 match ch {
1868 Some('"') => {
1869 self.add('"');
1870 break;
1871 }
1872 Some('\\') => {
1873 self.add('\\');
1874 if let Some(ch) = self.hgetc() {
1875 self.add(ch);
1876 }
1877 }
1878 Some(ch) => self.add(ch),
1879 None => {
1880 self.lexstop = true;
1881 return Err(());
1882 }
1883 }
1884 }
1885 }
1886 '`' => {
1887 self.add(c);
1888 loop {
1889 let ch = self.hgetc();
1890 match ch {
1891 Some('`') => {
1892 self.add('`');
1893 break;
1894 }
1895 Some('\\') => {
1896 self.add('\\');
1897 if let Some(ch) = self.hgetc() {
1898 self.add(ch);
1899 }
1900 }
1901 Some(ch) => self.add(ch),
1902 None => {
1903 self.lexstop = true;
1904 return Err(());
1905 }
1906 }
1907 }
1908 }
1909 '#' => {
1910 if start {
1911 self.add(c);
1912 loop {
1914 let ch = self.hgetc();
1915 match ch {
1916 Some('\n') => {
1917 self.add('\n');
1918 break;
1919 }
1920 Some(ch) => self.add(ch),
1921 None => break,
1922 }
1923 }
1924 } else {
1925 self.add(c);
1926 }
1927 }
1928 _ => {
1929 self.add(c);
1930 }
1931 }
1932
1933 start = iswhite;
1934 }
1935 }
1936
1937 pub fn ctxtlex(&mut self) {
1939 self.zshlex();
1940
1941 match self.tok {
1942 LexTok::Seper
1943 | LexTok::Newlin
1944 | LexTok::Semi
1945 | LexTok::Dsemi
1946 | LexTok::Semiamp
1947 | LexTok::Semibar
1948 | LexTok::Amper
1949 | LexTok::Amperbang
1950 | LexTok::Inpar
1951 | LexTok::Inbrace
1952 | LexTok::Dbar
1953 | LexTok::Damper
1954 | LexTok::Bar
1955 | LexTok::Baramp
1956 | LexTok::Inoutpar
1957 | LexTok::Doloop
1958 | LexTok::Then
1959 | LexTok::Elif
1960 | LexTok::Else
1961 | LexTok::Doutbrack => {
1962 self.incmdpos = true;
1963 }
1964
1965 LexTok::String
1966 | LexTok::Typeset
1967 | LexTok::Envarray
1968 | LexTok::Outpar
1969 | LexTok::Case
1970 | LexTok::Dinbrack => {
1971 self.incmdpos = false;
1972 }
1973
1974 _ => {}
1975 }
1976
1977 if self.tok != LexTok::Dinpar {
1978 self.infor = if self.tok == LexTok::For { 2 } else { 0 };
1979 }
1980
1981 let oldpos = self.incmdpos;
1982 if self.tok.is_redirop()
1983 || self.tok == LexTok::For
1984 || self.tok == LexTok::Foreach
1985 || self.tok == LexTok::Select
1986 {
1987 self.inredir = true;
1988 self.incmdpos = false;
1989 } else if self.inredir {
1990 self.incmdpos = oldpos;
1991 self.inredir = false;
1992 }
1993 }
1994
1995 pub fn register_heredoc(&mut self, terminator: String, strip_tabs: bool) {
1997 self.heredocs.push(HereDoc {
1998 terminator,
1999 strip_tabs,
2000 content: String::new(),
2001 });
2002 }
2003
2004 pub fn check_reserved_word(&mut self) -> bool {
2006 if let Some(ref tokstr) = self.tokstr {
2007 if self.incmdpos || (tokstr == "}" && self.tok == LexTok::String) {
2008 if let Some(tok) = crate::tokens::lookup_reserved_word(tokstr) {
2009 self.tok = tok;
2010 if tok == LexTok::Repeat {
2011 self.inrepeat = 1;
2012 }
2013 if tok == LexTok::Dinbrack {
2014 self.incond = 1;
2015 }
2016 return true;
2017 }
2018 if tokstr == "]]" && self.incond > 0 {
2019 self.tok = LexTok::Doutbrack;
2020 self.incond = 0;
2021 return true;
2022 }
2023 }
2024 }
2025 false
2026 }
2027}
2028
/// Verdict returned by `cmd_or_math` and `cmd_or_math_sub`.
enum CmdOrMath {
    /// The text was consumed successfully by `skip_command_sub` as a command.
    Cmd,
    /// The scanned text was followed by a second `)`, so it is arithmetic.
    Math,
    /// `skip_command_sub` failed, or the line-continuation limit was exceeded.
    Err,
}
2035
/// Reports whether the text at byte offset `pos` completes a numeric range
/// glob: optional ASCII digits, one `-`, optional digits, then `>`.
///
/// Scanning stops at the first character that does not fit that shape.
///
/// Returns `false` — rather than panicking — when `pos` lies past the end of
/// `input` or falls inside a multi-byte character.
pub fn isnumglob(input: &str, pos: usize) -> bool {
    // Checked slicing: `input[pos..]` would panic on a bad offset.
    let rest = match input.get(pos..) {
        Some(rest) => rest,
        None => return false,
    };

    let mut seen_dash = false;
    for c in rest.chars() {
        match c {
            '0'..='9' => {}
            '-' if !seen_dash => seen_dash = true,
            '>' if seen_dash => return true,
            // A second '-', an early '>' or any other character ends the scan.
            _ => return false,
        }
    }
    false
}
2066
2067pub fn parsestr(s: &str) -> Result<String, String> {
2072 let mut result = String::with_capacity(s.len());
2073 let chars: Vec<char> = s.chars().collect();
2074 let mut i = 0;
2075
2076 while i < chars.len() {
2077 let c = chars[i];
2078 match c {
2079 '\\' => {
2080 i += 1;
2081 if i < chars.len() {
2082 let next = chars[i];
2083 match next {
2084 '$' | '\\' | '`' | '"' | '\n' => {
2085 result.push(char_tokens::BNULL);
2086 result.push(next);
2087 }
2088 _ => {
2089 result.push('\\');
2090 result.push(next);
2091 }
2092 }
2093 } else {
2094 result.push('\\');
2095 }
2096 }
2097 '$' => {
2098 result.push(char_tokens::QSTRING);
2099 if i + 1 < chars.len() {
2100 let next = chars[i + 1];
2101 if next == '{' {
2102 result.push(char_tokens::INBRACE);
2103 i += 1;
2104 } else if next == '(' {
2105 result.push(char_tokens::INPAR);
2106 i += 1;
2107 }
2108 }
2109 }
2110 '`' => {
2111 result.push(char_tokens::QTICK);
2112 }
2113 _ => {
2114 result.push(c);
2115 }
2116 }
2117 i += 1;
2118 }
2119
2120 Ok(result)
2121}
2122
/// Finds the terminator that closes a subscript.
///
/// `s` is the text after the opening bracket. The scan skips single- and
/// double-quoted spans and backslash-escaped characters, and tracks nesting:
/// `[` and `(` open a level, `]` and `)` close the innermost open level
/// (the two bracket kinds share one depth counter).
///
/// Returns the index of the first `endchar` found at nesting depth zero,
/// counted in `char`s (not bytes). Returns `None` if `s` is empty, starts
/// with `endchar`, or contains no unnested terminator.
pub fn parse_subscript(s: &str, endchar: char) -> Option<usize> {
    if s.is_empty() || s.starts_with(endchar) {
        return None;
    }

    let chars: Vec<char> = s.chars().collect();
    let mut i = 0;
    let mut depth = 0;
    let mut in_dquote = false;
    let mut in_squote = false;

    while i < chars.len() {
        let c = chars[i];

        // Inside single quotes only the closing quote matters.
        if in_squote {
            if c == '\'' {
                in_squote = false;
            }
            i += 1;
            continue;
        }

        // Inside double quotes a backslash protects the next character.
        if in_dquote {
            if c == '"' {
                in_dquote = false;
            } else if c == '\\' && i + 1 < chars.len() {
                i += 1;
            }
            i += 1;
            continue;
        }

        match c {
            // Skip the escaped character.
            '\\' => i += 1,
            '\'' => in_squote = true,
            '"' => in_dquote = true,
            '[' | '(' => depth += 1,
            ']' | ')' if depth > 0 => {
                // This closer belongs to a nested group. It must never be
                // reported as the terminator, even when it brings the depth
                // back to zero and happens to equal `endchar`.
                depth -= 1;
                i += 1;
                continue;
            }
            _ => {}
        }

        if c == endchar && depth == 0 {
            return Some(i);
        }

        i += 1;
    }

    None
}
2191
2192pub fn parse_subst_string(s: &str) -> Result<String, String> {
2197 if s.is_empty() {
2198 return Ok(String::new());
2199 }
2200
2201 let mut result = String::with_capacity(s.len());
2202 let chars: Vec<char> = s.chars().collect();
2203 let mut i = 0;
2204
2205 while i < chars.len() {
2206 let c = chars[i];
2207 match c {
2208 '\\' => {
2209 result.push(char_tokens::BNULL);
2210 i += 1;
2211 if i < chars.len() {
2212 result.push(chars[i]);
2213 }
2214 }
2215 '\'' => {
2216 result.push(char_tokens::SNULL);
2217 i += 1;
2218 while i < chars.len() && chars[i] != '\'' {
2219 result.push(chars[i]);
2220 i += 1;
2221 }
2222 result.push(char_tokens::SNULL);
2223 }
2224 '"' => {
2225 result.push(char_tokens::DNULL);
2226 i += 1;
2227 while i < chars.len() && chars[i] != '"' {
2228 if chars[i] == '\\' && i + 1 < chars.len() {
2229 result.push(char_tokens::BNULL);
2230 i += 1;
2231 result.push(chars[i]);
2232 } else if chars[i] == '$' {
2233 result.push(char_tokens::QSTRING);
2234 } else {
2235 result.push(chars[i]);
2236 }
2237 i += 1;
2238 }
2239 result.push(char_tokens::DNULL);
2240 }
2241 '$' => {
2242 result.push(char_tokens::STRING);
2243 if i + 1 < chars.len() {
2244 match chars[i + 1] {
2245 '{' => {
2246 result.push(char_tokens::INBRACE);
2247 i += 1;
2248 }
2249 '(' => {
2250 result.push(char_tokens::INPAR);
2251 i += 1;
2252 }
2253 _ => {}
2254 }
2255 }
2256 }
2257 '*' => result.push(char_tokens::STAR),
2258 '?' => result.push(char_tokens::QUEST),
2259 '[' => result.push(char_tokens::INBRACK),
2260 ']' => result.push(char_tokens::OUTBRACK),
2261 '{' => result.push(char_tokens::INBRACE),
2262 '}' => result.push(char_tokens::OUTBRACE),
2263 '~' => result.push(char_tokens::TILDE),
2264 '#' => result.push(char_tokens::POUND),
2265 '^' => result.push(char_tokens::HAT),
2266 _ => result.push(c),
2267 }
2268 i += 1;
2269 }
2270
2271 Ok(result)
2272}
2273
2274pub fn untokenize(s: &str) -> String {
2278 let mut result = String::with_capacity(s.len());
2279 let chars: Vec<char> = s.chars().collect();
2280 let mut i = 0;
2281
2282 while i < chars.len() {
2283 let c = chars[i];
2284 if (c as u32) < 32 {
2286 match c {
2288 c if c == char_tokens::POUND => result.push('#'),
2289 c if c == char_tokens::STRING => result.push('$'),
2290 c if c == char_tokens::HAT => result.push('^'),
2291 c if c == char_tokens::STAR => result.push('*'),
2292 c if c == char_tokens::INPAR => result.push('('),
2293 c if c == char_tokens::OUTPAR => result.push(')'),
2294 c if c == char_tokens::INPARMATH => result.push('('),
2295 c if c == char_tokens::OUTPARMATH => result.push(')'),
2296 c if c == char_tokens::QSTRING => result.push('$'),
2297 c if c == char_tokens::EQUALS => result.push('='),
2298 c if c == char_tokens::BAR => result.push('|'),
2299 c if c == char_tokens::INBRACE => result.push('{'),
2300 c if c == char_tokens::OUTBRACE => result.push('}'),
2301 c if c == char_tokens::INBRACK => result.push('['),
2302 c if c == char_tokens::OUTBRACK => result.push(']'),
2303 c if c == char_tokens::TICK => result.push('`'),
2304 c if c == char_tokens::INANG => result.push('<'),
2305 c if c == char_tokens::OUTANG => result.push('>'),
2306 c if c == char_tokens::QUEST => result.push('?'),
2307 c if c == char_tokens::TILDE => result.push('~'),
2308 c if c == char_tokens::QTICK => result.push('`'),
2309 c if c == char_tokens::COMMA => result.push(','),
2310 c if c == char_tokens::DASH => result.push('-'),
2311 c if c == char_tokens::BANG => result.push('!'),
2312 c if c == char_tokens::SNULL
2313 || c == char_tokens::DNULL
2314 || c == char_tokens::BNULL =>
2315 {
2316 }
2318 _ => {
2319 let idx = c as usize;
2321 if idx < char_tokens::ZTOKENS.len() {
2322 result.push(char_tokens::ZTOKENS.chars().nth(idx).unwrap_or(c));
2323 } else {
2324 result.push(c);
2325 }
2326 }
2327 }
2328 } else {
2329 result.push(c);
2330 }
2331 i += 1;
2332 }
2333
2334 result
2335}
2336
/// Reports whether `s` contains any character with a code below 32.
pub fn has_token(s: &str) -> bool {
    // In UTF-8 a byte below 0x20 can only be a one-byte control character,
    // so scanning bytes gives the same answer as scanning chars.
    s.bytes().any(|b| b < 0x20)
}
2341
/// Returns a printable form of a tokenised string.
///
/// Currently this delegates directly to `untokenize` and returns its result
/// unchanged.
pub fn tokens_to_printable(s: &str) -> String {
    untokenize(s)
}
2346
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs one `zshlex` step and returns the token kind it produced.
    fn step(lexer: &mut ZshLexer<'_>) -> LexTok {
        lexer.zshlex();
        lexer.tok
    }

    /// Lexes `src` from the start and checks the leading token kinds in order.
    fn assert_kinds(src: &str, expected: &[LexTok]) {
        let mut lexer = ZshLexer::new(src);
        for want in expected {
            assert_eq!(step(&mut lexer), *want);
        }
    }

    /// Keeps lexing until `want` or end of input shows up, then requires `want`.
    fn lex_until(lexer: &mut ZshLexer<'_>, want: LexTok) {
        loop {
            let got = step(lexer);
            if got == want || got == LexTok::Endinput {
                break;
            }
        }
        assert_eq!(lexer.tok, want);
    }

    #[test]
    fn test_simple_command() {
        let mut lexer = ZshLexer::new("echo hello");

        assert_eq!(step(&mut lexer), LexTok::String);
        assert_eq!(lexer.tokstr, Some("echo".to_string()));

        assert_eq!(step(&mut lexer), LexTok::String);
        assert_eq!(lexer.tokstr, Some("hello".to_string()));

        assert_eq!(step(&mut lexer), LexTok::Endinput);
    }

    #[test]
    fn test_pipeline() {
        assert_kinds(
            "ls | grep foo",
            &[LexTok::String, LexTok::Bar, LexTok::String, LexTok::String],
        );
    }

    #[test]
    fn test_redirections() {
        assert_kinds(
            "echo > file",
            &[LexTok::String, LexTok::Outang, LexTok::String],
        );
    }

    #[test]
    fn test_heredoc() {
        assert_kinds(
            "cat << EOF",
            &[LexTok::String, LexTok::Dinang, LexTok::String],
        );
    }

    #[test]
    fn test_single_quotes() {
        let mut lexer = ZshLexer::new("echo 'hello world'");

        assert_eq!(step(&mut lexer), LexTok::String);
        assert_eq!(step(&mut lexer), LexTok::String);
        assert!(lexer.tokstr.is_some());
    }

    #[test]
    fn test_function_tokens() {
        let mut lexer = ZshLexer::new("function foo { }");

        lexer.zshlex();
        assert_eq!(
            lexer.tok,
            LexTok::Func,
            "expected Func, got {:?}",
            lexer.tok
        );

        lexer.zshlex();
        assert_eq!(
            lexer.tok,
            LexTok::String,
            "expected String for 'foo', got {:?}",
            lexer.tok
        );
        assert_eq!(lexer.tokstr, Some("foo".to_string()));

        lexer.zshlex();
        assert_eq!(
            lexer.tok,
            LexTok::Inbrace,
            "expected Inbrace, got {:?} tokstr={:?}",
            lexer.tok,
            lexer.tokstr
        );

        lexer.zshlex();
        assert_eq!(
            lexer.tok,
            LexTok::Outbrace,
            "expected Outbrace, got {:?} tokstr={:?} incmdpos={}",
            lexer.tok,
            lexer.tokstr,
            lexer.incmdpos
        );
    }

    #[test]
    fn test_double_quotes() {
        let mut lexer = ZshLexer::new("echo \"hello $name\"");

        assert_eq!(step(&mut lexer), LexTok::String);
        assert_eq!(step(&mut lexer), LexTok::String);
        assert!(lexer.tokstr.is_some());
    }

    #[test]
    fn test_command_substitution() {
        assert_kinds("echo $(pwd)", &[LexTok::String, LexTok::String]);
    }

    #[test]
    fn test_env_assignment() {
        let mut lexer = ZshLexer::new("FOO=bar echo");
        lexer.incmdpos = true;

        lexer.zshlex();
        assert_eq!(
            lexer.tok,
            LexTok::Envstring,
            "tok={:?} tokstr={:?}",
            lexer.tok,
            lexer.tokstr
        );

        assert_eq!(step(&mut lexer), LexTok::String);
    }

    #[test]
    fn test_array_assignment() {
        let mut lexer = ZshLexer::new("arr=(a b c)");
        lexer.incmdpos = true;

        assert_eq!(step(&mut lexer), LexTok::Envarray);
    }

    #[test]
    fn test_process_substitution() {
        assert_kinds(
            "diff <(ls) >(cat)",
            &[LexTok::String, LexTok::String, LexTok::String],
        );
    }

    #[test]
    fn test_arithmetic() {
        assert_kinds("echo $((1+2))", &[LexTok::String, LexTok::String]);
    }

    #[test]
    fn test_semicolon_variants() {
        let mut lexer = ZshLexer::new("case x in a) cmd;; b) cmd;& c) cmd;| esac");

        // The three case terminators must appear in this order.
        lex_until(&mut lexer, LexTok::Dsemi);
        lex_until(&mut lexer, LexTok::Semiamp);
        lex_until(&mut lexer, LexTok::Semibar);
    }
}