1use crate::error::{ErrorKind, PerlError, PerlResult};
2use crate::token::{keyword_or_ident, Token};
3
/// Placeholder character the lexer stores in a double-quoted string body in
/// place of an escaped `\$`. U+E000 is a Unicode Private Use Area code point.
/// Presumably a later interpolation pass turns it back into a literal `$`
/// without treating it as a sigil — TODO confirm against the consumer.
pub const LITERAL_DOLLAR_IN_DQUOTE: char = '\u{E000}';
/// Placeholder character stored in place of an escaped `\@` (U+E001, also in
/// the Private Use Area). Same caveat as [`LITERAL_DOLLAR_IN_DQUOTE`].
pub const LITERAL_AT_IN_DQUOTE: char = '\u{E001}';
10
11fn parse_unicode_name(name: &str) -> Option<char> {
13 if let Some(hex) = name.strip_prefix("U+") {
14 let val = u32::from_str_radix(hex, 16).ok()?;
15 char::from_u32(val)
16 } else {
17 unicode_names2::character(name)
18 }
19}
20
/// Characters accepted as trailing regex modifier flags. `read_regex` consumes
/// a run of these immediately after the closing delimiter.
const REGEX_FLAG_CHARS: &str = "gimsxecor";
23
/// Character-level tokenizer state for a single source text.
pub struct Lexer {
    /// Source text split into `char`s so it can be indexed by position.
    input: Vec<char>,
    /// Index into `input` of the next character to read.
    pos: usize,
    /// Current line number. Starts at 1 and is incremented by `advance` and the
    /// whitespace skippers when they consume a `'\n'`.
    pub line: usize,
    /// Set to `true` by `next_token` after it produces a variable token. The
    /// `%` arm of `next_token` only lexes a hash sigil while this is `false`.
    last_was_term: bool,
    /// Cleared at the start of every `next_token` call.
    /// Presumably set when an arrow token is produced — TODO confirm.
    last_was_arrow: bool,
    /// Copy of `last_was_arrow` taken at the start of `next_token`, before it
    /// is cleared.
    prev_arrow: bool,
    /// File name attached to every `PerlError` built by `syntax_err`.
    error_file: String,
    /// Starts at 0. NOTE(review): name suggests a nesting counter that
    /// disables `m`-style regex lexing while non-zero — confirm against callers.
    pub suppress_m_regex: u32,
    /// Cleared at the start of every `next_token` call.
    /// NOTE(review): where it is set is not visible in this part of the file.
    pub last_was_bare_positional: bool,
    /// Starts empty. NOTE(review): what the stored indices refer to is not
    /// visible in this part of the file.
    pub bare_positional_indices: std::collections::HashSet<usize>,
}
59
60impl Lexer {
    /// Creates a lexer over `input`, reporting errors against the file name `"-e"`.
    pub fn new(input: &str) -> Self {
        Self::new_with_file(input, "-e")
    }
64
65 pub fn new_with_file(input: &str, file: impl Into<String>) -> Self {
66 Self {
67 input: input.chars().collect(),
68 pos: 0,
69 line: 1,
70 last_was_term: false,
71 last_was_arrow: false,
72 prev_arrow: false,
73 error_file: file.into(),
74 suppress_m_regex: 0,
75 last_was_bare_positional: false,
76 bare_positional_indices: std::collections::HashSet::new(),
77 }
78 }
79
    /// Builds an `ErrorKind::Syntax` error carrying `message`, the given `line`
    /// and this lexer's `error_file`.
    fn syntax_err(&self, message: impl Into<String>, line: usize) -> PerlError {
        PerlError::new(ErrorKind::Syntax, message, line, self.error_file.clone())
    }
83
84 fn lookahead_is_comma_delim_subst(&self) -> bool {
92 let mut commas = 0usize;
93 let mut depth_paren = 0i32;
94 let mut depth_bracket = 0i32;
95 let mut depth_brace = 0i32;
96 let mut i = self.pos;
97 while i < self.input.len() {
98 let c = self.input[i];
99 match c {
100 '\\' => {
101 i += 2; continue;
103 }
104 '(' => depth_paren += 1,
105 ')' => {
106 if depth_paren == 0 {
107 break;
108 }
109 depth_paren -= 1;
110 }
111 '[' => depth_bracket += 1,
112 ']' => {
113 if depth_bracket == 0 {
114 break;
115 }
116 depth_bracket -= 1;
117 }
118 '{' => depth_brace += 1,
119 '}' => {
120 if depth_brace == 0 {
121 break;
122 }
123 depth_brace -= 1;
124 }
125 ';' | '\n' => break,
126 ',' if depth_paren == 0 && depth_bracket == 0 && depth_brace == 0 => {
127 commas += 1;
128 if commas >= 3 {
129 return true;
130 }
131 }
132 _ => {}
133 }
134 i += 1;
135 }
136 commas >= 3
139 }
140
    /// Returns the character at the cursor without consuming it, or `None` at
    /// end of input.
    fn peek(&self) -> Option<char> {
        self.input.get(self.pos).copied()
    }
144
    /// Returns the character `offset` positions past the cursor without
    /// consuming anything, or `None` if that position is past the end of input.
    fn peek_at(&self, offset: usize) -> Option<char> {
        self.input.get(self.pos + offset).copied()
    }
148
149 fn at_line_start_for_pod(&self, eq_pos: usize) -> bool {
152 let mut i = eq_pos;
153 while i > 0 {
154 i -= 1;
155 let c = self.input[i];
156 if c == '\n' {
157 return true;
158 }
159 if !c.is_whitespace() {
160 return false;
161 }
162 }
163 true
164 }
165
166 fn advance(&mut self) -> Option<char> {
167 let ch = self.input.get(self.pos).copied();
168 if let Some(c) = ch {
169 if c == '\n' {
170 self.line += 1;
171 }
172 self.pos += 1;
173 }
174 ch
175 }
176
177 fn skip_whitespace_and_comments(&mut self) {
178 while self.pos < self.input.len() {
179 let ch = self.input[self.pos];
180 if ch == '#' {
181 while self.pos < self.input.len() && self.input[self.pos] != '\n' {
183 self.pos += 1;
184 }
185 } else if ch == '\\' && self.peek_at(1) == Some('\n') {
186 self.pos += 2;
189 } else if ch.is_whitespace() {
190 if ch == '\n' {
191 self.line += 1;
192 }
193 self.pos += 1;
194 } else {
195 break;
196 }
197 }
198 }
199
200 fn skip_whitespace_only(&mut self) {
203 while self.pos < self.input.len() {
204 let ch = self.input[self.pos];
205 if ch.is_whitespace() {
206 if ch == '\n' {
207 self.line += 1;
208 }
209 self.pos += 1;
210 } else {
211 break;
212 }
213 }
214 }
215
216 fn read_while(&mut self, pred: impl Fn(char) -> bool) -> String {
217 let mut s = String::new();
218 while let Some(ch) = self.peek() {
219 if pred(ch) {
220 s.push(ch);
221 self.advance();
222 } else {
223 break;
224 }
225 }
226 s
227 }
228
229 fn next_is_range_separator(&self) -> bool {
236 let mut i = self.pos;
237 while i < self.input.len() && matches!(self.input[i], ' ' | '\t') {
238 i += 1;
239 }
240 if i >= self.input.len() {
241 return false;
242 }
243 match self.input[i] {
244 ':' => true,
245 '.' if self.input.get(i + 1) == Some(&'.') => true,
246 '~' if self.input.get(i + 1) != Some(&'>') => true,
247 _ => false,
248 }
249 }
250
251 fn try_consume_iso_date_tail(&mut self, start: usize) -> Option<String> {
262 let saved = self.pos;
263 let year: String = self.input[start..self.pos].iter().collect();
264 if year.len() != 4 || year.parse::<u16>().is_err() {
265 return None;
266 }
267 if self.peek() != Some('-') {
269 return None;
270 }
271 if !self.peek_at(1).is_some_and(|c| c.is_ascii_digit())
272 || !self.peek_at(2).is_some_and(|c| c.is_ascii_digit())
273 {
274 return None;
275 }
276 if self.peek_at(3).is_some_and(|c| c.is_ascii_digit()) {
279 return None;
280 }
281 let month_str: String = self.input[self.pos + 1..self.pos + 3].iter().collect();
282 let month: u8 = match month_str.parse() {
283 Ok(m) if (1..=12).contains(&m) => m,
284 _ => return None,
285 };
286 self.advance(); self.advance(); self.advance(); if self.peek() == Some('-')
292 && self.peek_at(1).is_some_and(|c| c.is_ascii_digit())
293 && self.peek_at(2).is_some_and(|c| c.is_ascii_digit())
294 && !self.peek_at(3).is_some_and(|c| c.is_ascii_digit())
295 {
296 let day_str: String = self.input[self.pos + 1..self.pos + 3].iter().collect();
297 if let Ok(day) = day_str.parse::<u8>() {
298 if (1..=31).contains(&day) {
299 self.advance(); self.advance(); self.advance(); let _ = month; return Some(format!("{}-{}-{:02}", year, month_str, day));
304 }
305 }
306 }
307 Some(format!("{}-{}", year, month_str)).filter(|_| {
310 let _ = saved;
312 true
313 })
314 }
315
    /// Tries to read an IPv6 address literal beginning at `start`.
    ///
    /// The cursor is rewound to `start` and a run of hex digits and colons is
    /// scanned. The candidate is accepted only if it contains at least two
    /// colons and one hex digit and parses as `std::net::Ipv6Addr`. On success
    /// the cursor is left just past the address and its text is returned; on
    /// failure the cursor is restored to where it was on entry and `None` is
    /// returned.
    fn try_consume_ipv6_tail(&mut self, start: usize) -> Option<String> {
        let saved = self.pos;
        self.pos = start;
        let mut seen_double_colon = false;
        let mut prev_was_colon = false;
        let mut colon_count = 0usize;
        while self.pos < self.input.len() {
            let c = self.input[self.pos];
            if c == ':' {
                colon_count += 1;
                if prev_was_colon {
                    // A colon directly after a colon once a `::` has already
                    // been seen ends the scan.
                    // NOTE(review): colon_count was already incremented for
                    // this colon even though it is not consumed.
                    if seen_double_colon {
                        break;
                    }
                    seen_double_colon = true;
                }
                prev_was_colon = true;
                self.advance();
                continue;
            }
            if c.is_ascii_hexdigit() {
                prev_was_colon = false;
                self.advance();
                continue;
            }
            break;
        }
        // Give back a single trailing `:` (one that is not part of a `::`).
        if self.pos > start
            && self.input[self.pos - 1] == ':'
            && (self.pos < start + 2 || self.input[self.pos - 2] != ':')
        {
            self.pos -= 1;
            colon_count -= 1;
        }
        if self.pos < self.input.len() {
            let next = self.input[self.pos];
            // Followed by a non-hex letter or `_`: reject the candidate.
            if next.is_ascii_alphabetic() && !next.is_ascii_hexdigit() || next == '_' {
                self.pos = saved;
                return None;
            }
            // Followed by `::` and then a letter or `_`: reject as well.
            if next == ':'
                && self.input.get(self.pos + 1) == Some(&':')
                && self
                    .input
                    .get(self.pos + 2)
                    .is_some_and(|c| c.is_ascii_alphabetic() || *c == '_')
            {
                self.pos = saved;
                return None;
            }
        }
        let candidate: String = self.input[start..self.pos].iter().collect();
        if colon_count < 2
            || !candidate.chars().any(|c| c.is_ascii_hexdigit())
            || candidate.parse::<std::net::Ipv6Addr>().is_err()
        {
            self.pos = saved;
            return None;
        }
        Some(candidate)
    }
406
407 fn try_consume_ipv4_tail(&mut self, start: usize) -> Option<String> {
415 let saved = self.pos;
416 let first: String = self.input[start..self.pos].iter().collect();
418 if first.parse::<u8>().is_err() {
419 return None;
420 }
421 let mut octets: Vec<String> = vec![first];
422 for _ in 0..3 {
423 if self.peek() != Some('.') {
424 self.pos = saved;
425 return None;
426 }
427 if !self.peek_at(1).is_some_and(|c| c.is_ascii_digit()) {
428 self.pos = saved;
429 return None;
430 }
431 self.advance(); let oct_start = self.pos;
433 while self.peek().is_some_and(|c| c.is_ascii_digit()) {
434 self.advance();
435 }
436 let octet: String = self.input[oct_start..self.pos].iter().collect();
437 if octet.parse::<u8>().is_err() {
438 self.pos = saved;
439 return None;
440 }
441 octets.push(octet);
442 }
443 if self.peek() == Some('.') && self.peek_at(1).is_some_and(|c| c.is_ascii_digit()) {
447 self.pos = saved;
448 return None;
449 }
450 Some(octets.join("."))
451 }
452
    /// Lexes a numeric literal starting at the cursor.
    ///
    /// Handles `0x`/`0b`/`0o` prefixed integers, legacy leading-zero octal,
    /// decimal integers and floats (fraction and/or exponent), with `_`
    /// allowed as a digit separator. Text that turns out to be an IPv4
    /// address, an ISO-style date or an IPv6 address is returned as a
    /// `Token::DoubleString` holding that text instead of a number.
    ///
    /// # Errors
    /// Returns a syntax error when the digits cannot be parsed in the
    /// detected radix.
    fn read_number(&mut self) -> PerlResult<Token> {
        let start = self.pos;
        let mut is_float = false;
        let mut is_hex = false;
        let mut is_oct = false;
        let mut is_bin = false;

        // Radix prefix detection: only a leading `0` can introduce one.
        if self.peek() == Some('0') {
            match self.peek_at(1) {
                Some('x') | Some('X') => {
                    is_hex = true;
                    self.advance();
                    self.advance();
                }
                Some('b') | Some('B') => {
                    is_bin = true;
                    self.advance();
                    self.advance();
                }
                Some('o') | Some('O') => {
                    // Explicit `0o` octal is parsed and returned right here.
                    self.advance();
                    self.advance();
                    let digits = self.read_while(|c| c.is_ascii_digit() || c == '_');
                    let clean: String = digits.chars().filter(|&c| c != '_').collect();
                    let val = i64::from_str_radix(&clean, 8)
                        .map_err(|_| self.syntax_err("Invalid octal literal", self.line))?;
                    return Ok(Token::Integer(val));
                }
                Some(c) if c.is_ascii_digit() => {
                    // `0` followed by a digit: legacy octal, decided after the
                    // digits have been read.
                    is_oct = true;
                }
                _ => {}
            }
        }

        if is_hex {
            let digits = self.read_while(|c| c.is_ascii_hexdigit() || c == '_');
            let clean: String = digits.chars().filter(|&c| c != '_').collect();
            let val = i64::from_str_radix(&clean, 16)
                .map_err(|_| self.syntax_err("Invalid hex literal", self.line))?;
            // When a range separator follows, the raw source text is returned
            // as a string token instead of the integer value.
            if self.next_is_range_separator() {
                let raw: String = self.input[start..self.pos].iter().collect();
                return Ok(Token::DoubleString(raw));
            }
            return Ok(Token::Integer(val));
        }
        if is_bin {
            let digits = self.read_while(|c| c == '0' || c == '1' || c == '_');
            let clean: String = digits.chars().filter(|&c| c != '_').collect();
            let val = i64::from_str_radix(&clean, 2)
                .map_err(|_| self.syntax_err("Invalid binary literal", self.line))?;
            return Ok(Token::Integer(val));
        }

        let _int_part = self.read_while(|c| c.is_ascii_digit() || c == '_');
        // `.` followed by a digit: either an IPv4 address or a fraction. A
        // `.` followed by anything else is left for the caller.
        if self.peek() == Some('.') && self.peek_at(1).is_some_and(|c| c.is_ascii_digit()) {
            if let Some(consumed) = self.try_consume_ipv4_tail(start) {
                return Ok(Token::DoubleString(consumed));
            }
            is_float = true;
            self.advance(); let _frac = self.read_while(|c| c.is_ascii_digit() || c == '_');
        }
        // `-` followed by a digit after an integer: possibly an ISO date.
        if !is_float
            && self.peek() == Some('-')
            && self.peek_at(1).is_some_and(|c| c.is_ascii_digit())
        {
            if let Some(consumed) = self.try_consume_iso_date_tail(start) {
                return Ok(Token::DoubleString(consumed));
            }
        }
        // `:` after an integer: possibly an IPv6 address.
        if !is_float && self.peek() == Some(':') {
            if let Some(consumed) = self.try_consume_ipv6_tail(start) {
                return Ok(Token::DoubleString(consumed));
            }
        }
        // Exponent: `e`/`E`, optional sign, then digits.
        if let Some('e') | Some('E') = self.peek() {
            is_float = true;
            self.advance();
            if let Some('+') | Some('-') = self.peek() {
                self.advance();
            }
            let _exp = self.read_while(|c| c.is_ascii_digit() || c == '_');
        }

        // Re-read everything consumed and drop the `_` separators before parsing.
        let raw: String = self.input[start..self.pos].iter().collect();
        let clean: String = raw.chars().filter(|&c| c != '_').collect();

        if is_float {
            let val: f64 = clean
                .parse()
                .map_err(|_| self.syntax_err("Invalid float literal", self.line))?;
            Ok(Token::Float(val))
        } else if is_oct && clean.starts_with('0') && clean.len() > 1 {
            // Legacy octal: parse everything after the leading `0` in base 8.
            let val = i64::from_str_radix(&clean[1..], 8)
                .map_err(|_| self.syntax_err("Invalid octal literal", self.line))?;
            Ok(Token::Integer(val))
        } else {
            let val: i64 = clean
                .parse()
                .map_err(|_| self.syntax_err("Invalid integer literal", self.line))?;
            Ok(Token::Integer(val))
        }
    }
585
586 fn read_single_quoted_string(&mut self) -> PerlResult<Token> {
587 self.advance(); let mut s = String::new();
589 loop {
590 match self.advance() {
591 Some('\\') => match self.peek() {
592 Some('\\') => {
593 s.push('\\');
594 self.advance();
595 }
596 Some('\'') => {
597 s.push('\'');
598 self.advance();
599 }
600 _ => s.push('\\'),
601 },
602 Some('\'') => break,
603 Some(c) => s.push(c),
604 None => return Err(self.syntax_err("Unterminated single-quoted string", self.line)),
605 }
606 }
607 Ok(Token::SingleString(s))
608 }
609
    /// Lexes a `"..."` string: consumes the character at the cursor (the
    /// opening quote), then reads the escape-processed body up to the closing
    /// `"` via `read_escaped_until`.
    fn read_double_quoted_string(&mut self) -> PerlResult<Token> {
        self.advance(); let s = self.read_escaped_until('"')?;
        Ok(Token::DoubleString(s))
    }
615
616 fn read_escaped_until(&mut self, term: char) -> PerlResult<String> {
617 let mut s = String::new();
618 loop {
619 match self.advance() {
620 Some('\\') => match self.advance() {
621 Some('n') => s.push('\n'),
622 Some('t') => s.push('\t'),
623 Some('r') => s.push('\r'),
624 Some('\\') => s.push('\\'),
625 Some(c @ '0'..='7') => {
626 let mut oct = String::new();
627 oct.push(c);
628 for _ in 0..2 {
629 match self.peek() {
630 Some(d) if ('0'..='7').contains(&d) => {
631 oct.push(self.advance().unwrap());
632 }
633 _ => break,
634 }
635 }
636 let val = u32::from_str_radix(&oct, 8).unwrap();
637 let ch = char::from_u32(val)
638 .ok_or_else(|| self.syntax_err("Invalid octal escape", self.line))?;
639 s.push(ch);
640 }
641 Some('a') => s.push('\x07'),
642 Some('b') => s.push('\x08'),
643 Some('f') => s.push('\x0C'),
644 Some('e') => s.push('\x1B'),
645 Some('$') => s.push(LITERAL_DOLLAR_IN_DQUOTE),
646 Some('@') => s.push(LITERAL_AT_IN_DQUOTE),
647 Some('c') => {
648 let ch = self
649 .advance()
650 .ok_or_else(|| self.syntax_err("Unterminated \\c escape", self.line))?;
651 s.push(char::from(ch.to_ascii_uppercase() as u8 ^ 0x40));
652 }
653 Some('o') if self.peek() == Some('{') => {
654 self.advance(); let oct = self.read_while(|c| c != '}');
656 if self.peek() != Some('}') {
657 return Err(
658 self.syntax_err("Unterminated \\o{...} in string", self.line)
659 );
660 }
661 self.advance(); if oct.is_empty() {
663 return Err(self.syntax_err("Empty \\o{} in string", self.line));
664 }
665 let val = u32::from_str_radix(&oct, 8).map_err(|_| {
666 self.syntax_err("Invalid octal digits in \\o{...}", self.line)
667 })?;
668 let c = char::from_u32(val).ok_or_else(|| {
669 self.syntax_err("Invalid Unicode scalar value in \\o{...}", self.line)
670 })?;
671 s.push(c);
672 }
673 Some('u') if self.peek() == Some('{') => {
674 self.advance(); let hex = self.read_while(|c| c != '}');
676 if self.peek() != Some('}') {
677 return Err(
678 self.syntax_err("Unterminated \\u{...} in string", self.line)
679 );
680 }
681 self.advance(); if hex.is_empty() {
683 return Err(self.syntax_err("Empty \\u{} in string", self.line));
684 }
685 let val = u32::from_str_radix(&hex, 16).map_err(|_| {
686 self.syntax_err("Invalid hex digits in \\u{...}", self.line)
687 })?;
688 let c = char::from_u32(val).ok_or_else(|| {
689 self.syntax_err("Invalid Unicode scalar value in \\u{...}", self.line)
690 })?;
691 s.push(c);
692 }
693 Some('N') if self.peek() == Some('{') => {
694 self.advance(); let name = self.read_while(|c| c != '}');
696 if self.peek() != Some('}') {
697 return Err(
698 self.syntax_err("Unterminated \\N{...} in string", self.line)
699 );
700 }
701 self.advance(); if name.is_empty() {
703 return Err(self.syntax_err("Empty \\N{} in string", self.line));
704 }
705 let c = parse_unicode_name(&name).ok_or_else(|| {
706 self.syntax_err(
707 format!("Unknown Unicode character name: {name}"),
708 self.line,
709 )
710 })?;
711 s.push(c);
712 }
713 Some('x') => {
714 if self.peek() == Some('{') {
715 self.advance(); let hex = self.read_while(|c| c != '}');
717 if self.peek() != Some('}') {
718 return Err(
719 self.syntax_err("Unterminated \\x{...} in string", self.line)
720 );
721 }
722 self.advance(); if hex.is_empty() {
724 return Err(self.syntax_err("Empty \\x{} in string", self.line));
725 }
726 let val = u32::from_str_radix(&hex, 16).map_err(|_| {
727 self.syntax_err("Invalid hex digits in \\x{...}", self.line)
728 })?;
729 let c = char::from_u32(val).ok_or_else(|| {
730 self.syntax_err(
731 "Invalid Unicode scalar value in \\x{...}",
732 self.line,
733 )
734 })?;
735 s.push(c);
736 } else {
737 let mut hex = String::new();
739 for _ in 0..2 {
740 match self.peek() {
741 Some(c) if c.is_ascii_hexdigit() => {
742 hex.push(self.advance().unwrap());
743 }
744 _ => break,
745 }
746 }
747 if hex.is_empty() {
748 s.push('\0');
750 } else if let Ok(val) = u32::from_str_radix(&hex, 16) {
751 if let Some(c) = char::from_u32(val) {
752 s.push(c);
753 } else {
754 return Err(self.syntax_err(
755 "Invalid code point in \\x escape",
756 self.line,
757 ));
758 }
759 }
760 }
761 }
762 Some(c) if c == term => s.push(c),
763 Some(c) => {
764 s.push('\\');
765 s.push(c);
766 }
767 None => return Err(self.syntax_err("Unterminated string", self.line)),
768 },
769 Some(c) if c == term => break,
770 Some(c) => s.push(c),
771 None => return Err(self.syntax_err("Unterminated string", self.line)),
772 }
773 }
774 Ok(s)
775 }
776
777 fn read_q_qq_balanced_body(
780 &mut self,
781 open: char,
782 close: char,
783 is_qq: bool,
784 ) -> PerlResult<String> {
785 let mut s = String::new();
786 let mut depth: usize = 1;
787 loop {
788 match self.peek() {
789 Some('\\') => {
790 self.advance();
791 if is_qq {
792 match self.advance() {
793 Some('n') => s.push('\n'),
794 Some('t') => s.push('\t'),
795 Some('r') => s.push('\r'),
796 Some('\\') => s.push('\\'),
797 Some(c @ '0'..='7') => {
798 let mut oct = String::new();
799 oct.push(c);
800 for _ in 0..2 {
801 match self.peek() {
802 Some(d) if ('0'..='7').contains(&d) => {
803 oct.push(self.advance().unwrap());
804 }
805 _ => break,
806 }
807 }
808 let val = u32::from_str_radix(&oct, 8).unwrap();
809 let ch = char::from_u32(val).ok_or_else(|| {
810 self.syntax_err("Invalid octal escape", self.line)
811 })?;
812 s.push(ch);
813 }
814 Some('a') => s.push('\x07'),
815 Some('b') => s.push('\x08'),
816 Some('f') => s.push('\x0C'),
817 Some('e') => s.push('\x1B'),
818 Some('$') => s.push(LITERAL_DOLLAR_IN_DQUOTE),
819 Some('@') => s.push(LITERAL_AT_IN_DQUOTE),
820 Some('c') => {
821 let ch = self.advance().ok_or_else(|| {
822 self.syntax_err("Unterminated \\c escape", self.line)
823 })?;
824 s.push(char::from(ch.to_ascii_uppercase() as u8 ^ 0x40));
825 }
826 Some('o') if self.peek() == Some('{') => {
827 self.advance();
828 let oct = self.read_while(|c| c != '}');
829 if self.peek() != Some('}') {
830 return Err(self.syntax_err(
831 "Unterminated \\o{...} in qq string",
832 self.line,
833 ));
834 }
835 self.advance();
836 if oct.is_empty() {
837 return Err(
838 self.syntax_err("Empty \\o{} in qq string", self.line)
839 );
840 }
841 let val = u32::from_str_radix(&oct, 8).map_err(|_| {
842 self.syntax_err("Invalid octal digits in \\o{...}", self.line)
843 })?;
844 let c = char::from_u32(val).ok_or_else(|| {
845 self.syntax_err(
846 "Invalid Unicode scalar value in \\o{...}",
847 self.line,
848 )
849 })?;
850 s.push(c);
851 }
852 Some('u') if self.peek() == Some('{') => {
853 self.advance();
854 let hex = self.read_while(|c| c != '}');
855 if self.peek() != Some('}') {
856 return Err(self.syntax_err(
857 "Unterminated \\u{...} in qq string",
858 self.line,
859 ));
860 }
861 self.advance();
862 if hex.is_empty() {
863 return Err(
864 self.syntax_err("Empty \\u{} in qq string", self.line)
865 );
866 }
867 let val = u32::from_str_radix(&hex, 16).map_err(|_| {
868 self.syntax_err("Invalid hex digits in \\u{...}", self.line)
869 })?;
870 let c = char::from_u32(val).ok_or_else(|| {
871 self.syntax_err(
872 "Invalid Unicode scalar value in \\u{...}",
873 self.line,
874 )
875 })?;
876 s.push(c);
877 }
878 Some('N') if self.peek() == Some('{') => {
879 self.advance();
880 let name = self.read_while(|c| c != '}');
881 if self.peek() != Some('}') {
882 return Err(self.syntax_err(
883 "Unterminated \\N{...} in qq string",
884 self.line,
885 ));
886 }
887 self.advance();
888 if name.is_empty() {
889 return Err(
890 self.syntax_err("Empty \\N{} in qq string", self.line)
891 );
892 }
893 let c = parse_unicode_name(&name).ok_or_else(|| {
894 self.syntax_err(
895 format!("Unknown Unicode character name: {name}"),
896 self.line,
897 )
898 })?;
899 s.push(c);
900 }
901 Some('x') => {
902 if self.peek() == Some('{') {
903 self.advance();
904 let hex = self.read_while(|c| c != '}');
905 if self.peek() != Some('}') {
906 return Err(self.syntax_err(
907 "Unterminated \\x{...} in qq string",
908 self.line,
909 ));
910 }
911 self.advance();
912 if hex.is_empty() {
913 return Err(
914 self.syntax_err("Empty \\x{} in qq string", self.line)
915 );
916 }
917 let val = u32::from_str_radix(&hex, 16).map_err(|_| {
918 self.syntax_err("Invalid hex digits in \\x{...}", self.line)
919 })?;
920 let c = char::from_u32(val).ok_or_else(|| {
921 self.syntax_err(
922 "Invalid Unicode scalar value in \\x{...}",
923 self.line,
924 )
925 })?;
926 s.push(c);
927 } else {
928 let mut hex = String::new();
929 for _ in 0..2 {
930 match self.peek() {
931 Some(c) if c.is_ascii_hexdigit() => {
932 hex.push(self.advance().unwrap());
933 }
934 _ => break,
935 }
936 }
937 if hex.is_empty() {
938 s.push('\0');
939 } else if let Ok(val) = u32::from_str_radix(&hex, 16) {
940 if let Some(c) = char::from_u32(val) {
941 s.push(c);
942 } else {
943 return Err(self.syntax_err(
944 "Invalid code point in \\x escape",
945 self.line,
946 ));
947 }
948 }
949 }
950 }
951 Some(c) if c == close && depth == 1 => s.push(close),
952 Some(c) => {
953 s.push('\\');
954 s.push(c);
955 }
956 None => {
957 return Err(
958 self.syntax_err("Unterminated qq(...) string", self.line)
959 );
960 }
961 }
962 } else {
963 match self.advance() {
964 Some(c) if c == close && depth == 1 => s.push(close),
965 Some(c) => {
966 s.push('\\');
967 s.push(c);
968 }
969 None => {
970 return Err(
971 self.syntax_err("Unterminated q(...) string", self.line)
972 );
973 }
974 }
975 }
976 }
977 Some(c) if c == open => {
978 self.advance();
979 depth += 1;
980 s.push(open);
981 }
982 Some(c) if c == close => {
983 self.advance();
984 if depth == 1 {
985 break;
986 }
987 depth -= 1;
988 s.push(close);
989 }
990 Some(c) => {
991 self.advance();
992 s.push(c);
993 }
994 None => {
995 return Err(self.syntax_err("Unterminated q/qq bracketed string", self.line));
996 }
997 }
998 }
999 Ok(s)
1000 }
1001
1002 fn read_regex(&mut self) -> PerlResult<Token> {
1003 self.advance(); let mut pattern = String::new();
1005 loop {
1006 match self.advance() {
1007 Some('\\') => {
1008 pattern.push('\\');
1009 if let Some(c) = self.advance() {
1010 pattern.push(c);
1011 }
1012 }
1013 Some('/') => break,
1014 Some(c) => pattern.push(c),
1015 None => return Err(self.syntax_err("Unterminated regex", self.line)),
1016 }
1017 }
1018 let flags = self.read_while(|c| REGEX_FLAG_CHARS.contains(c));
1019 Ok(Token::Regex(pattern, flags, '/'))
1020 }
1021
1022 fn read_qw(&mut self) -> PerlResult<Token> {
1023 self.skip_whitespace_only();
1025 let open = self
1026 .advance()
1027 .ok_or_else(|| self.syntax_err("Expected delimiter after qw", self.line))?;
1028 let close = match open {
1029 '(' => ')',
1030 '[' => ']',
1031 '{' => '}',
1032 '<' => '>',
1033 c => c,
1034 };
1035 let mut words = Vec::new();
1036 if matches!(open, '(' | '[' | '{' | '<') {
1037 let mut depth: usize = 1;
1040 let mut buf = String::new();
1041 loop {
1042 match self.peek() {
1043 None => {
1044 return Err(self.syntax_err("Unterminated qw()", self.line));
1045 }
1046 Some(c) if depth == 1 && c.is_whitespace() => {
1047 self.advance();
1048 if !buf.is_empty() {
1049 words.push(buf.clone());
1050 buf.clear();
1051 }
1052 while self.peek().is_some_and(|c| c.is_whitespace()) {
1053 self.advance();
1054 }
1055 }
1056 Some(c) if c == close && depth == 1 => {
1057 self.advance();
1058 if !buf.is_empty() {
1059 words.push(buf);
1060 }
1061 break;
1062 }
1063 Some(c) if c == open => {
1064 depth += 1;
1065 buf.push(self.advance().unwrap());
1066 }
1067 Some(c) if c == close => {
1068 debug_assert!(depth >= 2);
1070 depth -= 1;
1071 buf.push(self.advance().unwrap());
1072 }
1073 Some(_) => {
1074 buf.push(self.advance().unwrap());
1075 }
1076 }
1077 }
1078 return Ok(Token::QW(words));
1079 }
1080 loop {
1081 while let Some(ch) = self.peek() {
1083 if ch.is_whitespace() {
1084 self.advance();
1085 } else {
1086 break;
1087 }
1088 }
1089 if self.peek() == Some(close) {
1090 self.advance();
1091 break;
1092 }
1093 if self.peek().is_none() {
1094 return Err(self.syntax_err("Unterminated qw()", self.line));
1095 }
1096 let word = self.read_while(|c| !c.is_whitespace() && c != close);
1097 if !word.is_empty() {
1098 words.push(word);
1099 }
1100 }
1101 Ok(Token::QW(words))
1102 }
1103
    /// Reads a heredoc tag at the cursor, starting with the "indented" flag
    /// cleared. Returns the `(tag, quoted, indented)` triple produced by
    /// `read_heredoc_tag_inner`.
    fn read_heredoc_tag(&mut self) -> PerlResult<(String, bool, bool)> {
        self.read_heredoc_tag_inner(false)
    }
1107
1108 fn read_heredoc_tag_inner(&mut self, indented: bool) -> PerlResult<(String, bool, bool)> {
1109 let quoted;
1112 let tag;
1113 match self.peek() {
1114 Some('\'') => {
1115 self.advance();
1116 tag = self.read_while(|c| c != '\'');
1117 self.advance(); quoted = false; }
1120 Some('"') => {
1121 self.advance();
1122 tag = self.read_while(|c| c != '"');
1123 self.advance();
1124 quoted = true;
1125 }
1126 Some('~') => {
1127 self.advance(); return self.read_heredoc_tag_inner(true); }
1130 _ => {
1131 tag = self.read_while(|c| c.is_alphanumeric() || c == '_');
1132 quoted = true;
1133 }
1134 }
1135 Ok((tag, quoted, indented))
1136 }
1137
1138 fn read_heredoc_body(&mut self, tag: &str, indented: bool) -> PerlResult<String> {
1139 let mut lines: Vec<String> = Vec::new();
1142 while let Some(ch) = self.peek() {
1144 if ch == '\n' {
1145 self.advance();
1146 break;
1147 }
1148 self.advance();
1149 }
1150 let mut terminator_indent: Option<usize> = None;
1151 loop {
1152 let _line_start = self.pos;
1153 let line = self.read_while(|c| c != '\n');
1154 if line.trim() == tag {
1155 if indented {
1158 terminator_indent = Some(line.len() - line.trim_start().len());
1159 }
1160 break;
1161 }
1162 lines.push(line);
1163 if self.peek() == Some('\n') {
1164 self.advance();
1165 } else if self.pos >= self.input.len() {
1166 return Err(self.syntax_err(
1167 format!("Unterminated heredoc (looking for '{tag}')"),
1168 self.line,
1169 ));
1170 }
1171 }
1172 if self.peek() == Some('\n') {
1173 self.advance();
1174 }
1175 if indented {
1178 let strip = terminator_indent.unwrap_or(0);
1179 let mut body = String::new();
1180 for line in lines {
1181 let ws_count = line.len() - line.trim_start().len();
1182 let to_strip = ws_count.min(strip);
1183 body.push_str(&line[to_strip..]);
1184 body.push('\n');
1185 }
1186 Ok(body)
1187 } else {
1188 let mut body = String::new();
1189 for line in lines {
1190 body.push_str(&line);
1191 body.push('\n');
1192 }
1193 Ok(body)
1194 }
1195 }
1196
    /// Consumes and returns a run of alphanumeric characters and underscores.
    /// The result is empty if the cursor is not on such a character.
    fn read_identifier(&mut self) -> String {
        self.read_while(|c| c.is_alphanumeric() || c == '_')
    }
1200
1201 fn read_package_qualified_identifier(&mut self) -> String {
1203 let mut s = self.read_identifier();
1204 while self.peek() == Some(':') && self.input.get(self.pos + 1) == Some(&':') {
1205 self.advance();
1206 self.advance();
1207 s.push_str("::");
1208 s.push_str(&self.read_identifier());
1209 }
1210 s
1211 }
1212
1213 fn read_format_body(&mut self) -> PerlResult<Vec<String>> {
1215 while self.peek().is_some_and(|c| c == ' ' || c == '\t') {
1216 self.advance();
1217 }
1218 if self.peek() == Some('\n') {
1219 self.advance();
1220 }
1221 let mut lines = Vec::new();
1222 loop {
1223 let mut line = String::new();
1224 while let Some(c) = self.peek() {
1225 if c == '\n' {
1226 self.advance();
1227 break;
1228 }
1229 if c == '\r' {
1230 self.advance();
1231 if self.peek() == Some('\n') {
1232 self.advance();
1233 }
1234 break;
1235 }
1236 line.push(c);
1237 self.advance();
1238 }
1239 if line.trim() == "." {
1240 break;
1241 }
1242 lines.push(line);
1243 if self.peek().is_none() {
1244 return Err(self.syntax_err(
1245 "Unterminated format (expected '.' on its own line before end of file)",
1246 self.line,
1247 ));
1248 }
1249 }
1250 Ok(lines)
1251 }
1252
    /// Reads the name part of a variable whose sigil has already been
    /// consumed and returns it. Returns an empty string when the cursor is
    /// not on anything that can start a variable name.
    fn read_variable_name(&mut self) -> String {
        match self.peek() {
            // `$_{` directly at the cursor: consume the `$` and `_` and name
            // the variable `_`, leaving the `{` for the caller.
            Some('$')
                if self.input.get(self.pos + 1) == Some(&'_')
                    && self.input.get(self.pos + 2) == Some(&'{') =>
            {
                self.advance(); self.advance(); "_".to_string()
            }
            // Name starting with `::`, followed by optional identifier
            // segments separated by further `::`.
            Some(':') if self.input.get(self.pos + 1) == Some(&':') => {
                self.advance();
                self.advance();
                let mut s = "::".to_string();
                if self.peek().is_some_and(|c| c.is_alphabetic() || c == '_') {
                    s.push_str(&self.read_identifier());
                }
                while self.peek() == Some(':') && self.input.get(self.pos + 1) == Some(&':') {
                    self.advance();
                    self.advance();
                    s.push_str("::");
                    s.push_str(&self.read_identifier());
                }
                s
            }
            // Ordinary (possibly package-qualified) identifier.
            Some(c) if c.is_alphabetic() || c == '_' => {
                let ident = self.read_package_qualified_identifier();
                // `_` or `_` followed only by ASCII digits (`_0`, `_12`, ...).
                let is_topic_slot = ident == "_"
                    || (ident.len() > 1
                        && ident.starts_with('_')
                        && ident[1..].bytes().all(|b| b.is_ascii_digit()));
                if is_topic_slot {
                    // A run of `<` right after the slot name is folded into
                    // the returned name.
                    let mut lts = String::new();
                    while self.peek() == Some('<') {
                        self.advance();
                        lts.push('<');
                    }
                    // Exactly one `<` followed by digits: unless the digits
                    // are followed by `:` or `>`, the number N is consumed
                    // and the suffix is extended to N `<` characters in total.
                    if lts.len() == 1 && self.peek().is_some_and(|c| c.is_ascii_digit()) {
                        let mut peek_off = 0usize;
                        while self.peek_at(peek_off).is_some_and(|c| c.is_ascii_digit()) {
                            peek_off += 1;
                        }
                        let trailing = self.peek_at(peek_off);
                        let is_slice = matches!(trailing, Some(':') | Some('>'));
                        if !is_slice {
                            let mut digits = String::new();
                            for _ in 0..peek_off {
                                if let Some(c) = self.advance() {
                                    digits.push(c);
                                }
                            }
                            if let Ok(n) = digits.parse::<usize>() {
                                if n >= 1 {
                                    // One `<` is already present; add the rest.
                                    for _ in 1..n {
                                        lts.push('<');
                                    }
                                }
                            }
                        }
                    }
                    if !lts.is_empty() {
                        return format!("{}{}", ident, lts);
                    }
                }
                ident
            }
            // `^` optionally followed by one alphabetic character.
            Some('^') => {
                self.advance();
                if self.peek().is_some_and(|c| c.is_alphabetic()) {
                    let c2 = self.advance().unwrap();
                    format!("^{}", c2)
                } else {
                    "^".to_string()
                }
            }
            // `{...}`: the text between the braces; the closing `}` is
            // consumed when present.
            Some('{') => {
                self.advance(); let name = self.read_while(|c| c != '}');
                if self.peek() == Some('}') {
                    self.advance();
                }
                name
            }
            // `#` optionally followed by a qualified identifier.
            Some('#') => {
                self.advance();
                if self.peek().is_some_and(|c| c.is_alphabetic() || c == '_') {
                    let mut name = String::from("#");
                    name.push_str(&self.read_package_qualified_identifier());
                    name
                } else {
                    "#".to_string()
                }
            }
            // Single punctuation or digit character used as a variable name.
            Some(c) if "!@$&*+;',\"\\|?/<>.0123456789~%-=()[]{}".contains(c) => {
                self.advance();
                c.to_string()
            }
            _ => String::new(),
        }
    }
1372
1373 fn braced_body_symbolic_scalar_deref_name(body: &str) -> Option<&str> {
1377 let body = body.trim();
1378 let rest = body.strip_prefix('$')?;
1379 if rest.is_empty() {
1380 return None;
1381 }
1382 let mut chars = rest.chars();
1383 let c0 = chars.next()?;
1384 if !(c0.is_alphabetic() || c0 == '_') {
1385 return None;
1386 }
1387 for c in chars {
1388 if !(c.is_alphanumeric() || c == '_' || c == ':') {
1389 return None;
1390 }
1391 }
1392 Some(rest)
1393 }
1394
1395 pub fn next_token(&mut self) -> PerlResult<Token> {
1396 self.skip_whitespace_and_comments();
1397
1398 if self.pos >= self.input.len() {
1399 return Ok(Token::Eof);
1400 }
1401
1402 self.prev_arrow = self.last_was_arrow;
1409 self.last_was_arrow = false;
1410 self.last_was_bare_positional = false;
1411
1412 let ch = self.input[self.pos];
1413 match ch {
1414 '$' => {
1416 self.advance();
1417 if self.peek() == Some('$') {
1419 let is_dollar_under_brace = self.input.get(self.pos + 1) == Some(&'_')
1421 && self.input.get(self.pos + 2) == Some(&'{');
1422 if !is_dollar_under_brace {
1423 self.advance();
1424 if self.peek().is_some_and(|c| c.is_alphabetic() || c == '_') {
1425 let name = self.read_identifier();
1426 self.last_was_term = true;
1427 return Ok(Token::DerefScalarVar(name));
1428 }
1429 self.last_was_term = true;
1431 return Ok(Token::ScalarVar("$$".to_string()));
1432 }
1433 }
1434 let name = self.read_variable_name();
1435 if name.is_empty() {
1436 return Err(self.syntax_err("Expected variable name after $", self.line));
1437 }
1438 if crate::no_interop_mode() && (name == "a" || name == "b") {
1443 return Err(self.syntax_err(
1444 format!(
1445 "stryke uses `$_0` / `$_1` instead of `${}` (--no-interop is active)",
1446 name
1447 ),
1448 self.line,
1449 ));
1450 }
1451 self.last_was_term = true;
1452 if let Some(tail) = Self::braced_body_symbolic_scalar_deref_name(&name) {
1453 return Ok(Token::DerefScalarVar(tail.to_string()));
1454 }
1455 Ok(Token::ScalarVar(name))
1456 }
1457 '@' => {
1458 self.advance();
1459 if self.peek() == Some('-') {
1460 self.advance();
1461 self.last_was_term = true;
1462 return Ok(Token::ArrayVar("-".to_string()));
1463 }
1464 if self.peek() == Some('+') {
1465 self.advance();
1466 self.last_was_term = true;
1467 return Ok(Token::ArrayVar("+".to_string()));
1468 }
1469 if self.peek() == Some('^')
1470 && self
1471 .input
1472 .get(self.pos + 1)
1473 .is_some_and(|c| c.is_alphabetic() || *c == '_')
1474 {
1475 self.advance();
1476 let name = format!("^{}", self.read_package_qualified_identifier());
1477 self.last_was_term = true;
1478 return Ok(Token::ArrayVar(name));
1479 }
1480 if self.peek() == Some('_') || self.peek().is_some_and(|c| c.is_alphabetic()) {
1481 let name = self.read_package_qualified_identifier();
1482 self.last_was_term = true;
1483 return Ok(Token::ArrayVar(name));
1484 }
1485 self.last_was_term = false;
1486 Ok(Token::ArrayAt)
1487 }
1488 '%' if !self.last_was_term => {
1489 self.advance();
1490 if self.peek() == Some('+') {
1492 self.advance();
1493 self.last_was_term = true;
1494 return Ok(Token::HashVar("+".to_string()));
1495 }
1496 if self.peek() == Some('^')
1497 && self
1498 .input
1499 .get(self.pos + 1)
1500 .is_some_and(|c| c.is_alphabetic() || *c == '_')
1501 {
1502 self.advance();
1503 let name = format!("^{}", self.read_package_qualified_identifier());
1504 self.last_was_term = true;
1505 return Ok(Token::HashVar(name));
1506 }
1507 if self.peek().is_some_and(|c| c.is_alphabetic() || c == '_') {
1508 let name = self.read_package_qualified_identifier();
1509 self.last_was_term = true;
1510 return Ok(Token::HashVar(name));
1511 }
1512 self.last_was_term = false;
1513 Ok(Token::HashPercent)
1514 }
1515
1516 '0'..='9' => {
1518 let tok = self.read_number()?;
1519 self.last_was_term = true;
1520 Ok(tok)
1521 }
1522
1523 '\'' => {
1525 let tok = self.read_single_quoted_string()?;
1526 self.last_was_term = true;
1527 Ok(tok)
1528 }
1529 '"' => {
1530 let tok = self.read_double_quoted_string()?;
1531 self.last_was_term = true;
1532 Ok(tok)
1533 }
1534
1535 '`' => {
1537 self.advance();
1538 let cmd = self.read_escaped_until('`')?;
1539 self.last_was_term = true;
1540 Ok(Token::BacktickString(cmd))
1541 }
1542
1543 '/' => {
1545 if !self.last_was_term {
1546 let tok = self.read_regex()?;
1547 self.last_was_term = true;
1548 return Ok(tok);
1549 }
1550 self.advance();
1551 if self.peek() == Some('=') {
1552 self.advance();
1553 self.last_was_term = false;
1554 return Ok(Token::DivAssign);
1555 }
1556 if self.peek() == Some('/') {
1557 self.advance();
1558 if self.peek() == Some('=') {
1559 self.advance();
1560 self.last_was_term = false;
1561 return Ok(Token::DefinedOrAssign);
1562 }
1563 self.last_was_term = false;
1564 return Ok(Token::DefinedOr);
1565 }
1566 self.last_was_term = false;
1567 Ok(Token::Slash)
1568 }
1569
1570 '+' => {
1572 self.advance();
1573 if self.peek() == Some('+') {
1574 self.advance();
1575 return Ok(Token::Increment);
1577 }
1578 if self.peek() == Some('=') {
1579 self.advance();
1580 self.last_was_term = false;
1581 return Ok(Token::PlusAssign);
1582 }
1583 self.last_was_term = false;
1584 Ok(Token::Plus)
1585 }
1586 '-' => {
1587 self.advance();
1588 if !self.last_was_term {
1590 if let Some(c) = self.peek() {
1591 if "efdlpSszrwxoRWXOBCTMAgut".contains(c)
1592 && self.peek_at(1).is_none_or(|n| {
1593 n.is_whitespace()
1594 || n == '$'
1595 || n == '\''
1596 || n == '"'
1597 || n == '('
1598 || n == ')'
1599 || n == '}'
1600 || n == ';'
1601 || n == ','
1602 })
1603 {
1604 self.advance();
1605 self.last_was_term = false;
1606 return Ok(Token::FileTest(c));
1607 }
1608 }
1609 }
1610 if self.peek() == Some('-') {
1611 self.advance();
1612 return Ok(Token::Decrement);
1613 }
1614 if self.peek() == Some('=') {
1615 self.advance();
1616 self.last_was_term = false;
1617 return Ok(Token::MinusAssign);
1618 }
1619 if self.peek() == Some('>') {
1620 self.advance();
1621 if self.peek() == Some('>') {
1622 self.advance();
1623 self.last_was_term = false;
1624 return Ok(Token::ThreadArrowLast);
1625 }
1626 self.last_was_term = false;
1627 self.last_was_arrow = true;
1631 return Ok(Token::Arrow);
1632 }
1633 self.last_was_term = false;
1634 Ok(Token::Minus)
1635 }
1636 '*' => {
1637 self.advance();
1638 if self.peek() == Some('*') {
1639 self.advance();
1640 if self.peek() == Some('=') {
1641 self.advance();
1642 self.last_was_term = false;
1643 return Ok(Token::PowAssign);
1644 }
1645 self.last_was_term = false;
1646 return Ok(Token::Power);
1647 }
1648 if self.peek() == Some('=') {
1649 self.advance();
1650 self.last_was_term = false;
1651 return Ok(Token::MulAssign);
1652 }
1653 self.last_was_term = false;
1654 Ok(Token::Star)
1655 }
1656 '%' => {
1657 self.advance();
1659 if self.peek() == Some('=') {
1660 self.advance();
1661 self.last_was_term = false;
1662 return Ok(Token::ModAssign);
1663 }
1664 self.last_was_term = false;
1665 Ok(Token::Percent)
1666 }
1667 '.' => {
1668 self.advance();
1669 if self.peek() == Some('.') {
1670 self.advance();
1671 if self.peek() == Some('.') {
1672 self.advance();
1673 self.last_was_term = false;
1674 return Ok(Token::RangeExclusive);
1675 }
1676 self.last_was_term = false;
1677 return Ok(Token::Range);
1678 }
1679 if self.peek() == Some('=') {
1680 self.advance();
1681 self.last_was_term = false;
1682 return Ok(Token::DotAssign);
1683 }
1684 self.last_was_term = false;
1685 Ok(Token::Dot)
1686 }
1687 '=' => {
1688 let eq_pos = self.pos;
1689 self.advance();
1690 if self.peek() == Some('=') {
1691 self.advance();
1692 self.last_was_term = false;
1693 return Ok(Token::NumEq);
1694 }
1695 if self.peek() == Some('~') {
1696 self.advance();
1697 self.last_was_term = false;
1698 return Ok(Token::BindMatch);
1699 }
1700 if self.peek() == Some('>') {
1701 self.advance();
1702 self.last_was_term = false;
1703 return Ok(Token::FatArrow);
1704 }
1705 if self.peek().is_some_and(|c| c.is_alphabetic())
1707 && self.at_line_start_for_pod(eq_pos)
1708 {
1709 loop {
1711 let line = self.read_while(|c| c != '\n');
1712 if self.peek() == Some('\n') {
1713 self.advance();
1714 }
1715 if line.starts_with("=cut") || self.pos >= self.input.len() {
1716 break;
1717 }
1718 }
1719 return self.next_token();
1720 }
1721 self.last_was_term = false;
1722 Ok(Token::Assign)
1723 }
1724 '!' => {
1725 self.advance();
1726 if self.peek() == Some('=') {
1727 self.advance();
1728 self.last_was_term = false;
1729 return Ok(Token::NumNe);
1730 }
1731 if self.peek() == Some('~') {
1732 self.advance();
1733 self.last_was_term = false;
1734 return Ok(Token::BindNotMatch);
1735 }
1736 self.last_was_term = false;
1737 Ok(Token::LogNot)
1738 }
1739 '<' => {
1740 self.advance();
1741 let after_lt = self.pos;
1742 if self.peek() == Some('$') {
1744 self.advance();
1745 let name = self.read_variable_name();
1746 if !name.is_empty() && self.peek() == Some('>') {
1747 self.advance();
1748 self.last_was_term = true;
1749 return Ok(Token::ReadLine(name));
1750 }
1751 self.pos = after_lt;
1752 }
1753 if self.peek() == Some('>') {
1755 self.advance();
1756 self.last_was_term = true;
1757 return Ok(Token::Diamond);
1758 }
1759 if self.peek().is_some_and(|c| c.is_uppercase()) {
1760 let name = self.read_identifier();
1761 if self.peek() == Some('>') {
1762 self.advance();
1763 self.last_was_term = true;
1764 return Ok(Token::ReadLine(name));
1765 }
1766 self.last_was_term = false;
1769 return Ok(Token::NumLt);
1770 }
1771 if self.peek() == Some('=') {
1772 self.advance();
1773 if self.peek() == Some('>') {
1774 self.advance();
1775 self.last_was_term = false;
1776 return Ok(Token::Spaceship);
1777 }
1778 self.last_was_term = false;
1779 return Ok(Token::NumLe);
1780 }
1781 if self.peek() == Some('<') {
1782 self.advance();
1783 if self.peek() == Some('=') {
1784 self.advance();
1785 self.last_was_term = false;
1786 return Ok(Token::ShiftLeftAssign);
1787 }
1788 if self.last_was_term {
1791 self.last_was_term = false;
1792 return Ok(Token::ShiftLeft);
1793 }
1794 let (tag, interpolate, indented) = self.read_heredoc_tag()?;
1795 let body = self.read_heredoc_body(&tag, indented)?;
1796 self.last_was_term = true;
1797 return Ok(Token::HereDoc(tag, body, interpolate));
1798 }
1799 self.last_was_term = false;
1800 Ok(Token::NumLt)
1801 }
1802 '>' => {
1803 self.advance();
1804 if self.peek() == Some('{') {
1805 self.advance();
1806 self.last_was_term = false;
1807 return Ok(Token::ArrowBrace);
1808 }
1809 if self.peek() == Some('=') {
1810 self.advance();
1811 self.last_was_term = false;
1812 return Ok(Token::NumGe);
1813 }
1814 if self.peek() == Some('>') {
1815 self.advance();
1816 if self.peek() == Some('=') {
1817 self.advance();
1818 self.last_was_term = false;
1819 return Ok(Token::ShiftRightAssign);
1820 }
1821 self.last_was_term = false;
1822 return Ok(Token::ShiftRight);
1823 }
1824 self.last_was_term = false;
1825 Ok(Token::NumGt)
1826 }
1827 '&' => {
1828 self.advance();
1829 if self.peek() == Some('&') {
1830 self.advance();
1831 if self.peek() == Some('=') {
1832 self.advance();
1833 self.last_was_term = false;
1834 return Ok(Token::AndAssign);
1835 }
1836 self.last_was_term = false;
1837 return Ok(Token::LogAnd);
1838 }
1839 if self.peek() == Some('=') {
1840 self.advance();
1841 self.last_was_term = false;
1842 return Ok(Token::BitAndAssign);
1843 }
1844 self.last_was_term = false;
1845 Ok(Token::BitAnd)
1846 }
1847 '|' => {
1848 self.advance();
1849 if self.peek() == Some('|') {
1850 self.advance();
1851 if self.peek() == Some('=') {
1852 self.advance();
1853 self.last_was_term = false;
1854 return Ok(Token::OrAssign);
1855 }
1856 self.last_was_term = false;
1857 return Ok(Token::LogOr);
1858 }
1859 if self.peek() == Some('=') {
1860 self.advance();
1861 self.last_was_term = false;
1862 return Ok(Token::BitOrAssign);
1863 }
1864 if self.peek() == Some('>') {
1865 self.advance();
1866 self.last_was_term = false;
1867 return Ok(Token::PipeForward);
1868 }
1869 self.last_was_term = false;
1870 Ok(Token::BitOr)
1871 }
1872 '^' => {
1873 self.advance();
1874 if self.peek() == Some('=') {
1875 self.advance();
1876 self.last_was_term = false;
1877 return Ok(Token::XorAssign);
1878 }
1879 self.last_was_term = false;
1880 Ok(Token::BitXor)
1881 }
1882 '~' => {
1883 self.advance();
1884 if self.peek() == Some('>') {
1885 self.advance();
1886 if self.peek() == Some('>') {
1887 self.advance();
1888 self.last_was_term = false;
1889 return Ok(Token::ThreadArrowLast);
1890 }
1891 self.last_was_term = false;
1892 return Ok(Token::ThreadArrow);
1893 }
1894 if self.peek() == Some('s') && self.peek_at(1) == Some('>') {
1899 self.advance(); self.advance(); if self.peek() == Some('>') {
1902 self.advance(); self.last_was_term = false;
1904 return Ok(Token::ThreadArrowStreamLast);
1905 }
1906 self.last_was_term = false;
1907 return Ok(Token::ThreadArrowStream);
1908 }
1909 if self.peek() == Some('p') && self.peek_at(1) == Some('>') {
1914 self.advance(); self.advance(); if self.peek() == Some('>') {
1917 self.advance(); self.last_was_term = false;
1919 return Ok(Token::ThreadArrowParLast);
1920 }
1921 self.last_was_term = false;
1922 return Ok(Token::ThreadArrowPar);
1923 }
1924 self.last_was_term = false;
1925 Ok(Token::BitNot)
1926 }
1927 '?' => {
1928 self.advance();
1929 self.last_was_term = false;
1930 Ok(Token::Question)
1931 }
1932 ':' => {
1933 self.advance();
1934 if self.peek() == Some(':') {
1935 self.advance();
1936 let in_bracket_subscript =
1943 self.input.get(self.pos.saturating_sub(3)).copied() == Some('[');
1944 if !self.last_was_term && !in_bracket_subscript {
1945 let saved = self.pos - 2;
1946 if let Some(consumed) = self.try_consume_ipv6_tail(saved) {
1947 self.last_was_term = true;
1948 return Ok(Token::DoubleString(consumed));
1949 }
1950 }
1951 self.last_was_term = false;
1952 return Ok(Token::PackageSep);
1953 }
1954 self.last_was_term = false;
1955 Ok(Token::Colon)
1956 }
1957 '\\' => {
1958 self.advance();
1959 if self.peek() == Some('\n') {
1962 self.pos += 1; return self.next_token();
1964 }
1965 self.last_was_term = false;
1966 Ok(Token::Backslash)
1967 }
1968 ',' => {
1969 self.advance();
1970 self.last_was_term = false;
1971 Ok(Token::Comma)
1972 }
1973 ';' => {
1974 self.advance();
1975 self.last_was_term = false;
1976 Ok(Token::Semicolon)
1977 }
1978 '(' => {
1979 self.advance();
1980 self.last_was_term = false;
1981 Ok(Token::LParen)
1982 }
1983 ')' => {
1984 self.advance();
1985 self.last_was_term = true;
1986 Ok(Token::RParen)
1987 }
1988 '[' => {
1989 self.advance();
1990 self.last_was_term = false;
1991 Ok(Token::LBracket)
1992 }
1993 ']' => {
1994 self.advance();
1995 self.last_was_term = true;
1996 Ok(Token::RBracket)
1997 }
1998 '{' => {
1999 self.advance();
2000 self.last_was_term = false;
2001 Ok(Token::LBrace)
2002 }
2003 '}' => {
2004 self.advance();
2005 self.last_was_term = true;
2006 Ok(Token::RBrace)
2007 }
2008
2009 c if c.is_alphabetic() || c == '_' => {
2011 let ident_start = self.pos;
2012 let mut ident = self.read_identifier();
2013
2014 let after_package_sep = ident_start >= 2
2029 && self.input.get(ident_start.saturating_sub(2)) == Some(&':')
2030 && self.input.get(ident_start.saturating_sub(1)) == Some(&':');
2031 if !after_package_sep
2032 && self.peek() == Some(':')
2033 && ident.len() <= 4
2034 && ident.chars().all(|ch| ch.is_ascii_hexdigit())
2035 {
2036 if let Some(consumed) = self.try_consume_ipv6_tail(ident_start) {
2037 self.last_was_term = true;
2038 return Ok(Token::DoubleString(consumed));
2039 }
2040 }
2041
2042 let is_topic_slot = ident == "_"
2050 || (ident.len() > 1
2051 && ident.starts_with('_')
2052 && ident[1..].bytes().all(|b| b.is_ascii_digit()));
2053 if is_topic_slot {
2054 let mut peek_off = 0usize;
2067 while self.peek_at(peek_off) == Some('<') {
2068 peek_off += 1;
2069 }
2070 let trailing = self.peek_at(peek_off);
2071 let mut indexed_ascent: Option<usize> = None;
2076 if peek_off == 1 && trailing.is_some_and(|c: char| c.is_ascii_digit()) {
2077 let mut off = 1usize;
2078 while self.peek_at(off).is_some_and(|c| c.is_ascii_digit()) {
2079 off += 1;
2080 }
2081 let after_digits = self.peek_at(off);
2082 let still_a_slice = matches!(after_digits, Some(':') | Some('>'));
2083 if !still_a_slice {
2084 let mut digits = String::new();
2086 for k in 1..off {
2087 if let Some(c) = self.peek_at(k) {
2088 digits.push(c);
2089 }
2090 }
2091 if let Ok(n) = digits.parse::<usize>() {
2092 if n >= 1 {
2093 indexed_ascent = Some(n);
2094 self.advance();
2096 for _ in 1..off {
2097 self.advance();
2098 }
2099 }
2100 }
2101 }
2102 }
2103 if let Some(n) = indexed_ascent {
2104 for _ in 0..n {
2105 ident.push('<');
2106 }
2107 } else {
2108 let is_slice = peek_off > 0
2109 && matches!(trailing, Some(c) if c.is_ascii_digit() || c == '-' || c == ':' || c == '>');
2110 if !is_slice {
2111 for _ in 0..peek_off {
2112 self.advance();
2113 ident.push('<');
2114 }
2115 }
2116 }
2117 if ident.len() > 1
2125 && ident.starts_with('_')
2126 && ident.chars().nth(1).is_some_and(|c| c.is_ascii_digit())
2127 {
2128 self.last_was_term = true;
2129 self.last_was_bare_positional = true;
2130 return Ok(Token::ScalarVar(ident));
2131 }
2132 if ident.starts_with('_') && ident.contains('<') {
2135 self.last_was_term = true;
2136 self.last_was_bare_positional = true;
2137 return Ok(Token::ScalarVar(ident));
2138 }
2139 }
2140
2141 match ident.as_str() {
2143 "format" => {
2144 self.skip_whitespace_and_comments();
2145 let fname = self.read_package_qualified_identifier();
2146 self.skip_whitespace_and_comments();
2147 if self.peek() != Some('=') {
2148 return Err(
2149 self.syntax_err("Expected '=' after format name", self.line)
2150 );
2151 }
2152 self.advance();
2153 let lines = self.read_format_body()?;
2154 self.last_was_term = false;
2155 return Ok(Token::FormatDecl { name: fname, lines });
2156 }
2157 "qw" => {
2158 if self.prev_arrow {
2160 self.last_was_term = true;
2161 return Ok(Token::Ident(ident));
2162 }
2163 let start_pos = self.pos;
2165 self.skip_whitespace_only();
2166 if let Some(c) = self.peek() {
2167 if c == '=' && self.peek_at(1) == Some('>') {
2168 self.pos = start_pos;
2169 self.last_was_term = true;
2170 return Ok(Token::Ident(ident));
2171 }
2172 if matches!(c, ';' | ',' | ')' | ']' | '}' | '\n') {
2173 self.pos = start_pos;
2174 self.last_was_term = true;
2175 return Ok(Token::Ident(ident));
2176 }
2177 }
2178 self.pos = start_pos; let tok = self.read_qw()?;
2180 self.last_was_term = true;
2181 return Ok(tok);
2182 }
2183 "qq" | "q" => {
2184 if self.prev_arrow {
2186 self.last_was_term = true;
2187 return Ok(Token::Ident(ident));
2188 }
2189 let start_pos = self.pos;
2193 self.skip_whitespace_only();
2194 if let Some(c) = self.peek() {
2195 if c == '=' && self.peek_at(1) == Some('>') {
2197 self.pos = start_pos; self.last_was_term = true;
2199 return Ok(Token::Ident(ident));
2200 }
2201 if matches!(c, ';' | ',' | ')' | ']' | '}' | '\n') {
2203 self.pos = start_pos;
2204 self.last_was_term = true;
2205 return Ok(Token::Ident(ident));
2206 }
2207 }
2208 let delim = self.advance().ok_or_else(|| {
2209 self.syntax_err("Expected delimiter after q/qq", self.line)
2210 })?;
2211 let close = match delim {
2212 '(' => ')',
2213 '[' => ']',
2214 '{' => '}',
2215 '<' => '>',
2216 c => c,
2217 };
2218 let s = if matches!(delim, '(' | '[' | '{' | '<') {
2219 self.read_q_qq_balanced_body(delim, close, ident == "qq")?
2220 } else {
2221 self.read_escaped_until(close)?
2222 };
2223 self.last_was_term = true;
2224 if ident == "qq" {
2225 return Ok(Token::DoubleString(s));
2226 }
2227 return Ok(Token::SingleString(s));
2228 }
2229 "qx" => {
2230 if self.prev_arrow {
2232 self.last_was_term = true;
2233 return Ok(Token::Ident(ident));
2234 }
2235 let start_pos = self.pos;
2237 self.skip_whitespace_only();
2238 if let Some(c) = self.peek() {
2239 if c == '=' && self.peek_at(1) == Some('>') {
2240 self.pos = start_pos;
2241 self.last_was_term = true;
2242 return Ok(Token::Ident(ident));
2243 }
2244 if matches!(c, ';' | ',' | ')' | ']' | '}' | '\n') {
2245 self.pos = start_pos;
2246 self.last_was_term = true;
2247 return Ok(Token::Ident(ident));
2248 }
2249 }
2250 let delim = self.advance().ok_or_else(|| {
2251 self.syntax_err("Expected delimiter after qx", self.line)
2252 })?;
2253 let close = match delim {
2254 '(' => ')',
2255 '[' => ']',
2256 '{' => '}',
2257 '<' => '>',
2258 c => c,
2259 };
2260 let s = self.read_escaped_until(close)?;
2261 self.last_was_term = true;
2262 return Ok(Token::BacktickString(s));
2263 }
2264 "qr" => {
2265 if self.prev_arrow {
2267 self.last_was_term = true;
2268 return Ok(Token::Ident(ident));
2269 }
2270 let start_pos = self.pos;
2272 self.skip_whitespace_only();
2273 if let Some(c) = self.peek() {
2274 if c == '=' && self.peek_at(1) == Some('>') {
2275 self.pos = start_pos;
2276 self.last_was_term = true;
2277 return Ok(Token::Ident(ident));
2278 }
2279 if matches!(c, ';' | ',' | ')' | ']' | '}' | '\n') {
2280 self.pos = start_pos;
2281 self.last_was_term = true;
2282 return Ok(Token::Ident(ident));
2283 }
2284 }
2285 let delim = self.advance().ok_or_else(|| {
2286 self.syntax_err("Expected delimiter after qr", self.line)
2287 })?;
2288 let close = match delim {
2289 '(' => ')',
2290 '[' => ']',
2291 '{' => '}',
2292 '<' => '>',
2293 c => c,
2294 };
2295 let mut pattern = String::new();
2302 loop {
2303 match self.advance() {
2304 Some('\\') => {
2305 pattern.push('\\');
2306 if let Some(c) = self.advance() {
2307 pattern.push(c);
2308 }
2309 }
2310 Some(c) if c == close => break,
2311 Some(c) => pattern.push(c),
2312 None => {
2313 return Err(self.syntax_err("Unterminated qr regex", self.line))
2314 }
2315 }
2316 }
2317 let flags = self.read_while(|c| REGEX_FLAG_CHARS.contains(c));
2318 self.last_was_term = true;
2319 return Ok(Token::Regex(pattern, flags, delim));
2320 }
2321 "m" => {
2322 if self.prev_arrow {
2324 self.last_was_term = true;
2325 return Ok(Token::Ident(ident));
2326 }
2327 let start_pos = self.pos;
2330 self.skip_whitespace_only();
2331 if let Some(d) = self.peek() {
2332 if d == '=' && self.peek_at(1) == Some('>') {
2333 self.pos = start_pos;
2334 self.last_was_term = true;
2335 return Ok(Token::Ident(ident));
2336 }
2337 if matches!(d, ';' | ',' | ')' | ']' | '}' | '>' | ':' | '\n') {
2338 self.pos = start_pos;
2339 self.last_was_term = true;
2340 return Ok(Token::Ident(ident));
2341 }
2342 }
2343 self.pos = start_pos;
2344 if self.suppress_m_regex == 0 {
2347 if let Some(delim) = self.peek() {
2348 if !delim.is_alphanumeric() && delim != '_' {
2349 let saved_pos = self.pos;
2351 let saved_line = self.line;
2352 self.advance(); let close = match delim {
2354 '(' => ')',
2355 '[' => ']',
2356 '{' => '}',
2357 '<' => '>',
2358 c => c,
2359 };
2360 let mut pattern = String::new();
2361 let mut terminated = true;
2362 loop {
2363 match self.advance() {
2364 Some('\\') => {
2365 pattern.push('\\');
2366 if let Some(c) = self.advance() {
2367 pattern.push(c);
2368 }
2369 }
2370 Some(c) if c == close => break,
2371 Some(c) if c == '\n' && close == '/' => {
2372 terminated = false;
2374 break;
2375 }
2376 Some(c) => pattern.push(c),
2377 None => {
2378 return Err(self.syntax_err(
2379 "Search pattern not terminated",
2380 saved_line,
2381 ));
2382 }
2383 }
2384 }
2385 if terminated {
2386 let flags =
2387 self.read_while(|c| REGEX_FLAG_CHARS.contains(c));
2388 self.last_was_term = true;
2389 return Ok(Token::Regex(pattern, flags, delim));
2390 }
2391 self.pos = saved_pos;
2393 self.line = saved_line;
2394 }
2395 }
2396 }
2397 self.last_was_term = true;
2399 return Ok(Token::Ident(ident));
2400 }
2401 "s" => {
2402 if self.prev_arrow {
2404 self.last_was_term = true;
2405 return Ok(Token::Ident(ident));
2406 }
2407 let start_pos = self.pos;
2416 self.skip_whitespace_only();
2417 if let Some(d) = self.peek() {
2418 if d == '=' && self.peek_at(1) == Some('>') {
2419 self.pos = start_pos;
2420 self.last_was_term = true;
2421 return Ok(Token::Ident(ident));
2422 }
2423 if matches!(d, ';' | ')' | ']' | '}' | '>' | ':' | '\n') {
2424 self.pos = start_pos;
2425 self.last_was_term = true;
2426 return Ok(Token::Ident(ident));
2427 }
2428 if d == ',' && !self.lookahead_is_comma_delim_subst() {
2429 self.pos = start_pos;
2430 self.last_was_term = true;
2431 return Ok(Token::Ident(ident));
2432 }
2433 }
2434 self.pos = start_pos;
2435 if let Some(delim) = self.peek() {
2437 if !delim.is_alphanumeric() && delim != '_' && delim != ' ' {
2438 self.advance();
2439 let close = match delim {
2440 '(' => ')',
2441 '[' => ']',
2442 '{' => '}',
2443 '<' => '>',
2444 c => c,
2445 };
2446 let mut pattern = String::new();
2447 loop {
2448 match self.advance() {
2449 Some('\\') => {
2450 pattern.push('\\');
2451 if let Some(c) = self.advance() {
2452 pattern.push(c);
2453 }
2454 }
2455 Some(c) if c == close => break,
2456 Some(c) => pattern.push(c),
2457 None => {
2458 return Err(self.syntax_err(
2459 "Unterminated s/// pattern",
2460 self.line,
2461 ))
2462 }
2463 }
2464 }
2465 if "([{<".contains(delim) {
2467 self.skip_whitespace_only();
2468 let open2 = self.advance().unwrap_or(delim);
2469 let close = match open2 {
2470 '(' => ')',
2471 '[' => ']',
2472 '{' => '}',
2473 '<' => '>',
2474 c => c,
2475 };
2476 let replacement = self.read_escaped_until(close)?;
2477 let flags = self.read_while(|c| REGEX_FLAG_CHARS.contains(c));
2478 self.last_was_term = true;
2479 return Ok(Token::Ident(format!(
2482 "\x00s\x00{}\x00{}\x00{}\x00{}",
2483 pattern, replacement, flags, delim
2484 )));
2485 }
2486 let replacement = self.read_escaped_until(close)?;
2487 let flags = self.read_while(|c| REGEX_FLAG_CHARS.contains(c));
2488 self.last_was_term = true;
2489 return Ok(Token::Ident(format!(
2490 "\x00s\x00{}\x00{}\x00{}\x00{}",
2491 pattern, replacement, flags, delim
2492 )));
2493 }
2494 }
2495 self.last_was_term = true;
2496 return Ok(Token::Ident(ident));
2497 }
2498 "tr" | "y" => {
2499 if self.prev_arrow {
2502 self.last_was_term = true;
2503 return Ok(Token::Ident(ident));
2504 }
2505 if self.pos >= ident.len() + 2 {
2508 let prev_start = self.pos - ident.len() - 2;
2509 if self.input.get(prev_start) == Some(&':')
2510 && self.input.get(prev_start + 1) == Some(&':')
2511 {
2512 self.last_was_term = true;
2513 return Ok(Token::Ident(ident));
2514 }
2515 }
2516 if let Some(d) = self.peek() {
2523 if matches!(d, ';' | ')' | ']' | '}' | '>' | ':' | '\n') {
2524 self.last_was_term = true;
2525 return Ok(Token::Ident(ident));
2526 }
2527 if d == ',' && !self.lookahead_is_comma_delim_subst() {
2528 self.last_was_term = true;
2529 return Ok(Token::Ident(ident));
2530 }
2531 } else {
2532 self.last_was_term = true;
2533 return Ok(Token::Ident(ident));
2534 }
2535 let start_pos = self.pos;
2537 self.skip_whitespace_only();
2538 if let Some(d) = self.peek() {
2539 if d == '=' && self.peek_at(1) != Some('=') {
2541 self.pos = start_pos;
2542 self.last_was_term = true;
2543 return Ok(Token::Ident(ident));
2544 }
2545 }
2546 self.pos = start_pos;
2547 if self.peek() == Some('(') {
2549 let scan_pos = self.pos;
2551 self.advance(); let mut depth = 1;
2553 while depth > 0 {
2554 match self.peek() {
2555 Some('(') => {
2556 self.advance();
2557 depth += 1;
2558 }
2559 Some(')') => {
2560 self.advance();
2561 depth -= 1;
2562 }
2563 Some(_) => {
2564 self.advance();
2565 }
2566 None => break,
2567 }
2568 }
2569 self.skip_whitespace_only();
2570 let is_func_def = self.peek() == Some('{');
2571 self.pos = scan_pos;
2572 if is_func_def {
2573 self.last_was_term = true;
2574 return Ok(Token::Ident(ident));
2575 }
2576 }
2577 if let Some(delim) = self.peek() {
2579 if !delim.is_alphanumeric() && delim != '_' && delim != ' ' {
2580 self.advance();
2581 let close = match delim {
2582 '(' => ')',
2583 '[' => ']',
2584 '{' => '}',
2585 '<' => '>',
2586 c => c,
2587 };
2588 let from = self.read_escaped_until(close)?;
2589 if "([{<".contains(delim) {
2591 self.skip_whitespace_only();
2592 self.advance(); }
2594 let to = self.read_escaped_until(close)?;
2595 let flags = self.read_while(|c| "cdsr".contains(c));
2596 self.last_was_term = true;
2597 return Ok(Token::Ident(format!(
2598 "\x00tr\x00{}\x00{}\x00{}\x00{}",
2599 from, to, flags, delim
2600 )));
2601 }
2602 }
2603 self.last_was_term = true;
2604 return Ok(Token::Ident(ident));
2605 }
2606 _ => {}
2607 }
2608
2609 let saved_pos2 = self.pos;
2611 self.skip_whitespace_and_comments();
2612 if self.peek() == Some('=') && self.peek_at(1) == Some('>') {
2613 self.pos = saved_pos2;
2614 self.last_was_term = true;
2615 return Ok(Token::Ident(ident));
2616 }
2617 self.pos = saved_pos2;
2618
2619 let tok = if ident == "x" && !self.last_was_term {
2623 Token::Ident("x".to_string())
2624 } else {
2625 keyword_or_ident(&ident)
2626 };
2627 if matches!(tok, Token::Ident(ref s) if s == "_") {
2628 self.last_was_bare_positional = true;
2629 }
2630 self.last_was_term = match ident.as_str() {
2633 "my"
2636 | "mysync"
2637 | "frozen"
2638 | "const"
2639 | "typed"
2640 | "our"
2641 | "oursync"
2642 | "local"
2643 | "state"
2644 | "return"
2645 | "print"
2646 | "pr"
2647 | "say"
2648 | "p"
2649 | "die"
2650 | "warn"
2651 | "push"
2652 | "pop"
2653 | "shift"
2654 | "shuffle"
2655 | "chunked"
2656 | "windowed"
2657 | "unshift"
2658 | "splice"
2659 | "delete"
2660 | "exists"
2661 | "chomp"
2662 | "chop"
2663 | "defined"
2664 | "keys"
2665 | "values"
2666 | "each"
2667 | "sub"
2668 | "struct"
2669 | "if"
2670 | "unless"
2671 | "while"
2672 | "until"
2673 | "for"
2674 | "foreach"
2675 | "elsif"
2676 | "use"
2677 | "no"
2678 | "require"
2679 | "eval"
2680 | "do"
2681 | "map"
2682 | "maps"
2683 | "flat_maps"
2684 | "grep"
2685 | "greps"
2686 | "sort"
2687 | "all"
2688 | "any"
2689 | "none"
2690 | "take_while"
2691 | "drop_while"
2692 | "skip_while"
2693 | "skip"
2694 | "first_or"
2695 | "tap"
2696 | "peek"
2697 | "with_index"
2698 | "pmap"
2699 | "pflat_map"
2700 | "puniq"
2701 | "pfirst"
2702 | "pany"
2703 | "pmap_chunked"
2704 | "pipeline"
2705 | "pgrep"
2706 | "pfor"
2707 | "par_lines"
2708 | "par_walk"
2709 | "pwatch"
2710 | "watch"
2711 | "psort"
2712 | "reduce"
2713 | "fold"
2714 | "inject"
2715 | "first"
2716 | "detect"
2717 | "find"
2718 | "find_all"
2719 | "preduce"
2720 | "preduce_init"
2721 | "pmap_reduce"
2722 | "pcache"
2723 | "fan"
2724 | "fan_cap"
2725 | "pchannel"
2726 | "pselect"
2727 | "uniq"
2728 | "distinct"
2729 | "flatten"
2730 | "set"
2731 | "list_count"
2732 | "list_size"
2733 | "count"
2734 | "len"
2735 | "size"
2736 | "cnt"
2737 | "zip"
2738 | "async"
2739 | "trace"
2740 | "timer"
2741 | "await"
2742 | "slurp"
2743 | "capture"
2744 | "fetch_url"
2745 | "fetch"
2746 | "fetch_json"
2747 | "fetch_async"
2748 | "fetch_async_json"
2749 | "par_fetch"
2750 | "par_csv_read"
2751 | "par_pipeline"
2752 | "par_pipeline_stream"
2753 | "par_sed"
2754 | "join"
2755 | "json_encode"
2756 | "json_decode"
2757 | "json_jq"
2758 | "jwt_encode"
2759 | "jwt_decode"
2760 | "jwt_decode_unsafe"
2761 | "log_info"
2762 | "log_warn"
2763 | "log_error"
2764 | "log_debug"
2765 | "log_trace"
2766 | "log_json"
2767 | "log_level"
2768 | "sha256"
2769 | "sha1"
2770 | "md5"
2771 | "hmac_sha256"
2772 | "hmac"
2773 | "uuid"
2774 | "base64_encode"
2775 | "base64_decode"
2776 | "hex_encode"
2777 | "hex_decode"
2778 | "gzip"
2779 | "gunzip"
2780 | "zstd"
2781 | "zstd_decode"
2782 | "datetime_utc"
2783 | "datetime_from_epoch"
2784 | "datetime_parse_rfc3339"
2785 | "datetime_strftime"
2786 | "toml_decode"
2787 | "toml_encode"
2788 | "yaml_decode"
2789 | "yaml_encode"
2790 | "url_encode"
2791 | "url_decode"
2792 | "uri_escape"
2793 | "uri_unescape"
2794 | "split"
2795 | "reverse"
2796 | "reversed"
2797 | "not"
2798 | "ref"
2799 | "scalar"
2800 | "try"
2801 | "catch"
2802 | "finally"
2803 | "given"
2804 | "when"
2805 | "default"
2806 | "eval_timeout"
2807 | "tie"
2808 | "retry"
2809 | "rate_limit"
2810 | "every"
2811 | "gen"
2812 | "yield"
2813 | "match"
2814 | "filter"
2815 | "f"
2816 | "reject"
2817 | "grepv"
2818 | "collect"
2819 | "compact"
2820 | "concat"
2821 | "chain"
2822 | "min_by"
2823 | "max_by"
2824 | "sort_by"
2825 | "tally"
2826 | "find_index"
2827 | "each_with_index"
2828 | "fore"
2829 | "e"
2830 | "ep"
2831 | "flat_map"
2832 | "group_by"
2833 | "chunk_by"
2834 | "bench" => false,
2835 "thread" | "t" => !self.last_was_term,
2839 _ => matches!(tok, Token::Ident(_)),
2840 };
2841 Ok(tok)
2842 }
2843
2844 c => Err(self.syntax_err(format!("Unexpected character '{c}'"), self.line)),
2845 }
2846 }
2847
2848 pub fn tokenize(&mut self) -> PerlResult<Vec<(Token, usize)>> {
2850 let mut tokens = Vec::new();
2851 loop {
2852 self.skip_whitespace_and_comments();
2857 let line = self.line;
2858 let tok = self.next_token()?;
2859 if self.last_was_bare_positional {
2860 self.bare_positional_indices.insert(tokens.len());
2861 }
2862 if tok == Token::Eof {
2863 tokens.push((Token::Eof, line));
2864 break;
2865 }
2866 tokens.push((tok, line));
2867 }
2868 Ok(tokens)
2869 }
2870}
2871
2872#[cfg(test)]
2873mod tests {
2874 use super::*;
2875 use crate::token::Token;
2876
/// Empty input lexes to exactly one token: end of file.
#[test]
fn tokenize_empty_yields_eof() {
    let tokens = Lexer::new("").tokenize().expect("tokenize");
    assert_eq!(tokens.len(), 1);
    assert!(matches!(&tokens[0].0, Token::Eof));
}
2884
/// A bare decimal literal becomes an integer token carrying its value.
#[test]
fn tokenize_integer_literal() {
    let tokens = Lexer::new("42").tokenize().expect("tokenize");
    assert!(matches!(&tokens[0].0, Token::Integer(42)));
}
2891
2892 #[test]
2893 fn tokenize_keyword_my_and_semicolon() {
2894 let mut l = Lexer::new("my;");
2895 let t = l.tokenize().expect("tokenize");
2896 assert!(matches!(t[0].0, Token::Ident(ref s) if s == "my"));
2897 assert!(matches!(t[1].0, Token::Semicolon));
2898 }
2899
2900 #[test]
2901 fn tokenize_skips_hash_line_comment() {
2902 let mut l = Lexer::new("1#comment\n2");
2903 let t = l.tokenize().expect("tokenize");
2904 assert!(matches!(t[0].0, Token::Integer(1)));
2905 assert!(matches!(t[1].0, Token::Integer(2)));
2906 assert!(matches!(t[2].0, Token::Eof));
2907 }
2908
2909 #[test]
2910 fn tokenize_double_quoted_string_literal() {
2911 let mut l = Lexer::new(r#""hi""#);
2912 let t = l.tokenize().expect("tokenize");
2913 assert!(matches!(t[0].0, Token::DoubleString(ref s) if s == "hi"));
2914 }
2915
2916 #[test]
2917 fn tokenize_double_string_escaped_sigils_are_literal() {
2918 let mut l = Lexer::new(r#""my \$x""#);
2920 let t = l.tokenize().expect("tokenize");
2921 let want = format!("my {}x", LITERAL_DOLLAR_IN_DQUOTE);
2922 assert!(matches!(t[0].0, Token::DoubleString(ref s) if *s == want));
2923 }
2924
2925 #[test]
2926 fn tokenize_double_string_braced_hex_unicode_escape() {
2927 let mut l = Lexer::new(r#""\x{1215}""#);
2928 let t = l.tokenize().expect("tokenize");
2929 let want: String = ['\u{1215}'].into_iter().collect();
2930 assert!(matches!(t[0].0, Token::DoubleString(ref s) if *s == want));
2931 }
2932
2933 #[test]
2934 fn tokenize_double_string_braced_unicode_u_escape() {
2935 let mut l = Lexer::new(r#""\u{0301}""#);
2936 let t = l.tokenize().expect("tokenize");
2937 let want: String = ['\u{0301}'].into_iter().collect();
2938 assert!(matches!(t[0].0, Token::DoubleString(ref s) if *s == want));
2939 }
2940
2941 #[test]
2942 fn tokenize_double_string_braced_unicode_u_escape_multi() {
2943 let mut l = Lexer::new(r#""\u{0041}\u{00E9}\u{1F600}""#);
2945 let t = l.tokenize().expect("tokenize");
2946 assert!(matches!(t[0].0, Token::DoubleString(ref s) if s == "Aé😀"));
2947 }
2948
2949 #[test]
2950 fn tokenize_double_string_octal_escape() {
2951 let mut l = Lexer::new(r#""\101""#);
2952 let t = l.tokenize().expect("tokenize");
2953 assert!(matches!(t[0].0, Token::DoubleString(ref s) if s == "A"));
2954 }
2955
2956 #[test]
2957 fn tokenize_double_string_braced_octal_escape() {
2958 let mut l = Lexer::new(r#""\o{101}""#);
2959 let t = l.tokenize().expect("tokenize");
2960 assert!(matches!(t[0].0, Token::DoubleString(ref s) if s == "A"));
2961 }
2962
2963 #[test]
2964 fn tokenize_double_string_control_char_escape() {
2965 let mut l = Lexer::new(r#""\cA""#);
2966 let t = l.tokenize().expect("tokenize");
2967 assert!(matches!(t[0].0, Token::DoubleString(ref s) if s == "\x01"));
2968 }
2969
2970 #[test]
2971 fn tokenize_double_string_named_unicode_escape() {
2972 let mut l = Lexer::new(r#""\N{SNOWMAN}""#);
2973 let t = l.tokenize().expect("tokenize");
2974 assert!(matches!(t[0].0, Token::DoubleString(ref s) if s == "☃"));
2975 }
2976
2977 #[test]
2978 fn tokenize_double_string_named_unicode_u_plus() {
2979 let mut l = Lexer::new(r#""\N{U+2603}""#);
2980 let t = l.tokenize().expect("tokenize");
2981 assert!(matches!(t[0].0, Token::DoubleString(ref s) if s == "☃"));
2982 }
2983
2984 #[test]
2985 fn tokenize_double_string_unbraced_hex_two_digits() {
2986 let mut l = Lexer::new(r#""\x41""#);
2987 let t = l.tokenize().expect("tokenize");
2988 assert!(matches!(t[0].0, Token::DoubleString(ref s) if s == "A"));
2989 }
2990
2991 #[test]
2992 fn tokenize_single_quoted_string_literal() {
2993 let mut l = Lexer::new("'x'");
2994 let t = l.tokenize().expect("tokenize");
2995 assert!(matches!(t[0].0, Token::SingleString(ref s) if s == "x"));
2996 }
2997
2998 #[test]
2999 fn tokenize_spaceship_operator() {
3000 let mut l = Lexer::new("1 <=> 2");
3001 let t = l.tokenize().expect("tokenize");
3002 assert!(matches!(t[0].0, Token::Integer(1)));
3003 assert!(matches!(t[1].0, Token::Spaceship));
3004 assert!(matches!(t[2].0, Token::Integer(2)));
3005 }
3006
3007 #[test]
3008 fn tokenize_m_regex_literal() {
3009 let mut l = Lexer::new("m/abc/");
3010 let t = l.tokenize().expect("tokenize");
3011 assert!(matches!(t[0].0, Token::Regex(ref p, ref f, _) if p == "abc" && f.is_empty()));
3012 }
3013
3014 #[test]
3015 fn tokenize_q_brace_constructor() {
3016 let mut l = Lexer::new("q{lit}");
3017 let t = l.tokenize().expect("tokenize");
3018 assert!(matches!(t[0].0, Token::SingleString(ref s) if s == "lit"));
3019 }
3020
3021 #[test]
3023 fn tokenize_q_paren_balances_nested_parens_in_prototype() {
3024 let mut l = Lexer::new("q(fn ($) { 1 })");
3025 let t = l.tokenize().expect("tokenize");
3026 assert!(matches!(t[0].0, Token::SingleString(ref s) if s == "fn ($) { 1 }"));
3027 }
3028
3029 #[test]
3031 fn tokenize_qw_paren_balances_nested_parens() {
3032 let mut l = Lexer::new("qw( (SV*)pWARN_ALL )");
3033 let t = l.tokenize().expect("tokenize");
3034 assert!(matches!(t[0].0, Token::QW(ref w) if w.len() == 1 && w[0] == "(SV*)pWARN_ALL"));
3035 }
3036
3037 #[test]
3038 fn tokenize_float_literal() {
3039 let mut l = Lexer::new("3.25");
3040 let t = l.tokenize().expect("tokenize");
3041 assert!(matches!(t[0].0, Token::Float(f) if (f - 3.25).abs() < f64::EPSILON));
3042 }
3043
3044 #[test]
3045 fn tokenize_scientific_float() {
3046 let mut l = Lexer::new("1e2");
3047 let t = l.tokenize().expect("tokenize");
3048 assert!(matches!(t[0].0, Token::Float(f) if (f - 100.0).abs() < 1e-9));
3049 }
3050
3051 #[test]
3052 fn tokenize_hex_with_underscore_separators() {
3053 let mut l = Lexer::new("0x_FF");
3054 let t = l.tokenize().expect("tokenize");
3055 assert!(matches!(t[0].0, Token::Integer(255)));
3056 }
3057
3058 #[test]
3059 fn tokenize_qr_regex_with_flags() {
3060 let mut l = Lexer::new("qr/pat/i");
3061 let t = l.tokenize().expect("tokenize");
3062 assert!(matches!(t[0].0, Token::Regex(ref p, ref f, _) if p == "pat" && f == "i"));
3063 }
3064
3065 #[test]
3066 fn tokenize_m_slash_includes_gc_flags() {
3067 let mut l = Lexer::new("m/./gc");
3068 let t = l.tokenize().expect("tokenize");
3069 assert!(matches!(&t[0].0, Token::Regex(p, f, _) if p == "." && f == "gc"));
3070 }
3071
3072 #[test]
3073 fn tokenize_m_hash_delimiter_includes_gc_flags() {
3074 let mut l = Lexer::new("m#\\w#gc");
3075 let t = l.tokenize().expect("tokenize");
3076 assert!(matches!(&t[0].0, Token::Regex(p, f, _) if p == r"\w" && f == "gc"));
3077 }
3078
3079 #[test]
3080 fn tokenize_qr_slash_includes_gco_flags() {
3081 let mut l = Lexer::new("qr/x/gco");
3082 let t = l.tokenize().expect("tokenize");
3083 assert!(matches!(&t[0].0, Token::Regex(p, f, _) if p == "x" && f == "gco"));
3084 }
3085
3086 #[test]
3087 fn tokenize_qw_hash_delimiter_not_line_comment() {
3088 let mut l = Lexer::new("qw# a b #;");
3090 let t = l.tokenize().expect("tokenize");
3091 assert!(
3092 matches!(&t[0].0, Token::QW(w) if w == &["a", "b"]),
3093 "first={:?}",
3094 t.first()
3095 );
3096 }
3097
3098 #[test]
3099 fn tokenize_qq_hash_delimiter_single_line() {
3100 let mut l = Lexer::new("qq#x#;");
3101 let t = l.tokenize().expect("tokenize");
3102 assert!(matches!(&t[0].0, Token::DoubleString(s) if s == "x"));
3103 }
3104
3105 #[test]
3106 fn tokenize_qr_hash_delimiter_text_balanced_preamble() {
3107 let src = "qr#(\n [!=]~\n | split|grep|map\n | not|and|or|xor\n)#x";
3108 let mut l = Lexer::new(src);
3109 let t = l.tokenize().expect("tokenize");
3110 let Token::Regex(p, f, _) = &t[0].0 else {
3111 panic!("expected Regex, got {:?}", t[0].0);
3112 };
3113 let rest: Vec<_> = t.iter().skip(1).take(8).map(|x| &x.0).collect();
3114 assert!(f.contains('x'), "flags={f:?} pattern={p:?} rest={rest:?}");
3115 assert!(p.contains("[!=]~"), "{p:?}");
3116 assert!(p.contains("split|grep|map"), "{p:?}");
3117 }
3118
3119 #[test]
3120 fn tokenize_octal_integer_literal() {
3121 let mut l = Lexer::new("010");
3122 let t = l.tokenize().expect("tokenize");
3123 assert!(matches!(t[0].0, Token::Integer(8)));
3124 }
3125
3126 #[test]
3127 fn tokenize_binary_integer_literal() {
3128 let mut l = Lexer::new("0b1010");
3129 let t = l.tokenize().expect("tokenize");
3130 assert!(matches!(t[0].0, Token::Integer(10)));
3131 }
3132
3133 #[test]
3134 fn tokenize_filetest_exists() {
3135 let mut l = Lexer::new("-e '.'");
3136 let t = l.tokenize().expect("tokenize");
3137 assert!(matches!(t[0].0, Token::FileTest('e')));
3138 assert!(matches!(t[1].0, Token::SingleString(ref s) if s == "."));
3139 }
3140
3141 #[test]
3142 fn tokenize_filetest_tty() {
3143 let mut l = Lexer::new("-t 'STDIN'");
3144 let t = l.tokenize().expect("tokenize");
3145 assert!(matches!(t[0].0, Token::FileTest('t')));
3146 assert!(matches!(t[1].0, Token::SingleString(ref s) if s == "STDIN"));
3147 }
3148
3149 #[test]
3150 fn tokenize_power_and_range_operators() {
3151 let mut l = Lexer::new("2 ** 3");
3152 let t = l.tokenize().expect("tokenize");
3153 assert!(matches!(t[0].0, Token::Integer(2)));
3154 assert!(matches!(t[1].0, Token::Power));
3155 assert!(matches!(t[2].0, Token::Integer(3)));
3156
3157 let mut l = Lexer::new("1..4");
3158 let t = l.tokenize().expect("tokenize");
3159 assert!(matches!(t[0].0, Token::Integer(1)));
3160 assert!(matches!(t[1].0, Token::Range));
3161 assert!(matches!(t[2].0, Token::Integer(4)));
3162 }
3163
3164 #[test]
3165 fn tokenize_numeric_equality_operators() {
3166 let mut l = Lexer::new("1 == 2");
3167 let t = l.tokenize().expect("tokenize");
3168 assert!(matches!(t[0].0, Token::Integer(1)));
3169 assert!(matches!(t[1].0, Token::NumEq));
3170 assert!(matches!(t[2].0, Token::Integer(2)));
3171
3172 let mut l = Lexer::new("3 != 4");
3173 let t = l.tokenize().expect("tokenize");
3174 assert!(matches!(t[0].0, Token::Integer(3)));
3175 assert!(matches!(t[1].0, Token::NumNe));
3176 assert!(matches!(t[2].0, Token::Integer(4)));
3177 }
3178
3179 #[test]
3180 fn tokenize_logical_and_or_plus_assign() {
3181 let mut l = Lexer::new("1 && 0");
3182 let t = l.tokenize().expect("tokenize");
3183 assert!(matches!(t[0].0, Token::Integer(1)));
3184 assert!(matches!(t[1].0, Token::LogAnd));
3185 assert!(matches!(t[2].0, Token::Integer(0)));
3186
3187 let mut l = Lexer::new("0 || 9");
3188 let t = l.tokenize().expect("tokenize");
3189 assert!(matches!(t[0].0, Token::Integer(0)));
3190 assert!(matches!(t[1].0, Token::LogOr));
3191 assert!(matches!(t[2].0, Token::Integer(9)));
3192
3193 let mut l = Lexer::new("n += 1");
3194 let t = l.tokenize().expect("tokenize");
3195 assert!(matches!(t[0].0, Token::Ident(ref s) if s == "n"));
3196 assert!(matches!(t[1].0, Token::PlusAssign));
3197 assert!(matches!(t[2].0, Token::Integer(1)));
3198 }
3199
3200 #[test]
3201 fn tokenize_bitwise_and_operator() {
3202 let mut l = Lexer::new("3 & 5");
3203 let t = l.tokenize().expect("tokenize");
3204 assert!(matches!(t[0].0, Token::Integer(3)));
3205 assert!(matches!(t[1].0, Token::BitAnd));
3206 assert!(matches!(t[2].0, Token::Integer(5)));
3207 }
3208
3209 #[test]
3210 fn tokenize_braced_caret_scalar_global_phase() {
3211 let mut l = Lexer::new(r#"print ${^GLOBAL_PHASE}, "\n";"#);
3212 let t = l.tokenize().expect("tokenize");
3213 assert!(matches!(t[0].0, Token::Ident(ref s) if s == "print"));
3214 assert!(matches!(t[1].0, Token::ScalarVar(ref s) if s == "^GLOBAL_PHASE"));
3215 assert!(matches!(t[2].0, Token::Comma));
3216 assert!(matches!(t[3].0, Token::DoubleString(ref s) if s == "\n"));
3217 assert!(matches!(t[4].0, Token::Semicolon));
3218 }
3219
3220 #[test]
3221 fn tokenize_bitwise_or_and_assign() {
3222 let mut l = Lexer::new("$a |= $b");
3223 let t = l.tokenize().expect("tokenize");
3224 assert!(matches!(t[0].0, Token::ScalarVar(ref s) if s == "a"));
3225 assert!(matches!(t[1].0, Token::BitOrAssign));
3226 assert!(matches!(t[2].0, Token::ScalarVar(ref s) if s == "b"));
3227
3228 let mut l = Lexer::new("$a &= $b");
3229 let t = l.tokenize().expect("tokenize");
3230 assert!(matches!(t[1].0, Token::BitAndAssign));
3231 }
3232
3233 #[test]
3234 fn tokenize_division_and_modulo() {
3235 let mut l = Lexer::new("7 / 2");
3236 let t = l.tokenize().expect("tokenize");
3237 assert!(matches!(t[1].0, Token::Slash));
3238
3239 let mut l = Lexer::new("7 % 3");
3240 let t = l.tokenize().expect("tokenize");
3241 assert!(matches!(t[1].0, Token::Percent));
3242 }
3243
3244 #[test]
3245 fn tokenize_comma_fat_arrow_and_semicolon() {
3246 let mut l = Lexer::new("a => 1;");
3247 let t = l.tokenize().expect("tokenize");
3248 assert!(matches!(t[0].0, Token::Ident(ref s) if s == "a"));
3249 assert!(matches!(t[1].0, Token::FatArrow));
3250 assert!(matches!(t[2].0, Token::Integer(1)));
3251 assert!(matches!(t[3].0, Token::Semicolon));
3252 }
3253
3254 #[test]
3255 fn tokenize_minus_unary_vs_binary() {
3256 let mut l = Lexer::new("- 5");
3257 let t = l.tokenize().expect("tokenize");
3258 assert!(matches!(t[0].0, Token::Minus));
3259 assert!(matches!(t[1].0, Token::Integer(5)));
3260 }
3261
3262 #[test]
3263 fn tokenize_dollar_scalar_sigil() {
3264 let mut l = Lexer::new("$foo");
3265 let t = l.tokenize().expect("tokenize");
3266 assert!(matches!(t[0].0, Token::ScalarVar(ref s) if s == "foo"));
3267 }
3268
3269 #[test]
3271 fn tokenize_assign_not_pod_when_eq_not_line_start() {
3272 let mut l = Lexer::new("$_=foo;");
3273 let t = l.tokenize().expect("tokenize");
3274 assert!(matches!(t[0].0, Token::ScalarVar(ref s) if s == "_"));
3275 assert!(matches!(t[1].0, Token::Assign));
3276 assert!(matches!(t[2].0, Token::Ident(ref s) if s == "foo"));
3277 assert!(matches!(t[3].0, Token::Semicolon));
3278 }
3279
3280 #[test]
3281 fn tokenize_pod_equals_still_skipped_at_line_start() {
3282 let mut l = Lexer::new("=head1 NAME\ncode\n=cut\n$x;");
3283 let t = l.tokenize().expect("tokenize");
3284 assert!(matches!(t[0].0, Token::ScalarVar(ref s) if s == "x"));
3285 assert!(matches!(t[1].0, Token::Semicolon));
3286 }
3287
3288 #[test]
3289 fn tokenize_at_array_sigil() {
3290 let mut l = Lexer::new("@arr");
3291 let t = l.tokenize().expect("tokenize");
3292 assert!(matches!(t[0].0, Token::ArrayVar(ref s) if s == "arr"));
3293 }
3294
3295 #[test]
3296 fn tokenize_at_caret_capture_array() {
3297 let mut l = Lexer::new("@^CAPTURE");
3298 let t = l.tokenize().expect("tokenize");
3299 assert!(matches!(t[0].0, Token::ArrayVar(ref s) if s == "^CAPTURE"));
3300 }
3301
3302 #[test]
3303 fn tokenize_percent_caret_hook_hash() {
3304 let mut l = Lexer::new("%^HOOK");
3305 let t = l.tokenize().expect("tokenize");
3306 assert!(matches!(t[0].0, Token::HashVar(ref s) if s == "^HOOK"));
3307 }
3308
3309 #[test]
3310 fn tokenize_caret_letter_and_at_minus_plus() {
3311 let mut l = Lexer::new("$^I@-@+");
3312 let t = l.tokenize().expect("tokenize");
3313 assert!(matches!(t[0].0, Token::ScalarVar(ref s) if s == "^I"));
3314 assert!(matches!(t[1].0, Token::ArrayVar(ref s) if s == "-"));
3315 assert!(matches!(t[2].0, Token::ArrayVar(ref s) if s == "+"));
3316 }
3317
3318 #[test]
3319 fn tokenize_percent_hash_sigil() {
3320 let mut l = Lexer::new("%h");
3321 let t = l.tokenize().expect("tokenize");
3322 assert!(matches!(t[0].0, Token::HashVar(ref s) if s == "h"));
3323 }
3324
3325 #[test]
3326 fn tokenize_percent_plus_named_capture_hash() {
3327 let mut l = Lexer::new("%+");
3328 let t = l.tokenize().expect("tokenize");
3329 assert!(matches!(t[0].0, Token::HashVar(ref s) if s == "+"));
3330 }
3331
3332 #[test]
3333 fn tokenize_dollar_dollar_under_brace_is_not_pid() {
3334 let mut l = Lexer::new("$$_{$k}");
3336 let t = l.tokenize().expect("tokenize");
3337 assert!(matches!(t[0].0, Token::ScalarVar(ref s) if s == "_"));
3338 assert!(matches!(t[1].0, Token::LBrace));
3339 }
3340
3341 #[test]
3342 fn tokenize_braced_scalar_deref_try_tiny() {
3343 let mut l = Lexer::new("${$code_ref}");
3345 let t = l.tokenize().expect("tokenize");
3346 assert!(matches!(t[0].0, Token::DerefScalarVar(ref s) if s == "code_ref"));
3347 }
3348
3349 #[test]
3350 fn tokenize_braced_scalar_deref_package_qualified() {
3351 let mut l = Lexer::new("${$Foo::bar}");
3352 let t = l.tokenize().expect("tokenize");
3353 assert!(matches!(t[0].0, Token::DerefScalarVar(ref s) if s == "Foo::bar"));
3354 }
3355
3356 #[test]
3357 fn tokenize_dollar_colon_stash_brace() {
3358 let mut l = Lexer::new("$::{$pack}");
3360 let t = l.tokenize().expect("tokenize");
3361 assert!(matches!(t[0].0, Token::ScalarVar(ref s) if s == "::"));
3362 assert!(matches!(t[1].0, Token::LBrace));
3363 }
3364
3365 #[test]
3366 fn tokenize_ampersand_then_ident_is_bitand_not_coderef() {
3367 let mut l = Lexer::new("&f");
3369 let t = l.tokenize().expect("tokenize");
3370 assert!(matches!(t[0].0, Token::BitAnd));
3371 assert!(matches!(t[1].0, Token::Ident(ref s) if s == "f"));
3372 }
3373
3374 #[test]
3375 fn tokenize_qq_paren_constructor() {
3376 let mut l = Lexer::new("qq(x y)");
3377 let t = l.tokenize().expect("tokenize");
3378 assert!(matches!(t[0].0, Token::DoubleString(ref s) if s == "x y"));
3379 }
3380
3381 #[test]
3382 fn tokenize_qq_slash_escaped_dollar_is_literal() {
3383 let mut l = Lexer::new(r#"qq/my \$y/"#);
3384 let t = l.tokenize().expect("tokenize");
3385 let want = format!("my {}y", LITERAL_DOLLAR_IN_DQUOTE);
3386 assert!(matches!(t[0].0, Token::DoubleString(ref s) if *s == want));
3387 }
3388
3389 #[test]
3390 fn tokenize_s_substitution_alternate_delimiter() {
3391 let mut l = Lexer::new("s#a#b#");
3392 let t = l.tokenize().expect("tokenize");
3393 assert!(matches!(t[0].0, Token::Ident(ref s) if s.starts_with("\x00s\x00")));
3394 }
3395
3396 #[test]
3397 fn tokenize_tr_slash_delimiter() {
3398 let mut l = Lexer::new("tr/a/b/");
3399 let t = l.tokenize().expect("tokenize");
3400 assert!(matches!(t[0].0, Token::Ident(ref s) if s.starts_with("\x00tr\x00")));
3401 }
3402
3403 #[test]
3404 fn tokenize_y_synonym_for_tr() {
3405 let mut l = Lexer::new("y/x/y/");
3406 let t = l.tokenize().expect("tokenize");
3407 assert!(matches!(t[0].0, Token::Ident(ref s) if s.starts_with("\x00tr\x00")));
3408 }
3409
3410 #[test]
3411 fn tokenize_less_equal_greater_relops() {
3412 let mut l = Lexer::new("1 <= 2");
3413 let t = l.tokenize().expect("tokenize");
3414 assert!(matches!(t[1].0, Token::NumLe));
3415
3416 let mut l = Lexer::new("3 >= 2");
3417 let t = l.tokenize().expect("tokenize");
3418 assert!(matches!(t[1].0, Token::NumGe));
3419
3420 let mut l = Lexer::new("1 < 2");
3421 let t = l.tokenize().expect("tokenize");
3422 assert!(matches!(t[1].0, Token::NumLt));
3423
3424 let mut l = Lexer::new("3 > 2");
3425 let t = l.tokenize().expect("tokenize");
3426 assert!(matches!(t[1].0, Token::NumGt));
3427 }
3428
3429 #[test]
3430 fn tokenize_readline_scalar_handle() {
3431 let mut l = Lexer::new("<$fh>");
3432 let t = l.tokenize().expect("tokenize");
3433 assert!(matches!(t[0].0, Token::ReadLine(ref s) if s == "fh"));
3434 }
3435
3436 #[test]
3437 fn tokenize_shift_right_and_shift_left_assign() {
3438 let mut l = Lexer::new("8 >> 1");
3439 let t = l.tokenize().expect("tokenize");
3440 assert!(matches!(t[1].0, Token::ShiftRight));
3441
3442 let mut l = Lexer::new("8 << 1");
3443 let t = l.tokenize().expect("tokenize");
3444 assert!(matches!(t[1].0, Token::ShiftLeft));
3445
3446 let mut l = Lexer::new("x <<= 3");
3447 let t = l.tokenize().expect("tokenize");
3448 assert!(matches!(t[1].0, Token::ShiftLeftAssign));
3449 }
3450
3451 #[test]
3452 fn tokenize_heredoc_after_print_not_shift() {
3453 let src = "print <<EOT\nhi\nEOT\n";
3454 let mut l = Lexer::new(src);
3455 let t = l.tokenize().expect("tokenize");
3456 assert!(matches!(t[0].0, Token::Ident(ref s) if s == "print"));
3457 assert!(
3458 matches!(&t[1].0, Token::HereDoc(tag, body, interpolate) if tag == "EOT" && body == "hi\n" && *interpolate),
3459 "got {:?}",
3460 t[1].0
3461 );
3462 }
3463
3464 #[test]
3465 fn tokenize_bitwise_or_xor() {
3466 let mut l = Lexer::new("3 | 1");
3467 let t = l.tokenize().expect("tokenize");
3468 assert!(matches!(t[1].0, Token::BitOr));
3469
3470 let mut l = Lexer::new("3 ^ 1");
3471 let t = l.tokenize().expect("tokenize");
3472 assert!(matches!(t[1].0, Token::BitXor));
3473 }
3474
3475 #[test]
3476 fn tokenize_pipe_forward_vs_bitor_vs_logor() {
3477 let mut l = Lexer::new("1 |> f");
3479 let t = l.tokenize().expect("tokenize");
3480 assert!(matches!(t[1].0, Token::PipeForward), "got {:?}", t[1].0);
3481
3482 let mut l = Lexer::new("a | b || c |> d");
3484 let t = l.tokenize().expect("tokenize");
3485 let kinds: Vec<_> = t.iter().map(|(k, _)| k.clone()).collect();
3486 assert!(kinds.iter().any(|k| matches!(k, Token::BitOr)));
3487 assert!(kinds.iter().any(|k| matches!(k, Token::LogOr)));
3488 assert!(kinds.iter().any(|k| matches!(k, Token::PipeForward)));
3489 }
3490
3491 #[test]
3492 fn tokenize_compare_and_three_way_string_ops() {
3493 let mut l = Lexer::new("\"a\" cmp \"b\"");
3494 let t = l.tokenize().expect("tokenize");
3495 assert!(matches!(t[1].0, Token::StrCmp));
3496 }
3497
3498 #[test]
3499 fn tokenize_package_double_colon_splits_qualified_name() {
3500 let mut l = Lexer::new("Foo::Bar::baz");
3501 let t = l.tokenize().expect("tokenize");
3502 assert!(matches!(t[0].0, Token::Ident(ref s) if s == "Foo"));
3503 assert!(matches!(t[1].0, Token::PackageSep));
3504 assert!(matches!(t[2].0, Token::Ident(ref s) if s == "Bar"));
3505 assert!(matches!(t[3].0, Token::PackageSep));
3506 assert!(matches!(t[4].0, Token::Ident(ref s) if s == "baz"));
3507 }
3508
3509 #[test]
3510 fn tokenize_pod_line_skipped_like_comment_prefix() {
3511 let mut l = Lexer::new("=pod\n=cut\n42");
3513 let t = l.tokenize().expect("tokenize");
3514 assert!(matches!(t[0].0, Token::Integer(42)));
3515 }
3516
3517 #[test]
3518 fn tokenize_underscore_in_identifier() {
3519 let mut l = Lexer::new("__PACKAGE__");
3520 let t = l.tokenize().expect("tokenize");
3521 assert!(matches!(t[0].0, Token::Ident(ref s) if s == "__PACKAGE__"));
3522 }
3523
3524 #[test]
3526 fn tokenize_x_repeat_vs_sub_name() {
3527 let mut l = Lexer::new("3 x 4");
3528 let t = l.tokenize().expect("tokenize");
3529 assert!(matches!(t[1].0, Token::X));
3530
3531 let mut l = Lexer::new("sub x { 1 }");
3532 let t = l.tokenize().expect("tokenize");
3533 assert!(matches!(t[0].0, Token::Ident(ref s) if s == "sub"));
3534 assert!(matches!(t[1].0, Token::Ident(ref s) if s == "x"));
3535 }
3536}