Skip to main content

coreutils_rs/expr/
core.rs

1use std::fmt;
2
3use regex::Regex;
4
/// Exit code: expression is non-null and non-zero.
pub const EXIT_SUCCESS: i32 = 0;
/// Exit code: expression is null or zero.
pub const EXIT_FAILURE: i32 = 1;
/// Exit code: expression is syntactically invalid.
///
/// `ExprError::exit_code` also reports this code for every error variant other
/// than `ExprError::RegexError`.
pub const EXIT_EXPR_ERROR: i32 = 2;
/// Exit code: regex error (what `ExprError::exit_code` reports for
/// `ExprError::RegexError`).
pub const EXIT_REGEX_ERROR: i32 = 3;
13
/// A value produced by evaluating an expr expression.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExprValue {
    /// A 64-bit signed integer (result of arithmetic, comparison, counting).
    Integer(i64),
    /// Arbitrary text (an operand kept verbatim, or a captured substring).
    Str(String),
}

impl fmt::Display for ExprValue {
    /// Writes integers in decimal and strings verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ExprValue::Integer(n) => write!(f, "{}", n),
            ExprValue::Str(s) => write!(f, "{}", s),
        }
    }
}

impl ExprValue {
    /// Returns true if this value is considered "null": the integer 0, the
    /// empty string, or a string that spells zero.
    ///
    /// A string spells zero when it is an optional leading `-` followed by one
    /// or more `0` characters, so `"0"`, `"00"` and `"-0"` are all null while
    /// `"-"` and `"0a"` are not. This matters for strings that never went
    /// through integer parsing, e.g. text captured by a `\(...\)` group.
    pub fn is_null(&self) -> bool {
        match self {
            ExprValue::Integer(n) => *n == 0,
            ExprValue::Str(s) => {
                if s.is_empty() {
                    return true;
                }
                let digits = s.strip_prefix('-').unwrap_or(s);
                !digits.is_empty() && digits.bytes().all(|b| b == b'0')
            }
        }
    }

    /// Try to interpret this value as an integer.
    ///
    /// Integers are returned as-is; strings are parsed with `parse_integer`,
    /// yielding `None` when the text is not a valid integer.
    pub fn as_integer(&self) -> Option<i64> {
        match self {
            ExprValue::Integer(n) => Some(*n),
            ExprValue::Str(s) => parse_integer(s),
        }
    }
}

/// Parse an integer from a string, accepting optional leading sign and digits only.
///
/// Surrounding whitespace is ignored. Returns `None` for empty input, for any
/// non-digit character after the single optional sign, and for values outside
/// the `i64` range.
fn parse_integer(s: &str) -> Option<i64> {
    let s = s.trim();
    // Validate the shape ourselves: exactly one optional sign, then ASCII digits.
    let digits = s
        .strip_prefix(|c: char| c == '-' || c == '+')
        .unwrap_or(s);
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    // Parse the signed text in one go. Parsing the magnitude first and then
    // applying the sign would reject i64::MIN, whose magnitude does not fit
    // in an i64.
    s.parse::<i64>().ok()
}
66
/// Failures that evaluating an expression can report.
#[derive(Debug, Clone)]
pub enum ExprError {
    /// The expression is malformed; the payload describes the problem.
    Syntax(String),
    /// The divisor of `/` or `%` was zero.
    DivisionByZero,
    /// The regex pattern could not be compiled; the payload is the full message.
    RegexError(String),
    /// An operand that had to be an integer was not one.
    NonIntegerArgument,
    /// An operand was expected but the arguments ran out.
    MissingOperand,
}

impl fmt::Display for ExprError {
    /// Renders the user-facing message for the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Variants carrying text are written directly; the rest map to a
        // fixed message emitted at the end.
        let fixed = match self {
            ExprError::Syntax(detail) => return write!(f, "syntax error: {}", detail),
            ExprError::RegexError(detail) => return f.write_str(detail),
            ExprError::DivisionByZero => "division by zero",
            ExprError::NonIntegerArgument => "non-integer argument",
            ExprError::MissingOperand => "missing operand",
        };
        f.write_str(fixed)
    }
}
93
94impl ExprError {
95    /// Returns the exit code for this error type.
96    pub fn exit_code(&self) -> i32 {
97        match self {
98            ExprError::RegexError(_) => EXIT_REGEX_ERROR,
99            _ => EXIT_EXPR_ERROR,
100        }
101    }
102}
103
104/// Recursive descent parser for expr expressions.
105struct ExprParser<'a> {
106    args: &'a [String],
107    pos: usize,
108}
109
110impl<'a> ExprParser<'a> {
111    fn new(args: &'a [String]) -> Self {
112        ExprParser { args, pos: 0 }
113    }
114
115    /// Peek at the current token without consuming it.
116    fn peek(&self) -> Option<&str> {
117        if self.pos < self.args.len() {
118            Some(self.args[self.pos].as_str())
119        } else {
120            None
121        }
122    }
123
124    /// Consume the current token and advance.
125    fn consume(&mut self) -> Option<&str> {
126        if self.pos < self.args.len() {
127            let tok = self.args[self.pos].as_str();
128            self.pos += 1;
129            Some(tok)
130        } else {
131            None
132        }
133    }
134
135    /// Expect a specific token, returning an error if not found.
136    fn expect(&mut self, expected: &str) -> Result<(), ExprError> {
137        match self.consume() {
138            Some(tok) if tok == expected => Ok(()),
139            Some(tok) => Err(ExprError::Syntax(format!(
140                "expected '{}', found '{}'",
141                expected, tok
142            ))),
143            None => Err(ExprError::Syntax(format!("expected '{}'", expected))),
144        }
145    }
146
147    /// Parse the top-level: OR expression.
148    /// OR: AND ( '|' AND )*
149    fn parse_or(&mut self) -> Result<ExprValue, ExprError> {
150        let mut left = self.parse_and()?;
151        while self.peek() == Some("|") {
152            self.consume();
153            let right = self.parse_and()?;
154            left = if !left.is_null() { left } else { right };
155        }
156        Ok(left)
157    }
158
159    /// Parse AND expression.
160    /// AND: COMPARISON ( '&' COMPARISON )*
161    fn parse_and(&mut self) -> Result<ExprValue, ExprError> {
162        let mut left = self.parse_comparison()?;
163        while self.peek() == Some("&") {
164            self.consume();
165            let right = self.parse_comparison()?;
166            left = if !left.is_null() && !right.is_null() {
167                left
168            } else {
169                ExprValue::Integer(0)
170            };
171        }
172        Ok(left)
173    }
174
175    /// Parse comparison expression.
176    /// COMPARISON: ADDITION ( ('<'|'<='|'='|'!='|'>='|'>') ADDITION )*
177    fn parse_comparison(&mut self) -> Result<ExprValue, ExprError> {
178        let mut left = self.parse_addition()?;
179        while matches!(
180            self.peek(),
181            Some("<") | Some("<=") | Some("=") | Some("!=") | Some(">=") | Some(">")
182        ) {
183            let op = self.consume().unwrap().to_string();
184            let right = self.parse_addition()?;
185            let result = compare_values(&left, &right, &op);
186            left = ExprValue::Integer(if result { 1 } else { 0 });
187        }
188        Ok(left)
189    }
190
191    /// Parse addition/subtraction.
192    /// ADDITION: MULTIPLICATION ( ('+'|'-') MULTIPLICATION )*
193    fn parse_addition(&mut self) -> Result<ExprValue, ExprError> {
194        let mut left = self.parse_multiplication()?;
195        while matches!(self.peek(), Some("+") | Some("-")) {
196            let op = self.consume().unwrap().to_string();
197            let right = self.parse_multiplication()?;
198            let lv = left.as_integer().ok_or(ExprError::NonIntegerArgument)?;
199            let rv = right.as_integer().ok_or(ExprError::NonIntegerArgument)?;
200            left = match op.as_str() {
201                "+" => ExprValue::Integer(
202                    lv.checked_add(rv)
203                        .ok_or_else(|| ExprError::Syntax("integer result too large".into()))?,
204                ),
205                "-" => ExprValue::Integer(
206                    lv.checked_sub(rv)
207                        .ok_or_else(|| ExprError::Syntax("integer result too large".into()))?,
208                ),
209                _ => unreachable!(),
210            };
211        }
212        Ok(left)
213    }
214
215    /// Parse multiplication/division/modulo.
216    /// MULTIPLICATION: MATCH ( ('*'|'/'|'%') MATCH )*
217    fn parse_multiplication(&mut self) -> Result<ExprValue, ExprError> {
218        let mut left = self.parse_match()?;
219        while matches!(self.peek(), Some("*") | Some("/") | Some("%")) {
220            let op = self.consume().unwrap().to_string();
221            let right = self.parse_match()?;
222            let lv = left.as_integer().ok_or(ExprError::NonIntegerArgument)?;
223            let rv = right.as_integer().ok_or(ExprError::NonIntegerArgument)?;
224            left = match op.as_str() {
225                "*" => ExprValue::Integer(
226                    lv.checked_mul(rv)
227                        .ok_or_else(|| ExprError::Syntax("integer result too large".into()))?,
228                ),
229                "/" => {
230                    if rv == 0 {
231                        return Err(ExprError::DivisionByZero);
232                    }
233                    ExprValue::Integer(
234                        lv.checked_div(rv)
235                            .ok_or_else(|| ExprError::Syntax("integer result too large".into()))?,
236                    )
237                }
238                "%" => {
239                    if rv == 0 {
240                        return Err(ExprError::DivisionByZero);
241                    }
242                    ExprValue::Integer(
243                        lv.checked_rem(rv)
244                            .ok_or_else(|| ExprError::Syntax("integer result too large".into()))?,
245                    )
246                }
247                _ => unreachable!(),
248            };
249        }
250        Ok(left)
251    }
252
253    /// Parse match/colon expression.
254    /// MATCH: PRIMARY ( ':' PRIMARY )?
255    fn parse_match(&mut self) -> Result<ExprValue, ExprError> {
256        let left = self.parse_primary()?;
257        if self.peek() == Some(":") {
258            self.consume();
259            let right = self.parse_primary()?;
260            let pattern_str = match &right {
261                ExprValue::Str(s) => s.clone(),
262                ExprValue::Integer(n) => n.to_string(),
263            };
264            let string = match &left {
265                ExprValue::Str(s) => s.clone(),
266                ExprValue::Integer(n) => n.to_string(),
267            };
268            return do_match(&string, &pattern_str);
269        }
270        Ok(left)
271    }
272
273    /// Parse primary expression: keyword functions, parenthesized expressions, or atoms.
274    fn parse_primary(&mut self) -> Result<ExprValue, ExprError> {
275        match self.peek() {
276            None => Err(ExprError::MissingOperand),
277            Some("(") => {
278                self.consume();
279                let val = self.parse_or()?;
280                self.expect(")")?;
281                Ok(val)
282            }
283            Some("match") => {
284                self.consume();
285                let string_val = self.parse_primary()?;
286                let pattern_val = self.parse_primary()?;
287                let string = match &string_val {
288                    ExprValue::Str(s) => s.clone(),
289                    ExprValue::Integer(n) => n.to_string(),
290                };
291                let pattern = match &pattern_val {
292                    ExprValue::Str(s) => s.clone(),
293                    ExprValue::Integer(n) => n.to_string(),
294                };
295                do_match(&string, &pattern)
296            }
297            Some("substr") => {
298                self.consume();
299                let string_val = self.parse_primary()?;
300                let pos_val = self.parse_primary()?;
301                let len_val = self.parse_primary()?;
302                let string = match &string_val {
303                    ExprValue::Str(s) => s.clone(),
304                    ExprValue::Integer(n) => n.to_string(),
305                };
306                let pos = pos_val.as_integer().ok_or(ExprError::NonIntegerArgument)?;
307                let len = len_val.as_integer().ok_or(ExprError::NonIntegerArgument)?;
308                Ok(do_substr(&string, pos, len))
309            }
310            Some("index") => {
311                self.consume();
312                let string_val = self.parse_primary()?;
313                let chars_val = self.parse_primary()?;
314                let string = match &string_val {
315                    ExprValue::Str(s) => s.clone(),
316                    ExprValue::Integer(n) => n.to_string(),
317                };
318                let chars = match &chars_val {
319                    ExprValue::Str(s) => s.clone(),
320                    ExprValue::Integer(n) => n.to_string(),
321                };
322                Ok(do_index(&string, &chars))
323            }
324            Some("length") => {
325                self.consume();
326                let val = self.parse_primary()?;
327                let s = match &val {
328                    ExprValue::Str(s) => s.clone(),
329                    ExprValue::Integer(n) => n.to_string(),
330                };
331                Ok(ExprValue::Integer(s.len() as i64))
332            }
333            Some("+") => {
334                // GNU expr extension: '+' is a quoting prefix that treats the
335                // next token as a literal string, even if it would otherwise be
336                // interpreted as a keyword (match, length, substr, index).
337                self.consume();
338                match self.consume() {
339                    Some(tok) => {
340                        let tok = tok.to_string();
341                        if let Some(n) = parse_integer(&tok) {
342                            Ok(ExprValue::Integer(n))
343                        } else {
344                            Ok(ExprValue::Str(tok))
345                        }
346                    }
347                    None => Err(ExprError::Syntax("missing argument after '+'".to_string())),
348                }
349            }
350            _ => {
351                // Atom: a literal string or number.
352                let tok = self.consume().unwrap().to_string();
353                if let Some(n) = parse_integer(&tok) {
354                    Ok(ExprValue::Integer(n))
355                } else {
356                    Ok(ExprValue::Str(tok))
357                }
358            }
359        }
360    }
361}
362
363/// Compare two ExprValues. If both are integers, compare numerically;
364/// otherwise compare as strings lexicographically.
365fn compare_values(left: &ExprValue, right: &ExprValue, op: &str) -> bool {
366    let left_int = left.as_integer();
367    let right_int = right.as_integer();
368
369    if let (Some(lv), Some(rv)) = (left_int, right_int) {
370        match op {
371            "<" => lv < rv,
372            "<=" => lv <= rv,
373            "=" => lv == rv,
374            "!=" => lv != rv,
375            ">=" => lv >= rv,
376            ">" => lv > rv,
377            _ => false,
378        }
379    } else {
380        let ls = left.to_string();
381        let rs = right.to_string();
382        match op {
383            "<" => ls < rs,
384            "<=" => ls <= rs,
385            "=" => ls == rs,
386            "!=" => ls != rs,
387            ">=" => ls >= rs,
388            ">" => ls > rs,
389            _ => false,
390        }
391    }
392}
393
/// Convert a POSIX BRE (Basic Regular Expression) pattern to a Rust regex pattern.
/// BRE differences from ERE:
/// - `\(` and `\)` are group delimiters (not `(` and `)`)
/// - `\{` and `\}` are interval delimiters
/// - `(` and `)` are literal in BRE
/// - `{` and `}` are literal in BRE
/// - `\+`, `\?` are special in BRE (some implementations)
/// - `+`, `?` are literal in BRE
/// - The match is always anchored at the beginning (as if `^` is prepended).
///
/// When inside a `\(` ... `\)` group, `\.` is treated as a literal dot insertion
/// that does not consume input. It is excluded from the regex and instead tracked
/// separately so that the match result can be reconstructed with literal dots.
///
/// The pattern is walked character by character (not byte by byte) so that
/// multi-byte UTF-8 characters are copied into the output intact.
fn bre_to_rust_regex(pattern: &str) -> String {
    let mut result = String::with_capacity(pattern.len() + 2);
    // BRE patterns in expr are implicitly anchored at the start
    result.push('^');

    let mut group_depth = 0u32;
    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        if c == '\\' {
            match chars.next() {
                // `\(` / `\)` delimit a group.
                Some('(') => {
                    group_depth += 1;
                    result.push('(');
                }
                Some(')') => {
                    group_depth = group_depth.saturating_sub(1);
                    result.push(')');
                }
                // `\{`, `\}`, `\+`, `\?` are operators in BRE; emit the bare form.
                Some(op) if matches!(op, '{' | '}' | '+' | '?') => result.push(op),
                // Inside a group, `\.` is a literal dot insertion that does
                // not consume input — leave it out of the regex.
                Some('.') if group_depth > 0 => {}
                // Back-references, `\n`, `\t`, escaped metacharacters and
                // unknown escapes all pass through as backslash + character.
                Some(other) => {
                    result.push('\\');
                    result.push(other);
                }
                // A lone trailing backslash is copied as-is.
                None => result.push('\\'),
            }
        } else if matches!(c, '(' | ')' | '{' | '}' | '+' | '?' | '|') {
            // Literal in BRE, but an operator in Rust regex syntax: escape it.
            result.push('\\');
            result.push(c);
        } else {
            result.push(c);
        }
    }
    result
}
527
/// Extract a template for the first `\(` ... `\)` group in a BRE pattern.
/// The template is a list of entries: `true` means a literal dot insertion (from `\.`),
/// `false` means a character matched from the input.
/// Returns None if there is no group.
///
/// The pattern is scanned by character, so each pattern character (or escape
/// pair) inside the group contributes exactly one entry regardless of how many
/// bytes it occupies. An unterminated group yields the entries seen so far.
fn bre_group_template(pattern: &str) -> Option<Vec<bool>> {
    let mut in_group = false;
    let mut template = Vec::new();

    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        // A backslash forms an escape pair with the following character;
        // a lone trailing backslash is treated as an ordinary character.
        let escaped = if c == '\\' { chars.next() } else { None };
        match escaped {
            Some('(') if !in_group => in_group = true,
            Some(')') if in_group => return Some(template),
            // \. inside group = literal dot insertion (not consuming input)
            Some('.') if in_group => template.push(true),
            // Anything else inside the group consumes a character from input
            _ if in_group => template.push(false),
            _ => {}
        }
    }
    if in_group {
        Some(template)
    } else {
        None
    }
}
572
/// Check whether a BRE pattern contains `\(` ... `\)` groups.
///
/// Escape pairs are consumed as a unit, so an escaped backslash followed by a
/// literal parenthesis (`\\(`) is not mistaken for a group opener.
fn bre_has_groups(pattern: &str) -> bool {
    let mut chars = pattern.chars();
    while let Some(c) = chars.next() {
        // On a backslash, the next character belongs to the same escape and
        // is consumed here whether or not it is `(`.
        if c == '\\' && chars.next() == Some('(') {
            return true;
        }
    }
    false
}
585
586/// Perform regex match operation.
587/// If the pattern has `\(` ... `\)` groups, returns the first captured group (or empty string).
588/// When the group contains `\.`, literal dots are inserted into the result at those positions
589/// without consuming characters from the input.
590/// Otherwise returns the number of matched characters (or 0).
591fn do_match(string: &str, pattern: &str) -> Result<ExprValue, ExprError> {
592    let has_groups = bre_has_groups(pattern);
593    let rust_pattern = bre_to_rust_regex(pattern);
594
595    let re = Regex::new(&rust_pattern)
596        .map_err(|e| ExprError::RegexError(format!("Invalid regular expression: {}", e)))?;
597
598    match re.captures(string) {
599        Some(caps) => {
600            if has_groups {
601                // Return the first captured group, expanded with literal dot insertions
602                match caps.get(1) {
603                    Some(m) => {
604                        let captured = m.as_str();
605                        if let Some(template) = bre_group_template(pattern) {
606                            let mut result = String::new();
607                            let mut char_iter = captured.chars();
608                            for is_literal_dot in &template {
609                                if *is_literal_dot {
610                                    result.push('.');
611                                } else if let Some(ch) = char_iter.next() {
612                                    result.push(ch);
613                                }
614                            }
615                            Ok(ExprValue::Str(result))
616                        } else {
617                            Ok(ExprValue::Str(captured.to_string()))
618                        }
619                    }
620                    None => Ok(ExprValue::Str(String::new())),
621                }
622            } else {
623                // Return the number of matched characters
624                let m = caps.get(0).unwrap();
625                Ok(ExprValue::Integer(m.as_str().len() as i64))
626            }
627        }
628        None => {
629            if has_groups {
630                Ok(ExprValue::Str(String::new()))
631            } else {
632                Ok(ExprValue::Integer(0))
633            }
634        }
635    }
636}
637
638/// Perform the substr operation: extract a substring.
639/// Position is 1-based. If pos or len <= 0 or pos > length, returns empty string.
640fn do_substr(string: &str, pos: i64, len: i64) -> ExprValue {
641    if pos <= 0 || len <= 0 {
642        return ExprValue::Str(String::new());
643    }
644    let start = (pos - 1) as usize;
645    let slen = string.len();
646    if start >= slen {
647        return ExprValue::Str(String::new());
648    }
649    let end = (start + len as usize).min(slen);
650    ExprValue::Str(string[start..end].to_string())
651}
652
653/// Perform the index operation: find the position of the first character in CHARS
654/// that appears in STRING. Returns 0 if not found. Position is 1-based.
655fn do_index(string: &str, chars: &str) -> ExprValue {
656    for (i, c) in string.chars().enumerate() {
657        if chars.contains(c) {
658            return ExprValue::Integer((i + 1) as i64);
659        }
660    }
661    ExprValue::Integer(0)
662}
663
664/// Evaluate an expr expression from command-line arguments.
665pub fn evaluate_expr(args: &[String]) -> Result<ExprValue, ExprError> {
666    if args.is_empty() {
667        return Err(ExprError::MissingOperand);
668    }
669    let mut parser = ExprParser::new(args);
670    let result = parser.parse_or()?;
671    if parser.pos < parser.args.len() {
672        return Err(ExprError::Syntax(format!(
673            "unexpected argument '{}'",
674            parser.args[parser.pos]
675        )));
676    }
677    Ok(result)
678}