Skip to main content

polysub/
parser.rs

1// Copyright 2025-2026 Cornell University
2// released under MIT license
3// author: Kevin Laeufer <laeufer@cornell.edu>
4
5use crate::poly::VarIndex;
6use crate::{Coef, Mod};
7use num_bigint::BigInt;
8use num_traits::{Num, ToPrimitive};
9use std::ops::Neg;
10
11/// Used to efficiently represent coefficients generated by the polynomial parser.
12/// Needs to be converted into a type that implements `Coef` to actually perform polynomial
13/// substitution.
14pub enum IntCoef {
15    I64(i64),
16    Big(BigInt),
17}
18
19impl IntCoef {
20    fn neg(&self) -> Self {
21        match self {
22            IntCoef::I64(v) => IntCoef::I64(-*v),
23            IntCoef::Big(v) => IntCoef::Big(v.neg()),
24        }
25    }
26
27    pub fn into_coef<C: Coef>(self, m: Mod) -> C {
28        match self {
29            IntCoef::I64(v) => C::from_i64(v, m),
30            IntCoef::Big(v) => C::from_big(&v, m),
31        }
32    }
33
34    pub fn as_i64(&self) -> Option<i64> {
35        match self {
36            IntCoef::I64(v) => Some(*v),
37            IntCoef::Big(v) => v.to_i64(),
38        }
39    }
40}
41
42pub fn parse_poly(line: &[u8]) -> impl Iterator<Item = (IntCoef, Vec<VarIndex>)> {
43    PolyParser::new(line)
44}
45
46struct PolyParser<'a> {
47    line: &'a [u8],
48}
49
50impl<'a> PolyParser<'a> {
51    fn new(line: &'a [u8]) -> Self {
52        Self { line }
53    }
54}
55
56impl<'a> Iterator for PolyParser<'a> {
57    type Item = (IntCoef, Vec<VarIndex>);
58
59    fn next(&mut self) -> Option<Self::Item> {
60        use State::*;
61        let mut state = LookingForSign(0);
62        let mut coef: Option<IntCoef> = None;
63        let mut vars = Vec::with_capacity(16);
64        while !self.line.is_empty() {
65            state = match state {
66                LookingForSign(i) => {
67                    // end of line special case
68                    if i == self.line.len() {
69                        self.line = &[];
70                        continue;
71                    }
72                    match self.line[i] {
73                        b'+' => ParsingCoef(Sign::Plus, i + 1, i + 1),
74                        b'-' => ParsingCoef(Sign::Minus, i + 1, i + 1),
75                        // sometimes there is no sign
76                        b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => {
77                            ParsingCoef(Sign::Plus, i, i + 1)
78                        }
79                        _ => LookingForSign(i + 1),
80                    }
81                }
82                ParsingCoef(sign, start, i) => {
83                    // end of line special case
84                    if i == self.line.len() {
85                        let coef = parse_coef(&self.line[start..i]);
86                        self.line = &[];
87                        return Some((coef, vec![]));
88                    }
89                    match self.line[i] {
90                        // skip whitespace and `[` at the beginning
91                        b' ' | b'[' if start == i => ParsingCoef(sign, i + 1, i + 1),
92                        b'*' | b']' => {
93                            let cc = parse_coef(&self.line[start..i]);
94                            if sign == Sign::Minus {
95                                coef = Some(cc.neg());
96                            } else {
97                                coef = Some(cc);
98                            }
99                            ParsingVar(i + 1, i + 1)
100                        }
101                        _ => ParsingCoef(sign, start, i + 1),
102                    }
103                }
104                ParsingVar(start, i) => {
105                    // end of line special case
106                    if i == self.line.len() {
107                        let var: u32 = std::str::from_utf8(&self.line[start..i])
108                            .unwrap()
109                            .parse()
110                            .unwrap();
111                        self.line = &[];
112                        vars.push(var.into());
113                        return Some((coef.unwrap(), vars));
114                    }
115                    match self.line[i] {
116                        b'x' => {
117                            debug_assert_eq!(start, i);
118                            ParsingVar(i + 1, i + 1)
119                        }
120                        b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => {
121                            ParsingVar(start, i + 1)
122                        }
123                        other => {
124                            if start != i {
125                                let var: u32 = std::str::from_utf8(&self.line[start..i])
126                                    .unwrap()
127                                    .parse()
128                                    .unwrap();
129                                vars.push(var.into());
130                            }
131                            // there is another var waiting!
132                            if other == b'*' {
133                                ParsingVar(i + 1, i + 1)
134                            } else {
135                                // done!
136                                self.line = &self.line[i..];
137                                return Some((coef.unwrap(), vars));
138                            }
139                        }
140                    }
141                }
142            }
143        }
144        None
145    }
146}
147
148fn parse_coef(token: &[u8]) -> IntCoef {
149    let s = std::str::from_utf8(token).unwrap().trim();
150    // first, try to see if we can parse as an i64
151    if let Ok(v) = str::parse::<i64>(s) {
152        IntCoef::I64(v)
153    } else {
154        // if that fails, we use a bigint
155        IntCoef::Big(BigInt::from_str_radix(s, 10).unwrap())
156    }
157}
158
159#[derive(Debug, Copy, Clone)]
160enum State {
161    LookingForSign(usize),
162    ParsingCoef(Sign, usize, usize),
163    ParsingVar(usize, usize),
164}
165
166#[derive(Debug, Copy, Clone, PartialEq)]
167enum Sign {
168    Minus,
169    Plus,
170}
171
172#[cfg(test)]
173mod tests {
174    use super::*;
175
176    fn do_parse_poly(line: &[u8]) -> Vec<(i64, Vec<u32>)> {
177        parse_poly(line)
178            .map(|(c, vars)| {
179                (
180                    c.as_i64().unwrap(),
181                    vars.into_iter().map(|v| u32::from(v)).collect(),
182                )
183            })
184            .collect()
185    }
186
187    #[test]
188    fn test_parse_poly() {
189        assert_eq!(
190            do_parse_poly(b"-1*x2817+1*x33"),
191            [(-1, vec![2817]), (1, vec![33])]
192        );
193        assert_eq!(do_parse_poly(b"+2147483648*x2848+1073741824*x2847+536870912*x2846+268435456*x2845+134217728*x2844+67108864*x2843+33554432*x2842+16777216*x2841"),
194                   [(2147483648i64, vec![2848]),
195                       (1073741824i64, vec![2847]),
196                       (536870912i64, vec![2846]),
197                       (268435456i64, vec![2845]),
198                       (134217728i64, vec![2844]),
199                       (67108864i64, vec![2843]),
200                       (33554432i64, vec![2842]),
201                       (16777216i64, vec![2841]),
202                   ]);
203
204        // here there is a `+1` at the end
205        assert_eq!(
206            do_parse_poly(b"-1*x663-1*x493+1"),
207            [(-1, vec![663]), (-1, vec![493]), (1, vec![])]
208        );
209
210        // from `b03_sp-ar-rc_64bit_step`
211        assert_eq!(
212            do_parse_poly(b"-2*x65*x2-1*x65*x1"),
213            [(-2, vec![65, 2]), (-1, vec![65, 1]),]
214        );
215    }
216
217    // make sure that we can parse polynoms that are turned into strings using our implementation of Display
218    #[test]
219    fn test_parse_polynom_display() {
220        assert_eq!(
221            do_parse_poly(b"[1*x2817] + [1*x33]"),
222            [(1, vec![2817]), (1, vec![33])]
223        );
224
225        assert_eq!(
226            do_parse_poly(b"[-1*x2817] + [1*x33]"),
227            [(-1, vec![2817]), (1, vec![33])]
228        );
229
230        assert_eq!(
231            do_parse_poly(b"[1*x2817] + [ -1 *x33]"),
232            [(1, vec![2817]), (-1, vec![33])]
233        );
234
235        // parse a monom with empty term
236        assert_eq!(
237            do_parse_poly(b"[2*] + [4294967294*x1*x18]"),
238            [(2, vec![]), (4294967294i64, vec![1, 18])]
239        );
240    }
241}