// libpatron/btor2/parse.rs

1// Copyright 2023 The Regents of the University of California
2// released under BSD 3-Clause License
3// author: Kevin Laeufer <laeufer@berkeley.edu>
4
5use crate::ir::*;
6use fuzzy_matcher::FuzzyMatcher;
7use smallvec::SmallVec;
8use std::collections::HashMap;
9use std::str::FromStr;
10
11pub fn parse_str(ctx: &mut Context, input: &str, name: Option<&str>) -> Option<TransitionSystem> {
12    match Parser::new(ctx).parse(input.as_bytes(), name) {
13        Ok(sys) => Some(sys),
14        Err(errors) => {
15            report_errors(errors, "str", input);
16            None
17        }
18    }
19}
20
21pub fn parse_file<P: AsRef<std::path::Path>>(filename: P) -> Option<(Context, TransitionSystem)> {
22    let mut ctx = Context::default();
23    let sys = parse_file_with_ctx(filename, &mut ctx)?;
24    Some((ctx, sys))
25}
26
27pub fn parse_file_with_ctx<P: AsRef<std::path::Path>>(
28    filename: P,
29    ctx: &mut Context,
30) -> Option<TransitionSystem> {
31    let path = filename.as_ref();
32    let f = std::fs::File::open(path).expect("Failed to open btor file!");
33    let reader = std::io::BufReader::new(f);
34    let backup_name = path.file_stem().and_then(|n| n.to_str());
35    match Parser::new(ctx).parse(reader, backup_name) {
36        Ok(sys) => Some(sys),
37        Err(errors) => {
38            report_errors(
39                errors,
40                path.file_name().unwrap().to_str().unwrap(),
41                &std::fs::read_to_string(path).unwrap(),
42            );
43            None
44        }
45    }
46}
47
/// Single-use parser for a btor2 description. Accumulates the transition
/// system under construction together with any errors encountered while
/// processing the input line by line.
struct Parser<'a> {
    // expression context into which all parsed expressions are allocated
    ctx: &'a mut Context,
    // the transition system being built up
    sys: TransitionSystem,
    // all errors encountered so far; parsing continues after an error
    errors: Errors,
    /// offset of the current line inside the file
    offset: usize,
    /// maps file id to type
    type_map: HashMap<LineId, Type>,
    /// maps file id to state in the Transition System
    state_map: HashMap<LineId, StateRef>,
    /// maps file id to signal in the Transition System
    signal_map: HashMap<LineId, ExprRef>,
}

/// Id at the start of each btor2 line, used by later lines to refer back to
/// sorts, signals and states.
type LineId = u32;

/// Name prefix used for inputs that carry no symbol name in the file.
pub const DEFAULT_INPUT_PREFIX: &str = "_input";
/// Name prefix used for states that carry no symbol name in the file.
pub const DEFAULT_STATE_PREFIX: &str = "_state";
66
67impl<'a> Parser<'a> {
68    fn new(ctx: &'a mut Context) -> Self {
69        Parser {
70            ctx,
71            sys: TransitionSystem::new("".to_string()),
72            errors: Errors::new(),
73            offset: 0,
74            type_map: HashMap::new(),
75            state_map: HashMap::new(),
76            signal_map: HashMap::new(),
77        }
78    }
79
80    fn parse(
81        &mut self,
82        input: impl std::io::BufRead,
83        backup_name: Option<&str>,
84    ) -> Result<TransitionSystem, Errors> {
85        // ensure that default input and state names are reserved in order to get nicer names
86        self.ctx.add_node(DEFAULT_INPUT_PREFIX);
87        self.ctx.add_node(DEFAULT_STATE_PREFIX);
88
89        for line_res in input.lines() {
90            let line = line_res.expect("failed to read line");
91            let _ignore_errors = self.parse_line(&line);
92            self.offset += line.len() + 1; // TODO: this assumes that the line terminates with a single character
93        }
94
95        // get a better name if none could be determined from the file content
96        // this better name is often derived from the filename or other meta info
97        if self.sys.name.is_empty() {
98            if let Some(name) = backup_name {
99                self.sys.name = name.to_string();
100            }
101        }
102
103        // possibly improve state names through yosys aliases which look like:
104        // better_name = zext(__state__, 0)
105        improve_state_names(self.ctx, &mut self.sys);
106
107        // demote states without next or init to input
108        let input_states = self
109            .sys
110            .states()
111            .rev() // this reverse is needed in order to properly remove elements from back to front
112            .filter(|(_, s)| s.init.is_none() && s.next.is_none())
113            .map(|(i, _)| i)
114            .collect::<Vec<_>>();
115        for state_id in input_states {
116            let st = self.sys.remove_state(state_id);
117            self.sys.add_input(self.ctx, st.symbol);
118        }
119
120        // check to see if we encountered any errors
121        if self.errors.is_empty() {
122            Ok(std::mem::replace(
123                &mut self.sys,
124                TransitionSystem::new("".to_string()),
125            ))
126        } else {
127            Err(std::mem::take(&mut self.errors))
128        }
129    }
130
131    fn parse_line(&mut self, line: &str) -> ParseLineResult {
132        let cont = tokenize_line(line);
133        let tokens = &cont.tokens;
134        // TODO: deal with comments
135        if tokens.is_empty() {
136            // early exit if there are no tokens on this line
137            return Ok(());
138        }
139
140        // the first token should be an ID
141        let line_id = self.parse_line_id(line, tokens[0])?;
142
143        // make sure that there is a second token following the id
144        let op: &str = match tokens.get(1) {
145            None => {
146                return self.add_error(line, tokens[0], "No operation after ID.".to_owned());
147            }
148            Some(op) => op,
149        };
150
151        // check op
152        let mut labels = SignalLabels::default();
153        let expr = if UNARY_OPS_SET.contains(op) {
154            Some(self.parse_unary_op(line, tokens)?)
155        } else if BINARY_OPS_SET.contains(op) {
156            Some(self.parse_bin_op(line, tokens)?)
157        } else {
158            self.require_at_least_n_tokens(line, tokens, 3)?;
159            match op {
160                "ite" | "write" => {
161                    // ternary ops
162                    Some(self.parse_ternary_op(line, tokens)?)
163                }
164                "sort" => {
165                    self.parse_sort(line, tokens, line_id)?;
166                    None
167                }
168                "const" | "constd" | "consth" | "zero" | "one" => {
169                    Some(self.parse_format(line, tokens, op)?)
170                }
171                "ones" => Some(self.parse_ones(line, tokens)?),
172                "state" => {
173                    self.parse_state(line, &cont, line_id)?;
174                    None // states are already added, no need to re-add them
175                }
176                "input" => {
177                    self.parse_input(line, &cont, line_id)?;
178                    None // inputs are already added, no need to re-add them
179                }
180                "init" | "next" => {
181                    self.parse_state_init_or_next(line, &cont, op == "init")?;
182                    None
183                }
184                "output" | "bad" | "constraint" | "fair" => {
185                    labels = SignalLabels::from_str(op).unwrap();
186                    Some((self.get_expr_from_line_id(line, tokens[2])?, 3))
187                }
188                other => {
189                    if OTHER_OPS_SET.contains(other) {
190                        panic!("TODO: implement support for {other} operation")
191                    } else {
192                        return self.invalid_op_error(line, op);
193                    }
194                }
195            }
196        };
197        if let Some((e, token_count)) = expr {
198            self.signal_map.insert(line_id, e);
199            // try to find a name
200            let name = match cont.tokens.get(token_count) {
201                None => None,
202                Some(name) => {
203                    if include_name(name) {
204                        Some(self.ctx.add_unique_str(&clean_up_name(name)))
205                    } else {
206                        None
207                    }
208                }
209            };
210            // inputs and states do not get here
211            let kind = SignalKind::Node;
212            let merged = match self.sys.get_signal(e) {
213                Some(info) => merge_signal_info(info, &SignalInfo { name, kind, labels }),
214                None => SignalInfo { name, kind, labels },
215            };
216            self.sys
217                .add_signal(e, merged.kind, merged.labels, merged.name);
218        }
219        Ok(())
220    }
221
222    fn check_expr_type(
223        &mut self,
224        expr: ExprRef,
225        line: &str,
226        expected_type: Type,
227    ) -> ParseLineResult<ExprRef> {
228        // first we check for internal consistency
229        if let Err(e) = expr.type_check(self.ctx) {
230            let _ = self.add_error(line, line, format!("Failed to type check: {}", e.get_msg()));
231            return Err(());
232        }
233        // then we make sure that the type of the expression is actually the type that was
234        // declared in the btor2 line
235        let actual_tpe = expr.get_type(self.ctx);
236        if actual_tpe != expected_type {
237            let _ =
238                self.add_error(line, line, format!("Expression has the type {actual_tpe}, but the declared btpr2 type is {expected_type}", ));
239            Err(())
240        } else {
241            Ok(expr)
242        }
243    }
244
245    fn check_type(
246        &mut self,
247        actual: &Type,
248        expected: &Type,
249        line: &str,
250        token: &str,
251        msg: &str,
252    ) -> ParseLineResult {
253        if actual == expected {
254            Ok(())
255        } else {
256            self.add_error(line, token, format!("{msg}: {actual} != {expected}"))
257        }
258    }
259
260    fn get_label_name(&mut self, cont: &LineTokens, default: &str) -> StringRef {
261        // look at last token and see if it is not a
262
263        let base_str: &str = cont.tokens.get(3).unwrap_or(&default);
264        // TODO: look into comment for better names
265        self.ctx.add_unique_str(base_str)
266    }
267
268    fn parse_unary_op(&mut self, line: &str, tokens: &[&str]) -> ParseLineResult<(ExprRef, usize)> {
269        self.require_at_least_n_tokens(line, tokens, 4)?;
270        let tpe = self.get_tpe_from_id(line, tokens[2])?;
271        let e = self.get_expr_from_line_id(line, tokens[3])?;
272        let (res, token_count) = match tokens[1] {
273            "slice" => {
274                // slice has two integer attributes
275                self.require_at_least_n_tokens(line, tokens, 6)?;
276                let msb = self.parse_width_int(line, tokens[4], "slice msb")?;
277                let lsb = self.parse_width_int(line, tokens[5], "slice lsb")?;
278                (self.ctx.slice(e, msb, lsb), 6)
279            }
280            "not" => (self.ctx.not(e), 4),
281            "neg" => (self.ctx.negate(e), 4),
282            "uext" => {
283                self.require_at_least_n_tokens(line, tokens, 5)?;
284                let by = self.parse_width_int(line, tokens[4], "extension amount")?;
285                (self.ctx.zero_extend(e, by), 5)
286            }
287            "sext" => {
288                self.require_at_least_n_tokens(line, tokens, 5)?;
289                let by = self.parse_width_int(line, tokens[4], "extension amount")?;
290                (self.ctx.sign_extend(e, by), 5)
291            }
292            "redor" => {
293                let width = e.get_type(self.ctx).get_bit_vector_width().unwrap();
294                if width == 1 {
295                    (e, 4)
296                } else {
297                    // redor is true iff at least one bit is a one
298                    let zero = self.ctx.zero(width);
299                    let eq_zero = self.ctx.bv_equal(e, zero);
300                    (self.ctx.not(eq_zero), 4)
301                }
302            }
303            "redand" => {
304                let width = e.get_type(self.ctx).get_bit_vector_width().unwrap();
305                if width == 1 {
306                    (e, 4)
307                } else {
308                    // redand is true iff all bits are one
309                    let mask = self.ctx.mask(width);
310                    (self.ctx.bv_equal(e, mask), 4)
311                }
312            }
313            "redxor" => {
314                let width = e.get_type(self.ctx).get_bit_vector_width().unwrap();
315                if width == 1 {
316                    (e, 4)
317                } else {
318                    let bits: Vec<_> = (0..width).map(|ii| self.ctx.slice(e, ii, ii)).collect();
319                    let res = bits
320                        .into_iter()
321                        .reduce(|acc, e| self.ctx.xor(acc, e))
322                        .unwrap();
323                    (res, 4)
324                }
325            }
326            other => panic!("unexpected unary op: {other}"),
327        };
328        let checked = self.check_expr_type(res, line, tpe)?;
329        Ok((checked, token_count))
330    }
331
332    fn parse_bin_op(&mut self, line: &str, tokens: &[&str]) -> ParseLineResult<(ExprRef, usize)> {
333        self.require_at_least_n_tokens(line, tokens, 5)?;
334        let tpe = self.get_tpe_from_id(line, tokens[2])?;
335        let a = self.get_expr_from_line_id(line, tokens[3])?;
336        let b = self.get_expr_from_line_id(line, tokens[4])?;
337        let e: ExprRef = match tokens[1] {
338            "iff" => {
339                self.check_type(
340                    &tpe,
341                    &Type::BOOL,
342                    line,
343                    tokens[2],
344                    "iff always returns bool",
345                )?;
346                self.check_type(
347                    &a.get_type(self.ctx),
348                    &Type::BOOL,
349                    line,
350                    tokens[3],
351                    "iff expects a boolean argument",
352                )?;
353                self.check_type(
354                    &b.get_type(self.ctx),
355                    &Type::BOOL,
356                    line,
357                    tokens[4],
358                    "iff expects a boolean argument",
359                )?;
360                self.ctx.bv_equal(a, b)
361            }
362            "implies" => self.ctx.implies(a, b),
363            "sgt" => self.ctx.greater_signed(a, b),
364            "ugt" => self.ctx.greater(a, b),
365            "sgte" => self.ctx.greater_or_equal_signed(a, b),
366            "ugte" => self.ctx.greater_or_equal(a, b),
367            "slt" => self.ctx.greater_signed(b, a),
368            "ult" => self.ctx.greater(b, a),
369            "slte" => self.ctx.greater_or_equal_signed(b, a),
370            "ulte" => self.ctx.greater_or_equal(b, a),
371            "and" => self.ctx.and(a, b),
372            "nand" => {
373                let inner = self.ctx.and(a, b);
374                self.check_expr_type(inner, line, tpe)?;
375                self.ctx.not(inner)
376            }
377            "nor" => {
378                let inner = self.ctx.or(a, b);
379                self.check_expr_type(inner, line, tpe)?;
380                self.ctx.not(inner)
381            }
382            "or" => self.ctx.or(a, b),
383            "xnor" => {
384                let inner = self.ctx.xor(a, b);
385                self.check_expr_type(inner, line, tpe)?;
386                self.ctx.not(inner)
387            }
388            "xor" => self.ctx.xor(a, b),
389            "rol" | "ror" => todo!("Add support for bit rotates."),
390            "sll" => self.ctx.shift_left(a, b),
391            "sra" => self.ctx.arithmetic_shift_right(a, b),
392            "srl" => self.ctx.shift_right(a, b),
393            "add" => self.ctx.add(a, b),
394            "mul" => self.ctx.mul(a, b),
395            "sdiv" => self.ctx.signed_div(a, b),
396            "udiv" => self.ctx.div(a, b),
397            "smod" => self.ctx.signed_mod(a, b),
398            "srem" => self.ctx.signed_remainder(a, b),
399            "urem" => self.ctx.remainder(a, b),
400            "sub" => self.ctx.sub(a, b),
401            "saddo" | "uaddo" | "sdivo" | "udivo" | "smulo" | "umulo" | "ssubo" | "usubo" => {
402                todo!("Add support for overflow operators")
403            }
404            "concat" => self.ctx.concat(a, b),
405            "eq" => self.ctx.bv_equal(a, b),
406            "neq" => {
407                let inner = self.ctx.bv_equal(a, b);
408                self.check_expr_type(inner, line, tpe)?;
409                self.ctx.not(inner)
410            }
411            "read" => self.ctx.array_read(a, b),
412            other => panic!("unexpected binary op: {other}"),
413        };
414        let checked = self.check_expr_type(e, line, tpe)?;
415        Ok((checked, 5))
416    }
417
418    fn parse_ternary_op(
419        &mut self,
420        line: &str,
421        tokens: &[&str],
422    ) -> ParseLineResult<(ExprRef, usize)> {
423        self.require_at_least_n_tokens(line, tokens, 6)?;
424        let tpe = self.get_tpe_from_id(line, tokens[2])?;
425        let a = self.get_expr_from_line_id(line, tokens[3])?;
426        let b = self.get_expr_from_line_id(line, tokens[4])?;
427        let c = self.get_expr_from_line_id(line, tokens[5])?;
428        let res: ExprRef = match tokens[1] {
429            "ite" => {
430                if tpe.is_bit_vector() {
431                    self.ctx.bv_ite(a, b, c)
432                } else {
433                    self.ctx.array_ite(a, b, c)
434                }
435            }
436            "write" => self.ctx.array_store(a, b, c),
437            other => panic!("unexpected binary op: {other}"),
438        };
439        let checked = self.check_expr_type(res, line, tpe)?;
440        Ok((checked, 6))
441    }
442
443    fn parse_state(
444        &mut self,
445        line: &str,
446        cont: &LineTokens,
447        line_id: LineId,
448    ) -> ParseLineResult<()> {
449        let tpe = self.get_tpe_from_id(line, cont.tokens[2])?;
450        let name = self.get_label_name(cont, DEFAULT_STATE_PREFIX);
451        let sym = self.ctx.symbol(name, tpe);
452        let state_ref = self.sys.add_state(self.ctx, sym);
453        self.state_map.insert(line_id, state_ref);
454        self.signal_map.insert(line_id, sym);
455        Ok(())
456    }
457
458    fn parse_input(
459        &mut self,
460        line: &str,
461        cont: &LineTokens,
462        line_id: LineId,
463    ) -> ParseLineResult<()> {
464        let tpe = self.get_tpe_from_id(line, cont.tokens[2])?;
465        let name = self.get_label_name(cont, DEFAULT_INPUT_PREFIX);
466        let sym = self.ctx.symbol(name, tpe);
467        self.sys.add_input(self.ctx, sym);
468        self.signal_map.insert(line_id, sym);
469        Ok(())
470    }
471
472    fn parse_state_init_or_next(
473        &mut self,
474        line: &str,
475        cont: &LineTokens,
476        is_init_not_next: bool,
477    ) -> ParseLineResult {
478        let lbl = if is_init_not_next { "init" } else { "next" };
479        self.require_at_least_n_tokens(line, &cont.tokens, 5)?;
480        let tpe = self.get_tpe_from_id(line, cont.tokens[2])?;
481        let state_ref = self.get_state_from_id(line, cont.tokens[3])?;
482        let state_sym = self.sys.get(state_ref).symbol;
483        let state_tpe = state_sym.type_check(self.ctx).unwrap();
484        let state_name = state_sym.get_symbol_name(self.ctx).unwrap().to_string();
485        self.check_type(
486            &state_tpe,
487            &tpe,
488            line,
489            cont.tokens[4],
490            &format!("[{state_name}.{lbl}] Expression type does not match state type."),
491        )?;
492
493        let maybe_expr = self.get_expr_from_line_id(line, cont.tokens[4])?;
494        // for state init, sometimes array states are assigned a bv-type expression
495        let bv_assigned_to_array =
496            maybe_expr.get_type(self.ctx).is_bit_vector() && state_tpe.is_array();
497        let expr = if is_init_not_next && bv_assigned_to_array {
498            self.ctx
499                .array_const(maybe_expr, state_tpe.get_array_index_width().unwrap())
500        } else {
501            maybe_expr
502        };
503        self.check_type(
504            &expr.get_type(self.ctx),
505            &tpe,
506            line,
507            cont.tokens[4],
508            &format!("[{state_name}.{lbl}] Expressions has mismatched type"),
509        )?;
510
511        if is_init_not_next {
512            self.sys
513                .modify_state(state_ref, |state| state.init = Some(expr));
514        } else {
515            self.sys
516                .modify_state(state_ref, |state| state.next = Some(expr));
517        }
518        Ok(())
519    }
520
521    fn parse_line_id(&mut self, line: &str, token: &str) -> ParseLineResult<LineId> {
522        match token.parse::<LineId>().ok() {
523            None => {
524                let _ = self.add_error(
525                    line,
526                    token,
527                    "Expected valid non-negative integer ID.".to_owned(),
528                );
529                Err(())
530            }
531            Some(id) => Ok(id),
532        }
533    }
534
535    fn parse_ones(&mut self, line: &str, tokens: &[&str]) -> ParseLineResult<(ExprRef, usize)> {
536        self.require_at_least_n_tokens(line, tokens, 3)?;
537        // derive width from type
538        let width = self.get_bv_width(line, tokens[2])?;
539        let res = if width > BVLiteralInt::BITS {
540            todo!("Add support for literals of size: {width}")
541        } else {
542            let value = if width == BVLiteralInt::BITS {
543                BVLiteralInt::MAX
544            } else {
545                ((1 as BVLiteralInt) << width) - 1
546            };
547            self.ctx.bv_lit(value, width)
548        };
549        Ok((res, 3))
550    }
551
552    fn parse_format(
553        &mut self,
554        line: &str,
555        tokens: &[&str],
556        op: &str,
557    ) -> ParseLineResult<(ExprRef, usize)> {
558        self.require_at_least_n_tokens(line, tokens, 3)?;
559        // derive width from type
560        let width = self.get_bv_width(line, tokens[2])?;
561
562        let res = match op {
563            "zero" => Ok(self.ctx.zero(width)),
564            "one" => Ok(self.ctx.one(width)),
565            "const" => self.parse_bv_lit_str(line, tokens[3], 2, width),
566            "constd" => self.parse_bv_lit_str(line, tokens[3], 10, width),
567            "consth" => self.parse_bv_lit_str(line, tokens[3], 16, width),
568            other => panic!("Did not expect {other} as a possible format op!"),
569        }?;
570        Ok((res, 3))
571    }
572
573    fn parse_bv_lit_str(
574        &mut self,
575        line: &str,
576        token: &str,
577        base: u32,
578        width: WidthInt,
579    ) -> ParseLineResult<ExprRef> {
580        match BVLiteralInt::from_str_radix(token, base) {
581            Ok(val) => {
582                if bv_value_fits_width(val, width) {
583                    Ok(self.ctx.bv_lit(val, width))
584                } else {
585                    let _ = self.add_error(
586                        line,
587                        token,
588                        format!("Value {val} does not fit into a bit-vector of width {width}"),
589                    );
590                    Err(())
591                }
592            }
593            Err(_) => {
594                let _ = self.add_error(
595                    line,
596                    token,
597                    format!("Failed to parse as an integer of base {base}"),
598                );
599                Err(())
600            }
601        }
602    }
603
604    fn get_bv_width(&mut self, line: &str, token: &str) -> ParseLineResult<WidthInt> {
605        let tpe = self.get_tpe_from_id(line, token)?;
606        match tpe {
607            Type::BV(width) => Ok(width),
608            Type::Array(tpe) => {
609                let _ = self.add_error(
610                    line,
611                    token,
612                    format!(
613                        "Points to an array type `{tpe:?}`, but a bit-vector type is required!"
614                    ),
615                );
616                Err(())
617            }
618        }
619    }
620
621    fn get_tpe_from_id(&mut self, line: &str, token: &str) -> ParseLineResult<Type> {
622        let tpe_id = self.parse_line_id(line, token)?;
623        match self.type_map.get(&tpe_id) {
624            None => {
625                let _ = self.add_error(
626                    line,
627                    token,
628                    format!("ID `{tpe_id}` does not point to a valid type!"),
629                );
630                Err(())
631            }
632            Some(tpe) => Ok(*tpe),
633        }
634    }
635
636    fn get_state_from_id(&mut self, line: &str, token: &str) -> ParseLineResult<StateRef> {
637        let state_id = self.parse_line_id(line, token)?;
638        match self.state_map.get(&state_id) {
639            None => {
640                let _ = self.add_error(
641                    line,
642                    token,
643                    format!("ID `{state_id}` does not point to a valid state!"),
644                );
645                Err(())
646            }
647            Some(state) => Ok(*state),
648        }
649    }
650
651    fn get_expr_from_line_id(&mut self, line: &str, token: &str) -> ParseLineResult<ExprRef> {
652        let signal_id = self.parse_line_id(line, token)?;
653        let expr_ref = match self.signal_map.get(&signal_id) {
654            None => {
655                let _ = self.add_error(
656                    line,
657                    token,
658                    format!("ID `{signal_id}` does not point to a valid signal!"),
659                );
660                Err(())
661            }
662            Some(signal) => Ok(*signal),
663        }?;
664        Ok(expr_ref)
665    }
666
667    fn parse_sort(&mut self, line: &str, tokens: &[&str], line_id: LineId) -> ParseLineResult {
668        self.require_at_least_n_tokens(line, tokens, 3)?;
669        match tokens[2] {
670            "bitvec" => {
671                self.require_at_least_n_tokens(line, tokens, 4)?;
672                let width = self.parse_width_int(line, tokens[3], "bit-vector width")?;
673                self.type_map.insert(line_id, Type::BV(width));
674                Ok(())
675            }
676            "array" => {
677                self.require_at_least_n_tokens(line, tokens, 5)?;
678                let index_tpe = self.get_tpe_from_id(line, tokens[3])?;
679                let data_tpe = self.get_tpe_from_id(line, tokens[4])?;
680                let index_width = index_tpe.get_bit_vector_width().unwrap();
681                let data_width = data_tpe.get_bit_vector_width().unwrap();
682                self.type_map.insert(
683                    line_id,
684                    Type::Array(ArrayType {
685                        index_width,
686                        data_width,
687                    }),
688                );
689                Ok(())
690            }
691            other => self.add_error(
692                line,
693                tokens[2],
694                format!("Expected `bitvec` or `array`. Not `{other}`."),
695            ),
696        }
697    }
698
699    fn parse_width_int(
700        &mut self,
701        line: &str,
702        token: &str,
703        kind: &str,
704    ) -> ParseLineResult<WidthInt> {
705        match token.parse::<WidthInt>() {
706            Ok(width) => Ok(width),
707            Err(_) => {
708                let _ = self.add_error(
709                    line,
710                    token,
711                    format!(
712                        "Not a valid {kind}. An integer between {} and {} is required!",
713                        WidthInt::MAX,
714                        WidthInt::MIN
715                    ),
716                );
717                Err(())
718            }
719        }
720    }
721
722    fn add_error(&mut self, line: &str, token: &str, msg: String) -> ParseLineResult {
723        let explain = "".to_owned(); // TODO: how do we best utilize both msg and explain?
724        let start = str_offset(token, line);
725        let end = start + token.len();
726        let e = ParserError {
727            msg,
728            explain,
729            start: start + self.offset,
730            end: end + self.offset,
731        };
732        self.errors.push(e);
733        Err(())
734    }
735
736    fn require_at_least_n_tokens(
737        &mut self,
738        line: &str,
739        tokens: &[&str],
740        n: usize,
741    ) -> ParseLineResult {
742        if tokens.len() < n {
743            let op = tokens[1];
744            let start = str_offset(op, line);
745            let last_token = tokens.last().unwrap();
746            let end = str_offset(last_token, line) + last_token.len();
747            self.add_error(
748                line,
749                &line[start..end],
750                format!(
751                    "{op} requires at least {n} tokens, only {} provided",
752                    tokens.len()
753                ),
754            )
755        } else {
756            Ok(())
757        }
758    }
759
760    fn invalid_op_error(&mut self, line: &str, op: &str) -> ParseLineResult {
761        let all_ops = UNARY_OPS
762            .iter()
763            .chain(BINARY_OPS.iter())
764            .chain(TERNARY_OPS.iter())
765            .chain(OTHER_OPS.iter());
766        let matcher = fuzzy_matcher::skim::SkimMatcherV2::default();
767        let mut matches: Vec<(&&str, i64)> = all_ops
768            .flat_map(|other| matcher.fuzzy_match(other, op).map(|s| (other, s)))
769            .collect();
770        matches.sort_by_key(|(_, s)| -(*s));
771        let n_matches = std::cmp::min(matches.len(), 5);
772        let suggestions = matches
773            .iter()
774            .take(n_matches)
775            .map(|(n, _)| **n)
776            .collect::<Vec<&str>>()
777            .join(", ");
778        self.add_error(
779            line,
780            op,
781            format!("Invalid op {op}. Did you mean: {suggestions}?"),
782        )
783    }
784}
785
/// yosys likes to use a lot of $ in the signal names, we want to avoid that for readability reasons
fn clean_up_name(name: &str) -> String {
    name.chars()
        .map(|c| if c == '$' { '_' } else { c })
        .collect()
}
790
/// decides whether a name should be included or ignored
fn include_name(name: &str) -> bool {
    // yosys sometimes includes complete file paths which are not super helpful
    let looks_like_yosys_path = name.len() > 30 && name.contains('/') && name.contains(':');
    // names produced by yosys' flatten pass are not helpful either
    let is_yosys_flatten_output = name.starts_with("$flatten\\");
    !(looks_like_yosys_path || is_yosys_flatten_output)
}
798
/// When Yosys emits a btor file, it often will not actually name the states correctly.
/// Instead it creates nodes that just point to the state and carry its name. This function searches
/// for these nodes and tries to rename the states.
fn improve_state_names(ctx: &mut Context, sys: &mut TransitionSystem) {
    // collect symbol -> better-name pairs before mutating anything
    let mut renames = HashMap::new();
    for (_, state) in sys.states() {
        // since the alias signal refers to the same expression as the state symbol,
        // it will generate a signal info with the better name
        if let Some(signal) = sys.get_signal(state.symbol) {
            if let Some(name_ref) = signal.name {
                let old_name_ref = state.symbol.get_symbol_name_ref(ctx).unwrap();
                // only rename if the alias actually differs from the current name
                if old_name_ref != name_ref {
                    renames.insert(state.symbol, name_ref);
                }
            }
        }
    }
    if renames.is_empty() {
        return; // nothing to do
    }
    // rewrite the system, substituting each renamed symbol with a fresh symbol
    // of the same type but the better name
    do_transform(ctx, sys, |ctx, expr_ref, _| {
        renames
            .get(&expr_ref)
            .map(|new_name_ref| ctx.symbol(*new_name_ref, expr_ref.get_type(ctx)))
    });
}
825
// Line Tokenizer
/// The pieces of a single btor2 line: whitespace-separated tokens plus the
/// trailing `;` comment (without the semicolon itself), if one was present.
/// Both fields borrow from the original line.
#[derive(Default, Debug)]
pub(crate) struct LineTokens<'a> {
    // inline storage for up to 4 tokens avoids a heap allocation for typical short lines
    pub(crate) tokens: SmallVec<[&'a str; 4]>,
    // everything after the first `;` on the line, `None` if there was no comment
    pub(crate) comment: Option<&'a str>,
}
832
833const NO_TOKEN: usize = usize::MAX;
834pub(crate) fn tokenize_line(line: &str) -> LineTokens {
835    if line.is_empty() {
836        // special handling for empty lines
837        return LineTokens::default();
838    }
839    let line_len = line.len();
840    let mut out = LineTokens::default();
841    let mut token_start: usize = NO_TOKEN;
842    #[inline]
843    fn finish_token<'a>(
844        token_start: &mut usize,
845        out: &mut LineTokens<'a>,
846        line: &'a str,
847        ii: usize,
848    ) {
849        if *token_start != NO_TOKEN {
850            out.tokens.push(&line[*token_start..ii]);
851            *token_start = NO_TOKEN;
852        }
853    }
854
855    for (ii, cc) in line.char_indices() {
856        match cc {
857            // white space character
858            ' ' | '\t' => finish_token(&mut token_start, &mut out, line, ii),
859            // comment start
860            ';' => {
861                finish_token(&mut token_start, &mut out, line, ii);
862                out.comment = Some(&line[ii + 1..line_len]);
863                return out;
864            }
865            _ => {
866                if token_start == NO_TOKEN {
867                    token_start = ii
868                }
869            }
870        }
871    }
872    finish_token(&mut token_start, &mut out, line, line_len);
873    out
874}
875
/// A single parse error, located by byte offsets into the source text
/// (used as a `start..end` range by the codespan diagnostic below).
#[derive(Debug)]
struct ParserError {
    msg: String, // top-level diagnostic message
    explain: String, // displayed next to offending token
    start: usize, // byte offset where the offending span starts
    end: usize, // byte offset one past the end of the offending span
}

/// All errors collected while parsing one input.
type Errors = Vec<ParserError>;
885
886fn report_errors(errors: Errors, name: &str, source: &str) {
887    let report_file = codespan_reporting::files::SimpleFile::new(name, source);
888    for err in errors.into_iter() {
889        report_error(err, &report_file);
890    }
891}
892
893fn report_error(error: ParserError, file: &codespan_reporting::files::SimpleFile<&str, &str>) {
894    let diagnostic = codespan_reporting::diagnostic::Diagnostic::error()
895        .with_message(error.msg)
896        .with_labels(vec![codespan_reporting::diagnostic::Label::primary(
897            (),
898            error.start..error.end,
899        )
900        .with_message(error.explain)]);
901    let writer = codespan_reporting::term::termcolor::StandardStream::stderr(
902        codespan_reporting::term::termcolor::ColorChoice::Auto,
903    );
904    let config = codespan_reporting::term::Config::default();
905    codespan_reporting::term::emit(&mut writer.lock(), &config, file, &diagnostic).unwrap();
906}
907
/// Computes the byte offset of `needle` within `haystack`.
///
/// `needle` must be a sub-slice of `haystack` (i.e. point into the same
/// underlying string); otherwise this function panics.
fn str_offset(needle: &str, haystack: &str) -> usize {
    // checked_sub panics with a clear message (instead of silently wrapping in
    // release builds) when `needle` starts before `haystack`
    let offset = (needle.as_ptr() as usize)
        .checked_sub(haystack.as_ptr() as usize)
        .unwrap_or_else(|| panic!("{needle} is not fully contained in {haystack}"));
    // check the *end* of the needle as well: a needle that starts inside but
    // runs past the haystack is rejected, while an empty needle at the very
    // end (offset == haystack.len()) is accepted as contained
    assert!(
        offset + needle.len() <= haystack.len(),
        "{} is not fully contained in {}",
        needle,
        haystack
    );
    offset
}
918
// Operator name tables; the hash sets below are built from them.
// NOTE(review): "read" is listed both here in BINARY_OPS and again in
// OTHER_OPS — confirm whether that duplication is intentional.

/// ops that take one expression argument
const UNARY_OPS: [&str; 10] = [
    "not", "inc", "dec", "neg", "redand", "redor", "redxor", "slice", "uext", "sext",
];
/// ops that take two expression arguments
const BINARY_OPS: [&str; 41] = [
    "iff", "implies", "sgt", "ugt", "sgte", "ugte", "slt", "ult", "slte", "ulte", "and", "nand",
    "nor", "or", "xnor", "xor", "rol", "ror", "sll", "sra", "srl", "add", "mul", "sdiv", "udiv",
    "smod", "srem", "urem", "sub", "saddo", "uaddo", "sdivo", "udivo", "smulo", "umulo", "ssubo",
    "usubo", "concat", "eq", "neq", "read",
];
/// ops that take three expression arguments
const TERNARY_OPS: [&str; 1] = ["ite"];
/// remaining btor2 keywords: sorts, constants, state/IO declarations and properties
const OTHER_OPS: [&str; 17] = [
    "sort",
    "input",
    "output",
    "bad",
    "constraint",
    "fair",
    "state",
    "next",
    "init",
    "const",
    "constd",
    "consth",
    "zero",
    "one",
    "ones",
    "read",
    "write",
];
948
// Hash sets for fast op-name membership tests, built once on first use.
// NOTE(review): `std::sync::LazyLock` could replace `lazy_static!` once the
// crate's minimum supported Rust version allows it (1.80+).
lazy_static! {
    static ref UNARY_OPS_SET: std::collections::HashSet<&'static str> =
        std::collections::HashSet::from(UNARY_OPS);
    static ref BINARY_OPS_SET: std::collections::HashSet<&'static str> =
        std::collections::HashSet::from(BINARY_OPS);
    static ref OTHER_OPS_SET: std::collections::HashSet<&'static str> =
        std::collections::HashSet::from(OTHER_OPS);
}
957
/// Indicates success or failure of parsing a single line. Errors and data are
/// not returned through this result, but rather added to the parser context.
type ParseLineResult<T = ()> = std::result::Result<T, ()>;
960
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tokenize() {
        let token_count = |s: &str| tokenize_line(s).tokens.len();
        // correct number of tokens
        assert_eq!(token_count(""), 0);
        for one_token in ["a", " a", "a ", " a "] {
            assert_eq!(token_count(one_token), 1);
        }
        for two_tokens in ["a b", "a \t b", "a     b", "a b ; c", "a b;c"] {
            assert_eq!(token_count(two_tokens), 2);
        }
        // correctly deal with whitespace
        for padded in ["a", " a", "a ", " a "] {
            assert_eq!(tokenize_line(padded).tokens[0], "a");
        }
        // comments
        let with_comment = tokenize_line("a b; c");
        assert_eq!(with_comment.tokens[0], "a");
        assert_eq!(with_comment.tokens[1], "b");
        assert_eq!(with_comment.comment, Some(" c"));
        // unicode (not sure if this is actually supported by the btor2 format...)
        let unicode = tokenize_line("✔ 1✖2  ○;○123");
        assert_eq!(unicode.tokens.len(), 3);
        assert_eq!(unicode.tokens[0], "✔");
        assert_eq!(unicode.tokens[1], "1✖2");
        assert_eq!(unicode.tokens[2], "○");
        assert_eq!(unicode.comment, Some("○123"));
    }

    /// parses a snippet in a throw-away context
    fn parse_private(code: &str) -> Result<TransitionSystem, Errors> {
        let mut ctx = Context::default();
        Parser::new(&mut ctx).parse(code.as_bytes(), None)
    }

    #[test]
    fn parse_failures() {
        parse_private("").expect("parsing an empty line should be fine");
        parse_private("   ").expect("parsing an line with just whitespace should be fine");
        parse_private("  ; test bla  ").expect("parsing an line with a comment should be fine");
        parse_private("not_an_id").expect_err("invalid id");
        parse_private("-1").expect_err("invalid id");
        parse_private("0").expect_err("missing op");
        parse_private("0 ").expect_err("missing op");
    }
}
1013}