veripb_parser/
opb_parser.rs

//! Parser for the OPB format as [specified by the PB competition 2024](https://www.cril.univ-artois.fr/PB24/OPBgeneral.pdf).

use std::path::Path;

use ahash::AHashMap;
use logos::{Lexer, Logos};
use malachite_bigint::BigInt;
use num_traits::{ToPrimitive, Zero};
use veripb_formula::{pb_constraint::constraint_from_terms_and_coeff_sum, prelude::*};

use crate::{error::ParserError, opb_token::OPBToken, parser::get_lines};

type OptionalConstraint = Option<PBConstraintEnum>;
type Constraint = PBConstraintEnum;

#[derive(Debug, PartialEq)]
enum Comparator {
    GreaterEqual,
    Equal,
}

/// Parse an OPB file into a [`Database`](veripb_formula::database::Database) of constraints and store the names of the variables in a [`VarNameManager`](veripb_formula::var_name_manager::VarNameManager).
///
/// This function creates a fresh `VarNameManager` and delegates to `parse_opb_from_file_given_var_manager()`; see that function for details.
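///
/// A minimal usage sketch (not compiled as a doctest; `instance.opb` is a placeholder path):
///
/// ```ignore
/// let (formula, var_name_manager, labels) = parse_opb_from_file("instance.opb")?;
/// println!("parsed {} constraints", formula.len());
/// ```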
#[inline]
pub fn parse_opb_from_file<P>(
    filename: P,
) -> Result<(Formula, VarNameManager, AHashMap<String, isize>), ParserError>
where
    P: AsRef<Path>,
{
    let mut var_name_manager = VarNameManager::default();
    let (formula, labels) = parse_opb_from_file_given_var_manager(filename, &mut var_name_manager)?;
    Ok((formula, var_name_manager, labels))
}

/// Parse an OPB file into a [`Database`](veripb_formula::database::Database) of constraints and store the names of the variables in a [`VarNameManager`](veripb_formula::var_name_manager::VarNameManager).
///
/// Furthermore, labels for constraints are supported. The labels are returned as a mapping from the label `String` to a constraint ID. Constraint IDs start at 1, i.e. a label's ID is the zero-based position of the labelled constraint in the formula plus 1.
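///
/// A sketch of typical usage (not compiled as a doctest; file names and the label are placeholders). Reusing one `VarNameManager` across several files lets identical variable names resolve to the same variable:
///
/// ```ignore
/// let mut var_name_manager = VarNameManager::default();
/// let (formula_a, labels_a) =
///     parse_opb_from_file_given_var_manager("a.opb", &mut var_name_manager)?;
/// let (formula_b, _labels_b) =
///     parse_opb_from_file_given_var_manager("b.opb", &mut var_name_manager)?;
/// // Look up the 1-based constraint ID of a labelled constraint in the first file.
/// let id = labels_a.get("some_label");
/// ```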
pub fn parse_opb_from_file_given_var_manager<P>(
    filename: P,
    var_name_manager: &mut VarNameManager,
) -> Result<(Formula, AHashMap<String, isize>), ParserError>
where
    P: AsRef<Path>,
{
    let mut formula = Formula::default();
    let mut labels = AHashMap::new();
    let lines = get_lines(&filename)?;

    for (line_number, line) in lines.map_while(Result::ok).enumerate() {
        let mut lex = OPBToken::lexer(&line);
        let result = match lex.next() {
            None | Some(Ok(OPBToken::Comment)) => continue,
            Some(Ok(OPBToken::Label)) => {
                let label = lex.slice();
                let result = match lex.next() {
                    Some(Ok(OPBToken::Integer)) => Some(
                        parse_opb_constraint_dyn(&mut lex, var_name_manager).map_err(|e| {
                            e.add_file_and_line(
                                filename.as_ref().to_string_lossy().to_string(),
                                line_number,
                            )
                            .unwrap()
                        })?,
                    ),
                    Some(Ok(OPBToken::GreaterEqual)) => Some(
                        parse_constraint_empty_terms(&mut lex, Comparator::GreaterEqual).map_err(
                            |e| {
                                e.add_file_and_line(
                                    filename.as_ref().to_string_lossy().to_string(),
                                    line_number,
                                )
                                .unwrap()
                            },
                        )?,
                    ),
                    Some(Err(_)) | Some(Ok(_)) | None => {
                        return Err(ParserError::token_error_with_file(
                            lex.span(),
                            "'>=' or integer",
                            filename.as_ref().to_string_lossy().to_string(),
                            line_number,
                        ));
                    }
                };
                if result.as_ref().unwrap().1.is_some() {
                    return Err(ParserError::token_error_with_file(
                        lex.span(),
                        "inequality constraint",
                        filename.as_ref().to_string_lossy().to_string(),
                        line_number,
                    ));
                }
                labels.insert(label.to_string(), (formula.len() + 1) as isize);
                result
            }
            Some(Ok(OPBToken::Minimize)) => {
                formula.objective = Some(parse_opb_objective(&mut lex, var_name_manager, false)?);
                None
            }
            Some(Ok(OPBToken::Maximize)) => {
                formula.objective = Some(parse_opb_objective(&mut lex, var_name_manager, true)?);
                None
            }
            Some(Ok(OPBToken::Integer)) => Some(
                parse_opb_constraint_dyn(&mut lex, var_name_manager).map_err(|e| {
                    e.add_file_and_line(
                        filename.as_ref().to_string_lossy().to_string(),
                        line_number,
                    )
                    .unwrap()
                })?,
            ),
            Some(Ok(OPBToken::GreaterEqual)) => Some(
                parse_constraint_empty_terms(&mut lex, Comparator::GreaterEqual).map_err(|e| {
                    e.add_file_and_line(
                        filename.as_ref().to_string_lossy().to_string(),
                        line_number,
                    )
                    .unwrap()
                })?,
            ),
            Some(Ok(OPBToken::Equal)) => Some(
                parse_constraint_empty_terms(&mut lex, Comparator::Equal).map_err(|e| {
                    e.add_file_and_line(
                        filename.as_ref().to_string_lossy().to_string(),
                        line_number,
                    )
                    .unwrap()
                })?,
            ),
            Some(Err(_)) | Some(Ok(_)) => {
                return Err(ParserError::token_error_with_file(
                    lex.span(),
                    "'*', 'min:', 'max:', '>=', '=', or integer",
                    filename.as_ref().to_string_lossy().to_string(),
                    line_number,
                ));
            }
        };
        if let Some((geq_constraint, leq_constraint)) = result {
            formula.constraints.push(geq_constraint);
            if let Some(constraint) = leq_constraint {
                formula.constraints.push(constraint);
            }
        }
    }

    Ok((formula, labels))
}

/// Parse an objective specified in OPB format.
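///
/// A usage sketch (not compiled as a doctest; the objective line is a made-up example). The `min:`/`max:` keyword must already have been consumed from the lexer, as in the caller above:
///
/// ```ignore
/// let mut var_name_manager = VarNameManager::default();
/// let mut lex = OPBToken::lexer("min: +2 x1 +3 x2 ;");
/// let _ = lex.next(); // consume the `min:` keyword
/// let objective = parse_opb_objective(&mut lex, &mut var_name_manager, false)?;
/// ```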
pub fn parse_opb_objective(
    lex: &mut Lexer<OPBToken>,
    var_name_manager: &mut VarNameManager,
    is_maximization: bool,
) -> Result<PBObjective, ParserError> {
    let mut terms = Vec::new();
    let mut integer = BigInt::zero();
    match lex.next() {
        Some(Ok(OPBToken::Integer)) => integer = lex.slice().parse().unwrap(),
        Some(Ok(OPBToken::Semicolon)) | None => {
            return Ok(PBObjective::from_terms(terms, integer, is_maximization))
        }
        _ => return Err(ParserError::token_error(lex.span(), "integer or ';'")),
    }

    while let Some(token) = lex.next() {
        // Expect the next literal.
        let lit = match token {
            Ok(OPBToken::Var) => Lit::from_var(var_name_manager.add_by_name(lex.slice()), false),
            Ok(OPBToken::Negation) => {
                if lex.next() == Some(Ok(OPBToken::Var)) {
                    Lit::from_var(var_name_manager.add_by_name(lex.slice()), true)
                } else {
                    return Err(ParserError::token_error(lex.span(), "variable name"));
                }
            }
            Ok(OPBToken::Semicolon) => {
                return Ok(PBObjective::from_terms(terms, integer, is_maximization));
            }
            _ => return Err(ParserError::token_error(lex.span(), "literal or ';'")),
        };

        terms.push(GeneralPBTerm::new(integer, lit));

        // Expect the next integer or end.
        match lex.next() {
            Some(Ok(OPBToken::Integer)) => integer = lex.slice().parse().unwrap(),
            Some(Ok(OPBToken::Semicolon)) | None => {
                return Ok(PBObjective::from_terms(
                    terms,
                    BigInt::zero(),
                    is_maximization,
                ))
            }
            _ => return Err(ParserError::token_error(lex.span(), "integer or ';'")),
        }
    }

    Ok(PBObjective::from_terms(terms, integer, is_maximization))
}

/// Parse a single OPB constraint.
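///
/// A usage sketch (not compiled as a doctest; the constraint line is a made-up example):
///
/// ```ignore
/// let mut var_names = VarNameManager::default();
/// let mut lex = OPBToken::lexer("+1 x1 +2 x2 >= 2 ;");
/// let (geq_constraint, leq_constraint) = parse_single_constraint(&mut lex, &mut var_names)?;
/// assert!(leq_constraint.is_none()); // only `=` constraints produce a second constraint
/// ```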
pub fn parse_single_constraint(
    lex: &mut Lexer<OPBToken>,
    var_names: &mut VarNameManager,
) -> Result<(Constraint, OptionalConstraint), ParserError> {
    match lex.next() {
        Some(Ok(OPBToken::Integer)) => parse_opb_constraint_dyn(lex, var_names),
        Some(Ok(OPBToken::GreaterEqual)) => {
            parse_constraint_empty_terms(lex, Comparator::GreaterEqual)
        }
        Some(Ok(OPBToken::Equal)) => parse_constraint_empty_terms(lex, Comparator::Equal),
        _ => Err(ParserError::token_error(
            lex.span(),
            "'>=', '=', or integer",
        )),
    }
}

/// Special case for parsing OPB constraints without terms.
fn parse_constraint_empty_terms(
    lex: &mut Lexer<OPBToken>,
    comparator: Comparator,
) -> Result<(Constraint, OptionalConstraint), ParserError> {
    let degree: BigInt = parse_degree(lex)?.unwrap();
    check_constraint_end(lex)?;

    match degree.to_i64() {
        Some(degree) => Ok(get_constraints_from_terms(comparator, vec![], 0, degree)),
        None => match degree.to_i128() {
            Some(degree) => Ok(get_constraints_from_terms(comparator, vec![], 0, degree)),
            None => Ok(get_constraints_from_terms(
                comparator,
                vec![],
                BigInt::zero(),
                degree,
            )),
        },
    }
}

/// Helper function to parse a constraint.
///
/// This function tries to parse the constraint with `i64` coefficients first, then `i128`, and finally `BigInt`, so that the resulting constraint uses the smallest possible representation. It returns [`PBConstraintEnum`] values that can directly be added to a database.
///
/// **Note:** When this function is called, the lexer has already consumed the first token, which should be an integer.
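///
/// As an illustration (hypothetical input): for a line such as `+200000000000000000000 x1 +1 x2 >= 1 ;` the first coefficient does not fit into an `i64`, so the terms parsed so far are converted and parsing continues with `i128` coefficients (and, if those also overflow, with `BigInt`).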
fn parse_opb_constraint_dyn(
    lex: &mut Lexer<OPBToken>,
    var_name_manager: &mut VarNameManager,
) -> Result<(Constraint, OptionalConstraint), ParserError> {
    // Parse terms.
    let mut terms_i64 = Vec::new();
    let mut coeff_sum_i64 = 0;
    if let Some(comparator) =
        parse_opb_terms(lex, var_name_manager, &mut coeff_sum_i64, &mut terms_i64)?
    {
        let degree = match parse_degree(lex)? {
            Some(degree) => degree,
            // Workaround that should be safe in most cases. Maybe we need to properly handle this in the future.
            None => {
                if lex.slice().starts_with("-") {
                    i64::MIN
                } else {
                    i64::MAX
                }
            }
        };
        check_constraint_end(lex)?;
        Ok(get_constraints_from_terms(
            comparator,
            terms_i64,
            coeff_sum_i64,
            degree,
        ))
    } else {
        let mut terms_i128 = terms_i64.into_iter().map(|t| t.into()).collect();
        let mut coeff_sum_i128 = coeff_sum_i64.into();

        if let Some(comparator) =
            parse_opb_terms(lex, var_name_manager, &mut coeff_sum_i128, &mut terms_i128)?
        {
            let degree = match parse_degree(lex)? {
                Some(degree) => degree,
                // Workaround that should be safe in most cases. Maybe we need to properly handle this in the future.
                None => {
                    if lex.slice().starts_with("-") {
                        i128::MIN
                    } else {
                        i128::MAX
                    }
                }
            };
            check_constraint_end(lex)?;
            Ok(get_constraints_from_terms(
                comparator,
                terms_i128,
                coeff_sum_i128,
                degree,
            ))
        } else {
            let mut terms_big: Vec<GeneralPBTerm<BigInt>> =
                terms_i128.into_iter().map(|t| t.into()).collect();
            let mut coeff_sum_big = coeff_sum_i128.into();
            let comparator =
                parse_opb_terms(lex, var_name_manager, &mut coeff_sum_big, &mut terms_big)?
                    .unwrap();
            let degree = parse_degree(lex)?.unwrap();
            check_constraint_end(lex)?;
            Ok(get_constraints_from_terms(
                comparator,
                terms_big,
                coeff_sum_big,
                degree,
            ))
        }
    }
}

/// Helper function to parse terms with coefficients of the generic type `N`.
///
/// The function returns an `Err` if tokenization failed. It returns `Ok(None)` if a coefficient (or the running coefficient sum) does not fit into the type `N`; in that case the caller is responsible for retrying with a wider coefficient type.
///
/// **Note:** When this function is called, the lexer has already consumed the first token, which should be an integer.
#[inline]
fn parse_opb_terms<N: Int>(
    lex: &mut Lexer<OPBToken>,
    var_name_manager: &mut VarNameManager,
    coeff_sum: &mut N,
    terms: &mut Vec<GeneralPBTerm<N>>,
) -> Result<Option<Comparator>, ParserError> {
    let mut coeff: N = match lex.slice().parse() {
        Ok(integer) => integer,
        Err(_) => return Ok(None),
    };

    loop {
        if coeff.is_negative() {
            let abs = match N::zero().checked_sub(&coeff) {
                Some(abs) => abs,
                None => return Ok(None),
            };
            *coeff_sum = match coeff_sum.checked_add(&abs) {
                Some(sum) => sum,
                None => return Ok(None),
            };
        } else {
            *coeff_sum = match coeff_sum.checked_add(&coeff) {
                Some(sum) => sum,
                None => return Ok(None),
            };
        }
        let lit = match lex.next() {
            Some(Ok(OPBToken::Var)) => {
                Lit::from_var(var_name_manager.add_by_name(lex.slice()), false)
            }
            Some(Ok(OPBToken::Negation)) => {
                if lex.next() == Some(Ok(OPBToken::Var)) {
                    Lit::from_var(var_name_manager.add_by_name(lex.slice()), true)
                } else {
                    return Err(ParserError::token_error(lex.span(), "variable name"));
                }
            }
            _ => {
                return Err(ParserError::token_error(lex.span(), "literal"));
            }
        };
        terms.push(GeneralPBTerm::new(coeff, lit));

        coeff = match lex.next() {
            Some(Ok(OPBToken::GreaterEqual)) => {
                return Ok(Some(Comparator::GreaterEqual));
            }
            Some(Ok(OPBToken::Equal)) => {
                return Ok(Some(Comparator::Equal));
            }
            Some(Ok(OPBToken::Integer)) => match lex.slice().parse() {
                Ok(integer) => integer,
                Err(_) => return Ok(None),
            },
            _ => {
                return Err(ParserError::token_error(lex.span(), "integer, '>=', or '='"));
            }
        };
    }
}

/// Parse the degree (right-hand side) of the constraint.
///
/// The function returns an `Err` if tokenization failed. It returns `Ok(None)` if the degree does not fit into the type `N`; in that case the caller is responsible for handling the degree with a wider type.
#[inline]
fn parse_degree<N: Int>(lex: &mut Lexer<OPBToken>) -> Result<Option<N>, ParserError> {
    if lex.next() != Some(Ok(OPBToken::Integer)) {
        return Err(ParserError::token_error(lex.span(), "integer"));
    }

    let degree = lex.slice().parse().ok();

    Ok(degree)
}

/// Check that the constraint is terminated with `;`.
#[inline]
fn check_constraint_end(lex: &mut Lexer<OPBToken>) -> Result<(), ParserError> {
    if lex.next() != Some(Ok(OPBToken::Semicolon)) {
        Err(ParserError::token_error(lex.span(), "';'"))?;
    }

    Ok(())
}

/// Create [`PBConstraintEnum`]s from `terms` and `degree`.
///
/// This function detects if the PB constraint is a cardinality constraint or a clause and returns the most specific type. If the `comparator` used in the pseudo-Boolean constraint is `=`, the constraint is split into two constraints. The `>=` direction is always returned as the first entry of the tuple; the `<=` direction (if present) is returned as the second entry.
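///
/// For example (illustration only), the equality constraint `+2 x1 +3 x2 = 4` is split into `+2 x1 +3 x2 >= 4` (returned as the first entry) and `-2 x1 -3 x2 >= -4`, i.e. the `<=` direction (returned as the second entry).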
#[inline]
fn get_constraints_from_terms<N: Int>(
    comparator: Comparator,
    terms: Vec<GeneralPBTerm<N>>,
    coeff_sum: N,
    degree: N,
) -> (PBConstraintEnum, Option<PBConstraintEnum>)
where
    i64: TryFrom<N>,
    PBConstraintEnum: From<GeneralPBConstraint<N>>,
{
    // Compute leq-constraint.
    let mut leq_constraint: Option<PBConstraintEnum> = None;
    if comparator == Comparator::Equal {
        let negated_terms = terms
            .iter()
            .map(|term| GeneralPBTerm::new(-term.coeff.clone(), term.lit))
            .collect();

        leq_constraint = Some(constraint_from_terms_and_coeff_sum(
            negated_terms,
            -degree.clone(),
            coeff_sum.clone(),
        ));
    }

    // Compute geq-constraint.
    let geq_constraint = constraint_from_terms_and_coeff_sum(terms, degree, coeff_sum);

    (geq_constraint, leq_constraint)
}