varisat_dimacs/
lib.rs

1//! DIMCAS CNF parser and writer for the Varisat SAT solver.
2
3use std::{borrow::Borrow, io, mem::replace};
4
5use varisat_formula::{CnfFormula, ExtendFormula, Lit, Var};
6
7use anyhow::Error;
8use thiserror::Error;
9
10/// Possible errors while parsing a DIMACS CNF formula.
11#[derive(Debug, Error)]
12pub enum ParserError {
13    #[error(
14        "line {}: Unexpected character in DIMACS CNF input: '{}'",
15        line,
16        unexpected
17    )]
18    UnexpectedInput { line: usize, unexpected: char },
19    #[error(
20        "line {}: Literal index is too large: {}{}...",
21        line,
22        index,
23        final_digit
24    )]
25    LiteralTooLarge {
26        line: usize,
27        index: usize,
28        final_digit: usize,
29    },
30    #[error("line {}: Invalid header syntax: {}", line, header)]
31    InvalidHeader { line: usize, header: String },
32    #[error("line {}: Unterminated clause", line)]
33    UnterminatedClause { line: usize },
34    #[error(
35        "Formula has {} variables while the header specifies {} variables",
36        var_count,
37        header_var_count
38    )]
39    VarCount {
40        var_count: usize,
41        header_var_count: usize,
42    },
43    #[error(
44        "Formula has {} clauses while the header specifies {} clauses",
45        clause_count,
46        header_clause_count
47    )]
48    ClauseCount {
49        clause_count: usize,
50        header_clause_count: usize,
51    },
52    #[error("Parser invoked after a previous error")]
53    PreviousError,
54}
55
56/// Variable and clause count present in a DIMACS CNF header.
57#[derive(Copy, Clone, Debug)]
58pub struct DimacsHeader {
59    pub var_count: usize,
60    pub clause_count: usize,
61}
62
63/// Parser for DIMACS CNF files.
64///
65/// This parser can consume the input in chunks while also producing the parsed result in chunks.
66#[derive(Default)]
67pub struct DimacsParser {
68    formula: CnfFormula,
69    partial_clause: Vec<Lit>,
70    header: Option<DimacsHeader>,
71
72    line_number: usize,
73    clause_count: usize,
74    partial_lit: usize,
75    negate_next_lit: bool,
76
77    in_lit: bool,
78    in_comment_or_header: bool,
79    in_header: bool,
80    start_of_line: bool,
81    error: bool,
82
83    header_line: Vec<u8>,
84}
85
86impl DimacsParser {
87    /// Create a new DIMACS CNF parser.
88    pub fn new() -> DimacsParser {
89        DimacsParser {
90            formula: CnfFormula::new(),
91            partial_clause: vec![],
92            header: None,
93
94            line_number: 1,
95            clause_count: 0,
96            partial_lit: 0,
97            negate_next_lit: false,
98
99            in_lit: false,
100            in_comment_or_header: false,
101            in_header: false,
102            start_of_line: true,
103            error: false,
104
105            header_line: vec![],
106        }
107    }
108
109    /// Parse the given input and check the header if present.
110    ///
111    /// This parses the whole input into a single [`CnfFormula`](varisat_formula::CnfFormula).
112    /// Incremental parsing is possible using [`parse_incremental`](DimacsParser::parse_incremental)
113    /// or the [`parse_chunk`](DimacsParser::parse_chunk) method.
114    pub fn parse(input: impl io::Read) -> Result<CnfFormula, Error> {
115        Ok(Self::parse_incremental(input, |_| Ok(()))?.take_formula())
116    }
117
118    /// Parse the given input incrementally and check the header if present.
119    ///
120    /// The callback is invoked repeatedly with a reference to the parser. The callback can process
121    /// the formula incrementally by calling [`take_formula`](DimacsParser::take_formula) on the
122    /// passed argument.
123    pub fn parse_incremental(
124        input: impl io::Read,
125        mut callback: impl FnMut(&mut DimacsParser) -> Result<(), Error>,
126    ) -> Result<DimacsParser, Error> {
127        use io::BufRead;
128
129        let mut buffer = io::BufReader::new(input);
130        let mut parser = Self::new();
131
132        loop {
133            let data = buffer.fill_buf()?;
134            if data.is_empty() {
135                break;
136            }
137            parser.parse_chunk(data)?;
138            let len = data.len();
139            buffer.consume(len);
140
141            callback(&mut parser)?;
142        }
143        parser.eof()?;
144        callback(&mut parser)?;
145        parser.check_header()?;
146
147        Ok(parser)
148    }
149
150    /// Parse a chunk of input.
151    ///
152    /// After parsing the last chunk call the [`eof`](DimacsParser::eof) method.
153    ///
154    /// If this method returns an error, the parser is in an invalid state and cannot parse further
155    /// chunks.
156    pub fn parse_chunk(&mut self, chunk: &[u8]) -> Result<(), ParserError> {
157        if self.error {
158            return Err(ParserError::PreviousError);
159        }
160        for &byte in chunk.iter() {
161            if byte == b'\n' {
162                self.line_number += 1;
163            }
164            match byte {
165                b'\n' | b'\r' if self.in_comment_or_header => {
166                    if self.in_header {
167                        self.in_header = false;
168                        self.parse_header_line()?;
169                    }
170                    self.in_comment_or_header = false;
171                    self.start_of_line = true
172                }
173                _ if self.in_comment_or_header => {
174                    if self.in_header {
175                        self.header_line.push(byte);
176                    }
177                }
178                b'0'..=b'9' => {
179                    self.in_lit = true;
180                    let digit = (byte - b'0') as usize;
181
182                    const CAN_OVERFLOW: usize = Var::max_count() / 10;
183                    const OVERFLOW_DIGIT: usize = Var::max_count() % 10;
184
185                    // Overflow check that is fast but still works if LitIdx has the same size as
186                    // usize
187                    if CAN_OVERFLOW <= self.partial_lit {
188                        let carry = (digit <= OVERFLOW_DIGIT) as usize;
189
190                        if CAN_OVERFLOW + carry <= self.partial_lit {
191                            self.error = true;
192                            return Err(ParserError::LiteralTooLarge {
193                                line: self.line_number,
194                                index: self.partial_lit,
195                                final_digit: digit,
196                            });
197                        }
198                    }
199
200                    self.partial_lit = self.partial_lit * 10 + digit;
201
202                    self.start_of_line = false
203                }
204                b'-' if !self.negate_next_lit && !self.in_lit => {
205                    self.negate_next_lit = true;
206                    self.start_of_line = false
207                }
208                b' ' | b'\n' | b'\r' if !self.negate_next_lit || self.in_lit => {
209                    self.finish_literal();
210                    self.negate_next_lit = false;
211                    self.in_lit = false;
212                    self.partial_lit = 0;
213                    self.start_of_line = byte != b' ';
214                }
215                b'c' if self.start_of_line => {
216                    self.in_comment_or_header = true;
217                }
218                b'p' if self.start_of_line && self.header.is_none() => {
219                    self.in_comment_or_header = true;
220                    self.in_header = true;
221                    self.header_line.push(b'p');
222                }
223                _ => {
224                    self.error = true;
225                    return Err(ParserError::UnexpectedInput {
226                        line: self.line_number,
227                        unexpected: byte as char,
228                    });
229                }
230            }
231        }
232
233        Ok(())
234    }
235
236    /// Finish parsing the input.
237    ///
238    /// This does not check whether the header information was correct, call
239    /// [`check_header`](DimacsParser::check_header) for this.
240    pub fn eof(&mut self) -> Result<(), ParserError> {
241        if self.in_header {
242            self.parse_header_line()?;
243        }
244
245        self.finish_literal();
246
247        if !self.partial_clause.is_empty() {
248            return Err(ParserError::UnterminatedClause {
249                line: self.line_number,
250            });
251        }
252
253        Ok(())
254    }
255
256    /// Verifies the header information when present.
257    ///
258    /// Does nothing when the input doesn't contain a header.
259    pub fn check_header(&self) -> Result<(), ParserError> {
260        if let Some(header) = self.header {
261            let var_count = self.formula.var_count();
262            if var_count != header.var_count {
263                return Err(ParserError::VarCount {
264                    var_count,
265                    header_var_count: header.var_count,
266                });
267            }
268
269            if self.clause_count != header.clause_count {
270                return Err(ParserError::ClauseCount {
271                    clause_count: self.clause_count,
272                    header_clause_count: header.clause_count,
273                });
274            }
275        }
276
277        Ok(())
278    }
279
280    /// Returns the subformula of everything parsed since the last call to this method.
281    ///
282    /// To parse the whole input into a single [`CnfFormula`](varisat_formula::CnfFormula), simply
283    /// call this method once after calling [`eof`](DimacsParser::eof). For incremental parsing this
284    /// method can be invoked after each call of [`parse_chunk`](DimacsParser::parse_chunk).
285    ///
286    /// The variable count of the returned formula will be the maximum of the variable count so far
287    /// and the variable count of the header if present.
288    pub fn take_formula(&mut self) -> CnfFormula {
289        let mut new_formula = CnfFormula::new();
290        new_formula.set_var_count(self.formula.var_count());
291        replace(&mut self.formula, new_formula)
292    }
293
294    /// Return the DIMACS CNF header data if present.
295    pub fn header(&self) -> Option<DimacsHeader> {
296        self.header
297    }
298
299    /// Number of clauses parsed.
300    pub fn clause_count(&self) -> usize {
301        self.clause_count
302    }
303
304    /// Number of variables in the parsed formula.
305    pub fn var_count(&self) -> usize {
306        self.formula.var_count()
307    }
308
309    fn finish_literal(&mut self) {
310        if self.in_lit {
311            if self.partial_lit == 0 {
312                self.formula.add_clause(&self.partial_clause);
313                self.partial_clause.clear();
314                self.clause_count += 1;
315            } else {
316                self.partial_clause
317                    .push(Var::from_dimacs(self.partial_lit as isize).lit(!self.negate_next_lit));
318            }
319        }
320    }
321
322    fn parse_header_line(&mut self) -> Result<(), ParserError> {
323        let header_line = String::from_utf8_lossy(&self.header_line).into_owned();
324
325        if !header_line.starts_with("p ") {
326            return self.invalid_header(header_line);
327        }
328
329        let mut header_values = header_line[2..].split_whitespace();
330
331        if header_values.next() != Some("cnf") {
332            return self.invalid_header(header_line);
333        }
334
335        let var_count: usize = match header_values
336            .next()
337            .and_then(|value| str::parse(value).ok())
338        {
339            None => return self.invalid_header(header_line),
340            Some(value) => value,
341        };
342
343        if var_count > Var::max_count() {
344            self.error = true;
345            return Err(ParserError::LiteralTooLarge {
346                line: self.line_number,
347                index: var_count / 10,
348                final_digit: var_count % 10,
349            });
350        }
351
352        let clause_count: usize = match header_values
353            .next()
354            .and_then(|value| str::parse(value).ok())
355        {
356            None => return self.invalid_header(header_line),
357            Some(value) => value,
358        };
359
360        if header_values.next().is_some() {
361            return self.invalid_header(header_line);
362        }
363
364        self.header = Some(DimacsHeader {
365            var_count,
366            clause_count,
367        });
368
369        self.formula.set_var_count(var_count);
370
371        Ok(())
372    }
373
374    fn invalid_header(&mut self, header_line: String) -> Result<(), ParserError> {
375        self.error = true;
376        Err(ParserError::InvalidHeader {
377            line: self.line_number,
378            header: header_line,
379        })
380    }
381}
382
383/// Write a DIMACS CNF header.
384///
385/// Can be used with [`write_dimacs_clauses`] to implement incremental writing.
386pub fn write_dimacs_header(target: &mut impl io::Write, header: DimacsHeader) -> io::Result<()> {
387    writeln!(
388        target,
389        "p cnf {var_count} {clause_count}",
390        var_count = header.var_count,
391        clause_count = header.clause_count
392    )
393}
394
395/// Write an iterator of clauses as headerless DIMACS CNF.
396///
397/// Can be used with [`write_dimacs_header`] to implement incremental writing.
398pub fn write_dimacs_clauses(
399    target: &mut impl io::Write,
400    clauses: impl IntoIterator<Item = impl IntoIterator<Item = impl Borrow<Lit>>>,
401) -> io::Result<()> {
402    for clause in clauses.into_iter() {
403        for lit in clause.into_iter() {
404            itoa::write(&mut *target, lit.borrow().to_dimacs())?;
405            target.write_all(b" ")?;
406        }
407        target.write_all(b"0\n")?;
408    }
409    Ok(())
410}
411
412/// Write a formula as DIMACS CNF.
413///
414/// Use [`write_dimacs_header`] and [`write_dimacs_clauses`] to implement incremental writing.
415pub fn write_dimacs(target: &mut impl io::Write, formula: &CnfFormula) -> io::Result<()> {
416    write_dimacs_header(
417        &mut *target,
418        DimacsHeader {
419            var_count: formula.var_count(),
420            clause_count: formula.len(),
421        },
422    )?;
423    write_dimacs_clauses(&mut *target, formula.iter())
424}
425
426#[cfg(test)]
427mod tests {
428    use super::*;
429
430    use anyhow::Error;
431    use proptest::{test_runner::TestCaseError, *};
432
433    use varisat_formula::{cnf::strategy::*, cnf_formula};
434
435    #[test]
436    fn odd_whitespace() -> Result<(), Error> {
437        let parsed = DimacsParser::parse(
438            b"p  cnf  4   3  \n  1  \n 2  3\n0 -4 0 2\nccomment  \n\n0\n\n" as &[_],
439        )?;
440
441        let expected = cnf_formula![
442            1, 2, 3;
443            -4;
444            2;
445        ];
446
447        assert_eq!(parsed, expected);
448
449        Ok(())
450    }
451
452    macro_rules! expect_error {
453        ( $input:expr, $( $cases:tt )* ) => {
454            match DimacsParser::parse($input as &[_]) {
455                Ok(parsed) => panic!("Expexcted errror but got {:?}", parsed),
456                Err(err) => match err.downcast_ref() {
457                    Some(casted_err) => match casted_err {
458                        $( $cases )*,
459                        _ => panic!("Unexpected error {:?}", casted_err),
460                    },
461                    None => panic!("Unexpected error type {:?}", err),
462                }
463            }
464        };
465    }
466
467    #[test]
468    fn invalid_headers() {
469        expect_error!(b"pcnf 1 3", ParserError::InvalidHeader { .. } => ());
470        expect_error!(b"p notcnf 1 3", ParserError::InvalidHeader { .. } => ());
471        expect_error!(b"p cnf 1", ParserError::InvalidHeader { .. } => ());
472        expect_error!(b"p cnf 1 2 3", ParserError::InvalidHeader { .. } => ());
473        expect_error!(b"p cnf foo bar", ParserError::InvalidHeader { .. } => ());
474        expect_error!(b"p cnf -3 -6", ParserError::InvalidHeader { .. } => ());
475
476        expect_error!(
477            format!("p cnf {} 4", Var::max_var().to_dimacs() + 1).as_bytes(),
478            ParserError::LiteralTooLarge { .. } => ()
479        );
480        DimacsParser::parse(format!("p cnf {} 0", Var::max_var().to_dimacs()).as_bytes()).unwrap();
481
482        expect_error!(b"p cnf 4 18446744073709551616", ParserError::InvalidHeader { .. } => ());
483
484        expect_error!(
485            b"p cnf 1 2\np cnf 1 2\n",
486            ParserError::UnexpectedInput { unexpected: 'p', .. } => ()
487        );
488    }
489
490    #[test]
491    fn invalid_header_data() {
492        expect_error!(
493            b"p cnf 1 1\n 2 0",
494            ParserError::VarCount { var_count: 2, header_var_count: 1 } => ()
495        );
496
497        expect_error!(
498            b"p cnf 10 1\n 1 0 0",
499            ParserError::ClauseCount { clause_count: 2, header_clause_count: 1 } => ()
500        );
501
502        expect_error!(
503            b"p cnf 10 4\n 1 0",
504            ParserError::ClauseCount { clause_count: 1, header_clause_count: 4 } => ()
505        );
506    }
507
508    #[test]
509    fn syntax_errors() {
510        expect_error!(
511            b"1 2 ?foo",
512            ParserError::UnexpectedInput { unexpected: '?', .. } => ()
513        );
514
515        expect_error!(
516            b"1 2 - 3 0",
517            ParserError::UnexpectedInput { unexpected: ' ', .. } => ()
518        );
519
520        expect_error!(
521            b"1 2 -\n3 0",
522            ParserError::UnexpectedInput { unexpected: '\n', .. } => ()
523        );
524
525        expect_error!(
526            b"1 2 --3 0",
527            ParserError::UnexpectedInput { unexpected: '-', .. } => ()
528        );
529
530        expect_error!(
531            b"1 2-3 0",
532            ParserError::UnexpectedInput { unexpected: '-', .. } => ()
533        );
534    }
535
536    #[test]
537    fn unterminated_clause() {
538        expect_error!(
539            b"1 2 3",
540            ParserError::UnterminatedClause { .. } => ()
541        );
542    }
543
544    #[test]
545    fn literal_too_large() {
546        expect_error!(
547            format!("1 {} 2 0", Var::max_var().to_dimacs() + 1).as_bytes(),
548            ParserError::LiteralTooLarge { .. } => ()
549        );
550
551        assert_eq!(
552            DimacsParser::parse(format!("1 {} 2 0", Var::max_var().to_dimacs()).as_bytes())
553                .unwrap(),
554            cnf_formula![
555                1, Var::max_var().to_dimacs(), 2;
556            ]
557        );
558    }
559
560    proptest! {
561
562        #[test]
563        fn roundtrip(input in cnf_formula(1..100usize, 0..1000, 0..10)) {
564            let mut buf = vec![];
565
566            write_dimacs(&mut buf, &input)?;
567
568            let parsed = DimacsParser::parse(&buf[..]).map_err(|e| TestCaseError::fail(e.to_string()))?;
569
570            prop_assert_eq!(parsed, input);
571        }
572    }
573}