flussab_cnf/
cnf.rs

1//! Parsing and writing of the DIMACS CNF file format.
2use std::io::{BufReader, Read, Write};
3
4use flussab::{text::LineReader, write, DeferredReader, DeferredWriter};
5
6use crate::{error::ParseError, token, Dimacs};
7
8/// Header data of a DIMACS CNF file.
9///
10/// Contains the number of variables and clauses. This parser considers a count of `0` as
11/// unspecified and will not check that count during parsing, even in strict mode.
12#[derive(Copy, Clone, Debug)]
13pub struct Header {
14    /// Upper bound on the number of variables present in the formula.
15    ///
16    /// Ignored during parsing when `0`.
17    pub var_count: usize,
18    /// Number of clauses present in the formula.
19    ///
20    /// Ignored during parsing when `0`.
21    pub clause_count: usize,
22}
23
24/// Configuration for the DIMACS CNF parser.
25#[derive(Clone, Default, Debug)]
26#[non_exhaustive]
27pub struct Config {
28    /// When set, the variable and clause count of the DIMACS CNF header are ignored
29    /// during parsing. (Default: `false`)
30    pub ignore_header: bool,
31}
32
33impl Config {
34    #[inline]
35    /// Sets the [`ignore_header`][Self#structfield.ignore_header] field.
36    pub fn ignore_header(mut self, value: bool) -> Self {
37        self.ignore_header = value;
38        self
39    }
40}
41
42/// Parser for the DIMACS CNF file format.
43pub struct Parser<'a, L> {
44    reader: LineReader<'a>,
45    clause_count: usize,
46    clause_limit: usize,
47    clause_limit_active: bool,
48    lit_limit: isize,
49    lit_limit_is_hard: bool,
50    lit_buf: Vec<L>,
51    header: Option<Header>,
52}
53
54impl<'a, L> Parser<'a, L>
55where
56    L: Dimacs,
57{
58    /// Creates a parser reading from a [`BufReader`].
59    pub fn from_buf_reader(
60        buf_reader: BufReader<impl Read + 'a>,
61        config: Config,
62    ) -> Result<Self, ParseError> {
63        Self::new(
64            LineReader::new(DeferredReader::from_buf_reader(buf_reader)),
65            config,
66        )
67    }
68
69    /// Creates a parser reading from a [`Read`] instance.
70    ///
71    /// If the [`Read`] instance is a [`BufReader`], it is better to use
72    /// [`from_buf_reader`][Self::from_buf_reader] to avoid unnecessary double buffering of the
73    /// data.
74    pub fn from_read(read: impl Read + 'a, config: Config) -> Result<Self, ParseError> {
75        Self::new(LineReader::new(DeferredReader::from_read(read)), config)
76    }
77
78    /// Creates a parser reading from a boxed [`Read`] instance.
79    ///
80    /// If the [`Read`] instance is a [`BufReader`], it is better to use
81    /// [`from_buf_reader`][Self::from_buf_reader] to avoid unnecessary double buffering of the
82    /// data.
83    #[inline(never)]
84    pub fn from_boxed_dyn_read(
85        read: Box<dyn Read + 'a>,
86        config: Config,
87    ) -> Result<Self, ParseError> {
88        Self::new(
89            LineReader::new(DeferredReader::from_boxed_dyn_read(read)),
90            config,
91        )
92    }
93
94    /// Creates a parser reading from a [`LineReader`].
95    pub fn new(reader: LineReader<'a>, config: Config) -> Result<Self, ParseError> {
96        let mut new = Self {
97            reader,
98            clause_count: 0,
99            clause_limit: 0,
100            clause_limit_active: false,
101            lit_limit: L::MAX_DIMACS,
102            lit_limit_is_hard: true,
103            lit_buf: vec![],
104            header: None,
105        };
106
107        if let Some(header) = new.parse_header()? {
108            if !config.ignore_header {
109                if header.var_count != 0 {
110                    new.lit_limit = header.var_count as isize;
111                    new.lit_limit_is_hard = false;
112                }
113                if header.clause_count != 0 {
114                    new.clause_limit = header.clause_count;
115                    new.clause_limit_active = true;
116                }
117            }
118            new.header = Some(header);
119        }
120
121        Ok(new)
122    }
123
124    fn parse_header(&mut self) -> Result<Option<Header>, ParseError> {
125        let reader = &mut self.reader;
126
127        token::skip_whitespace(reader);
128        while token::comment(reader).matches()? || token::newline(reader).matches()? {}
129
130        token::word(reader, b"p")
131            .and_then(|_| {
132                token::word(reader, b"cnf").or_give_up(|| token::unexpected(reader, "\"cnf\""))?;
133
134                let var_count = token::var_count::<L>(reader)
135                    .or_give_up(|| token::unexpected(reader, "variable count"))?;
136
137                let clause_count = token::uint_count(reader, "clause count")
138                    .or_give_up(|| token::unexpected(reader, "clause count"))?;
139
140                token::interactive_end_of_line(reader)
141                    .or_give_up(|| token::unexpected(reader, "end of line"))?;
142
143                Ok(Header {
144                    var_count,
145                    clause_count,
146                })
147            })
148            .optional()
149    }
150
151    /// Returns the DIMACS CNF header if it was present.
152    pub fn header(&self) -> Option<Header> {
153        self.header
154    }
155
156    /// Parses and returns the next clause.
157    ///
158    /// Returns `Ok(None)` if the end of file was successfully reached.
159    pub fn next_clause(&mut self) -> Result<Option<&[L]>, ParseError> {
160        let input = &mut self.reader;
161        self.lit_buf.clear();
162
163        token::skip_whitespace(input);
164        loop {
165            if self.clause_count != self.clause_limit || !self.clause_limit_active {
166                let clause = token::clause_lits(
167                    input,
168                    &mut self.lit_buf,
169                    self.lit_limit,
170                    self.lit_limit_is_hard,
171                )
172                .and_also(|_| {
173                    token::interactive_end_of_line(input)
174                        .or_give_up(|| token::unexpected(input, "end of line"))
175                });
176                if clause.matches()? {
177                    self.clause_count += 1;
178                    break Ok(Some(&self.lit_buf));
179                }
180            }
181
182            if token::comment(input).matches()? {
183                continue;
184            }
185
186            if token::newline(input).matches()? {
187                continue;
188            }
189
190            if (!self.clause_limit_active || self.clause_count >= self.clause_limit)
191                && token::eof(input).matches()?
192            {
193                break Ok(None);
194            }
195
196            break Err(self.unexpected_statement());
197        }
198    }
199
200    #[cold]
201    #[inline(never)]
202    fn unexpected_statement(&mut self) -> ParseError {
203        let mut expected = vec![];
204
205        if self.clause_count == 0 && self.header.is_none() {
206            expected.push("header");
207        }
208        if !self.clause_limit_active || self.clause_count < self.clause_limit {
209            expected.push("clause");
210        }
211        expected.push("comment");
212
213        if !self.clause_limit_active || self.clause_count >= self.clause_limit {
214            expected.push("end of file");
215        }
216
217        let last = expected.pop().unwrap();
218        let mut expected = expected.join(", ");
219        expected.push_str(" or ");
220        expected.push_str(last);
221
222        token::unexpected(&mut self.reader, &expected)
223    }
224}
225
226/// Writes a DIMACS CNF header.
227pub fn write_header(writer: &mut DeferredWriter, header: Header) {
228    let _ = writeln!(writer, "p cnf {} {}", header.var_count, header.clause_count);
229}
230
231/// Writes a clause.
232pub fn write_clause<L: Dimacs>(writer: &mut DeferredWriter, clause_lits: &[L]) {
233    for lit in clause_lits {
234        write::text::ascii_digits(writer, lit.dimacs());
235        writer.write_all_defer_err(b" ");
236    }
237    writer.write_all_defer_err(b"0\n")
238}
239
240#[cfg(test)]
241mod tests {
242    use assert_matches::assert_matches;
243
244    use super::*;
245
246    type Result<T> = std::result::Result<T, ParseError>;
247
248    #[test]
249    fn empty() -> Result<()> {
250        let mut parser = Parser::<i32>::from_read("".as_bytes(), Config::default())?;
251
252        assert_eq!(parser.next_clause()?, None);
253        Ok(())
254    }
255
256    #[test]
257    fn headerless() -> Result<()> {
258        let mut parser =
259            Parser::<i32>::from_read("1 2 -3 0\n4 5 0\n-6 0\n0\n".as_bytes(), Config::default())?;
260
261        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
262        assert_eq!(parser.next_clause()?, Some(&[4, 5][..]));
263        assert_eq!(parser.next_clause()?, Some(&[-6][..]));
264        assert_eq!(parser.next_clause()?, Some(&[][..]));
265        assert_eq!(parser.next_clause()?, None);
266        Ok(())
267    }
268
269    #[test]
270    fn eof_clause() -> Result<()> {
271        let mut parser =
272            Parser::<i32>::from_read("1 2 -3 0\n4 5 0\n-6 0".as_bytes(), Config::default())?;
273
274        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
275        assert_eq!(parser.next_clause()?, Some(&[4, 5][..]));
276        assert_eq!(parser.next_clause()?, Some(&[-6][..]));
277        assert_eq!(parser.next_clause()?, None);
278        Ok(())
279    }
280
281    #[test]
282    fn empty_lines() -> Result<()> {
283        let mut parser = Parser::<i32>::from_read(
284            "\n1 2 -3 0\n\n4 5 0\n\n-6 0\n\n".as_bytes(),
285            Config::default(),
286        )?;
287
288        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
289        assert_eq!(parser.next_clause()?, Some(&[4, 5][..]));
290        assert_eq!(parser.next_clause()?, Some(&[-6][..]));
291        assert_eq!(parser.next_clause()?, None);
292        Ok(())
293    }
294
295    #[test]
296    fn split_clauses() -> Result<()> {
297        let mut parser =
298            Parser::<i32>::from_read("1 2\n-3 0\n4 5\n0\n-6\n0\n".as_bytes(), Config::default())?;
299
300        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
301        assert_eq!(parser.next_clause()?, Some(&[4, 5][..]));
302        assert_eq!(parser.next_clause()?, Some(&[-6][..]));
303        assert_eq!(parser.next_clause()?, None);
304        Ok(())
305    }
306
307    #[test]
308    fn split_clauses_with_comments() -> Result<()> {
309        let mut parser = Parser::<i32>::from_read(
310            "1 2\nc 0\n-3 0\nc 0\n4 5\nc 0\n0\nc 0\n-6\nc 0\n0\n".as_bytes(),
311            Config::default(),
312        )?;
313
314        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
315        assert_eq!(parser.next_clause()?, Some(&[4, 5][..]));
316        assert_eq!(parser.next_clause()?, Some(&[-6][..]));
317        assert_eq!(parser.next_clause()?, None);
318        Ok(())
319    }
320
321    #[test]
322    fn incomplete_header() {
323        let parser = Parser::<i32>::from_read("p cnf\n1 2 -3 0\n".as_bytes(), Config::default());
324
325        assert_matches!(parser.err(), Some(..));
326
327        let parser = Parser::<i32>::from_read("p cnf 0\n1 2 -3 0\n".as_bytes(), Config::default());
328
329        assert_matches!(parser.err(), Some(..));
330    }
331
332    #[test]
333    fn empty_header() -> Result<()> {
334        let mut parser =
335            Parser::<i32>::from_read("p cnf 0 0\n1 2 -3 0\n".as_bytes(), Config::default())?;
336
337        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
338        assert_eq!(parser.next_clause()?, None);
339        Ok(())
340    }
341
342    #[test]
343    fn empty_lines_before_header() -> Result<()> {
344        let mut parser =
345            Parser::<i32>::from_read("\n\np cnf 0 0\n1 2 -3 0\n".as_bytes(), Config::default())?;
346
347        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
348        assert_eq!(parser.next_clause()?, None);
349        Ok(())
350    }
351
352    #[test]
353    fn var_only_header() -> Result<()> {
354        let mut parser =
355            Parser::<i32>::from_read("p cnf 3 0\n1 2 -3 0\n".as_bytes(), Config::default())?;
356
357        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
358        assert_eq!(parser.next_clause()?, None);
359        Ok(())
360    }
361
362    #[test]
363    fn full_header() -> Result<()> {
364        let mut parser =
365            Parser::<i32>::from_read("p cnf 3 1\n1 2 -3 0\n".as_bytes(), Config::default())?;
366
367        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
368        assert_eq!(parser.next_clause()?, None);
369        Ok(())
370    }
371
372    #[test]
373    fn early_comment() -> Result<()> {
374        let mut parser =
375            Parser::<i32>::from_read("c 9 0\np cnf 3 1\n1 2 -3 0\n".as_bytes(), Config::default())?;
376
377        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
378        assert_eq!(parser.next_clause()?, None);
379        Ok(())
380    }
381
382    #[test]
383    fn mid_comment() -> Result<()> {
384        let mut parser =
385            Parser::<i32>::from_read("p cnf 3 1\nc 9 0\n1 2 -3 0\n".as_bytes(), Config::default())?;
386
387        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
388        assert_eq!(parser.next_clause()?, None);
389        Ok(())
390    }
391
392    #[test]
393    fn late_comment() -> Result<()> {
394        let mut parser =
395            Parser::<i32>::from_read("p cnf 3 1\n1 2 -3 0\nc 9 0\n".as_bytes(), Config::default())?;
396
397        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
398        assert_eq!(parser.next_clause()?, None);
399        Ok(())
400    }
401
402    #[test]
403    fn crlf_newlines() -> Result<()> {
404        let mut parser = Parser::<i32>::from_read(
405            "p cnf 3 1\r\n1 2 -3 0\r\nc 0\r\n".as_bytes(),
406            Config::default(),
407        )?;
408
409        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
410        assert_eq!(parser.next_clause()?, None);
411        Ok(())
412    }
413
414    #[test]
415    fn extra_misc_whitespace() -> Result<()> {
416        let mut parser = Parser::<i32>::from_read(
417            " p\tcnf  3 1\t\n\t1\t 2\t-3\t0\r\n".as_bytes(),
418            Config::default(),
419        )?;
420
421        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
422        assert_eq!(parser.next_clause()?, None);
423        Ok(())
424    }
425
426    #[test]
427    fn leading_zeros_and_negative_zero() -> Result<()> {
428        let mut parser = Parser::<i32>::from_read(
429            "p cnf 00000000000000000000004 002\n00001 02 -03 00\n 004 -00\n".as_bytes(),
430            Config::default(),
431        )?;
432
433        assert_eq!(parser.next_clause()?, Some(&[1, 2, -3][..]));
434        assert_eq!(parser.next_clause()?, Some(&[4][..]));
435        assert_eq!(parser.next_clause()?, None);
436        Ok(())
437    }
438
439    #[test]
440    fn max_var_count() -> Result<()> {
441        let input = format!("p cnf {} 0\n{0} -{0} 0\n", i32::MAX);
442        let mut parser = Parser::<i32>::from_read(input.as_bytes(), Config::default())?;
443
444        assert_eq!(parser.next_clause()?, Some(&[i32::MAX, -i32::MAX][..]));
445        Ok(())
446    }
447
448    #[test]
449    fn err_exceeding_max_var_count() {
450        let input = format!("p cnf {} 0\n", (i32::MAX as u32) + 1);
451        let parser = Parser::<i32>::from_read(input.as_bytes(), Config::default());
452
453        assert_matches!(parser.err(), Some(..));
454    }
455
456    #[test]
457    fn err_negative_var_count() {
458        let parser = Parser::<i32>::from_read("p cnf -1 0".as_bytes(), Config::default());
459
460        assert_matches!(parser.err(), Some(..));
461    }
462
463    #[test]
464    fn err_exceeding_max_clause_count() {
465        let input = format!("p cnf 1 {}\n", (u64::MAX as u128) + 1);
466        let parser = Parser::<i32>::from_read(input.as_bytes(), Config::default());
467
468        assert_matches!(parser.err(), Some(..));
469    }
470
471    #[test]
472    fn err_wrong_header() {
473        let parser = Parser::<i32>::from_read("p notcnf\n".as_bytes(), Config::default());
474        assert_matches!(parser.err(), Some(..));
475    }
476
477    #[test]
478    fn err_wrong_header_line2() {
479        let parser = Parser::<i32>::from_read("\np\n".as_bytes(), Config::default());
480
481        assert_matches!(parser.err(), Some(..));
482    }
483
484    #[test]
485    fn err_missing_clauses() -> Result<()> {
486        let mut parser =
487            Parser::<i32>::from_read("p cnf 3 2\n1 -2 3 0\n".as_bytes(), Config::default())?;
488
489        assert_eq!(parser.next_clause()?, Some(&[1, -2, 3][..]));
490        assert_matches!(parser.next_clause(), Err(..));
491        Ok(())
492    }
493
494    #[test]
495    fn err_extra_clauses() -> Result<()> {
496        let mut parser = Parser::<i32>::from_read(
497            "p cnf 3 2\n1 -2 3 0\n2 0\n3 0\n".as_bytes(),
498            Config::default(),
499        )?;
500
501        assert_eq!(parser.next_clause()?, Some(&[1, -2, 3][..]));
502        assert_eq!(parser.next_clause()?, Some(&[2][..]));
503        assert_matches!(parser.next_clause(), Err(..));
504        Ok(())
505    }
506
507    #[test]
508    fn err_pos_lit_out_of_range() -> Result<()> {
509        let mut parser =
510            Parser::<i32>::from_read("p cnf 3 2\n1 -2 3 0\n2 4 0".as_bytes(), Config::default())?;
511
512        assert_eq!(parser.next_clause()?, Some(&[1, -2, 3][..]));
513        assert_matches!(parser.next_clause(), Err(..));
514        Ok(())
515    }
516
517    #[test]
518    fn pos_lit_out_of_range_ignored_header() -> Result<()> {
519        let mut parser = Parser::<i32>::from_read(
520            "p cnf 3 2\n1 -2 3 0\n2 4 0".as_bytes(),
521            Config::default().ignore_header(true),
522        )?;
523
524        assert_eq!(parser.next_clause()?, Some(&[1, -2, 3][..]));
525        assert_eq!(parser.next_clause()?, Some(&[2, 4][..]));
526        Ok(())
527    }
528
529    #[test]
530    fn err_neg_lit_out_of_range() -> Result<()> {
531        let mut parser =
532            Parser::<i32>::from_read("p cnf 3 2\n1 -2 3 0\n2 -4 0".as_bytes(), Config::default())?;
533
534        assert_eq!(parser.next_clause()?, Some(&[1, -2, 3][..]));
535        assert_matches!(parser.next_clause(), Err(..));
536        Ok(())
537    }
538
539    #[test]
540    fn err_unterminated_clause() -> Result<()> {
541        let mut parser =
542            Parser::<i32>::from_read("p cnf 3 2\n1 -2 3 0\n2 -3".as_bytes(), Config::default())?;
543
544        assert_eq!(parser.next_clause()?, Some(&[1, -2, 3][..]));
545        assert_matches!(parser.next_clause(), Err(..));
546        Ok(())
547    }
548
549    #[test]
550    fn err_dangling_literal() -> Result<()> {
551        let mut parser = Parser::<i32>::from_read(
552            "p cnf 3 2\n1 -2 3 0\n2 -3 0 1 0\n".as_bytes(),
553            Config::default(),
554        )?;
555
556        assert_eq!(parser.next_clause()?, Some(&[1, -2, 3][..]));
557        assert_matches!(parser.next_clause(), Err(..));
558        Ok(())
559    }
560
561    #[test]
562    fn err_unexpected_token_var_count() {
563        let parser = Parser::<i32>::from_read("p cnf error 2\n".as_bytes(), Config::default());
564
565        assert_matches!(parser.err(), Some(..));
566    }
567
568    #[test]
569    fn err_unexpected_token_clause_count() {
570        let parser = Parser::<i32>::from_read("p cnf 2 error\n".as_bytes(), Config::default());
571
572        assert_matches!(parser.err(), Some(..));
573    }
574
575    #[test]
576    fn err_extra_header_field() {
577        let parser = Parser::<i32>::from_read("p cnf 2 1 2\n".as_bytes(), Config::default());
578
579        assert_matches!(parser.err(), Some(..));
580    }
581
582    #[test]
583    fn err_unexpected_token_clause() -> Result<()> {
584        let mut parser =
585            Parser::<i32>::from_read("p cnf 2 1\n 1 2 err 0\n".as_bytes(), Config::default())?;
586
587        assert_matches!(parser.next_clause(), Err(..));
588
589        Ok(())
590    }
591
592    #[test]
593    fn roundtrip() -> Result<()> {
594        let input = &r"
595p cnf 5 12
596-1 -2 0
597-1 -3 0
598-1 -4 0
599-1 -5 0
600-2 -3 0
601-2 -4 0
602-2 -5 0
603-3 -4 0
604-3 -5 0
605-4 -5 0
6062 5 3 4 1 0
6074 2 3 1 5 0
608"[1..];
609
610        let mut output = vec![];
611        let mut parser = Parser::<i32>::from_read(input.as_bytes(), Config::default())?;
612
613        {
614            let mut writer = DeferredWriter::from_write(&mut output);
615
616            write_header(&mut writer, parser.header().unwrap());
617
618            while let Some(clause) = parser.next_clause()? {
619                write_clause(&mut writer, clause);
620            }
621
622            writer.flush()?;
623        }
624
625        assert_eq!(input.as_bytes(), output);
626
627        Ok(())
628    }
629}