1use std::{borrow::Borrow, io, mem::replace};
4
5use varisat_formula::{CnfFormula, ExtendFormula, Lit, Var};
6
7use anyhow::Error;
8use thiserror::Error;
9
10#[derive(Debug, Error)]
12pub enum ParserError {
13 #[error(
14 "line {}: Unexpected character in DIMACS CNF input: '{}'",
15 line,
16 unexpected
17 )]
18 UnexpectedInput { line: usize, unexpected: char },
19 #[error(
20 "line {}: Literal index is too large: {}{}...",
21 line,
22 index,
23 final_digit
24 )]
25 LiteralTooLarge {
26 line: usize,
27 index: usize,
28 final_digit: usize,
29 },
30 #[error("line {}: Invalid header syntax: {}", line, header)]
31 InvalidHeader { line: usize, header: String },
32 #[error("line {}: Unterminated clause", line)]
33 UnterminatedClause { line: usize },
34 #[error(
35 "Formula has {} variables while the header specifies {} variables",
36 var_count,
37 header_var_count
38 )]
39 VarCount {
40 var_count: usize,
41 header_var_count: usize,
42 },
43 #[error(
44 "Formula has {} clauses while the header specifies {} clauses",
45 clause_count,
46 header_clause_count
47 )]
48 ClauseCount {
49 clause_count: usize,
50 header_clause_count: usize,
51 },
52 #[error("Parser invoked after a previous error")]
53 PreviousError,
54}
55
56#[derive(Copy, Clone, Debug)]
58pub struct DimacsHeader {
59 pub var_count: usize,
60 pub clause_count: usize,
61}
62
63#[derive(Default)]
67pub struct DimacsParser {
68 formula: CnfFormula,
69 partial_clause: Vec<Lit>,
70 header: Option<DimacsHeader>,
71
72 line_number: usize,
73 clause_count: usize,
74 partial_lit: usize,
75 negate_next_lit: bool,
76
77 in_lit: bool,
78 in_comment_or_header: bool,
79 in_header: bool,
80 start_of_line: bool,
81 error: bool,
82
83 header_line: Vec<u8>,
84}
85
86impl DimacsParser {
87 pub fn new() -> DimacsParser {
89 DimacsParser {
90 formula: CnfFormula::new(),
91 partial_clause: vec![],
92 header: None,
93
94 line_number: 1,
95 clause_count: 0,
96 partial_lit: 0,
97 negate_next_lit: false,
98
99 in_lit: false,
100 in_comment_or_header: false,
101 in_header: false,
102 start_of_line: true,
103 error: false,
104
105 header_line: vec![],
106 }
107 }
108
109 pub fn parse(input: impl io::Read) -> Result<CnfFormula, Error> {
115 Ok(Self::parse_incremental(input, |_| Ok(()))?.take_formula())
116 }
117
118 pub fn parse_incremental(
124 input: impl io::Read,
125 mut callback: impl FnMut(&mut DimacsParser) -> Result<(), Error>,
126 ) -> Result<DimacsParser, Error> {
127 use io::BufRead;
128
129 let mut buffer = io::BufReader::new(input);
130 let mut parser = Self::new();
131
132 loop {
133 let data = buffer.fill_buf()?;
134 if data.is_empty() {
135 break;
136 }
137 parser.parse_chunk(data)?;
138 let len = data.len();
139 buffer.consume(len);
140
141 callback(&mut parser)?;
142 }
143 parser.eof()?;
144 callback(&mut parser)?;
145 parser.check_header()?;
146
147 Ok(parser)
148 }
149
150 pub fn parse_chunk(&mut self, chunk: &[u8]) -> Result<(), ParserError> {
157 if self.error {
158 return Err(ParserError::PreviousError);
159 }
160 for &byte in chunk.iter() {
161 if byte == b'\n' {
162 self.line_number += 1;
163 }
164 match byte {
165 b'\n' | b'\r' if self.in_comment_or_header => {
166 if self.in_header {
167 self.in_header = false;
168 self.parse_header_line()?;
169 }
170 self.in_comment_or_header = false;
171 self.start_of_line = true
172 }
173 _ if self.in_comment_or_header => {
174 if self.in_header {
175 self.header_line.push(byte);
176 }
177 }
178 b'0'..=b'9' => {
179 self.in_lit = true;
180 let digit = (byte - b'0') as usize;
181
182 const CAN_OVERFLOW: usize = Var::max_count() / 10;
183 const OVERFLOW_DIGIT: usize = Var::max_count() % 10;
184
185 if CAN_OVERFLOW <= self.partial_lit {
188 let carry = (digit <= OVERFLOW_DIGIT) as usize;
189
190 if CAN_OVERFLOW + carry <= self.partial_lit {
191 self.error = true;
192 return Err(ParserError::LiteralTooLarge {
193 line: self.line_number,
194 index: self.partial_lit,
195 final_digit: digit,
196 });
197 }
198 }
199
200 self.partial_lit = self.partial_lit * 10 + digit;
201
202 self.start_of_line = false
203 }
204 b'-' if !self.negate_next_lit && !self.in_lit => {
205 self.negate_next_lit = true;
206 self.start_of_line = false
207 }
208 b' ' | b'\n' | b'\r' if !self.negate_next_lit || self.in_lit => {
209 self.finish_literal();
210 self.negate_next_lit = false;
211 self.in_lit = false;
212 self.partial_lit = 0;
213 self.start_of_line = byte != b' ';
214 }
215 b'c' if self.start_of_line => {
216 self.in_comment_or_header = true;
217 }
218 b'p' if self.start_of_line && self.header.is_none() => {
219 self.in_comment_or_header = true;
220 self.in_header = true;
221 self.header_line.push(b'p');
222 }
223 _ => {
224 self.error = true;
225 return Err(ParserError::UnexpectedInput {
226 line: self.line_number,
227 unexpected: byte as char,
228 });
229 }
230 }
231 }
232
233 Ok(())
234 }
235
236 pub fn eof(&mut self) -> Result<(), ParserError> {
241 if self.in_header {
242 self.parse_header_line()?;
243 }
244
245 self.finish_literal();
246
247 if !self.partial_clause.is_empty() {
248 return Err(ParserError::UnterminatedClause {
249 line: self.line_number,
250 });
251 }
252
253 Ok(())
254 }
255
256 pub fn check_header(&self) -> Result<(), ParserError> {
260 if let Some(header) = self.header {
261 let var_count = self.formula.var_count();
262 if var_count != header.var_count {
263 return Err(ParserError::VarCount {
264 var_count,
265 header_var_count: header.var_count,
266 });
267 }
268
269 if self.clause_count != header.clause_count {
270 return Err(ParserError::ClauseCount {
271 clause_count: self.clause_count,
272 header_clause_count: header.clause_count,
273 });
274 }
275 }
276
277 Ok(())
278 }
279
280 pub fn take_formula(&mut self) -> CnfFormula {
289 let mut new_formula = CnfFormula::new();
290 new_formula.set_var_count(self.formula.var_count());
291 replace(&mut self.formula, new_formula)
292 }
293
294 pub fn header(&self) -> Option<DimacsHeader> {
296 self.header
297 }
298
299 pub fn clause_count(&self) -> usize {
301 self.clause_count
302 }
303
304 pub fn var_count(&self) -> usize {
306 self.formula.var_count()
307 }
308
309 fn finish_literal(&mut self) {
310 if self.in_lit {
311 if self.partial_lit == 0 {
312 self.formula.add_clause(&self.partial_clause);
313 self.partial_clause.clear();
314 self.clause_count += 1;
315 } else {
316 self.partial_clause
317 .push(Var::from_dimacs(self.partial_lit as isize).lit(!self.negate_next_lit));
318 }
319 }
320 }
321
322 fn parse_header_line(&mut self) -> Result<(), ParserError> {
323 let header_line = String::from_utf8_lossy(&self.header_line).into_owned();
324
325 if !header_line.starts_with("p ") {
326 return self.invalid_header(header_line);
327 }
328
329 let mut header_values = header_line[2..].split_whitespace();
330
331 if header_values.next() != Some("cnf") {
332 return self.invalid_header(header_line);
333 }
334
335 let var_count: usize = match header_values
336 .next()
337 .and_then(|value| str::parse(value).ok())
338 {
339 None => return self.invalid_header(header_line),
340 Some(value) => value,
341 };
342
343 if var_count > Var::max_count() {
344 self.error = true;
345 return Err(ParserError::LiteralTooLarge {
346 line: self.line_number,
347 index: var_count / 10,
348 final_digit: var_count % 10,
349 });
350 }
351
352 let clause_count: usize = match header_values
353 .next()
354 .and_then(|value| str::parse(value).ok())
355 {
356 None => return self.invalid_header(header_line),
357 Some(value) => value,
358 };
359
360 if header_values.next().is_some() {
361 return self.invalid_header(header_line);
362 }
363
364 self.header = Some(DimacsHeader {
365 var_count,
366 clause_count,
367 });
368
369 self.formula.set_var_count(var_count);
370
371 Ok(())
372 }
373
374 fn invalid_header(&mut self, header_line: String) -> Result<(), ParserError> {
375 self.error = true;
376 Err(ParserError::InvalidHeader {
377 line: self.line_number,
378 header: header_line,
379 })
380 }
381}
382
383pub fn write_dimacs_header(target: &mut impl io::Write, header: DimacsHeader) -> io::Result<()> {
387 writeln!(
388 target,
389 "p cnf {var_count} {clause_count}",
390 var_count = header.var_count,
391 clause_count = header.clause_count
392 )
393}
394
395pub fn write_dimacs_clauses(
399 target: &mut impl io::Write,
400 clauses: impl IntoIterator<Item = impl IntoIterator<Item = impl Borrow<Lit>>>,
401) -> io::Result<()> {
402 for clause in clauses.into_iter() {
403 for lit in clause.into_iter() {
404 itoa::write(&mut *target, lit.borrow().to_dimacs())?;
405 target.write_all(b" ")?;
406 }
407 target.write_all(b"0\n")?;
408 }
409 Ok(())
410}
411
412pub fn write_dimacs(target: &mut impl io::Write, formula: &CnfFormula) -> io::Result<()> {
416 write_dimacs_header(
417 &mut *target,
418 DimacsHeader {
419 var_count: formula.var_count(),
420 clause_count: formula.len(),
421 },
422 )?;
423 write_dimacs_clauses(&mut *target, formula.iter())
424}
425
426#[cfg(test)]
427mod tests {
428 use super::*;
429
430 use anyhow::Error;
431 use proptest::{test_runner::TestCaseError, *};
432
433 use varisat_formula::{cnf::strategy::*, cnf_formula};
434
435 #[test]
436 fn odd_whitespace() -> Result<(), Error> {
437 let parsed = DimacsParser::parse(
438 b"p cnf 4 3 \n 1 \n 2 3\n0 -4 0 2\nccomment \n\n0\n\n" as &[_],
439 )?;
440
441 let expected = cnf_formula![
442 1, 2, 3;
443 -4;
444 2;
445 ];
446
447 assert_eq!(parsed, expected);
448
449 Ok(())
450 }
451
452 macro_rules! expect_error {
453 ( $input:expr, $( $cases:tt )* ) => {
454 match DimacsParser::parse($input as &[_]) {
455 Ok(parsed) => panic!("Expexcted errror but got {:?}", parsed),
456 Err(err) => match err.downcast_ref() {
457 Some(casted_err) => match casted_err {
458 $( $cases )*,
459 _ => panic!("Unexpected error {:?}", casted_err),
460 },
461 None => panic!("Unexpected error type {:?}", err),
462 }
463 }
464 };
465 }
466
467 #[test]
468 fn invalid_headers() {
469 expect_error!(b"pcnf 1 3", ParserError::InvalidHeader { .. } => ());
470 expect_error!(b"p notcnf 1 3", ParserError::InvalidHeader { .. } => ());
471 expect_error!(b"p cnf 1", ParserError::InvalidHeader { .. } => ());
472 expect_error!(b"p cnf 1 2 3", ParserError::InvalidHeader { .. } => ());
473 expect_error!(b"p cnf foo bar", ParserError::InvalidHeader { .. } => ());
474 expect_error!(b"p cnf -3 -6", ParserError::InvalidHeader { .. } => ());
475
476 expect_error!(
477 format!("p cnf {} 4", Var::max_var().to_dimacs() + 1).as_bytes(),
478 ParserError::LiteralTooLarge { .. } => ()
479 );
480 DimacsParser::parse(format!("p cnf {} 0", Var::max_var().to_dimacs()).as_bytes()).unwrap();
481
482 expect_error!(b"p cnf 4 18446744073709551616", ParserError::InvalidHeader { .. } => ());
483
484 expect_error!(
485 b"p cnf 1 2\np cnf 1 2\n",
486 ParserError::UnexpectedInput { unexpected: 'p', .. } => ()
487 );
488 }
489
490 #[test]
491 fn invalid_header_data() {
492 expect_error!(
493 b"p cnf 1 1\n 2 0",
494 ParserError::VarCount { var_count: 2, header_var_count: 1 } => ()
495 );
496
497 expect_error!(
498 b"p cnf 10 1\n 1 0 0",
499 ParserError::ClauseCount { clause_count: 2, header_clause_count: 1 } => ()
500 );
501
502 expect_error!(
503 b"p cnf 10 4\n 1 0",
504 ParserError::ClauseCount { clause_count: 1, header_clause_count: 4 } => ()
505 );
506 }
507
508 #[test]
509 fn syntax_errors() {
510 expect_error!(
511 b"1 2 ?foo",
512 ParserError::UnexpectedInput { unexpected: '?', .. } => ()
513 );
514
515 expect_error!(
516 b"1 2 - 3 0",
517 ParserError::UnexpectedInput { unexpected: ' ', .. } => ()
518 );
519
520 expect_error!(
521 b"1 2 -\n3 0",
522 ParserError::UnexpectedInput { unexpected: '\n', .. } => ()
523 );
524
525 expect_error!(
526 b"1 2 --3 0",
527 ParserError::UnexpectedInput { unexpected: '-', .. } => ()
528 );
529
530 expect_error!(
531 b"1 2-3 0",
532 ParserError::UnexpectedInput { unexpected: '-', .. } => ()
533 );
534 }
535
536 #[test]
537 fn unterminated_clause() {
538 expect_error!(
539 b"1 2 3",
540 ParserError::UnterminatedClause { .. } => ()
541 );
542 }
543
544 #[test]
545 fn literal_too_large() {
546 expect_error!(
547 format!("1 {} 2 0", Var::max_var().to_dimacs() + 1).as_bytes(),
548 ParserError::LiteralTooLarge { .. } => ()
549 );
550
551 assert_eq!(
552 DimacsParser::parse(format!("1 {} 2 0", Var::max_var().to_dimacs()).as_bytes())
553 .unwrap(),
554 cnf_formula![
555 1, Var::max_var().to_dimacs(), 2;
556 ]
557 );
558 }
559
560 proptest! {
561
562 #[test]
563 fn roundtrip(input in cnf_formula(1..100usize, 0..1000, 0..10)) {
564 let mut buf = vec![];
565
566 write_dimacs(&mut buf, &input)?;
567
568 let parsed = DimacsParser::parse(&buf[..]).map_err(|e| TestCaseError::fail(e.to_string()))?;
569
570 prop_assert_eq!(parsed, input);
571 }
572 }
573}