rate_common/
parser.rs

1//! DIMACS and DRAT/DPR parser
2
3use crate::{
4    clause::{puts_clause, Clause, ProofStep, RedundancyProperty},
5    clausedatabase::{ClauseDatabase, ClauseStorage, WitnessDatabase},
6    literal::{Literal, Variable},
7    memory::{format_memory_usage, HeapSpace, Offset, Vector},
8    output::{panic_on_error, print_key_value, unreachable, Timer},
9};
10use std::{
11    cmp,
12    convert::TryInto,
13    fs::File,
14    io::{
15        self, BufReader, BufWriter, Error, ErrorKind, Read, Result, Seek, SeekFrom, StdinLock,
16        Write,
17    },
18    iter::Peekable,
19    panic, slice,
20};
21
22/// CNF and DRAT/DPR parser.
23#[derive(Debug, PartialEq)]
24pub struct Parser {
25    /// The redundancy property identifying the proof format.
26    pub redundancy_property: RedundancyProperty,
27    /// The highest variable parsed so far
28    pub maxvar: Variable,
29    /// For RAT, the pivot (first literal) for each clause
30    ///
31    /// It is necessary to store this because the clauses will be sorted
32    /// (and watches will be shuffled).
33    pub clause_pivot: Vector<Literal>,
34    /// The first clause that is part of the proof (and not the input formula)
35    pub proof_start: Clause,
36    /// The proof steps
37    pub proof: Vector<ProofStep>,
38    /// How many proof steps we want to parse
39    pub max_proof_steps: Option<usize>,
40    /// Print diagnostics and timing information
41    pub verbose: bool,
42    /// Clause store
43    pub clause_db: ClauseDatabase,
44    /// Witness store
45    pub witness_db: WitnessDatabase,
46}
47
48impl Default for Parser {
49    /// Create a new parser.
50    ///
51    /// *Note*: this allocates the static clause and witness databases, so this should only be called once.
52    fn default() -> Parser {
53        Parser {
54            redundancy_property: RedundancyProperty::RAT,
55            maxvar: Variable::new(0),
56            clause_pivot: Vector::new(),
57            proof_start: Clause::new(0),
58            proof: Vector::new(),
59            max_proof_steps: None,
60            verbose: true,
61            clause_db: ClauseDatabase::default(),
62            witness_db: WitnessDatabase::default(),
63        }
64    }
65}
66
67impl Parser {
68    /// Returns true if we are parsing a (D)PR proof.
69    pub fn is_pr(&self) -> bool {
70        self.redundancy_property == RedundancyProperty::PR
71    }
72}
73
74/// A fixed-size hash table that maps clauses (sets of literals) to clause identifiers.
75///
76/// This should work exactly like the one used in `drat-trim`.
77/// Given that we expect the number of clauses in the hash table
78/// not to exceed a couple million this should be faster and leaner than
79/// [DynamicHashTable](struct.DynamicHashTable.html).
80pub struct HashTable(Vector<Vector<Clause>>);
81
82/// Return the hash bucket to which this clause belongs.
83fn bucket_index(clause: &[Literal]) -> usize {
84    clause_hash(clause) % HashTable::SIZE
85}
86
87impl HashTable {
88    /// The number of buckets in our hash table (`drat-trim` uses a million)
89    const SIZE: usize = 2 * 1024 * 1024;
90    /// The initial size of each bucket.
91    ///
92    /// We could increase this to at least use `malloc_usable_size` (system-dependent).
93    const BUCKET_INITIAL_SIZE: u16 = 4;
94    /// Allocate the hash table.
95    #[allow(clippy::new_without_default)]
96    pub fn new() -> HashTable {
97        HashTable(Vector::from_vec(vec![
98            Vector::with_capacity(
99                HashTable::BUCKET_INITIAL_SIZE.into()
100            );
101            HashTable::SIZE
102        ]))
103    }
104    /// Add a new clause to the hashtable.
105    pub fn add_clause(&mut self, clause_db: &ClauseDatabase, clause: Clause) {
106        self.0[bucket_index(clause_db.clause(clause))].push(clause);
107    }
108    /// Find a clause that is equivalent to given clause.
109    ///
110    /// If delete is true, delete the found clause.
111    pub fn find_equal_clause(
112        &mut self,
113        clause_db: &ClauseDatabase,
114        needle: Clause,
115        delete: bool,
116    ) -> Option<Clause> {
117        let bucket = &mut self.0[bucket_index(clause_db.clause(needle))];
118        for offset in 0..bucket.len() {
119            let clause = bucket[offset];
120            if clause_db.clause(needle) == clause_db.clause(clause) {
121                if delete {
122                    bucket.swap_remove(offset);
123                }
124                return Some(clause);
125            }
126        }
127        None
128    }
129    /// Return true if this exact clause is active.
130    pub fn clause_is_active(&self, clause_db: &ClauseDatabase, needle: Clause) -> bool {
131        self.0[bucket_index(clause_db.clause(needle))]
132            .iter()
133            .any(|&clause| needle == clause)
134    }
135    /// Delete this exact clause, return true if that succeeded.
136    pub fn delete_clause(&mut self, clause_db: &ClauseDatabase, needle: Clause) -> bool {
137        self.0[bucket_index(clause_db.clause(needle))]
138            .iter()
139            .position(|&clause| needle == clause)
140            .map(|offset| self.0[bucket_index(clause_db.clause(needle))].swap_remove(offset))
141            .is_some()
142    }
143}
144
145/// An iterator over the elements of the hash table
146pub struct HashTableIterator<'a> {
147    /// The iterator over the buckets
148    buckets: slice::Iter<'a, Vector<Clause>>,
149    /// The iterator over a single bucket
150    bucket: slice::Iter<'a, Clause>,
151}
152
153impl<'a> Iterator for HashTableIterator<'a> {
154    type Item = &'a Clause;
155    fn next(&mut self) -> Option<Self::Item> {
156        self.bucket.next().or_else(|| {
157            self.buckets.next().and_then(|next_bucket| {
158                self.bucket = next_bucket.iter();
159                self.bucket.next()
160            })
161        })
162    }
163}
164
165impl<'a> IntoIterator for &'a HashTable {
166    type Item = &'a Clause;
167    type IntoIter = HashTableIterator<'a>;
168    fn into_iter(self) -> Self::IntoIter {
169        HashTableIterator {
170            buckets: self.0.iter(),
171            bucket: self.0[0].iter(),
172        }
173    }
174}
175
176impl HeapSpace for HashTable {
177    fn heap_space(&self) -> usize {
178        self.0.heap_space()
179    }
180}
181
182/// Parse a formula and a proof file.
183pub fn parse_files(formula_file: &str, proof_file: &str, memory_usage_breakdown: bool) -> Parser {
184    let mut parser = Parser::default();
185    let mut clause_ids = HashTable::new();
186    run_parser(&mut parser, formula_file, proof_file, &mut clause_ids);
187    if memory_usage_breakdown {
188        print_memory_usage(&parser, &clause_ids);
189    }
190    parser
191}
192
193/// Print the memory usage of a parser (useful after parsing).
194fn print_memory_usage(parser: &Parser, clause_ids: &HashTable) {
195    let usages = vec![
196        ("db", parser.clause_db.heap_space()),
197        ("hash-table", clause_ids.heap_space()),
198        ("proof", parser.proof.heap_space()),
199        ("rest", parser.clause_pivot.heap_space()),
200    ];
201    let total = usages.iter().map(|pair| pair.1).sum();
202    print_key_value("parser memory (MB)", format_memory_usage(total));
203    for (name, usage) in usages {
204        print_key_value(&format!("memory-{}", name), format_memory_usage(usage));
205    }
206}
207
208/// Parse a formula.
209///
210/// This requires the proof file as well to determine the proof format,
211/// which is necessary for initialization of the witness database.
212pub fn run_parser_on_formula(
213    mut parser: &mut Parser,
214    formula_file: &str,
215    proof_file: &str,
216    clause_ids: &mut HashTable,
217) {
218    parser.redundancy_property = proof_format_by_extension(&proof_file);
219    if parser.verbose {
220        comment!("mode: {}", parser.redundancy_property);
221    }
222    let mut _timer = Timer::name("formula-parsing time");
223    if !parser.verbose {
224        _timer.disabled = true;
225    }
226    parse_formula(
227        &mut parser,
228        clause_ids,
229        read_compressed_file(formula_file, false),
230    )
231    .unwrap_or_else(|err| die!("failed to parse formula: {}", err));
232}
233
234/// Parse a formula and a proof file using a given hash table.
235pub fn run_parser(
236    mut parser: &mut Parser,
237    formula: &str,
238    proof_file: &str,
239    clause_ids: &mut HashTable,
240) {
241    let binary = is_binary_drat(proof_file);
242    run_parser_on_formula(parser, formula, proof_file, clause_ids);
243    let mut _timer = Timer::name("proof-parsing time");
244    if !parser.verbose {
245        _timer.disabled = true;
246    }
247    if binary && parser.verbose {
248        comment!("binary proof mode");
249    }
250    parse_proof(
251        &mut parser,
252        clause_ids,
253        read_compressed_file(proof_file, binary),
254        binary,
255    )
256    .unwrap_or_else(|err| die!("failed to parse proof: {}", err));
257    parser.clause_db.shrink_to_fit();
258    parser.witness_db.shrink_to_fit();
259    parser.clause_pivot.shrink_to_fit();
260    parser.proof.shrink_to_fit();
261}
262
263/// Open a file for reading.
264/// # Panics
265/// Panics on error.
266pub fn open_file(filename: &str) -> File {
267    File::open(filename).unwrap_or_else(|err| die!("cannot open file: {}", err))
268}
269
270/// Open a file for writing.
271/// Returns a locked stdout if the filename is "-".
272/// # Panics
273/// Panics on error.
274pub fn open_file_for_writing<'a>(filename: &str, stdout: &'a io::Stdout) -> Box<dyn Write + 'a> {
275    if filename == "-" {
276        Box::new(BufWriter::new(stdout.lock()))
277    } else {
278        Box::new(BufWriter::new(File::create(filename).unwrap_or_else(
279            |err| die!("cannot open file for writing: {}", err),
280        )))
281    }
282}
283
284/// File extension of Zstandard archives.
285const ZSTD: &str = ".zst";
286/// File extension of Gzip archives.
287const GZIP: &str = ".gz";
288/// File extension of Bzip2 archives.
289const BZIP2: &str = ".bz2";
290/// File extension of XZ archives.
291const XZ: &str = ".xz";
292/// File extension of LZ4 archives.
293const LZ4: &str = ".lz4";
294
295/// Strip the compression format off a filename.
296///
297/// If the filename ends with a known archive extension,
298/// return the filname without extension and the extension.
299/// Otherwise return the unmodified filename and the empty string.
300fn compression_format_by_extension(filename: &str) -> (&str, &str) {
301    let mut basename = filename;
302    let mut compression_format = "";
303    for extension in &[ZSTD, GZIP, BZIP2, LZ4, XZ] {
304        if filename.ends_with(extension) {
305            compression_format = extension;
306            basename = &filename[0..filename.len() - extension.len()];
307            break;
308        }
309    }
310    (basename, compression_format)
311}
312
313/// Determine the proof format based on the proof filename.
314pub fn proof_format_by_extension(proof_filename: &str) -> RedundancyProperty {
315    let (basename, _compression_format) = compression_format_by_extension(proof_filename);
316    if basename.ends_with(".drat") {
317        RedundancyProperty::RAT
318    } else if basename.ends_with(".pr") || basename.ends_with(".dpr") {
319        RedundancyProperty::PR
320    } else {
321        RedundancyProperty::RAT
322    }
323}
324
325impl RedundancyProperty {
326    /// Give the canonical file extension for proofs based on this redundancy property.
327    pub fn file_extension(&self) -> &str {
328        match self {
329            RedundancyProperty::RAT => "drat",
330            RedundancyProperty::PR => "dpr",
331        }
332    }
333}
334
335/// Return an [Input](struct.Input.html) to read from a possibly compressed file.
336///
337/// If the file is compressed it is transparently uncompressed.
338/// If the filename is "-", returns an [Input](struct.Input.html) reading data from stdin.
339/// Argument `binary` is passed on to [Input](struct.Input.html).
340pub fn read_compressed_file_or_stdin<'a>(
341    filename: &'a str,
342    binary: bool,
343    stdin: StdinLock<'a>,
344) -> Input<'a> {
345    match filename {
346        "-" => Input::new(Box::new(stdin.bytes().map(panic_on_error)), binary),
347        filename => read_compressed_file(filename, binary),
348    }
349}
350
351/// Return an [Input](struct.Input.html) to read from a possibly compressed file.
352///
353/// If the file is compressed it is transparently uncompressed.
354/// Argument `binary` is passed on to [Input](struct.Input.html).
355pub fn read_compressed_file(filename: &str, binary: bool) -> Input {
356    let file = open_file(filename);
357    Input::new(read_from_compressed_file(file, filename), binary)
358}
359
360/// Return an Iterator to read from a possibly compressed file.
361///
362/// If the file is compressed it is transparently uncompressed.
363fn read_from_compressed_file(file: File, filename: &str) -> Box<dyn Iterator<Item = u8>> {
364    let (_basename, compression_format) = compression_format_by_extension(filename);
365    if compression_format == "" {
366        return Box::new(BufReader::new(file).bytes().map(panic_on_error));
367    }
368    match compression_format {
369        ZSTD => {
370            let de = zstd::stream::read::Decoder::new(file)
371                .unwrap_or_else(|err| die!("failed to decompress ZST archive: {}", err));
372            Box::new(de.bytes().map(panic_on_error))
373        }
374        GZIP => {
375            let de = flate2::read::GzDecoder::new(file);
376            Box::new(de.bytes().map(panic_on_error))
377        }
378        BZIP2 => {
379            let de = bzip2::read::BzDecoder::new(file);
380            Box::new(de.bytes().map(panic_on_error))
381        }
382        XZ => {
383            let de = xz2::read::XzDecoder::new(file);
384            Box::new(de.bytes().map(panic_on_error))
385        }
386        LZ4 => {
387            let de = lz4::Decoder::new(file)
388                .unwrap_or_else(|err| die!("failed to decode LZ4 archive: {}", err));
389            Box::new(de.bytes().map(panic_on_error))
390        }
391        _ => unreachable(),
392    }
393}
394
395/// Add a literal to the clause or witness database.
396///
397/// If the literal is zero, the current clause or witness will be terminated.
398fn add_literal(
399    parser: &mut Parser,
400    clause_ids: &mut HashTable,
401    state: ProofParserState,
402    literal: Literal,
403) {
404    parser.maxvar = cmp::max(parser.maxvar, literal.variable());
405    match state {
406        ProofParserState::Clause => {
407            parser.clause_db.push_literal(literal, parser.verbose);
408            if parser.is_pr() && literal.is_zero() {
409                parser.witness_db.push_literal(literal, parser.verbose);
410            }
411        }
412        ProofParserState::Witness => {
413            invariant!(parser.is_pr());
414            parser.witness_db.push_literal(literal, parser.verbose);
415            if literal.is_zero() {
416                parser.clause_db.push_literal(literal, parser.verbose);
417            }
418        }
419        ProofParserState::Deletion => {
420            parser.clause_db.push_literal(literal, parser.verbose);
421            if literal.is_zero() {
422                add_deletion(parser, clause_ids);
423            }
424        }
425        ProofParserState::Start => unreachable(),
426    }
427    match state {
428        ProofParserState::Clause | ProofParserState::Witness => {
429            if literal.is_zero() {
430                clause_ids.add_clause(&parser.clause_db, parser.clause_db.last_clause());
431            }
432        }
433        _ => (),
434    }
435}
436
437/// Add a deletion to the proof.
438///
439/// Looks up the last parsed clause in the hash table and adds the deletion upon success.
440fn add_deletion(parser: &mut Parser, clause_ids: &mut HashTable) {
441    let clause = parser.clause_db.last_clause();
442    match clause_ids.find_equal_clause(&parser.clause_db, clause, /*delete=*/ true) {
443        None => {
444            if parser.verbose {
445                as_warning!({
446                    puts!("c deleted clause is not present in the formula: ");
447                    puts_clause(parser.clause_db.clause(clause));
448                    puts!("\n");
449                });
450            }
451            // Need this for sickcheck
452            parser
453                .proof
454                .push(ProofStep::deletion(Clause::DOES_NOT_EXIST))
455        }
456        Some(clause) => parser.proof.push(ProofStep::deletion(clause)),
457    }
458    parser.clause_db.pop_clause();
459}
460
461/// Compute the hash of a clause. This is the same hash function `drat-trim` uses.
462fn clause_hash(clause: &[Literal]) -> usize {
463    let mut sum: usize = 0;
464    let mut prod: usize = 1;
465    let mut xor: usize = 0;
466    for &literal in clause {
467        prod = prod.wrapping_mul(literal.as_offset());
468        sum = sum.wrapping_add(literal.as_offset());
469        xor ^= literal.as_offset();
470    }
471    ((1023 * sum + prod) ^ (31 * xor))
472}
473
474/// Check if a character is a decimal digit.
475fn is_digit(value: u8) -> bool {
476    value >= b'0' && value <= b'9'
477}
478
479/// Check if a character is a decimal digit or a dash.
480fn is_digit_or_dash(value: u8) -> bool {
481    is_digit(value) || value == b'-'
482}
483
484// Error messages.
485/// A numeric overflow. This should only happen for user input.
486const OVERFLOW: &str = "overflow while parsing number";
487/// Parser error ("unexpected EOF")
488const EOF: &str = "premature end of file";
489/// Parser error (`expected ...`)
490const NUMBER: &str = "expected number";
491/// Parser error (`expected ...`)
492const SPACE: &str = "expected space";
493/// Parser error (`expected ...`)
494const NUMBER_OR_SPACE: &str = "expected number or space";
495/// Parser error (`expected ...`)
496const NUMBER_OR_MINUS: &str = "expected number or \"-\"";
497/// Parser error (`expected ...`)
498const P_CNF: &str = "expected \"p cnf\"";
499/// Parser error (`expected ...`)
500const DRAT: &str = "expected DRAT instruction";
501/// Parser error (`expected ...`)
502const NEWLINE: &str = "expected newline";
503
504/// Parse a decimal number.
505///
506/// Consumes one or more decimal digits, returning the value of the
507/// resulting number on success. Fails if there is no digit or if the digits do
508/// not end in a whitespace or newline.
509fn parse_u64(input: &mut Input) -> Result<u64> {
510    match input.peek() {
511        None => return Err(input.error(NUMBER)),
512        Some(c) => {
513            if !is_digit(c) {
514                return Err(input.error(NUMBER));
515            }
516        }
517    }
518    let mut value: u64 = 0;
519    while let Some(c) = input.peek() {
520        if is_space(c) {
521            break;
522        }
523        if !is_digit(c) {
524            return Err(input.error(NUMBER_OR_SPACE));
525        }
526        input.next();
527        value = value
528            .checked_mul(10)
529            .and_then(|val| val.checked_add(u64::from(c - b'0')))
530            .ok_or_else(|| input.error(OVERFLOW))?;
531    }
532    Ok(value)
533}
534
535/// Just like `parse_u64` but convert the result to an i32.
536fn parse_i32(input: &mut Input) -> Result<i32> {
537    let value = parse_u64(input)?;
538    if value > i32::max_value().try_into().unwrap() {
539        Err(input.error(OVERFLOW))
540    } else {
541        Ok(value as i32)
542    }
543}
544
545/// Parse a [Literal](../literal/struct.Literal.html).
546///
547/// Consumes zero or more whitespace characters followd
548/// by an optional "-", a number of at least one decimal digit,
549/// trailed by whitespace. If the number is zero, consumes all whitespace
550/// until the next newline.
551pub fn parse_literal(input: &mut Input) -> Result<Literal> {
552    parse_any_space(input);
553    match input.peek() {
554        None => Err(input.error(EOF)),
555        Some(c) if is_digit_or_dash(c) => {
556            let sign = if c == b'-' {
557                input.next();
558                -1
559            } else {
560                1
561            };
562            let number = parse_i32(input)?;
563            if number == 0 {
564                parse_any_whitespace(input);
565            }
566            Ok(Literal::new(sign * number))
567        }
568        _ => Err(input.error(NUMBER_OR_MINUS)),
569    }
570}
571
572/// Parse a literal from a compressed proof.
573pub fn parse_literal_binary(input: &mut Input) -> Result<Literal> {
574    let mut i = 0;
575    let mut result = 0;
576    while let Some(value) = input.next() {
577        if (u64::from(value & 0x7f) << (7 * i)) > u32::max_value().into() {
578            return Err(input.error(OVERFLOW));
579        }
580        result |= u32::from(value & 0x7f) << (7 * i);
581        i += 1;
582        if (value & 0x80) == 0 {
583            break;
584        }
585    }
586    Ok(Literal::from_raw(result))
587}
588
589/// Parse a DIMACS comment starting with "c ".
590///
591/// Consumes a leading "c" and any characters until (including) the next newline.
592fn parse_comment(input: &mut Input) -> Result<()> {
593    match input.peek() {
594        Some(b'c') => {
595            input.next();
596            while let Some(c) = input.next() {
597                if c == b'\n' {
598                    return Ok(());
599                }
600            }
601            Err(input.error(NEWLINE))
602        }
603        _ => Err(input.error("")),
604    }
605}
606
607/// Parse one or more spaces.
608fn parse_some_spaces(input: &mut Input) -> Result<()> {
609    if input.peek() != Some(b' ') {
610        return Err(input.error(SPACE));
611    }
612    while let Some(b' ') = input.peek() {
613        input.next();
614    }
615    Ok(())
616}
617
618/// Parse zero or more spaces.
619fn parse_any_space(input: &mut Input) {
620    while let Some(c) = input.peek() {
621        if c != b' ' {
622            break;
623        }
624        input.next();
625    }
626}
627
628/// Parse zero or more spaces or linebreaks.
629fn parse_any_whitespace(input: &mut Input) {
630    while let Some(c) = input.peek() {
631        if !is_space(c) {
632            break;
633        }
634        input.next();
635    }
636}
637
638/// Parse a DIMACS header.
639fn parse_formula_header(input: &mut Input) -> Result<(i32, u64)> {
640    while Some(b'c') == input.peek() {
641        parse_comment(input)?
642    }
643    for &expected in b"p cnf" {
644        if input.peek().map_or(true, |c| c != expected) {
645            return Err(input.error(P_CNF));
646        }
647        input.next();
648    }
649    parse_some_spaces(input)?;
650    let maxvar = parse_i32(input)?;
651    parse_some_spaces(input)?;
652    let num_clauses = parse_u64(input)?;
653    parse_any_whitespace(input);
654    Ok((maxvar, num_clauses))
655}
656
657/// Returns true if the character is one of the whitespace characters we allow.
658fn is_space(c: u8) -> bool {
659    [b' ', b'\n', b'\r'].iter().any(|&s| s == c)
660}
661
662/// Commence the addition of a new clause to the database.
663///
664/// Must be called before pushing th first literal with
665/// [add_literal()](fn.add_literal.html).
666fn open_clause(parser: &mut Parser, state: ProofParserState) -> Clause {
667    let clause = parser.clause_db.open_clause();
668    if parser.is_pr() && state != ProofParserState::Deletion {
669        let witness = parser.witness_db.open_clause();
670        invariant!(clause == witness);
671    }
672    clause
673}
674
675/// Parse a DIMACS clause.
676fn parse_clause(parser: &mut Parser, clause_ids: &mut HashTable, input: &mut Input) -> Result<()> {
677    open_clause(parser, ProofParserState::Clause);
678    parser.clause_pivot.push(Literal::NEVER_READ);
679    loop {
680        let literal = parse_literal(input)?;
681        add_literal(parser, clause_ids, ProofParserState::Clause, literal);
682        if literal.is_zero() {
683            return Ok(());
684        }
685    }
686}
687
688/// Parse a DIMACS formula.
689fn parse_formula(parser: &mut Parser, clause_ids: &mut HashTable, mut input: Input) -> Result<()> {
690    parse_formula_header(&mut input)?;
691    while let Some(c) = input.peek() {
692        if c == b'c' {
693            parse_comment(&mut input)?;
694            continue;
695        }
696        parse_clause(parser, clause_ids, &mut input)?;
697    }
698    Ok(())
699}
700
701/// Return true if the file is in binary (compressed) DRAT.
702///
703/// Read the first ten characters of the given file to determine
704/// that, just like `drat-trim`. This works fine on real proofs.
705pub fn is_binary_drat(filename: &str) -> bool {
706    let mut file = open_file(filename);
707    file.seek(SeekFrom::Start(0)).unwrap_or_else(|_err| {
708        die!("proof file is not seekable - cannot currently determine if it has binary DRAT")
709    });
710    is_binary_drat_impl(read_from_compressed_file(file, filename))
711}
712/// Implementation of `is_binary_drat`.
713fn is_binary_drat_impl(buffer: impl Iterator<Item = u8>) -> bool {
714    for c in buffer.take(10) {
715        if (c != 100) && (c != 10) && (c != 13) && (c != 32) && (c != 45) && ((c < 48) || (c > 57))
716        {
717            return true;
718        }
719    }
720    false
721}
722
723/// The state of our proof parser
724#[derive(Debug, PartialEq, Eq, Clone, Copy)]
725pub enum ProofParserState {
726    /// Before the start of an instruction
727    Start,
728    /// Inside a clause/lemma
729    Clause,
730    /// Inside a witness
731    Witness,
732    /// Inside a deletion
733    Deletion,
734}
735
736/// Parse a single proof step
737pub fn parse_proof_step(
738    parser: &mut Parser,
739    clause_ids: &mut HashTable,
740    input: &mut Input,
741    binary: bool,
742    state: &mut ProofParserState,
743) -> Result<Option<()>> {
744    let literal_parser = if binary {
745        parse_literal_binary
746    } else {
747        parse_literal
748    };
749    let mut lemma_head = true;
750    let mut first_literal = None;
751    while let Some(c) = input.peek() {
752        if !binary && is_space(c) {
753            input.next();
754            continue;
755        }
756        if *state == ProofParserState::Start {
757            *state = match c {
758                b'd' => {
759                    input.next();
760                    open_clause(parser, ProofParserState::Deletion);
761                    ProofParserState::Deletion
762                }
763                c if (!binary && is_digit_or_dash(c)) || (binary && c == b'a') => {
764                    if binary {
765                        input.next();
766                    }
767                    lemma_head = true;
768                    let clause = open_clause(parser, ProofParserState::Clause);
769                    parser.proof.push(ProofStep::lemma(clause));
770                    ProofParserState::Clause
771                }
772                _ => return Err(input.error(DRAT)),
773            };
774            continue;
775        }
776        let literal = literal_parser(input)?;
777        if parser.is_pr() && *state == ProofParserState::Clause && first_literal == Some(literal) {
778            *state = ProofParserState::Witness;
779        }
780        if *state == ProofParserState::Clause && lemma_head {
781            parser.clause_pivot.push(literal);
782            first_literal = Some(literal);
783            lemma_head = false;
784        }
785        invariant!(*state != ProofParserState::Start);
786        add_literal(parser, clause_ids, *state, literal);
787        if literal.is_zero() {
788            *state = ProofParserState::Start;
789            return Ok(Some(()));
790        }
791    }
792    Ok(None)
793}
794
795/// Fix-up incomplete proofs.
796///
797/// This adds a zero if the last line was missing one.
798/// Additionally it adds an empty clause as final lemma.
799pub fn finish_proof(parser: &mut Parser, clause_ids: &mut HashTable, state: &mut ProofParserState) {
800    // patch missing zero terminators
801    match *state {
802        ProofParserState::Clause | ProofParserState::Deletion | ProofParserState::Witness => {
803            add_literal(parser, clause_ids, *state, Literal::new(0));
804        }
805        ProofParserState::Start => (),
806    };
807}
808
809/// Parse a proof given the hashtable.
810fn parse_proof(
811    parser: &mut Parser,
812    clause_ids: &mut HashTable,
813    mut input: Input,
814    binary: bool,
815) -> Result<()> {
816    parser.proof_start = Clause::new(parser.clause_db.number_of_clauses());
817    let mut state = ProofParserState::Start;
818    if parser.max_proof_steps != Some(0) {
819        while let Some(()) = parse_proof_step(parser, clause_ids, &mut input, binary, &mut state)? {
820            if parser
821                .max_proof_steps
822                .map_or(false, |max_steps| parser.proof.len() == max_steps)
823            {
824                break;
825            }
826        }
827    }
828    finish_proof(parser, clause_ids, &mut state);
829    Ok(())
830}
831
832#[cfg(test)]
833mod tests {
834    use super::*;
835
836    #[allow(unused_macros)]
837    macro_rules! literals {
838        ($($x:expr),*) => (Vector::from_vec(vec!($(Literal::new($x)),*)));
839    }
840
841    fn sample_formula(clause_ids: &mut HashTable) -> Parser {
842        let mut parser = Parser::default();
843        parser.redundancy_property = RedundancyProperty::RAT;
844        let example = r#"c comment
845p cnf 2 2
8461 2 0
847c comment
848-1 -2 0"#;
849        assert!(parse_formula(
850            &mut parser,
851            clause_ids,
852            Input::new(Box::new(example.as_bytes().iter().cloned()), false),
853        )
854        .is_ok());
855        parser
856    }
857    #[test]
858    fn valid_formula_and_proof() {
859        let mut clause_ids = HashTable::new();
860        let mut parser = sample_formula(&mut clause_ids);
861        let result = parse_proof(
862            &mut parser,
863            &mut clause_ids,
864            Input::new(Box::new(b"1 2 3 0\nd 1 2 0".into_iter().cloned()), false),
865            false,
866        );
867        assert!(result.is_ok());
868        fn lit(x: i32) -> Literal {
869            Literal::new(x)
870        }
871        fn raw(x: u32) -> Literal {
872            Literal::from_raw(x)
873        }
874        assert_eq!(
875            parser,
876            Parser {
877                redundancy_property: RedundancyProperty::RAT,
878                maxvar: Variable::new(3),
879                clause_pivot: vector!(Literal::NEVER_READ, Literal::NEVER_READ, Literal::new(1)),
880                proof_start: Clause::new(2),
881                proof: vector!(
882                    ProofStep::lemma(Clause::new(2)),
883                    ProofStep::deletion(Clause::new(0)),
884                ),
885                max_proof_steps: None,
886                verbose: true,
887                clause_db: ClauseDatabase::from(
888                    vector!(
889                        raw(0),
890                        raw(0),
891                        lit(1),
892                        lit(2),
893                        lit(0),
894                        raw(1),
895                        raw(0),
896                        lit(-2),
897                        lit(-1),
898                        lit(0),
899                        raw(2),
900                        raw(0),
901                        lit(1),
902                        lit(2),
903                        lit(3),
904                        lit(0),
905                    ),
906                    vector!(0, 5, 10),
907                ),
908                witness_db: WitnessDatabase::from(vector!(), vector!(0)),
909            }
910        );
911    }
912}
913
914impl HeapSpace for Parser {
915    fn heap_space(&self) -> usize {
916        self.clause_db.heap_space()
917            + self.witness_db.heap_space()
918            + self.clause_pivot.heap_space()
919            + self.proof.heap_space()
920    }
921}
922
923/// A peekable iterator for bytes that records line and column information.
924pub struct Input<'a> {
925    /// The source of the input data
926    source: Peekable<Box<dyn Iterator<Item = u8> + 'a>>,
927    /// Whether we are parsing binary or textual data
928    binary: bool,
929    /// The current line number (if not binary)
930    line: usize,
931    /// The current column
932    column: usize,
933}
934
935impl<'a> Input<'a> {
936    /// Create a new `Input` from some source
937    pub fn new(source: Box<dyn Iterator<Item = u8> + 'a>, binary: bool) -> Self {
938        Input {
939            source: source.peekable(),
940            binary,
941            line: 1,
942            column: 1,
943        }
944    }
945    /// Look at the next byte without consuming it
946    pub fn peek(&mut self) -> Option<u8> {
947        self.source.peek().cloned()
948    }
949    /// Create an io::Error with the given message and position information.
950    pub fn error(&self, why: &'static str) -> Error {
951        Error::new(
952            ErrorKind::InvalidData,
953            if self.binary {
954                format!("{} at position {}", why, self.column)
955            } else {
956                format!("{} at line {} column {}", why, self.line, self.column)
957            },
958        )
959    }
960}
961
962impl Iterator for Input<'_> {
963    type Item = u8;
964    fn next(&mut self) -> Option<u8> {
965        self.source.next().map(|c| {
966            if !self.binary && c == b'\n' {
967                self.line += 1;
968                self.column = 0;
969            }
970            self.column += 1;
971            c
972        })
973    }
974}
rate_common/parser.rs

rate_common/
parser.rs