cnf_parser/
lexer.rs

1use crate::{
2    Input,
3    Literal,
4    Output,
5    Problem,
6    Token,
7};
8use core::{
9    marker::PhantomData,
10    num::NonZeroI32,
11};
12
13/// A parse buffer helping with reading bytes from an input source.
14pub struct PeekReader<'a, I> {
15    input: &'a mut I,
16    peek: Option<u8>,
17    current: usize,
18}
19
20impl<'a, I> PeekReader<'a, I>
21where
22    I: Input,
23{
24    /// Creates a new parse buffer from the given slice of bytes.
25    pub fn new(input: &'a mut I) -> Self {
26        Self {
27            input,
28            peek: None,
29            current: 0,
30        }
31    }
32
33    /// Returns the current position in the input stream.
34    ///
35    /// # Note
36    ///
37    /// This is mostly important for error handling information.
38    pub fn current_position(&self) -> usize {
39        self.current.saturating_sub(1)
40    }
41
42    /// Peeks the next byte from the source if any bytes are left.
43    pub fn peek_byte(&mut self) -> Option<u8> {
44        match self.peek {
45            Some(peek) => Some(peek),
46            None => self.read_byte(),
47        }
48    }
49
50    /// Skips the next byte.
51    pub fn skip_byte(&mut self) {
52        match &mut self.peek {
53            None => {
54                self.read_byte();
55            }
56            peek @ Some(_) => {
57                *peek = None;
58            }
59        }
60    }
61
62    /// Reads the next byte from the source if any bytes are left.
63    pub fn read_byte(&mut self) -> Option<u8> {
64        self.peek = self.input.read_byte();
65        if self.peek.is_some() {
66            self.current += 1;
67        }
68        self.peek
69    }
70}
71
72/// Errors encountered while parsing CNF input.
73#[derive(Debug, Clone, PartialEq, Eq)]
74pub enum Error<OutputError> {
75    Output(OutputError),
76    UnexpectedByte {
77        at: usize,
78        encountered: Option<u8>,
79        expected: Option<u8>,
80    },
81    InvalidTokenStart {
82        at: usize,
83        encountered: u8,
84    },
85    ExpectedWhitespace {
86        at: usize,
87        encountered: Option<u8>,
88    },
89    OutOfRangeU32 {
90        at: usize,
91        encountered: u64,
92    },
93    OutOfRangeI32 {
94        at: usize,
95        encountered: u32,
96    },
97    DuplicateProblem {
98        at: usize,
99        duplicate: Problem,
100    },
101}
102
103impl<OutputError> Error<OutputError> {
104    pub(crate) fn from_output(output_error: OutputError) -> Self {
105        Self::Output(output_error)
106    }
107}
108
109/// A CNF lexer and parser.
110pub struct Lexer<'a, I, O> {
111    reader: PeekReader<'a, I>,
112    seen_problem: bool,
113    marker: PhantomData<fn() -> O>,
114}
115
116impl<'a, I, O> Lexer<'a, I, O>
117where
118    I: Input + 'a,
119    O: Output,
120{
121    pub fn new(input: &'a mut I) -> Self {
122        Self {
123            reader: PeekReader::new(input),
124            seen_problem: false,
125            marker: Default::default(),
126        }
127    }
128
129    fn is_nonzero_i32_start(byte: u8) -> bool {
130        match byte {
131            b'-' | b'1'..=b'9' => true,
132            _ => false,
133        }
134    }
135
136    fn parse_nonzero_i32(&mut self) -> Result<NonZeroI32, Error<<O as Output>::Error>> {
137        debug_assert!(self
138            .reader
139            .peek_byte()
140            .map(Self::is_nonzero_i32_start)
141            .unwrap_or(false));
142        let pos = self.reader.current_position();
143        let sign_factor = if self.reader.peek_byte() == Some(b'-') {
144            self.reader.read_byte();
145            -1
146        } else {
147            1
148        };
149        let value_u32 = self.parse_u32()?;
150        if value_u32 > i32::MAX as u32 || value_u32 == 0 {
151            return Err(Error::OutOfRangeI32 {
152                at: pos,
153                encountered: value_u32,
154            })
155        }
156        let value_i32 = value_u32 as i32 * sign_factor;
157        Ok(NonZeroI32::new(value_i32).expect("encountered invalid zero i32 value"))
158    }
159
160    fn is_literal_start(byte: u8) -> bool {
161        Self::is_nonzero_i32_start(byte)
162    }
163
164    fn parse_literal(&mut self) -> Result<Literal, Error<<O as Output>::Error>> {
165        debug_assert!(self
166            .reader
167            .peek_byte()
168            .map(Self::is_literal_start)
169            .unwrap_or(false));
170        let value = self.parse_nonzero_i32()?;
171        Ok(Literal::new(value))
172    }
173
174    fn parse_u32(&mut self) -> Result<u32, Error<<O as Output>::Error>> {
175        debug_assert!(self
176            .reader
177            .peek_byte()
178            .map(|c| c.is_ascii_digit())
179            .unwrap_or(false));
180        let pos = self.reader.current_position();
181        let mut acc: u64 = 0;
182        'outer: loop {
183            match self.reader.peek_byte() {
184                None => break 'outer,
185                Some(c) if c.is_ascii_whitespace() => break 'outer,
186                Some(c) if c.is_ascii_digit() => {
187                    self.reader.skip_byte();
188                    let normalized = c - b'0';
189                    acc *= 10;
190                    acc += normalized as u64;
191                    if acc > u32::MAX as u64 {
192                        return Err(Error::OutOfRangeU32 {
193                            at: pos,
194                            encountered: acc,
195                        })
196                    }
197                }
198                Some(invalid) => {
199                    return Err(Error::UnexpectedByte {
200                        at: self.reader.current_position(),
201                        encountered: Some(invalid),
202                        expected: None,
203                    })
204                }
205            }
206        }
207        Ok(acc as u32)
208    }
209
210    fn skip_expected_str(
211        &mut self,
212        str: &str,
213    ) -> Result<(), Error<<O as Output>::Error>> {
214        for &expected in str.as_bytes() {
215            let actual = self.reader.peek_byte();
216            if actual != Some(expected) {
217                return Err(Error::UnexpectedByte {
218                    at: self.reader.current_position(),
219                    encountered: actual,
220                    expected: Some(expected),
221                })
222            } else {
223                self.reader.skip_byte();
224            }
225        }
226        Ok(())
227    }
228
229    /// Skips all bytes until a non ascii whitespace byte is peeked.
230    ///
231    /// Returns `true` if whitespace has actually been skipped.
232    fn skip_whitespace(&mut self) -> bool {
233        let mut skipped_whitespace = false;
234        'skip: loop {
235            match self.reader.peek_byte() {
236                Some(whitespace) if whitespace.is_ascii_whitespace() => {
237                    self.reader.skip_byte();
238                    skipped_whitespace = true;
239                    continue 'skip
240                }
241                _non_whitespace => return skipped_whitespace,
242            }
243        }
244    }
245
246    fn skip_expected_whitespace(&mut self) -> Result<(), Error<<O as Output>::Error>> {
247        if !self.skip_whitespace() {
248            return Err(Error::ExpectedWhitespace {
249                at: self.reader.current_position(),
250                encountered: self.reader.peek_byte(),
251            })
252        }
253        Ok(())
254    }
255
256    fn parse_problem(&mut self) -> Result<Problem, Error<<O as Output>::Error>> {
257        debug_assert_eq!(self.reader.peek_byte(), Some(b'p'));
258        let pos = self.reader.current_position();
259        self.reader.skip_byte();
260        self.skip_expected_whitespace()?;
261        self.skip_expected_str("cnf")?;
262        self.skip_expected_whitespace()?;
263        let num_variables = self.parse_u32()?;
264        self.skip_expected_whitespace()?;
265        let num_clauses = self.parse_u32()?;
266        let problem = Problem::new(num_variables, num_clauses);
267        if self.seen_problem {
268            return Err(Error::DuplicateProblem {
269                at: pos,
270                duplicate: problem,
271            })
272        }
273        self.seen_problem = true;
274        Ok(problem)
275    }
276
277    fn skip_until_next_line(&mut self) {
278        'skip: loop {
279            match self.reader.peek_byte() {
280                Some(b'\n') | None => break 'skip,
281                _ => self.reader.skip_byte(),
282            }
283        }
284    }
285
286    fn skip_comment(&mut self) -> Result<(), Error<<O as Output>::Error>> {
287        debug_assert_eq!(self.reader.peek_byte(), Some(b'c'));
288        self.reader.skip_byte();
289        if self
290            .reader
291            .peek_byte()
292            .map(|byte| !byte.is_ascii_whitespace())
293            .unwrap_or_else(|| false)
294        {
295            return Err(Error::ExpectedWhitespace {
296                at: self.reader.current_position(),
297                encountered: self.reader.peek_byte(),
298            })
299        }
300        self.skip_until_next_line();
301        Ok(())
302    }
303
304    fn parse_end_clause(&mut self) -> Result<Token, Error<<O as Output>::Error>> {
305        debug_assert_eq!(self.reader.peek_byte(), Some(b'0'));
306        self.reader.skip_byte();
307        Ok(Token::ClauseEnd)
308    }
309
310    /// Returns the next input token if any.
311    ///
312    /// # Errors
313    ///
314    /// Returns an error in case the input is malformatted.
315    pub fn next_token(&mut self) -> Result<Option<Token>, Error<<O as Output>::Error>> {
316        self.skip_whitespace();
317        match self.reader.peek_byte() {
318            None => Ok(None),
319            Some(b'p') => self.parse_problem().map(Token::from).map(Into::into),
320            Some(b'c') => {
321                self.skip_comment()?;
322                self.next_token()
323            }
324            Some(b'0') => self.parse_end_clause().map(Into::into),
325            Some(byte) if Self::is_literal_start(byte) => {
326                self.parse_literal().map(Token::from).map(Into::into)
327            }
328            Some(invalid) => {
329                Err(Error::InvalidTokenStart {
330                    at: self.reader.current_position(),
331                    encountered: invalid,
332                })
333            }
334        }
335    }
336}