1use crate::{
2 Input,
3 Literal,
4 Output,
5 Problem,
6 Token,
7};
8use core::{
9 marker::PhantomData,
10 num::NonZeroI32,
11};
12
13pub struct PeekReader<'a, I> {
15 input: &'a mut I,
16 peek: Option<u8>,
17 current: usize,
18}
19
20impl<'a, I> PeekReader<'a, I>
21where
22 I: Input,
23{
24 pub fn new(input: &'a mut I) -> Self {
26 Self {
27 input,
28 peek: None,
29 current: 0,
30 }
31 }
32
33 pub fn current_position(&self) -> usize {
39 self.current.saturating_sub(1)
40 }
41
42 pub fn peek_byte(&mut self) -> Option<u8> {
44 match self.peek {
45 Some(peek) => Some(peek),
46 None => self.read_byte(),
47 }
48 }
49
50 pub fn skip_byte(&mut self) {
52 match &mut self.peek {
53 None => {
54 self.read_byte();
55 }
56 peek @ Some(_) => {
57 *peek = None;
58 }
59 }
60 }
61
62 pub fn read_byte(&mut self) -> Option<u8> {
64 self.peek = self.input.read_byte();
65 if self.peek.is_some() {
66 self.current += 1;
67 }
68 self.peek
69 }
70}
71
72#[derive(Debug, Clone, PartialEq, Eq)]
74pub enum Error<OutputError> {
75 Output(OutputError),
76 UnexpectedByte {
77 at: usize,
78 encountered: Option<u8>,
79 expected: Option<u8>,
80 },
81 InvalidTokenStart {
82 at: usize,
83 encountered: u8,
84 },
85 ExpectedWhitespace {
86 at: usize,
87 encountered: Option<u8>,
88 },
89 OutOfRangeU32 {
90 at: usize,
91 encountered: u64,
92 },
93 OutOfRangeI32 {
94 at: usize,
95 encountered: u32,
96 },
97 DuplicateProblem {
98 at: usize,
99 duplicate: Problem,
100 },
101}
102
103impl<OutputError> Error<OutputError> {
104 pub(crate) fn from_output(output_error: OutputError) -> Self {
105 Self::Output(output_error)
106 }
107}
108
109pub struct Lexer<'a, I, O> {
111 reader: PeekReader<'a, I>,
112 seen_problem: bool,
113 marker: PhantomData<fn() -> O>,
114}
115
116impl<'a, I, O> Lexer<'a, I, O>
117where
118 I: Input + 'a,
119 O: Output,
120{
121 pub fn new(input: &'a mut I) -> Self {
122 Self {
123 reader: PeekReader::new(input),
124 seen_problem: false,
125 marker: Default::default(),
126 }
127 }
128
129 fn is_nonzero_i32_start(byte: u8) -> bool {
130 match byte {
131 b'-' | b'1'..=b'9' => true,
132 _ => false,
133 }
134 }
135
136 fn parse_nonzero_i32(&mut self) -> Result<NonZeroI32, Error<<O as Output>::Error>> {
137 debug_assert!(self
138 .reader
139 .peek_byte()
140 .map(Self::is_nonzero_i32_start)
141 .unwrap_or(false));
142 let pos = self.reader.current_position();
143 let sign_factor = if self.reader.peek_byte() == Some(b'-') {
144 self.reader.read_byte();
145 -1
146 } else {
147 1
148 };
149 let value_u32 = self.parse_u32()?;
150 if value_u32 > i32::MAX as u32 || value_u32 == 0 {
151 return Err(Error::OutOfRangeI32 {
152 at: pos,
153 encountered: value_u32,
154 })
155 }
156 let value_i32 = value_u32 as i32 * sign_factor;
157 Ok(NonZeroI32::new(value_i32).expect("encountered invalid zero i32 value"))
158 }
159
160 fn is_literal_start(byte: u8) -> bool {
161 Self::is_nonzero_i32_start(byte)
162 }
163
164 fn parse_literal(&mut self) -> Result<Literal, Error<<O as Output>::Error>> {
165 debug_assert!(self
166 .reader
167 .peek_byte()
168 .map(Self::is_literal_start)
169 .unwrap_or(false));
170 let value = self.parse_nonzero_i32()?;
171 Ok(Literal::new(value))
172 }
173
174 fn parse_u32(&mut self) -> Result<u32, Error<<O as Output>::Error>> {
175 debug_assert!(self
176 .reader
177 .peek_byte()
178 .map(|c| c.is_ascii_digit())
179 .unwrap_or(false));
180 let pos = self.reader.current_position();
181 let mut acc: u64 = 0;
182 'outer: loop {
183 match self.reader.peek_byte() {
184 None => break 'outer,
185 Some(c) if c.is_ascii_whitespace() => break 'outer,
186 Some(c) if c.is_ascii_digit() => {
187 self.reader.skip_byte();
188 let normalized = c - b'0';
189 acc *= 10;
190 acc += normalized as u64;
191 if acc > u32::MAX as u64 {
192 return Err(Error::OutOfRangeU32 {
193 at: pos,
194 encountered: acc,
195 })
196 }
197 }
198 Some(invalid) => {
199 return Err(Error::UnexpectedByte {
200 at: self.reader.current_position(),
201 encountered: Some(invalid),
202 expected: None,
203 })
204 }
205 }
206 }
207 Ok(acc as u32)
208 }
209
210 fn skip_expected_str(
211 &mut self,
212 str: &str,
213 ) -> Result<(), Error<<O as Output>::Error>> {
214 for &expected in str.as_bytes() {
215 let actual = self.reader.peek_byte();
216 if actual != Some(expected) {
217 return Err(Error::UnexpectedByte {
218 at: self.reader.current_position(),
219 encountered: actual,
220 expected: Some(expected),
221 })
222 } else {
223 self.reader.skip_byte();
224 }
225 }
226 Ok(())
227 }
228
229 fn skip_whitespace(&mut self) -> bool {
233 let mut skipped_whitespace = false;
234 'skip: loop {
235 match self.reader.peek_byte() {
236 Some(whitespace) if whitespace.is_ascii_whitespace() => {
237 self.reader.skip_byte();
238 skipped_whitespace = true;
239 continue 'skip
240 }
241 _non_whitespace => return skipped_whitespace,
242 }
243 }
244 }
245
246 fn skip_expected_whitespace(&mut self) -> Result<(), Error<<O as Output>::Error>> {
247 if !self.skip_whitespace() {
248 return Err(Error::ExpectedWhitespace {
249 at: self.reader.current_position(),
250 encountered: self.reader.peek_byte(),
251 })
252 }
253 Ok(())
254 }
255
256 fn parse_problem(&mut self) -> Result<Problem, Error<<O as Output>::Error>> {
257 debug_assert_eq!(self.reader.peek_byte(), Some(b'p'));
258 let pos = self.reader.current_position();
259 self.reader.skip_byte();
260 self.skip_expected_whitespace()?;
261 self.skip_expected_str("cnf")?;
262 self.skip_expected_whitespace()?;
263 let num_variables = self.parse_u32()?;
264 self.skip_expected_whitespace()?;
265 let num_clauses = self.parse_u32()?;
266 let problem = Problem::new(num_variables, num_clauses);
267 if self.seen_problem {
268 return Err(Error::DuplicateProblem {
269 at: pos,
270 duplicate: problem,
271 })
272 }
273 self.seen_problem = true;
274 Ok(problem)
275 }
276
277 fn skip_until_next_line(&mut self) {
278 'skip: loop {
279 match self.reader.peek_byte() {
280 Some(b'\n') | None => break 'skip,
281 _ => self.reader.skip_byte(),
282 }
283 }
284 }
285
286 fn skip_comment(&mut self) -> Result<(), Error<<O as Output>::Error>> {
287 debug_assert_eq!(self.reader.peek_byte(), Some(b'c'));
288 self.reader.skip_byte();
289 if self
290 .reader
291 .peek_byte()
292 .map(|byte| !byte.is_ascii_whitespace())
293 .unwrap_or_else(|| false)
294 {
295 return Err(Error::ExpectedWhitespace {
296 at: self.reader.current_position(),
297 encountered: self.reader.peek_byte(),
298 })
299 }
300 self.skip_until_next_line();
301 Ok(())
302 }
303
304 fn parse_end_clause(&mut self) -> Result<Token, Error<<O as Output>::Error>> {
305 debug_assert_eq!(self.reader.peek_byte(), Some(b'0'));
306 self.reader.skip_byte();
307 Ok(Token::ClauseEnd)
308 }
309
310 pub fn next_token(&mut self) -> Result<Option<Token>, Error<<O as Output>::Error>> {
316 self.skip_whitespace();
317 match self.reader.peek_byte() {
318 None => Ok(None),
319 Some(b'p') => self.parse_problem().map(Token::from).map(Into::into),
320 Some(b'c') => {
321 self.skip_comment()?;
322 self.next_token()
323 }
324 Some(b'0') => self.parse_end_clause().map(Into::into),
325 Some(byte) if Self::is_literal_start(byte) => {
326 self.parse_literal().map(Token::from).map(Into::into)
327 }
328 Some(invalid) => {
329 Err(Error::InvalidTokenStart {
330 at: self.reader.current_position(),
331 encountered: invalid,
332 })
333 }
334 }
335 }
336}