yapcol 0.1.0

Yet Another Parser Combinator Library - YAPCoL
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
//! This module provides the [`Input`] type, which wraps any iterator and exposes it as a token
//! stream that parsers can consume. It is the primary interface through which parsers read tokens,
//! and it is the only type in this module that users need to interact with directly.
//!
//! # Tokens
//!
//! Any type that implements [`PartialEq`] and [`Clone`] can be used as a token. This is
//! automatically satisfied via the blanket implementation of the [`Token`] trait, so no manual
//! implementation is required.
//!
//! # Creating an input stream
//!
//! An [`Input`] is constructed from any type that implements [`IntoIterator`], where the iterator
//! elements must implement [`Token`]:
//!
//! ```
//! use yapcol::input::Input;
//!
//! let tokens = vec!['h', 'e', 'l', 'l', 'o'];
//! let mut input = Input::new(tokens);
//! ```
//!
//! # Lookahead
//!
//! [`Input`] supports arbitrary lookahead: tokens can be fetched and inspected without being
//! permanently consumed. A lookahead operation is started with
//! [`Input::start_look_ahead`](Input::start_look_ahead) and stopped with
//! [`Input::stop_look_ahead`](Input::stop_look_ahead). When stopping, the caller decides whether
//! to backtrack (keeping the fetched tokens available for re-reading) or to discard them.
//!
//! Lookahead operations can be nested. Each nested operation is self-contained, and they must be
//! stopped in the reverse order of their creation (last started, first stopped).
//!
//! In practice, lookahead is used internally by parser combinators such as [`crate::attempt`] and
//! [`crate::look_ahead`] defined in the crate root, so most users will not need to call these
//! methods directly.

use std::collections::VecDeque;
use std::iter::Peekable;

/// The smallest unit of input supported. If you'd like to use a custom type as tokens (e.g.,
/// for lexical analysis, a.k.a. lexing), implementing [`PartialEq`] and [`Clone`] is enough to
/// make it token-compatible.
pub trait Token: PartialEq + Clone {}

impl<T> Token for T where T: PartialEq + Clone {}

/// A frame used to keep track of lookahead operations.
struct LookAheadFrame {
	/// Where in the lookahead buffer this frame started.
	start_index: usize,
	/// How many tokens this frame takes in the lookahead buffer.
	length: usize,
}

impl LookAheadFrame {
	/// The (lookahead buffer) index of the next token in this frame.
	fn next_ix(&self) -> usize {
		self.start_index + self.length
	}
}

/// A handler used to enforce the following on lookahead operations:
/// - That calls to [`Input::stop_look_ahead`] are only possible after a call to
///   [`Input::start_look_ahead`].
/// - That the right order of start/stop lookahead operations is performed.
#[must_use]
pub(crate) struct LookAheadHandler {
	id: usize,
}

/// Possible locations where the next input token might be.
enum TokenLocation {
	Stream,
	StreamLookingAhead,
	BufferHead,
	BufferTail,
}

/// An input stream that can be used to fetch input tokens. It's the most important entity in this
/// module, concentrating all input operations.
pub struct Input<I>
where
	I: Iterator<Item: Token>,
{
	stream: Peekable<I>,
	consumed_count: usize,
	next_location: TokenLocation,
	look_ahead_frames: Vec<LookAheadFrame>,
	look_ahead_buffer: VecDeque<I::Item>,
}

impl<I> Input<I>
where
	I: Iterator<Item: Token>,
{
	/// Constructs an [`Input`] based on an underlying input source.
	///
	/// # Arguments
	///
	/// - `source`: the underlying input source containing the input tokens.
	///
	/// # Examples
	///
	/// ```
	/// use yapcol::input::Input;
	///
	/// let tokens = vec!["hello!"];
	/// let mut input = Input::new(tokens);
	/// ```
	pub fn new<T>(source: impl IntoIterator<Item = T, IntoIter = I>) -> Input<I>
	where
		I: Iterator<Item = T>,
	{
		Self {
			stream: source.into_iter().peekable(),
			consumed_count: 0,
			next_location: TokenLocation::Stream,
			look_ahead_frames: Vec::new(),
			look_ahead_buffer: VecDeque::new(),
		}
	}

	/// Fetches the next token in the input stream, mutating the input stream.
	pub(crate) fn next_token(&mut self) -> Option<I::Item> {
		match self.next_location {
			TokenLocation::Stream => {
				self.consumed_count += 1;
				self.stream.next()
			}
			TokenLocation::StreamLookingAhead => {
				let frame = self.look_ahead_frames.last_mut().unwrap();
				match self.stream.next() {
					None => None,
					Some(token) => {
						let cloned = token.clone();
						self.look_ahead_buffer.push_back(token);
						frame.length += 1;
						Some(cloned)
					}
				}
			}
			TokenLocation::BufferHead => {
				self.consumed_count += 1;
				let output = self.look_ahead_buffer.pop_front();
				self.next_location = if self.look_ahead_buffer.is_empty() {
					TokenLocation::Stream
				} else {
					TokenLocation::BufferHead
				};
				output
			}
			TokenLocation::BufferTail => {
				let frame = self.look_ahead_frames.last_mut().unwrap();
				let token = self.look_ahead_buffer.get(frame.next_ix()).unwrap();
				frame.length += 1;
				self.next_location = if frame.next_ix() == self.look_ahead_buffer.len() {
					TokenLocation::StreamLookingAhead
				} else {
					TokenLocation::BufferTail
				};
				Some(token.clone())
			}
		}
	}

	/// Peeks into the input, returning a reference to the next token. Calling this method does not
	/// mutate the input, and calling it repeatedly will return the same item over and over.
	pub(crate) fn peek(&mut self) -> Option<&I::Item> {
		match self.next_location {
			TokenLocation::Stream => self.stream.peek(),
			TokenLocation::StreamLookingAhead => self.stream.peek(),
			TokenLocation::BufferHead => self.look_ahead_buffer.front(),
			TokenLocation::BufferTail => {
				let frame = self.look_ahead_frames.last_mut().unwrap();
				self.look_ahead_buffer.get(frame.next_ix())
			}
		}
	}

	/// How many tokens the input stream has consumed.
	pub(crate) fn consumed_count(&self) -> usize {
		self.consumed_count
	}

	/// Starts a lookahead operation, putting the input stream into lookahead mode (if it already
	/// wasn't). During lookahead mode, tokens might be fetched, but they won't be consumed. This
	/// implements arbitrary lookahead, where tokens will be cached internally, for as long as the
	/// lookahead mode is enabled.
	/// For more information on disabling lookahead mode, check [`stop_look_ahead`].
	///
	/// This function returns a [`LookAheadHandler`], which *must* be used to stop the look ahead
	/// operation. Failing to use this handler will trigger compilation warnings.
	///
	/// # Nested lookahead operations
	///
	/// [`Input`] supports nested lookahead operations, where each operation is self-contained and
	/// unaware of the others. Repeated calls to this method will create different lookahead
	/// operations, where each one is "deeper" than the previous one.
	///
	/// One rule must be respected when nesting these operations: the order in which the operations
	/// are stopped must be the reverse order of their creation. In order words, only the most
	/// recent (active) lookahead operation might be stopped. The [`LookAheadHandler`]s are there
	/// to enforce this rule.
	pub(crate) fn start_look_ahead(&mut self) -> LookAheadHandler {
		let new_frame = match self.look_ahead_frames.last() {
			Some(previous) => {
				let start_index = previous.start_index + previous.length;
				LookAheadFrame {
					start_index,
					length: 0,
				}
			}
			None => LookAheadFrame {
				start_index: 0,
				length: 0,
			},
		};
		self.next_location = if self.look_ahead_buffer.is_empty()
			|| new_frame.next_ix() == self.look_ahead_buffer.len()
		{
			TokenLocation::StreamLookingAhead
		} else {
			TokenLocation::BufferTail
		};

		let token_id = self.look_ahead_frames.len();
		self.look_ahead_frames.push(new_frame);
		LookAheadHandler { id: token_id }
	}

	/// Stops the current lookahead operation, controlling whether the input stream should
	/// backtrack.
	///
	/// # Arguments
	///
	/// - `handler`: Handler used to stop the lookahead operation. This handler *must* belong to the
	///   latest lookahead operation, enforcing the lookahead rule that only the most recent
	///   operation might be stopped. The function will panic if this invariant is not respected.
	/// - `backtrack`: Whether the input should backtrack. If `true`, all tokens fetched during the
	///   lookahead operation will remain cached, and later fetching will return them. In order
	///   words, it pretends that all the fetching that happened during the lookahead operation
	///   did not happen. If `false`, it discards the tokens fetched during the lookahead
	///   operation, and later fetch requests will return new tokens.
	///
	/// # Nested lookahead operations
	///
	/// A call to this method stops the current lookahead operation but might not stop the
	/// lookahead mode—that will only happen if the operation being stopped is the only active one.
	pub(crate) fn stop_look_ahead(&mut self, handler: LookAheadHandler, backtrack: bool) {
		let frame = self.look_ahead_frames.pop().unwrap();
		if handler.id != self.look_ahead_frames.len() {
			panic!("Look ahead handler doesn't match current lookahead depth.")
		}

		if !backtrack {
			self.consumed_count += frame.length;
			let buffer_length = self.look_ahead_buffer.len();
			self.look_ahead_buffer
				.truncate(buffer_length - frame.length);
		}

		self.next_location = if self.look_ahead_frames.is_empty() {
			if self.look_ahead_buffer.is_empty() {
				TokenLocation::Stream
			} else {
				TokenLocation::BufferHead
			}
		} else {
			let frame = self.look_ahead_frames.last_mut().unwrap();
			if frame.next_ix() == self.look_ahead_buffer.len() {
				TokenLocation::StreamLookingAhead
			} else {
				TokenLocation::BufferTail
			}
		}
	}
}

#[cfg(test)]
mod tests {
	use super::*;

	#[test]
	fn lookahead_no_backtracking() {
		let tokens = vec![1, 2];
		let mut input = Input::new(tokens);
		let handler = input.start_look_ahead();
		let next = input.next_token();
		assert_eq!(next, Some(1));
		let next = input.next_token();
		assert_eq!(next, Some(2));
		input.stop_look_ahead(handler, false);
		assert!(input.look_ahead_buffer.is_empty());
		assert_eq!(input.consumed_count(), 2);
		assert_eq!(input.look_ahead_frames.len(), 0);
		assert_eq!(input.next_token(), None);
	}

	#[test]
	fn lookahead_backtracking() {
		let tokens = vec![1, 2];
		let mut input = Input::new(tokens);
		let handler = input.start_look_ahead();
		let next = input.next_token();
		assert_eq!(next, Some(1));
		let next = input.next_token();
		assert_eq!(next, Some(2));
		input.stop_look_ahead(handler, true);
		assert!(!input.look_ahead_buffer.is_empty());
		assert_eq!(input.consumed_count(), 0);
		assert_eq!(input.next_token(), Some(1));
	}

	#[test]
	fn peek_twice() {
		let tokens = vec![1, 2];
		let mut input = Input::new(tokens);
		assert_eq!(input.peek(), Some(&1));
		assert_eq!(input.peek(), Some(&1));
	}

	#[test]
	fn peek_twice_while_looking_ahead_backtracking() {
		let tokens = vec![1, 2];
		let mut input = Input::new(tokens);
		let handler = input.start_look_ahead();
		assert_eq!(input.peek(), Some(&1));
		assert_eq!(input.peek(), Some(&1));
		input.stop_look_ahead(handler, true);
		assert_eq!(input.peek(), Some(&1));
	}

	#[test]
	fn peek_twice_while_looking_ahead_not_backtracking() {
		let tokens = vec![1, 2];
		let mut input = Input::new(tokens);
		let handler = input.start_look_ahead();
		assert_eq!(input.peek(), Some(&1));
		assert_eq!(input.peek(), Some(&1));
		input.stop_look_ahead(handler, false);
		assert_eq!(input.peek(), Some(&1));
	}

	#[test]
	fn repeat_peek_look_ahead_backtracking() {
		let tokens = vec![1, 2];
		let mut input = Input::new(tokens);
		let handler = input.start_look_ahead();
		assert_eq!(input.next_token(), Some(1));
		input.stop_look_ahead(handler, true);
		assert_eq!(input.peek(), Some(&1));

		let handler = input.start_look_ahead();
		assert_eq!(input.peek(), Some(&1));
		input.stop_look_ahead(handler, true);
		assert_eq!(input.peek(), Some(&1));
	}

	#[test]
	fn repeat_peek_look_ahead_not_backtracking() {
		let tokens = vec![1, 2];
		let mut input = Input::new(tokens);
		let handler = input.start_look_ahead();
		assert_eq!(input.next_token(), Some(1));
		input.stop_look_ahead(handler, false);
		assert_eq!(input.peek(), Some(&2));

		let handler = input.start_look_ahead();
		assert_eq!(input.next_token(), Some(2));
		input.stop_look_ahead(handler, true);
		assert_eq!(input.peek(), Some(&2));
		assert_eq!(input.next_token(), Some(2));
	}

	#[test]
	fn nested_lookahead_backtrack() {
		let tokens = vec![1, 2, 3, 4, 5];
		let mut input = Input::new(tokens);
		let handler1 = input.start_look_ahead();
		assert_eq!(input.next_token(), Some(1));
		let handler2 = input.start_look_ahead();
		assert_eq!(input.next_token(), Some(2));
		input.stop_look_ahead(handler2, true);
		assert_eq!(input.next_token(), Some(2));
		input.stop_look_ahead(handler1, true);
		assert_eq!(input.next_token(), Some(1));
	}

	#[test]
	fn nested_lookahead_no_backtrack() {
		let tokens = vec![1, 2, 3, 4, 5];
		let mut input = Input::new(tokens);
		let handler1 = input.start_look_ahead();
		assert_eq!(input.next_token(), Some(1));
		let handler2 = input.start_look_ahead();
		assert_eq!(input.next_token(), Some(2));
		input.stop_look_ahead(handler2, false);
		assert_eq!(input.next_token(), Some(3));
		input.stop_look_ahead(handler1, false);
		assert_eq!(input.next_token(), Some(4));
	}

	#[test]
	fn nested_look_ahead_backtrack_first() {
		let tokens = vec![1, 2, 3, 4, 5];
		let mut input = Input::new(tokens);
		let handler1 = input.start_look_ahead();
		assert_eq!(input.next_token(), Some(1));
		let handler2 = input.start_look_ahead();
		assert_eq!(input.next_token(), Some(2));
		input.stop_look_ahead(handler2, true);
		assert_eq!(input.next_token(), Some(2));
		input.stop_look_ahead(handler1, false);
		assert_eq!(input.next_token(), Some(3));
	}

	#[test]
	fn nested_look_ahead_backtrack_second() {
		let tokens = vec![1, 2, 3, 4, 5];
		let mut input = Input::new(tokens);
		let handler1 = input.start_look_ahead();
		assert_eq!(input.next_token(), Some(1));
		let handler2 = input.start_look_ahead();
		assert_eq!(input.next_token(), Some(2));
		input.stop_look_ahead(handler2, false);
		assert_eq!(input.next_token(), Some(3));
		input.stop_look_ahead(handler1, true);
		assert_eq!(input.next_token(), Some(1));
	}

	#[test]
	#[should_panic]
	fn wrong_token() {
		let tokens = vec![1, 2, 3, 4, 5];
		let mut input = Input::new(tokens);
		let handler1 = input.start_look_ahead();
		let _handler2 = input.start_look_ahead();
		input.stop_look_ahead(handler1, false);
	}
}