1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
use std::cell::RefCell;

use crate::{Metrics, Position, Span};

/// Lazy string buffer that fills up on demand, can be iterated and indexed by
/// character position.
///
/// The `SourceBuffer` wraps around a `char` iterator. It can be itself used as
/// a `char` iterator, or as a `SourceBuffer` to access an arbitrary fragment of
/// the input source stream.
pub struct SourceBuffer<E, I: Iterator<Item = Result<char, E>>, M: Metrics> {
	/// Buffering state, kept behind a `RefCell` so that it can be mutated
	/// (to pull more characters from the input) through a shared reference
	/// to the buffer.
	p: RefCell<Inner<E, I>>,

	/// Metrics used.
	metrics: M,
}

/// Mutable state of a `SourceBuffer`: the input stream together with
/// everything that has been buffered from it so far.
struct Inner<E, I: Iterator<Item = Result<char, E>>> {
	/// Input source `char` stream.
	input: I,

	/// SourceBuffer error state.
	///
	/// Holds the error yielded by the input stream until it is handed over
	/// to a caller. No more input is read while an error is stored here.
	error: Option<E>,

	/// Buffer data: every character read from the input so far.
	data: Vec<char>,

	/// Lines index.
	///
	/// Contains the index (in `data`) of the first character of each line.
	lines: Vec<usize>,

	/// Span of the buffer.
	span: Span,
}

impl<E, I: Iterator<Item = Result<char, E>>> Inner<E, I> {
	/// Read the next line from the input stream and add it to the buffer.
	///
	/// Characters are pulled from the input until the end position of the
	/// buffer span moves to another line. The index of the first character
	/// of that new line is then registered in `lines`.
	///
	/// Returns `true` if a new line has been added. Returns `false` if the
	/// source stream is done, if it yields an error (which is then stored in
	/// `error`), or if a previously stored error has not been reported yet.
	/// Characters read before the stream stopped are kept in the buffer.
	fn read_line<M: Metrics>(&mut self, metrics: &M) -> bool {
		// Do not read further while an error is waiting to be reported.
		if self.error.is_some() {
			return false;
		}

		let line = self.span.end().line;
		while self.span.end().line == line {
			match self.input.next() {
				Some(Ok(c)) => {
					self.data.push(c);
					self.span.push(c, metrics);
				}
				Some(Err(e)) => {
					self.error = Some(e);
					return false;
				}
				None => return false,
			}
		}

		// The span moved to a new line: its first character is the next one
		// to be buffered.
		self.lines.push(self.data.len());
		true
	}

	/// Get the index of the char at the given cursor position if it is in the
	/// buffer. If it is not in the buffer but after the buffered content,
	/// the input stream will be read until the buffer span includes the
	/// given position.
	///
	/// Returns `None` if the given position is previous to the buffer start
	/// position, if the source stream ends before the given position, or
	/// if the line at the given position is shorter than the given position
	/// column.
	///
	/// # Errors
	///
	/// If the position could not be reached and the input stream yielded an
	/// error, that error is returned (and removed from the buffer state).
	fn index_at<M: Metrics>(&mut self, pos: Position, metrics: &M) -> Result<Option<usize>, E> {
		let start = self.span.start();
		if pos < start {
			return Ok(None);
		}

		while pos >= self.span.end() && self.read_line(metrics) {}

		if pos >= self.span.end() {
			// The position is out of reach: report the pending error, if any.
			return match self.error.take() {
				Some(e) => Err(e),
				None => Ok(None),
			};
		}

		// line index relative to the first line of the buffer.
		let relative_line = pos.line - start.line;
		// get the index of the char of the beginning of the line in the buffer.
		let mut i = self.lines[relative_line];
		// place a virtual cursor on the first buffered character of the
		// target line. The first line of the buffer starts at the buffer
		// start position, which is not necessarily at column 0; every
		// other line starts at column 0.
		let mut cursor = if relative_line == 0 {
			start
		} else {
			Position::new(pos.line, 0)
		};

		while cursor < pos {
			cursor = cursor.next(self.data[i], metrics);
			i += 1;
		}

		if cursor == pos {
			// found it!
			Ok(Some(i))
		} else {
			// the cursor jumped over `pos`: the position does not exist in
			// the buffer.
			Ok(None)
		}
	}

	/// Get the character at the given index.
	///
	/// If it is not in the buffer but after the buffered content, the input
	/// stream will be read until the buffer includes the given index.
	/// Returns `None` if the source stream ends before the given index.
	///
	/// # Errors
	///
	/// If the index could not be reached and the input stream yielded an
	/// error, that error is returned (and removed from the buffer state).
	fn get<M: Metrics>(&mut self, i: usize, metrics: &M) -> Result<Option<char>, E> {
		while i >= self.data.len() && self.read_line(metrics) {}

		match self.data.get(i) {
			Some(&c) => Ok(Some(c)),
			// Out of reach: report the pending error, if any.
			None => match self.error.take() {
				Some(e) => Err(e),
				None => Ok(None),
			},
		}
	}
}

impl<E, I: Iterator<Item = Result<char, E>>, M: Metrics> SourceBuffer<E, I, M> {
	/// Create a new empty buffer over `input`, starting at the given
	/// position.
	pub fn new(input: I, position: Position, metrics: M) -> Self {
		let inner = Inner {
			input,
			error: None,
			data: Vec::new(),
			// The first line begins with the first buffered character.
			lines: vec![0],
			span: position.into(),
		};

		Self {
			p: RefCell::new(inner),
			metrics,
		}
	}

	/// Metrics used by the source buffer to map every character.
	pub fn metrics(&self) -> &M {
		&self.metrics
	}

	/// Span covered by the data buffered so far.
	pub fn span(&self) -> Span {
		let inner = self.p.borrow();
		inner.span
	}

	/// Find the buffer index of the character located at the cursor position
	/// `pos`. When `pos` lies after the buffered content, the input stream is
	/// read until the buffer span includes it.
	///
	/// Returns `None` if `pos` precedes the buffer start position, if the
	/// source stream ends before `pos`, or if the line at `pos` is shorter
	/// than the column of `pos`.
	pub fn index_at(&self, pos: Position) -> Result<Option<usize>, E> {
		let mut inner = self.p.borrow_mut();
		inner.index_at(pos, &self.metrics)
	}

	/// Get the character located at the cursor position `pos`. When it is not
	/// buffered yet, the input stream is pulled until the buffer span
	/// includes `pos`.
	///
	/// Returns `None` if `pos` is out of range, if the source stream ends
	/// before `pos`, or if the line at `pos` is shorter than the column of
	/// `pos`.
	pub fn at(&self, pos: Position) -> Result<Option<char>, E> {
		match self.index_at(pos)? {
			Some(i) => self.get(i),
			None => Ok(None),
		}
	}

	/// Get the character stored at index `i`.
	///
	/// When `i` lies after the buffered content, the input stream is read
	/// until the buffer includes it. Returns `None` if the source stream ends
	/// before reaching `i`.
	pub fn get(&self, i: usize) -> Result<Option<char>, E> {
		let mut inner = self.p.borrow_mut();
		inner.get(i, &self.metrics)
	}

	/// Iterate over the characters of the buffer, from its beginning.
	///
	/// Once the buffered content is exhausted, the iterator makes the buffer
	/// read from the source stream.
	pub fn iter(&self) -> Iter<E, I, M> {
		let start = self.p.borrow().span.start();

		Iter {
			buffer: self,
			i: Some(Ok(0)),
			pos: start,
			end: Position::end(),
		}
	}

	/// Iterate over the characters of the buffer, starting at position `pos`.
	///
	/// A `pos` preceding the buffer start position is replaced by the buffer
	/// start position. Once the buffered content is exhausted, the iterator
	/// makes the buffer read from the source stream.
	pub fn iter_from(&self, pos: Position) -> Iter<E, I, M> {
		self.iter_until(pos, Position::end())
	}

	/// Iterate over the characters of the buffer located in the given span.
	///
	/// A span start preceding the buffer start position is replaced by the
	/// buffer start position. Once the buffered content is exhausted, the
	/// iterator makes the buffer read from the source stream.
	pub fn iter_span(&self, span: Span) -> Iter<E, I, M> {
		self.iter_until(span.start(), span.end())
	}

	/// Build the iterator going from `from` (clamped to the buffer start
	/// position) up to `end`.
	fn iter_until(&self, from: Position, end: Position) -> Iter<E, I, M> {
		let start = self.p.borrow().span.start();
		let pos = Ord::max(start, from);
		let i = self.index_at(pos).transpose();

		Iter {
			buffer: self,
			i,
			pos,
			end,
		}
	}
}

/// Iterator over the characters of a [`SourceBuffer`].
///
/// This iterator is created using the [`SourceBuffer::iter`] method or the
/// [`SourceBuffer::iter_from`] method. When it reaches the end of the buffer,
/// the buffer will start reading from the source stream until the stream itself
/// returns `None`.
pub struct Iter<'b, E, I: 'b + Iterator<Item = Result<char, E>>, M: Metrics> {
	/// Buffer the characters are read from.
	buffer: &'b SourceBuffer<E, I, M>,
	/// Index in the buffer of the next character to yield, or the error
	/// raised while locating the start position. `None` when there is
	/// nothing to iterate over.
	i: Option<Result<usize, E>>,
	/// Position of the next character to yield.
	pos: Position,
	/// Position at which the iteration stops (excluded).
	end: Position,
}

impl<'b, E, I: 'b + Iterator<Item = Result<char, E>>, M: Metrics> Iter<'b, E, I, M> {
	/// Consume the iterator and gather its remaining characters into a
	/// `String`, stopping at the first error, which is returned.
	pub fn into_string(self) -> Result<String, E> {
		self.collect()
	}
}

impl<'b, E, I: 'b + Iterator<Item = Result<char, E>>, M: Metrics> Iterator for Iter<'b, E, I, M> {
	type Item = Result<char, E>;

	/// Yield the next character, pulling it from the source stream through
	/// the buffer when it is not buffered yet.
	fn next(&mut self) -> Option<Result<char, E>> {
		// The end of the requested range has been reached.
		if self.pos >= self.end {
			return None;
		}

		let index = match self.i {
			Some(Ok(index)) => index,
			None => return None,
			Some(Err(_)) => {
				// Hand over the stored error once; the iterator is done
				// afterwards.
				return match self.i.take() {
					Some(Err(e)) => Some(Err(e)),
					_ => unreachable!(),
				};
			}
		};

		let item = self.buffer.get(index).transpose();
		if let Some(Ok(c)) = &item {
			// Only move forward when a character has actually been read.
			self.pos = self.pos.next(*c, self.buffer.metrics());
			self.i = Some(Ok(index + 1));
		}

		item
	}
}