source_span/
lib.rs

1//! This crate provides utilities to locate characters and ranges of characters
2//! (spans) in a source file. It also provides ways to print fragments of the
//! source file with span information, hints, errors, warnings and notes,
4//! just like the `rustc` compiler.
5//!
6//! ## Basic usage
7//!
8//! This crate is designed as an incremental parsing utility.
9//! Its primary function is to keep track of the line and column position of
10//! each character in a character stream:
11//! ```rust
12//! use source_span::Position;
13//!
14//! let metrics = &source_span::DEFAULT_METRICS; // characters metrics
15//! let mut pos = Position::new(0, 0);
16//! let str = "Hello\nWorld!";
17//!
18//! for c in str.chars() {
19//! 	// `pos` holds the position (line, column) of
20//! 	// the current character at all points.
21//! 	pos.shift(c, metrics)
22//! }
23//! ```
24//!
25//! Using the `Span` type, it is also possible to build ranges of characters.
26//!
27//! ```rust
28//! # use source_span::{Position, Span};
29//! # let metrics = source_span::DEFAULT_METRICS;
30//! let mut chars = "1 + (2 * 2) / 3".chars();
31//! let mut pos = Position::new(0, 0);
32//! while let Some(c) = chars.next() {
33//! 	if c == '(' {
34//! 		break
35//! 	}
36//!
37//! 	pos.shift(c, &metrics)
38//! }
39//!
40//! let mut span: Span = pos.into();
41//!
42//! while let Some(c) = chars.next() {
43//! 	span.push(c, &metrics);
44//!
45//! 	if c == ')' {
46//! 		break
47//! 	}
48//! }
49//!
50//! // `span` now holds the beginning and end position of the `"(2 * 2)"` slice.
51//! ```
52//! ## SourceBuffer
53//!
54//! This crate provides a simple `SourceBuffer` buffer
55//! to index a character stream by character position.
56//!
57//! ```rust
58//! # use std::io::Read;
59//! use std::fs::File;
60//! use source_span::{DEFAULT_METRICS, Position, SourceBuffer};
61//!
62//! let file = File::open("examples/fib.txt").unwrap();
63//! let chars = utf8_decode::UnsafeDecoder::new(file.bytes());
64//! let metrics = DEFAULT_METRICS;
65//! let buffer = SourceBuffer::new(chars, Position::default(), metrics);
66//!
67//! buffer.at(Position::new(4, 2)); // get the character at line 4, column 2.
68//! ```
69//!
70//! The `SourceBuffer` type works as a wrapper around a character iterator.
71//! It is lazy: new characters are pulled from the wrapped iterator and put in
72//! the buffer only when needed.
73//! It can be used to access characters at a specific cursor position (as seen
74//! above) or iterate a slice of the text using a `Span`:
75//!
76//! ```rust
77//! # use std::io::Read;
78//! # use std::fs::File;
79//! # use source_span::{DEFAULT_METRICS, Position, SourceBuffer};
80//! # let file = File::open("examples/fib.txt").unwrap();
81//! # let chars = utf8_decode::UnsafeDecoder::new(file.bytes());
82//! # let metrics = DEFAULT_METRICS;
83//! # let buffer = SourceBuffer::new(chars, Position::default(), metrics);
84//! # let span = buffer.span();
85//! for c in buffer.iter_span(span) {
86//!     // do something.
87//! }
88//! ```
89//!
90//! ## Formatting
91//!
92//! This crate also provides a way to format decorated text, highlighting
93//! portions of the source text using ASCII art.
//! It can be used to produce outputs similar to the following:
95//!
96//! ```txt
97//! 1 |   fn main() {
98//!   |  ___________^
99//! 2 | |     println!("Hello World!")
100//!   | |              ^^^^^^^^^^^^^^ a string
101//! 3 | | }
102//!   | |_^ a block
103//! ```
104//!
105//! Each highlight is described by a span, can be associated to a label and
106//! drawn with a specific style (defining what characters and color to use to
107//! draw the lines).
108#![warn(clippy::perf, clippy::must_use_candidate)]
109use std::cmp::{Ord, Ordering, PartialOrd};
110
111mod buffer;
112pub mod fmt;
113mod loc;
114mod metrics;
115mod position;
116mod layout;
117
118pub use buffer::SourceBuffer;
119pub use loc::Loc;
120pub use metrics::*;
121pub use position::Position;
122pub use layout::*;
123
124/// Span in a source file.
125///
126/// A span points to a range of caracters between two cursor [`Position`].
127///
128/// ## Span construction with the `push*` methods
129///
130/// A span can be directly created using the [`new`](Span::new) method, however
131/// in the context of parsing (or lexing) it might be useful to build spans
132/// incrementally. The `push*` methods family will help you do that.
133///
134///   * [`push`](Span::push) will extend the span to include the given character
135///     located at the spans `end`.
136///   * [`push_column`](Span::push_column) will extend the span to include the
137///     next column. Note that this does not
138/// necessarily correspond to the next character (if it is a NL, or a full-width
139/// character for instance).
140///   * [`push_line`](Span::push_line) will extend the span to include the rest
141///     of the line. The end of the span will be
142/// placed at the begining of the next line.
143///
144///   * The [`next`](Span::next) method can finally be used to create the span
145///     to `[end, end]` (when a token has
146/// been read entirely for instance) and start building the next span. The
147/// [`clear`](Span::clear) method does the same but *in place*.
148///
149/// ## Example
150///
151/// Here is a basic example computing the span of every word/token in a `char`
152/// stream.
153///
154/// ```rust
155/// use source_span::{Span, DEFAULT_METRICS};
156///
157/// #[derive(Clone, Default)]
158/// pub struct Token {
159/// 	string: String,
160/// 	span: Span,
161/// }
162///
163/// let string = "This is an example String.".to_string();
164/// let mut tokens = Vec::new();
165/// let mut current = Token::default();
166/// let metrics = &DEFAULT_METRICS;
167///
168/// for c in string.chars() {
169/// 	if c.is_whitespace() {
170/// 		// save the current token.
171/// 		if !current.string.is_empty() {
172/// 			tokens.push(current.clone());
173/// 		}
174///
175/// 		// reset current token.
176/// 		current.string.clear();
177/// 		current.span.clear(); // the span here is moved to the end of itself.
178/// 	} else {
179/// 		current.string.push(c);
180/// 		current.span.push(c, metrics);
181/// 	}
182/// }
183///
184/// if !current.string.is_empty() {
185/// 	tokens.push(current);
186/// }
187/// ```
188#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
189pub struct Span {
190	/// The position of the first character in the span.
191	start: Position,
192
193	/// The last position in the span.
194	last: Position,
195
196	/// The position of the character directly following the span.
197	///
198	/// It is not included in the span.
199	end: Position,
200}
201
202impl PartialOrd for Span {
203	fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) }
204}
205
206impl Ord for Span {
207	fn cmp(&self, other: &Self) -> Ordering {
208		if self == other {
209			Ordering::Equal
210		} else if self.includes(other) {
211			Ordering::Greater
212		} else if other.includes(self) {
213			Ordering::Less
214		} else {
215			self.start.cmp(&other.start)
216		}
217	}
218}
219
220impl Span {
221	/// Create a new span from three positions.
222	///
223	/// If the `end` position or the `last` position is before the `start`
224	/// position then the returned span will be `[start, start]`.
225	/// If the `last` position is equal to `end` while the span is not empty, it
226	/// will panic.
227	#[must_use]
228	pub fn new(start: Position, mut last: Position, mut end: Position) -> Self {
229		if end < start || last < start {
230			last = start;
231			end = start;
232		}
233
234		if last >= end && end != start {
235			panic!("invalid span ({:?}, {:?}, {:?})", start, last, end);
236		}
237
238		Self { start, last, end }
239	}
240
241	pub fn of_string<M: Metrics>(str: &str, metrics: &M) -> Self {
242		let mut last = Position::new(0, 0);
243		let mut end = Position::new(0, 0);
244		for c in str.chars() {
245			last = end;
246			end.shift(c, metrics)
247		}
248
249		Self {
250			start: Position::new(0, 0),
251			last,
252			end,
253		}
254	}
255
256	/// Return the position of the first character in the span.
257	#[must_use]
258	pub const fn start(&self) -> Position { self.start }
259
260	/// Return the last position included in the span.
261	#[must_use]
262	pub const fn last(&self) -> Position { self.last }
263
264	/// Return the position of the character directly following the span.
265	///
266	/// It is not included in the span.
267	#[must_use]
268	pub const fn end(&self) -> Position { self.end }
269
270	/// Checks if the span is empty.
271	#[must_use]
272	pub fn is_empty(&self) -> bool { self.start == self.end }
273
274	/// Checks if two span overlaps.
275	#[must_use]
276	pub fn overlaps(&self, other: &Span) -> bool {
277		(self.start <= other.start && self.end > other.start)
278			|| (other.start <= self.start && other.end > self.start)
279	}
280
281	/// Checks if the given span is included it this span.
282	#[must_use]
283	pub fn includes(&self, other: &Span) -> bool {
284		self.start <= other.start && self.last >= other.last
285	}
286
287	/// The number of lines covered by the span.
288	///
289	/// It is at least one, even if the span is empty.
290	#[must_use]
291	pub const fn line_count(&self) -> usize { self.last.line - self.start.line + 1 }
292
293	/// Checks if the span includes the given line.
294	#[must_use]
295	pub fn includes_line(&self, line: usize) -> bool {
296		line >= self.start.line && line <= self.end.line
297	}
298
299	/// Extends the span to include the next column.
300	///
301	/// Note that this does not necessarily correspond
302	/// to the next character (if it is a NL, or a full-width character for
303	/// instance). To do that you can use the [`push`](Span::push) method.
304	pub fn push_column(&mut self) {
305		self.last = self.end;
306		self.end = self.end.next_column();
307	}
308
309	/// Extends the span to include the rest of the line.
310	///
311	/// The end of the span will be placed at the begining of the next line.
312	pub fn push_line(&mut self) {
313		self.last = self.end;
314		self.end = self.end.next_line();
315	}
316
317	/// Extend the span to include the given character located at the spans
318	/// `end` position.
319	pub fn push<M: Metrics>(&mut self, c: char, metrics: &M) {
320		self.last = self.end;
321		self.end = self.end.next(c, metrics);
322	}
323
324	/// Compute the union of two spans.
325	///
326	/// If the two spans do not overlap, all positions in between will be
327	/// included in the resulting span.
328	#[must_use]
329	pub fn union(&self, other: Self) -> Self {
330		if other.last > self.last && other.end > self.end {
331			Self {
332				start: std::cmp::min(self.start, other.start),
333				last: other.last,
334				end: other.end,
335			}
336		} else {
337			Self {
338				start: std::cmp::min(self.start, other.start),
339				last: self.last,
340				end: self.end,
341			}
342		}
343	}
344
345	/// Computes the intersection of the two spans.
346	///
347	/// If the two spans do not overlap, then the empty span located at the
348	/// start of the most advanced span (maximum of the start of the two
349	/// spans) is returned.
350	#[must_use]
351	pub fn inter(&self, other: Self) -> Self {
352		let start = std::cmp::max(self.start, other.start);
353		Self::new(start, other.last, other.end)
354	}
355
356	/// Extend the span to the end of the given span.
357	///
358	/// This is the *in-place* version of [`union`](Span::union), except that
359	/// nothing happens if the input span finishes before the end of `self`.
360	pub fn append(&mut self, other: Self) {
361		if other.last > self.last && other.end > self.end {
362			self.last = other.last;
363			self.end = other.end;
364		}
365	}
366
367	/// Return the next span (defined as `[end, end]`).
368	#[must_use]
369	pub const fn next(&self) -> Self {
370		Self {
371			start: self.end,
372			last: self.end,
373			end: self.end,
374		}
375	}
376
377	/// Set the span to [`next`](Span::next) (`[end, end]`).
378	pub fn clear(&mut self) {
379		self.start = self.end;
380		self.last = self.end;
381	}
382
383	/// Return the span aligned on line boundaries.
384	///
385	/// This will compute the smallest span including `self` such that
386	///  * `start` is at the begining of a line (column 0),
387	///  * `end` is at the end of a line (column [`std::usize::MAX`]),
388	///  * `last` points to the last character of a line (column
389	///    `std::usize::MAX - 1`).
390	#[must_use]
391	pub const fn aligned(&self) -> Self {
392		Self {
393			start: Position {
394				line: self.start.line,
395				column: 0,
396			},
397			last: Position {
398				line: self.end.line,
399				column: usize::max_value() - 1,
400			},
401			end: Position {
402				line: self.end.line,
403				column: usize::max_value(),
404			},
405		}
406	}
407}
408
409impl From<Position> for Span {
410	fn from(pos: Position) -> Self {
411		Self {
412			start: pos,
413			last: pos,
414			end: pos,
415		}
416	}
417}
418
419impl ::std::fmt::Display for Span {
420	fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
421		if self.start == self.last {
422			write!(f, "{}", self.start)
423		} else {
424			write!(f, "from {:?} to {:?}", self.start, self.end)
425		}
426	}
427}
428
#[cfg(test)]
mod tests {
	use super::*;

	/// A span whose `start` and `last` differ is displayed as
	/// `from <start> to <end>`.
	#[test]
	fn test_display_span() {
		let start = Position::new(0, 0);
		let last = Position::new(1, 20);
		let end = Position::new(3, 41);

		let displayed = Span::new(start, last, end).to_string();

		assert_eq!(displayed, "from 1:1 to 4:42".to_string());
	}
}
445}