source_span/lib.rs
//! This crate provides utilities to locate characters and ranges of characters
//! (spans) in a source file. It also provides ways to print fragments of the
//! source file with span information, hints, errors, warnings and notes,
//! just like the `rustc` compiler.
5//!
6//! ## Basic usage
7//!
8//! This crate is designed as an incremental parsing utility.
9//! Its primary function is to keep track of the line and column position of
10//! each character in a character stream:
11//! ```rust
12//! use source_span::Position;
13//!
14//! let metrics = &source_span::DEFAULT_METRICS; // characters metrics
15//! let mut pos = Position::new(0, 0);
16//! let str = "Hello\nWorld!";
17//!
18//! for c in str.chars() {
19//! // `pos` holds the position (line, column) of
20//! // the current character at all points.
21//! pos.shift(c, metrics)
22//! }
23//! ```
24//!
25//! Using the `Span` type, it is also possible to build ranges of characters.
26//!
27//! ```rust
28//! # use source_span::{Position, Span};
29//! # let metrics = source_span::DEFAULT_METRICS;
30//! let mut chars = "1 + (2 * 2) / 3".chars();
31//! let mut pos = Position::new(0, 0);
32//! while let Some(c) = chars.next() {
33//! if c == '(' {
34//! break
35//! }
36//!
37//! pos.shift(c, &metrics)
38//! }
39//!
40//! let mut span: Span = pos.into();
41//!
42//! while let Some(c) = chars.next() {
43//! span.push(c, &metrics);
44//!
45//! if c == ')' {
46//! break
47//! }
48//! }
49//!
50//! // `span` now holds the beginning and end position of the `"(2 * 2)"` slice.
51//! ```
52//! ## SourceBuffer
53//!
54//! This crate provides a simple `SourceBuffer` buffer
55//! to index a character stream by character position.
56//!
57//! ```rust
58//! # use std::io::Read;
59//! use std::fs::File;
60//! use source_span::{DEFAULT_METRICS, Position, SourceBuffer};
61//!
62//! let file = File::open("examples/fib.txt").unwrap();
63//! let chars = utf8_decode::UnsafeDecoder::new(file.bytes());
64//! let metrics = DEFAULT_METRICS;
65//! let buffer = SourceBuffer::new(chars, Position::default(), metrics);
66//!
67//! buffer.at(Position::new(4, 2)); // get the character at line 4, column 2.
68//! ```
69//!
70//! The `SourceBuffer` type works as a wrapper around a character iterator.
71//! It is lazy: new characters are pulled from the wrapped iterator and put in
72//! the buffer only when needed.
73//! It can be used to access characters at a specific cursor position (as seen
74//! above) or iterate a slice of the text using a `Span`:
75//!
76//! ```rust
77//! # use std::io::Read;
78//! # use std::fs::File;
79//! # use source_span::{DEFAULT_METRICS, Position, SourceBuffer};
80//! # let file = File::open("examples/fib.txt").unwrap();
81//! # let chars = utf8_decode::UnsafeDecoder::new(file.bytes());
82//! # let metrics = DEFAULT_METRICS;
83//! # let buffer = SourceBuffer::new(chars, Position::default(), metrics);
84//! # let span = buffer.span();
85//! for c in buffer.iter_span(span) {
86//! // do something.
87//! }
88//! ```
89//!
90//! ## Formatting
91//!
92//! This crate also provides a way to format decorated text, highlighting
93//! portions of the source text using ASCII art.
//! It can be used to produce output similar to the following:
95//!
96//! ```txt
97//! 1 | fn main() {
98//! | ___________^
99//! 2 | | println!("Hello World!")
100//! | | ^^^^^^^^^^^^^^ a string
101//! 3 | | }
102//! | |_^ a block
103//! ```
104//!
105//! Each highlight is described by a span, can be associated to a label and
106//! drawn with a specific style (defining what characters and color to use to
107//! draw the lines).
108#![warn(clippy::perf, clippy::must_use_candidate)]
109use std::cmp::{Ord, Ordering, PartialOrd};
110
111mod buffer;
112pub mod fmt;
113mod loc;
114mod metrics;
115mod position;
116mod layout;
117
118pub use buffer::SourceBuffer;
119pub use loc::Loc;
120pub use metrics::*;
121pub use position::Position;
122pub use layout::*;
123
124/// Span in a source file.
125///
/// A span points to a range of characters between two cursor [`Position`]s.
127///
128/// ## Span construction with the `push*` methods
129///
130/// A span can be directly created using the [`new`](Span::new) method, however
131/// in the context of parsing (or lexing) it might be useful to build spans
132/// incrementally. The `push*` methods family will help you do that.
133///
/// * [`push`](Span::push) will extend the span to include the given character
///   located at the span's `end`.
/// * [`push_column`](Span::push_column) will extend the span to include the
///   next column. Note that this does not necessarily correspond to the next
///   character (if it is a NL, or a full-width character for instance).
/// * [`push_line`](Span::push_line) will extend the span to include the rest
///   of the line. The end of the span will be placed at the beginning of the
///   next line.
///
/// * The [`next`](Span::next) method can finally be used to create the empty
///   span `[end, end]` (when a token has been read entirely for instance) and
///   start building the next span. The [`clear`](Span::clear) method does the
///   same but *in place*.
148///
149/// ## Example
150///
151/// Here is a basic example computing the span of every word/token in a `char`
152/// stream.
153///
154/// ```rust
155/// use source_span::{Span, DEFAULT_METRICS};
156///
157/// #[derive(Clone, Default)]
158/// pub struct Token {
159/// string: String,
160/// span: Span,
161/// }
162///
163/// let string = "This is an example String.".to_string();
164/// let mut tokens = Vec::new();
165/// let mut current = Token::default();
166/// let metrics = &DEFAULT_METRICS;
167///
168/// for c in string.chars() {
169/// if c.is_whitespace() {
170/// // save the current token.
171/// if !current.string.is_empty() {
172/// tokens.push(current.clone());
173/// }
174///
175/// // reset current token.
176/// current.string.clear();
177/// current.span.clear(); // the span here is moved to the end of itself.
178/// } else {
179/// current.string.push(c);
180/// current.span.push(c, metrics);
181/// }
182/// }
183///
184/// if !current.string.is_empty() {
185/// tokens.push(current);
186/// }
187/// ```
188#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
189pub struct Span {
190 /// The position of the first character in the span.
191 start: Position,
192
193 /// The last position in the span.
194 last: Position,
195
196 /// The position of the character directly following the span.
197 ///
198 /// It is not included in the span.
199 end: Position,
200}
201
202impl PartialOrd for Span {
203 fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) }
204}
205
206impl Ord for Span {
207 fn cmp(&self, other: &Self) -> Ordering {
208 if self == other {
209 Ordering::Equal
210 } else if self.includes(other) {
211 Ordering::Greater
212 } else if other.includes(self) {
213 Ordering::Less
214 } else {
215 self.start.cmp(&other.start)
216 }
217 }
218}
219
220impl Span {
221 /// Create a new span from three positions.
222 ///
223 /// If the `end` position or the `last` position is before the `start`
224 /// position then the returned span will be `[start, start]`.
225 /// If the `last` position is equal to `end` while the span is not empty, it
226 /// will panic.
227 #[must_use]
228 pub fn new(start: Position, mut last: Position, mut end: Position) -> Self {
229 if end < start || last < start {
230 last = start;
231 end = start;
232 }
233
234 if last >= end && end != start {
235 panic!("invalid span ({:?}, {:?}, {:?})", start, last, end);
236 }
237
238 Self { start, last, end }
239 }
240
241 pub fn of_string<M: Metrics>(str: &str, metrics: &M) -> Self {
242 let mut last = Position::new(0, 0);
243 let mut end = Position::new(0, 0);
244 for c in str.chars() {
245 last = end;
246 end.shift(c, metrics)
247 }
248
249 Self {
250 start: Position::new(0, 0),
251 last,
252 end,
253 }
254 }
255
256 /// Return the position of the first character in the span.
257 #[must_use]
258 pub const fn start(&self) -> Position { self.start }
259
260 /// Return the last position included in the span.
261 #[must_use]
262 pub const fn last(&self) -> Position { self.last }
263
264 /// Return the position of the character directly following the span.
265 ///
266 /// It is not included in the span.
267 #[must_use]
268 pub const fn end(&self) -> Position { self.end }
269
270 /// Checks if the span is empty.
271 #[must_use]
272 pub fn is_empty(&self) -> bool { self.start == self.end }
273
274 /// Checks if two span overlaps.
275 #[must_use]
276 pub fn overlaps(&self, other: &Span) -> bool {
277 (self.start <= other.start && self.end > other.start)
278 || (other.start <= self.start && other.end > self.start)
279 }
280
281 /// Checks if the given span is included it this span.
282 #[must_use]
283 pub fn includes(&self, other: &Span) -> bool {
284 self.start <= other.start && self.last >= other.last
285 }
286
287 /// The number of lines covered by the span.
288 ///
289 /// It is at least one, even if the span is empty.
290 #[must_use]
291 pub const fn line_count(&self) -> usize { self.last.line - self.start.line + 1 }
292
293 /// Checks if the span includes the given line.
294 #[must_use]
295 pub fn includes_line(&self, line: usize) -> bool {
296 line >= self.start.line && line <= self.end.line
297 }
298
299 /// Extends the span to include the next column.
300 ///
301 /// Note that this does not necessarily correspond
302 /// to the next character (if it is a NL, or a full-width character for
303 /// instance). To do that you can use the [`push`](Span::push) method.
304 pub fn push_column(&mut self) {
305 self.last = self.end;
306 self.end = self.end.next_column();
307 }
308
309 /// Extends the span to include the rest of the line.
310 ///
311 /// The end of the span will be placed at the begining of the next line.
312 pub fn push_line(&mut self) {
313 self.last = self.end;
314 self.end = self.end.next_line();
315 }
316
317 /// Extend the span to include the given character located at the spans
318 /// `end` position.
319 pub fn push<M: Metrics>(&mut self, c: char, metrics: &M) {
320 self.last = self.end;
321 self.end = self.end.next(c, metrics);
322 }
323
324 /// Compute the union of two spans.
325 ///
326 /// If the two spans do not overlap, all positions in between will be
327 /// included in the resulting span.
328 #[must_use]
329 pub fn union(&self, other: Self) -> Self {
330 if other.last > self.last && other.end > self.end {
331 Self {
332 start: std::cmp::min(self.start, other.start),
333 last: other.last,
334 end: other.end,
335 }
336 } else {
337 Self {
338 start: std::cmp::min(self.start, other.start),
339 last: self.last,
340 end: self.end,
341 }
342 }
343 }
344
345 /// Computes the intersection of the two spans.
346 ///
347 /// If the two spans do not overlap, then the empty span located at the
348 /// start of the most advanced span (maximum of the start of the two
349 /// spans) is returned.
350 #[must_use]
351 pub fn inter(&self, other: Self) -> Self {
352 let start = std::cmp::max(self.start, other.start);
353 Self::new(start, other.last, other.end)
354 }
355
356 /// Extend the span to the end of the given span.
357 ///
358 /// This is the *in-place* version of [`union`](Span::union), except that
359 /// nothing happens if the input span finishes before the end of `self`.
360 pub fn append(&mut self, other: Self) {
361 if other.last > self.last && other.end > self.end {
362 self.last = other.last;
363 self.end = other.end;
364 }
365 }
366
367 /// Return the next span (defined as `[end, end]`).
368 #[must_use]
369 pub const fn next(&self) -> Self {
370 Self {
371 start: self.end,
372 last: self.end,
373 end: self.end,
374 }
375 }
376
377 /// Set the span to [`next`](Span::next) (`[end, end]`).
378 pub fn clear(&mut self) {
379 self.start = self.end;
380 self.last = self.end;
381 }
382
383 /// Return the span aligned on line boundaries.
384 ///
385 /// This will compute the smallest span including `self` such that
386 /// * `start` is at the begining of a line (column 0),
387 /// * `end` is at the end of a line (column [`std::usize::MAX`]),
388 /// * `last` points to the last character of a line (column
389 /// `std::usize::MAX - 1`).
390 #[must_use]
391 pub const fn aligned(&self) -> Self {
392 Self {
393 start: Position {
394 line: self.start.line,
395 column: 0,
396 },
397 last: Position {
398 line: self.end.line,
399 column: usize::max_value() - 1,
400 },
401 end: Position {
402 line: self.end.line,
403 column: usize::max_value(),
404 },
405 }
406 }
407}
408
409impl From<Position> for Span {
410 fn from(pos: Position) -> Self {
411 Self {
412 start: pos,
413 last: pos,
414 end: pos,
415 }
416 }
417}
418
419impl ::std::fmt::Display for Span {
420 fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
421 if self.start == self.last {
422 write!(f, "{}", self.start)
423 } else {
424 write!(f, "from {:?} to {:?}", self.start, self.end)
425 }
426 }
427}
428
#[cfg(test)]
mod tests {
    use super::*;

    /// A span whose `start` and `last` differ is displayed as
    /// `from <start> to <end>`.
    #[test]
    fn test_display_span() {
        let start = Position::new(0, 0);
        let last = Position::new(1, 20);
        let end = Position::new(3, 41);

        let displayed = Span::new(start, last, end).to_string();

        assert_eq!(displayed, "from 1:1 to 4:42".to_string());
    }
}