lexpr/datum.rs
1//! S-expression values including source location.
2
3use std::{io, iter, slice};
4
5use crate::{
6 parse::{read, Options, Parser, Position, Result},
7 Cons, Value,
8};
9
10/// Combines an S-expression value with location information.
11///
12/// A `Datum` keeps, along with a plain `Value`, information about the text
13/// location the value was parsed from. For compound values, such as lists and
14/// vectors, that includes information for all contained values, recursively.
15///
16/// A `Datum` can be obtained by using the [`next_datum`] and [`expect_datum`]
17/// methods on `Parser`, or via the iterator obtained with [`datum_iter`].
18///
19/// [`next_datum`]: Parser::next_datum
20/// [`expect_datum`]: Parser::expect_datum
21/// [`datum_iter`]: Parser::datum_iter
22#[derive(Debug, Clone, PartialEq)]
23pub struct Datum {
24 value: Value,
25 info: SpanInfo,
26}
27
28impl Datum {
29 pub(crate) fn into_inner(self) -> (Value, SpanInfo) {
30 (self.value, self.info)
31 }
32
33 /// Returns a reference to the contained value.
34 pub fn value(&self) -> &Value {
35 &self.value
36 }
37
38 /// Returns the span for the compelete value.
39 pub fn span(&self) -> Span {
40 self.info.span()
41 }
42
43 /// Returns a reference to the datum.
44 pub fn as_ref(&self) -> Ref<'_> {
45 Ref {
46 value: &self.value,
47 info: &self.info,
48 }
49 }
50
51 /// Returns an iterator over the elements of a list.
52 ///
53 /// If the value contained in the datum is not either a cons cell or `Null`, `None` is
54 /// returned.
55 ///
56 /// Note that the returned iterator has special behavior for improper lists, yielding the
57 /// element after the dot after returning `None` the first time.
58 ///
59 /// ```
60 /// use lexpr::sexp;
61 ///
62 /// let datum = lexpr::datum::from_str("(1 2 . 3)").unwrap();
63 /// let mut iter = datum.list_iter().unwrap();
64 /// let one = iter.next().unwrap();
65 /// assert_eq!(one.value(), &sexp!(1));
66 /// let two = iter.next().unwrap();
67 /// assert_eq!(two.value(), &sexp!(2));
68 /// assert_eq!(iter.next(), None);
69 /// let three = iter.next().unwrap();
70 /// assert_eq!(three.value(), &sexp!(3));
71 /// assert_eq!(iter.next(), None);
72 /// ```
73 pub fn list_iter(&self) -> Option<ListIter<'_>> {
74 self.as_ref().list_iter()
75 }
76
77 /// Returns an iterator over the elements of a vector.
78 ///
79 /// If the value contained in the datum is not a vector, `None` is returned.
80 pub fn vector_iter(&self) -> Option<VectorIter<'_>> {
81 self.as_ref().vector_iter()
82 }
83
84 pub(crate) fn primitive(value: Value, start: Position, end: Position) -> Self {
85 Datum {
86 value,
87 info: SpanInfo::Prim(Span { start, end }),
88 }
89 }
90
91 pub(crate) fn vec(
92 elements: Vec<Value>,
93 element_info: Vec<SpanInfo>,
94 start: Position,
95 end: Position,
96 ) -> Self {
97 Datum {
98 value: Value::Vector(elements.into()),
99 info: SpanInfo::Vec(Span { start, end }, element_info),
100 }
101 }
102
103 pub(crate) fn cons(cell: Cons, meta: [SpanInfo; 2], start: Position, end: Position) -> Self {
104 Datum {
105 value: Value::Cons(cell),
106 info: SpanInfo::Cons(Span::new(start, end), Box::new(meta)),
107 }
108 }
109
110 pub(crate) fn quotation(name: &str, quoted: Datum, quote_span: Span) -> Self {
111 let (quoted_value, quoted_info) = quoted.into_inner();
112 let quoted_end = quoted_info.span().end();
113 let null_span = Span::new(quoted_end, quoted_end);
114 Datum {
115 value: Value::list(vec![Value::symbol(name), quoted_value]),
116 info: SpanInfo::Cons(
117 Span::new(quote_span.start(), quoted_end),
118 Box::new([
119 SpanInfo::Prim(quote_span),
120 SpanInfo::Cons(
121 quoted_info.span(),
122 Box::new([quoted_info, SpanInfo::Prim(null_span)]),
123 ),
124 ]),
125 ),
126 }
127 }
128}
129
130impl From<Datum> for Value {
131 fn from(datum: Datum) -> Self {
132 datum.value
133 }
134}
135
136/// A reference to a value and corresponding location information.
137///
138/// A `Ref` is the generalized version of `&Datum`; it can not only refer a top-level, owned `Datum`
139/// value, but also to values recursively contained therein.
140#[derive(Debug, Clone, Copy, PartialEq)]
141pub struct Ref<'a> {
142 value: &'a Value,
143 info: &'a SpanInfo,
144}
145
146impl<'a> AsRef<Value> for Ref<'a> {
147 fn as_ref(&self) -> &Value {
148 self.value
149 }
150}
151
152impl<'a> From<Ref<'a>> for Datum {
153 /// Turns a reference into an owned `Datum`, by cloning the referenced value and location
154 /// information.
155 fn from(r: Ref<'a>) -> Self {
156 Datum {
157 value: r.value.clone(),
158 info: r.info.clone(),
159 }
160 }
161}
162
163impl<'a> Ref<'a> {
164 fn new(value: &'a Value, info: &'a SpanInfo) -> Self {
165 Ref { value, info }
166 }
167
168 /// Returns the span of the referenced value.
169 pub fn span(&self) -> Span {
170 self.info.span()
171 }
172
173 /// Returns a reference to the contained value.
174 pub fn value(&self) -> &'a Value {
175 self.value
176 }
177
178 /// If the value referenced is not either a cons cell or `Null`, `None` is returned.
179 ///
180 /// Note that the returned iterator has special behavior for improper lists, yielding the
181 /// element after the dot after returning `None` the first time; see [`Datum::list_iter`] for an
182 /// example.
183 pub fn list_iter(&self) -> Option<ListIter<'a>> {
184 match (self.value, self.info) {
185 (Value::Cons(cell), SpanInfo::Cons(_, meta)) => Some(ListIter::cons(cell, meta)),
186 (Value::Null, _) => Some(ListIter::empty()),
187 _ => None,
188 }
189 }
190
191 /// Returns an iterator over the elements of a vector.
192 ///
193 /// If the value referenced is not a vector, `None` is returned.
194 pub fn vector_iter(&self) -> Option<VectorIter<'a>> {
195 match (self.value, self.info) {
196 (Value::Vector(elements), SpanInfo::Vec(_, element_meta)) => {
197 Some(VectorIter(elements.iter().zip(element_meta)))
198 }
199 _ => None,
200 }
201 }
202
203 /// Returns a pair of references to the fields of a cons cell.
204 ///
205 /// If the value referenced is not a cons cell, `None` is returned.
206 pub fn as_pair(&self) -> Option<(Ref<'a>, Ref<'a>)> {
207 let (car, cdr) = self.value.as_pair()?;
208 match &self.info {
209 SpanInfo::Cons(_, inner) if inner.len() == 2 => {
210 Some((Ref::new(car, &inner[0]), Ref::new(cdr, &inner[1])))
211 }
212 _ => unreachable!("badly shaped pair span information"),
213 }
214 }
215}
216
217impl<'a> std::ops::Deref for Ref<'a> {
218 type Target = Value;
219
220 fn deref(&self) -> &Self::Target {
221 self.value
222 }
223}
224
225/// The start and end for a span of text.
226#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
227pub struct Span {
228 start: Position,
229 end: Position,
230}
231
232impl Span {
233 pub(crate) fn new(start: Position, end: Position) -> Self {
234 Span { start, end }
235 }
236
237 pub(crate) fn empty() -> Self {
238 Span {
239 start: Position::new(0, 0),
240 end: Position::new(0, 0),
241 }
242 }
243
244 /// Get the starting line/column in the source file for this span.
245 pub fn start(&self) -> Position {
246 self.start
247 }
248
249 /// Get the ending line/column in the source file for this span.
250 pub fn end(&self) -> Position {
251 self.end
252 }
253}
254
255#[derive(Debug, Clone, PartialEq)]
256pub(crate) enum SpanInfo {
257 Prim(Span),
258 Cons(Span, Box<[SpanInfo; 2]>),
259 Vec(Span, Vec<SpanInfo>),
260}
261
262impl SpanInfo {
263 fn span(&self) -> Span {
264 match self {
265 SpanInfo::Prim(span) => *span,
266 SpanInfo::Cons(span, _) => *span,
267 SpanInfo::Vec(span, _) => *span,
268 }
269 }
270 pub(crate) fn cons_mut(&mut self) -> Option<&mut [SpanInfo; 2]> {
271 match self {
272 SpanInfo::Cons(_, info) => Some(info),
273 _ => None,
274 }
275 }
276}
277
278/// An iterator over the elements
279#[derive(Debug, Clone)]
280pub struct VectorIter<'a>(iter::Zip<slice::Iter<'a, Value>, slice::Iter<'a, SpanInfo>>);
281
282impl<'a> Iterator for VectorIter<'a> {
283 type Item = Ref<'a>;
284
285 fn next(&mut self) -> Option<Self::Item> {
286 self.0.next().map(|(value, info)| Ref { value, info })
287 }
288}
289
290/// An iterator yielding the `car` field of a chain of cons cells.
291///
292/// # Improper lists
293///
294/// Since in Lisp, lists can be "improper", i.e., terminated by a value other than `Null`, this
295/// iterator type takes advantage of the fact that Rust's iterators can produce multiple sequences
296/// of values, each terminated by `None`. For an improper list, the terminating value is produced
297/// after the sequence of elements, as a singleton element, again followed by `None`.
298///
299/// For example, while the list `(1 2 3)` will produce the three expected `Some` values, followed by
300/// `None`, the list `(1 2 . 3)` will produce `Some` values for `1` and `2`, then a `None`, followed
301/// by a some value for `3`, and then the final `None`.
302#[derive(Debug, Clone)]
303pub struct ListIter<'a>(ListCursor<'a>);
304
305impl<'a> ListIter<'a> {
306 /// Returns true when the iterator is completely exhausted.
307 ///
308 /// For an improper list, true will only be returned after the terminating value has been
309 /// consumed.
310 pub fn is_empty(&self) -> bool {
311 matches!(&self.0, ListCursor::Exhausted)
312 }
313
314 /// Returns a peek at the value that would be returned by a call to `next`.
315 ///
316 /// For improper lists, this implies that after the last regular element, `None` will be
317 /// returned, while `is_empty` still returns false at that point.
318 pub fn peek(&self) -> Option<Ref<'_>> {
319 match &self.0 {
320 ListCursor::Cons(cell, info) => Some(Ref {
321 value: cell.car(),
322 info: &info[0],
323 }),
324 ListCursor::Dot(_, _) => None,
325 ListCursor::Rest(value, info) => Some(Ref { value, info }),
326 ListCursor::Exhausted => None,
327 }
328 }
329
330 fn empty() -> Self {
331 ListIter(ListCursor::Exhausted)
332 }
333
334 fn cons(cell: &'a Cons, meta: &'a [SpanInfo; 2]) -> Self {
335 ListIter(ListCursor::Cons(cell, meta))
336 }
337}
338
339#[derive(Debug, Clone)]
340enum ListCursor<'a> {
341 Cons(&'a Cons, &'a [SpanInfo; 2]),
342 Dot(&'a Value, &'a SpanInfo),
343 Rest(&'a Value, &'a SpanInfo),
344 Exhausted,
345}
346
347impl<'a> Iterator for ListIter<'a> {
348 type Item = Ref<'a>;
349
350 fn next(&mut self) -> Option<Self::Item> {
351 match self.0 {
352 ListCursor::Cons(cell, [car_meta, cdr_meta]) => {
353 let car = cell.car();
354 match cdr_meta {
355 SpanInfo::Cons(_, next) => {
356 let cell = cell
357 .cdr()
358 .as_cons()
359 .expect("badly shaped list span information");
360 self.0 = ListCursor::Cons(cell, next);
361 }
362 SpanInfo::Prim(_) if cell.cdr().is_null() => {
363 self.0 = ListCursor::Exhausted;
364 }
365 _ => {
366 self.0 = ListCursor::Dot(cell.cdr(), cdr_meta);
367 }
368 }
369 Some(Ref {
370 value: car,
371 info: car_meta,
372 })
373 }
374 ListCursor::Dot(value, info) => {
375 self.0 = ListCursor::Rest(value, info);
376 None
377 }
378 ListCursor::Rest(value, info) => {
379 self.0 = ListCursor::Exhausted;
380 Some(Ref { value, info })
381 }
382 ListCursor::Exhausted => None,
383 }
384 }
385}
386
387fn from_trait<'de, R>(read: R, options: Options) -> Result<Datum>
388where
389 R: read::Read<'de>,
390{
391 let mut parser = Parser::with_options(read, options);
392 let datum = parser.expect_datum()?;
393 parser.expect_end()?;
394
395 Ok(datum)
396}
397
398/// Parse a datum from an IO stream containing a single S-expression.
399///
400/// The content of the IO stream is parsed directly from the stream
401/// without being buffered in memory.
402///
403/// When reading from a source against which short reads are not efficient, such
404/// as a [`File`], you will want to apply your own buffering, e.g. using
405/// [`std::io::BufReader`].
406///
407/// ```no_run
408/// use std::error::Error;
409/// use std::fs::File;
410/// use std::io::BufReader;
411/// use std::path::Path;
412///
413/// fn read_datum_from_file<P: AsRef<Path>>(path: P) -> Result<lexpr::Datum, Box<dyn Error>> {
414/// // Open the file in read-only mode with buffer.
415/// let file = File::open(path)?;
416/// let reader = BufReader::new(file);
417///
418/// // Read an arbitrary S-expression, using parser options suitable for Emacs Lisp.
419/// let datum = lexpr::datum::from_reader_custom(reader, lexpr::parse::Options::elisp())?;
420///
421/// // Return the datum.
422/// Ok(datum)
423/// }
424///
425/// let datum = read_datum_from_file("test.el").unwrap();
426/// println!("{:?}", datum);
427/// ```
428///
429/// [`File`]: https://doc.rust-lang.org/std/fs/struct.File.html
430/// [`BufReader`]: https://doc.rust-lang.org/std/io/struct.BufReader.html
431pub fn from_reader_custom(rdr: impl io::Read, options: Options) -> Result<Datum> {
432 from_trait(read::IoRead::new(rdr), options)
433}
434
435/// Parse a datum from an IO stream of S-expressions, using the default parser
436/// options.
437///
438/// See [`from_reader_custom`] for more information.
439///
440/// [`from_reader_custom`]: fn.from_reader_custom.html
441pub fn from_reader(rdr: impl io::Read) -> Result<Datum> {
442 from_reader_custom(rdr, Options::default())
443}
444
445/// Parse a datum from an IO stream of S-expressions, using the parser
446/// options suitable for parsing Emacs Lisp.
447///
448/// See [`from_reader_custom`] for more information.
449///
450/// [`from_reader_custom`]: fn.from_reader_custom.html
451pub fn from_reader_elisp(rdr: impl io::Read) -> Result<Datum> {
452 from_reader_custom(rdr, Options::elisp())
453}
454
455/// Parse a datum from bytes representing a single S-expression.
456///
457/// ```
458/// let datum = lexpr::from_slice_custom(b"(a (nested) list)", lexpr::parse::Options::new());
459/// println!("{:?}", datum);
460/// ```
461pub fn from_slice_custom(bytes: &[u8], options: Options) -> Result<Datum> {
462 // TODO: the use of SliceRead is most probably not a good idea, since it calculates position
463 // information on-demand, leading to O(n^2) complexity.
464 from_trait(read::SliceRead::new(bytes), options)
465}
466
467/// Parse a datum from bytes representing a single S-expressions, using the
468/// default parser options.
469///
470/// See [`from_slice_custom`] for more information.
471///
472/// [`from_slice_custom`]: fn.from_slice_custom.html
473pub fn from_slice(bytes: &[u8]) -> Result<Datum> {
474 from_slice_custom(bytes, Options::default())
475}
476
477/// Parse a datum from bytes representing a single S-expressions, using parser
478/// options suitable for Emacs Lisp.
479///
480/// See [`from_slice_custom`] for more information.
481///
482/// [`from_slice_custom`]: fn.from_slice_custom.html
483pub fn from_slice_elisp(bytes: &[u8]) -> Result<Datum> {
484 from_slice_custom(bytes, Options::elisp())
485}
486
487/// Parse a datum from a string slice representing a single S-expression.
488///
489/// ```
490/// let datum = lexpr::from_str_custom("(a (nested) list)", lexpr::parse::Options::new());
491/// println!("{:?}", datum);
492/// ```
493pub fn from_str_custom(s: &str, options: Options) -> Result<Datum> {
494 // TODO: the use of StrRead (which delegates to SliceRead) is most probably not a good idea,
495 // since it calculates position information on-demand, leading to O(n^2) complexity.
496 from_trait(read::StrRead::new(s), options)
497}
498
499/// Parse a datum from a string slice representing a single S-expressions, using
500/// the default parser options.
501///
502/// See [`from_str_custom`] for more information.
503///
504/// [`from_str_custom`]: fn.from_str_custom.html
505pub fn from_str(s: &str) -> Result<Datum> {
506 from_str_custom(s, Options::default())
507}
508
509/// Parse a datum from a string slice representing a single S-expression, using
510/// parser options suitable for Emacs Lisp.
511///
512/// See [`from_str_custom`] for more information.
513///
514/// [`from_str_custom`]: fn.from_str_custom.html
515pub fn from_str_elisp(s: &str) -> Result<Datum> {
516 from_str_custom(s, Options::elisp())
517}