lexpr/
datum.rs

1//! S-expression values including source location.
2
3use std::{io, iter, slice};
4
5use crate::{
6    parse::{read, Options, Parser, Position, Result},
7    Cons, Value,
8};
9
/// Combines an S-expression value with location information.
///
/// A `Datum` keeps, along with a plain `Value`, information about the text
/// location the value was parsed from. For compound values, such as lists and
/// vectors, that includes information for all contained values, recursively.
///
/// A `Datum` can be obtained by using the [`next_datum`] and [`expect_datum`]
/// methods on `Parser`, or via the iterator obtained with [`datum_iter`].
///
/// The location of the whole value is available through [`Datum::span`]; the
/// locations of nested values are reached through [`Datum::as_ref`],
/// [`Datum::list_iter`] and [`Datum::vector_iter`].
///
/// [`next_datum`]: Parser::next_datum
/// [`expect_datum`]: Parser::expect_datum
/// [`datum_iter`]: Parser::datum_iter
#[derive(Debug, Clone, PartialEq)]
pub struct Datum {
    // The plain S-expression value, without any location information.
    value: Value,
    // Span of `value` itself plus, for compound values, of everything nested in it.
    info: SpanInfo,
}
27
28impl Datum {
29    pub(crate) fn into_inner(self) -> (Value, SpanInfo) {
30        (self.value, self.info)
31    }
32
33    /// Returns a reference to the contained value.
34    pub fn value(&self) -> &Value {
35        &self.value
36    }
37
38    /// Returns the span for the compelete value.
39    pub fn span(&self) -> Span {
40        self.info.span()
41    }
42
43    /// Returns a reference to the datum.
44    pub fn as_ref(&self) -> Ref<'_> {
45        Ref {
46            value: &self.value,
47            info: &self.info,
48        }
49    }
50
51    /// Returns an iterator over the elements of a list.
52    ///
53    /// If the value contained in the datum is not either a cons cell or `Null`, `None` is
54    /// returned.
55    ///
56    /// Note that the returned iterator has special behavior for improper lists, yielding the
57    /// element after the dot after returning `None` the first time.
58    ///
59    /// ```
60    /// use lexpr::sexp;
61    ///
62    /// let datum = lexpr::datum::from_str("(1 2 . 3)").unwrap();
63    /// let mut iter = datum.list_iter().unwrap();
64    /// let one = iter.next().unwrap();
65    /// assert_eq!(one.value(), &sexp!(1));
66    /// let two = iter.next().unwrap();
67    /// assert_eq!(two.value(), &sexp!(2));
68    /// assert_eq!(iter.next(), None);
69    /// let three = iter.next().unwrap();
70    /// assert_eq!(three.value(), &sexp!(3));
71    /// assert_eq!(iter.next(), None);
72    /// ```
73    pub fn list_iter(&self) -> Option<ListIter<'_>> {
74        self.as_ref().list_iter()
75    }
76
77    /// Returns an iterator over the elements of a vector.
78    ///
79    /// If the value contained in the datum is not a vector, `None` is returned.
80    pub fn vector_iter(&self) -> Option<VectorIter<'_>> {
81        self.as_ref().vector_iter()
82    }
83
84    pub(crate) fn primitive(value: Value, start: Position, end: Position) -> Self {
85        Datum {
86            value,
87            info: SpanInfo::Prim(Span { start, end }),
88        }
89    }
90
91    pub(crate) fn vec(
92        elements: Vec<Value>,
93        element_info: Vec<SpanInfo>,
94        start: Position,
95        end: Position,
96    ) -> Self {
97        Datum {
98            value: Value::Vector(elements.into()),
99            info: SpanInfo::Vec(Span { start, end }, element_info),
100        }
101    }
102
103    pub(crate) fn cons(cell: Cons, meta: [SpanInfo; 2], start: Position, end: Position) -> Self {
104        Datum {
105            value: Value::Cons(cell),
106            info: SpanInfo::Cons(Span::new(start, end), Box::new(meta)),
107        }
108    }
109
110    pub(crate) fn quotation(name: &str, quoted: Datum, quote_span: Span) -> Self {
111        let (quoted_value, quoted_info) = quoted.into_inner();
112        let quoted_end = quoted_info.span().end();
113        let null_span = Span::new(quoted_end, quoted_end);
114        Datum {
115            value: Value::list(vec![Value::symbol(name), quoted_value]),
116            info: SpanInfo::Cons(
117                Span::new(quote_span.start(), quoted_end),
118                Box::new([
119                    SpanInfo::Prim(quote_span),
120                    SpanInfo::Cons(
121                        quoted_info.span(),
122                        Box::new([quoted_info, SpanInfo::Prim(null_span)]),
123                    ),
124                ]),
125            ),
126        }
127    }
128}
129
130impl From<Datum> for Value {
131    fn from(datum: Datum) -> Self {
132        datum.value
133    }
134}
135
/// A reference to a value and corresponding location information.
///
/// A `Ref` is the generalized version of `&Datum`; it can not only refer to a top-level, owned
/// `Datum` value, but also to values recursively contained therein.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Ref<'a> {
    // The referenced value.
    value: &'a Value,
    // Location information belonging to `value`.
    info: &'a SpanInfo,
}
145
146impl<'a> AsRef<Value> for Ref<'a> {
147    fn as_ref(&self) -> &Value {
148        self.value
149    }
150}
151
152impl<'a> From<Ref<'a>> for Datum {
153    /// Turns a reference into an owned `Datum`, by cloning the referenced value and location
154    /// information.
155    fn from(r: Ref<'a>) -> Self {
156        Datum {
157            value: r.value.clone(),
158            info: r.info.clone(),
159        }
160    }
161}
162
163impl<'a> Ref<'a> {
164    fn new(value: &'a Value, info: &'a SpanInfo) -> Self {
165        Ref { value, info }
166    }
167
168    /// Returns the span of the referenced value.
169    pub fn span(&self) -> Span {
170        self.info.span()
171    }
172
173    /// Returns a reference to the contained value.
174    pub fn value(&self) -> &'a Value {
175        self.value
176    }
177
178    /// If the value referenced is not either a cons cell or `Null`, `None` is returned.
179    ///
180    /// Note that the returned iterator has special behavior for improper lists, yielding the
181    /// element after the dot after returning `None` the first time; see [`Datum::list_iter`] for an
182    /// example.
183    pub fn list_iter(&self) -> Option<ListIter<'a>> {
184        match (self.value, self.info) {
185            (Value::Cons(cell), SpanInfo::Cons(_, meta)) => Some(ListIter::cons(cell, meta)),
186            (Value::Null, _) => Some(ListIter::empty()),
187            _ => None,
188        }
189    }
190
191    /// Returns an iterator over the elements of a vector.
192    ///
193    /// If the value referenced is not a vector, `None` is returned.
194    pub fn vector_iter(&self) -> Option<VectorIter<'a>> {
195        match (self.value, self.info) {
196            (Value::Vector(elements), SpanInfo::Vec(_, element_meta)) => {
197                Some(VectorIter(elements.iter().zip(element_meta)))
198            }
199            _ => None,
200        }
201    }
202
203    /// Returns a pair of references to the fields of a cons cell.
204    ///
205    /// If the value referenced is not a cons cell, `None` is returned.
206    pub fn as_pair(&self) -> Option<(Ref<'a>, Ref<'a>)> {
207        let (car, cdr) = self.value.as_pair()?;
208        match &self.info {
209            SpanInfo::Cons(_, inner) if inner.len() == 2 => {
210                Some((Ref::new(car, &inner[0]), Ref::new(cdr, &inner[1])))
211            }
212            _ => unreachable!("badly shaped pair span information"),
213        }
214    }
215}
216
217impl<'a> std::ops::Deref for Ref<'a> {
218    type Target = Value;
219
220    fn deref(&self) -> &Self::Target {
221        self.value
222    }
223}
224
/// The start and end for a span of text.
///
/// Both ends are available through the [`Span::start`] and [`Span::end`] accessors.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    // Position at which the span begins.
    start: Position,
    // Position at which the span ends.
    end: Position,
}
231
232impl Span {
233    pub(crate) fn new(start: Position, end: Position) -> Self {
234        Span { start, end }
235    }
236
237    pub(crate) fn empty() -> Self {
238        Span {
239            start: Position::new(0, 0),
240            end: Position::new(0, 0),
241        }
242    }
243
244    /// Get the starting line/column in the source file for this span.
245    pub fn start(&self) -> Position {
246        self.start
247    }
248
249    /// Get the ending line/column in the source file for this span.
250    pub fn end(&self) -> Position {
251        self.end
252    }
253}
254
/// Location information for a value, shaped like the value it belongs to.
///
/// Every variant carries the span of the value as a whole as its first field.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum SpanInfo {
    /// A value for which only its own span is recorded.
    Prim(Span),
    /// A cons cell: the cell's span, plus the information for its `car` (index 0) and its `cdr`
    /// (index 1).
    Cons(Span, Box<[SpanInfo; 2]>),
    /// A vector: the vector's span, plus the information for each of its elements.
    Vec(Span, Vec<SpanInfo>),
}
261
262impl SpanInfo {
263    fn span(&self) -> Span {
264        match self {
265            SpanInfo::Prim(span) => *span,
266            SpanInfo::Cons(span, _) => *span,
267            SpanInfo::Vec(span, _) => *span,
268        }
269    }
270    pub(crate) fn cons_mut(&mut self) -> Option<&mut [SpanInfo; 2]> {
271        match self {
272            SpanInfo::Cons(_, info) => Some(info),
273            _ => None,
274        }
275    }
276}
277
/// An iterator over the elements of a vector, yielding a [`Ref`] for each element.
///
/// It is obtained from [`Datum::vector_iter`] or [`Ref::vector_iter`], and walks the element
/// values and their location information in lockstep.
#[derive(Debug, Clone)]
pub struct VectorIter<'a>(iter::Zip<slice::Iter<'a, Value>, slice::Iter<'a, SpanInfo>>);
281
282impl<'a> Iterator for VectorIter<'a> {
283    type Item = Ref<'a>;
284
285    fn next(&mut self) -> Option<Self::Item> {
286        self.0.next().map(|(value, info)| Ref { value, info })
287    }
288}
289
/// An iterator yielding the `car` field of a chain of cons cells.
///
/// # Improper lists
///
/// Since in Lisp, lists can be "improper", i.e., terminated by a value other than `Null`, this
/// iterator type takes advantage of the fact that Rust's iterators can produce multiple sequences
/// of values, each terminated by `None`. For an improper list, the terminating value is produced
/// after the sequence of elements, as a singleton element, again followed by `None`.
///
/// For example, while the list `(1 2 3)` will produce the three expected `Some` values, followed by
/// `None`, the list `(1 2 . 3)` will produce `Some` values for `1` and `2`, then a `None`, followed
/// by a `Some` value for `3`, and then the final `None`.
///
/// Because of this, the iterator is deliberately not fused: `None` does not necessarily mean that
/// it is exhausted. Use [`ListIter::is_empty`] to check for that.
#[derive(Debug, Clone)]
pub struct ListIter<'a>(ListCursor<'a>);
304
305impl<'a> ListIter<'a> {
306    /// Returns true when the iterator is completely exhausted.
307    ///
308    /// For an improper list, true will only be returned after the terminating value has been
309    /// consumed.
310    pub fn is_empty(&self) -> bool {
311        matches!(&self.0, ListCursor::Exhausted)
312    }
313
314    /// Returns a peek at the value that would be returned by a call to `next`.
315    ///
316    /// For improper lists, this implies that after the last regular element, `None` will be
317    /// returned, while `is_empty` still returns false at that point.
318    pub fn peek(&self) -> Option<Ref<'_>> {
319        match &self.0 {
320            ListCursor::Cons(cell, info) => Some(Ref {
321                value: cell.car(),
322                info: &info[0],
323            }),
324            ListCursor::Dot(_, _) => None,
325            ListCursor::Rest(value, info) => Some(Ref { value, info }),
326            ListCursor::Exhausted => None,
327        }
328    }
329
330    fn empty() -> Self {
331        ListIter(ListCursor::Exhausted)
332    }
333
334    fn cons(cell: &'a Cons, meta: &'a [SpanInfo; 2]) -> Self {
335        ListIter(ListCursor::Cons(cell, meta))
336    }
337}
338
/// The iteration state of a [`ListIter`].
#[derive(Debug, Clone)]
enum ListCursor<'a> {
    /// Positioned at a cons cell; the next item is the cell's `car`. The second field holds the
    /// span information for the cell's two fields.
    Cons(&'a Cons, &'a [SpanInfo; 2]),
    /// All regular elements have been yielded and a terminating value is pending; the next call
    /// to `next` returns `None` and moves on to `Rest`.
    Dot(&'a Value, &'a SpanInfo),
    /// The separating `None` has been returned; the next item is the terminating value.
    Rest(&'a Value, &'a SpanInfo),
    /// Nothing is left to yield.
    Exhausted,
}
346
347impl<'a> Iterator for ListIter<'a> {
348    type Item = Ref<'a>;
349
350    fn next(&mut self) -> Option<Self::Item> {
351        match self.0 {
352            ListCursor::Cons(cell, [car_meta, cdr_meta]) => {
353                let car = cell.car();
354                match cdr_meta {
355                    SpanInfo::Cons(_, next) => {
356                        let cell = cell
357                            .cdr()
358                            .as_cons()
359                            .expect("badly shaped list span information");
360                        self.0 = ListCursor::Cons(cell, next);
361                    }
362                    SpanInfo::Prim(_) if cell.cdr().is_null() => {
363                        self.0 = ListCursor::Exhausted;
364                    }
365                    _ => {
366                        self.0 = ListCursor::Dot(cell.cdr(), cdr_meta);
367                    }
368                }
369                Some(Ref {
370                    value: car,
371                    info: car_meta,
372                })
373            }
374            ListCursor::Dot(value, info) => {
375                self.0 = ListCursor::Rest(value, info);
376                None
377            }
378            ListCursor::Rest(value, info) => {
379                self.0 = ListCursor::Exhausted;
380                Some(Ref { value, info })
381            }
382            ListCursor::Exhausted => None,
383        }
384    }
385}
386
387fn from_trait<'de, R>(read: R, options: Options) -> Result<Datum>
388where
389    R: read::Read<'de>,
390{
391    let mut parser = Parser::with_options(read, options);
392    let datum = parser.expect_datum()?;
393    parser.expect_end()?;
394
395    Ok(datum)
396}
397
398/// Parse a datum from an IO stream containing a single S-expression.
399///
400/// The content of the IO stream is parsed directly from the stream
401/// without being buffered in memory.
402///
403/// When reading from a source against which short reads are not efficient, such
404/// as a [`File`], you will want to apply your own buffering, e.g. using
405/// [`std::io::BufReader`].
406///
407/// ```no_run
408/// use std::error::Error;
409/// use std::fs::File;
410/// use std::io::BufReader;
411/// use std::path::Path;
412///
413/// fn read_datum_from_file<P: AsRef<Path>>(path: P) -> Result<lexpr::Datum, Box<dyn Error>> {
414///     // Open the file in read-only mode with buffer.
415///     let file = File::open(path)?;
416///     let reader = BufReader::new(file);
417///
418///     // Read an arbitrary S-expression, using parser options suitable for Emacs Lisp.
419///     let datum = lexpr::datum::from_reader_custom(reader, lexpr::parse::Options::elisp())?;
420///
421///     // Return the datum.
422///     Ok(datum)
423/// }
424///
425/// let datum = read_datum_from_file("test.el").unwrap();
426/// println!("{:?}", datum);
427/// ```
428///
429/// [`File`]: https://doc.rust-lang.org/std/fs/struct.File.html
430/// [`BufReader`]: https://doc.rust-lang.org/std/io/struct.BufReader.html
431pub fn from_reader_custom(rdr: impl io::Read, options: Options) -> Result<Datum> {
432    from_trait(read::IoRead::new(rdr), options)
433}
434
435/// Parse a datum from an IO stream of S-expressions, using the default parser
436/// options.
437///
438/// See [`from_reader_custom`] for more information.
439///
440/// [`from_reader_custom`]: fn.from_reader_custom.html
441pub fn from_reader(rdr: impl io::Read) -> Result<Datum> {
442    from_reader_custom(rdr, Options::default())
443}
444
445/// Parse a datum from an IO stream of S-expressions, using the parser
446/// options suitable for parsing Emacs Lisp.
447///
448/// See [`from_reader_custom`] for more information.
449///
450/// [`from_reader_custom`]: fn.from_reader_custom.html
451pub fn from_reader_elisp(rdr: impl io::Read) -> Result<Datum> {
452    from_reader_custom(rdr, Options::elisp())
453}
454
455/// Parse a datum from bytes representing a single S-expression.
456///
457/// ```
458/// let datum = lexpr::from_slice_custom(b"(a (nested) list)", lexpr::parse::Options::new());
459/// println!("{:?}", datum);
460/// ```
461pub fn from_slice_custom(bytes: &[u8], options: Options) -> Result<Datum> {
462    // TODO: the use of SliceRead is most probably not a good idea, since it calculates position
463    // information on-demand, leading to O(n^2) complexity.
464    from_trait(read::SliceRead::new(bytes), options)
465}
466
467/// Parse a datum from bytes representing a single S-expressions, using the
468/// default parser options.
469///
470/// See [`from_slice_custom`] for more information.
471///
472/// [`from_slice_custom`]: fn.from_slice_custom.html
473pub fn from_slice(bytes: &[u8]) -> Result<Datum> {
474    from_slice_custom(bytes, Options::default())
475}
476
477/// Parse a datum from bytes representing a single S-expressions, using parser
478/// options suitable for Emacs Lisp.
479///
480/// See [`from_slice_custom`] for more information.
481///
482/// [`from_slice_custom`]: fn.from_slice_custom.html
483pub fn from_slice_elisp(bytes: &[u8]) -> Result<Datum> {
484    from_slice_custom(bytes, Options::elisp())
485}
486
487/// Parse a datum from a string slice representing a single S-expression.
488///
489/// ```
490/// let datum = lexpr::from_str_custom("(a (nested) list)", lexpr::parse::Options::new());
491/// println!("{:?}", datum);
492/// ```
493pub fn from_str_custom(s: &str, options: Options) -> Result<Datum> {
494    // TODO: the use of StrRead (which delegates to SliceRead) is most probably not a good idea,
495    // since it calculates position information on-demand, leading to O(n^2) complexity.
496    from_trait(read::StrRead::new(s), options)
497}
498
499/// Parse a datum from a string slice representing a single S-expressions, using
500/// the default parser options.
501///
502/// See [`from_str_custom`] for more information.
503///
504/// [`from_str_custom`]: fn.from_str_custom.html
505pub fn from_str(s: &str) -> Result<Datum> {
506    from_str_custom(s, Options::default())
507}
508
509/// Parse a datum from a string slice representing a single S-expression, using
510/// parser options suitable for Emacs Lisp.
511///
512/// See [`from_str_custom`] for more information.
513///
514/// [`from_str_custom`]: fn.from_str_custom.html
515pub fn from_str_elisp(s: &str) -> Result<Datum> {
516    from_str_custom(s, Options::elisp())
517}