lwb_parser/sources/
source_file.rs

1use crate::sources::character_class::CharacterClass;
2use miette::{MietteError, SourceCode, SourceSpan, SpanContents};
3use serde::{Deserialize, Deserializer, Serialize, Serializer};
4use std::io;
5use std::io::Read;
6use std::iter::Peekable;
7use std::path::Path;
8use std::sync::Arc;
9
/// Shared payload of a `SourceFile`. `SourceFile` wraps this in an `Arc`,
/// so the strings below are stored once per source no matter how many
/// `SourceFile` handles exist.
#[doc(hidden)]
#[derive(Debug, Serialize, Deserialize)]
struct Inner {
    /// The source text exactly as it was supplied or read from disk.
    contents: String,
    /// A copy of `contents` followed by eight trailing spaces.
    contents_for_display: String,
    /// Name of the source: the (lossily converted) path for sources opened
    /// from disk, otherwise whatever name the caller supplied.
    name: String,
}
17
/// SourceFile represents a source into which spans
/// point. Source files can be cheaply cloned as the
/// actual contents of them live behind an `Arc`.
#[derive(Clone, Debug)]
pub struct SourceFile(Arc<Inner>);
23
impl Serialize for SourceFile {
    /// Serializes a `SourceFile` as a unit value.
    ///
    /// Neither the contents nor the name of the source file are written
    /// out, so a serialized `SourceFile` carries no data.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Only a unit is emitted; `self` is not inspected at all.
        serializer.serialize_unit()
    }
}
32
33impl<'de> Deserialize<'de> for SourceFile {
34    fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
35    where
36        D: Deserializer<'de>,
37    {
38        Ok(SourceFile::new("", "dummmy"))
39    }
40}
41
42impl SourceFile {
43    pub fn open(name: impl AsRef<Path>) -> io::Result<Self> {
44        let mut f = std::fs::File::open(&name)?;
45        let mut contents = String::new();
46
47        f.read_to_string(&mut contents)?;
48
49        Ok(Self(Arc::new(Inner {
50            contents: contents.clone(),
51            contents_for_display: contents + "        ",
52            name: name.as_ref().to_string_lossy().to_string(),
53        })))
54    }
55
56    /// Create a new SourceFile
57    pub fn new(contents: impl AsRef<str>, name: impl AsRef<str>) -> Self {
58        Self(Arc::new(Inner {
59            contents: contents.as_ref().to_string(),
60            contents_for_display: contents.as_ref().to_string() + "        ",
61            name: name.as_ref().to_string(),
62        }))
63    }
64
65    pub fn new_for_test(s: impl AsRef<str>) -> Self {
66        Self::new(s.as_ref(), "test")
67    }
68
69    pub fn iter(&self) -> SourceFileIterator {
70        SourceFileIterator {
71            inner_iter: self.0.contents.chars().peekable(),
72            index: 0,
73        }
74    }
75
76    /// returns the name of this source file
77    pub fn name(&self) -> &str {
78        &self.0.name
79    }
80
81    /// returns the contents of this source file as a
82    /// string. When parsing you likely often want to
83    /// use `.iter()` instead as the source file iterator
84    /// has a number of methods useful for parsing.
85    pub fn contents(&self) -> &str {
86        &self.0.contents
87    }
88
89    pub fn contents_for_display(&self) -> &str {
90        &self.0.contents_for_display
91    }
92}
93
/// Character iterator over the contents of a source file that also
/// keeps track of how far into the contents it has advanced. Cloning
/// it yields an independent cursor at the same position.
#[derive(Clone)]
pub struct SourceFileIterator<'a> {
    /// Peekable character iterator over the remaining contents.
    inner_iter: Peekable<std::str::Chars<'a>>,
    /// Running position; the `Iterator` implementation adds the UTF-8
    /// length of every character it yields.
    index: usize,
}
99
100impl<'a> SourceFileIterator<'a> {
101    /// Peek at the next character that can be obtained
102    /// by calling [`next`] or [`accept`].
103    pub fn peek(&mut self) -> Option<&char> {
104        self.inner_iter.peek()
105    }
106
107    /// Advance to the next character, discarding any
108    /// character or error that is encountered.
109    pub fn advance(&mut self) {
110        self.next();
111    }
112
113    /// Skip n characters.
114    pub fn skip_n(&mut self, n: usize) {
115        for _ in 0..n {
116            self.advance();
117        }
118    }
119
120    pub fn max_pos(&mut self, other: Self) {
121        if other.index > self.index {
122            *self = other;
123        }
124    }
125
126    /// When the next value in the iterator is `c`, advance
127    /// the iterator and return true. Otherwise, return false.
128    ///
129    /// ```
130    /// # use lwb_parser::sources::source_file::SourceFile;
131    /// let sf = SourceFile::new_for_test("test");
132    /// let mut sfi = sf.iter();
133    ///
134    /// assert!(sfi.accept(&'t'.into()));
135    ///
136    /// // because the previous accept accepted
137    /// // a 't' we will now see an e
138    /// assert_eq!(sfi.peek(), Some(&'e'));
139    /// assert_eq!(sfi.next(), Some('e'));
140    /// sfi.advance();
141    /// assert!(sfi.accept(&'t'.into()));
142    ///
143    /// // can't accept more, iterator is exhausted
144    /// assert!(!sfi.accept(&'x'.into()));
145    /// ```
146    pub fn accept(&mut self, c: &CharacterClass) -> bool {
147        self.accept_option(c).is_some()
148    }
149
150    /// Like accepts but returns an option
151    pub fn accept_option(&mut self, c: &CharacterClass) -> Option<char> {
152        if let Some(true) = self.peek().map(|&i| c.contains(i)) {
153            self.next()
154        } else {
155            None
156        }
157    }
158
159    /// accept an entire string. Returns true  only
160    /// if the whole string could be accepted.
161    ///
162    /// ```
163    /// # use lwb_parser::sources::source_file::SourceFile;
164    /// let sf = SourceFile::new_for_test("test");
165    /// let mut sfi = sf.iter();
166    ///
167    /// assert!(sfi.accept_str("test"));
168    /// assert!(!sfi.accept_str("test"));
169    /// assert!(sfi.exhausted());
170    ///
171    /// let mut sfi = sf.iter();
172    /// assert!(sfi.accept_str("te"));
173    /// assert!(sfi.accept_str("st"));
174    /// assert!(sfi.exhausted());
175    ///
176    /// let mut sfi = sf.iter();
177    /// assert!(!sfi.accept_str("cat"));
178    /// assert!(sfi.accept_str("test"));
179    /// assert!(sfi.exhausted());
180    /// ```
181    pub fn accept_str(&mut self, s: &str) -> bool {
182        let mut self_clone = self.clone();
183        for c in s.chars() {
184            if !self_clone.accept(&c.into()) {
185                return false;
186            }
187        }
188
189        *self = self_clone;
190        true
191    }
192
193    /// Skips any layout (defined by the layout character class passed in)
194    ///
195    /// ```
196    /// # use lwb_parser::sources::source_file::SourceFile;
197    /// let sf = SourceFile::new_for_test("   test");
198    /// let mut sfi = sf.iter();
199    ///
200    /// assert!(!sfi.accept_str("test"));
201    /// sfi.skip_layout(&' '.into());
202    /// assert!(sfi.accept_str("test"));
203    /// ```
204    pub fn skip_layout(&mut self, layout: &CharacterClass) {
205        while self.accept(layout) {}
206    }
207
208    /// First skip any layout that can be found, then accept like [`accept`]
209    ///
210    /// ```
211    /// # use lwb_parser::sources::source_file::SourceFile;
212    /// let sf = SourceFile::new_for_test("   t");
213    /// let mut sfi = sf.iter();
214    ///
215    /// assert!(!sfi.accept(&'t'.into()));
216    /// assert!(sfi.accept_skip_layout(&'t'.into(), &' '.into()));
217    /// ```
218    pub fn accept_skip_layout(&mut self, c: &CharacterClass, layout: &CharacterClass) -> bool {
219        let mut self_clone = self.clone();
220        self_clone.skip_layout(layout);
221        if self_clone.accept(c) {
222            *self = self_clone;
223            true
224        } else {
225            false
226        }
227    }
228
229    /// First skip any layout that can be found, then accept the string like [`accept_str`].
230    ///
231    /// ```
232    /// # use lwb_parser::sources::source_file::SourceFile;
233    /// let sf = SourceFile::new_for_test("   test");
234    /// let mut sfi = sf.iter();
235    ///
236    /// assert!(!sfi.accept_str("test"));
237    /// assert!(sfi.accept_str_skip_layout("test", &' '.into()));
238    /// ```
239    pub fn accept_str_skip_layout(&mut self, s: &str, layout: &CharacterClass) -> bool {
240        let mut self_clone = self.clone();
241        self_clone.skip_layout(layout);
242        if self_clone.accept_str(s) {
243            *self = self_clone;
244            true
245        } else {
246            false
247        }
248    }
249
250    /// accepts until a certain character is found in the input.
251    ///
252    /// ```
253    /// # use lwb_parser::sources::source_file::SourceFile;
254    /// let sf = SourceFile::new_for_test("test   ");
255    /// let mut sfi = sf.iter();
256    ///
257    /// assert_eq!(sfi.accept_to_next(&' '.into()), "test");
258    /// ```
259    pub fn accept_to_next(&mut self, target: &CharacterClass) -> String {
260        let mut res = String::new();
261
262        while let Some(&i) = self.peek() {
263            if target.contains(i) {
264                break;
265            } else {
266                res.push(i);
267                self.advance();
268            }
269        }
270
271        res
272    }
273
274    /// Returns true if this iter won't return more
275    /// ```
276    /// # use lwb_parser::sources::source_file::SourceFile;
277    /// let sf = SourceFile::new_for_test("test");
278    /// let mut sfi = sf.iter();
279    ///
280    /// assert!(sfi.accept_str("test"));
281    /// assert!(sfi.exhausted());
282    pub fn exhausted(&mut self) -> bool {
283        self.peek().is_none()
284    }
285
286    /// Returns the position of the character that is next.
287    /// ```
288    /// # use lwb_parser::sources::source_file::SourceFile;
289    /// let sf = SourceFile::new_for_test("test");
290    /// let mut sfi = sf.iter();
291    ///
292    /// assert_eq!(sfi.position(), 0);
293    /// assert!(sfi.accept_str("tes"));
294    /// assert_eq!(sfi.position(), 3);
295    /// sfi.advance();
296    /// assert_eq!(sfi.position(), 4);
297    /// sfi.advance(); //Already at the end, so it has no effect on position
298    /// assert_eq!(sfi.position(), 4);
299    pub fn position(&self) -> usize {
300        self.index
301    }
302}
303
304impl<'a> Iterator for SourceFileIterator<'a> {
305    type Item = char;
306
307    fn next(&mut self) -> Option<Self::Item> {
308        let next = self.inner_iter.next();
309        if let Some(next) = next {
310            self.index += next.len_utf8();
311        }
312        next
313    }
314}
315
/// Lets a `SourceFile` be used as the source code of a miette diagnostic.
impl SourceCode for SourceFile {
    /// Reads `span`, plus the requested number of context lines before and
    /// after it, by delegating to the `SourceCode` implementation for `str`
    /// applied to this file's contents.
    fn read_span<'a>(
        &'a self,
        span: &SourceSpan,
        context_lines_before: usize,
        context_lines_after: usize,
    ) -> Result<Box<dyn SpanContents<'a> + 'a>, MietteError> {
        // NOTE(review): this reads from `contents()`, not from the padded
        // `contents_for_display()` -- confirm that this is intended.
        <str as SourceCode>::read_span(
            self.contents(),
            span,
            context_lines_before,
            context_lines_after,
        )
    }
}
330}