pulsar_utils/
loc.rs

1// Copyright (C) 2024 Ethan Uppal. All rights reserved.
2use std::{cmp::Ordering, fmt::Display, rc::Rc};
3
/// Different sources of text data.
///
/// Equality between sources is implemented by hand rather than derived: two
/// [`Source::File`]s compare equal when their `name`s match, without looking
/// at their `contents`.
#[derive(Clone, Debug, Eq)]
pub enum Source {
    /// `Source::File { name, contents }` is a text file with name `name` and
    /// contents `contents`.
    ///
    /// Invariant: if two [`Source::File`]s have the same `name`, they must
    /// represent the same file. For this reason, it is recommended to use
    /// fully-qualified paths for `name`.
    File {
        /// The name identifying the file; this is what the source is
        /// formatted as.
        name: String,
        /// The complete text of the file.
        contents: String
    },
    /// A source with no name and no text: its contents are empty and it is
    /// formatted as `<unknown>`.
    Unknown
}
19
20impl Source {
21    pub fn file(name: String, contents: String) -> Rc<Source> {
22        Rc::new(Source::File { name, contents })
23    }
24
25    /// `contents(source)` is the string contents of `source`.
26    pub fn contents(&self) -> &str {
27        match self {
28            Self::File { name: _, contents } => contents,
29            Self::Unknown => ""
30        }
31    }
32
33    /// @see [`Loc::lines`]
34    fn lines(
35        &self, pos: usize, before: usize, after: usize
36    ) -> (Vec<String>, usize) {
37        match self {
38            Self::File { name: _, contents } => {
39                assert!(pos < contents.len());
40                let bytes = contents.as_bytes();
41
42                // Find the bounds of the current line
43                let mut start_pos = pos;
44                while start_pos > 0 && bytes[start_pos - 1] != b'\n' {
45                    start_pos -= 1;
46                }
47                let mut end_pos = start_pos;
48                while end_pos < contents.len() && bytes[end_pos] != b'\n' {
49                    end_pos += 1;
50                }
51                end_pos += 1;
52
53                // Slice the contents to get the current line
54                let line = contents
55                    .get(start_pos..end_pos - 1)
56                    .unwrap_or_default()
57                    .to_string();
58
59                // Make iterators for the before/after lines
60                let before_lines: Vec<_> = {
61                    let (before_contents, _) = contents.split_at(start_pos);
62                    let mut result: Vec<_> = before_contents
63                        .lines()
64                        .rev()
65                        .take(before)
66                        .map(String::from)
67                        .collect();
68                    result.reverse();
69                    result
70                };
71                let after_lines: Vec<_> = if end_pos < contents.len() {
72                    let (_, after_contents) = contents.split_at(end_pos);
73                    after_contents
74                        .lines()
75                        .take(after)
76                        .map(String::from)
77                        .collect()
78                } else {
79                    std::iter::empty().collect()
80                };
81
82                // Construct the final result
83                let mut result = vec![];
84                result.extend(before_lines);
85                let pos_current_line = result.len();
86                result.push(line);
87                result.extend(after_lines);
88
89                (result, pos_current_line)
90            }
91            Self::Unknown => (vec![], 0)
92        }
93    }
94}
95
96impl Display for Source {
97    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98        match self {
99            Source::File { name, contents: _ } => write!(f, "{}", name),
100            Source::Unknown => write!(f, "<unknown>")
101        }
102    }
103}
104
105impl Default for Source {
106    fn default() -> Self {
107        Source::File {
108            name: String::new(),
109            contents: String::new()
110        }
111    }
112}
113
114impl PartialEq for Source {
115    fn eq(&self, other: &Self) -> bool {
116        match (self, other) {
117            (Self::Unknown, Self::Unknown) => true,
118            (
119                Self::File { name, contents: _ },
120                Self::File {
121                    name: other_name,
122                    contents: _
123                }
124            ) => name == other_name,
125            _ => false
126        }
127    }
128}
129
/// `Loc(line, col, pos, source)` is a location referring to line `line` and
/// column `col` of `source`, where the combination of `line` and `col` produces
/// a direct offset `pos`. It is formatted as `"{source}:{line}:{col}"` where
/// `{source}` is the formatted substitution of `source` and likewise for
/// `line`/`col`. It is required that no numeric field is negative, that is,
/// `line`, `col`, and `pos` should be treated as if they were of type `usize`.
#[derive(Debug, Clone, Eq)]
pub struct Loc {
    /// The line number within `source`.
    // NOTE(review): `Loc::default()` starts at line 1 and column 1, which
    // suggests 1-based numbering; confirm against the code producing `Loc`s.
    pub line: isize,
    /// The column number within `line`.
    pub col: isize,
    /// The direct offset into the contents of `source` identified by `line`
    /// and `col`.
    pub pos: isize,
    /// The source this location points into, shared by reference count.
    pub source: Rc<Source>
}
143
144impl Loc {
145    /// `loc.lines(before, after)` is a pair of a vector containing the
146    /// line in `loc.source` at position `loc.pos`, preceded by the up to
147    /// `before` previous lines and up to `after` subsequent lines, as well
148    /// as an index into the vector for the line containing `loc.pos`.
149    ///
150    /// Requires: `loc.pos` is a valid position in `loc.source`.
151    pub fn lines(&self, before: usize, after: usize) -> (Vec<String>, usize) {
152        self.source.lines(self.pos as usize, before, after)
153    }
154
155    pub fn make_invalid() -> Self {
156        Loc {
157            line: 0,
158            col: 0,
159            pos: 0,
160            source: Rc::new(Source::Unknown)
161        }
162    }
163
164    pub fn is_invalid(&self) -> bool {
165        let invalid = Loc::make_invalid();
166        self.line == invalid.line
167            && self.col == invalid.col
168            && self.pos == invalid.pos
169    }
170}
171
172impl Display for Loc {
173    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
174        write!(f, "{}:{}:{}", self.source, self.line, self.col)
175    }
176}
177
178impl Default for Loc {
179    fn default() -> Self {
180        Loc {
181            line: 1,
182            col: 1,
183            pos: 0,
184            source: Rc::new(Source::Unknown)
185        }
186    }
187}
188
189impl PartialEq for Loc {
190    fn eq(&self, other: &Self) -> bool {
191        self.line == other.line
192            && self.col == other.col
193            && self.pos == other.pos
194            && self.source.as_ref() == other.source.as_ref()
195    }
196}
197
198impl PartialOrd for Loc {
199    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
200        if self.source != other.source {
201            None
202        } else {
203            self.pos.partial_cmp(&other.pos)
204        }
205    }
206}
207
/// The location enclosed by a region begins at `start` and ends exclusively
/// at `end`. It is required that both locations come from the same source and
/// that `end` does not come before `start` (so `start` and `end` can compare
/// equal). This invariant is enforced when constructing through
/// [`Region::new`].
#[derive(Debug, Default, PartialEq, Eq, Clone)]
pub struct Region {
    /// An inclusive lower bound (see [`Loc::partial_cmp`]) on the region
    /// enclosed.
    pub start: Loc,

    /// An exclusive upper bound (see [`Loc::partial_cmp`]) on the region
    /// enclosed.
    pub end: Loc
}
223
/// The line section with `start` and `end` represents the characters at
/// positions from lower bound `start` to exclusive upper bound `end` on a line.
/// The core invariant that `end >= start` is enforced by [`LineSection::new`].
///
/// Line sections can be debug-printed, cloned and compared so that computed
/// sections can be inspected and checked in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineSection {
    /// The initial position on the line.
    pub start: isize,

    /// One after the final valid position contained by this line section on
    /// the line, that is, an exclusive upper bound on the indices of the range
    /// of characters contained by this line section.
    pub end: isize
}
236
237impl LineSection {
238    pub fn new(start: isize, end: isize) -> Self {
239        assert!(start <= end);
240        Self { start, end }
241    }
242
243    pub fn length(&self) -> usize {
244        (self.end - self.start) as usize
245    }
246}
247
248impl Region {
249    /// A region from `start` up to (but not including) `end`.
250    pub fn new(start: Loc, end: Loc) -> Region {
251        assert!(start <= end, "`Region::from`: `start` and `end` must from the same source and `end` must be at least after `start`.");
252        Region { start, end }
253    }
254
255    /// A region at `start` of length 1.
256    ///
257    /// Requires: the `start.line` contains at least one more character after
258    /// `start.col`.
259    pub fn unit(start: Loc) -> Region {
260        let mut end = start.clone();
261        end.pos += 1;
262        end.col += 1;
263        Region::new(start, end)
264    }
265
266    /// The source where this region occurs.
267    pub fn source(&self) -> Rc<Source> {
268        self.start.source.clone()
269    }
270
271    pub fn start_line(&self) -> isize {
272        self.start.line
273    }
274
275    pub fn end_line(&self) -> isize {
276        self.end.line
277    }
278
279    /// Given a set of *complete* `lines` from the same source as `source()`
280    /// and the line number of the first line in `lines`, `start_line`, this
281    /// function computes the intersection of this region and the given
282    /// lines. If the output vector is non-empty, the first entry in the output
283    /// vector corresponds to the first line of this region, which is not
284    /// necessarily the first line in `lines`. See [`LineSection`].
285    pub fn find_intersection(
286        &self, lines: &[String], start_line: isize
287    ) -> Vec<LineSection> {
288        let mut result = vec![];
289        for (i, line) in lines.iter().enumerate() {
290            let actual_line = start_line + (i as isize);
291            if actual_line >= self.start_line()
292                && actual_line <= self.end_line()
293            {
294                let mut start_pos = 0;
295                let mut end_pos = line.len() as isize;
296
297                if actual_line == self.start_line() {
298                    start_pos = self.start.col - 1;
299                }
300
301                if actual_line == self.end_line() {
302                    end_pos = self.end.col - 1;
303                }
304
305                result.push(LineSection::new(start_pos, end_pos));
306            }
307        }
308        result
309    }
310}
311
312impl Display for Region {
313    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
314        write!(f, "[{}, {})", self.start, self.end)
315    }
316}
317
/// Implemented by anything that occupies a [`Region`] of a source, described
/// by a starting and an ending location.
pub trait RegionProvider {
    /// The starting location of this region.
    fn start(&self) -> Loc;

    /// Must be in the same source and monotonically after
    /// [`RegionProvider::start`]. See [`Region`] for details.
    fn end(&self) -> Loc;

    /// The region of this object.
    ///
    /// The default implementation builds the region with [`Region::new`], so
    /// it panics when `start()` and `end()` do not satisfy `start <= end`.
    fn region(&self) -> Region {
        Region::new(self.start(), self.end())
    }
}
331
332impl RegionProvider for Region {
333    fn start(&self) -> Loc {
334        self.start.clone()
335    }
336
337    fn end(&self) -> Loc {
338        self.end.clone()
339    }
340}