1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
use crate::{SourceID, SourcePath};
use std::{
    borrow::Cow,
    fmt::{Debug, Display, Formatter},
    ops::Range,
    path::Path,
};
use url::Url;

mod display;

/// A single source text that may be referred to by [`SourceSpan`]s.
///
/// In most cases, a source text is a single input file.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct SourceText {
    /// Where the text comes from (see [`SourcePath`]).
    path: SourcePath,
    /// The raw text.
    raw: String,
    /// The lines of the text.
    lines: Vec<SourceLine>,
    /// Total length of the text, as reported by [`SourceText::get_length`].
    // NOTE(review): this field was originally described as "bytes" while `get_length` talks about
    // "characters" — confirm which unit is actually stored where the text is built.
    length: u32,
    /// Dirty flag; [`SourceText::clear`] sets it to `true`.
    dirty: bool,
}

/// A single line of a [`SourceText`].
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct SourceLine {
    /// The offset of this line in the original [`SourceText`] (i.e: the number of characters that precede it).
    pub offset: u32,
    /// The character length of this line.
    pub length: u32,
    /// The text of this line, stored as an owned copy.
    pub text: String,
}

/// A range of offsets (`start..end`) inside the source identified by a [`SourceID`].
#[derive(Copy, Clone, Default, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SourceSpan {
    /// The start offset of the span
    pub start: u32,
    /// The (exclusive) end offset of the span
    pub end: u32,
    /// The id of the file the span points into
    pub file: SourceID,
}

impl SourceText {
    /// Create a named snippet from `text`.
    ///
    /// The text is converted with `Self::from`, after which the path is replaced by
    /// [`SourcePath::Snippet`] holding `name`.
    pub fn snippet<S, N>(text: S, name: N) -> Self
    where
        S: Into<String>,
        N: Into<Cow<'static, str>>,
    {
        let mut src = Self::from(text);
        src.path = SourcePath::Snippet(name.into());
        src
    }

    /// Get the [`SourceID`] that the current path resolves to.
    pub fn source_id(&self) -> SourceID {
        self.path.source_id()
    }
    /// Get the stored total length of the text.
    pub fn get_length(&self) -> usize {
        self.length as usize
    }
    /// Get access to a specific, zero-indexed [`SourceLine`].
    ///
    /// Returns `None` when `idx` is past the last line.
    pub fn get_line(&self, idx: usize) -> Option<&SourceLine> {
        self.lines.get(idx)
    }
    /// Get the path this text is associated with.
    pub fn get_source(&self) -> &SourcePath {
        &self.path
    }
    /// Replace the path this text is associated with.
    pub fn set_source(&mut self, path: SourcePath) {
        self.path = path;
    }
    /// Mark this text as coming from the local file `path`.
    pub fn set_path(&mut self, path: &Path) {
        self.path = SourcePath::Local(path.to_path_buf());
    }
    /// Builder-style variant of [`SourceText::set_path`]: consumes `self` and returns it with a local path.
    pub fn with_path(self, path: &Path) -> Self {
        Self { path: SourcePath::Local(path.to_path_buf()), ..self }
    }
    /// Mark this text as coming from the remote location `url`.
    ///
    /// Always returns `true`.
    pub fn set_remote(&mut self, url: Url) -> bool {
        self.path = SourcePath::Remote(url);
        true
    }
    /// Builder-style variant of [`SourceText::set_remote`]: consumes `self` and returns it with a remote path.
    pub fn with_remote(self, url: Url) -> Self {
        Self { path: SourcePath::Remote(url), ..self }
    }

    /// Return an iterator over the characters of every line, in line order.
    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
        self.lines.iter().flat_map(|line| line.chars())
    }
    /// Return an iterator over the [`SourceLine`]s of this text.
    pub fn lines(&self) -> impl ExactSizeIterator<Item = &SourceLine> + '_ {
        self.lines.iter()
    }
    /// Drop the cached text and lines and mark the data as dirty.
    ///
    /// The path is left untouched.
    pub fn clear(&mut self) {
        self.raw.clear();
        self.lines.clear();
        // Keep the recorded length in sync with the (now empty) text; a stale length would make
        // `get_length` lie and let offset lookups pass their bounds check with no lines to resolve.
        self.length = 0;
        self.dirty = true;
    }
}
impl SourceText {
    /// Get the line that the given offset appears on, and the line/column numbers of the offset.
    ///
    /// The returned tuple is `(line, line index, column)`, where the column is the distance of
    /// `offset` from the start of the line. Note that the line/column numbers are zero-indexed.
    ///
    /// Returns `None` when `offset` is greater than the stored length, or when there is no line
    /// to resolve it against (for example an empty text).
    ///
    /// # Panics
    ///
    /// Panics if the line table is inconsistent, i.e. the resolved line starts after `offset`.
    pub fn get_offset_line(&self, offset: u32) -> Option<(&SourceLine, usize, u32)> {
        if offset > self.length {
            return None;
        }
        // An exact hit is the first offset of a line. Otherwise the search yields the insertion
        // point, which is one past the line containing `offset`.
        let idx = self.lines.binary_search_by_key(&offset, |line| line.offset).unwrap_or_else(|insert_at| insert_at.saturating_sub(1));
        // Checked access instead of indexing: with no lines at all, `idx` is 0 and indexing would panic.
        let line = self.lines.get(idx)?;
        assert!(offset >= line.offset, "offset = {}, line.offset = {}", offset, line.offset);
        Some((line, idx, offset - line.offset))
    }
    /// Get the range of lines that the given span runs across.
    ///
    /// The resulting range is guaranteed to contain valid line indices (i.e: those that can be used for
    /// [`SourceText::get_line`]). A start that cannot be resolved falls back to the first line, an end
    /// that cannot be resolved falls back to the number of lines.
    pub fn get_line_range(&self, span: &Range<u32>) -> Range<usize> {
        let start = self.get_offset_line(span.start).map_or(0, |(_, l, _)| l);
        // `span.end` is exclusive, so look up the last offset inside the span, but never one before
        // `span.start` (empty spans, or `end == 0`).
        let last = span.end.saturating_sub(1).max(span.start);
        let end = self.get_offset_line(last).map_or(self.lines.len(), |(_, l, _)| l + 1);
        start..end
    }
}

impl SourceSpan {
    /// Create a new source_text with the given start and end offsets, and the given file.
    pub fn new(file: SourceID, start: u32, end: u32) -> Self {
        Self { start, end, file }
    }
    /// Create a new source_text with the given start and end offsets, and the given file.
    pub fn get_range(&self) -> Range<u32> {
        self.start..self.end
    }
    /// Get the start offset of this source_text.
    ///
    /// Offsets are zero-indexed character offsets from the beginning of the identifier.
    pub fn get_start(&self) -> u32 {
        self.start
    }

    /// Get the (exclusive) end offset of this source_text.
    ///
    /// The end offset should *always* be greater than or equal to the start offset as given by [`Span::start`].
    ///
    /// Offsets are zero-indexed character offsets from the beginning of the identifier.
    pub fn get_end(&self) -> u32 {
        self.end
    }
    /// Create a new source_text with the given start and end offsets, and the given file.
    pub fn set_range(&mut self, range: Range<u32>) {
        self.start = range.start;
        self.end = range.end;
    }
    /// Create a new source_text with the given start and end offsets, and the given file.
    pub fn with_range(self, range: Range<u32>) -> Self {
        Self { start: range.start, end: range.end, ..self }
    }
    /// Create a new source_text with the given start and end offsets, and the given file.
    pub fn get_file(&self) -> SourceID {
        self.file
    }
    /// Create a new source_text with the given start and end offsets, and the given file.
    pub fn set_file(&mut self, file: SourceID) {
        self.file = file;
    }
    /// Create a new source_text with the given start and end offsets, and the given file.
    pub fn with_file(self, file: SourceID) -> Self {
        Self { file, ..self }
    }

    /// Get the length of this source_text (difference between the start of the source_text and the end of the source_text).
    pub fn length(&self) -> u32 {
        self.end.saturating_sub(self.start)
    }
    /// Determine whether the source_text contains the given offset.
    pub fn contains(&self, offset: u32) -> bool {
        self.get_range().contains(&offset)
    }
}
impl SourceLine {
    /// Get the half-open offset range (`offset..offset + length`) this line occupies in the
    /// original [`SourceText`].
    pub fn range(&self) -> Range<u32> {
        let start = self.offset;
        let end = start + self.length;
        Range { start, end }
    }

    /// Return an iterator over the characters stored in `text`.
    ///
    /// No trimming happens here; whether trailing whitespace is present depends on how the line
    /// was built.
    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
        self.text.as_str().chars()
    }
}