1use std::char;
2use std::ops::Range;
3use std::path::PathBuf;
4use std::sync::Arc;
5
6use miden_diagnostics::*;
7
8pub type SourceResult<T> = std::result::Result<T, SourceError>;
9
10pub trait Source: Sized {
12 fn new(src: Arc<SourceFile>) -> Self;
14
15 fn read(&mut self) -> Option<(SourceIndex, char)>;
17
18 fn peek(&mut self) -> Option<(SourceIndex, char)>;
20
21 fn span(&self) -> SourceSpan;
23
24 fn slice(&self, span: impl Into<Range<usize>>) -> &str;
26}
27
28#[derive(Debug, thiserror::Error)]
29pub enum SourceError {
30 #[error("error reading {path:?}: {source:?}")]
31 RootFileIO {
32 source: std::io::Error,
33 path: PathBuf,
34 },
35
36 #[error("invalid source path")]
37 InvalidPath { reason: String },
38}
39impl ToDiagnostic for SourceError {
40 fn to_diagnostic(self) -> Diagnostic {
41 match self {
42 SourceError::RootFileIO { source, path: _ } => {
43 Diagnostic::error().with_message(source.to_string())
44 }
45 SourceError::InvalidPath { reason } => {
46 Diagnostic::error().with_message(format!("invalid path: {}", reason))
47 }
48 }
49 }
50}
51
52pub struct FileMapSource {
54 src: Arc<SourceFile>,
55 bytes: *const [u8],
56 start: SourceIndex,
57 peek: Option<(SourceIndex, char)>,
58 end: usize,
59 pos: usize,
60 eof: bool,
61}
62impl FileMapSource {
63 fn peek_char(&self) -> Option<(SourceIndex, char)> {
64 self.peek
65 }
66
67 fn next_char(&mut self) -> Option<(SourceIndex, char)> {
68 let result = if self.peek.is_some() {
70 std::mem::replace(&mut self.peek, None)
71 } else {
72 let next = unsafe { self.next_char_internal() };
73 match next {
74 None => {
75 self.eof = true;
76 return None;
77 }
78 result => result,
79 }
80 };
81
82 self.peek = unsafe { self.next_char_internal() };
84
85 result
86 }
87
88 #[inline]
89 unsafe fn next_char_internal(&mut self) -> Option<(SourceIndex, char)> {
90 let mut pos = self.pos;
91 let end = self.end;
92 if pos == end {
93 self.eof = true;
94 }
95
96 if self.eof {
97 return None;
98 }
99
100 let start = self.start + pos;
101
102 let bytes: &[u8] = &*self.bytes;
103
104 let x = *bytes.get_unchecked(pos);
106 if x < 128 {
107 self.pos = pos + 1;
108 return Some((start, char::from_u32_unchecked(x as u32)));
109 }
110
111 let init = Self::utf8_first_byte(x, 2);
115
116 pos += 1;
117 let y = if pos == end {
118 0u8
119 } else {
120 *bytes.get_unchecked(pos)
121 };
122 let mut ch = Self::utf8_acc_cont_byte(init, y);
123 if x >= 0xE0 {
124 pos += 1;
127 let z = if pos == end {
128 0u8
129 } else {
130 *bytes.get_unchecked(pos)
131 };
132 let y_z = Self::utf8_acc_cont_byte((y & Self::CONT_MASK) as u32, z);
133 ch = init << 12 | y_z;
134 if x >= 0xF0 {
135 pos += 1;
138 let w = if pos == end {
139 0u8
140 } else {
141 *bytes.get_unchecked(pos)
142 };
143 ch = (init & 7) << 18 | Self::utf8_acc_cont_byte(y_z, w);
144 }
145 }
146
147 pos += 1;
148 if pos >= end {
149 self.eof = true
150 }
151 self.pos = pos;
152
153 Some((start, char::from_u32_unchecked(ch)))
154 }
155
156 #[inline]
160 fn utf8_first_byte(byte: u8, width: u32) -> u32 {
161 (byte & (0x7F >> width)) as u32
162 }
163
164 #[inline]
166 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
167 (ch << 6) | (byte & Self::CONT_MASK) as u32
168 }
169
170 const CONT_MASK: u8 = 0b0011_1111;
172}
173impl Source for FileMapSource {
174 fn new(src: Arc<SourceFile>) -> Self {
175 let start = SourceIndex::new(src.id(), ByteIndex(0));
176 let mut source = Self {
177 src,
178 bytes: &[],
179 peek: None,
180 start,
181 end: 0,
182 pos: 0,
183 eof: false,
184 };
185 let s = source.src.source();
186 let bytes = s.as_bytes();
187 source.end = bytes.len();
188 source.bytes = bytes;
189 source.peek = unsafe { source.next_char_internal() };
190 source
191 }
192
193 #[inline]
194 fn read(&mut self) -> Option<(SourceIndex, char)> {
195 self.next_char()
196 }
197
198 #[inline]
199 fn peek(&mut self) -> Option<(SourceIndex, char)> {
200 self.peek_char()
201 }
202
203 #[inline]
204 fn span(&self) -> SourceSpan {
205 self.src.source_span()
206 }
207
208 #[inline]
209 fn slice(&self, span: impl Into<Range<usize>>) -> &str {
210 self.src.source_slice(span).unwrap()
211 }
212}
213
214impl Iterator for FileMapSource {
215 type Item = (SourceIndex, char);
216
217 fn next(&mut self) -> Option<Self::Item> {
218 self.read()
219 }
220}
221
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// Drains `source`, keeping only the characters and dropping the indices.
    fn read_all_chars(source: FileMapSource) -> Vec<char> {
        source.map(|(_, ch)| ch).collect()
    }

    #[test]
    fn file_source() {
        let codemap = CodeMap::default();
        // Registers `text` under the name "nofile" and opens a reader on it.
        let open = |text: &str| {
            let id = codemap.add("nofile", text.to_string());
            let file = codemap.get(id).unwrap();
            (id, FileMapSource::new(file))
        };

        // Reading to the end yields every character in order.
        let expected: Vec<char> = "hello world!".chars().collect();
        let (_, source1) = open("hello world!");
        let chars = read_all_chars(source1);

        assert_eq!(expected, chars);

        // Peeking does not consume: peek and next agree on the first item.
        let (id2, mut source2) = open("hello world!");
        let first = Some((SourceIndex::new(id2, ByteIndex(0)), 'h'));
        assert_eq!(first, source2.peek());
        assert_eq!(first, source2.next());

        // Two-byte characters are reported at their starting byte offsets.
        let (id3, mut source3) = open("éé");
        for offset in [0, 2] {
            let item = Some((SourceIndex::new(id3, ByteIndex(offset)), 'é'));
            assert_eq!(item, source3.peek());
            assert_eq!(item, source3.next());
        }
    }
}