1use super::*;
2
3use std::io::Error;
4use std::{
5 collections::{hash_map::Entry, HashMap},
6 fs,
7 path::{Path, PathBuf},
8};
9
10pub trait Cache<Id: ?Sized> {
12 type Storage: AsRef<str>;
18
19 fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug>;
21
22 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a>;
26}
27
28impl<'b, C: Cache<Id>, Id: ?Sized> Cache<Id> for &'b mut C {
29 type Storage = C::Storage;
30
31 fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug> {
32 C::fetch(self, id)
33 }
34 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
35 C::display(self, id)
36 }
37}
38
39impl<C: Cache<Id>, Id: ?Sized> Cache<Id> for Box<C> {
40 type Storage = C::Storage;
41
42 fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug> {
43 C::fetch(self, id)
44 }
45 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
46 C::display(self, id)
47 }
48}
49
/// Position and size of a single line within a [`Source`], tracked both in characters
/// and in bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Line {
    /// Character offset of the start of the line within the whole text.
    offset: usize,
    /// Number of characters in the line.
    char_len: usize,
    /// Byte offset of the start of the line within the whole text.
    byte_offset: usize,
    /// Number of bytes in the line.
    byte_len: usize,
}

impl Line {
    /// Character offset at which this line starts.
    pub fn offset(&self) -> usize {
        self.offset
    }

    /// Length of this line, counted in characters.
    pub fn len(&self) -> usize {
        self.char_len
    }

    /// Whether this line contains no characters at all.
    pub fn is_empty(&self) -> bool {
        self.char_len == 0
    }

    /// Character range covered by this line: starts at [`Line::offset`] and is
    /// [`Line::len`] characters long.
    pub fn span(&self) -> Range<usize> {
        let start = self.offset;
        start..start + self.char_len
    }

    /// Byte range covered by this line; used to slice the line's text out of the source.
    fn byte_span(&self) -> Range<usize> {
        let start = self.byte_offset;
        start..start + self.byte_len
    }
}
86
87#[derive(Clone, Debug, Hash, PartialEq, Eq)]
91pub struct Source<I: AsRef<str> = String> {
92 text: I,
93 lines: Vec<Line>,
94 len: usize,
95 byte_len: usize,
96 display_line_offset: usize,
97}
98
99impl<I: AsRef<str>> Source<I> {
100 pub fn text(&self) -> &str {
102 self.text.as_ref()
103 }
104}
105
106impl<I: AsRef<str>> From<I> for Source<I> {
107 fn from(input: I) -> Self {
111 if input.as_ref().is_empty() {
114 return Self {
115 text: input,
116 lines: vec![Line {
117 offset: 0,
118 char_len: 0,
119 byte_offset: 0,
120 byte_len: 0,
121 }],
122 len: 0,
123 byte_len: 0,
124 display_line_offset: 0,
125 };
126 }
127
128 let mut char_offset = 0;
129 let mut byte_offset = 0;
130 let mut lines = Vec::new();
131
132 const SEPARATORS: [char; 7] = [
133 '\r', '\n', '\x0B', '\x0C', '\u{0085}', '\u{2028}', '\u{2029}', ];
141 let mut remaining = input.as_ref().split_inclusive(SEPARATORS).peekable();
142 while let Some(line) = remaining.next() {
143 let mut byte_len = line.len();
144 let mut char_len = line.chars().count();
145 if line.ends_with('\r') && remaining.next_if_eq(&"\n").is_some() {
147 byte_len += 1;
148 char_len += 1;
149 }
150 lines.push(Line {
151 offset: char_offset,
152 char_len,
153 byte_offset,
154 byte_len,
155 });
156
157 char_offset += char_len;
158 byte_offset += byte_len;
159 }
160
161 Self {
162 text: input,
163 lines,
164 len: char_offset,
165 byte_len: byte_offset,
166 display_line_offset: 0,
167 }
168 }
169}
170
171impl<I: AsRef<str>> Source<I> {
172 pub fn with_display_line_offset(mut self, offset: usize) -> Self {
174 self.display_line_offset = offset;
175 self
176 }
177
178 pub fn display_line_offset(&self) -> usize {
180 self.display_line_offset
181 }
182
183 pub fn len(&self) -> usize {
185 self.len
186 }
187
188 pub fn is_empty(&self) -> bool {
190 self.len() == 0
191 }
192
193 pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
195 self.text.as_ref().chars()
196 }
197
198 pub fn line(&self, idx: usize) -> Option<Line> {
200 self.lines.get(idx).copied()
201 }
202
203 pub fn lines(&self) -> impl ExactSizeIterator<Item = Line> + '_ {
205 self.lines.iter().copied()
206 }
207
208 pub fn get_offset_line(&self, offset: usize) -> Option<(Line, usize, usize)> {
212 if offset <= self.len {
213 let idx = self
214 .lines
215 .binary_search_by_key(&offset, |line| line.offset)
216 .unwrap_or_else(|idx| idx.saturating_sub(1));
217 let line = self.line(idx)?;
218 assert!(
219 offset >= line.offset,
220 "offset = {}, line.offset = {}",
221 offset,
222 line.offset
223 );
224 Some((line, idx, offset - line.offset))
225 } else {
226 None
227 }
228 }
229
230 pub fn get_byte_line(&self, byte_offset: usize) -> Option<(Line, usize, usize)> {
234 if byte_offset <= self.byte_len {
235 let idx = self
236 .lines
237 .binary_search_by_key(&byte_offset, |line| line.byte_offset)
238 .unwrap_or_else(|idx| idx.saturating_sub(1));
239 let line = self.line(idx)?;
240 assert!(
241 byte_offset >= line.byte_offset,
242 "byte_offset = {}, line.byte_offset = {}",
243 byte_offset,
244 line.byte_offset
245 );
246 Some((line, idx, byte_offset - line.byte_offset))
247 } else {
248 None
249 }
250 }
251
252 pub fn get_line_range<S: Span>(&self, span: &S) -> Range<usize> {
257 let start = self.get_offset_line(span.start()).map_or(0, |(_, l, _)| l);
258 let end = self
259 .get_offset_line(span.end().saturating_sub(1).max(span.start()))
260 .map_or(self.lines.len(), |(_, l, _)| l + 1);
261 start..end
262 }
263
264 pub fn get_line_text(&self, line: Line) -> Option<&'_ str> {
266 self.text.as_ref().get(line.byte_span())
267 }
268}
269
270impl<I: AsRef<str>> Cache<()> for Source<I> {
271 type Storage = I;
272
273 fn fetch(&mut self, _: &()) -> Result<&Source<I>, impl fmt::Debug> {
274 Ok::<_, ()>(self)
275 }
276 fn display<'a>(&self, _: &'a ()) -> Option<impl fmt::Display + 'a> {
277 None::<&str>
278 }
279}
280
281impl<I: AsRef<str>> Cache<()> for &'_ Source<I> {
282 type Storage = I;
283
284 fn fetch(&mut self, _: &()) -> Result<&Source<I>, impl fmt::Debug> {
285 Ok::<_, ()>(*self)
286 }
287 fn display<'a>(&self, _: &'a ()) -> Option<impl fmt::Display + 'a> {
288 None::<u8>
289 }
290}
291
292impl<I: AsRef<str>, Id: fmt::Display + Eq> Cache<Id> for (Id, Source<I>) {
293 type Storage = I;
294
295 fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
296 if id == &self.0 {
297 Ok(&self.1)
298 } else {
299 Err(Box::new(format!("Failed to fetch source '{}'", id)))
300 }
301 }
302 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
303 Some(Box::new(id))
304 }
305}
306
307impl<I: AsRef<str>, Id: fmt::Display + Eq> Cache<Id> for (Id, &'_ Source<I>) {
308 type Storage = I;
309
310 fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
311 if id == &self.0 {
312 Ok(self.1)
313 } else {
314 Err(Box::new(format!("Failed to fetch source '{}'", id)))
315 }
316 }
317 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
318 Some(Box::new(id))
319 }
320}
321
322#[derive(Default, Debug, Clone)]
324pub struct FileCache {
325 files: HashMap<PathBuf, Source>,
326}
327
328impl Cache<Path> for FileCache {
329 type Storage = String;
330
331 fn fetch(&mut self, path: &Path) -> Result<&Source, impl fmt::Debug> {
332 Ok::<_, Error>(match self.files.entry(path.to_path_buf()) {
333 Entry::Occupied(entry) => entry.into_mut(),
335 Entry::Vacant(entry) => entry.insert(Source::from(fs::read_to_string(path)?)),
336 })
337 }
338 fn display<'a>(&self, path: &'a Path) -> Option<impl fmt::Display + 'a> {
339 Some(Box::new(path.display()))
340 }
341}
342
343#[derive(Debug, Clone)]
345pub struct FnCache<Id, F, I>
346where
347 I: AsRef<str>,
348{
349 sources: HashMap<Id, Source<I>>,
350 get: F,
351}
352
353impl<Id, F, I> FnCache<Id, F, I>
354where
355 I: AsRef<str>,
356{
357 pub fn new(get: F) -> Self {
359 Self {
360 sources: HashMap::default(),
361 get,
362 }
363 }
364
365 pub fn with_sources(mut self, sources: HashMap<Id, Source<I>>) -> Self
367 where
368 Id: Eq + Hash,
369 {
370 self.sources.reserve(sources.len());
371 for (id, src) in sources {
372 self.sources.insert(id, src);
373 }
374 self
375 }
376}
377
378impl<Id: fmt::Display + Hash + PartialEq + Eq + Clone, F, I, E> Cache<Id> for FnCache<Id, F, I>
379where
380 I: AsRef<str>,
381 E: fmt::Debug,
382 F: for<'a> FnMut(&'a Id) -> Result<I, E>,
383{
384 type Storage = I;
385
386 fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
387 Ok::<_, E>(match self.sources.entry(id.clone()) {
388 Entry::Occupied(entry) => entry.into_mut(),
389 Entry::Vacant(entry) => entry.insert(Source::from((self.get)(id)?)),
390 })
391 }
392 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
393 Some(Box::new(id))
394 }
395}
396
397pub fn sources<Id, S, I>(iter: I) -> impl Cache<Id>
399where
400 Id: fmt::Display + Hash + PartialEq + Eq + Clone + 'static,
401 I: IntoIterator<Item = (Id, S)>,
402 S: AsRef<str>,
403{
404 FnCache::new((move |id| Err(format!("Failed to fetch source '{}'", id))) as fn(&_) -> _)
405 .with_sources(
406 iter.into_iter()
407 .map(|(id, s)| (id, Source::from(s)))
408 .collect(),
409 )
410}
411
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::Source;

    /// Concatenates `lines` into one text, builds a [`Source`] from it, and checks that
    /// the source reports exactly those lines: same count, contiguous character offsets,
    /// matching character lengths and identical text.
    fn test_with_lines(lines: Vec<&str>) {
        let source = Source::from(lines.concat());

        assert_eq!(source.lines.len(), lines.len());

        let mut expected_offset = 0;
        for (indexed, raw) in source.lines.iter().zip(&lines) {
            assert_eq!(indexed.offset, expected_offset);
            assert_eq!(indexed.char_len, raw.chars().count());
            assert_eq!(source.get_line_text(*indexed).unwrap(), *raw);
            expected_offset += indexed.char_len;
        }

        assert_eq!(source.len, expected_offset);
    }

    #[test]
    fn source_from_empty() {
        // An empty input is still reported as one empty line.
        test_with_lines(vec![""]);
    }

    #[test]
    fn source_from_single() {
        test_with_lines(vec!["Single line"]);
        test_with_lines(vec!["Single line with LF\n"]);
        test_with_lines(vec!["Single line with CRLF\r\n"]);
    }

    #[test]
    fn source_from_multi() {
        test_with_lines(vec!["Two\r\n", "lines\n"]);
        test_with_lines(vec!["Some\n", "more\r\n", "lines"]);
        test_with_lines(vec!["\n", "\r\n", "\n", "Empty Lines"]);
    }

    // Despite the name, nothing is trimmed: the helper asserts that each line's text is
    // identical to the raw input, trailing whitespace included.
    #[test]
    fn source_from_trims_trailing_spaces() {
        test_with_lines(vec!["Trailing spaces \n", "are trimmed\t"]);
    }

    #[test]
    fn source_from_alternate_line_endings() {
        test_with_lines(vec![
            "CR\r",
            "VT\x0B",
            "FF\x0C",
            "NEL\u{0085}",
            "LS\u{2028}",
            "PS\u{2029}",
        ]);
    }

    #[test]
    fn source_from_other_string_types() {
        let raw = r#"A raw string
 with multiple
 lines behind
 an Arc"#;
        let shared: Arc<str> = Arc::from(raw);
        let source = Source::from(shared);

        assert_eq!(source.lines.len(), 4);

        let mut expected_offset = 0;
        for (indexed, raw_line) in source.lines.iter().zip(raw.split_inclusive('\n')) {
            assert_eq!(indexed.offset, expected_offset);
            assert_eq!(indexed.char_len, raw_line.chars().count());
            assert_eq!(source.get_line_text(*indexed).unwrap(), raw_line);
            expected_offset += indexed.char_len;
        }

        assert_eq!(source.len, expected_offset);
    }

    #[test]
    fn source_from_reference() {
        let raw = r#"A raw string
 with multiple
 lines"#;

        // Building a `Source` over a borrowed `&str` must not require ownership.
        fn non_owning_source(input: &str) -> Source<&str> {
            Source::from(input)
        }

        let source = non_owning_source(raw);
        assert_eq!(source.lines.len(), 3);
    }
}