1use super::*;
2
3use std::io::Error;
4use std::{
5 collections::{hash_map::Entry, HashMap},
6 fs,
7 path::{Path, PathBuf},
8};
9
10pub trait Cache<Id: ?Sized> {
12 type Storage: AsRef<str>;
18
19 fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug>;
21
22 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a>;
26}
27
28impl<C: Cache<Id>, Id: ?Sized> Cache<Id> for &mut C {
29 type Storage = C::Storage;
30
31 fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug> {
32 C::fetch(self, id)
33 }
34 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
35 C::display(self, id)
36 }
37}
38
39impl<C: Cache<Id>, Id: ?Sized> Cache<Id> for Box<C> {
40 type Storage = C::Storage;
41
42 fn fetch(&mut self, id: &Id) -> Result<&Source<Self::Storage>, impl fmt::Debug> {
43 C::fetch(self, id)
44 }
45 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
46 C::display(self, id)
47 }
48}
49
/// Position and size of one line inside a source text, tracked both in
/// characters and in bytes.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct Line {
    /// Character offset of the line's first character within the source.
    offset: usize,
    /// Number of characters in the line.
    char_len: usize,
    /// Byte offset of the line's first byte within the source.
    byte_offset: usize,
    /// Number of bytes in the line.
    byte_len: usize,
}

impl Line {
    /// Character offset at which this line starts.
    pub fn offset(&self) -> usize {
        self.offset
    }

    /// Byte offset at which this line starts.
    pub fn byte_offset(&self) -> usize {
        self.byte_offset
    }

    /// Length of this line, measured in characters.
    pub fn len(&self) -> usize {
        self.char_len
    }

    /// Whether this line contains no characters at all.
    pub fn is_empty(&self) -> bool {
        self.char_len == 0
    }

    /// Character range covered by this line.
    pub fn span(&self) -> Range<usize> {
        let start = self.offset;
        start..(start + self.char_len)
    }

    /// Byte range covered by this line.
    fn byte_span(&self) -> Range<usize> {
        let start = self.byte_offset;
        start..(start + self.byte_len)
    }
}
91
92#[derive(Clone, Debug, Hash, PartialEq, Eq)]
96pub struct Source<I: AsRef<str> = String> {
97 text: I,
98 lines: Vec<Line>,
99 len: usize,
100 byte_len: usize,
101 display_line_offset: usize,
102}
103
104impl<I: AsRef<str>> Source<I> {
105 pub fn text(&self) -> &str {
107 self.text.as_ref()
108 }
109}
110
111impl<I: AsRef<str>> From<I> for Source<I> {
112 fn from(input: I) -> Self {
116 if input.as_ref().is_empty() {
119 return Self {
120 text: input,
121 lines: vec![Line {
122 offset: 0,
123 char_len: 0,
124 byte_offset: 0,
125 byte_len: 0,
126 }],
127 len: 0,
128 byte_len: 0,
129 display_line_offset: 0,
130 };
131 }
132
133 let mut char_offset = 0;
134 let mut byte_offset = 0;
135 let mut lines = Vec::new();
136
137 const SEPARATORS: [char; 7] = [
138 '\r', '\n', '\x0B', '\x0C', '\u{0085}', '\u{2028}', '\u{2029}', ];
146 let mut remaining = input.as_ref().split_inclusive(SEPARATORS).peekable();
147 while let Some(line) = remaining.next() {
148 let mut byte_len = line.len();
149 let mut char_len = line.chars().count();
150 if line.ends_with('\r') && remaining.next_if_eq(&"\n").is_some() {
152 byte_len += 1;
153 char_len += 1;
154 }
155 lines.push(Line {
156 offset: char_offset,
157 char_len,
158 byte_offset,
159 byte_len,
160 });
161
162 char_offset += char_len;
163 byte_offset += byte_len;
164 }
165
166 Self {
167 text: input,
168 lines,
169 len: char_offset,
170 byte_len: byte_offset,
171 display_line_offset: 0,
172 }
173 }
174}
175
176impl<I: AsRef<str>> Source<I> {
177 pub fn with_display_line_offset(mut self, offset: usize) -> Self {
179 self.display_line_offset = offset;
180 self
181 }
182
183 pub fn display_line_offset(&self) -> usize {
185 self.display_line_offset
186 }
187
188 pub fn len(&self) -> usize {
190 self.len
191 }
192
193 pub fn is_empty(&self) -> bool {
195 self.len() == 0
196 }
197
198 pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
200 self.text.as_ref().chars()
201 }
202
203 pub fn line(&self, idx: usize) -> Option<Line> {
205 self.lines.get(idx).copied()
206 }
207
208 pub fn lines(&self) -> impl ExactSizeIterator<Item = Line> + '_ {
210 self.lines.iter().copied()
211 }
212
213 pub fn get_offset_line(&self, offset: usize) -> Option<(Line, usize, usize)> {
217 if offset <= self.len {
218 let idx = self
219 .lines
220 .binary_search_by_key(&offset, |line| line.offset)
221 .unwrap_or_else(|idx| idx.saturating_sub(1));
222 let line = self.line(idx)?;
223 assert!(
224 offset >= line.offset,
225 "offset = {}, line.offset = {}",
226 offset,
227 line.offset
228 );
229 Some((line, idx, offset - line.offset))
230 } else {
231 None
232 }
233 }
234
235 pub fn get_byte_line(&self, byte_offset: usize) -> Option<(Line, usize, usize)> {
239 if byte_offset <= self.byte_len {
240 let idx = self
241 .lines
242 .binary_search_by_key(&byte_offset, |line| line.byte_offset)
243 .unwrap_or_else(|idx| idx.saturating_sub(1));
244 let line = self.line(idx)?;
245 assert!(
246 byte_offset >= line.byte_offset,
247 "byte_offset = {}, line.byte_offset = {}",
248 byte_offset,
249 line.byte_offset
250 );
251 Some((line, idx, byte_offset - line.byte_offset))
252 } else {
253 None
254 }
255 }
256
257 pub fn get_line_range<S: Span>(&self, span: &S) -> Range<usize> {
262 let start = self.get_offset_line(span.start()).map_or(0, |(_, l, _)| l);
263 let end = self
264 .get_offset_line(span.end().saturating_sub(1).max(span.start()))
265 .map_or(self.lines.len(), |(_, l, _)| l + 1);
266 start..end
267 }
268
269 pub fn get_line_text(&self, line: Line) -> Option<&'_ str> {
271 self.text.as_ref().get(line.byte_span())
272 }
273}
274
275impl<I: AsRef<str>> Cache<()> for Source<I> {
276 type Storage = I;
277
278 fn fetch(&mut self, _: &()) -> Result<&Source<I>, impl fmt::Debug> {
279 Ok::<_, ()>(self)
280 }
281 fn display<'a>(&self, _: &'a ()) -> Option<impl fmt::Display + 'a> {
282 None::<&str>
283 }
284}
285
286impl<I: AsRef<str>> Cache<()> for &'_ Source<I> {
287 type Storage = I;
288
289 fn fetch(&mut self, _: &()) -> Result<&Source<I>, impl fmt::Debug> {
290 Ok::<_, ()>(*self)
291 }
292 fn display<'a>(&self, _: &'a ()) -> std::option::Option<impl std::fmt::Display + 'a> {
293 None::<&str>
294 }
295}
296
297impl<I: AsRef<str>, Id: fmt::Display + Eq> Cache<Id> for (Id, Source<I>) {
298 type Storage = I;
299
300 fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
301 if id == &self.0 {
302 Ok(&self.1)
303 } else {
304 Err(Box::new(format!("Failed to fetch source '{}'", id)))
305 }
306 }
307 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
308 Some(Box::new(id))
309 }
310}
311
312impl<I: AsRef<str>, Id: fmt::Display + Eq> Cache<Id> for (Id, &'_ Source<I>) {
313 type Storage = I;
314
315 fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
316 if id == &self.0 {
317 Ok(self.1)
318 } else {
319 Err(Box::new(format!("Failed to fetch source '{}'", id)))
320 }
321 }
322 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
323 Some(Box::new(id))
324 }
325}
326
327#[derive(Default, Debug, Clone)]
329pub struct FileCache {
330 files: HashMap<PathBuf, Source>,
331}
332
333impl Cache<Path> for FileCache {
334 type Storage = String;
335
336 fn fetch(&mut self, path: &Path) -> Result<&Source, impl fmt::Debug> {
337 Ok::<_, Error>(match self.files.entry(path.to_path_buf()) {
338 Entry::Occupied(entry) => entry.into_mut(),
340 Entry::Vacant(entry) => entry.insert(Source::from(fs::read_to_string(path)?)),
341 })
342 }
343 fn display<'a>(&self, path: &'a Path) -> Option<impl fmt::Display + 'a> {
344 Some(Box::new(path.display()))
345 }
346}
347
348#[derive(Debug, Clone)]
350pub struct FnCache<Id, F, I>
351where
352 I: AsRef<str>,
353{
354 sources: HashMap<Id, Source<I>>,
355 get: F,
356}
357
358impl<Id, F, I> FnCache<Id, F, I>
359where
360 I: AsRef<str>,
361{
362 pub fn new(get: F) -> Self {
364 Self {
365 sources: HashMap::default(),
366 get,
367 }
368 }
369
370 pub fn with_sources(mut self, sources: HashMap<Id, Source<I>>) -> Self
372 where
373 Id: Eq + Hash,
374 {
375 self.sources.reserve(sources.len());
376 for (id, src) in sources {
377 self.sources.insert(id, src);
378 }
379 self
380 }
381}
382
383impl<Id: fmt::Display + Hash + PartialEq + Eq + Clone, F, I, E> Cache<Id> for FnCache<Id, F, I>
384where
385 I: AsRef<str>,
386 E: fmt::Debug,
387 F: for<'a> FnMut(&'a Id) -> Result<I, E>,
388{
389 type Storage = I;
390
391 fn fetch(&mut self, id: &Id) -> Result<&Source<I>, impl fmt::Debug> {
392 Ok::<_, E>(match self.sources.entry(id.clone()) {
393 Entry::Occupied(entry) => entry.into_mut(),
394 Entry::Vacant(entry) => entry.insert(Source::from((self.get)(id)?)),
395 })
396 }
397 fn display<'a>(&self, id: &'a Id) -> Option<impl fmt::Display + 'a> {
398 Some(Box::new(id))
399 }
400}
401
402pub fn sources<Id, S, I>(iter: I) -> impl Cache<Id>
404where
405 Id: fmt::Display + Hash + PartialEq + Eq + Clone + 'static,
406 S: AsRef<str>,
407 I: IntoIterator<Item = (Id, S)>,
408{
409 FnCache::new((move |id| Err(format!("Failed to fetch source '{}'", id))) as fn(&_) -> _)
410 .with_sources(
411 iter.into_iter()
412 .map(|(id, s)| (id, Source::from(s)))
413 .collect(),
414 )
415}
416
#[cfg(test)]
mod tests {
    use std::iter::zip;
    use std::sync::Arc;

    use super::Source;

    /// Concatenates `lines` into one text, builds a `Source` from it, and
    /// checks that the source reports exactly those lines (offset, character
    /// length and text) and the matching total length.
    fn test_with_lines(lines: Vec<&str>) {
        let source = Source::from(lines.concat());

        assert_eq!(source.lines.len(), lines.len());

        let mut expected_offset = 0;
        for (parsed, &expected) in zip(&source.lines, &lines) {
            assert_eq!(parsed.offset, expected_offset);
            assert_eq!(parsed.char_len, expected.chars().count());
            assert_eq!(source.get_line_text(*parsed).unwrap(), expected);
            expected_offset += parsed.char_len;
        }

        assert_eq!(source.len, expected_offset);
    }

    #[test]
    fn source_from_empty() {
        // An empty input still yields a single, empty line.
        test_with_lines(vec![""]);
    }

    #[test]
    fn source_from_single() {
        for text in [
            "Single line",
            "Single line with LF\n",
            "Single line with CRLF\r\n",
        ] {
            test_with_lines(vec![text]);
        }
    }

    #[test]
    fn source_from_multi() {
        test_with_lines(vec!["Two\r\n", "lines\n"]);
        test_with_lines(vec!["Some\n", "more\r\n", "lines"]);
        test_with_lines(vec!["\n", "\r\n", "\n", "Empty Lines"]);
    }

    // Despite its name, this test checks that trailing whitespace is kept:
    // `test_with_lines` compares each line's text against the raw input.
    #[test]
    fn source_from_trims_trailing_spaces() {
        test_with_lines(vec!["Trailing spaces \n", "are trimmed\t"]);
    }

    #[test]
    fn source_from_alternate_line_endings() {
        let lines = vec![
            "CR\r",
            "VT\x0B",
            "FF\x0C",
            "NEL\u{0085}",
            "LS\u{2028}",
            "PS\u{2029}",
        ];
        test_with_lines(lines);
    }

    #[test]
    fn source_from_other_string_types() {
        let raw = r#"A raw string
 with multiple
 lines behind
 an Arc"#;
        let arc: Arc<str> = Arc::from(raw);
        let source = Source::from(arc);

        assert_eq!(source.lines.len(), 4);

        let mut expected_offset = 0;
        for (parsed, expected) in zip(&source.lines, raw.split_inclusive('\n')) {
            assert_eq!(parsed.offset, expected_offset);
            assert_eq!(parsed.char_len, expected.chars().count());
            assert_eq!(source.get_line_text(*parsed).unwrap(), expected);
            expected_offset += parsed.char_len;
        }

        assert_eq!(source.len, expected_offset);
    }

    #[test]
    fn source_from_reference() {
        let raw = r#"A raw string
 with multiple
 lines"#;

        // Proves that a `Source` can borrow its text instead of owning it.
        fn non_owning_source(input: &str) -> Source<&str> {
            Source::from(input)
        }

        let source = non_owning_source(raw);
        assert_eq!(source.lines.len(), 3);
    }
}