vhdl_lang/data/
source.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this file,
3// You can obtain one at http://mozilla.org/MPL/2.0/.
4//
5// Copyright (c) 2018, Olof Kraigher olof.kraigher@gmail.com
6
7use super::contents::Contents;
8use parking_lot::{RwLock, RwLockReadGuard};
9use std::cmp::{max, min};
10use std::collections::hash_map::DefaultHasher;
11use std::convert::AsRef;
12use std::fmt;
13use std::fmt::Write;
14use std::hash::{Hash, Hasher};
15use std::io;
16pub use std::path::{Path, PathBuf};
17use std::sync::Arc;
18
/// Identifies a source file by its path together with a precomputed hash of
/// that path.
#[derive(Debug)]
struct FileId {
    /// Path of the file, wrapped in a `FilePath`.
    name: FilePath,
    /// Hash value of `self.name`.
    hash: u64,
}
25
26impl FileId {
27    fn new(name: &Path) -> FileId {
28        let name = FilePath::new(name);
29        let hash = hash(&name);
30        Self { name, hash }
31    }
32}
33
34impl PartialEq for FileId {
35    fn eq(&self, other: &Self) -> bool {
36        // Use file name hash to speedup comparison
37        if self.hash == other.hash {
38            self.name == other.name
39        } else {
40            false
41        }
42    }
43}
44
/// Computes the hash of `value` using a freshly created [`DefaultHasher`].
fn hash(value: &Path) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(value, &mut state);
    Hasher::finish(&state)
}
50
/// Represents a single source file and its contents.
struct UniqueSource {
    /// Identifier (path and path hash) of the file.
    file_id: FileId,
    /// The file contents, guarded by a read-write lock so that they can be
    /// read and modified through a shared reference.
    contents: RwLock<Contents>,
}
56
57impl fmt::Debug for UniqueSource {
58    /// Custom implementation to avoid large contents strings.
59    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
60        f.debug_struct(stringify!(UniqueSource))
61            .field(stringify!(file_id), &self.file_id)
62            .field(stringify!(contents), &"...")
63            .finish()
64    }
65}
66
67impl UniqueSource {
68    fn inline(file_name: &Path, contents: &str) -> Self {
69        Self {
70            file_id: FileId::new(file_name),
71            contents: RwLock::new(Contents::from_str(contents)),
72        }
73    }
74
75    fn from_latin1_file(file_name: &Path) -> io::Result<Self> {
76        let contents = Contents::from_latin1_file(file_name)?;
77        Ok(Self {
78            file_id: FileId::new(file_name),
79            contents: RwLock::new(contents),
80        })
81    }
82
83    #[cfg(test)]
84    pub fn from_contents(file_name: &Path, contents: Contents) -> UniqueSource {
85        Self {
86            file_id: FileId::new(file_name),
87            contents: RwLock::new(contents),
88        }
89    }
90
91    fn contents(&self) -> RwLockReadGuard<'_, Contents> {
92        self.contents.read()
93    }
94
95    fn file_name(&self) -> &Path {
96        self.file_id.name.as_ref()
97    }
98
99    fn file_path(&self) -> &FilePath {
100        &self.file_id.name
101    }
102}
103
/// A thread-safe reference to a source file.
/// Multiple objects of this type can refer to the same source.
///
/// Cloning only clones the inner [`Arc`]; the underlying source is shared.
#[derive(Debug, Clone)]
pub struct Source(Arc<UniqueSource>);
108
impl PartialEq for Source {
    /// Sources are equal when their file identifiers are equal.
    /// The contents are not compared.
    fn eq(&self, other: &Self) -> bool {
        self.0.file_id == other.0.file_id
    }
}
114
impl PartialOrd for Source {
    /// Delegates to [`Ord::cmp`], so the result is always `Some`.
    fn partial_cmp(&self, other: &Source) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
120
impl Ord for Source {
    /// Orders sources by their file name (path comparison).
    fn cmp(&self, other: &Source) -> std::cmp::Ordering {
        self.file_name().cmp(other.file_name())
    }
}
126
// Marker impl: the `PartialEq` implementation of `Source` is a full equivalence relation.
impl Eq for Source {}
128
impl Hash for Source {
    /// Feeds the precomputed file-name hash into `hasher` instead of
    /// hashing the path again.
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        hasher.write_u64(self.0.file_id.hash)
    }
}
134
135impl Source {
136    /// Creates a source from a (virtual) name and in-memory contents.
137    ///
138    /// Note: For differing values of `contents`, the value of `file_name`
139    /// *must* differ as well.
140    pub fn inline(file_name: &Path, contents: &str) -> Source {
141        Source(Arc::new(UniqueSource::inline(file_name, contents)))
142    }
143
144    pub fn from_latin1_file(file_name: &Path) -> io::Result<Source> {
145        Ok(Source(Arc::new(UniqueSource::from_latin1_file(file_name)?)))
146    }
147
148    #[cfg(test)]
149    pub fn from_contents(file_name: &Path, contents: Contents) -> Source {
150        Source(Arc::new(UniqueSource::from_contents(file_name, contents)))
151    }
152
153    pub fn contents(&self) -> RwLockReadGuard<'_, Contents> {
154        self.0.contents()
155    }
156
157    pub fn file_name(&self) -> &Path {
158        self.0.file_name()
159    }
160
161    pub(crate) fn file_path(&self) -> &FilePath {
162        self.0.file_path()
163    }
164
165    pub fn pos(&self, start: Position, end: Position) -> SrcPos {
166        SrcPos {
167            source: self.clone(),
168            range: Range { start, end },
169        }
170    }
171
172    pub fn change(&self, range: Option<&Range>, content: &str) {
173        let mut contents = self.0.contents.write();
174        if let Some(range) = range {
175            contents.change(range, content);
176        } else {
177            *contents = Contents::from_str(content);
178        }
179    }
180}
181
/// A lexical position (line, column) in a source.
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash, Debug, Default)]
pub struct Position {
    /// Line (zero-based).
    pub line: u32,
    /// Column (zero-based).
    /// The character offset is utf-16 encoded
    pub character: u32,
}

impl Position {
    /// Creates a position from a zero-based line and a UTF-16 column offset.
    pub fn new(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    /// The position one column to the right on the same line.
    pub fn next_char(self) -> Position {
        Position::new(self.line, self.character + 1)
    }

    /// Advances this position past `chr`.
    ///
    /// A newline moves to the first column of the next line; any other
    /// character advances the column by its UTF-16 length.
    pub fn move_after_char(&mut self, chr: char) {
        match chr {
            '\n' => {
                self.line += 1;
                self.character = 0;
            }
            other => self.character += other.len_utf16() as u32,
        }
    }

    /// Like [`Position::move_after_char`], but returns the advanced position.
    pub fn after_char(self, chr: char) -> Position {
        let mut moved = self;
        moved.move_after_char(chr);
        moved
    }

    /// The position one column to the left on the same line.
    /// The column never goes below zero.
    pub fn prev_char(self) -> Position {
        Position::new(self.line, self.character.saturating_sub(1))
    }

    /// Builds the range that starts at this position and ends at `end`.
    pub fn range_to(self, end: Position) -> Range {
        Range::new(self, end)
    }
}

/// A lexical range in a source.
#[derive(PartialEq, Eq, Clone, Copy, Hash, Debug)]
pub struct Range {
    /// Start of the range (inclusive).
    pub start: Position,
    /// End of the range (exclusive).
    pub end: Position,
}

impl Range {
    /// Creates a range from `start` to `end`.
    pub fn new(start: Position, end: Position) -> Range {
        Range { start, end }
    }

    /// Whether `position` lies within this range.
    ///
    /// Note that a position equal to `end` is also reported as contained.
    pub fn contains(&self, position: Position) -> bool {
        position >= self.start && position <= self.end
    }
}
248
/// A lexical range within a specific source file.
#[derive(PartialEq, Clone, Debug, Eq, Hash)]
pub struct SrcPos {
    /// The referenced source file.
    pub source: Source,
    /// The range inside `source` that this position covers.
    pub range: Range,
}
256
257impl Ord for SrcPos {
258    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
259        let ordering = self.source.cmp(&other.source);
260        if std::cmp::Ordering::Equal == ordering {
261            self.range.start.cmp(&other.range.start)
262        } else {
263            ordering
264        }
265    }
266}
267
impl PartialOrd for SrcPos {
    /// Delegates to [`Ord::cmp`], so the result is always `Some`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
273
impl AsRef<SrcPos> for SrcPos {
    /// Returns `self`; lets a `SrcPos` be passed where an `AsRef<SrcPos>`
    /// is expected, such as in `SrcPos::combine`.
    fn as_ref(&self) -> &SrcPos {
        self
    }
}
279
280impl SrcPos {
    /// Number of context lines requested before and after the range when
    /// rendering the code context.
    const LINE_CONTEXT: u32 = 2;
282
    /// Creates a position covering `range` in `source`.
    pub fn new(source: Source, range: Range) -> SrcPos {
        SrcPos { source, range }
    }
286
287    fn get_line_context(&self, context_lines: u32, contents: &Contents) -> Vec<(u32, String)> {
288        let mut lines = Vec::new();
289
290        let start = self.range.start.line.saturating_sub(context_lines);
291        let end = self.range.end.line + context_lines;
292
293        for lineno in start..=end {
294            if let Some(line) = contents.get_line(lineno as usize) {
295                lines.push((lineno, line.to_owned()));
296            }
297        }
298
299        if lines.is_empty() {
300            lines.push((self.range.start.line, String::new()));
301        }
302        lines
303    }
304
305    fn push_replicate(line: &mut String, chr: char, times: usize) {
306        for _ in 0..times {
307            line.push(chr);
308        }
309    }
310
311    fn visual_width(chr: char) -> usize {
312        if chr == '\t' {
313            4
314        } else {
315            1
316        }
317    }
318
319    /// Write ~~~ to underline symbol
320    fn underline(&self, lineno_len: usize, lineno: u32, line: &str, into: &mut String) {
321        const NEWLINE_SIZE: usize = 1;
322        into.reserve("  |  ".len() + lineno_len + line.len() + NEWLINE_SIZE);
323
324        // Prefix
325        for _ in 0..lineno_len {
326            into.push(' ');
327        }
328        into.push_str("  |  ");
329
330        let mut pos = Position {
331            line: lineno,
332            character: 0,
333        };
334        // Padding before underline
335        for chr in line.chars() {
336            if pos < self.range.start {
337                Self::push_replicate(into, ' ', Self::visual_width(chr));
338            } else if pos < self.range.end {
339                Self::push_replicate(into, '~', Self::visual_width(chr));
340            } else {
341                break;
342            }
343            pos.character += chr.len_utf16() as u32;
344        }
345
346        if lineno == self.range.end.line {
347            while pos < self.range.end {
348                into.push('~');
349                pos.character += 1;
350            }
351        }
352
353        // Newline
354        into.push('\n');
355    }
356
357    fn code_context_from_contents(
358        &self,
359        contents: &Contents,
360        context_lines: u32,
361    ) -> (usize, String) {
362        let lines = self.get_line_context(context_lines, contents);
363        use pad::{Alignment, PadStr};
364        // +1 since lines are shown with 1-index
365        let lineno_len = (self.range.start.line + context_lines + 1)
366            .to_string()
367            .len();
368
369        let mut result = String::new();
370
371        for (lineno, line) in lines.iter() {
372            let line = line.to_string();
373            let line = line.trim_matches('\n');
374            let lineno_str = (lineno + 1)
375                .to_string()
376                .pad_to_width_with_alignment(lineno_len, Alignment::Right);
377            let overlaps = self.range.start.line <= *lineno && *lineno <= self.range.end.line;
378
379            if overlaps {
380                write!(result, "{lineno_str} --> ").unwrap();
381            } else {
382                write!(result, "{lineno_str}  |  ").unwrap();
383            }
384
385            for chr in line.trim_end().chars() {
386                if chr == '\t' {
387                    Self::push_replicate(&mut result, ' ', Self::visual_width(chr));
388                } else {
389                    result.push(chr);
390                }
391            }
392            result.push('\n');
393
394            if overlaps {
395                self.underline(lineno_len, *lineno, line, &mut result);
396            }
397        }
398
399        (lineno_len, result)
400    }
401
    /// Create a string for pretty printing.
    ///
    /// The string contains the source lines around this position with the
    /// range underlined.
    pub fn code_context(&self) -> String {
        self.lineno_len_and_code_context().1
    }
406
    /// Renders the code context with `Self::LINE_CONTEXT` context lines while
    /// holding a read lock on the source contents.
    ///
    /// Returns the width of the line number column and the rendered text.
    fn lineno_len_and_code_context(&self) -> (usize, String) {
        let contents = self.source.contents();
        self.code_context_from_contents(&contents, Self::LINE_CONTEXT)
    }
411
412    pub fn show(&self, message: &str) -> String {
413        let (lineno_len, pretty_str) = self.lineno_len_and_code_context();
414        let file_name = self.source.file_name();
415        let mut result = String::new();
416
417        let lineno = self.range.start.line;
418        writeln!(result, "{}", &message).unwrap();
419        for _ in 0..lineno_len {
420            result.push(' ');
421        }
422        writeln!(
423            result,
424            " --> {}:{}",
425            file_name.to_string_lossy(),
426            lineno + 1
427        )
428        .unwrap();
429        for _ in 0..lineno_len {
430            result.push(' ');
431        }
432        writeln!(result, "  |").unwrap();
433        result.push_str(&pretty_str);
434        result
435    }
436
437    /// Combines two lexical positions into a larger lexical position overlapping both.
438    /// The file name is assumed to be the same.
439    pub fn combine_into(self, other: &dyn AsRef<Self>) -> Self {
440        let other = other.as_ref();
441        debug_assert!(self.source == other.source, "Assumes sources are equal");
442
443        let start = min(self.range.start, other.range.start);
444        let end = max(self.range.end, other.range.end);
445
446        SrcPos {
447            source: self.source,
448            range: Range { start, end },
449        }
450    }
451
    /// The start of the range.
    pub fn start(&self) -> Position {
        self.range.start
    }
455
    /// The end of the range.
    pub fn end(&self) -> Position {
        self.range.end
    }
459
460    pub fn pos_at_end(&self) -> SrcPos {
461        SrcPos {
462            source: self.source.clone(),
463            range: Range::new(self.range.end, self.range.end),
464        }
465    }
466
467    pub fn pos_at_beginning(&self) -> SrcPos {
468        SrcPos {
469            source: self.source.clone(),
470            range: Range::new(self.range.start, self.range.start),
471        }
472    }
473
    /// The range covered by this position (returned by value).
    pub fn range(&self) -> Range {
        self.range
    }
477
    /// The file name of the referenced source.
    pub fn file_name(&self) -> &Path {
        self.source.file_name()
    }
481
    /// Like `combine_into`, but operates on a clone and leaves `self` untouched.
    pub fn combine(&self, other: &dyn AsRef<Self>) -> Self {
        self.clone().combine_into(other)
    }
485
    /// Whether `pos` lies within the range of this position, as decided by
    /// `Range::contains`. The source is not taken into account.
    pub fn contains(&self, pos: Position) -> bool {
        self.range.contains(pos)
    }
489
490    pub fn end_pos(&self) -> SrcPos {
491        SrcPos::new(self.source.clone(), Range::new(self.end(), self.end()))
492    }
493}
494
/// Denotes an item with an associated source file.
///
/// Most types that implement this trait do so through the blanket implementation
/// on [`HasSrcPos`](trait.HasSrcPos.html).
pub trait HasSource {
    /// The source file associated with this item.
    fn source(&self) -> &Source;
}
502
impl HasSource for Source {
    /// A source is its own associated source.
    fn source(&self) -> &Source {
        self
    }
}
508
/// Denotes an item with an associated lexical range in a source file.
pub trait HasSrcPos {
    /// The lexical range, including its source, associated with this item.
    fn pos(&self) -> &SrcPos;
}
513
impl HasSrcPos for SrcPos {
    /// A `SrcPos` is its own position.
    fn pos(&self) -> &SrcPos {
        self
    }
}
519
// Blanket implementation: everything that has a position also has a source.
impl<T: HasSrcPos> HasSource for T {
    /// The source of the item's position.
    fn source(&self) -> &Source {
        &self.pos().source
    }
}
525
/// A wrapper around a PathBuf that ensures the path is absolute and simplified.
///
/// This struct can be used similarly to a [PathBuf], i.e., dereferencing it will return a [Path].
#[derive(PartialEq, Eq, Hash, Clone, Debug)]
pub(crate) struct FilePath(PathBuf);
531
impl std::ops::Deref for FilePath {
    type Target = Path;
    /// Gives access to the wrapped path as a [`Path`].
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
538
539impl FilePath {
540    pub fn new(path: &Path) -> Self {
541        // In tests, when using inline files, paths are used that do not point to an existing file.
542        // In this case, we simply want to preserve the name without changing it.
543        if cfg!(test) && !path.exists() {
544            return Self(path.to_owned());
545        }
546        // It would also be possible to use dunce::canonicalize here instead of path::absolute
547        // and dunce::simplify, but dunce::canonicalize resolves symlinks
548        // which we don't want (see issue #327)
549        let path = match std::path::absolute(path) {
550            // dunce::simplified converts UNC paths to regular paths.
551            // UNC paths have caused issues when a file was mounted on a network drive.
552            // Related issue: #115
553            Ok(path) => dunce::simplified(&path).to_owned(),
554            Err(err) => {
555                eprintln!(
556                    "Could not create absolute path {}: {:?}",
557                    path.to_string_lossy(),
558                    err
559                );
560                path.to_owned()
561            }
562        };
563        Self(path)
564    }
565}
566
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::Latin1String;
    use crate::syntax::test::{Code, CodeBuilder};
    use pretty_assertions::assert_eq;

    /// Combining two positions yields the smallest position covering both,
    /// independent of the order of the operands.
    #[test]
    fn srcpos_combine() {
        let code = Code::new("hello world");

        assert_eq!(
            code.s1("hello").pos().combine(&code.s1("world").pos()),
            code.pos()
        );

        assert_eq!(code.s1("h").pos().combine(&code.s1("d").pos()), code.pos());

        assert_eq!(code.s1("d").pos().combine(&code.s1("h").pos()), code.pos());
    }

    /// Writes `contents` (Latin-1 encoded) to a temporary file, loads it as a
    /// source and runs `fun` on the resulting code.
    fn with_code_from_file<F, R>(contents: &str, fun: F) -> R
    where
        F: Fn(Code) -> R,
    {
        use std::io::Write;
        let mut file = tempfile::NamedTempFile::new().unwrap();
        let file_name = file.path().to_owned();
        file.write_all(&Latin1String::from_utf8_unchecked(contents).bytes)
            .unwrap();
        fun(CodeBuilder::new().code_from_source(Source::from_latin1_file(&file_name).unwrap()))
    }

    /// Code context of a source that was loaded from a file.
    #[test]
    fn code_context_pos_from_filename() {
        with_code_from_file("hello\nworld\n", |code: Code| {
            assert_eq!(
                code.s1("hello").pos().code_context(),
                "\
1 --> hello
   |  ~~~~~
2  |  world
"
            )
        });
    }

    /// The last line is rendered even when it has no trailing newline.
    #[test]
    fn code_context_pos_last_line_without_newline() {
        let code = Code::new("hello world");
        let pos = code.s1("hello").pos();
        assert_eq!(
            pos.code_context(),
            "\
1 --> hello world
   |  ~~~~~
"
        );
    }

    /// Leading whitespace shifts the underline accordingly.
    #[test]
    fn code_context_pos_with_indent() {
        let code = Code::new("    hello world");
        let pos = code.s1("hello").pos();
        assert_eq!(
            pos.code_context(),
            "\
1 -->     hello world
   |      ~~~~~
"
        );
    }

    /// The end-of-file position is underlined after the last character.
    #[test]
    fn code_context_eof() {
        let code = Code::new("h");
        assert_eq!(
            code.eof_pos().code_context(),
            "\
1 --> h
   |   ~
",
        );
    }

    /// The end-of-file position of an empty source still produces one line.
    #[test]
    fn code_context_eof_empty() {
        let code = Code::new("");
        assert_eq!(code.eof_pos().code_context(), "1 --> \n   |  ~\n",);
    }

    /// Lines following the range are shown as context.
    #[test]
    fn code_context_with_context() {
        let code = Code::new("hello\nworld");
        let pos = code.s1("hello").pos();
        assert_eq!(
            pos.code_context(),
            "\
1 --> hello
   |  ~~~~~
2  |  world
",
        );
    }

    /// Tabs are rendered four columns wide, in the text and in the underline.
    #[test]
    fn code_context_with_tabs() {
        let code = Code::new("\thello\t");
        let pos = code.s1("hello\t").pos();
        assert_eq!(
            pos.code_context(),
            "\
1 -->     hello
   |      ~~~~~~~~~
",
        );
    }

    /// Non-ASCII characters occupy one column each.
    #[test]
    fn code_context_non_ascii() {
        let code = Code::new("åäö\nåäö\n__å_ä_ö__");
        let substr = code.s1("å_ä_ö");
        assert_eq!(substr.end().character - substr.start().character, 5);
        assert_eq!(
            substr.pos().code_context(),
            "\
1  |  åäö
2  |  åäö
3 --> __å_ä_ö__
   |    ~~~~~
",
        );
    }

    /// A character that needs two UTF-16 code units is underlined with a
    /// single `~`.
    #[test]
    fn code_context_double_utf16() {
        // The bomb emoji requires 2 UTF-16 code units
        let code = Code::new("\u{1F4A3}");
        assert_eq!(code.end().character - code.start().character, 2);
        assert_eq!(
            code.pos().code_context(),
            "\
1 --> \u{1F4A3}
   |  ~
",
        );
    }

    /// Same as `code_context_non_ascii`, but with the source loaded from a file.
    #[test]
    fn code_context_non_ascii_from_file() {
        with_code_from_file("åäö\nåäö\n__å_ä_ö__", |code: Code| {
            let substr = code.s1("å_ä_ö");
            assert_eq!(substr.end().character - substr.start().character, 5);
            assert_eq!(
                substr.pos().code_context(),
                "\
1  |  åäö
2  |  åäö
3 --> __å_ä_ö__
   |    ~~~~~
",
            );
        });
    }

    /// Two context lines are shown on each side and line numbers are
    /// right-aligned once they reach two digits.
    #[test]
    fn code_context_with_full_context() {
        let code = Code::new(
            "\
line1
line2
line3
line4
line5
line6
line7
line8
line9
line10
line11
line12
line13",
        );
        let pos = code.s1("line10").pos();
        assert_eq!(
            pos.code_context(),
            " \
 8  |  line8
 9  |  line9
10 --> line10
    |  ~~~~~~
11  |  line11
12  |  line12
",
        );
    }

    /// `show` prints the message, the file location and the code context for
    /// a source loaded from a file.
    #[test]
    fn show_from_filename() {
        with_code_from_file("hello\nworld\nline\n", |code: Code| {
            assert_eq!(
                code.s1("world").pos().show("Greetings"),
                format!(
                    "\
Greetings
  --> {}:2
   |
1  |  hello
2 --> world
   |  ~~~~~
3  |  line
",
                    code.source().file_name().to_string_lossy()
                )
            )
        });
    }

    /// `show` prints the message, the file location and the code context for
    /// an inline source.
    #[test]
    fn show_contents() {
        let code = Code::new("hello\nworld\nline\n");
        assert_eq!(
            code.s1("world").pos().show("Greetings"),
            format!(
                "\
Greetings
  --> {}:2
   |
1  |  hello
2 --> world
   |  ~~~~~
3  |  line
",
                code.source().file_name().to_string_lossy()
            )
        );
    }
}