1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
//! An example that shows how to implement a simple custom file database.
//! The database uses 32-bit file-ids, which could be useful for optimizing
//! memory usage.
//!
//! To run this example, execute the following command from the top level of
//! this repository:
//!
//! ```sh
//! cargo run --example custom_files
//! ```
use std::{
    cmp::Ordering,
    collections::{hash_map::DefaultHasher, BTreeMap},
    fmt::{Debug, Display, Formatter},
    fs::read_to_string,
    hash::{Hash, Hasher},
    path::{Path, PathBuf},
    string::ToString,
};

use serde::{Deserialize, Serialize};

use crate::{
    errors::DiagnosticError,
    text_cache::location::{column_index, line_starts},
    DiagnosticResult, FileID, Label, Location, Span,
};

// Submodules declared by this module. `location` is where the `column_index`
// and `line_starts` helpers imported above come from.
pub mod file_id;
pub mod labels;
pub mod level;
pub mod location;

/// An in-memory collection of source files, keyed by [`FileID`].
///
/// Every entry is a [`TextCache`] holding the text of one file together with
/// its precomputed line starts. The derived [`Default`] yields a storage that
/// contains no files.
#[derive(Clone, Default)]
pub struct TextStorage {
    /// All registered files, ordered by their identifier.
    files: BTreeMap<FileID, TextCache>,
}

/// Renders the storage as a struct whose fields are the stored files: the
/// field name is the file id (as a string) and the value is the debug output
/// of the corresponding cache.
impl Debug for TextStorage {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("TextStorage");
        // One field per file, in the map's iteration order.
        for (id, cache) in self.files.iter() {
            builder.field(id.as_ref(), &cache);
        }
        builder.finish()
    }
}

/// The cached contents of a single source, stored as an owned `String`.
///
/// A cache either mirrors a file on disk (`path` is `Some`) or holds
/// anonymous text that has no backing file (`path` is `None`).
#[derive(Clone)]
pub struct TextCache {
    /// Path to the original file, if the text was read from disk.
    pub path: Option<PathBuf>,
    /// The source code of the file.
    pub text: String,
    /// The starting byte index of every line in `text`.
    pub line_starts: Vec<usize>,
}

/// Compact debug output: shows the path for file-backed caches, the full text
/// for anonymous ones, and in both cases the number of recorded line starts.
impl Debug for TextCache {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("TextCache");
        if let Some(path) = &self.path {
            builder.field("path", path);
        } else {
            builder.field("text", &self.text);
        }
        builder.field("lines", &self.line_starts.len()).finish()
    }
}

impl TextCache {
    /// Builds a cache for text that has no backing file.
    ///
    /// The line starts are computed from `source` immediately.
    pub fn anonymous(source: String) -> Self {
        let starts = line_starts(&source).collect();
        Self { path: None, text: source, line_starts: starts }
    }
    /// Builds a cache for the file at `file` and loads its contents.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be read as a string.
    pub fn file(file: PathBuf) -> DiagnosticResult<Self> {
        let mut cache = Self { path: Some(file), text: String::new(), line_starts: Vec::new() };
        cache.update()?;
        Ok(cache)
    }
    /// Re-reads the backing file and recomputes the line starts.
    ///
    /// Anonymous caches (no `path`) are left untouched.
    ///
    /// # Errors
    ///
    /// Fails if the backing file cannot be read as a string.
    pub fn update(&mut self) -> DiagnosticResult {
        if let Some(path) = &self.path {
            self.text = read_to_string(path)?;
            self.line_starts = line_starts(&self.text).collect();
        }
        Ok(())
    }
    /// The byte index at which the line with the given index starts.
    ///
    /// A `line_index` equal to the number of recorded lines is accepted and
    /// yields the length of the text, so that the end of the last line can be
    /// queried the same way as the start of any other line.
    ///
    /// # Errors
    ///
    /// Returns [`DiagnosticError::LineTooLarge`] when `line_index` is greater
    /// than the number of recorded lines.
    pub fn line_start(&self, line_index: usize) -> DiagnosticResult<usize> {
        let line_count = self.line_starts.len();
        match line_index.cmp(&line_count) {
            // In bounds: the comparison above established `line_index < line_count`.
            Ordering::Less => Ok(self.line_starts[line_index]),
            Ordering::Equal => Ok(self.text.len()),
            // `saturating_sub` keeps the error path from underflowing when no
            // line starts are recorded (the field is public and may be empty).
            Ordering::Greater => Err(DiagnosticError::LineTooLarge { given: line_index, max: line_count.saturating_sub(1) }),
        }
    }
    /// The byte range of the line with the given index in the source text.
    ///
    /// The range runs from the start of the line to the start of the next
    /// line (or the end of the text for the last line).
    ///
    /// # Errors
    ///
    /// Returns [`DiagnosticError::LineTooLarge`] when either boundary lies
    /// beyond the recorded lines.
    pub fn line_range(&self, line_index: usize) -> DiagnosticResult<Span> {
        let start = self.line_start(line_index)?;
        let end = self.line_start(line_index + 1)?;
        Ok(start..end)
    }
}

impl TextStorage {
    /// Add a file to the database, returning the handle that can be used to
    /// refer to it again.
    ///
    /// The file is read from disk immediately. An existing entry with the
    /// same id is replaced.
    ///
    /// # Errors
    ///
    /// Fails if no [`FileID`] can be derived from the path or if the file
    /// cannot be read.
    pub fn file<P>(&mut self, file_path: P) -> DiagnosticResult<FileID>
    where
        P: AsRef<Path>,
    {
        let path = file_path.as_ref().to_path_buf();
        let id = FileID::try_from(&path)?;
        let cache = TextCache::file(path)?;
        self.files.insert(id.clone(), cache);
        Ok(id)
    }
    /// Add text that has no backing file, returning the handle that can be
    /// used to refer to it again.
    ///
    /// The id is derived from the rendered text. An existing entry with the
    /// same id is replaced.
    pub fn anonymous(&mut self, file_text: impl Display) -> FileID {
        let text = file_text.to_string();
        let id = FileID::from(&text);
        let cache = TextCache::anonymous(text);
        self.files.insert(id.clone(), cache);
        id
    }
    /// Reload the file with the given id from disk.
    ///
    /// Unknown ids are ignored: the call succeeds without doing anything.
    ///
    /// # Errors
    ///
    /// Fails if the file is known but cannot be re-read.
    pub fn update(&mut self, name: &FileID) -> DiagnosticResult {
        if let Some(cache) = self.files.get_mut(name) {
            cache.update()?;
        }
        Ok(())
    }
    /// Get the file corresponding to the given id.
    ///
    /// # Errors
    ///
    /// Returns [`DiagnosticError::FileMissing`] if the id is unknown.
    pub fn get_cache(&self, file_id: &FileID) -> DiagnosticResult<&TextCache> {
        self.files.get(file_id).ok_or(DiagnosticError::FileMissing)
    }
    /// The source code of a file.
    ///
    /// # Errors
    ///
    /// Returns [`DiagnosticError::FileMissing`] if the id is unknown.
    pub fn get_text(&self, file_id: &FileID) -> DiagnosticResult<&str> {
        Ok(&self.get_cache(file_id)?.text)
    }
    /// The index of the line at the given byte index.
    /// If the byte index is past the end of the file, returns the maximum line index in the file.
    /// This means that this function only fails if the file is not present.
    ///
    /// The lookup is a binary search over the cached line starts, which are
    /// produced by [`line_starts`] when the file is loaded.
    ///
    /// [`line_starts`]: crate::text_cache::location::line_starts
    pub fn line_index(&self, file_id: &FileID, byte_index: usize) -> DiagnosticResult<usize> {
        let starts = &self.get_cache(file_id)?.line_starts;
        let index = match starts.binary_search(&byte_index) {
            // `byte_index` is exactly the first byte of a line.
            Ok(line) => line,
            // Otherwise the search reports where the next line start would go,
            // so the containing line is the one before it. The subtraction
            // saturates so that an insertion point of 0 cannot underflow.
            Err(next_line) => next_line.saturating_sub(1),
        };
        Ok(index)
    }
    /// The user-facing line number at the given line index.
    /// It is not necessarily checked that the specified line index
    /// is actually in the file.
    ///
    /// Line numbers are 1-indexed from the beginning of the file. The file id
    /// is currently not consulted; the parameter leaves room for per-file
    /// numbering in the style of the
    /// [C preprocessor's `#line` macro][line-macro].
    ///
    /// [line-macro]: https://en.cppreference.com/w/c/preprocessor/line
    // `file_id` is deliberately unused for now (see above).
    #[allow(unused_variables)]
    pub fn line_number(&self, file_id: &FileID, line_index: usize) -> DiagnosticResult<usize> {
        Ok(line_index + 1)
    }
    /// The user-facing column number at the given line index and byte index.
    ///
    /// Column numbers are 1-indexed from the start of the line and are
    /// derived from the result of the [`column_index`] helper.
    ///
    /// # Errors
    ///
    /// Fails if the file is not present or if the byte range of the line
    /// cannot be determined.
    ///
    /// [`column_index`]: crate::text_cache::location::column_index
    pub fn column_number(&self, file_id: &FileID, line_index: usize, byte_index: usize) -> DiagnosticResult<usize> {
        let source = self.get_text(file_id)?;
        let line_range = self.line_range(file_id, line_index)?;
        let column_index = column_index(source.as_ref(), line_range, byte_index);

        Ok(column_index + 1)
    }
    /// Convenience method for returning line and column number at the given
    /// byte index in the file.
    ///
    /// # Errors
    ///
    /// Fails if the file is not present or if the line range lookup fails.
    pub fn location(&self, file_id: &FileID, byte_index: usize) -> DiagnosticResult<Location> {
        let line_index = self.line_index(file_id, byte_index)?;
        let line_number = self.line_number(file_id, line_index)?;
        let column_number = self.column_number(file_id, line_index, byte_index)?;

        Ok(Location { line_number, column_number })
    }
    /// The byte range of line in the source of the file.
    ///
    /// # Errors
    ///
    /// Fails if the file is not present or if the cache rejects the line
    /// index.
    pub fn line_range(&self, file_id: &FileID, line_index: usize) -> DiagnosticResult<Span> {
        self.get_cache(file_id)?.line_range(line_index)
    }
}