source_map_cache/
lib.rs

1/*
2 * Copyright (c) Peter Bjorklund. All rights reserved. https://github.com/swamp/swamp
3 * Licensed under the MIT License. See LICENSE in the project root for license information.
4 */
5use pathdiff::diff_paths;
6use seq_map::SeqMap;
7use source_map_node::{Node, Span};
8use std::fmt::Debug;
9use std::io::ErrorKind;
10use std::path::{Path, PathBuf};
11use std::{fs, io};
12pub mod prelude;
/// Identifier of a file registered in a [`SourceMap`]; it is the key type of `SourceMap::cache`.
pub type FileId = u16;
14
15
16pub struct KeepTrackOfSourceLine {
17    pub last_line_info: SourceFileLineInfo,
18    pub current_line: usize,
19}
20
21impl Default for KeepTrackOfSourceLine {
22    fn default() -> Self {
23        Self::new()
24    }
25}
26
27impl KeepTrackOfSourceLine {
28    #[must_use]
29    pub const fn new() -> Self {
30        Self {
31            last_line_info: SourceFileLineInfo {
32                row: usize::MAX,
33                file_id: usize::MAX,
34            },
35            current_line: usize::MAX,
36        }
37    }
38
39    pub fn check_if_new_line(&mut self, found: &SourceFileLineInfo) -> Option<(usize, usize)> {
40        if self.last_line_info.file_id != found.file_id || found.row != self.current_line {
41            self.last_line_info = found.clone();
42            self.current_line = self.last_line_info.row;
43            Some((self.last_line_info.row, self.last_line_info.row))
44        } else if found.row == self.current_line {
45            None
46        } else {
47            let line_start = self.current_line;
48            self.current_line = found.row;
49            Some((line_start, found.row))
50        }
51    }
52}
53
/// A row within a file, identified by the file's id.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct SourceFileLineInfo {
    /// Row (line) within the file.
    pub row: usize,
    /// Id of the file that the row belongs to.
    pub file_id: usize,
}
59
60
/// Everything the source map stores about one registered file.
#[derive(Debug)]
pub struct FileInfo {
    /// Name of the mount that the file belongs to.
    pub mount_name: String,
    /// Path of the file relative to the base path of its mount.
    pub relative_path: PathBuf,
    /// Complete text of the file.
    pub contents: String,
    /// Octet offsets into `contents`; `SourceMap` fills this with the start of
    /// every line plus the end-of-file position when the file is added.
    pub line_offsets: Box<[u16]>,
}
68
69#[derive(Debug)]
70pub struct SourceMap {
71    pub mounts: SeqMap<String, PathBuf>,
72    pub cache: SeqMap<FileId, FileInfo>,
73    pub file_cache: SeqMap<(String, String), FileId>,
74    pub next_file_id: FileId,
75}
76
/// Newtype around a `String`; judging by the name it is meant to hold a
/// relative path (nothing else in this file uses it).
#[derive(Debug)]
pub struct RelativePath(pub String);
79
80impl SourceMap {
81    /// # Errors
82    ///
83    pub fn new(mounts: &SeqMap<String, PathBuf>) -> io::Result<Self> {
84        let mut canonical_mounts = SeqMap::new();
85        for (mount_name, base_path) in mounts {
86            let canon_path = base_path.canonicalize().map_err(|_| {
87                io::Error::new(
88                    io::ErrorKind::InvalidData,
89                    format!("could not canonicalize {base_path:?}"),
90                )
91            })?;
92
93            if !canon_path.is_dir() {
94                return Err(io::Error::new(
95                    ErrorKind::NotFound,
96                    format!("{canon_path:?} is not a directory"),
97                ));
98            }
99            canonical_mounts
100                .insert(mount_name.clone(), canon_path)
101                .map_err(|_| {
102                    io::Error::new(io::ErrorKind::InvalidData, "could not insert mount")
103                })?;
104        }
105        Ok(Self {
106            mounts: canonical_mounts,
107            cache: SeqMap::new(),
108            file_cache: SeqMap::new(),
109            next_file_id: 1,
110        })
111    }
112
113    /// # Errors
114    ///
115    pub fn add_mount(&mut self, name: &str, path: &Path) -> io::Result<()> {
116        if !path.is_dir() {
117            return Err(io::Error::new(
118                ErrorKind::NotFound,
119                format!("{path:?} is not a directory"),
120            ));
121        }
122        self.mounts
123            .insert(name.to_string(), path.to_path_buf())
124            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "could not insert mount"))
125    }
126
127    #[must_use]
128    pub fn base_path(&self, name: &str) -> &Path {
129        self.mounts.get(&name.to_string()).map_or_else(
130            || {
131                panic!("could not find path {name}");
132            },
133            |found| found,
134        )
135    }
136
137    pub fn read_file(&mut self, path: &Path, mount_name: &str) -> io::Result<(FileId, String)> {
138        let found_base_path = self.base_path(mount_name);
139        let relative_path = diff_paths(path, found_base_path)
140            .unwrap_or_else(|| panic!("could not find relative path {path:?} {found_base_path:?}"));
141
142        let contents = fs::read_to_string(path)?;
143
144        let id = self.next_file_id;
145        self.next_file_id += 1;
146
147        self.add_manual(id, mount_name, &relative_path, &contents);
148
149        Ok((id, contents))
150    }
151
152    pub fn add_to_cache(
153        &mut self,
154        mount_name: &str,
155        relative_path: &Path,
156        contents: &str,
157        file_id: FileId,
158    ) {
159        self.add_manual(file_id, mount_name, relative_path, contents);
160        self.file_cache
161            .insert(
162                (
163                    mount_name.to_string(),
164                    relative_path.to_str().unwrap().to_string(),
165                ),
166                file_id,
167            )
168            .unwrap();
169    }
170
171    pub fn add_manual(
172        &mut self,
173        id: FileId,
174        mount_name: &str,
175        relative_path: &Path,
176        contents: &str,
177    ) {
178        let line_offsets = Self::compute_line_offsets(contents);
179
180        self.cache
181            .insert(
182                id,
183                FileInfo {
184                    mount_name: mount_name.to_string(),
185                    relative_path: relative_path.to_path_buf(),
186                    contents: contents.to_string(),
187                    line_offsets,
188                },
189            )
190            .expect("could not add file info");
191    }
192
193    pub fn add_manual_no_id(
194        &mut self,
195        mount_name: &str,
196        relative_path: &Path,
197        contents: &str,
198    ) -> FileId {
199        let line_offsets = Self::compute_line_offsets(contents);
200        let id = self.next_file_id;
201        self.next_file_id += 1;
202
203        self.cache
204            .insert(
205                id,
206                FileInfo {
207                    mount_name: mount_name.to_string(),
208                    relative_path: relative_path.to_path_buf(),
209                    contents: contents.to_string(),
210                    line_offsets,
211                },
212            )
213            .expect("could not add file info");
214        id
215    }
216
217    pub fn read_file_relative(
218        &mut self,
219        mount_name: &str,
220        relative_path: &str,
221    ) -> io::Result<(FileId, String)> {
222        if let Some(found_in_cache) = self
223            .file_cache
224            .get(&(mount_name.to_string(), relative_path.to_string()))
225        {
226            let contents = self.cache.get(found_in_cache).unwrap().contents.clone();
227            return Ok((found_in_cache.clone(), contents));
228        }
229
230        let buf = self.to_file_system_path(mount_name, relative_path)?;
231        self.read_file(&buf, mount_name)
232    }
233
234    fn to_file_system_path(&self, mount_name: &str, relative_path: &str) -> io::Result<PathBuf> {
235        let base_path = self.base_path(mount_name).to_path_buf();
236        let mut path_buf = base_path;
237
238        path_buf.push(relative_path);
239
240        path_buf.canonicalize().map_err(|_| {
241            io::Error::new(
242                ErrorKind::Other,
243                format!("path is wrong mount:{mount_name} relative:{relative_path}",),
244            )
245        })
246    }
247
248    fn compute_line_offsets(contents: &str) -> Box<[u16]> {
249        let mut offsets = Vec::new();
250        offsets.push(0);
251
252        // Track positions of all newlines
253        for (i, &byte) in contents.as_bytes().iter().enumerate() {
254            if byte == b'\n' {
255                // Safety: new line is always encoded as single octet
256                let next_line_start = u16::try_from(i + 1).expect("too big file");
257                offsets.push(next_line_start);
258            }
259        }
260
261        // Always add the end of file position if it's not already there
262        // (happens when file doesn't end with newline)
263        let eof_offset = u16::try_from(contents.len()).expect("too big file");
264        if offsets.last().map_or(true, |&last| last != eof_offset) {
265            offsets.push(eof_offset);
266        }
267
268        offsets.into_boxed_slice()
269    }
270
271    #[must_use]
272    pub fn get_span_source(&self, file_id: FileId, offset: usize, length: usize) -> &str {
273        self.cache.get(&file_id).map_or_else(
274            || {
275                "ERROR"
276                //panic!("{}", &format!("Invalid file_id {file_id} in span"));
277            },
278            |file_info| {
279                let start = offset;
280                let end = start + length;
281                &file_info.contents[start..end]
282            },
283        )
284    }
285
286    #[must_use]
287    pub fn get_source_line(&self, file_id: FileId, line_number: usize) -> Option<&str> {
288        let file_info = self.cache.get(&file_id)?;
289
290        // Check if the requested line number is valid
291        if line_number == 0 || line_number >= file_info.line_offsets.len() {
292            return None;
293        }
294
295        let start_offset = file_info.line_offsets[line_number - 1] as usize;
296        let end_offset = file_info.line_offsets[line_number] as usize;
297
298        let line = &file_info.contents[start_offset..end_offset];
299
300        // Remove trailing newline if present.
301        // Some files may not end with a newline.
302        if line.ends_with('\n') {
303            Some(&line[..line.len() - 1])
304        } else {
305            Some(line)
306        }
307    }
308
309    #[must_use]
310    pub fn get_span_location_utf8(&self, file_id: FileId, offset: usize) -> (usize, usize) {
311        let file_info = self.cache.get(&file_id).expect("Invalid file_id in span");
312
313        let offset = offset as u16;
314
315        // Find the line containing 'offset' via binary search.
316        let line_idx = file_info
317            .line_offsets
318            .binary_search(&offset)
319            .unwrap_or_else(|insert_point| insert_point.saturating_sub(1));
320
321        // Determine the start of the line in bytes
322        let line_start = file_info.line_offsets[line_idx] as usize;
323        let octet_offset = offset as usize;
324
325        // Extract the line slice from line_start to offset
326        let line_text = &file_info.contents[line_start..octet_offset];
327
328        // Count UTF-8 characters in that range, because that is what the end user sees in their editor.
329        let column_character_offset = line_text.chars().count();
330
331        // Add one so it makes more sense to the end user
332        (line_idx + 1, column_character_offset + 1)
333    }
334
335    #[must_use]
336    pub fn fetch_relative_filename(&self, file_id: FileId) -> &str {
337        self.cache
338            .get(&file_id)
339            .unwrap()
340            .relative_path
341            .to_str()
342            .unwrap()
343    }
344
345    pub fn minimal_relative_path(target: &Path, current_dir: &Path) -> io::Result<PathBuf> {
346        let current_dir_components = current_dir.components().collect::<Vec<_>>();
347        let target_components = target.components().collect::<Vec<_>>();
348
349        let mut common_prefix_len = 0;
350        for i in 0..std::cmp::min(current_dir_components.len(), target_components.len()) {
351            if current_dir_components[i] == target_components[i] {
352                common_prefix_len += 1;
353            } else {
354                break;
355            }
356        }
357
358        let mut relative_path = PathBuf::new();
359
360        for _ in 0..(current_dir_components.len() - common_prefix_len) {
361            relative_path.push("..");
362        }
363
364        for component in &target_components[common_prefix_len..] {
365            relative_path.push(component);
366        }
367        Ok(relative_path)
368    }
369
370    pub fn get_relative_path_to(&self, file_id: FileId, current_dir: &Path) -> io::Result<PathBuf> {
371        let file_info = self.cache.get(&file_id).unwrap();
372        let mount_path = self.mounts.get(&file_info.mount_name).unwrap();
373
374        let absolute_path = mount_path.join(&file_info.relative_path);
375
376        Self::minimal_relative_path(&absolute_path, current_dir)
377    }
378
379    pub fn get_text(&self, node: &Node) -> &str {
380        self.get_span_source(
381            node.span.file_id,
382            node.span.offset as usize,
383            node.span.length as usize,
384        )
385    }
386
387    pub fn get_text_span(&self, span: &Span) -> &str {
388        self.get_span_source(span.file_id, span.offset as usize, span.length as usize)
389    }
390
391    pub fn get_line(&self, span: &Span, current_dir: &Path) -> FileLineInfo {
392        let relative_file_name = self
393            .get_relative_path_to(span.file_id, current_dir)
394            .unwrap();
395        let (row, col) = self.get_span_location_utf8(span.file_id, span.offset as usize);
396        let line = self.get_source_line(span.file_id, row).unwrap();
397
398        FileLineInfo {
399            row,
400            col,
401            line: line.to_string(),
402            relative_file_name: relative_file_name.to_str().unwrap().to_string(),
403        }
404    }
405}
406
/// Location of a span together with the text of the line it is on.
#[derive(Debug)]
pub struct FileLineInfo {
    /// 1-based row of the span.
    pub row: usize,
    /// 1-based column of the span, counted in characters.
    pub col: usize,
    /// Text of the row, without the trailing newline.
    pub line: String,
    /// File name relative to the directory the lookup was made from.
    pub relative_file_name: String,
}
413
/// Text of a source line together with the name of the file it comes from.
#[derive(Debug)]
pub struct SourceLineInfo {
    /// Text of the line.
    pub line: String,
    /// File name, presumably relative like in `FileLineInfo` (nothing in this
    /// file constructs the type, so confirm against the callers).
    pub relative_file_name: String,
}
418
419pub trait SourceMapLookup: Debug {
420    fn get_text(&self, node: &Node) -> &str;
421    fn get_text_span(&self, span: &Span) -> &str;
422    fn get_line(&self, span: &Span) -> FileLineInfo;
423    fn get_relative_path(&self, file_id: FileId) -> String;
424    fn get_source_line(&self, file_id: FileId, row: usize) -> Option<&str>;
425}
426
427#[derive(Debug)]
428pub struct SourceMapWrapper<'a> {
429    pub source_map: &'a SourceMap,
430    pub current_dir: PathBuf,
431}
432
433impl SourceMapLookup for SourceMapWrapper<'_> {
434    fn get_text(&self, resolved_node: &Node) -> &str {
435        self.source_map.get_text(resolved_node)
436    }
437
438    fn get_text_span(&self, span: &Span) -> &str {
439        self.source_map.get_text_span(span)
440    }
441
442    fn get_line(&self, span: &Span) -> FileLineInfo {
443        self.source_map.get_line(span, &self.current_dir)
444    }
445
446    fn get_relative_path(&self, file_id: FileId) -> String {
447        self.source_map
448            .get_relative_path_to(file_id, &self.current_dir)
449            .unwrap()
450            .to_str()
451            .unwrap()
452            .to_string()
453    }
454
455    fn get_source_line(&self, file_id: FileId, line_number: usize) -> Option<&str> {
456        self.source_map.get_source_line(file_id, line_number)
457    }
458}