Skip to main content

source_map_cache/
lib.rs

1/*
2 * Copyright (c) Peter Bjorklund. All rights reserved. https://github.com/swamp/swamp
3 * Licensed under the MIT License. See LICENSE in the project root for license information.
4 */
5use pathdiff::diff_paths;
6use seq_map::SeqMap;
7use source_map_node::{Node, Span};
8use std::fmt::Debug;
9use std::io::ErrorKind;
10use std::path::{Path, PathBuf};
11use std::{fs, io};
12pub mod prelude;
13pub type FileId = u16;
14
/// Remembers the most recently reported source line so that repeated lookups
/// landing on the same line of the same file can be collapsed.
#[derive(Debug)]
pub struct KeepTrackOfSourceLine {
    /// File id and row of the last location that was reported as new.
    pub last_line_info: SourceFileLineInfo,
    /// Row of the last location that was reported as new.
    pub current_line: usize,
}

impl Default for KeepTrackOfSourceLine {
    fn default() -> Self {
        Self::new()
    }
}

impl KeepTrackOfSourceLine {
    /// Creates a tracker that has not seen any line yet.
    ///
    /// Row, file id and current line all start at `usize::MAX`, so the first
    /// real location handed to [`Self::check_if_new_line`] is reported as new.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            last_line_info: SourceFileLineInfo {
                row: usize::MAX,
                file_id: usize::MAX,
            },
            current_line: usize::MAX,
        }
    }

    /// Reports whether `found` is on a different line than the previous call.
    ///
    /// Returns `None` when `found` has the same file id as the stored
    /// location and its row equals `current_line`. Otherwise the tracker is
    /// updated to `found` and `Some((row, row))` is returned; both tuple
    /// elements are always the row of `found`.
    pub fn check_if_new_line(&mut self, found: &SourceFileLineInfo) -> Option<(usize, usize)> {
        let same_file = self.last_line_info.file_id == found.file_id;
        if same_file && found.row == self.current_line {
            return None;
        }

        // Any change of file or row starts a new line. The former third branch
        // (a row range) could never execute, because a differing row was
        // already handled by the first condition.
        self.last_line_info = found.clone();
        self.current_line = found.row;
        Some((found.row, found.row))
    }
}

/// A (row, file id) pair identifying one line of one source file.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct SourceFileLineInfo {
    /// Row (line) within the file.
    pub row: usize,
    /// Identifier of the file the row belongs to.
    pub file_id: usize,
}
58
/// A cached source file together with its precomputed line index.
#[derive(Debug)]
pub struct FileInfo {
    /// Name of the mount the file was registered under.
    pub mount_name: String,
    /// Path the file was registered with; `SourceMap::read_file` stores the
    /// path relative to the mount's base path here.
    pub relative_path: PathBuf,
    /// Full text of the file.
    pub contents: String,
    /// Byte offsets of line starts as produced by
    /// `SourceMap::compute_line_offsets`: `0`, the offset after every `\n`,
    /// and the end-of-file offset if it is not already the last entry.
    /// Entries are `u16`, so contents longer than `u16::MAX` bytes cannot be
    /// indexed.
    pub line_offsets: Box<[u16]>,
}
66
/// In-memory cache of source files, grouped under named mount points.
#[derive(Debug)]
pub struct SourceMap {
    /// Mount name to base directory.
    pub mounts: SeqMap<String, PathBuf>,
    /// File id to cached file contents and line index.
    pub cache: SeqMap<FileId, FileInfo>,
    /// `(mount name, relative path as string)` to file id; used by
    /// `read_file_relative` and `set` to find already registered files.
    pub file_cache: SeqMap<(String, String), FileId>,
    /// Id handed to the next file registered through `read_file` or
    /// `add_manual_no_id`; `SourceMap::new` starts it at `1`.
    pub next_file_id: FileId,
}
74
/// Newtype wrapper around a `String`.
///
/// NOTE(review): presumably holds a path relative to a mount; it is not
/// referenced by any other item visible in this file, so confirm against callers.
#[derive(Debug)]
pub struct RelativePath(pub String);
77
78impl SourceMap {
79    /// # Errors
80    ///
81    pub fn new(mounts: &SeqMap<String, PathBuf>) -> io::Result<Self> {
82        let mut canonical_mounts = SeqMap::new();
83        for (mount_name, base_path) in mounts {
84            let canon_path = base_path.canonicalize().map_err(|_| {
85                io::Error::new(
86                    io::ErrorKind::InvalidData,
87                    format!("could not canonicalize {base_path:?}"),
88                )
89            })?;
90
91            if !canon_path.is_dir() {
92                return Err(io::Error::new(
93                    ErrorKind::NotFound,
94                    format!("{canon_path:?} is not a directory"),
95                ));
96            }
97            canonical_mounts
98                .insert(mount_name.clone(), canon_path)
99                .map_err(|_| {
100                    io::Error::new(io::ErrorKind::InvalidData, "could not insert mount")
101                })?;
102        }
103        Ok(Self {
104            mounts: canonical_mounts,
105            cache: SeqMap::new(),
106            file_cache: SeqMap::new(),
107            next_file_id: 1,
108        })
109    }
110
111    /// # Errors
112    ///
113    pub fn add_mount(&mut self, name: &str, path: &Path) -> io::Result<()> {
114        if !path.is_dir() {
115            return Err(io::Error::new(
116                ErrorKind::NotFound,
117                format!("{path:?} is not a directory"),
118            ));
119        }
120        self.mounts
121            .insert(name.to_string(), path.to_path_buf())
122            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "could not insert mount"))
123    }
124
125    #[must_use]
126    pub fn base_path(&self, name: &str) -> &Path {
127        self.mounts.get(&name.to_string()).unwrap_or_else(|| {
128            panic!("could not find path {name}");
129        })
130    }
131
132    pub fn read_file(&mut self, path: &Path, mount_name: &str) -> io::Result<(FileId, String)> {
133        let found_base_path = self.base_path(mount_name);
134        let relative_path = diff_paths(path, found_base_path)
135            .unwrap_or_else(|| panic!("could not find relative path {path:?} {found_base_path:?}"));
136
137        let contents = fs::read_to_string(path)?;
138
139        let id = self.next_file_id;
140        self.next_file_id += 1;
141
142        self.add_manual(id, mount_name, &relative_path, &contents);
143
144        Ok((id, contents))
145    }
146
    /// Registers `contents` under the caller-chosen `file_id`.
    ///
    /// Forwards to `add_manual`; only the argument order differs.
    ///
    /// # Panics
    ///
    /// Panics under the same conditions as `add_manual`.
    pub fn add_to_cache(
        &mut self,
        mount_name: &str,
        relative_path: &Path,
        contents: &str,
        file_id: FileId,
    ) {
        self.add_manual(file_id, mount_name, relative_path, contents);
    }
156
157    pub fn add_manual(
158        &mut self,
159        id: FileId,
160        mount_name: &str,
161        relative_path: &Path,
162        contents: &str,
163    ) {
164        let line_offsets = Self::compute_line_offsets(contents);
165
166        self.cache
167            .insert(
168                id,
169                FileInfo {
170                    mount_name: mount_name.to_string(),
171                    relative_path: relative_path.to_path_buf(),
172                    contents: contents.to_string(),
173                    line_offsets,
174                },
175            )
176            .expect("could not add file info");
177
178        // Update file_cache to maintain consistency
179        self.file_cache
180            .insert(
181                (
182                    mount_name.to_string(),
183                    relative_path.to_str().unwrap().to_string(),
184                ),
185                id,
186            )
187            .expect("could not add to file cache");
188    }
189
190    pub fn add_manual_no_id(
191        &mut self,
192        mount_name: &str,
193        relative_path: &Path,
194        contents: &str,
195    ) -> FileId {
196        let line_offsets = Self::compute_line_offsets(contents);
197        let id = self.next_file_id;
198        self.next_file_id += 1;
199
200        self.cache
201            .insert(
202                id,
203                FileInfo {
204                    mount_name: mount_name.to_string(),
205                    relative_path: relative_path.to_path_buf(),
206                    contents: contents.to_string(),
207                    line_offsets,
208                },
209            )
210            .expect("could not add file info");
211
212        // Update file_cache to maintain consistency
213        self.file_cache
214            .insert(
215                (
216                    mount_name.to_string(),
217                    relative_path.to_str().unwrap().to_string(),
218                ),
219                id,
220            )
221            .expect("could not add to file cache");
222
223        id
224    }
225
226    pub fn read_file_relative(
227        &mut self,
228        mount_name: &str,
229        relative_path: &str,
230    ) -> io::Result<(FileId, String)> {
231        if let Some(found_in_cache) = self
232            .file_cache
233            .get(&(mount_name.to_string(), relative_path.to_string()))
234        {
235            let contents = self.cache.get(found_in_cache).unwrap().contents.clone();
236            return Ok((*found_in_cache, contents));
237        }
238
239        let buf = self.to_file_system_path(mount_name, relative_path)?;
240        self.read_file(&buf, mount_name)
241    }
242
243    fn to_file_system_path(&self, mount_name: &str, relative_path: &str) -> io::Result<PathBuf> {
244        let base_path = self.base_path(mount_name).to_path_buf();
245        let mut path_buf = base_path;
246
247        path_buf.push(relative_path);
248
249        path_buf.canonicalize().map_err(|_| {
250            io::Error::other(format!(
251                "path is wrong mount:{mount_name} relative:{relative_path}",
252            ))
253        })
254    }
255
256    fn compute_line_offsets(contents: &str) -> Box<[u16]> {
257        let mut offsets = Vec::new();
258        offsets.push(0);
259
260        // Track positions of all newlines
261        for (i, &byte) in contents.as_bytes().iter().enumerate() {
262            if byte == b'\n' {
263                // Safety: new line is always encoded as single octet
264                let next_line_start = u16::try_from(i + 1).expect("too big file");
265                offsets.push(next_line_start);
266            }
267        }
268
269        // Always add the end of file position if it's not already there
270        // (happens when file doesn't end with newline)
271        let eof_offset = u16::try_from(contents.len()).expect("too big file");
272        if offsets.last().is_none_or(|&last| last != eof_offset) {
273            offsets.push(eof_offset);
274        }
275
276        offsets.into_boxed_slice()
277    }
278
279    #[must_use]
280    pub fn get_span_source(&self, file_id: FileId, offset: usize, length: usize) -> Option<&str> {
281        let file_info = self.cache.get(&file_id)?;
282
283        let start = offset;
284        let end = offset + length;
285
286        // Verify both start and end are on character boundaries
287        if start > file_info.contents.len()
288            || end > file_info.contents.len()
289            || !file_info.contents.is_char_boundary(start)
290            || !file_info.contents.is_char_boundary(end)
291        {
292            // Invalid offsets - not on character boundaries
293            return None;
294        }
295
296        Some(&file_info.contents[start..end])
297    }
298
299    #[must_use]
300    pub fn get_source_line(&self, file_id: FileId, line_number: usize) -> Option<&str> {
301        let file_info = self.cache.get(&file_id)?;
302
303        // Check if the requested line number is valid
304        if line_number == 0 || line_number >= file_info.line_offsets.len() {
305            return None;
306        }
307
308        let start_offset = file_info.line_offsets[line_number - 1] as usize;
309        let end_offset = file_info.line_offsets[line_number] as usize;
310
311        let line = &file_info.contents[start_offset..end_offset];
312
313        // Remove trailing newline if present.
314        // Some files may not end with a newline.
315        Some(line.strip_suffix('\n').unwrap_or(line))
316    }
317
318    #[must_use]
319    pub fn get_span_location_utf8(&self, file_id: FileId, offset: usize) -> Option<(usize, usize)> {
320        let file_info = self.cache.get(&file_id)?;
321        let offset = offset as u16;
322
323        let octet_offset = offset as usize;
324
325        // Verify offset is on a character boundary
326        if octet_offset > file_info.contents.len()
327            || !file_info.contents.is_char_boundary(octet_offset)
328        {
329            // Invalid offset - not on a character boundary
330            return None;
331        }
332
333        // Find the line containing 'offset' via binary search.
334        let line_idx = file_info
335            .line_offsets
336            .binary_search(&offset)
337            .unwrap_or_else(|insert_point| insert_point.saturating_sub(1));
338
339        // Determine the start of the line in bytes
340        let line_start = file_info.line_offsets[line_idx] as usize;
341
342        // Extract the line slice from line_start to offset
343        let line_text = &file_info.contents[line_start..octet_offset];
344
345        // Count UTF-8 characters in that range, because that is what the end user sees in their editor.
346        let column_character_offset = line_text.chars().count();
347
348        // Add one so it makes more sense to the end user
349        Some((line_idx + 1, column_character_offset + 1))
350    }
351
352    #[must_use]
353    pub fn fetch_relative_filename(&self, file_id: FileId) -> &str {
354        self.cache
355            .get(&file_id)
356            .unwrap()
357            .relative_path
358            .to_str()
359            .unwrap()
360    }
361
362    pub fn minimal_relative_path(target: &Path, current_dir: &Path) -> io::Result<PathBuf> {
363        let current_dir_components = current_dir.components().collect::<Vec<_>>();
364        let target_components = target.components().collect::<Vec<_>>();
365
366        let mut common_prefix_len = 0;
367        for i in 0..std::cmp::min(current_dir_components.len(), target_components.len()) {
368            if current_dir_components[i] == target_components[i] {
369                common_prefix_len += 1;
370            } else {
371                break;
372            }
373        }
374
375        let mut relative_path = PathBuf::new();
376
377        for _ in 0..(current_dir_components.len() - common_prefix_len) {
378            relative_path.push("..");
379        }
380
381        for component in &target_components[common_prefix_len..] {
382            relative_path.push(component);
383        }
384        Ok(relative_path)
385    }
386
387    pub fn get_relative_path_to(&self, file_id: FileId, current_dir: &Path) -> io::Result<PathBuf> {
388        let file_info = self.cache.get(&file_id).unwrap();
389        let mount_path = self.mounts.get(&file_info.mount_name).unwrap();
390
391        let absolute_path = mount_path.join(&file_info.relative_path);
392
393        Self::minimal_relative_path(&absolute_path, current_dir)
394    }
395
396    #[must_use]
397    pub fn get_text(&self, node: &Node) -> Option<&str> {
398        self.get_span_source(
399            node.span.file_id,
400            node.span.offset as usize,
401            node.span.length as usize,
402        )
403    }
404
405    #[must_use]
406    pub fn get_text_span(&self, span: &Span) -> Option<&str> {
407        self.get_span_source(span.file_id, span.offset as usize, span.length as usize)
408    }
409
410    #[must_use]
411    pub fn get_line(&self, span: &Span, current_dir: &Path) -> Option<FileLineInfo> {
412        let relative_file_name = self.get_relative_path_to(span.file_id, current_dir).ok()?;
413        let (row, col) = self.get_span_location_utf8(span.file_id, span.offset as usize)?;
414        let line = self.get_source_line(span.file_id, row)?;
415
416        Some(FileLineInfo {
417            row,
418            col,
419            line: line.to_string(),
420            relative_file_name: relative_file_name.to_str().unwrap().to_string(),
421        })
422    }
423
424    pub fn set(&mut self, mount_name: &str, relative_path: &Path, contents: &str) -> FileId {
425        // Check if file already exists in cache
426        if let Some(&existing_file_id) = self.file_cache.get(&(
427            mount_name.to_string(),
428            relative_path.to_str().unwrap().to_string(),
429        )) {
430            // File exists, update its contents
431            let line_offsets = Self::compute_line_offsets(contents);
432
433            if let Some(file_info) = self.cache.get_mut(&existing_file_id) {
434                file_info.contents = contents.to_string();
435                file_info.line_offsets = line_offsets;
436            }
437
438            existing_file_id
439        } else {
440            // File doesn't exist, add it as new
441            self.add_manual_no_id(mount_name, relative_path, contents)
442        }
443    }
444}
445
/// Location and text of one source line, as produced by `SourceMap::get_line`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileLineInfo {
    /// 1-based line number.
    pub row: usize,
    /// 1-based column, counted in characters.
    pub col: usize,
    /// Text of the line without its trailing newline.
    pub line: String,
    /// File name relative to the directory the lookup was made from.
    pub relative_file_name: String,
}
452
/// Text of one source line together with the name of the file it came from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SourceLineInfo {
    /// Text of the line.
    pub line: String,
    /// Relative name of the file containing the line.
    pub relative_file_name: String,
}
457
/// Read-only lookups of source text and locations.
///
/// The implementation in this file, `SourceMapWrapper`, forwards every method
/// to a borrowed `SourceMap`.
pub trait SourceMapLookup: Debug {
    /// Returns the source text covered by `node`'s span, if it can be resolved.
    fn get_text(&self, node: &Node) -> Option<&str>;
    /// Returns the source text covered by `span`, if it can be resolved.
    fn get_text_span(&self, span: &Span) -> Option<&str>;
    /// Returns row, column, line text and file name for `span`, if it can be
    /// resolved.
    fn get_line(&self, span: &Span) -> Option<FileLineInfo>;
    /// Returns the relative path of the file `file_id` as a string.
    fn get_relative_path(&self, file_id: FileId) -> String;
    /// Returns the text of line `row` of the file `file_id`, if it exists.
    fn get_source_line(&self, file_id: FileId, row: usize) -> Option<&str>;
}
465
/// A borrowed `SourceMap` paired with the directory that relative file names
/// are computed against.
#[derive(Debug)]
pub struct SourceMapWrapper<'a> {
    /// The source map all lookups are forwarded to.
    pub source_map: &'a SourceMap,
    /// Directory passed to `SourceMap::get_line` and
    /// `SourceMap::get_relative_path_to` as `current_dir`.
    pub current_dir: PathBuf,
}
471
472impl SourceMapLookup for SourceMapWrapper<'_> {
473    fn get_text(&self, resolved_node: &Node) -> Option<&str> {
474        self.source_map.get_text(resolved_node)
475    }
476
477    fn get_text_span(&self, span: &Span) -> Option<&str> {
478        self.source_map.get_text_span(span)
479    }
480
481    fn get_line(&self, span: &Span) -> Option<FileLineInfo> {
482        self.source_map.get_line(span, &self.current_dir)
483    }
484
485    fn get_relative_path(&self, file_id: FileId) -> String {
486        self.source_map
487            .get_relative_path_to(file_id, &self.current_dir)
488            .unwrap()
489            .to_str()
490            .unwrap()
491            .to_string()
492    }
493
494    fn get_source_line(&self, file_id: FileId, line_number: usize) -> Option<&str> {
495        self.source_map.get_source_line(file_id, line_number)
496    }
497}