swamp_script_source_map/
lib.rs

1/*
2 * Copyright (c) Peter Bjorklund. All rights reserved. https://github.com/swamp/script
3 * Licensed under the MIT License. See LICENSE in the project root for license information.
4 */
5
6use pathdiff::diff_paths;
7use seq_map::SeqMap;
8use std::io::ErrorKind;
9use std::path::{Path, PathBuf};
10use std::{fs, io};
11use tracing::trace;
12
13pub mod prelude;
14
/// Identifier of a file registered in a [`SourceMap`]; used as the key of `SourceMap::cache`.
pub type FileId = u16;
16
/// Everything the source map stores about one registered file.
#[derive(Debug)]
pub struct FileInfo {
    /// Name of the mount the file was registered under.
    pub mount_name: String,
    /// Path of the file as registered, relative to the mount.
    pub relative_path: PathBuf,
    /// Complete text of the file.
    pub contents: String,
    /// Byte offset at which each line starts. The first entry is `0`, every following
    /// entry is the offset just after a `\n` in `contents`.
    pub line_offsets: Box<[u16]>,
}
24
/// Registry of mounted directories and the source files that have been loaded from them.
#[derive(Debug)]
pub struct SourceMap {
    /// Mount name to the directory it refers to.
    pub mounts: SeqMap<String, PathBuf>,
    /// File id to the stored information (contents, line offsets, ...) for that file.
    pub cache: SeqMap<FileId, FileInfo>,
    /// `(mount name, relative path)` to the id of an already registered file.
    pub file_cache: SeqMap<(String, String), FileId>,
    /// The id that will be handed out to the next file that is registered without an
    /// explicit id. `SourceMap::new` starts it at `1`.
    pub next_file_id: FileId,
}
32
/// Newtype wrapper around a `String`.
// NOTE(review): presumably a path relative to a mount root - confirm against callers.
#[derive(Debug)]
pub struct RelativePath(pub String);
35
36impl SourceMap {
37    /// # Errors
38    ///
39    pub fn new(mounts: &SeqMap<String, PathBuf>) -> io::Result<Self> {
40        let mut canonical_mounts = SeqMap::new();
41        for (mount_name, base_path) in mounts {
42            let canon_path = base_path.canonicalize().map_err(|_| {
43                io::Error::new(
44                    io::ErrorKind::InvalidData,
45                    format!("could not canonicalize {base_path:?}"),
46                )
47            })?;
48
49            if !canon_path.is_dir() {
50                return Err(io::Error::new(
51                    ErrorKind::NotFound,
52                    format!("{canon_path:?} is not a directory"),
53                ));
54            }
55            canonical_mounts
56                .insert(mount_name.clone(), canon_path)
57                .map_err(|_| {
58                    io::Error::new(io::ErrorKind::InvalidData, "could not insert mount")
59                })?;
60        }
61        Ok(Self {
62            mounts: canonical_mounts,
63            cache: SeqMap::new(),
64            file_cache: SeqMap::new(),
65            next_file_id: 1,
66        })
67    }
68
69    /// # Errors
70    ///
71    pub fn add_mount(&mut self, name: &str, path: &Path) -> io::Result<()> {
72        if !path.is_dir() {
73            return Err(io::Error::new(
74                ErrorKind::NotFound,
75                format!("{path:?} is not a directory"),
76            ));
77        }
78        self.mounts
79            .insert(name.to_string(), path.to_path_buf())
80            .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "could not insert mount"))
81    }
82
83    #[must_use]
84    pub fn base_path(&self, name: &str) -> &Path {
85        self.mounts.get(&name.to_string()).map_or_else(
86            || {
87                panic!("could not find path {name}");
88            },
89            |found| found,
90        )
91    }
92
93    pub fn read_file(&mut self, path: &Path, mount_name: &str) -> io::Result<(FileId, String)> {
94        trace!(
95            ?path,
96            ?mount_name,
97            "actually reading file from secondary storage"
98        );
99        let found_base_path = self.base_path(mount_name);
100        let relative_path = diff_paths(path, found_base_path)
101            .unwrap_or_else(|| panic!("could not find relative path {path:?} {found_base_path:?}"));
102
103        let contents = fs::read_to_string(path)?;
104
105        let id = self.next_file_id;
106        self.next_file_id += 1;
107
108        self.add_manual(id, mount_name, &relative_path, &contents);
109
110        Ok((id, contents))
111    }
112
113    pub fn add_to_cache(
114        &mut self,
115        mount_name: &str,
116        relative_path: &Path,
117        contents: &str,
118        file_id: FileId,
119    ) {
120        self.add_manual(file_id, mount_name, relative_path, contents);
121        self.file_cache
122            .insert(
123                (
124                    mount_name.to_string(),
125                    relative_path.to_str().unwrap().to_string(),
126                ),
127                file_id,
128            )
129            .unwrap();
130    }
131
132    pub fn add_manual(
133        &mut self,
134        id: FileId,
135        mount_name: &str,
136        relative_path: &Path,
137        contents: &str,
138    ) {
139        let line_offsets = Self::compute_line_offsets(contents);
140
141        self.cache
142            .insert(
143                id,
144                FileInfo {
145                    mount_name: mount_name.to_string(),
146                    relative_path: relative_path.to_path_buf(),
147                    contents: contents.to_string(),
148                    line_offsets,
149                },
150            )
151            .expect("could not add file info");
152    }
153
154    pub fn add_manual_no_id(
155        &mut self,
156        mount_name: &str,
157        relative_path: &Path,
158        contents: &str,
159    ) -> FileId {
160        let line_offsets = Self::compute_line_offsets(contents);
161        let id = self.next_file_id;
162        self.next_file_id += 1;
163
164        self.cache
165            .insert(
166                id,
167                FileInfo {
168                    mount_name: mount_name.to_string(),
169                    relative_path: relative_path.to_path_buf(),
170                    contents: contents.to_string(),
171                    line_offsets,
172                },
173            )
174            .expect("could not add file info");
175        id
176    }
177
178    pub fn read_file_relative(
179        &mut self,
180        mount_name: &str,
181        relative_path: &str,
182    ) -> io::Result<(FileId, String)> {
183        if let Some(found_in_cache) = self
184            .file_cache
185            .get(&(mount_name.to_string(), relative_path.to_string()))
186        {
187            let contents = self.cache.get(found_in_cache).unwrap().contents.clone();
188            return Ok((found_in_cache.clone(), contents));
189        }
190
191        let buf = self.to_file_system_path(mount_name, relative_path)?;
192        self.read_file(&buf, mount_name)
193    }
194
195    /*
196
197    fn to_relative_path(path: &ModulePath) -> RelativePath {
198        RelativePath(
199            path.0
200                .iter()
201                .map(|local_type_identifier| local_type_identifier.as_str())
202                .collect::<Vec<_>>()
203                .join("/"),
204        )
205    }
206
207     */
208
209    fn to_file_system_path(&self, mount_name: &str, relative_path: &str) -> io::Result<PathBuf> {
210        let base_path = self.base_path(mount_name).to_path_buf();
211        let mut path_buf = base_path;
212
213        path_buf.push(relative_path);
214
215        path_buf.canonicalize().map_err(|_| {
216            io::Error::new(
217                ErrorKind::Other,
218                format!("path is wrong mount:{mount_name} relative:{relative_path}",),
219            )
220        })
221    }
222
223    fn compute_line_offsets(contents: &str) -> Box<[u16]> {
224        let mut offsets = Vec::new();
225        offsets.push(0);
226        for (i, &byte) in contents.as_bytes().iter().enumerate() {
227            if byte == b'\n' {
228                // Safety: new line is always encoded as single octet
229                let next_line_start = u16::try_from(i + 1).expect("too big file");
230                offsets.push(next_line_start);
231            }
232        }
233        offsets.into_boxed_slice()
234    }
235
236    #[must_use]
237    pub fn get_span_source(&self, file_id: FileId, offset: usize, length: usize) -> &str {
238        self.cache.get(&file_id).map_or_else(
239            || {
240                "ERROR"
241                //panic!("{}", &format!("Invalid file_id {file_id} in span"));
242            },
243            |file_info| {
244                let start = offset;
245                let end = start + length;
246                &file_info.contents[start..end]
247            },
248        )
249    }
250
251    #[must_use]
252    pub fn get_source_line(&self, file_id: FileId, line_number: usize) -> Option<&str> {
253        let file_info = self.cache.get(&file_id)?;
254
255        let start_offset = file_info.line_offsets[line_number - 1] as usize;
256        let end_offset = file_info.line_offsets[line_number] as usize;
257        Some(&file_info.contents[start_offset..end_offset - 1])
258    }
259
260    #[must_use]
261    pub fn get_span_location_utf8(&self, file_id: FileId, offset: usize) -> (usize, usize) {
262        let file_info = self.cache.get(&file_id).expect("Invalid file_id in span");
263
264        let offset = offset as u16;
265
266        // Find the line containing 'offset' via binary search.
267        let line_idx = file_info
268            .line_offsets
269            .binary_search(&offset)
270            .unwrap_or_else(|insert_point| insert_point.saturating_sub(1));
271
272        // Determine the start of the line in bytes
273        let line_start = file_info.line_offsets[line_idx] as usize;
274        let octet_offset = offset as usize;
275
276        // Extract the line slice from line_start to offset
277        let line_text = &file_info.contents[line_start..octet_offset];
278
279        // Count UTF-8 characters in that range, because that is what the end user sees in their editor.
280        let column_character_offset = line_text.chars().count();
281
282        // Add one so it makes more sense to the end user
283        (line_idx + 1, column_character_offset + 1)
284    }
285
286    #[must_use]
287    pub fn fetch_relative_filename(&self, file_id: FileId) -> &str {
288        self.cache
289            .get(&file_id)
290            .unwrap()
291            .relative_path
292            .to_str()
293            .unwrap()
294    }
295    pub fn minimal_relative_path(target: &Path, current_dir: &Path) -> io::Result<PathBuf> {
296        //let target = target.canonicalize()?;
297
298        let current_dir_components = current_dir.components().collect::<Vec<_>>();
299        let target_components = target.components().collect::<Vec<_>>();
300
301        let mut common_prefix_len = 0;
302        for i in 0..std::cmp::min(current_dir_components.len(), target_components.len()) {
303            if current_dir_components[i] == target_components[i] {
304                common_prefix_len += 1;
305            } else {
306                break;
307            }
308        }
309
310        let mut relative_path = PathBuf::new();
311
312        for _ in 0..(current_dir_components.len() - common_prefix_len) {
313            relative_path.push("..");
314        }
315
316        for component in &target_components[common_prefix_len..] {
317            relative_path.push(component);
318        }
319        Ok(relative_path)
320    }
321    pub fn get_relative_path_to(&self, file_id: FileId, current_dir: &Path) -> io::Result<PathBuf> {
322        let file_info = self.cache.get(&file_id).unwrap();
323        let mount_path = self.mounts.get(&file_info.mount_name).unwrap();
324
325        let absolute_path = mount_path.join(&file_info.relative_path);
326
327        Self::minimal_relative_path(&absolute_path, current_dir)
328    }
329}