swamp_script_source_map/
lib.rs

1/*
2 * Copyright (c) Peter Bjorklund. All rights reserved. https://github.com/swamp/script
3 * Licensed under the MIT License. See LICENSE in the project root for license information.
4 */
5use pathdiff::diff_paths;
6use seq_map::SeqMap;
7use std::path::{Path, PathBuf};
8use std::{fs, io};
9
10pub mod prelude;
11
/// Identifier under which a file is stored in a [`SourceMap`].
pub type FileId = u16;
13
/// A single source file held by a [`SourceMap`], together with an index of
/// where each of its lines starts.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileInfo {
    /// Path the file was registered under.
    pub relative_path: PathBuf,
    /// Complete text of the file.
    pub contents: String,
    /// Byte offset at which each line starts. The first entry is `0` and every
    /// following entry is the offset directly after a `\n` in `contents`.
    pub line_offsets: Box<[u16]>,
}
20
21#[derive(Debug)]
22pub struct SourceMap {
23    pub base_path: PathBuf,
24    pub cache: SeqMap<FileId, FileInfo>,
25    pub next_file_id: FileId,
26}
27
/// Newtype around a `String` holding a relative path.
// NOTE(review): presumably the segments are joined with `/` — confirm against the code that builds it.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RelativePath(pub String);
30
31impl SourceMap {
32    pub fn new(base_path: &Path) -> Self {
33        let canon_path = base_path
34            .canonicalize()
35            .unwrap_or_else(|_| panic!("can not canonicalize {base_path:?}"));
36        Self {
37            base_path: canon_path,
38            cache: SeqMap::new(),
39            next_file_id: 1,
40        }
41    }
42
43    pub fn base_path(&self) -> &Path {
44        &self.base_path
45    }
46
47    pub fn read_file(&mut self, path: &Path) -> io::Result<(FileId, String)> {
48        let relative_path = diff_paths(path, &self.base_path).expect(&format!(
49            "could not find relative path {:?} {:?}",
50            path, self.base_path
51        ));
52
53        let contents = fs::read_to_string(path)?;
54
55        let id = self.next_file_id;
56        self.next_file_id += 1;
57
58        self.add_manual(id, &relative_path, &contents);
59
60        Ok((id, contents))
61    }
62
63    pub fn add_manual(&mut self, id: FileId, relative_path: &Path, contents: &str) {
64        let line_offsets = Self::compute_line_offsets(contents);
65
66        self.cache
67            .insert(
68                id,
69                FileInfo {
70                    relative_path: relative_path.to_path_buf(),
71                    contents: contents.to_string(),
72                    line_offsets,
73                },
74            )
75            .expect("could not add file info");
76    }
77
78    pub fn add_manual_no_id(&mut self, relative_path: &Path, contents: &str) -> FileId {
79        let line_offsets = Self::compute_line_offsets(contents);
80        let id = self.next_file_id;
81        self.next_file_id += 1;
82
83        self.cache
84            .insert(
85                id,
86                FileInfo {
87                    relative_path: relative_path.to_path_buf(),
88                    contents: contents.to_string(),
89                    line_offsets,
90                },
91            )
92            .expect("could not add file info");
93        id
94    }
95
96    pub fn read_file_relative(&mut self, relative_path: &str) -> io::Result<(FileId, String)> {
97        let buf = self.to_file_system_path(relative_path);
98        self.read_file(&buf)
99    }
100
101    /*
102
103    fn to_relative_path(path: &ModulePath) -> RelativePath {
104        RelativePath(
105            path.0
106                .iter()
107                .map(|local_type_identifier| local_type_identifier.as_str())
108                .collect::<Vec<_>>()
109                .join("/"),
110        )
111    }
112
113     */
114
115    fn to_file_system_path(&self, path: &str) -> PathBuf {
116        let mut path_buf = self.base_path.clone();
117
118        path_buf.push(path);
119        path_buf.set_extension("swamp");
120
121        let canon_path = path_buf.canonicalize().expect("can not canonicalize");
122
123        canon_path
124    }
125
126    fn compute_line_offsets(contents: &str) -> Box<[u16]> {
127        let mut offsets = Vec::new();
128        offsets.push(0);
129        for (i, &byte) in contents.as_bytes().iter().enumerate() {
130            if byte == b'\n' {
131                // Safety: new line is always encoded as single octet
132                let next_line_start = u16::try_from(i + 1).expect("too big file");
133                offsets.push(next_line_start);
134            }
135        }
136        offsets.into_boxed_slice()
137    }
138
139    pub fn get_span_source(&self, file_id: FileId, offset: usize, length: usize) -> &str {
140        self.cache.get(&file_id).map_or_else(
141            || {
142                panic!("{}", &format!("Invalid file_id {file_id} in span"));
143            },
144            |file_info| {
145                let start = offset;
146                let end = start + length;
147                &file_info.contents[start..end]
148            },
149        )
150    }
151
152    #[must_use]
153    pub fn get_source_line(&self, file_id: FileId, line_number: usize) -> Option<&str> {
154        let file_info = self.cache.get(&file_id)?;
155
156        let start_offset = file_info.line_offsets[line_number - 1] as usize;
157        let end_offset = file_info.line_offsets[line_number] as usize;
158        Some(&file_info.contents[start_offset..end_offset - 1])
159    }
160
161    pub fn get_span_location_utf8(&self, file_id: FileId, offset: usize) -> (usize, usize) {
162        let file_info = self.cache.get(&file_id).expect("Invalid file_id in span");
163
164        let offset = offset as u16;
165
166        // Find the line containing 'offset' via binary search.
167        let line_idx = file_info
168            .line_offsets
169            .binary_search(&offset)
170            .unwrap_or_else(|insert_point| insert_point.saturating_sub(1));
171
172        // Determine the start of the line in bytes
173        let line_start = file_info.line_offsets[line_idx] as usize;
174        let octet_offset = offset as usize;
175
176        // Extract the line slice from line_start to offset
177        let line_text = &file_info.contents[line_start..octet_offset];
178
179        // Count UTF-8 characters in that range, because that is what the end user sees in their editor.
180        let column_character_offset = line_text.chars().count();
181
182        // Add one so it makes more sense to the end user
183        (line_idx + 1, column_character_offset + 1)
184    }
185
186    pub fn fetch_relative_filename(&self, file_id: FileId) -> &str {
187        self.cache
188            .get(&file_id)
189            .unwrap()
190            .relative_path
191            .to_str()
192            .unwrap()
193    }
194}