1use std::ops::Deref;
6use std::path::Path;
7use std::{fmt, vec};
8
9use unicode_width::UnicodeWidthChar;
10
11use super::span::{BytePos, ByteSpan, CharPos, Pos};
12
/// A line/column pair describing a place inside a source text.
///
/// Locations built by `Source::get_pos_location` hold a one-based `line`
/// and a zero-based `column`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Location {
    /// Line number of the location.
    line: usize,

    /// Column offset of the location within its line.
    column: usize,
}

impl Location {
    /// Returns the stored line number.
    pub const fn line(&self) -> usize {
        let Self { line, .. } = *self;
        line
    }

    /// Returns the stored column offset, exactly as stored (no adjustment).
    pub const fn column(&self) -> usize {
        let Self { column, .. } = *self;
        column
    }

    /// Formats the location as `line:column`.
    ///
    /// The line is printed as stored, while the column is printed
    /// incremented by one.
    pub fn display(&self) -> String {
        let Self { line, column } = *self;
        let shown_column = column + 1;

        format!("{line}:{shown_column}")
    }
}
41
/// Describes where a source text came from.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SourceOrigin<'a> {
    /// The text is associated with the given path.
    File(&'a Path),

    /// The text has no associated path.
    Anonymous,
}

impl fmt::Display for SourceOrigin<'_> {
    /// Writes the path for `File` origins and the literal `anonymous`
    /// otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Anonymous => f.write_str("anonymous"),
            Self::File(path) => {
                // Delegate to the path's own `Display` adapter so that the
                // caller's formatter is handed through unchanged.
                let shown = path.display();
                fmt::Display::fmt(&shown, f)
            }
        }
    }
}
64
65#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
72pub struct MultiByteChar {
73 pos: BytePos,
75
76 bytes: u8,
78}
79
80impl MultiByteChar {
81 pub const fn pos(&self) -> &BytePos {
83 &self.pos
84 }
85
86 pub const fn width(&self) -> u8 {
88 self.bytes
89 }
90}
91
92#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
100pub enum SpecialWidthChar {
101 ZeroWidth(BytePos),
103 Wide(BytePos),
105 Tab(BytePos),
107}
108
109impl SpecialWidthChar {
110 pub fn new(pos: BytePos, width: usize) -> Self {
117 match width {
118 0 => Self::ZeroWidth(pos),
119 2 => Self::Wide(pos),
120 4 => Self::Tab(pos),
121 _ => panic!("Unsupported width for SpecialWidthChar: {width}"),
122 }
123 }
124
125 pub const fn width(&self) -> usize {
127 match self {
128 Self::ZeroWidth(_) => 0,
129 Self::Wide(_) => 2,
130 Self::Tab(_) => 4,
131 }
132 }
133
134 pub const fn pos(&self) -> &BytePos {
136 match self {
137 Self::ZeroWidth(p) | Self::Wide(p) | Self::Tab(p) => p,
138 }
139 }
140}
141
142fn analyze_source(content: &'_ str) -> (Vec<BytePos>, Vec<SpecialWidthChar>, Vec<MultiByteChar>) {
150 let mut i = 0;
152
153 let mut lines = vec![BytePos::new(0)];
154 let mut special_width_chars = Vec::new();
155 let mut multi_byte_chars = Vec::new();
156
157 while i < content.len() {
158 let byte = content.as_bytes()[i];
159
160 let mut char_len = 1;
161
162 if byte < 32 {
164 match byte {
165 b'\n' => lines.push(BytePos::from_usize(i + 1)),
166 b'\t' => special_width_chars.push(SpecialWidthChar::Tab(BytePos::from_usize(i))),
167 _ => special_width_chars.push(SpecialWidthChar::ZeroWidth(BytePos::from_usize(i))),
168 }
169 } else if byte > 127 {
170 let chr = content[i..].chars().next().expect("A valid char");
172 char_len = chr.len_utf8();
173
174 let pos = BytePos::from_usize(i);
175
176 if char_len > 1 {
177 multi_byte_chars.push(MultiByteChar {
178 pos,
179 bytes: char_len as u8,
180 })
181 }
182
183 let char_width = UnicodeWidthChar::width(chr).unwrap_or(0);
184
185 if char_width != 1 {
186 special_width_chars.push(SpecialWidthChar::new(pos, char_width));
187 }
188 }
189
190 i += char_len;
191 }
192
193 (lines, special_width_chars, multi_byte_chars)
194}
195
196#[derive(Debug, Clone, PartialEq, Eq, Hash)]
204pub struct Source<'a> {
205 pub(crate) origin: SourceOrigin<'a>,
207
208 pub(crate) content: &'a str,
210
211 pub(crate) lines: Vec<BytePos>,
213
214 pub(crate) special_width_chars: Vec<SpecialWidthChar>,
216
217 pub(crate) multi_byte_chars: Vec<MultiByteChar>,
219}
220
221impl<'a> Source<'a> {
222 pub fn new(origin: SourceOrigin<'a>, content: &'a str) -> Self {
224 let (lines, special_width_chars, multi_byte_chars) = analyze_source(content);
225
226 Self {
227 origin,
228 content,
229 lines,
230 special_width_chars,
231 multi_byte_chars,
232 }
233 }
234
235 pub fn anonymous(content: &'a str) -> Self {
238 Self::new(SourceOrigin::Anonymous, content)
239 }
240
241 pub fn file(path: &'a Path, content: &'a str) -> Self {
244 Self::new(SourceOrigin::File(path), content)
245 }
246
247 pub fn get_charpos(&self, pos: BytePos) -> CharPos {
252 let mut offset = 0;
253 let mut count = 0;
254
255 for swc in &self.special_width_chars {
256 if swc.pos() < &pos {
257 offset += swc.width();
258 count += 1;
259 } else {
260 break;
263 }
264 }
265
266 for mbc in &self.multi_byte_chars {
267 if mbc.pos() < &pos {
268 offset += 1;
269 count += mbc.width() as usize;
270 } else {
271 break;
274 }
275 }
276
277 let cpos = CharPos::from_usize((pos.as_usize() + offset) - count);
278
279 log::trace!("Translating pos: {} > {}", pos, cpos,);
280
281 cpos
282 }
283
284 pub fn get_pos_line_idx(&self, pos: BytePos) -> usize {
286 match self.lines.binary_search(&pos) {
287 Ok(idx) => idx,
288 Err(idx) => idx - 1,
289 }
290 }
291
292 pub fn get_pos_location(&self, pos: BytePos) -> Location {
294 let line_idx = self.get_pos_line_idx(pos);
295 let line_start = self.lines[line_idx];
296
297 let pos_cpos = self.get_charpos(pos);
298 let line_start_cpos = self.get_charpos(line_start);
299
300 Location {
301 line: line_idx + 1,
302 column: (pos_cpos.as_usize() - line_start_cpos.as_usize()),
303 }
304 }
305
306 pub fn get_idx_line(&self, idx: usize) -> &'a str {
308 let line_end_idx = self.lines.get(idx + 1);
309
310 let line_start = self.lines[idx];
311
312 let line_end = BytePos::from_usize(
314 line_end_idx.map_or_else(|| self.content.len(), |&idx| idx.as_usize() - 1),
315 );
316
317 &self.content[ByteSpan::new(line_start, line_end)]
318 }
319
320 pub fn get_pos_line(&self, pos: BytePos) -> &'a str {
322 self.get_idx_line(self.get_pos_line_idx(pos))
323 }
324
325 pub const fn origin(&self) -> &SourceOrigin<'_> {
327 &self.origin
328 }
329
330 pub const fn content(&self) -> &str {
332 self.content
333 }
334}
335
336impl Deref for Source<'_> {
337 type Target = str;
338
339 fn deref(&self) -> &Self::Target {
340 self.content
341 }
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    /// Byte offsets at line starts map to column 0 of the right line.
    #[test]
    fn location_lines() {
        crate::tests::setup_test_env();

        let text = r#"Hello
World
Foo
Bar"#;
        let source = Source::anonymous(text);

        // (byte offset, expected location)
        let expectations = [
            (0, Location { line: 1, column: 0 }),
            (6, Location { line: 2, column: 0 }),
        ];

        for (offset, expected) in expectations {
            assert_eq!(source.get_pos_location(BytePos::new(offset)), expected);
        }
    }

    /// Tabs count as four columns and `\r` counts as none.
    #[test]
    fn location_special() {
        crate::tests::setup_test_env();

        let text = "\tA\r\n\t\tHello";
        let source = Source::anonymous(text);

        // (byte offset, expected location)
        let expectations = [
            (1, Location { line: 1, column: 4 }),
            (6, Location { line: 2, column: 8 }),
        ];

        for (offset, expected) in expectations {
            assert_eq!(source.get_pos_location(BytePos::new(offset)), expected);
        }
    }
}