#[derive(Debug)]
pub struct SourceWithLineStarts {
src: String,
marks: Vec<CharMappingMark>,
}
impl SourceWithLineStarts {
pub fn new(src: &str) -> Self {
let mut iterator = src.char_indices().peekable();
let mut line = 1;
let mut column = 0;
let mut marks = vec![CharMappingMark {
offset: 0,
line,
column,
}];
loop {
match iterator.next() {
Some((_, '\r')) if matches!(iterator.peek(), Some((_, '\n'))) => {
column += 1;
}
Some((offset, '\r' | '\n')) => {
line += 1;
column = 0;
marks.push(CharMappingMark {
offset: offset + 1,
line,
column,
});
}
Some((offset, _)) => {
if column % 16 == 0 && column > 0 {
marks.push(CharMappingMark {
offset,
line,
column,
});
}
column += 1;
}
None => break,
}
}
Self {
src: src.to_owned(),
marks,
}
}
fn get_position(&self, byte_offset: usize) -> (u32, u32) {
let byte_offset = byte_offset + 1; let found = self
.marks
.binary_search_by(|mark| mark.offset.cmp(&byte_offset))
.unwrap_or_else(|x| x - 1);
let mark = &self.marks[found];
let line = mark.line;
let mut column = mark.column;
for (offset, _) in self.src[mark.offset..].char_indices() {
if mark.offset + offset >= byte_offset {
break;
}
column += 1;
}
(line, column)
}
}
#[derive(Debug)]
struct CharMappingMark {
offset: usize,
line: u32,
column: u32,
}
#[derive(Default, Clone, Copy)]
pub struct SourcePos {
byte_offset: (usize, usize),
}
impl SourcePos {
pub fn new(start: usize, end: usize) -> Self {
SourcePos {
byte_offset: (start, end),
}
}
pub fn get_byte_offsets(&self) -> (usize, usize) {
self.byte_offset
}
pub fn get_positions(&self, map: &SourceWithLineStarts) -> ((u32, u32), (u32, u32)) {
let start = map.get_position(self.byte_offset.0);
let end_off = if self.byte_offset.1 > 0 {
self.byte_offset.1 - 1
} else {
self.byte_offset.1
};
let end = map.get_position(end_off);
(start, end)
}
}
impl std::fmt::Debug for SourcePos {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.byte_offset.fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::SourcePos;
use super::SourceWithLineStarts;
#[test]
fn no_linebreaks() {
let map = SourceWithLineStarts::new("qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM");
for i in 0..20 {
assert_eq!(
SourcePos::new(i, 0).get_positions(&map).0,
(1, i as u32 + 1)
);
}
}
#[test]
fn unicode() {
let map = SourceWithLineStarts::new("!ΑαΒβΓγΔδΕεΖζΗηΘθΙιΚκΛλΜμΝνΞξΟοΠπΡρΣσςΤτΥυΦφΧχΨψΩω");
assert_eq!(SourcePos::new(0, 0).get_positions(&map).0, (1, 1));
for i in 1..20 {
assert_eq!(
SourcePos::new(i, 0).get_positions(&map).0,
(1, ((i - 1) / 2) as u32 + 2)
);
}
}
#[test]
fn many_linebreaks() {
let map = SourceWithLineStarts::new("\n\n\n\n\n\n123");
for i in 0..6 {
assert_eq!(
SourcePos::new(i, 0).get_positions(&map).0,
(i as u32 + 2, 0)
);
}
assert_eq!(SourcePos::new(7, 0).get_positions(&map).0, (7, 2));
assert_eq!(SourcePos::new(8, 0).get_positions(&map).0, (7, 3));
}
#[test]
fn after_end() {
let map = SourceWithLineStarts::new("123");
assert_eq!(SourcePos::new(100, 0).get_positions(&map).0, (1, 3));
let map = SourceWithLineStarts::new("123\n");
assert_eq!(SourcePos::new(100, 0).get_positions(&map).0, (2, 0));
let map = SourceWithLineStarts::new("123\n456");
assert_eq!(SourcePos::new(100, 0).get_positions(&map).0, (2, 3));
}
}