jsona/util/
mapper.rs

1//! Utilities for mapping between offset:length bytes and col:row character positions.
2
3use rowan::{TextRange, TextSize};
4use serde::{Deserialize, Serialize};
5use std::collections::BTreeMap;
6
/// A location in a document, given both as a character index and as a line/column pair.
///
/// The derived ordering compares the fields in declaration order (`index`, then
/// `line`, then `column`), so the order of the fields below is significant.
#[derive(Debug, Eq, PartialEq, Ord, PartialOrd, Copy, Clone, Default, Serialize, Deserialize)]
pub struct Position {
    /// Cursor position in a document. When produced by `Mapper`, this is the
    /// zero-based index of the character (not the byte) within the source.
    pub index: u64,
    /// Line position in a document (zero-based or one-based, depending on the usage).
    pub line: u64,
    /// Column position within a line (zero-based or one-based, depending on the usage).
    pub column: u64,
}
16
17impl Position {
18    #[must_use]
19    pub fn new(index: u64, line: u64, column: u64) -> Self {
20        Position {
21            index,
22            line,
23            column,
24        }
25    }
26}
27
/// A span of a document delimited by two [`Position`]s.
#[derive(Debug, Eq, PartialEq, Copy, Clone, Default, Serialize, Deserialize)]
pub struct Range {
    /// The range's start position.
    pub start: Position,
    /// The range's end position.
    pub end: Position,
}
35
36impl Range {
37    pub fn join(&self, other: &Range) -> Range {
38        let start = self.start.min(other.start);
39        let end = self.end.max(other.end);
40        Self { start, end }
41    }
42}
43
/// Inclusive offset range measured in characters instead of bytes.
///
/// NOTE(review): the two private fields are presumably the start and end
/// offsets, in that order — confirm against the code that constructs this type.
#[derive(Debug, Clone, Copy)]
pub struct CharacterRange(u64, u64);
47
/// A mapper that translates between byte offsets and
/// line:column character positions.
///
/// Lines and columns are zero-based or one-based depending on the `one_based`
/// flag passed to the constructor.
#[derive(Debug, Clone)]
pub struct Mapper {
    /// Mapping offsets to positions.
    offset_to_position: BTreeMap<TextSize, Position>,

    /// Mapping positions to offsets.
    position_to_offset: BTreeMap<Position, TextSize>,

    /// Final value of the line counter after scanning the source: the number
    /// of `\n` characters plus the base (0 or 1) chosen at construction.
    lines: usize,

    /// Position of the imaginary character just past the end of the source.
    end: Position,
}
64
65impl Mapper {
66    /// Creates a new Mapper that remembers where
67    /// each line starts and ends.
68    ///
69    /// Uses UTF-16 character sizes for positions.
70    #[must_use]
71    pub fn new_utf16(source: &str, one_based: bool) -> Self {
72        Self::new_impl(source, true, if one_based { 1 } else { 0 })
73    }
74
75    /// Uses UTF-8 character sizes for positions.
76    #[must_use]
77    pub fn new_utf8(source: &str, one_based: bool) -> Self {
78        Self::new_impl(source, false, if one_based { 1 } else { 0 })
79    }
80
81    #[must_use]
82    pub fn offset(&self, position: Position) -> Option<TextSize> {
83        self.position_to_offset.get(&position).copied()
84    }
85
86    #[must_use]
87    pub fn text_range(&self, range: Range) -> Option<TextRange> {
88        self.offset(range.start)
89            .and_then(|start| self.offset(range.end).map(|end| TextRange::new(start, end)))
90    }
91
92    #[must_use]
93    pub fn position(&self, offset: TextSize) -> Option<Position> {
94        self.offset_to_position.get(&offset).copied()
95    }
96
97    #[must_use]
98    pub fn range(&self, range: TextRange) -> Option<Range> {
99        self.position(range.start())
100            .and_then(|start| self.position(range.end()).map(|end| Range { start, end }))
101    }
102
103    #[must_use]
104    pub fn mappings(&self) -> (&BTreeMap<TextSize, Position>, &BTreeMap<Position, TextSize>) {
105        (&self.offset_to_position, &self.position_to_offset)
106    }
107
108    #[must_use]
109    pub fn line_count(&self) -> usize {
110        self.lines
111    }
112
113    #[must_use]
114    pub fn all_range(&self) -> Range {
115        Range {
116            start: Position {
117                index: 0,
118                line: 0,
119                column: 0,
120            },
121            end: self.end,
122        }
123    }
124
125    fn new_impl(source: &str, utf16: bool, base: u64) -> Self {
126        let mut offset_to_position = BTreeMap::new();
127        let mut position_to_offset = BTreeMap::new();
128
129        let mut line: u64 = base;
130        let mut column: u64 = base;
131        let mut last_offset = 0;
132        let mut index: u64 = 0;
133
134        for (i, c) in source.chars().enumerate() {
135            index = i as u64;
136            let new_offset = last_offset + c.len_utf8();
137
138            let character_size = if utf16 { c.len_utf16() } else { 1 };
139
140            offset_to_position.extend((last_offset..new_offset).map(|b| {
141                (
142                    TextSize::from(b as u32),
143                    Position {
144                        index,
145                        line,
146                        column,
147                    },
148                )
149            }));
150
151            position_to_offset.extend((last_offset..new_offset).map(|b| {
152                (
153                    Position {
154                        index,
155                        line,
156                        column,
157                    },
158                    TextSize::from(b as u32),
159                )
160            }));
161
162            last_offset = new_offset;
163
164            column += character_size as u64;
165            if c == '\n' {
166                // LF is at the start of each line.
167                line += 1;
168                column = base;
169            }
170        }
171
172        // Last imaginary character.
173        offset_to_position.insert(
174            TextSize::from(last_offset as u32),
175            Position {
176                index,
177                line,
178                column,
179            },
180        );
181        position_to_offset.insert(
182            Position {
183                index,
184                line,
185                column,
186            },
187            TextSize::from(last_offset as u32),
188        );
189
190        Self {
191            offset_to_position,
192            position_to_offset,
193            lines: line as usize,
194            end: Position {
195                index,
196                line,
197                column,
198            },
199        }
200    }
201}