mdwright_document/
line_index.rs1use std::fmt;
16
17#[derive(Debug, Clone)]
21pub struct LineIndex {
22 line_starts: Vec<u32>,
27}
28
29#[derive(Clone, Debug, PartialEq, Eq)]
30pub enum LineIndexError {
31 OffsetOutOfBounds { byte: usize, source_len: usize },
32 OffsetTooLarge { byte: usize },
33 NotUtf8Boundary { byte: usize },
34}
35
36impl fmt::Display for LineIndexError {
37 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
38 match self {
39 Self::OffsetOutOfBounds { byte, source_len } => {
40 write!(f, "byte offset {byte} past source length {source_len}")
41 }
42 Self::OffsetTooLarge { byte } => write!(f, "byte offset {byte} exceeds u32 range"),
43 Self::NotUtf8Boundary { byte } => write!(f, "byte {byte} not on a UTF-8 boundary"),
44 }
45 }
46}
47
48impl std::error::Error for LineIndexError {}
49
50impl LineIndex {
51 #[must_use]
54 pub fn new(source: &str) -> Self {
55 let mut line_starts: Vec<u32> = Vec::with_capacity(source.len() / 40);
56 line_starts.push(0);
57 for (i, b) in source.bytes().enumerate() {
58 if b == b'\n' {
59 let next = u32::try_from(i.saturating_add(1)).unwrap_or(u32::MAX);
60 line_starts.push(next);
61 }
62 }
63 Self { line_starts }
64 }
65
66 pub fn locate(&self, source: &str, byte: usize) -> Result<(usize, usize), LineIndexError> {
77 if byte > source.len() {
78 return Err(LineIndexError::OffsetOutOfBounds {
79 byte,
80 source_len: source.len(),
81 });
82 }
83 let byte_u32 = u32::try_from(byte).map_err(|_| LineIndexError::OffsetTooLarge { byte })?;
84 let idx = match self.line_starts.binary_search(&byte_u32) {
86 Ok(i) => i,
87 Err(i) => i.saturating_sub(1),
88 };
89 let line_start = self.line_starts.get(idx).copied().unwrap_or(0) as usize;
90 let prefix = source
91 .get(line_start..byte)
92 .ok_or(LineIndexError::NotUtf8Boundary { byte })?;
93 let column = prefix.chars().count().saturating_add(1);
94 Ok((idx.saturating_add(1), column))
95 }
96
97 #[must_use]
110 pub fn byte_of_position_0based(&self, source: &str, line: usize, column: usize) -> Option<usize> {
111 let line_start = self.line_starts.get(line).copied()? as usize;
112 let line_end = self
113 .line_starts
114 .get(line.saturating_add(1))
115 .copied()
116 .map_or(source.len(), |n| n as usize);
117 let mut visible_end = line_end;
121 if visible_end > line_start && source.as_bytes().get(visible_end.saturating_sub(1)) == Some(&b'\n') {
122 visible_end = visible_end.saturating_sub(1);
123 if visible_end > line_start && source.as_bytes().get(visible_end.saturating_sub(1)) == Some(&b'\r') {
124 visible_end = visible_end.saturating_sub(1);
125 }
126 }
127 let line_text = source.get(line_start..visible_end)?;
128 let mut cursor = line_start;
129 let mut remaining = column;
130 for ch in line_text.chars() {
131 if remaining == 0 {
132 return Some(cursor);
133 }
134 cursor = cursor.saturating_add(ch.len_utf8());
135 remaining = remaining.saturating_sub(1);
136 }
137 (remaining == 0).then_some(visible_end)
142 }
143
144 #[must_use]
151 pub fn line_bounds(&self, source: &str, byte: usize) -> Option<std::ops::Range<usize>> {
152 if byte > source.len() {
153 return None;
154 }
155 let byte_u32 = u32::try_from(byte).ok()?;
156 let idx = match self.line_starts.binary_search(&byte_u32) {
157 Ok(i) => i,
158 Err(i) => i.saturating_sub(1),
159 };
160 let start = self.line_starts.get(idx).copied()? as usize;
161 let raw_end = self
162 .line_starts
163 .get(idx.saturating_add(1))
164 .copied()
165 .map_or(source.len(), |n| n as usize);
166 let mut end = raw_end;
169 if end > start && source.as_bytes().get(end.saturating_sub(1)) == Some(&b'\n') {
170 end = end.saturating_sub(1);
171 if end > start && source.as_bytes().get(end.saturating_sub(1)) == Some(&b'\r') {
172 end = end.saturating_sub(1);
173 }
174 }
175 Some(start..end)
176 }
177}
178
179#[cfg(test)]
180mod tests {
181 use super::LineIndex;
182
183 fn locate(idx: &LineIndex, source: &str, byte: usize) -> Result<(usize, usize), String> {
184 idx.locate(source, byte).map_err(|err| err.to_string())
185 }
186
187 #[test]
188 fn locate_start_of_first_line() -> Result<(), String> {
189 let src = "hello\nworld\n";
190 let idx = LineIndex::new(src);
191 assert_eq!(locate(&idx, src, 0)?, (1, 1));
192 Ok(())
193 }
194
195 #[test]
196 fn locate_after_newline() -> Result<(), String> {
197 let src = "hello\nworld\n";
198 let idx = LineIndex::new(src);
199 assert_eq!(locate(&idx, src, 6)?, (2, 1));
200 Ok(())
201 }
202
203 #[test]
204 fn locate_codepoint_column() -> Result<(), String> {
205 let src = "αβγ\n";
206 let idx = LineIndex::new(src);
207 assert_eq!(locate(&idx, src, 4)?, (1, 3));
208 Ok(())
209 }
210
211 #[test]
212 fn rejects_out_of_range() {
213 let src = "hi\n";
214 let idx = LineIndex::new(src);
215 assert!(idx.locate(src, 99).is_err());
216 }
217
218 #[test]
219 fn byte_of_position_0based_basic() {
220 let src = "hello\nworld\n";
221 let idx = LineIndex::new(src);
222 assert_eq!(idx.byte_of_position_0based(src, 0, 0), Some(0));
223 assert_eq!(idx.byte_of_position_0based(src, 0, 5), Some(5));
224 assert_eq!(idx.byte_of_position_0based(src, 1, 0), Some(6));
225 assert_eq!(idx.byte_of_position_0based(src, 1, 5), Some(11));
226 }
227
228 #[test]
229 fn byte_of_position_0based_codepoints() {
230 let src = "αβγ\n";
231 let idx = LineIndex::new(src);
232 assert_eq!(idx.byte_of_position_0based(src, 0, 0), Some(0));
233 assert_eq!(idx.byte_of_position_0based(src, 0, 1), Some(2));
234 assert_eq!(idx.byte_of_position_0based(src, 0, 2), Some(4));
235 assert_eq!(idx.byte_of_position_0based(src, 0, 3), Some(6));
236 }
237
238 #[test]
239 fn byte_of_position_0based_out_of_range() {
240 let src = "hi\n";
241 let idx = LineIndex::new(src);
242 assert!(idx.byte_of_position_0based(src, 5, 0).is_none());
243 assert!(idx.byte_of_position_0based(src, 0, 99).is_none());
244 }
245}