metamath_rs/line_cache.rs
1//! Utilities for source-offset/line-number mapping.
2
3use crate::util::HashMap;
4use std::cmp::Ordering;
5
6const PAGE: usize = 256;
7
8/// An object for efficient repeated byte offset to line conversions.
9///
10/// The first time a query is made for a given buffer, an index is constructed
11/// storing the line number at 256 byte intervals in the file. Subsequent
12/// queries can reuse the index.
13///
14/// This is expected to be a very short-lived object. If the line cache
15/// outlives any of the buffers it has been queried against, and future buffers
16/// receive the same address range, the line cache will return incorrect results
17/// (but will not crash).
18#[derive(Default, Debug)]
19pub struct LineCache {
20 map: HashMap<(usize, usize), Vec<u32>>,
21}
22
23#[inline(never)]
24fn make_index(mut buf: &[u8]) -> Vec<u32> {
25 assert!(buf.len() < u32::MAX as usize - 1);
26 let mut out = Vec::with_capacity(buf.len() / PAGE + 1);
27 out.push(0);
28 let mut count = 0u32;
29
30 // record the running total of newlines every PAGE bytes
31 while buf.len() >= PAGE {
32 let mut page = &buf[0..PAGE];
33 buf = &buf[PAGE..];
34 // use an i8 accumulator to maximize the effectiveness of vectorization.
35 // do blocks of 128 because we don't want to overflow the i8. count
36 // down because all vector hardware supported by Rust generates fewer
37 // instructions that way (the natural compare instructions produce 0 and
38 // -1, not 0 and 1).
39 #[allow(clippy::cast_sign_loss)]
40 while page.len() >= 128 {
41 let mut inner = 0i8;
42 for &ch in &page[0..128] {
43 inner += -i8::from(ch == b'\n');
44 }
45 page = &page[128..];
46 count += u32::from((inner as u8).wrapping_neg());
47 }
48 out.push(count);
49 }
50
51 out
52}
53
54// find the lowest offset for which from_offset would give the target.
55// Panics if line number out of range.
56fn line_to_offset(buf: &[u8], index: &[u32], line: u32) -> usize {
57 let page = index
58 .binary_search_by(|&ll| {
59 if ll < line {
60 Ordering::Less
61 } else {
62 Ordering::Greater
63 }
64 })
65 .expect_err("cannot match");
66 // page*PAGE is the first page-aligned which is >= to the goal line, OR it
67 // points at an incomplete end page
68 if page == 0 {
69 // page 0 always has lineno 0, so inserting before it is only possible
70 // if line is 0 or negative
71 assert!(line == 0, "line out of range");
72 return 0;
73 }
74 // (page-1)*PAGE, then, is *not* >= to the goal line, but it's close to
75 // either the goal line or EOF
76 let mut at_lineno = index[page - 1];
77 let mut at_pos = (page - 1) * PAGE;
78 while at_lineno < line {
79 assert!(at_pos < buf.len(), "line out of range");
80 if buf[at_pos] == b'\n' {
81 at_lineno += 1;
82 }
83 at_pos += 1;
84 }
85
86 at_pos
87}
88
89impl LineCache {
90 fn get_index(&mut self, buf: &[u8]) -> &Vec<u32> {
91 self.map
92 .entry((buf.as_ptr() as usize, buf.len()))
93 .or_insert_with(|| make_index(buf))
94 }
95
96 /// Map a line to a buffer index. Panics if out of range.
97 #[must_use]
98 pub fn to_offset(&mut self, buf: &[u8], line: u32) -> usize {
99 line_to_offset(buf, self.get_index(buf), line - 1)
100 }
101
102 /// Map a buffer index to a (line, column) pair.
103 /// ## Panics
104 /// Panics if the buffer is larger than 4GiB or if offset is out of range.
105 #[must_use]
106 pub fn from_offset(&mut self, buf: &[u8], offset: usize) -> (u32, u32) {
107 let index = self.get_index(buf);
108 // find a start point
109 let mut lineno = index[offset / PAGE];
110 // fine-tune
111 for &ch in &buf[offset / PAGE * PAGE..offset] {
112 if ch == b'\n' {
113 lineno += 1;
114 }
115 }
116 // now for the column
117 let colno = offset - line_to_offset(buf, index, lineno);
118 (lineno + 1, u32::try_from(colno).unwrap() + 1)
119 }
120
121 /// Find the offset just after the end of the line (usually the
122 /// location of a '\n', unless we are at the end of the file).
123 #[must_use]
124 pub fn line_end(buf: &[u8], offset: usize) -> usize {
125 for (pos, &ch) in buf.iter().enumerate().skip(offset) {
126 if ch == b'\n' {
127 return pos;
128 }
129 }
130 buf.len()
131 }
132}