1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
//! # Row
//!
//! Utilities for rows. A `Row` owns the underlying characters, the rendered
//! string and the syntax highlighting information.
use std::{iter::repeat_n, num::NonZeroUsize};
use unicode_width::UnicodeWidthChar;
use crate::ansi_escape::{RESET, WBG, push_colored};
use crate::syntax::{Conf as SyntaxConf, HlType};
/// The "Highlight State" of a row: which multi-character construct, if any,
/// is still open at a given position while highlighting.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum HlState {
    /// Normal state: no comment or string is currently open.
    #[default]
    Normal,
    /// A multi-line comment has been opened, but not yet closed.
    MultiLineComment,
    /// A string has been opened with the given quote character (for instance
    /// `b'\''` or `b'"'`), but not yet closed.
    String(u8),
    /// A multi-line string has been opened, but not yet closed.
    MultiLineString,
}
/// Represents a row of characters and how it is rendered: the raw bytes, the
/// rendered text, the mappings between the two, and the highlighting data.
#[derive(Default)]
pub struct Row {
/// The characters of the row, as raw bytes.
pub chars: Vec<u8>,
/// How the characters are rendered. In particular, tabs are converted into
/// several spaces, and bytes may be combined into single UTF-8
/// characters.
render: String,
/// Mapping from byte indices in `self.chars` to the rendered column at
/// which the character containing that byte starts. `update` adds one
/// trailing entry holding the total rendered width; a new row starts with
/// the single entry `0`.
pub cx2rx: Vec<usize>,
/// Mapping from rendered columns to the byte index in `self.chars` at
/// which the corresponding character starts. `update` adds one trailing
/// entry holding the total number of bytes consumed.
pub rx2cx: Vec<usize>,
/// The `HlType` entries computed by `update_syntax`, which walks
/// `self.render` byte by byte.
pub hl: Vec<HlType>,
/// The final state of the row, as stored by `update_syntax`.
pub hl_state: HlState,
/// If not `None`, the range that is currently matched during a FIND
/// operation. `draw` compares it against rendered columns.
pub match_segment: Option<std::ops::Range<usize>>,
}
impl Row {
/// Build a row that owns `chars`. Nothing is rendered yet: `cx2rx` starts
/// with a single `0` entry and every other field keeps its default value.
pub fn new(chars: Vec<u8>) -> Self {
    let cx2rx = vec![0];
    Self { chars, cx2rx, ..Self::default() }
}
// TODO: Combine update and update_syntax
/// Rebuild the rendered form of the row, then recompute its highlighting.
///
/// `self.chars` is decoded with `String::from_utf8_lossy`. Each tab is
/// expanded with spaces up to the next multiple of `tab`; any other character
/// is copied as is and occupies its display width (1 when the width is
/// unknown). `cx2rx` and `rx2cx` are rebuilt along the way, each with one
/// trailing entry for the end of the row.
///
/// The `hl_state` argument is the `HlState` at the end of the previous row;
/// the value returned is the state at the end of this row, as computed by
/// `update_syntax`.
// NOTE(review): an invalid byte is decoded as U+FFFD, which counts for 3 bytes
// in `cx` -- confirm whether callers guarantee valid UTF-8.
pub fn update(&mut self, syntax: &SyntaxConf, hl_state: HlState, tab: NonZeroUsize) -> HlState {
    self.render.clear();
    self.cx2rx.clear();
    self.rx2cx.clear();

    let tab_width = tab.get();
    let (mut cx, mut rx) = (0_usize, 0_usize);
    for ch in String::from_utf8_lossy(&self.chars).chars() {
        let byte_len = ch.len_utf8();
        // Number of columns taken by this character once rendered
        let columns = if ch == '\t' {
            let fill = tab_width - rx % tab_width;
            self.render.extend(repeat_n(' ', fill));
            fill
        } else {
            self.render.push(ch);
            ch.width().unwrap_or(1)
        };
        // Every byte of the character maps to the column where it starts, and
        // every column it covers maps back to its first byte.
        self.cx2rx.extend(repeat_n(rx, byte_len));
        self.rx2cx.extend(repeat_n(cx, columns));
        rx += columns;
        cx += byte_len;
    }
    // Trailing entries, for a cursor located after the last character
    self.cx2rx.push(rx);
    self.rx2cx.push(cx);

    self.update_syntax(syntax, hl_state)
}
/// Return the size, in bytes of `self.chars`, of the character rendered at
/// column `rx`.
///
/// The size is the difference between `self.rx2cx[rx]` and the first
/// different entry found after it in `self.rx2cx`; entries equal to
/// `self.rx2cx[rx]` (the other columns of the same character) are skipped.
/// When no such entry exists, including when `rx` lies past the end of the
/// mapping, 1 is returned.
pub fn get_char_size(&self, rx: usize) -> usize {
    for next_cx in self.rx2cx.iter().skip(rx + 1) {
        // Indexing is done here, not before the loop, so that an out-of-range
        // `rx` falls through to the default instead of panicking.
        let size = next_cx - self.rx2cx[rx];
        if size > 0 {
            return size;
        }
    }
    1
}
/// Update the syntax highlighting types of the row.
///
/// Walks `self.render` byte by byte and fills `self.hl`, starting in
/// `hl_state` (the state at the end of the previous row). The state reached
/// at the end of the row is stored in `self.hl_state` and returned; an
/// unterminated single-line string is not carried over to the next row.
fn update_syntax(&mut self, syntax: &SyntaxConf, mut hl_state: HlState) -> HlState {
    self.hl.clear();
    let line = self.render.as_bytes();

    // Delimiters for multi-line comments and multi-line strings, as
    // `Option<(start, end)>`. A multi-line string uses the same delimiter on
    // both sides.
    let ml_comment_delims = syntax.ml_comment_delims.as_ref().map(|(start, end)| (start, end));
    let ml_string_delims = syntax.ml_string_delim.as_ref().map(|x| (x, x));

    'syntax_loop: while self.hl.len() < line.len() {
        // The next byte to highlight is always the one following the last
        // highlighted byte.
        let i = self.hl.len();
        // Whether `s` is found in `line` at position `i`
        let find_str = |s: &str| line.get(i..(i + s.len())).is_some_and(|r| r.eq(s.as_bytes()));

        // A single-line comment runs until the end of the row.
        if hl_state == HlState::Normal && syntax.sl_comment_start.iter().any(|s| find_str(s)) {
            self.hl.extend(repeat_n(HlType::Comment, line.len() - i));
            continue;
        }

        // Multi-line strings and multi-line comments have the same behavior; the only
        // differences are: the start/end delimiters, the `HlState`, the `HlType`.
        for (delims, mstate, mtype) in &[
            (ml_comment_delims, HlState::MultiLineComment, HlType::MlComment),
            (ml_string_delims, HlState::MultiLineString, HlType::MlString),
        ] {
            if let Some((start, end)) = delims {
                if hl_state == *mstate {
                    if find_str(end) {
                        // Highlight the whole end delimiter and leave the state
                        self.hl.extend(repeat_n(mtype, end.len()));
                        hl_state = HlState::Normal;
                    } else {
                        self.hl.push(*mtype);
                    }
                    continue 'syntax_loop;
                } else if hl_state == HlState::Normal && find_str(start) {
                    // Highlight the whole start delimiter and enter the state
                    self.hl.extend(repeat_n(mtype, start.len()));
                    hl_state = *mstate;
                    continue 'syntax_loop;
                }
            }
        }

        let c = line[i];

        // At this point, hl_state is Normal or String
        if let HlState::String(quote) = hl_state {
            self.hl.push(HlType::String);
            if c == quote {
                hl_state = HlState::Normal;
            } else if c == b'\\' && i != line.len() - 1 {
                // Escaped character: highlight it too, so that an escaped
                // quote does not close the string.
                self.hl.push(HlType::String);
            }
            continue;
        } else if syntax.sl_string_quotes.contains(&(c as char)) {
            hl_state = HlState::String(c);
            self.hl.push(HlType::String);
            continue;
        }

        let prev_sep = (i == 0) || is_sep(line[i - 1]);

        // A number starts with a digit following a separator, and continues
        // until the next separator.
        if syntax.highlight_numbers
            && ((c.is_ascii_digit() && prev_sep)
                || (i != 0 && self.hl[i - 1] == HlType::Number && !prev_sep && !is_sep(c)))
        {
            self.hl.push(HlType::Number);
            continue;
        }

        if prev_sep {
            // This filter makes sure that names such as "in_comment" are not partially
            // highlighted (even though "in" is a keyword in rust)
            // The argument is the keyword that is matched at `i`.
            let s_filter = |kw: &str| line.get(i + kw.len()).is_none_or(|c| is_sep(*c));
            for (keyword_highlight_type, kws) in &syntax.keywords {
                // Empty keywords are ignored: they would match everywhere
                // without making any progress.
                let matched =
                    kws.iter().find(|kw| !kw.is_empty() && find_str(kw) && s_filter(kw));
                if let Some(keyword) = matched {
                    self.hl.extend(repeat_n(*keyword_highlight_type, keyword.len()));
                    // Restart from the byte following the keyword, so that it
                    // is examined like any other byte (it may open a string
                    // or a comment) and so that nothing is pushed past the
                    // end of the row.
                    continue 'syntax_loop;
                }
            }
        }

        self.hl.push(HlType::Normal);
    }

    // String state doesn't propagate to the next row
    self.hl_state =
        if matches!(hl_state, HlState::String(_)) { HlState::Normal } else { hl_state };
    self.hl_state
}
/// Draw the row and write the result to `buffer`.
///
/// The first `offset` rendered characters are skipped and at most `max_len`
/// characters are written. ASCII control characters are written through
/// `push_colored` with `WBG`, as `'@' + code` for codes up to 26 and as `?`
/// otherwise. When `use_color` is set, a highlight sequence is written each
/// time the highlight type changes, the part of the row covered by
/// `self.match_segment` is drawn as `HlType::Match`, and `RESET` is appended
/// after the last character.
// NOTE(review): `offset` is applied to the characters of `self.render` and to
// the entries of `self.hl` alike -- confirm both stay aligned for non-ASCII
// rows.
pub fn draw(&self, offset: usize, max_len: usize, buffer: &mut String, use_color: bool) {
    let mut current_hl_type = HlType::Normal;
    let chars = self.render.chars().skip(offset).take(max_len);
    // Rendered column of the first character that is drawn
    let mut rx: usize = self.render.chars().take(offset).map(|c| c.width().unwrap_or(1)).sum();
    for (c, mut hl_type) in chars.zip(self.hl.iter().skip(offset)) {
        if c.is_ascii_control() {
            let rendered_char = if (c as u8) <= 26 { (b'@' + c as u8) as char } else { '?' };
            push_colored(buffer, WBG, &rendered_char.to_string(), use_color);
            // Restore previous color
            if use_color && current_hl_type != HlType::Normal {
                buffer.push_str(&current_hl_type.to_string());
            }
        } else {
            if let Some(match_segment) = &self.match_segment {
                if match_segment.contains(&rx) {
                    // Set the highlight type to Match, i.e. set the background to cyan
                    hl_type = &HlType::Match;
                } else if use_color && rx == match_segment.end {
                    // Reset the formatting, in particular the background
                    buffer.push_str(RESET);
                }
            }
            if use_color && current_hl_type != *hl_type {
                buffer.push_str(&hl_type.to_string());
            }
            current_hl_type = *hl_type;
            buffer.push(c);
        }
        rx += c.width().unwrap_or(1);
    }
    if use_color {
        buffer.push_str(RESET);
    }
}
}
/// Return whether `c` is an ASCII separator: ASCII whitespace, the NUL byte,
/// or any ASCII punctuation character other than the underscore.
const fn is_sep(c: u8) -> bool {
    match c {
        // The underscore is part of identifiers
        b'_' => false,
        b'\0' => true,
        _ => c.is_ascii_whitespace() || c.is_ascii_punctuation(),
    }
}